Compare commits
No commits in common. "c8" and "c9s-nozone7" have entirely different histories.
c8
...
c9s-nozone
27
.gitignore
vendored
27
.gitignore
vendored
@ -1 +1,26 @@
|
||||
SOURCES/papi-5.6.0.tar.gz
|
||||
papi-4.1.0.tar.gz
|
||||
/papi-4.1.1.tar.gz
|
||||
/papi-4.1.2.1.tar.gz
|
||||
/papi-4.1.3.tar.gz
|
||||
/papi-4.2.0.tar.gz
|
||||
/papi-4.2.1.tar.gz
|
||||
/papi-4.4.0.tar.gz
|
||||
/papi-5.0.0.tar.gz
|
||||
/papi-5.0.1.tar.gz
|
||||
/papi-5.1.0.2.tar.gz
|
||||
/papi-5.1.1.tar.gz
|
||||
/papi-5.2.0.tar.gz
|
||||
/papi-5.2.0-2.37.g5c1405a.tar.gz
|
||||
/papi-5.2.0-2.63.g91a6fa5.tar.gz
|
||||
/papi-5.2.0-2.1.gff3e15d.tar.gz
|
||||
/papi-5.3.0.tar.gz
|
||||
/papi-5.3.0-1.16.ga7f6159.tar.gz
|
||||
/papi-5.3.2.tar.gz
|
||||
/papi-5.4.0.tar.gz
|
||||
/papi-5.4.1.tar.gz
|
||||
/papi-5.4.3.tar.gz
|
||||
/papi-5.5.0.tar.gz
|
||||
/papi-5.5.1.tar.gz
|
||||
/papi-5.6.0.tar.gz
|
||||
/papi-5.7.0.tar.gz
|
||||
/papi-6.0.0.tar.gz
|
||||
|
@ -1,205 +0,0 @@
|
||||
commit 61616f7ddaaef1b79df85f0a3e969c886604de6c
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Mon Apr 2 17:47:31 2018 -0400
|
||||
|
||||
PAPI preset event support for Intel Knights Mill.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index faa4ae2f5..bb11f61d3 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -851,8 +851,10 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD
|
||||
#
|
||||
#
|
||||
# Intel MIC / Xeon-Phi / Knights Landing
|
||||
+# Intel Knights Mill
|
||||
#
|
||||
CPU,knl
|
||||
+CPU,knm
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES
|
||||
PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||||
@@ -885,6 +887,7 @@ PRESET,PAPI_BR_NTK,DERIVED_SUB,BR_INST_RETIRED:JCC,BR_INST_RETIRED:TAKEN_JCC
|
||||
PRESET,PAPI_RES_STL,NOT_DERIVED,RS_FULL_STALL:ANY
|
||||
PRESET,PAPI_STL_ICY,NOT_DERIVED,NO_ALLOC_CYCLES:ANY
|
||||
#
|
||||
+# End of knl,knm list
|
||||
|
||||
CPU,Intel Core2
|
||||
CPU,Intel Core
|
||||
commit 85003c716d76eff47607fa0967537c6cf63d8348
|
||||
Author: Steve Walk <swalk.cavium@gmail.com>
|
||||
Date: Fri Jun 8 15:50:50 2018 -0400
|
||||
|
||||
enable Cavium ThunderX2 support
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index bb11f61d3..46827f180 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1841,6 +1841,31 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_READ_ACCESS
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_WRITE_ACCESS
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_READ_REFILL
|
||||
PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_WRITE_REFILL
|
||||
+
|
||||
+#####################
|
||||
+# ARM ThunderX2 #
|
||||
+#####################
|
||||
+CPU,arm_thunderx2
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_RETIRED
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_RETIRED
|
||||
+PRESET,PAPI_L1_DCA,DERIVED_ADD,L1D_CACHE_RD,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit 111d01df256f691c2a2d2e14028fa4ebc9e63bed
|
||||
Author: Vince Weaver <vincent.weaver@maine.edu>
|
||||
Date: Tue Jan 22 17:09:29 2019 -0500
|
||||
|
||||
papi_events: add cascade lake X support
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index f5bcf1a46..009074449 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -724,9 +724,11 @@ CPU,hsw_ep
|
||||
CPU,bdw
|
||||
CPU,bdw_ep
|
||||
CPU,skl
|
||||
-CPU,skx
|
||||
# Note, libpfm4 treats Kaby Lake as just a form of skylake
|
||||
CPU,kbl
|
||||
+CPU,skx
|
||||
+# Note, libpfm4 treats Cascade Lake-X as just a form of skylake-X
|
||||
+CPU,clx
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
||||
PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||||
commit d5a1a9ae2e4102e03063e76e242d4a3547cd5df3
|
||||
Author: Vince Weaver <vincent.weaver@maine.edu>
|
||||
Date: Wed Jan 23 16:58:10 2019 -0500
|
||||
|
||||
papi_events: the skylake events are actually split in two, make sure cascadelake gets both cases too
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 009074449..361813847 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -832,6 +832,7 @@ PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH
|
||||
|
||||
CPU,skl
|
||||
CPU,skx
|
||||
+CPU,clx
|
||||
# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE
|
||||
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE
|
||||
@@ -849,7 +850,7 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE
|
||||
PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD
|
||||
|
||||
|
||||
-# End of hsw,bdw,skl list
|
||||
+# End of hsw,bdw,skl,clx list
|
||||
#
|
||||
#
|
||||
# Intel MIC / Xeon-Phi / Knights Landing
|
||||
commit c9d0702caf582179cf89f28d987a68e48b9af0e9
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Mon May 20 16:31:45 2019 -0400
|
||||
|
||||
I have added PAPI POWER9 event definitions for PAPI_L2_DCR, PAPI_L2_DCW, PAPI_BR_CN, PAPI_BR_NTK, PAPI_BR_UCN, and PAPI_BR_TKN.
|
||||
These events have been tested. Their patterns of behavior were measured during the execution of performance benchmarks on Summit's POWER9 processors.
|
||||
The patterns of behavior for the corresponding events on Intel Haswell processors were measured during the execution of the same performance benchmarks.
|
||||
The respective events from each architecture behave similarly.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 361813847..f658931ed 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1588,6 +1588,8 @@ PRESET,PAPI_L1_DCA,DERIVED_ADD,PM_LD_REF_L1,PM_ST_CMPL
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS
|
||||
PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,PM_DATA_FROM_L2
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,PM_L2_ST_HIT
|
||||
PRESET,PAPI_L3_DCR,NOT_DERIVED,PM_DATA_FROM_L2MISS
|
||||
PRESET,PAPI_L3_DCM,DERIVED_ADD,PM_DATA_FROM_LMEM,PM_DATA_FROM_RMEM
|
||||
PRESET,PAPI_L3_LDM,DERIVED_ADD,PM_DATA_FROM_LMEM,PM_DATA_FROM_RMEM
|
||||
@@ -1617,6 +1619,10 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,PM_LD_REF_L1,PM_LD_MISS_L1,PM_ST_FIN
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,PM_BRU_FIN
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,PM_TAKEN_BR_MPRED_CMPL
|
||||
PRESET,PAPI_BR_PRC,NOT_DERIVED,PM_BR_PRED
|
||||
+PRESET,PAPI_BR_CN,DERIVED_SUB,PM_BR_CMPL,PM_BR_UNCOND
|
||||
+PRESET,PAPI_BR_NTK,DERIVED_POSTFIX,N0|N1|-|N2|-|,PM_BR_CMPL,PM_BR_UNCOND,PM_BR_TAKEN_CMPL
|
||||
+PRESET,PAPI_BR_UCN,NOT_DERIVED,PM_BR_UNCOND
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,PM_BR_CORECT_PRED_TAKEN_CMPL
|
||||
PRESET,PAPI_FXU_IDL,NOT_DERIVED,PM_FXU_IDLE
|
||||
#
|
||||
CPU,ultra12
|
||||
commit 6440c5995a10db05959325b1192368734bfa7e5b
|
||||
Author: Carl Love <cel@us.ibm.com>
|
||||
Date: Wed Aug 14 07:52:50 2019 -0400
|
||||
|
||||
Per Carl Love, "The POWER9 event PM_BR_TAKEN_CMPL includes conditional and unconditional branches. The equation for event PAPI_BR_NTK should not include the event PM_BR_UNCOND as PM_BR_TAKEN_CMPL already counts unconditional branches. The POWER9 event PM_LD_REF_L1 includes hits and misses to the L1. Thus we should not be adding PM_LS_MISS_L1_ALT when calculating PAPI_LD_INS on POWER9."
|
||||
|
||||
The definitions for these preset events were changed accordingly, and their patterns of behavior were measured during the execution of performance benchmarks on the IBM POWER9 processors on Summit. The patterns of behavior for the corresponding events on the Intel Skylake and Broadwell processors were measured during the execution of the same performance benchmarks. The respective events from each architecture behave similarly. In addition, the new definitions pass the PAPI validation tests.
|
||||
|
||||
Signed-off-by: Daniel Barry <dbarry@vols.utk.edu>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index f658931ed..8df74866e 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1613,14 +1613,14 @@ PRESET,PAPI_TOT_CYC,NOT_DERIVED,PM_RUN_CYC
|
||||
PRESET,PAPI_HW_INT,NOT_DERIVED,PM_EXT_INT
|
||||
PRESET,PAPI_STL_ICY,DERIVED_POSTFIX,N0|N1|-|,PM_RUN_CYC,PM_1PLUS_PPC_DISP
|
||||
PRESET,PAPI_SR_INS,NOT_DERIVED,PM_ST_FIN
|
||||
-PRESET,PAPI_LD_INS,DERIVED_ADD,PM_LD_REF_L1,PM_LD_MISS_L1_ALT
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,PM_LD_REF_L1
|
||||
PRESET,PAPI_LST_INS,NOT_DERIVED,PM_LSU_FIN
|
||||
PRESET,PAPI_LST_INS,DERIVED_ADD,PM_LD_REF_L1,PM_LD_MISS_L1,PM_ST_FIN
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,PM_BRU_FIN
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,PM_TAKEN_BR_MPRED_CMPL
|
||||
PRESET,PAPI_BR_PRC,NOT_DERIVED,PM_BR_PRED
|
||||
PRESET,PAPI_BR_CN,DERIVED_SUB,PM_BR_CMPL,PM_BR_UNCOND
|
||||
-PRESET,PAPI_BR_NTK,DERIVED_POSTFIX,N0|N1|-|N2|-|,PM_BR_CMPL,PM_BR_UNCOND,PM_BR_TAKEN_CMPL
|
||||
+PRESET,PAPI_BR_NTK,DERIVED_POSTFIX,N0|N1|-|,PM_BR_CMPL,PM_BR_TAKEN_CMPL
|
||||
PRESET,PAPI_BR_UCN,NOT_DERIVED,PM_BR_UNCOND
|
||||
PRESET,PAPI_BR_TKN,NOT_DERIVED,PM_BR_CORECT_PRED_TAKEN_CMPL
|
||||
PRESET,PAPI_FXU_IDL,NOT_DERIVED,PM_FXU_IDLE
|
||||
commit 20890adcb59a1c1648cb70be65332c03a3781e1a
|
||||
Author: Anthony Castaldo <TonyCastaldo@icl.utk.edu>
|
||||
Date: Thu Jan 16 16:43:51 2020 -0500
|
||||
|
||||
Added two machine types to papi_events.csv to be in line with
|
||||
libpfm4 update to support amd64_fam17h_zen1 and zen2.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 97446ad2c..8e96adfbd 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -396,6 +396,8 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE
|
||||
#
|
||||
#
|
||||
CPU,amd64_fam17h
|
||||
+CPU,amd64_fam17h_zen1
|
||||
+CPU,amd64_fam17h_zen2
|
||||
#
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
@ -1,796 +0,0 @@
|
||||
commit ae449f73abd0849f05ab3e1f3a64bde0c670c645
|
||||
Author: Anthony <adanalis@icl.utk.edu>
|
||||
Date: Fri Jul 17 12:05:14 2020 -0400
|
||||
|
||||
Separated the cache preset events of AMD Zen1 and Zen2 and added some more.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 8e96adfbd..2325bd4dc 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE
|
||||
#
|
||||
CPU,amd64_fam17h
|
||||
CPU,amd64_fam17h_zen1
|
||||
-CPU,amd64_fam17h_zen2
|
||||
#
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
@@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT
|
||||
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS
|
||||
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
|
||||
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
|
||||
+# Events discovered via CAT
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam17h_zen2
|
||||
+# Events copied from zen1 that also exist on zen2
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
+# Events discovered via CAT
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
+
|
||||
#
|
||||
#
|
||||
CPU,Intel architectural PMU
|
||||
@@ -1877,6 +1897,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
|
||||
+#########################
|
||||
+# ARM Fujitsu A64FX #
|
||||
+#########################
|
||||
+CPU,arm_a64fx
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 13:33:38 2020 -0400
|
||||
|
||||
Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS)
|
||||
for AMD zen2.
|
||||
|
||||
PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
explains that FLOP events require MergeEvent support, which was included
|
||||
in the 5.6 kernel.
|
||||
|
||||
===>>> Hence, a kernel version 5.6 or greater is required.
|
||||
|
||||
NOTE: without the MergeEvent support in the kernel,
|
||||
there is no guarantee that the SSE/AVX FLOP
|
||||
events produce any useful data whatsoever.
|
||||
|
||||
These events have been tested and verified for
|
||||
scalar flops, SSE, AVX, and FMA:
|
||||
|
||||
(1) for one AVX instruction (e.g. _mm256_add_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns
|
||||
a count of 4 (in the case of double precision), and
|
||||
a count of 8 (in the case of single precision).
|
||||
|
||||
(2) for one AVX FMA instruction (e.g. _mm256_macc_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns
|
||||
a count of 8 (in the case of double precision), and
|
||||
a count of 16 (in the case of single precision).
|
||||
|
||||
(3) for one SSE instruction (e.g. _mm_mul_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns
|
||||
a count of 2 (in the case of double precision), and
|
||||
a count of 4 (in the case of single precision).
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 2325bd4dc..2ff3e4d16 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
|
||||
-#
|
||||
-#
|
||||
+# New FLOP event on zen2
|
||||
+# PPR (under section 2.1.15.3. --
|
||||
+# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
+# explains that FLOP events require MergeEvent support, which was included
|
||||
+# in the 5.6 kernel.
|
||||
+# Hence, a kernel version 5.6 or greater is required.
|
||||
+# NOTE: without the MergeEvent support in the kernel, there is no guarantee
|
||||
+# that this SSE/AVX FLOP event produces any useful data whatsoever.
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+
|
||||
+
|
||||
CPU,Intel architectural PMU
|
||||
CPU,ix86arch
|
||||
#
|
||||
commit 35f93252a6e222299c03f2c94912334488e76b02
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 18:40:59 2020 -0400
|
||||
|
||||
Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP)
|
||||
for AMD zen2.
|
||||
|
||||
For unoptimized code (like native MMM), these events may include
|
||||
non-numeric floating-point instructions, e.g. MOVSD: move or merge
|
||||
scalar double-precision floating-point value instructions.
|
||||
|
||||
Tested with:
|
||||
1) SSE double: _mm_mul_pd / _mm_add_pd
|
||||
2) SSE single: _mm_mul_ps / _mm_add_ps
|
||||
3) AVX double: _mm256_mul_pd / _mm256_add_pd
|
||||
4) AVX single: _mm256_mul_ps / _mm256_add_ps
|
||||
5) FMA double: _mm256_macc_pd
|
||||
6) FMA single: _mm256_macc_pd
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 2ff3e4d16..60a64564d 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
+# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
+PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 344f6493425d865577508ff32b6f65516b1b4394
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 19:03:31 2020 -0400
|
||||
|
||||
Added missing 'PRESET' to csv file.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 60a64564d..724d520f0 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
-PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 4616aa717c5301a9a478876661eb8ac1f18c0333
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Oct 8 11:36:23 2020 -0400
|
||||
|
||||
For zen2, since FP_OPS counts both single- and double-prec operations
|
||||
correctly, we don't need to confuse the user with additional
|
||||
DP_OPS and SP_OPS events. So, I'm taking them out.
|
||||
|
||||
Same applies for events counting FP instructions.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 724d520f0..9ebf557e1 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
# NOTE: without the MergeEvent support in the kernel, there is no guarantee
|
||||
# that this SSE/AVX FLOP event produces any useful data whatsoever.
|
||||
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
-PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
-PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+# Since FP_OPS counts both single- and double-prec operations
|
||||
+# correctly, we don't need to confuse the user with additional
|
||||
+# DP_OPS and SP_OPS events. So, I'm taking them out.
|
||||
+#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+#
|
||||
# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+# Since FP_INS counts both single- and double-prec instuctions
|
||||
+# correctly, we don't need to confuse the user with additional
|
||||
+# VEC_DP and VEC_SP events. So, I'm taking them out.
|
||||
+#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 274219e85ba8adcd2e9c78507adf7edb05b71daa
|
||||
Author: Sebastian Mobo <smobo@vols.utk.edu>
|
||||
Date: Thu Oct 8 13:40:21 2020 -0400
|
||||
|
||||
Added instruction-cache preset events for the Zen2.
|
||||
|
||||
Signed-off-by: Anthony <adanalis@icl.utk.edu>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 9ebf557e1..fd75f9371 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
-
|
||||
+#
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+#
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
|
||||
# New FLOP event on zen2
|
||||
# PPR (under section 2.1.15.3. --
|
||||
# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
commit b87ac4beda096086e0040f8ec1b44c4791a9739c
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Mon Dec 14 14:06:22 2020 +0900
|
||||
|
||||
Corrected typo for A64FX support (PAPI_L2_DCH is a typo of PAPI_L2_DCA)
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index fd75f9371..164f05641 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1937,7 +1937,7 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
|
||||
#
|
||||
commit 869864f813f0681b5c9a4b65de2135c8708a2afb
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Mon Dec 14 19:34:59 2020 +0900
|
||||
|
||||
Add or modify various A64FX support events, including floating point events (PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS).
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 164f05641..9192b1041 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1930,15 +1930,46 @@ PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
#########################
|
||||
CPU,arm_a64fx
|
||||
#
|
||||
+PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_DCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_TCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L2I_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L2D_TLB_REFILL,L2I_TLB_REFILL
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC
|
||||
|
||||
#
|
||||
CPU,mips_74k
|
||||
commit 7a3c22763ef2ba00a2b8cb069c3501f35ecb13de
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Dec 15 13:43:43 2020 +0900
|
||||
|
||||
modify PAPI_FP_INS and PAPI_VEC_INS for A64FX supports
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 9192b1041..7b4ceb674 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1941,11 +1941,11 @@ PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,FP_SPEC
|
||||
PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
-PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
|
||||
PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
commit 530d4763fb8e6dd52109387bd58c8c1305fd6b63
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Fri Feb 12 15:01:21 2021 +0900
|
||||
|
||||
remove PAPI_L1_DCA and PAPI_L1_DCH for a64fx
|
||||
|
||||
There seems to be a problem with PAPI_L1_DCA and PAPI_L1_DCH for a64fx that prefetch overcounts.
|
||||
I delete (comment out) PAPI_L1_DCA and PAPI_L1_DCH for a64fx from the papi_events.csv file.
|
||||
I will issue the pullrequest again once I have identified how to handle the overcount.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 7b4ceb674..0f5ec8344 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1949,8 +1949,8 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
|
||||
PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
-PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
-PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
+#PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+#PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
commit 340f68940234f2db181147fc249907b4f1293e62
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Feb 16 17:16:24 2021 +0900
|
||||
|
||||
remove PAPI_L1_TCA and PAPI_L1_TCH for a64fx
|
||||
|
||||
PAPI_L1_TCA and PAPI_L1_TCH for a64fx measure L1D_CACHE just like PAPI_L1_DCA and PAPI_L1_DCH,
|
||||
so I delete (comment out) PAPI_L1_TCA and PAPI_L1_TCH for a64fx from the papi_events.csv file.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 0f5ec8344..4ef647959 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1955,8 +1955,8 @@ PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
-PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
+#PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
+#PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
commit 02f34baafb868d183f21bebfd3c46574847b9929
|
||||
Author: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
|
||||
Date: Tue May 18 02:51:56 2021 +0530
|
||||
|
||||
Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip
|
||||
|
||||
Signed-off-by: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 4ef647959..d9e9da8a3 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X
|
||||
# VEC_DP and VEC_SP events. So, I'm taking them out.
|
||||
#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam19h_zen3
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
|
||||
+# RETIRED_SSE_AVX_FLOPS requires MergeEvent support.
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
|
||||
+PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
|
||||
+PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
+PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 6964aa356fa606f320c7b871123aceb5c1f21999
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Aug 24 14:17:29 2021 +0900
|
||||
|
||||
Fix the PAPI_FUL_CCY setting for a64fx
|
||||
|
||||
In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect.
|
||||
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT
|
||||
|
||||
The correct settings are:.
|
||||
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 4ef647959..74deb712f 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
|
||||
PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
-PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
|
||||
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Fri Mar 4 15:41:30 2022 +0900
|
||||
|
||||
Add PAPI idle-related preset events for a64fx
|
||||
|
||||
For a64fx, add four PAPI idle-related preset events
|
||||
(PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL).
|
||||
|
||||
PAPI_BRU_IDL = BR_COMP_WAIT
|
||||
PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT
|
||||
PAPI_FPU_IDL = FL_COMP_WAIT
|
||||
PAPI_LSU_IDL = LD_COMP_WAIT
|
||||
|
||||
The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT,
|
||||
and LD_COMP_WAIT can be found in the "14.4. Cycle Accounting"
|
||||
on A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL:.
|
||||
https://github.com/fujitsu/A64FX/blob/master/doc
|
||||
|
||||
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 74deb712f..1cd498e91 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
|
||||
+PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT
|
||||
+PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT
|
||||
+PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT
|
||||
+PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
commit 3c5364839f583185c1e8dca58d5fe36c9ec82876
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Tue Aug 30 23:17:30 2022 +0000
|
||||
|
||||
papi_avail: add presets for Intel Ice Lake SP
|
||||
|
||||
Define preset events for the Intel Ice Lake SP processor.
|
||||
These presets have been verified using the Counter Analysis Toolkit benchmarks.
|
||||
|
||||
These changes have been tested on the Intel Ice Lake architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index a013f58af..8f23e030c 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -929,6 +929,63 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD
|
||||
|
||||
# End of hsw,bdw,skl,clx list
|
||||
#
|
||||
+
|
||||
+# Intel Ice Lake SP events
|
||||
+CPU,icx
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
||||
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||||
+# Loads and stores
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_LOADS
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_STORES
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_INST_RETIRED:ALL_LOADS,MEM_INST_RETIRED:ALL_STORES
|
||||
+# L1 cache
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
|
||||
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD
|
||||
+# L2 cache
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
+#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
|
||||
+#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
|
||||
+PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
+#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
|
||||
+PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
|
||||
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES
|
||||
+PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD
|
||||
+# L3 cache
|
||||
+PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD
|
||||
+PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
||||
+#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
|
||||
+PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_RETIRED:L3_MISS
|
||||
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES
|
||||
+PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES
|
||||
+# SMP
|
||||
+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
|
||||
+# Branches
|
||||
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN
|
||||
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||||
+#FLOPs
|
||||
+# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
+# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+# End of icx list
|
||||
+
|
||||
#
|
||||
# Intel MIC / Xeon-Phi / Knights Landing
|
||||
# Intel Knights Mill
|
||||
commit d4da29b07befb9f7c11e351dbfef835b74cdd67a
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 17:11:37 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse N1
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 8f23e030c..a4d5a9756 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2059,6 +2059,41 @@ PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIX
|
||||
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC
|
||||
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse N1 #
|
||||
+#########################
|
||||
+CPU,arm_n1
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit 88e686f877abcf19c5f50d4e23cbf8ea920a40b6
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 14:54:41 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse V1
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index a4d5a9756..207d6d1db 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse V1 #
|
||||
+#########################
|
||||
+CPU,arm_v1
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit e911f951115bb551925c5b07e7f5b721d5fe3bbe
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 17:14:18 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse N2
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 207d6d1db..d27d956c1 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse N2 #
|
||||
+#########################
|
||||
+CPU,arm_n2
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#########################
|
||||
# ARM Neoverse V1 #
|
||||
#########################
|
||||
commit 05dc580247cb18fca882a33d8e356d79032d2ed1
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 17:08:35 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse V2
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index d27d956c1..549e337c7 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2164,6 +2164,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse V2 #
|
||||
+#########################
|
||||
+CPU,arm_v2
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
@ -1,321 +0,0 @@
|
||||
commit b969d25f2a87a53365e3e9a040533b093544a05d
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Apr 3 22:30:14 2023 +0000
|
||||
|
||||
Update Neoverse V2 events
|
||||
|
||||
Add/remove PAPI events to match available hardware counters
|
||||
All tests pass on NVIDIA Grace
|
||||
|
||||
Disclaimer:
|
||||
The PAPI team was not able to verify the functionality included in this
|
||||
commit.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 549e337c..3089d2d4 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2170,34 +2170,113 @@ PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
CPU,arm_v2
|
||||
#
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_INT_INS,NOT_DERIVED,DP_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_TOT_IIS,Instructions issued
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CNT_CYCLES
|
||||
+PRESET,PAPI_STL_CCY,NOT_DERIVED,STALL
|
||||
+#NOT_IMPLEMENTED,PAPI_FUL_CCY,Cycles with maximum instructions completed
|
||||
+#NOT_IMPLEMENTED,PAPI_FUL_ICY,Cycles with maximum instruction issue
|
||||
+#NOT_IMPLEMENTED,PAPI_FXU_IDL,Cycles integer units are idle
|
||||
+#NOT_IMPLEMENTED,PAPI_LSU_IDL,Cycles load/store units are idle
|
||||
+#NOT_IMPLEMENTED,PAPI_MEM_RCY,Cycles Stalled Waiting for memory Reads
|
||||
+#NOT_IMPLEMENTED,PAPI_MEM_SCY,Cycles Stalled Waiting for memory accesses
|
||||
+#NOT_IMPLEMENTED,PAPI_MEM_WCY,Cycles Stalled Waiting for memory writes
|
||||
+#NOT_IMPLEMENTED,PAPI_FP_STAL,Cycles the FP unit(s) are stalled
|
||||
+#NOT_IMPLEMENTED,PAPI_FPU_IDL,Cycles floating point units are idle
|
||||
+#NOT_IMPLEMENTED,PAPI_BRU_IDL,Cycles branch units are idle
|
||||
+PRESET,PAPI_STL_ICY,NOT_DERIVED,STALL
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_SP_OPS,Floating point operations; optimized to count scaled single precision vector operations
|
||||
+#NOT_IMPLEMENTED,PAPI_DP_OPS,Floating point operations; optimized to count scaled double precision vector operations
|
||||
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_HP_SPEC,FP_SP_SPEC,FP_DP_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_FAD_INS,Floating point add instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FDV_INS,Floating point divide instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FMA_INS,FMA instructions completed
|
||||
+#NOT_IMPLEMENTED,PAPI_FML_INS,Floating point multiply instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FNV_INS,Floating point inverse instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FSQ_INS,Floating point square root instructions
|
||||
PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_VEC_DP,Double precision vector/SIMD instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_VEC_SP,Single precision vector/SIMD instructions
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
-PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
-PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
-PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_CN,Conditional branch instructions
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_RETIRED,BR_MIS_PRED_RETIRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED_RETIRED
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_NTK,Conditional branch instructions not taken
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_TKN,Conditional branch instructions taken
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_UCN,Unconditional branch instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_BTAC_M,Branch target address cache misses
|
||||
PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
-PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_ICR,Level 1 instruction cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_ICW,Level 1 instruction cache writes
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_LDM,Level 1 load misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_STM,Level 1 store misses
|
||||
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_TCR,Level 1 total cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_TCW,Level 1 total cache writes
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
-PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
-PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_WR
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICA,Level 2 instruction cache accesses
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICH,Level 2 instruction cache hits
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICM,Level 2 instruction cache misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICR,Level 2 instruction cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICW,Level 2 instruction cache writes
|
||||
+PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L3_TCA,NOT_DERIVED,L3D_CACHE
|
||||
+PRESET,PAPI_L3_DCA,NOT_DERIVED,L3D_CACHE
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_DCH,Level 3 data cache hits
|
||||
+PRESET,PAPI_L3_DCM,NOT_DERIVED,L3D_CACHE_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_DCR,Level 3 data cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_DCW,Level 3 data cache writes
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICA,Level 3 instruction cache accesses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICH,Level 3 instruction cache hits
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICM,Level 3 instruction cache misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICR,Level 3 instruction cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICW,Level 3 instruction cache writes
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_LDM,Level 3 load misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_STM,Level 3 store misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCH,Level 3 total cache hits
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCM,Level 3 cache misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCR,Level 3 total cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCW,Level 3 total cache writes
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_TLB_SD,Translation lookaside buffer shootdowns
|
||||
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L1D_TLB_REFILL,L2D_TLB_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_CLN,Requests for exclusive access to clean cache line
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_INV,Requests for cache line invalidation
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_ITV,Requests for cache line intervention
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_SHR,Requests for exclusive access to shared cache line
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_SNP,Requests for a snoop
|
||||
+#NOT_IMPLEMENTED,PAPI_CSR_FAL,Failed store conditional instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_CSR_SUC,Successful store conditional instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_CSR_TOT,Total store conditional instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_PRF_DM,Data prefetch cache misses
|
||||
|
||||
#
|
||||
CPU,mips_74k
|
||||
|
||||
commit 15f32cb3a2e6bdd9e51aa4043842f0130e9dcf24
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Wed Jun 7 14:38:39 2023 +0000
|
||||
|
||||
add branch presets for Zen3 and Zen4
|
||||
|
||||
These changes include all branching preset events for Zen3 and Zen4,
|
||||
validated using the Counter Analysis Toolkit.
|
||||
|
||||
For Zen3, PAPI_BR_TKN was modified to exclude unconditional branches
|
||||
taken, in order to adhere to the preset's meaning.
|
||||
|
||||
These changes have been tested on the AMD Zen3 and Zen4 architectures.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 3089d2d4..319cf82c 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -488,8 +488,12 @@ CPU,amd64_fam19h_zen3
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
-PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_UCN,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_TKN,DERIVED_POSTFIX,N0|N1|-|N2|+|,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
|
||||
PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
|
||||
@@ -509,6 +513,16 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
|
||||
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
|
||||
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam19h_zen4
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit da93ed4dd1fadb70ccee62a976597ff431c9f58c
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Mon Jun 12 17:27:59 2023 +0000
|
||||
|
||||
add flops presets for Zen4
|
||||
|
||||
These changes include FLOPs presets for Zen4, validated using the
|
||||
Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the AMD Zen4 architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 319cf82c..f6a40a35 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -523,6 +523,14 @@ PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDI
|
||||
PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL,RETIRED_FP_OPS_BY_TYPE:SCALAR_ALL
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL
|
||||
+PRESET,PAPI_FMA_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MAC,RETIRED_FP_OPS_BY_TYPE:SCALAR_MAC
|
||||
+PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS_BY_TYPE:SCALAR_MUL
|
||||
+PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD
|
||||
+PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV
|
||||
+PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit a31c3a4e9788e03fee113263a9f94bd638a66721
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Wed Jun 21 15:13:47 2023 +0000
|
||||
|
||||
add cycles and instructions presets for Zen4
|
||||
|
||||
These changes include the 'total cycles' and 'instructions completed'
|
||||
presets for Zen4, validated using the Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the AMD Zen4 architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index f6a40a35..86e11fe6 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -531,6 +531,8 @@ PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS
|
||||
PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD
|
||||
PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV
|
||||
PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit 94303410ce97a84408b0b2d727701a60c6f137aa
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Sun Jul 23 15:38:36 2023 +0000
|
||||
|
||||
add various Sapphire Rapids presets
|
||||
|
||||
These changes include cycles, instructions, branching, and FLOPs presets
|
||||
for Intel Sapphire Rapids, validated using the Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the Intel Sapphire Rapids architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 86e11fe6..eac0855f 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1010,6 +1010,29 @@ PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_
|
||||
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
# End of icx list
|
||||
|
||||
+# Intel Sapphire Rapids events
|
||||
+CPU,spr
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
||||
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||||
+# FLOPs
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+# Branches
|
||||
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN
|
||||
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||||
+# End of spr list
|
||||
+
|
||||
#
|
||||
# Intel MIC / Xeon-Phi / Knights Landing
|
||||
# Intel Knights Mill
|
||||
|
||||
commit 42b14987ca1a7028b6cf6fdc190a2fa6a0fd8e18
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Tue Jul 25 12:16:56 2023 +0000
|
||||
|
||||
add more Ice Lake FLOPs presets
|
||||
|
||||
Since there are enough counters available to monitor both single- and
|
||||
double-precision floating-point events, PAPI_FP_OPS, PAPI_FP_INS, and
|
||||
PAPI_VEC_INS are all defined.
|
||||
These presets have been validated using the Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the Intel Ice Lake architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index eac0855f..df82ac1c 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1006,8 +1006,11 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||||
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE
|
||||
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
# End of icx list
|
||||
|
||||
# Intel Sapphire Rapids events
|
@ -1,637 +0,0 @@
|
||||
commit 9a1f2d897f4086bc1d60102de984c849445b5e97
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Feb 21 19:18:40 2023 +0900
|
||||
|
||||
PAPI_read performance improvement for the arm64 processor
|
||||
|
||||
We developed PAPI_read performance improvements for the arm64 processor
|
||||
with a plan to port direct user space PMU register access processing from
|
||||
libperf to the papi library without using libperf.
|
||||
|
||||
The workaround has been implemented that stores the counter value at the
|
||||
time of reset and subtracts the counter value at the time of reset from
|
||||
the read counter value at the next read.
|
||||
When reset processing is called, the value of pc->offset is cleared to 0,
|
||||
and only the counter value read from the PMU counter is referenced.
|
||||
There was no problem with the counters FAILED with negative values during
|
||||
the multiplex+reset test, except for sdsc2-mpx and sdsc4-mpx.
|
||||
To apply the workaround only during reset, the _pe_reset function call sets
|
||||
the reset_flag and the next _pe_start function call clears the reset_flag.
|
||||
The workaround works if the mmap_read_self function is called between calls
|
||||
to the _pe_reset function and the next call to the _pe_start function.
|
||||
|
||||
Switching PMU register direct access from user space from OFF to ON is done by
|
||||
changing the setting of the kernel variable "/proc/sys/kernel/perf_user_access".
|
||||
|
||||
Setting PMU Register Direct Access from User Space Off
|
||||
$ echo 0 > /proc/sys/kernel/perf_user_access
|
||||
$ cat /proc/sys/kernel/perf_user_access
|
||||
0
|
||||
|
||||
Setting PMU Register Direct Access from User Space ON
|
||||
$ echo 1 > /proc/sys/kernel/perf_user_access
|
||||
$ cat /proc/sys/kernel/perf_user_access
|
||||
1
|
||||
|
||||
Performance of PAPI_read has been improved as expected from the execution
|
||||
result of the papi_cost command.
|
||||
|
||||
Improvement effect of switching PMU register direct access from user space
|
||||
from OFF to ON
|
||||
|
||||
Total cost for PAPI_read (2 counters) over 1000000 iterations
|
||||
min cycles: 689 -> 28
|
||||
max cycles: 3876 -> 1323
|
||||
mean cycles: 724.471979 -> 28.888076
|
||||
|
||||
Total cost for PAPI_read_ts (2 counters) over 1000000 iterations
|
||||
min cycles: 693 -> 29
|
||||
max cycles: 4066 -> 3718
|
||||
mean cycles: 726.753003 -> 29.977226
|
||||
|
||||
Total cost for PAPI_read (1 derived_[add|sub] counter) over 1000000 iterations
|
||||
min cycles: 698 -> 28
|
||||
max cycles: 7406 -> 2346
|
||||
mean cycles: 728.527079 -> 28.880691
|
||||
|
||||
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
|
||||
diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
|
||||
index b4877d18e..331288c55 100644
|
||||
--- a/src/components/perf_event/perf_event.c
|
||||
+++ b/src/components/perf_event/perf_event.c
|
||||
@@ -682,6 +682,12 @@ set_up_mmap( pe_control_t *ctl, int evt_idx)
|
||||
|
||||
|
||||
|
||||
+/* Request user access for arm64 */
|
||||
+static inline void arm64_request_user_access(struct perf_event_attr *hw_event)
|
||||
+{
|
||||
+ hw_event->config1=0x2; /* Request user access */
|
||||
+}
|
||||
+
|
||||
/* Open all events in the control state */
|
||||
static int
|
||||
open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
|
||||
@@ -735,6 +741,11 @@ open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
|
||||
if (( i == 0 ) || (ctl->multiplexed)) {
|
||||
ctl->events[i].attr.pinned = !ctl->multiplexed;
|
||||
ctl->events[i].attr.disabled = 1;
|
||||
+#if defined(__aarch64__)
|
||||
+ if (_perf_event_vector.cmp_info.fast_counter_read) {
|
||||
+ arm64_request_user_access(&ctl->events[i].attr);
|
||||
+ }
|
||||
+#endif
|
||||
ctl->events[i].group_leader_fd=-1;
|
||||
ctl->events[i].attr.read_format = get_read_format(
|
||||
ctl->multiplexed,
|
||||
@@ -743,6 +754,11 @@ open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
|
||||
} else {
|
||||
ctl->events[i].attr.pinned=0;
|
||||
ctl->events[i].attr.disabled = 0;
|
||||
+#if defined(__aarch64__)
|
||||
+ if (_perf_event_vector.cmp_info.fast_counter_read) {
|
||||
+ arm64_request_user_access(&ctl->events[i].attr);
|
||||
+ }
|
||||
+#endif
|
||||
ctl->events[i].group_leader_fd=ctl->events[0].event_fd;
|
||||
ctl->events[i].attr.read_format = get_read_format(
|
||||
ctl->multiplexed,
|
||||
@@ -1047,8 +1063,16 @@ _pe_reset( hwd_context_t *ctx, hwd_control_state_t *ctl )
|
||||
|
||||
/* We need to reset all of the events, not just the group leaders */
|
||||
for( i = 0; i < pe_ctl->num_events; i++ ) {
|
||||
- ret = ioctl( pe_ctl->events[i].event_fd,
|
||||
- PERF_EVENT_IOC_RESET, NULL );
|
||||
+ if (_perf_event_vector.cmp_info.fast_counter_read) {
|
||||
+ ret = ioctl( pe_ctl->events[i].event_fd,
|
||||
+ PERF_EVENT_IOC_RESET, NULL );
|
||||
+ pe_ctl->reset_counts[i] = mmap_read_reset_count(
|
||||
+ pe_ctl->events[i].mmap_buf);
|
||||
+ pe_ctl->reset_flag = 1;
|
||||
+ } else {
|
||||
+ ret = ioctl( pe_ctl->events[i].event_fd,
|
||||
+ PERF_EVENT_IOC_RESET, NULL );
|
||||
+ }
|
||||
if ( ret == -1 ) {
|
||||
PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) "
|
||||
"returned error, Linux says: %s",
|
||||
@@ -1119,6 +1143,8 @@ _pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
|
||||
for ( i = 0; i < pe_ctl->num_events; i++ ) {
|
||||
|
||||
count = mmap_read_self(pe_ctl->events[i].mmap_buf,
|
||||
+ pe_ctl->reset_flag,
|
||||
+ pe_ctl->reset_counts[i],
|
||||
&enabled,&running);
|
||||
|
||||
if (count==0xffffffffffffffffULL) {
|
||||
@@ -1438,6 +1464,10 @@ _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
|
||||
pe_ctl->events[i].event_fd);
|
||||
ret=ioctl( pe_ctl->events[i].event_fd,
|
||||
PERF_EVENT_IOC_ENABLE, NULL) ;
|
||||
+ if (_perf_event_vector.cmp_info.fast_counter_read) {
|
||||
+ pe_ctl->reset_counts[i] = 0LL;
|
||||
+ pe_ctl->reset_flag = 0;
|
||||
+ }
|
||||
|
||||
/* ioctls always return -1 on failure */
|
||||
if (ret == -1) {
|
||||
@@ -2297,6 +2327,29 @@ _pe_shutdown_component( void ) {
|
||||
}
|
||||
|
||||
|
||||
+#if defined(__aarch64__)
|
||||
+/* Check access PMU counter from User space for arm64 support */
|
||||
+static int _pe_detect_arm64_access(void) {
|
||||
+
|
||||
+ FILE *fff;
|
||||
+ int perf_user_access;
|
||||
+ int retval;
|
||||
+
|
||||
+ fff=fopen("/proc/sys/kernel/perf_user_access","r");
|
||||
+ if (fff==NULL) {
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ /* 1 means you can access PMU counter from User space */
|
||||
+ /* 0 means you can not access PMU counter from User space */
|
||||
+ retval=fscanf(fff,"%d",&perf_user_access);
|
||||
+ if (retval!=1) fprintf(stderr,"Error reading /proc/sys/kernel/perf_user_access\n");
|
||||
+ fclose(fff);
|
||||
+
|
||||
+ return perf_user_access;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
/* Check the mmap page for rdpmc support */
|
||||
static int _pe_detect_rdpmc(void) {
|
||||
|
||||
@@ -2305,10 +2358,13 @@ static int _pe_detect_rdpmc(void) {
|
||||
void *addr;
|
||||
struct perf_event_mmap_page *our_mmap;
|
||||
int page_size=getpagesize();
|
||||
+#if defined(__aarch64__)
|
||||
+ int retval;
|
||||
+#endif
|
||||
|
||||
-#if defined(__i386__) || defined (__x86_64__)
|
||||
+#if defined(__i386__) || defined (__x86_64__) || defined(__aarch64__)
|
||||
#else
|
||||
- /* We only support rdpmc on x86 for now */
|
||||
+ /* We support rdpmc on x86 and arm64 for now */
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
@@ -2318,12 +2374,23 @@ static int _pe_detect_rdpmc(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
+#if defined(__aarch64__)
|
||||
+ /* Detect if we can use PMU counter from User space for arm64 */
|
||||
+ retval = _pe_detect_arm64_access();
|
||||
+ if (retval == 0) {
|
||||
+ return 0;
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
/* Create a fake instructions event so we can read a mmap page */
|
||||
memset(&pe,0,sizeof(struct perf_event_attr));
|
||||
|
||||
pe.type=PERF_TYPE_HARDWARE;
|
||||
pe.size=sizeof(struct perf_event_attr);
|
||||
pe.config=PERF_COUNT_HW_INSTRUCTIONS;
|
||||
+#if defined(__aarch64__)
|
||||
+ arm64_request_user_access(&pe);
|
||||
+#endif
|
||||
pe.exclude_kernel=1;
|
||||
pe.disabled=1;
|
||||
|
||||
diff --git a/src/components/perf_event/perf_event_lib.h b/src/components/perf_event/perf_event_lib.h
|
||||
index 0c50ab9f0..cfba8ac49 100644
|
||||
--- a/src/components/perf_event/perf_event_lib.h
|
||||
+++ b/src/components/perf_event/perf_event_lib.h
|
||||
@@ -36,6 +36,8 @@ typedef struct {
|
||||
pid_t tid; /* thread we are monitoring */
|
||||
pe_event_info_t events[PERF_EVENT_MAX_MPX_COUNTERS];
|
||||
long long counts[PERF_EVENT_MAX_MPX_COUNTERS];
|
||||
+ unsigned int reset_flag;
|
||||
+ long long reset_counts[PERF_EVENT_MAX_MPX_COUNTERS];
|
||||
} pe_control_t;
|
||||
|
||||
|
||||
diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h
|
||||
index 92dca4fd0..097286865 100644
|
||||
--- a/src/components/perf_event/perf_helpers.h
|
||||
+++ b/src/components/perf_event/perf_helpers.h
|
||||
@@ -29,6 +29,74 @@ sys_perf_event_open( struct perf_event_attr *hw_event,
|
||||
return ret;
|
||||
}
|
||||
|
||||
+
|
||||
+/*
|
||||
+ * We define u64 as uint64_t for every architecture
|
||||
+ * so that we can print it with "%"PRIx64 without getting warnings.
|
||||
+ *
|
||||
+ * typedef __u64 u64;
|
||||
+ * typedef __s64 s64;
|
||||
+ */
|
||||
+typedef uint64_t u64;
|
||||
+typedef int64_t s64;
|
||||
+
|
||||
+typedef __u32 u32;
|
||||
+typedef __s32 s32;
|
||||
+
|
||||
+typedef __u16 u16;
|
||||
+typedef __s16 s16;
|
||||
+
|
||||
+typedef __u8 u8;
|
||||
+typedef __s8 s8;
|
||||
+
|
||||
+
|
||||
+#ifdef __SIZEOF_INT128__
|
||||
+static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift)
|
||||
+{
|
||||
+ return (u64)(((unsigned __int128)a * b) >> shift);
|
||||
+}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+#ifdef __i386__
|
||||
+static inline u64 mul_u32_u32(u32 a, u32 b)
|
||||
+{
|
||||
+ u32 high, low;
|
||||
+
|
||||
+ asm ("mull %[b]" : "=a" (low), "=d" (high)
|
||||
+ : [a] "a" (a), [b] "rm" (b) );
|
||||
+
|
||||
+ return low | ((u64)high) << 32;
|
||||
+}
|
||||
+#else
|
||||
+static inline u64 mul_u32_u32(u32 a, u32 b)
|
||||
+{
|
||||
+ return (u64)a * b;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift)
|
||||
+{
|
||||
+ u32 ah, al;
|
||||
+ u64 ret;
|
||||
+
|
||||
+ al = a;
|
||||
+ ah = a >> 32;
|
||||
+
|
||||
+ ret = mul_u32_u32(al, b) >> shift;
|
||||
+ if (ah)
|
||||
+ ret += mul_u32_u32(ah, b) << (32 - shift);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+#endif /* __SIZEOF_INT128__ */
|
||||
+
|
||||
+#ifndef ARRAY_SIZE
|
||||
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
+#endif
|
||||
+
|
||||
+
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
|
||||
@@ -52,19 +120,140 @@ static inline unsigned long long rdpmc(unsigned int counter) {
|
||||
|
||||
#define barrier() __asm__ volatile("" ::: "memory")
|
||||
|
||||
+
|
||||
+#elif defined(__aarch64__)
|
||||
+
|
||||
+/* Indirect stringification. Doing two levels allows the parameter to be a
|
||||
+ * macro itself. For example, compile with -DFOO=bar, __stringify(FOO)
|
||||
+ * converts to "bar".
|
||||
+ */
|
||||
+
|
||||
+#define __stringify_1(x...) #x
|
||||
+#define __stringify(x...) __stringify_1(x)
|
||||
+
|
||||
+#define read_sysreg(r) ({ \
|
||||
+ u64 __val; \
|
||||
+ asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
|
||||
+ __val; \
|
||||
+})
|
||||
+
|
||||
+static u64 read_pmccntr(void)
|
||||
+{
|
||||
+ return read_sysreg(pmccntr_el0);
|
||||
+}
|
||||
+
|
||||
+#define PMEVCNTR_READ(idx) \
|
||||
+ static u64 read_pmevcntr_##idx(void) { \
|
||||
+ return read_sysreg(pmevcntr##idx##_el0); \
|
||||
+ }
|
||||
+
|
||||
+PMEVCNTR_READ(0);
|
||||
+PMEVCNTR_READ(1);
|
||||
+PMEVCNTR_READ(2);
|
||||
+PMEVCNTR_READ(3);
|
||||
+PMEVCNTR_READ(4);
|
||||
+PMEVCNTR_READ(5);
|
||||
+PMEVCNTR_READ(6);
|
||||
+PMEVCNTR_READ(7);
|
||||
+PMEVCNTR_READ(8);
|
||||
+PMEVCNTR_READ(9);
|
||||
+PMEVCNTR_READ(10);
|
||||
+PMEVCNTR_READ(11);
|
||||
+PMEVCNTR_READ(12);
|
||||
+PMEVCNTR_READ(13);
|
||||
+PMEVCNTR_READ(14);
|
||||
+PMEVCNTR_READ(15);
|
||||
+PMEVCNTR_READ(16);
|
||||
+PMEVCNTR_READ(17);
|
||||
+PMEVCNTR_READ(18);
|
||||
+PMEVCNTR_READ(19);
|
||||
+PMEVCNTR_READ(20);
|
||||
+PMEVCNTR_READ(21);
|
||||
+PMEVCNTR_READ(22);
|
||||
+PMEVCNTR_READ(23);
|
||||
+PMEVCNTR_READ(24);
|
||||
+PMEVCNTR_READ(25);
|
||||
+PMEVCNTR_READ(26);
|
||||
+PMEVCNTR_READ(27);
|
||||
+PMEVCNTR_READ(28);
|
||||
+PMEVCNTR_READ(29);
|
||||
+PMEVCNTR_READ(30);
|
||||
+
|
||||
+/*
|
||||
+ * Read a value direct from PMEVCNTR<idx>
|
||||
+ */
|
||||
+static u64 rdpmc(unsigned int counter)
|
||||
+{
|
||||
+ static u64 (* const read_f[])(void) = {
|
||||
+ read_pmevcntr_0,
|
||||
+ read_pmevcntr_1,
|
||||
+ read_pmevcntr_2,
|
||||
+ read_pmevcntr_3,
|
||||
+ read_pmevcntr_4,
|
||||
+ read_pmevcntr_5,
|
||||
+ read_pmevcntr_6,
|
||||
+ read_pmevcntr_7,
|
||||
+ read_pmevcntr_8,
|
||||
+ read_pmevcntr_9,
|
||||
+ read_pmevcntr_10,
|
||||
+ read_pmevcntr_11,
|
||||
+ read_pmevcntr_13,
|
||||
+ read_pmevcntr_12,
|
||||
+ read_pmevcntr_14,
|
||||
+ read_pmevcntr_15,
|
||||
+ read_pmevcntr_16,
|
||||
+ read_pmevcntr_17,
|
||||
+ read_pmevcntr_18,
|
||||
+ read_pmevcntr_19,
|
||||
+ read_pmevcntr_20,
|
||||
+ read_pmevcntr_21,
|
||||
+ read_pmevcntr_22,
|
||||
+ read_pmevcntr_23,
|
||||
+ read_pmevcntr_24,
|
||||
+ read_pmevcntr_25,
|
||||
+ read_pmevcntr_26,
|
||||
+ read_pmevcntr_27,
|
||||
+ read_pmevcntr_28,
|
||||
+ read_pmevcntr_29,
|
||||
+ read_pmevcntr_30,
|
||||
+ read_pmccntr
|
||||
+ };
|
||||
+
|
||||
+ if (counter < ARRAY_SIZE(read_f))
|
||||
+ return (read_f[counter])();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static u64 rdtsc(void) { return read_sysreg(cntvct_el0); }
|
||||
+
|
||||
+#define barrier() asm volatile("dmb ish" : : : "memory")
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
+#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
|
||||
+
|
||||
+static inline u64 adjust_cap_usr_time_short(u64 a, u64 b, u64 c)
|
||||
+{
|
||||
+ u64 ret;
|
||||
+ ret = b + ((a - b) & c);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
/* based on the code in include/uapi/linux/perf_event.h */
|
||||
static inline unsigned long long mmap_read_self(void *addr,
|
||||
+ int user_reset_flag,
|
||||
+ unsigned long long reset,
|
||||
unsigned long long *en,
|
||||
unsigned long long *ru) {
|
||||
|
||||
struct perf_event_mmap_page *pc = addr;
|
||||
|
||||
- uint32_t seq, time_mult, time_shift, index, width;
|
||||
+ uint32_t seq, time_mult = 0, time_shift = 0, index, width;
|
||||
int64_t count;
|
||||
uint64_t enabled, running;
|
||||
- uint64_t cyc, time_offset;
|
||||
+ uint64_t cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
|
||||
int64_t pmc = 0;
|
||||
- uint64_t quot, rem;
|
||||
uint64_t delta = 0;
|
||||
|
||||
|
||||
@@ -96,12 +285,11 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
time_mult = pc->time_mult;
|
||||
time_shift = pc->time_shift;
|
||||
|
||||
- quot=(cyc>>time_shift);
|
||||
- rem = cyc & (((uint64_t)1 << time_shift) - 1);
|
||||
- delta = time_offset + (quot * time_mult) +
|
||||
- ((rem * time_mult) >> time_shift);
|
||||
+ if (pc->cap_user_time_short) {
|
||||
+ time_cycles = pc->time_cycles;
|
||||
+ time_mask = pc->time_mask;
|
||||
+ }
|
||||
}
|
||||
- enabled+=delta;
|
||||
|
||||
/* actually do the measurement */
|
||||
|
||||
@@ -116,8 +304,9 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
/* numbers which break if an IOC_RESET is done */
|
||||
width = pc->pmc_width;
|
||||
count = pc->offset;
|
||||
- count<<=(64-width);
|
||||
- count>>=(64-width);
|
||||
+ if (user_reset_flag == 1) {
|
||||
+ count = 0;
|
||||
+ }
|
||||
|
||||
/* Ugh, libpfm4 perf_event.h has cap_usr_rdpmc */
|
||||
/* while actual perf_event.h has cap_user_rdpmc */
|
||||
@@ -130,14 +319,14 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
pmc = rdpmc(index-1);
|
||||
|
||||
/* sign extend result */
|
||||
+ if (user_reset_flag == 1) {
|
||||
+ pmc-=reset;
|
||||
+ }
|
||||
pmc<<=(64-width);
|
||||
pmc>>=(64-width);
|
||||
|
||||
/* add current count into the existing kernel count */
|
||||
count+=pmc;
|
||||
-
|
||||
- /* Only adjust if index is valid */
|
||||
- running+=delta;
|
||||
} else {
|
||||
/* Falling back because rdpmc not supported */
|
||||
/* for this event. */
|
||||
@@ -148,14 +337,66 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
|
||||
} while (pc->lock != seq);
|
||||
|
||||
+ if (enabled != running) {
|
||||
+
|
||||
+ /* Adjust for cap_usr_time_short, a nop if not */
|
||||
+ cyc = adjust_cap_usr_time_short(cyc, time_cycles, time_mask);
|
||||
+
|
||||
+ delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
|
||||
+
|
||||
+ enabled+=delta;
|
||||
+ if (index)
|
||||
+ /* Only adjust if index is valid */
|
||||
+ running+=delta;
|
||||
+ }
|
||||
+
|
||||
if (en) *en=enabled;
|
||||
if (ru) *ru=running;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
+static inline unsigned long long mmap_read_reset_count(void *addr) {
|
||||
+
|
||||
+ struct perf_event_mmap_page *pc = addr;
|
||||
+ uint32_t seq, index;
|
||||
+ uint64_t count = 0;
|
||||
+
|
||||
+ if (pc == NULL) {
|
||||
+ return count;
|
||||
+ }
|
||||
+
|
||||
+ do {
|
||||
+ /* The barrier ensures we get the most up to date */
|
||||
+ /* version of the pc->lock variable */
|
||||
+
|
||||
+ seq=pc->lock;
|
||||
+ barrier();
|
||||
+
|
||||
+ /* actually do the measurement */
|
||||
+
|
||||
+ /* Ugh, libpfm4 perf_event.h has cap_usr_rdpmc */
|
||||
+ /* while actual perf_event.h has cap_user_rdpmc */
|
||||
+
|
||||
+ /* Index of register to read */
|
||||
+ /* 0 means stopped/not-active */
|
||||
+ /* Need to subtract 1 to get actual index to rdpmc() */
|
||||
+ index = pc->index;
|
||||
+
|
||||
+ if (pc->cap_usr_rdpmc && index) {
|
||||
+ /* Read counter value */
|
||||
+ count = rdpmc(index-1);
|
||||
+ }
|
||||
+ barrier();
|
||||
+
|
||||
+ } while (pc->lock != seq);
|
||||
+
|
||||
+ return count;
|
||||
+}
|
||||
+
|
||||
#else
|
||||
static inline unsigned long long mmap_read_self(void *addr,
|
||||
+ int user_reset_flag,
|
||||
unsigned long long *en,
|
||||
unsigned long long *ru) {
|
||||
|
||||
commit 693dd5c014d1f0b9a3eae63de051389ed8eb338b
|
||||
Author: Giuseppe Congiu <gcongiu@icl.utk.edu>
|
||||
Date: Tue Feb 21 07:46:14 2023 -0500
|
||||
|
||||
perf_event: bug fix in mmap_read_self
|
||||
|
||||
Commit 9a1f2d897 broke the perf_event component for power cpus. The
|
||||
mmap_read_self function is missing one argument. This patch restores the
|
||||
missing argument in the function.
|
||||
|
||||
diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h
|
||||
index 097286865..7ad3524f0 100644
|
||||
--- a/src/components/perf_event/perf_helpers.h
|
||||
+++ b/src/components/perf_event/perf_helpers.h
|
||||
@@ -397,6 +397,7 @@ static inline unsigned long long mmap_read_reset_count(void *addr) {
|
||||
#else
|
||||
static inline unsigned long long mmap_read_self(void *addr,
|
||||
int user_reset_flag,
|
||||
+ unsigned long long reset,
|
||||
unsigned long long *en,
|
||||
unsigned long long *ru) {
|
||||
|
||||
commit 1b3e75b7f11c7e2b7c590948216d6aaeec299010
|
||||
Author: Giuseppe Congiu <gcongiu@icl.utk.edu>
|
||||
Date: Tue Feb 21 14:21:03 2023 +0100
|
||||
|
||||
perf_event: add missing mmap_read_reset_count for non default cpus
|
||||
|
||||
Power cpus do not have a version of mmap_read_reset_count. Implement the
|
||||
missing function.
|
||||
|
||||
diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h
|
||||
index 7ad3524f0..73e82c8ae 100644
|
||||
--- a/src/components/perf_event/perf_helpers.h
|
||||
+++ b/src/components/perf_event/perf_helpers.h
|
||||
@@ -409,6 +409,11 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
return (unsigned long long)(-1);
|
||||
}
|
||||
|
||||
+static inline unsigned long long mmap_read_reset_count(void *addr __attribute__((unused))) {
|
||||
+
|
||||
+ return (unsigned long long)(-1);
|
||||
+}
|
||||
+
|
||||
#endif
|
||||
|
||||
/* These functions are based on builtin-record.c in the */
|
||||
commit 37d0c77b7b4d00a958dff50dc715cf63e0cd6084
|
||||
Author: Giuseppe Congiu <gcongiu@icl.utk.edu>
|
||||
Date: Tue Feb 21 14:22:53 2023 +0100
|
||||
|
||||
perf_event: used unused attribute in mmap_read_self
|
||||
|
||||
diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h
|
||||
index 73e82c8ae..59c8a2fc8 100644
|
||||
--- a/src/components/perf_event/perf_helpers.h
|
||||
+++ b/src/components/perf_event/perf_helpers.h
|
||||
@@ -395,16 +395,11 @@ static inline unsigned long long mmap_read_reset_count(void *addr) {
|
||||
}
|
||||
|
||||
#else
|
||||
-static inline unsigned long long mmap_read_self(void *addr,
|
||||
- int user_reset_flag,
|
||||
- unsigned long long reset,
|
||||
- unsigned long long *en,
|
||||
- unsigned long long *ru) {
|
||||
-
|
||||
- (void)addr;
|
||||
-
|
||||
- *en=0;
|
||||
- *ru=0;
|
||||
+static inline unsigned long long mmap_read_self(void *addr __attribute__((unused)),
|
||||
+ int user_reset_flag __attribute__((unused)),
|
||||
+ unsigned long long reset __attribute__((unused)),
|
||||
+ unsigned long long *en __attribute__((unused)),
|
||||
+ unsigned long long *ru __attribute__((unused))) {
|
||||
|
||||
return (unsigned long long)(-1);
|
||||
}
|
@ -1,156 +0,0 @@
|
||||
commit 77ee6b54f4080ca27b7efcb4c91679d0f1e090b5
|
||||
Author: Anthony Castaldo <TonyCastaldo@icl.utk.edu>
|
||||
Date: Fri Jan 24 10:25:36 2020 -0500
|
||||
|
||||
New libpfm4 contains "aliased" pmus for backward compatibility,
|
||||
amd64_fam17h == amd64_fam17h_zen1; this causes us to put BOTH pmus
|
||||
into the PMUs supported string and double the events in native_avail.
|
||||
This update recognizes when aliases exist (the names must be hard-coded)
|
||||
and uses only one of the most recent name.
|
||||
|
||||
diff --git a/src/components/perf_event/pe_libpfm4_events.c b/src/components/perf_event/pe_libpfm4_events.c
|
||||
index 3b5f8d13f..3262608cd 100644
|
||||
--- a/src/components/perf_event/pe_libpfm4_events.c
|
||||
+++ b/src/components/perf_event/pe_libpfm4_events.c
|
||||
@@ -31,6 +31,9 @@
|
||||
// used to step through the attributes when enumerating events
|
||||
static int attr_idx;
|
||||
|
||||
+/* alias flags to handle amd_fam17h, amd_fam17h_zen1 both present PMUs*/
|
||||
+static int amd64_fam17h_zen1_present = 0;
|
||||
+
|
||||
/** @class find_existing_event
|
||||
* @brief looks up an event, returns it if it exists
|
||||
*
|
||||
@@ -482,7 +485,13 @@ static struct native_event_t *allocate_native_event(
|
||||
*
|
||||
* @returns returns a libpfm event number
|
||||
* @retval PAPI_ENOEVENT Could not find an event
|
||||
- *
|
||||
+ * Operational note: _pe_libpfm4_init() must be called first to set
|
||||
+ * flags for synonymous PMUs. At this writing only
|
||||
+ * amd64_fam17h_zen1_present is defined.
|
||||
+ * Operational note: We indirectly return the pmu_idx within the
|
||||
+ * event data; the calling code uses that to set
|
||||
+ * pmu_idx for subsequent calls. All we do is find
|
||||
+ * the next valid pmu, if any.
|
||||
*/
|
||||
|
||||
static int
|
||||
@@ -511,6 +520,12 @@ get_first_event_next_pmu(int pmu_idx, int pmu_type)
|
||||
break;
|
||||
}
|
||||
|
||||
+ if ((ret==PFM_SUCCESS) && amd64_fam17h_zen1_present && strcmp(pinfo.name, "amd64_fam17h") == 0) {
|
||||
+ /* Skip as if invalid; we want the PMU amd64_fam17h_zen1 instead. */
|
||||
+ pmu_idx++;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
if ((ret==PFM_SUCCESS) && pmu_is_present_and_right_type(&pinfo,pmu_type)) {
|
||||
|
||||
pidx=pinfo.first_event;
|
||||
@@ -1159,6 +1174,35 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
event_table->default_pmu.size = sizeof(pfm_pmu_info_t);
|
||||
retval=pfm_get_pmu_info(0, &(event_table->default_pmu));
|
||||
|
||||
+ SUBDBG("Prescan for aliases.")
|
||||
+ /* We have to see if we have aliases in there as separate PMUs, */
|
||||
+ /* we don't want both PMUs with all the events duplicated. */
|
||||
+ /* For aliases, either is valid alone, but if both are present */
|
||||
+ /* specify a preference in the code. */
|
||||
+ /* Alias: amd64_fam17h_zen1 over amd64_fam17h. */
|
||||
+ /* Alias flags are static ints global to this file. */
|
||||
+ i=0;
|
||||
+ while(1) {
|
||||
+ memset(&pinfo,0,sizeof(pfm_pmu_info_t));
|
||||
+ pinfo.size = sizeof(pfm_pmu_info_t);
|
||||
+ retval=pfm_get_pmu_info(i, &pinfo);
|
||||
+
|
||||
+ /* We're done if we hit an invalid PMU entry */
|
||||
+ /* We can't check against PFM_PMU_MAX as that might not */
|
||||
+ /* match if libpfm4 is dynamically linked */
|
||||
+
|
||||
+ if (retval==PFM_ERR_INVAL) {
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if ( (retval==PFM_SUCCESS) && (pinfo.name != NULL) &&
|
||||
+ (pmu_is_present_and_right_type(&pinfo,pmu_type)) &&
|
||||
+ (strcmp(pinfo.name,"amd64_fam17h_zen1") == 0) ) {
|
||||
+ amd64_fam17h_zen1_present = 1;
|
||||
+ }
|
||||
+ i++;
|
||||
+ }
|
||||
+
|
||||
SUBDBG("Detected pmus:\n");
|
||||
i=0;
|
||||
while(1) {
|
||||
@@ -1177,6 +1221,12 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
if ((retval==PFM_SUCCESS) && (pinfo.name != NULL) &&
|
||||
(pmu_is_present_and_right_type(&pinfo,pmu_type))) {
|
||||
|
||||
+ /* skip if it is amd64_fam17h and zen1 is also present. */
|
||||
+ if (strcmp(pinfo.name,"amd64_fam17h") == 0 && amd64_fam17h_zen1_present) {
|
||||
+ i++;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
SUBDBG("\t%d %s %s %d\n",i,
|
||||
pinfo.name,pinfo.desc,pinfo.type);
|
||||
|
||||
@@ -1193,11 +1243,9 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
/* Hack to have "default core" PMU */
|
||||
if ( (pinfo.type==PFM_PMU_TYPE_CORE) &&
|
||||
strcmp(pinfo.name,"ix86arch")) {
|
||||
-
|
||||
- SUBDBG("\t %s is default\n",pinfo.name);
|
||||
- memcpy(&(event_table->default_pmu),
|
||||
- &pinfo,sizeof(pfm_pmu_info_t));
|
||||
- found_default++;
|
||||
+ memcpy(&(event_table->default_pmu),
|
||||
+ &pinfo,sizeof(pfm_pmu_info_t));
|
||||
+ found_default++;
|
||||
}
|
||||
}
|
||||
|
||||
commit 79fe2a025afb8acb317032030c8847c9cbfd0162
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Jan 5 13:45:34 2021 +0900
|
||||
|
||||
Get model_string for ARM processor from pfm_get_pmu_info() function
|
||||
|
||||
On ARM processors, the model_string does not appear in /proc/cpuinfo.
|
||||
Instead of looking at the /proc/cpuinfo information, you can look at the lscpu command information at the following URL:.
|
||||
https://github.com/google/cpu_features/issues/26
|
||||
http://suihkulokki.blogspot.com/2018/02/making-sense-of-proccpuinfo-on-arm.html
|
||||
|
||||
The libpfm4 library identifies the ARM processor type from the "CPU implement" and "CPU part" in the /proc/cpuinfo information.
|
||||
The papi library can use the pfm_get_pmu_info() function from the libpfm4 library to obtain a string identifying the ARM processor type.
|
||||
|
||||
diff --git a/src/components/perf_event/pe_libpfm4_events.c b/src/components/perf_event/pe_libpfm4_events.c
|
||||
index a84819cc0..744851ff0 100644
|
||||
--- a/src/components/perf_event/pe_libpfm4_events.c
|
||||
+++ b/src/components/perf_event/pe_libpfm4_events.c
|
||||
@@ -1149,6 +1149,7 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
|
||||
pfm_err_t retval = PFM_SUCCESS;
|
||||
pfm_pmu_info_t pinfo;
|
||||
+ unsigned int strSize;
|
||||
|
||||
/* allocate the native event structure */
|
||||
event_table->num_native_events=0;
|
||||
@@ -1247,6 +1248,13 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
&pinfo,sizeof(pfm_pmu_info_t));
|
||||
found_default++;
|
||||
}
|
||||
+ if ( (pinfo.type==PFM_PMU_TYPE_CORE) &&
|
||||
+ ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM)) {
|
||||
+ if (strlen(_papi_hwi_system_info.hw_info.model_string) == 0) {
|
||||
+ strSize = sizeof(_papi_hwi_system_info.hw_info.model_string);
|
||||
+ strncpy( _papi_hwi_system_info.hw_info.model_string, pinfo.desc, strSize - 1);
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
if (pmu_type==PMU_TYPE_UNCORE) {
|
@ -1,43 +0,0 @@
|
||||
commit 7a6ae407b62615d3ffa9b0d2ac17771b7fc63056
|
||||
Author: Vince Weaver <vince@deater.net>
|
||||
Date: Thu Sep 27 23:47:58 2018 -0400
|
||||
|
||||
perf_event: avoid floating point exception if running is 0
|
||||
|
||||
The perf_event interface isn't supposed to return 0 for running, but
|
||||
it happens occasionally. So be sure not to divide by zero if this
|
||||
happens. This makes the rdpmc code match the generic perf code in this
|
||||
case.
|
||||
|
||||
This is in response to bitbucket issue #52
|
||||
|
||||
diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
|
||||
index 7fd753ed..82b7d398 100644
|
||||
--- a/src/components/perf_event/perf_event.c
|
||||
+++ b/src/components/perf_event/perf_event.c
|
||||
@@ -1099,14 +1099,23 @@ _pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
|
||||
count = mmap_read_self(pe_ctl->events[i].mmap_buf,
|
||||
&enabled,&running);
|
||||
|
||||
- /* TODO: error checking? */
|
||||
+ /* TODO: more error checking? */
|
||||
|
||||
/* Handle multiplexing case */
|
||||
- if (enabled!=running) {
|
||||
+ if (enabled == running) {
|
||||
+ /* no adjustment needed */
|
||||
+ }
|
||||
+ else if (enabled && running) {
|
||||
adjusted = (enabled * 128LL) / running;
|
||||
adjusted = adjusted * count;
|
||||
adjusted = adjusted / 128LL;
|
||||
count = adjusted;
|
||||
+ } else {
|
||||
+ /* This should not happen, but we have had it reported */
|
||||
+ SUBDBG("perf_event kernel bug(?) count, enabled, "
|
||||
+ "running: %lld, %lld, %lld\n",
|
||||
+ papi_pe_buffer[0],enabled,running);
|
||||
+
|
||||
}
|
||||
|
||||
pe_ctl->counts[i] = count;
|
@ -1,83 +0,0 @@
|
||||
commit 6de699f68e6e2bffb44a1b943aca52d7d838d4d4
|
||||
Author: Vince Weaver <vincent.weaver@maine.edu>
|
||||
Date: Thu Jan 10 14:54:42 2019 -0500
|
||||
|
||||
perf_event: internally indicate we need fallback when rdpmc not available
|
||||
|
||||
diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h
|
||||
index 20dfbacd1..7f0ff6c95 100644
|
||||
--- a/src/components/perf_event/perf_helpers.h
|
||||
+++ b/src/components/perf_event/perf_helpers.h
|
||||
@@ -138,6 +138,10 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
|
||||
/* Only adjust if index is valid */
|
||||
running+=delta;
|
||||
+ } else {
|
||||
+ /* Falling back because rdpmc not supported */
|
||||
+ /* for this event. */
|
||||
+ return 0xffffffffffffffffULL;
|
||||
}
|
||||
|
||||
barrier();
|
||||
commit a76b1580f035726b28b0e37afc75de760ceaf1e4
|
||||
Author: Vince Weaver <vincent.weaver@maine.edu>
|
||||
Date: Thu Jan 10 15:36:03 2019 -0500
|
||||
|
||||
perf_event: properly fall back to read() if rdpmc read attempt fails
|
||||
|
||||
The code wasn't properly handling this.
|
||||
|
||||
We now fall back to read() if *any* rdpmc call in an eventset fails.
|
||||
In theory it is possible to only fall back in a per-event fashion but
|
||||
that would make the code a lot more complex.
|
||||
|
||||
diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
|
||||
index a3bd0b800..1f4bbcee7 100644
|
||||
--- a/src/components/perf_event/perf_event.c
|
||||
+++ b/src/components/perf_event/perf_event.c
|
||||
@@ -1094,6 +1094,7 @@ _pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
|
||||
int i;
|
||||
pe_control_t *pe_ctl = ( pe_control_t *) ctl;
|
||||
unsigned long long count, enabled, running, adjusted;
|
||||
+ int errors=0;
|
||||
|
||||
/* we must read each counter individually */
|
||||
for ( i = 0; i < pe_ctl->num_events; i++ ) {
|
||||
@@ -1101,7 +1102,9 @@ _pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
|
||||
count = mmap_read_self(pe_ctl->events[i].mmap_buf,
|
||||
&enabled,&running);
|
||||
|
||||
- /* TODO: more error checking? */
|
||||
+ if (count==0xffffffffffffffffULL) {
|
||||
+ errors++;
|
||||
+ }
|
||||
|
||||
/* Handle multiplexing case */
|
||||
if (enabled == running) {
|
||||
@@ -1127,6 +1130,8 @@ _pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
|
||||
|
||||
SUBDBG("EXIT: *events: %p\n", *events);
|
||||
|
||||
+ if (errors) return PAPI_ESYS;
|
||||
+
|
||||
return PAPI_OK;
|
||||
}
|
||||
|
||||
@@ -1253,10 +1258,16 @@ _pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
|
||||
int i, j, ret = -1;
|
||||
pe_control_t *pe_ctl = ( pe_control_t *) ctl;
|
||||
long long papi_pe_buffer[READ_BUFFER_SIZE];
|
||||
+ int result;
|
||||
|
||||
/* Handle fast case */
|
||||
+ /* FIXME: we fallback to slow reads if *any* event in eventset fails */
|
||||
+ /* in theory we could only fall back for the one event */
|
||||
+ /* but that makes the code more complicated. */
|
||||
if ((_perf_event_vector.cmp_info.fast_counter_read) && (!pe_ctl->inherit)) {
|
||||
- return _pe_rdpmc_read( ctx, ctl, events, flags);
|
||||
+ result=_pe_rdpmc_read( ctx, ctl, events, flags);
|
||||
+ /* if successful we are done, otherwise fall back to read */
|
||||
+ if (result==PAPI_OK) return PAPI_OK;
|
||||
}
|
||||
|
||||
/* Handle case where we are multiplexing */
|
@ -1,90 +0,0 @@
|
||||
commit c754f3bf1763358aaf70c0d64bc6cc2df29d8fec
|
||||
Author: Vince Weaver <vincent.weaver@maine.edu>
|
||||
Date: Thu Jan 10 20:42:56 2019 -0500
|
||||
|
||||
perf_event: fix granularity setting for attached processes
|
||||
|
||||
the old code was setting the granularity wrong when attaching to a CPU.
|
||||
|
||||
diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
|
||||
index 1f4bbcee..2f2f380e 100644
|
||||
--- a/src/components/perf_event/perf_event.c
|
||||
+++ b/src/components/perf_event/perf_event.c
|
||||
@@ -684,11 +684,23 @@ open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
|
||||
int i, ret = PAPI_OK;
|
||||
long pid;
|
||||
|
||||
- if (ctl->granularity==PAPI_GRN_SYS) {
|
||||
- pid = -1;
|
||||
+
|
||||
+ /* Set the pid setting */
|
||||
+ /* If attached, this is the pid of process we are attached to. */
|
||||
+ /* If GRN_THRD then it is 0 meaning current process only */
|
||||
+ /* If GRN_SYS then it is -1 meaning all procs on this CPU */
|
||||
+ /* Note if GRN_SYS then CPU must be specified, not -1 */
|
||||
+
|
||||
+ if (ctl->attached) {
|
||||
+ pid = ctl->tid;
|
||||
}
|
||||
else {
|
||||
- pid = ctl->tid;
|
||||
+ if (ctl->granularity==PAPI_GRN_SYS) {
|
||||
+ pid = -1;
|
||||
+ }
|
||||
+ else {
|
||||
+ pid = 0;
|
||||
+ }
|
||||
}
|
||||
|
||||
for( i = 0; i < ctl->num_events; i++ ) {
|
||||
@@ -1650,6 +1662,7 @@ _pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
|
||||
return ret;
|
||||
}
|
||||
|
||||
+ pe_ctl->attached = 1;
|
||||
pe_ctl->tid = option->attach.tid;
|
||||
|
||||
/* If events have been already been added, something may */
|
||||
@@ -1662,7 +1675,9 @@ _pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
|
||||
case PAPI_DETACH:
|
||||
pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state );
|
||||
|
||||
+ pe_ctl->attached = 0;
|
||||
pe_ctl->tid = 0;
|
||||
+
|
||||
return PAPI_OK;
|
||||
|
||||
case PAPI_CPU_ATTACH:
|
||||
@@ -1676,11 +1691,6 @@ _pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
|
||||
}
|
||||
/* looks like we are allowed so set cpu number */
|
||||
|
||||
- /* this tells the kernel not to count for a thread */
|
||||
- /* should we warn if we try to set both? perf_event */
|
||||
- /* will reject it. */
|
||||
- pe_ctl->tid = -1;
|
||||
-
|
||||
pe_ctl->cpu = option->cpu.cpu_num;
|
||||
|
||||
return PAPI_OK;
|
||||
@@ -1696,7 +1706,7 @@ _pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
|
||||
return ret;
|
||||
}
|
||||
/* looks like we are allowed, so set event set level counting domains */
|
||||
- pe_ctl->domain = option->domain.domain;
|
||||
+ pe_ctl->domain = option->domain.domain;
|
||||
return PAPI_OK;
|
||||
|
||||
case PAPI_GRANUL:
|
||||
diff --git a/src/components/perf_event/perf_event_lib.h b/src/components/perf_event/perf_event_lib.h
|
||||
index f4ad0c5d..0c50ab9f 100644
|
||||
--- a/src/components/perf_event/perf_event_lib.h
|
||||
+++ b/src/components/perf_event/perf_event_lib.h
|
||||
@@ -30,6 +30,7 @@ typedef struct {
|
||||
unsigned int overflow; /* overflow enable */
|
||||
unsigned int inherit; /* inherit enable */
|
||||
unsigned int overflow_signal; /* overflow signal */
|
||||
+ unsigned int attached; /* attached to a process */
|
||||
int cidx; /* current component */
|
||||
int cpu; /* which cpu to measure */
|
||||
pid_t tid; /* thread we are monitoring */
|
@ -1,115 +0,0 @@
|
||||
commit bde3da26f1f2755689e16fc9f5ab404367d1fdc8
|
||||
Author: Vince Weaver <vincent.weaver@maine.edu>
|
||||
Date: Wed Jan 24 14:13:28 2018 -0500
|
||||
|
||||
build: fix various LDFLAGS/CFLAGS issues
|
||||
|
||||
issues were reported by Andreas Beckmann <anbe@debian.org>
|
||||
|
||||
diff --git a/src/components/Makefile_comp_tests.target.in b/src/components/Makefile_comp_tests.target.in
|
||||
index 9a369adb..a4412bea 100644
|
||||
--- a/src/components/Makefile_comp_tests.target.in
|
||||
+++ b/src/components/Makefile_comp_tests.target.in
|
||||
@@ -9,7 +9,7 @@ INCLUDE = -I. -I@includedir@ -I$(datadir) -I$(testlibdir) -I$(validationlibdir)
|
||||
LIBDIR = @libdir@
|
||||
PAPILIB = $(datadir)/@LIBRARY@
|
||||
TESTLIB = $(testlibdir)/libtestlib.a
|
||||
-LDFLAGS = @LDL@
|
||||
+LDFLAGS = @LDFLAGS@ @LDL@
|
||||
CC = @CC@
|
||||
F77 = @F77@
|
||||
CC_R = @CC_R@
|
||||
diff --git a/src/components/perf_event_uncore/tests/Makefile b/src/components/perf_event_uncore/tests/Makefile
|
||||
index 3ee8fc2a..d70debe6 100644
|
||||
--- a/src/components/perf_event_uncore/tests/Makefile
|
||||
+++ b/src/components/perf_event_uncore/tests/Makefile
|
||||
@@ -17,19 +17,19 @@ perf_event_uncore_lib.o: perf_event_uncore_lib.c perf_event_uncore_lib.h
|
||||
|
||||
|
||||
perf_event_amd_northbridge: perf_event_amd_northbridge.o $(DOLOOPS) $(UTILOBJS) $(PAPILIB) $(DOLOOPS)
|
||||
- $(CC) $(LFLAGS) -o perf_event_amd_northbridge perf_event_amd_northbridge.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS)
|
||||
+ $(CC) $(CFLAGS) -o perf_event_amd_northbridge perf_event_amd_northbridge.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS)
|
||||
|
||||
perf_event_uncore: perf_event_uncore.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) perf_event_uncore_lib.o
|
||||
- $(CC) $(LFLAGS) -o perf_event_uncore perf_event_uncore.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS)
|
||||
+ $(CC) $(CFLAGS) -o perf_event_uncore perf_event_uncore.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS)
|
||||
|
||||
perf_event_uncore_attach: perf_event_uncore_attach.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) perf_event_uncore_lib.o
|
||||
- $(CC) $(LFLAGS) -o perf_event_uncore_attach perf_event_uncore_attach.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS)
|
||||
+ $(CC) $(CFLAGS) -o perf_event_uncore_attach perf_event_uncore_attach.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS)
|
||||
|
||||
perf_event_uncore_multiple: perf_event_uncore_multiple.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB)
|
||||
- $(CC) $(LFLAGS) $(INCLUDE) -o perf_event_uncore_multiple perf_event_uncore_multiple.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS)
|
||||
+ $(CC) $(CFLAGS) $(INCLUDE) -o perf_event_uncore_multiple perf_event_uncore_multiple.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS)
|
||||
|
||||
perf_event_uncore_cbox: perf_event_uncore_cbox.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB)
|
||||
- $(CC) $(LFLAGS) $(INCLUDE) -o perf_event_uncore_cbox perf_event_uncore_cbox.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS)
|
||||
+ $(CC) $(CFLAGS) $(INCLUDE) -o perf_event_uncore_cbox perf_event_uncore_cbox.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS)
|
||||
|
||||
|
||||
|
||||
diff --git a/src/ctests/Makefile.recipies b/src/ctests/Makefile.recipies
|
||||
index 63c107c0..201f3c85 100644
|
||||
--- a/src/ctests/Makefile.recipies
|
||||
+++ b/src/ctests/Makefile.recipies
|
||||
@@ -350,7 +350,7 @@ code2name: code2name.c $(TESTLIB) $(PAPILIB)
|
||||
$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) code2name.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o code2name
|
||||
|
||||
attach_target: attach_target.c $(DOLOOPS)
|
||||
- -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) attach_target.c -o attach_target $(DOLOOPS) $(TESTLIB)
|
||||
+ -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) attach_target.c -o attach_target $(DOLOOPS) $(TESTLIB) $(LDFLAGS)
|
||||
|
||||
zero_attach: zero_attach.c $(TESTLIB) $(DOLOOPS) $(PAPILIB)
|
||||
-$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_attach.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_attach
|
||||
diff --git a/src/ctests/Makefile.target.in b/src/ctests/Makefile.target.in
|
||||
index bb51c350..fcc3373b 100644
|
||||
--- a/src/ctests/Makefile.target.in
|
||||
+++ b/src/ctests/Makefile.target.in
|
||||
@@ -12,7 +12,7 @@ LIBRARY=@LIBRARY@
|
||||
SHLIB=@SHLIB@
|
||||
PAPILIB = ../@LINKLIB@
|
||||
TESTLIB = $(testlibdir)/libtestlib.a
|
||||
-LDFLAGS = @LDL@ @STATIC@
|
||||
+LDFLAGS = @LDFLAGS@ @LDL@ @STATIC@
|
||||
CC = @CC@
|
||||
MPICC = @MPICC@
|
||||
F77 = @F77@
|
||||
diff --git a/src/ftests/Makefile.target.in b/src/ftests/Makefile.target.in
|
||||
index 718586e5..8006dd8d 100644
|
||||
--- a/src/ftests/Makefile.target.in
|
||||
+++ b/src/ftests/Makefile.target.in
|
||||
@@ -11,7 +11,7 @@ LIBRARY = @LIBRARY@
|
||||
SHLIB=@SHLIB@
|
||||
PAPILIB = ../@LINKLIB@
|
||||
TESTLIB = $(testlibdir)/libtestlib.a
|
||||
-LDFLAGS = @LDL@
|
||||
+LDFLAGS = @LDFLAGS@ @LDL@
|
||||
CC = @CC@
|
||||
F77 = @F77@
|
||||
CC_R = @CC_R@
|
||||
diff --git a/src/utils/Makefile.target.in b/src/utils/Makefile.target.in
|
||||
index a5eab438..58d438a1 100644
|
||||
--- a/src/utils/Makefile.target.in
|
||||
+++ b/src/utils/Makefile.target.in
|
||||
@@ -11,7 +11,7 @@ LIBRARY=@LIBRARY@
|
||||
SHLIB=@SHLIB@
|
||||
PAPILIB = ../@LINKLIB@
|
||||
TESTLIB = $(testlibdir)/libtestlib.a
|
||||
-LDFLAGS = @LDL@ @STATIC@
|
||||
+LDFLAGS = @LDFLAGS@ @LDL@ @STATIC@
|
||||
CC = @CC@
|
||||
MPICC = @MPICC@
|
||||
F77 = @F77@
|
||||
diff --git a/src/validation_tests/Makefile.target.in b/src/validation_tests/Makefile.target.in
|
||||
index a5eab438..58d438a1 100644
|
||||
--- a/src/validation_tests/Makefile.target.in
|
||||
+++ b/src/validation_tests/Makefile.target.in
|
||||
@@ -11,7 +11,7 @@ LIBRARY=@LIBRARY@
|
||||
SHLIB=@SHLIB@
|
||||
PAPILIB = ../@LINKLIB@
|
||||
TESTLIB = $(testlibdir)/libtestlib.a
|
||||
-LDFLAGS = @LDL@ @STATIC@
|
||||
+LDFLAGS = @LDFLAGS@ @LDL@ @STATIC@
|
||||
CC = @CC@
|
||||
MPICC = @MPICC@
|
||||
F77 = @F77@
|
@ -1,144 +0,0 @@
|
||||
commit 3a6c9a855195e6f6f44ad6dffe2cd4046426ab53
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Wed Nov 25 21:46:26 2020 +0900
|
||||
|
||||
fix for performance improvement of _mx_init_component() function
|
||||
|
||||
diff --git a/src/components/mx/linux-mx.c b/src/components/mx/linux-mx.c
|
||||
index 2da406d14..34e6f02c2 100644
|
||||
--- a/src/components/mx/linux-mx.c
|
||||
+++ b/src/components/mx/linux-mx.c
|
||||
@@ -224,26 +224,35 @@ _mx_init_component( int cidx )
|
||||
{
|
||||
|
||||
FILE *fff;
|
||||
- char test_string[BUFSIZ];
|
||||
+ char *path;
|
||||
+ int len, pathlen;
|
||||
|
||||
/* detect if MX available */
|
||||
|
||||
- strncpy(mx_counters_exe,"mx_counters 2> /dev/null",BUFSIZ);
|
||||
- fff=popen(mx_counters_exe,"r");
|
||||
- /* popen only returns NULL if "sh" fails, not the actual command */
|
||||
- if (fgets(test_string,BUFSIZ,fff)==NULL) {
|
||||
- pclose(fff);
|
||||
- strncpy(mx_counters_exe,"./components/mx/utils/fake_mx_counters 2> /dev/null",BUFSIZ);
|
||||
- fff=popen(mx_counters_exe,"r");
|
||||
- if (fgets(test_string,BUFSIZ,fff)==NULL) {
|
||||
- pclose(fff);
|
||||
- /* neither real nor fake found */
|
||||
- strncpy(_mx_vector.cmp_info.disabled_reason,
|
||||
- "No MX utilities found",PAPI_MAX_STR_LEN);
|
||||
- return PAPI_ECMP;
|
||||
+ path = getenv("PATH");
|
||||
+ pathlen = strlen(path);
|
||||
+ while(pathlen > 0) {
|
||||
+ len = strcspn(path, ":");
|
||||
+ strncpy(mx_counters_exe, path, len);
|
||||
+ mx_counters_exe[len] = '\0';
|
||||
+ strcat(mx_counters_exe, "/mx_counters");
|
||||
+ fff = fopen(mx_counters_exe, "r");
|
||||
+ if (fff != NULL) {
|
||||
+ strcat(mx_counters_exe, " 2> /dev/null");
|
||||
+ break;
|
||||
}
|
||||
+ pathlen = pathlen - len - 1;
|
||||
+ if (pathlen > 0) {
|
||||
+ path = path + len + 1;
|
||||
+ }
|
||||
+ }
|
||||
+ if (fff == NULL) {
|
||||
+ /* neither real nor fake found */
|
||||
+ strncpy(_mx_vector.cmp_info.disabled_reason,
|
||||
+ "No MX utilities found",PAPI_MAX_STR_LEN);
|
||||
+ return PAPI_ECMP;
|
||||
}
|
||||
- pclose(fff);
|
||||
+ fclose(fff);
|
||||
|
||||
num_events=MX_MAX_COUNTERS;
|
||||
_mx_vector.cmp_info.num_native_events=num_events;
|
||||
commit 3a2560a86be44f4b15d96a45eda8e7f387b9166c
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Jan 26 16:30:40 2021 +0900
|
||||
|
||||
Add string length check before strncpy() and strcat() calls in _mx_init_component()
|
||||
|
||||
Myrinet Express-related component MX modules are initialized with the _mx_init_component() function,
|
||||
which is called from the PAPI_library_init() function.
|
||||
The popen(3) call runs a loadable module called "mx_counters",
|
||||
and if the loadable module does not exist,
|
||||
it attempts to run a loadable module called "./components/mx/utils/fake_mx_counters".
|
||||
In an environment where there are no "mx_counters" and "./components/mx/utils/fake_mx_counters" loadable modules,
|
||||
popen(3) will be called twice uselessly.
|
||||
popen(3) internally calls pipe(2) once, fork(2) twice and exec(2) once.
|
||||
|
||||
The size of the user space of the application calling the PAPI_library_init() function affects the performance of fork(2),
|
||||
which is called as an extension of popen(3).
|
||||
As a result, the performance of the PAPI_library_init() function is affected by the amount of user space in the application
|
||||
that called the PAPI_library_init() function.
|
||||
|
||||
In the _mx_init_component() function,
|
||||
the MX module only needs to be able to verify that a load module named "mx_counters" exists.
|
||||
We improved the _mx_init_component() function to call fopen(3) instead of popen(3).
|
||||
We add string length check before strncpy() and strcat() calls in _mx_init_component() function.
|
||||
|
||||
diff --git a/src/components/mx/linux-mx.c b/src/components/mx/linux-mx.c
|
||||
index 34e6f02c2..c2920d65b 100644
|
||||
--- a/src/components/mx/linux-mx.c
|
||||
+++ b/src/components/mx/linux-mx.c
|
||||
@@ -225,7 +225,7 @@ _mx_init_component( int cidx )
|
||||
|
||||
FILE *fff;
|
||||
char *path;
|
||||
- int len, pathlen;
|
||||
+ int checklen, len, pathlen;
|
||||
|
||||
/* detect if MX available */
|
||||
|
||||
@@ -233,13 +233,31 @@ _mx_init_component( int cidx )
|
||||
pathlen = strlen(path);
|
||||
while(pathlen > 0) {
|
||||
len = strcspn(path, ":");
|
||||
- strncpy(mx_counters_exe, path, len);
|
||||
+ if (len < BUFSIZ) {
|
||||
+ strncpy(mx_counters_exe, path, len);
|
||||
+ } else {
|
||||
+ fff = NULL;
|
||||
+ break;
|
||||
+ }
|
||||
mx_counters_exe[len] = '\0';
|
||||
- strcat(mx_counters_exe, "/mx_counters");
|
||||
+ checklen = len + strlen("/mx_counters");
|
||||
+ if (checklen < BUFSIZ) {
|
||||
+ strcat(mx_counters_exe, "/mx_counters");
|
||||
+ } else {
|
||||
+ fff = NULL;
|
||||
+ break;
|
||||
+ }
|
||||
fff = fopen(mx_counters_exe, "r");
|
||||
if (fff != NULL) {
|
||||
- strcat(mx_counters_exe, " 2> /dev/null");
|
||||
- break;
|
||||
+ checklen = checklen + strlen(" 2> /dev/null");
|
||||
+ if (checklen < BUFSIZ) {
|
||||
+ strcat(mx_counters_exe, " 2> /dev/null");
|
||||
+ break;
|
||||
+ } else {
|
||||
+ fclose(fff);
|
||||
+ fff = NULL;
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
pathlen = pathlen - len - 1;
|
||||
if (pathlen > 0) {
|
||||
@@ -247,7 +265,7 @@ _mx_init_component( int cidx )
|
||||
}
|
||||
}
|
||||
if (fff == NULL) {
|
||||
- /* neither real nor fake found */
|
||||
+ /* mx_counters not found */
|
||||
strncpy(_mx_vector.cmp_info.disabled_reason,
|
||||
"No MX utilities found",PAPI_MAX_STR_LEN);
|
||||
return PAPI_ECMP;
|
@ -1,607 +0,0 @@
|
||||
commit 660bfd20bc89a26629e99de958d38b031db4250d
|
||||
Author: William Cohen <wcohen@redhat.com>
|
||||
Date: Thu Oct 31 15:30:00 2019 -0400
|
||||
|
||||
This code is a modification of krentel_pthreads.c, to better test
|
||||
some race conditions. It is not included in the standard tests;
|
||||
it is a diagnostic that should be run with "valgrind --tool=helgrind".
|
||||
|
||||
Signed-off-by: Anthony Castaldo <TonyCastaldo@icl.utk.edu>
|
||||
|
||||
diff --git a/src/ctests/krentel_pthreads_race.c b/src/ctests/krentel_pthreads_race.c
|
||||
new file mode 100644
|
||||
index 000000000..0ebfb5056
|
||||
--- /dev/null
|
||||
+++ b/src/ctests/krentel_pthreads_race.c
|
||||
@@ -0,0 +1,236 @@
|
||||
+/*
|
||||
+ * Test PAPI with multiple threads.
|
||||
+ * This code is a modification of krentel_pthreads.c by William Cohen
|
||||
+ * <wcohen@redhat.com>, on Sep 10 2019, to exercise and test for the race
|
||||
+ * condition in papi_internal.c involving the formerly static variables
|
||||
+ * papi_event_code and papi_event_code_changed. This code should be run with
|
||||
+ * "valgrind --tool=helgrind" to show any data races. If run with:
|
||||
+ * "valgrind --tool=helgrind --log-file=helgrind_out.txt"
|
||||
+ * The output will be captured in helgrind_out.txt and can then be processed
|
||||
+ * with the program filter_helgrind.c; see commentary at the top of that file.
|
||||
+ */
|
||||
+
|
||||
+#define MAX_THREADS 256
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <pthread.h>
|
||||
+#include <sys/time.h>
|
||||
+
|
||||
+#include "papi.h"
|
||||
+#include "papi_test.h"
|
||||
+
|
||||
+#define EVENT PAPI_TOT_CYC
|
||||
+
|
||||
+static int program_time = 5;
|
||||
+static int threshold = 20000000;
|
||||
+static int num_threads = 3;
|
||||
+
|
||||
+static long count[MAX_THREADS];
|
||||
+static long iter[MAX_THREADS];
|
||||
+static struct timeval last[MAX_THREADS];
|
||||
+
|
||||
+static pthread_key_t key;
|
||||
+
|
||||
+static struct timeval start;
|
||||
+
|
||||
+static void
|
||||
+my_handler( int EventSet, void *pc, long long ovec, void *context )
|
||||
+{
|
||||
+ ( void ) EventSet;
|
||||
+ ( void ) pc;
|
||||
+ ( void ) ovec;
|
||||
+ ( void ) context;
|
||||
+
|
||||
+ long num = ( long ) pthread_getspecific( key );
|
||||
+
|
||||
+ if ( num < 0 || num > num_threads )
|
||||
+ test_fail( __FILE__, __LINE__, "getspecific failed", 1 );
|
||||
+ count[num]++;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+print_rate( long num )
|
||||
+{
|
||||
+ struct timeval now;
|
||||
+ long st_secs;
|
||||
+ double last_secs;
|
||||
+
|
||||
+ gettimeofday( &now, NULL );
|
||||
+ st_secs = now.tv_sec - start.tv_sec;
|
||||
+ last_secs = ( double ) ( now.tv_sec - last[num].tv_sec )
|
||||
+ + ( ( double ) ( now.tv_usec - last[num].tv_usec ) ) / 1000000.0;
|
||||
+ if ( last_secs <= 0.001 )
|
||||
+ last_secs = 0.001;
|
||||
+
|
||||
+ if (!TESTS_QUIET) {
|
||||
+ printf( "[%ld] time = %ld, count = %ld, iter = %ld, "
|
||||
+ "rate = %.1f/Kiter\n",
|
||||
+ num, st_secs, count[num], iter[num],
|
||||
+ ( 1000.0 * ( double ) count[num] ) / ( double ) iter[num] );
|
||||
+ }
|
||||
+
|
||||
+ count[num] = 0;
|
||||
+ iter[num] = 0;
|
||||
+ last[num] = now;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+do_cycles( long num, int len )
|
||||
+{
|
||||
+ struct timeval start, now;
|
||||
+ double x, sum;
|
||||
+
|
||||
+ gettimeofday( &start, NULL );
|
||||
+
|
||||
+ for ( ;; ) {
|
||||
+ sum = 1.0;
|
||||
+ for ( x = 1.0; x < 250000.0; x += 1.0 )
|
||||
+ sum += x;
|
||||
+ if ( sum < 0.0 )
|
||||
+ printf( "==>> SUM IS NEGATIVE !! <<==\n" );
|
||||
+
|
||||
+ iter[num]++;
|
||||
+
|
||||
+ gettimeofday( &now, NULL );
|
||||
+ if ( now.tv_sec >= start.tv_sec + len )
|
||||
+ break;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void *
|
||||
+my_thread( void *v )
|
||||
+{
|
||||
+ long num = ( long ) v;
|
||||
+ int n;
|
||||
+ int EventSet = PAPI_NULL;
|
||||
+ int event_code;
|
||||
+ long long value;
|
||||
+
|
||||
+ int retval;
|
||||
+
|
||||
+ retval = PAPI_register_thread( );
|
||||
+ if ( retval != PAPI_OK ) {
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval );
|
||||
+ }
|
||||
+ pthread_setspecific( key, v );
|
||||
+
|
||||
+ count[num] = 0;
|
||||
+ iter[num] = 0;
|
||||
+ last[num] = start;
|
||||
+
|
||||
+ retval = PAPI_create_eventset( &EventSet );
|
||||
+ if ( retval != PAPI_OK ) {
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_create_eventset failed", retval );
|
||||
+ }
|
||||
+
|
||||
+ retval = PAPI_event_name_to_code("PAPI_TOT_CYC", &event_code);
|
||||
+ if (retval != PAPI_OK ) {
|
||||
+ if (!TESTS_QUIET) printf("Trouble creating event name\n");
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code failed", retval );
|
||||
+ }
|
||||
+
|
||||
+ retval = PAPI_add_event( EventSet, EVENT );
|
||||
+ if (retval != PAPI_OK ) {
|
||||
+ if (!TESTS_QUIET) printf("Trouble adding event\n");
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_add_event failed", retval );
|
||||
+ }
|
||||
+
|
||||
+ if ( PAPI_overflow( EventSet, EVENT, threshold, 0, my_handler ) != PAPI_OK )
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_overflow failed", 1 );
|
||||
+
|
||||
+ if ( PAPI_start( EventSet ) != PAPI_OK )
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_start failed", 1 );
|
||||
+
|
||||
+ if (!TESTS_QUIET) printf( "launched timer in thread %ld\n", num );
|
||||
+
|
||||
+ for ( n = 1; n <= program_time; n++ ) {
|
||||
+ do_cycles( num, 1 );
|
||||
+ print_rate( num );
|
||||
+ }
|
||||
+
|
||||
+ PAPI_stop( EventSet, &value );
|
||||
+
|
||||
+ retval = PAPI_overflow( EventSet, EVENT, 0, 0, my_handler);
|
||||
+ if ( retval != PAPI_OK )
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_overflow failed to reset the overflow handler", retval );
|
||||
+
|
||||
+ if ( PAPI_remove_event( EventSet, EVENT ) != PAPI_OK )
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_remove_event", 1 );
|
||||
+
|
||||
+ if ( PAPI_destroy_eventset( &EventSet ) != PAPI_OK )
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", 1 );
|
||||
+
|
||||
+ if ( PAPI_unregister_thread( ) != PAPI_OK )
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", 1 );
|
||||
+
|
||||
+ return ( NULL );
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main( int argc, char **argv )
|
||||
+{
|
||||
+ pthread_t *td = NULL;
|
||||
+ long n;
|
||||
+ int quiet,retval;
|
||||
+
|
||||
+ /* Set TESTS_QUIET variable */
|
||||
+ quiet=tests_quiet( argc, argv );
|
||||
+
|
||||
+ if ( argc < 2 || sscanf( argv[1], "%d", &program_time ) < 1 )
|
||||
+ program_time = 6;
|
||||
+ if ( argc < 3 || sscanf( argv[2], "%d", &threshold ) < 1 )
|
||||
+ threshold = 20000000;
|
||||
+ if ( argc < 4 || sscanf( argv[3], "%d", &num_threads ) < 1 )
|
||||
+ num_threads = 32;
|
||||
+
|
||||
+ td = malloc((num_threads+1) * sizeof(pthread_t));
|
||||
+ if (!td) {
|
||||
+ test_fail( __FILE__, __LINE__, "td malloc failed", 1 );
|
||||
+ }
|
||||
+
|
||||
+ if (!quiet) {
|
||||
+ printf( "program_time = %d, threshold = %d, num_threads = %d\n\n",
|
||||
+ program_time, threshold, num_threads );
|
||||
+ }
|
||||
+
|
||||
+ if ( PAPI_library_init( PAPI_VER_CURRENT ) != PAPI_VER_CURRENT )
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_library_init failed", 1 );
|
||||
+
|
||||
+ /* Test to be sure we can add events */
|
||||
+ retval = PAPI_query_event( EVENT );
|
||||
+ if (retval!=PAPI_OK) {
|
||||
+ if (!quiet) printf("Trouble finding event\n");
|
||||
+ test_skip(__FILE__,__LINE__,"Event not available",1);
|
||||
+ }
|
||||
+
|
||||
+ if ( PAPI_thread_init( ( unsigned long ( * )( void ) ) ( pthread_self ) ) !=
|
||||
+ PAPI_OK )
|
||||
+ test_fail( __FILE__, __LINE__, "PAPI_thread_init failed", 1 );
|
||||
+
|
||||
+ if ( pthread_key_create( &key, NULL ) != 0 )
|
||||
+ test_fail( __FILE__, __LINE__, "pthread key create failed", 1 );
|
||||
+
|
||||
+ gettimeofday( &start, NULL );
|
||||
+
|
||||
+ for ( n = 1; n <= num_threads; n++ ) {
|
||||
+ if ( pthread_create( &(td[n]), NULL, my_thread, ( void * ) n ) != 0 )
|
||||
+ test_fail( __FILE__, __LINE__, "pthread create failed", 1 );
|
||||
+ }
|
||||
+
|
||||
+ my_thread( ( void * ) 0 );
|
||||
+
|
||||
+ /* wait for all the threads */
|
||||
+ for ( n = 1; n <= num_threads; n++ ) {
|
||||
+ if ( pthread_join( td[n], NULL))
|
||||
+ test_fail( __FILE__, __LINE__, "pthread join failed", 1 );
|
||||
+ }
|
||||
+
|
||||
+ free(td);
|
||||
+
|
||||
+ if (!quiet) printf( "done\n" );
|
||||
+
|
||||
+ test_pass( __FILE__ );
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
commit 979e80136fd5e0ee2fb26f7374b36a8433147a68
|
||||
Author: Anthony Castaldo <TonyCastaldo@icl.utk.edu>
|
||||
Date: Thu Oct 31 15:56:55 2019 -0400
|
||||
|
||||
The changes to papi.c, papi_internal.c, threads.h and threads.c
|
||||
correct a race condition that was the result of all threads using
|
||||
the same two static variables (papi_event_code and papi_event_code_changed)
|
||||
to temporarily record a state of operation. The solution was to
|
||||
make these variables unique per thread, using the ThreadInfo_t
|
||||
structure already provided in PAPI for such purposes. The file
|
||||
krentel_pthread_race.c is a stress test to produce race conditions.
|
||||
filter_helgrind.c reduces the volume of --tool-helgrind output to
|
||||
a more manageable summary. Both are added to Makefile.recipies.
|
||||
|
||||
diff --git a/src/ctests/Makefile.recipies b/src/ctests/Makefile.recipies
|
||||
index 87340831d..b7c1963d7 100644
|
||||
--- a/src/ctests/Makefile.recipies
|
||||
+++ b/src/ctests/Makefile.recipies
|
||||
@@ -161,6 +161,12 @@ locks_pthreads: locks_pthreads.c $(TESTLIB) $(PAPILIB)
|
||||
krentel_pthreads: krentel_pthreads.c $(TESTLIB) $(PAPILIB)
|
||||
$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) krentel_pthreads.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o krentel_pthreads -lpthread
|
||||
|
||||
+# krentel_pthreads_race is not included with the standard tests;
|
||||
+# it is a modification of krentel_pthreads intended to be run with
|
||||
+# "valgrind --tool=helgrind" to test for race conditions.
|
||||
+krentel_pthreads_race: krentel_pthreads_race.c $(TESTLIB) $(PAPILIB)
|
||||
+ $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) krentel_pthreads_race.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o krentel_pthreads_race -lpthread
|
||||
+
|
||||
overflow_pthreads: overflow_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB)
|
||||
$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_pthreads -lpthread
|
||||
|
||||
@@ -434,6 +440,9 @@ forkexec4: forkexec4.c $(TESTLIB) $(PAPILIB)
|
||||
prof_utils.o: prof_utils.c $(testlibdir)/papi_test.h prof_utils.h
|
||||
$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -c prof_utils.c
|
||||
|
||||
+filter_helgrind: filter_helgrind.c $(TESTLIB) $(PAPILIB)
|
||||
+ -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) filter_helgrind.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o filter_helgrind
|
||||
+
|
||||
.PHONY : all default ctests ctest clean
|
||||
|
||||
clean:
|
||||
diff --git a/src/ctests/filter_helgrind.c b/src/ctests/filter_helgrind.c
|
||||
new file mode 100644
|
||||
index 000000000..d918a789e
|
||||
--- /dev/null
|
||||
+++ b/src/ctests/filter_helgrind.c
|
||||
@@ -0,0 +1,170 @@
|
||||
+/*
|
||||
+ * This code is a simple filter for the helgrind_out.txt file
|
||||
+ * produced by:
|
||||
+ * "valgrind --tool=helgrind --log-file=helgrind_out.txt someProgram"
|
||||
+ *
|
||||
+ * This is useful because the tool does not recognize PAPI locks,
|
||||
+ * thus reports as possible race conditions reads/writes by
|
||||
+ * different threads that are actually fine (surrounded by locks).
|
||||
+ *
|
||||
+ * This was written particularly for krentel_pthreads_race.c
|
||||
+ * when processed by the above valgrind. We produce a line per
|
||||
+ * condition, in the form:
|
||||
+ * OP@file:line OP@file:line
|
||||
+ * where OP is R or W. The first file:line code occurred
|
||||
+ * after the second file:line code, and on a different thread.
|
||||
+ *
|
||||
+ * We print the results to stdout. It is useful to filter this
|
||||
+ * through the standard utility 'uniq', each occurrence only
|
||||
+ * needs to be investigated once. Just insure there are
|
||||
+ * MATCHING locks around each operation within the code.
|
||||
+ *
|
||||
+ * An example run (using uniq): The options -uc will print
|
||||
+ * only unique lines, preceeded by a count of how many times
|
||||
+ * it occurs.
|
||||
+ *
|
||||
+ * ./filter_helgrind | uniq -uc
|
||||
+ *
|
||||
+ * An example output line (piped through uniq as above):
|
||||
+ * 1 R@threads.c:190 W@threads.c:206
|
||||
+ * An investigation shows threads.c:190 is protected by
|
||||
+ * _papi_hwi_lock(THREADS_LOCK); and threads.c:206 is
|
||||
+ * protected by the same lock. Thus no data race can
|
||||
+ * occur for this instance.
|
||||
+ *
|
||||
+ * Compilation within the papi/src/ctests directory:
|
||||
+ * make filter_helgrind
|
||||
+ *
|
||||
+ */
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <string.h>
|
||||
+
|
||||
+int main(int argc, char** args) {
|
||||
+ (void) argc;
|
||||
+ (void) args;
|
||||
+
|
||||
+ char myLine[16384];
|
||||
+ int state, size;
|
||||
+ char type1, type2;
|
||||
+ char fname1[256], fname2[256];
|
||||
+ char *paren1, *paren2;
|
||||
+
|
||||
+ FILE *HELOUT = fopen("helgrind_out.txt", "r"); // Read the file.
|
||||
+ if (HELOUT == NULL) {
|
||||
+ fprintf(stderr, "Could not open helgrind_out.txt.\n");
|
||||
+ exit(-1);
|
||||
+ }
|
||||
+
|
||||
+ char PDRR[]="Possible data race during read";
|
||||
+ char PDRW[]="Possible data race during write";
|
||||
+ char TCWW[]="This conflicts with a previous write";
|
||||
+ char TCWR[]="This conflicts with a previous read";
|
||||
+ char atSTR[]=" at ";
|
||||
+
|
||||
+ // State machine:
|
||||
+ // State 0: We are looking for a line with PDRR or PDRW.
|
||||
+ // We don't exit until we find it, or run out of lines.
|
||||
+ // if we find it, we remember which and go to state 1.
|
||||
+ // State 1: Looking for " at " in column 11.
|
||||
+ // When found, we extract the string betweeen '(' and ')'
|
||||
+ // which is program name:line. go to state 2.
|
||||
+ // State 2: We are searching for TCWW, TCWR, PDRW, PDRR.
|
||||
+ // If we find the first two:
|
||||
+ // Remember which, and go to state 3.
|
||||
+ // If we find either of the second two, go back to State 1.
|
||||
+ // State 3: Looking for " at " in column 11.
|
||||
+ // When found, extract the string betweeen '(' and ')',
|
||||
+ // which is program name:line.
|
||||
+ // OUTPUT LINE for an investigation.
|
||||
+ // Go to State 0.
|
||||
+
|
||||
+ state = 0; // looking for PDRR, PDRW.
|
||||
+ while (fgets(myLine, 16384, HELOUT) != NULL) {
|
||||
+ if (strlen(myLine) < 20) continue;
|
||||
+ switch (state) {
|
||||
+ case 0: // Looking for PDRR or PRDW.
|
||||
+ if (strstr(myLine, PDRR) != NULL) {
|
||||
+ type1='R';
|
||||
+ state=1;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ if (strstr(myLine, PDRW) != NULL) {
|
||||
+ type1='W';
|
||||
+ state=1;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ continue;
|
||||
+ break;
|
||||
+
|
||||
+ case 1: // Looking for atSTR in column 11.
|
||||
+ if (strncmp(myLine+10, atSTR, 6) != 0) continue;
|
||||
+ paren1=strchr(myLine, '(');
|
||||
+ paren2=strchr(myLine, ')');
|
||||
+ if (paren1 == NULL || paren2 == NULL ||
|
||||
+ paren1 > paren2) {
|
||||
+ state=0; // Abort, found something I don't understand.
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ size = paren2-paren1-1; // compute length of name.
|
||||
+ strncpy(fname1, paren1+1, size); // Copy the name.
|
||||
+ fname1[size]=0; // install z-terminator.
|
||||
+ state=2;
|
||||
+ continue;
|
||||
+ break;
|
||||
+
|
||||
+ case 2: // Looking for TCWW, TCWR, PDRR, PDRW.
|
||||
+ if (strstr(myLine, TCWR) != NULL) {
|
||||
+ type2='R';
|
||||
+ state=3;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ if (strstr(myLine, TCWW) != NULL) {
|
||||
+ type2='W';
|
||||
+ state=3;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ if (strstr(myLine, PDRR) != NULL) {
|
||||
+ type1='R';
|
||||
+ state=1;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ if (strstr(myLine, PDRW) != NULL) {
|
||||
+ type1='W';
|
||||
+ state=1;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ continue;
|
||||
+ break;
|
||||
+
|
||||
+ case 3: // Looking for atSTR in column 11.
|
||||
+ if (strncmp(myLine+10, atSTR, 6) != 0) continue;
|
||||
+ paren1=strchr(myLine, '(');
|
||||
+ paren2=strchr(myLine, ')');
|
||||
+ if (paren1 == NULL || paren2 == NULL ||
|
||||
+ paren1 > paren2) {
|
||||
+ state=0; // Abort, found something I don't understand.
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ size = paren2-paren1-1; // compute length of name.
|
||||
+ strncpy(fname2, paren1+1, size); // Copy the name.
|
||||
+ fname2[size]=0; // install z-terminator.
|
||||
+ fprintf(stdout, "%c@%-32s %c@%-32s\n", type1, fname1, type2, fname2);
|
||||
+ state=0;
|
||||
+ continue;
|
||||
+ break;
|
||||
+ } // end switch.
|
||||
+ } // end while.
|
||||
+
|
||||
+ fclose(HELOUT);
|
||||
+ exit(0);
|
||||
+}
|
||||
diff --git a/src/papi.c b/src/papi.c
|
||||
index 4e08dc840..070e3f8c6 100644
|
||||
--- a/src/papi.c
|
||||
+++ b/src/papi.c
|
||||
@@ -608,32 +608,26 @@ PAPI_library_init( int version )
|
||||
papi_return( init_retval );
|
||||
}
|
||||
|
||||
- /* Initialize component globals */
|
||||
+ /* Initialize thread globals, including the main threads */
|
||||
|
||||
- tmp = _papi_hwi_init_global( );
|
||||
+ tmp = _papi_hwi_init_global_threads( );
|
||||
if ( tmp ) {
|
||||
init_retval = tmp;
|
||||
_papi_hwi_shutdown_global_internal( );
|
||||
- _in_papi_library_init_cnt--;
|
||||
+ _in_papi_library_init_cnt--;
|
||||
papi_return( init_retval );
|
||||
}
|
||||
-
|
||||
- /* Initialize thread globals, including the main threads */
|
||||
|
||||
- tmp = _papi_hwi_init_global_threads( );
|
||||
+ /* Initialize component globals */
|
||||
+
|
||||
+ tmp = _papi_hwi_init_global( );
|
||||
if ( tmp ) {
|
||||
- int i;
|
||||
init_retval = tmp;
|
||||
_papi_hwi_shutdown_global_internal( );
|
||||
- for ( i = 0; i < papi_num_components; i++ ) {
|
||||
- if (!_papi_hwd[i]->cmp_info.disabled) {
|
||||
- _papi_hwd[i]->shutdown_component( );
|
||||
- }
|
||||
- }
|
||||
_in_papi_library_init_cnt--;
|
||||
papi_return( init_retval );
|
||||
}
|
||||
-
|
||||
+
|
||||
init_level = PAPI_LOW_LEVEL_INITED;
|
||||
_in_papi_library_init_cnt--;
|
||||
|
||||
diff --git a/src/papi_internal.c b/src/papi_internal.c
|
||||
index 2412eca63..f0e457bf7 100644
|
||||
--- a/src/papi_internal.c
|
||||
+++ b/src/papi_internal.c
|
||||
@@ -111,31 +111,28 @@ _papi_hwi_free_papi_event_string() {
|
||||
}
|
||||
return;
|
||||
}
|
||||
-// A place to keep the current papi event code so some component functions can fetch its value
|
||||
-// The current event code can be stored here prior to component calls and cleared after the component returns
|
||||
-static unsigned int papi_event_code = -1;
|
||||
-static int papi_event_code_changed = -1;
|
||||
+
|
||||
void
|
||||
_papi_hwi_set_papi_event_code (unsigned int event_code, int update_flag) {
|
||||
INTDBG("new event_code: %#x, update_flag: %d, previous event_code: %#x\n", event_code, update_flag, papi_event_code);
|
||||
|
||||
// if call is just to reset and start over, set both flags to show nothing saved yet
|
||||
if (update_flag < 0) {
|
||||
- papi_event_code_changed = -1;
|
||||
- papi_event_code = -1;
|
||||
+ _papi_hwi_my_thread->tls_papi_event_code_changed = -1;
|
||||
+ _papi_hwi_my_thread->tls_papi_event_code = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
// if 0, it is being set prior to calling a component, if >0 it is being changed by the component
|
||||
- papi_event_code_changed = update_flag;
|
||||
+ _papi_hwi_my_thread->tls_papi_event_code_changed = update_flag;
|
||||
// save the event code passed in
|
||||
- papi_event_code = event_code;
|
||||
+ _papi_hwi_my_thread->tls_papi_event_code = event_code;
|
||||
return;
|
||||
}
|
||||
unsigned int
|
||||
_papi_hwi_get_papi_event_code () {
|
||||
INTDBG("papi_event_code: %#x\n", papi_event_code);
|
||||
- return papi_event_code;
|
||||
+ return _papi_hwi_my_thread->tls_papi_event_code;
|
||||
}
|
||||
/* Get the index into the ESI->NativeInfoArray for the current PAPI event code */
|
||||
int
|
||||
@@ -560,7 +557,7 @@ _papi_hwi_native_to_eventcode(int cidx, int event_code, int ntv_idx, const char
|
||||
|
||||
int result;
|
||||
|
||||
- if (papi_event_code_changed > 0) {
|
||||
+ if (_papi_hwi_my_thread->tls_papi_event_code_changed > 0) {
|
||||
result = _papi_hwi_get_papi_event_code();
|
||||
INTDBG("EXIT: papi_event_code: %#x set by the component\n", result);
|
||||
return result;
|
||||
diff --git a/src/threads.c b/src/threads.c
|
||||
index 4dd0cf4e3..9f586c415 100644
|
||||
--- a/src/threads.c
|
||||
+++ b/src/threads.c
|
||||
@@ -286,6 +286,10 @@ _papi_hwi_initialize_thread( ThreadInfo_t ** dest, int tid )
|
||||
return PAPI_ENOMEM;
|
||||
}
|
||||
|
||||
+ /* init event memory variables, used by papi_internal.c */
|
||||
+ thread->tls_papi_event_code = -1;
|
||||
+ thread->tls_papi_event_code_changed = -1;
|
||||
+
|
||||
/* Call the component to fill in anything special. */
|
||||
|
||||
for ( i = 0; i < papi_num_components; i++ ) {
|
||||
@@ -421,6 +425,11 @@ _papi_hwi_shutdown_thread( ThreadInfo_t * thread, int force_shutdown )
|
||||
unsigned long tid;
|
||||
int i, failure = 0;
|
||||
|
||||
+ /* Clear event memory variables */
|
||||
+ thread->tls_papi_event_code = -1;
|
||||
+ thread->tls_papi_event_code_changed = -1;
|
||||
+
|
||||
+ /* Get thread id */
|
||||
if ( _papi_hwi_thread_id_fn )
|
||||
tid = ( *_papi_hwi_thread_id_fn ) ( );
|
||||
else
|
||||
diff --git a/src/threads.h b/src/threads.h
|
||||
index cd3369068..264d9f3a6 100644
|
||||
--- a/src/threads.h
|
||||
+++ b/src/threads.h
|
||||
@@ -30,6 +30,11 @@ typedef struct _ThreadInfo
|
||||
EventSetInfo_t **running_eventset;
|
||||
EventSetInfo_t *from_esi; /* ESI used for last update this control state */
|
||||
int wants_signal;
|
||||
+
|
||||
+ // The current event code can be stored here prior to
|
||||
+ // component calls and cleared after the component returns.
|
||||
+ unsigned int tls_papi_event_code;
|
||||
+ int tls_papi_event_code_changed;
|
||||
} ThreadInfo_t;
|
||||
|
||||
/** The list of threads, gets initialized to master process with TID of getpid()
|
@ -1,22 +0,0 @@
|
||||
diff -up papi-5.6.0/src/papi_events.csv.p9 papi-5.6.0/src/papi_events.csv
|
||||
--- papi-5.6.0/src/papi_events.csv.p9 2021-05-25 16:19:17.342588151 -0400
|
||||
+++ papi-5.6.0/src/papi_events.csv 2021-05-25 16:54:06.357460525 -0400
|
||||
@@ -1580,15 +1580,15 @@ PRESET,PAPI_L1_DCR,DERIVED_SUB,PM_LD_REF
|
||||
#PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1_ALT
|
||||
PRESET,PAPI_L1_DCA,DERIVED_ADD,PM_LD_REF_L1,PM_ST_CMPL
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS
|
||||
-PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS
|
||||
-PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS
|
||||
+#PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS
|
||||
+#PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS
|
||||
PRESET,PAPI_L3_DCR,NOT_DERIVED,PM_DATA_FROM_L2MISS
|
||||
PRESET,PAPI_L3_DCM,DERIVED_ADD,PM_DATA_FROM_LMEM,PM_DATA_FROM_RMEM
|
||||
PRESET,PAPI_L3_LDM,DERIVED_ADD,PM_DATA_FROM_LMEM,PM_DATA_FROM_RMEM
|
||||
PRESET,PAPI_L1_ICH,NOT_DERIVED,PM_INST_FROM_L1
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,PM_L1_ICACHE_MISS
|
||||
PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_INST_FROM_L2MISS
|
||||
-PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_L2_INST_MISS
|
||||
+#PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_L2_INST_MISS
|
||||
PRESET,PAPI_L2_ICH,NOT_DERIVED,PM_INST_FROM_L2
|
||||
PRESET,PAPI_L3_ICA,NOT_DERIVED,PM_INST_FROM_L2MISS
|
||||
PRESET,PAPI_L3_ICH,NOT_DERIVED,PM_INST_FROM_L3
|
@ -1,42 +0,0 @@
|
||||
commit b78d7665bc02a0ce17adc6c09ab052064a940937
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Wed Dec 8 19:39:44 2021 +0900
|
||||
|
||||
Improve the papi_xml_event_info command.
|
||||
|
||||
Modify the papi_xml_event_info command as follows:.
|
||||
- Test only the event name even if the event has a unit mask.
|
||||
- Test other unit masks in the event even if
|
||||
there is an error in one unit mask in the event.
|
||||
|
||||
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
|
||||
diff --git a/src/utils/papi_xml_event_info.c b/src/utils/papi_xml_event_info.c
|
||||
index 2a777a9fe..c024cc036 100644
|
||||
--- a/src/utils/papi_xml_event_info.c
|
||||
+++ b/src/utils/papi_xml_event_info.c
|
||||
@@ -226,9 +226,6 @@ enum_native_events( FILE * f, int cidx)
|
||||
k = i;
|
||||
if ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cidx ) == PAPI_OK ) {
|
||||
|
||||
- /* Test if event can be added */
|
||||
- if ( test_event( k ) == PAPI_OK ) {
|
||||
-
|
||||
/* add the event */
|
||||
xmlize_event( f, &info, num );
|
||||
|
||||
@@ -237,13 +234,12 @@ enum_native_events( FILE * f, int cidx)
|
||||
retval = PAPI_get_event_info( k, &info );
|
||||
if ( retval == PAPI_OK ) {
|
||||
if ( test_event( k )!=PAPI_OK ) {
|
||||
- break;
|
||||
+ continue;
|
||||
}
|
||||
xmlize_event( f, &info, -1 );
|
||||
}
|
||||
} while ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cidx ) == PAPI_OK);
|
||||
fprintf( f, " </event>\n" );
|
||||
- }
|
||||
} else {
|
||||
/* this event has no unit masks; test & write the event */
|
||||
if ( test_event( i ) == PAPI_OK ) {
|
@ -1,140 +0,0 @@
|
||||
commit 2098e8656156084104ab8d1981b53c50d22b8f62
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Fri Mar 4 13:34:20 2022 +0900
|
||||
|
||||
PAPI_get_hardware_info: improve PAPI_hw_info_t for ARM processors
|
||||
|
||||
Currently, it is not possible to determine which company the ARM processor
|
||||
was designed by from the PAPI_hw_info_t available in PAPI_get_hardware_info().
|
||||
On ARM processors, the PAPI_hw_info_t obtained with PAPI_get_hardware_info()
|
||||
does not contain information indicating which company was designed.
|
||||
For ARM processors, improve the vendor and vendor_string entries
|
||||
in PAPI_hw_info_t, which can be retrieved with PAPI_get_hardware_info(),
|
||||
to include information indicating which company was designed.
|
||||
|
||||
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
|
||||
diff --git a/src/components/perf_event/pe_libpfm4_events.c b/src/components/perf_event/pe_libpfm4_events.c
|
||||
index 744851ff0..6dcb5e023 100644
|
||||
--- a/src/components/perf_event/pe_libpfm4_events.c
|
||||
+++ b/src/components/perf_event/pe_libpfm4_events.c
|
||||
@@ -1248,8 +1248,10 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
&pinfo,sizeof(pfm_pmu_info_t));
|
||||
found_default++;
|
||||
}
|
||||
+
|
||||
+ /* For ARM processors, */
|
||||
if ( (pinfo.type==PFM_PMU_TYPE_CORE) &&
|
||||
- ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM)) {
|
||||
+ ( _papi_hwi_system_info.hw_info.vendor >= PAPI_VENDOR_ARM_ARM)) {
|
||||
if (strlen(_papi_hwi_system_info.hw_info.model_string) == 0) {
|
||||
strSize = sizeof(_papi_hwi_system_info.hw_info.model_string);
|
||||
strncpy( _papi_hwi_system_info.hw_info.model_string, pinfo.desc, strSize - 1);
|
||||
diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
|
||||
index a50194cf6..6985fc4cb 100644
|
||||
--- a/src/components/perf_event/perf_event.c
|
||||
+++ b/src/components/perf_event/perf_event.c
|
||||
@@ -137,7 +137,8 @@ pe_vendor_fixups(papi_vector_t *vector)
|
||||
}
|
||||
|
||||
/* ARM */
|
||||
- if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM) {
|
||||
+ /* If implementer is ARM Limited. */
|
||||
+ if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM_ARM) {
|
||||
|
||||
/* Some ARMv7 and earlier could not measure */
|
||||
/* KERNEL and USER separately. */
|
||||
diff --git a/src/linux-common.c b/src/linux-common.c
|
||||
index 99601db86..2527981ad 100644
|
||||
--- a/src/linux-common.c
|
||||
+++ b/src/linux-common.c
|
||||
@@ -112,8 +112,20 @@ decode_vendor_string( char *s, int *vendor )
|
||||
*vendor = PAPI_VENDOR_IBM;
|
||||
else if ( strcasecmp( s, "Cray" ) == 0 )
|
||||
*vendor = PAPI_VENDOR_CRAY;
|
||||
- else if ( strcasecmp( s, "ARM" ) == 0 )
|
||||
- *vendor = PAPI_VENDOR_ARM;
|
||||
+ else if ( strcasecmp( s, "ARM_ARM" ) == 0 )
|
||||
+ *vendor = PAPI_VENDOR_ARM_ARM;
|
||||
+ else if ( strcasecmp( s, "ARM_BROADCOM" ) == 0 )
|
||||
+ *vendor = PAPI_VENDOR_ARM_BROADCOM;
|
||||
+ else if ( strcasecmp( s, "ARM_CAVIUM" ) == 0 )
|
||||
+ *vendor = PAPI_VENDOR_ARM_CAVIUM;
|
||||
+ else if ( strcasecmp( s, "ARM_FUJITSU" ) == 0 )
|
||||
+ *vendor = PAPI_VENDOR_ARM_FUJITSU;
|
||||
+ else if ( strcasecmp( s, "ARM_HISILICON") == 0 )
|
||||
+ *vendor = PAPI_VENDOR_ARM_HISILICON;
|
||||
+ else if ( strcasecmp( s, "ARM_APM" ) == 0 )
|
||||
+ *vendor = PAPI_VENDOR_ARM_APM;
|
||||
+ else if ( strcasecmp( s, "ARM_QUALCOMM" ) == 0 )
|
||||
+ *vendor = PAPI_VENDOR_ARM_QUALCOMM;
|
||||
else if ( strcasecmp( s, "MIPS" ) == 0 )
|
||||
*vendor = PAPI_VENDOR_MIPS;
|
||||
else if ( strcasecmp( s, "SiCortex" ) == 0 )
|
||||
@@ -409,9 +421,38 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz )
|
||||
}
|
||||
else {
|
||||
/* "CPU implementer" indicates ARM */
|
||||
+ /* For ARM processors, hwinfo->vendor >= PAPI_VENDOR_ARM_ARM(0x41). */
|
||||
+ /* If implementer is ARM Limited., hwinfo->vendor == PAPI_VENDOR_ARM_ARM. */
|
||||
+ /* If implementer is Cavium Inc., hwinfo->vendor == PAPI_VENDOR_ARM_CAVIUM(0x43). */
|
||||
s = search_cpu_info( f, "CPU implementer");
|
||||
if ( s ) {
|
||||
- strcpy( hwinfo->vendor_string, "ARM" );
|
||||
+ int tmp;
|
||||
+ sscanf( s, "%x", &tmp );
|
||||
+ switch( tmp ) {
|
||||
+ case PAPI_VENDOR_ARM_ARM:
|
||||
+ strcpy( hwinfo->vendor_string, "ARM_ARM" );
|
||||
+ break;
|
||||
+ case PAPI_VENDOR_ARM_BROADCOM:
|
||||
+ strcpy( hwinfo->vendor_string, "ARM_BROADCOM" );
|
||||
+ break;
|
||||
+ case PAPI_VENDOR_ARM_CAVIUM:
|
||||
+ strcpy( hwinfo->vendor_string, "ARM_CAVIUM" );
|
||||
+ break;
|
||||
+ case PAPI_VENDOR_ARM_FUJITSU:
|
||||
+ strcpy( hwinfo->vendor_string, "ARM_FUJITSU" );
|
||||
+ break;
|
||||
+ case PAPI_VENDOR_ARM_HISILICON:
|
||||
+ strcpy( hwinfo->vendor_string, "ARM_HISILICON" );
|
||||
+ break;
|
||||
+ case PAPI_VENDOR_ARM_APM:
|
||||
+ strcpy( hwinfo->vendor_string, "ARM_APM" );
|
||||
+ break;
|
||||
+ case PAPI_VENDOR_ARM_QUALCOMM:
|
||||
+ strcpy( hwinfo->vendor_string, "ARM_QUALCOMM" );
|
||||
+ break;
|
||||
+ default:
|
||||
+ strcpy( hwinfo->vendor_string, "ARM_UNKNOWN" );
|
||||
+ }
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -438,7 +479,7 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz )
|
||||
decode_cpuinfo_power(f,hwinfo);
|
||||
}
|
||||
|
||||
- if (hwinfo->vendor==PAPI_VENDOR_ARM) {
|
||||
+ if (hwinfo->vendor>=PAPI_VENDOR_ARM_ARM) {
|
||||
|
||||
decode_cpuinfo_arm(f,hwinfo);
|
||||
}
|
||||
diff --git a/src/papi.h b/src/papi.h
|
||||
index 14b05da1f..b05b368cb 100644
|
||||
--- a/src/papi.h
|
||||
+++ b/src/papi.h
|
||||
@@ -354,6 +354,13 @@ All of the functions in the PerfAPI should use the following set of constants.
|
||||
#define PAPI_VENDOR_FREESCALE 6
|
||||
#define PAPI_VENDOR_ARM 7
|
||||
#define PAPI_VENDOR_MIPS 8
|
||||
+#define PAPI_VENDOR_ARM_ARM 0x41
|
||||
+#define PAPI_VENDOR_ARM_BROADCOM 0x42
|
||||
+#define PAPI_VENDOR_ARM_CAVIUM 0x43
|
||||
+#define PAPI_VENDOR_ARM_FUJITSU 0x46
|
||||
+#define PAPI_VENDOR_ARM_HISILICON 0x48
|
||||
+#define PAPI_VENDOR_ARM_APM 0x50
|
||||
+#define PAPI_VENDOR_ARM_QUALCOMM 0x51
|
||||
/** @} */
|
||||
|
||||
/** @internal
|
@ -1,139 +0,0 @@
|
||||
commit 617eeabe0bbfb5357c10b22ebd72b24a4a872e52
|
||||
Author: Anthony <adanalis@icl.utk.edu>
|
||||
Date: Mon Jan 6 15:09:42 2020 -0500
|
||||
|
||||
Updated the variables that are used in the debug messages in accordance to a previous commit that made these variables thread safe.
|
||||
|
||||
diff --git a/src/papi_internal.c b/src/papi_internal.c
|
||||
index f0e457bf7..69b2914d0 100644
|
||||
--- a/src/papi_internal.c
|
||||
+++ b/src/papi_internal.c
|
||||
@@ -114,7 +114,7 @@ _papi_hwi_free_papi_event_string() {
|
||||
|
||||
void
|
||||
_papi_hwi_set_papi_event_code (unsigned int event_code, int update_flag) {
|
||||
- INTDBG("new event_code: %#x, update_flag: %d, previous event_code: %#x\n", event_code, update_flag, papi_event_code);
|
||||
+ INTDBG("new event_code: %#x, update_flag: %d, previous event_code: %#x\n", event_code, update_flag, _papi_hwi_my_thread->tls_papi_event_code);
|
||||
|
||||
// if call is just to reset and start over, set both flags to show nothing saved yet
|
||||
if (update_flag < 0) {
|
||||
@@ -131,7 +131,7 @@ _papi_hwi_set_papi_event_code (unsigned int event_code, int update_flag) {
|
||||
}
|
||||
unsigned int
|
||||
_papi_hwi_get_papi_event_code () {
|
||||
- INTDBG("papi_event_code: %#x\n", papi_event_code);
|
||||
+ INTDBG("papi_event_code: %#x\n", _papi_hwi_my_thread->tls_papi_event_code);
|
||||
return _papi_hwi_my_thread->tls_papi_event_code;
|
||||
}
|
||||
/* Get the index into the ESI->NativeInfoArray for the current PAPI event code */
|
||||
From 3cc3b6679e1ace7516c3037105ad16410ce7d3db Mon Sep 17 00:00:00 2001
|
||||
From: William Cohen <wcohen@redhat.com>
|
||||
Date: Wed, 12 Aug 2020 10:12:59 -0400
|
||||
Subject: [PATCH] Initialize component globals before threads globals
|
||||
|
||||
An earlier commit (979e80136) swapped the order of initializing
|
||||
globals and threads. This caused issues with the perf_event, appio,
|
||||
and stealtime components which could be observed with the
|
||||
all_native_events, appio_test_pthreads, and stealtime_basic tests
|
||||
respectively. The component initialization needs to be performed
|
||||
before the thread initialization.
|
||||
|
||||
The order of initialization has been changed back to initializing the
|
||||
component then the threads. One complication is that papi_internal.c
|
||||
had functions (_papi_hwi_set_papi_event_code and
|
||||
_papi_hwi_get_papi_event_code) that required thread local storage that
|
||||
was being setup in commit 979e80136 by the thread initialization.
|
||||
This was the original reason for swapping the order of initialization
|
||||
of component and thread. Using __thread on the file scope
|
||||
declarations of the variables allow the original order of
|
||||
initialization.
|
||||
---
|
||||
src/papi.c | 10 +++++-----
|
||||
src/papi_internal.c | 21 +++++++++++++--------
|
||||
2 files changed, 18 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/src/papi.c b/src/papi.c
|
||||
index 33cc29935..107a15044 100644
|
||||
--- a/src/papi.c
|
||||
+++ b/src/papi.c
|
||||
@@ -1151,19 +1151,19 @@ PAPI_library_init( int version )
|
||||
papi_return( init_retval );
|
||||
}
|
||||
|
||||
- /* Initialize thread globals, including the main threads */
|
||||
+ /* Initialize component globals */
|
||||
|
||||
- tmp = _papi_hwi_init_global_threads( );
|
||||
+ tmp = _papi_hwi_init_global( );
|
||||
if ( tmp ) {
|
||||
init_retval = tmp;
|
||||
_papi_hwi_shutdown_global_internal( );
|
||||
- _in_papi_library_init_cnt--;
|
||||
+ _in_papi_library_init_cnt--;
|
||||
papi_return( init_retval );
|
||||
}
|
||||
|
||||
- /* Initialize component globals */
|
||||
+ /* Initialize thread globals, including the main threads */
|
||||
|
||||
- tmp = _papi_hwi_init_global( );
|
||||
+ tmp = _papi_hwi_init_global_threads( );
|
||||
if ( tmp ) {
|
||||
init_retval = tmp;
|
||||
_papi_hwi_shutdown_global_internal( );
|
||||
diff --git a/src/papi_internal.c b/src/papi_internal.c
|
||||
index 5a1ccd433..bdf30f875 100644
|
||||
--- a/src/papi_internal.c
|
||||
+++ b/src/papi_internal.c
|
||||
@@ -115,27 +115,32 @@ _papi_hwi_free_papi_event_string() {
|
||||
return;
|
||||
}
|
||||
|
||||
+// A place to keep the current papi event code so some component functions can fetch its value
|
||||
+// The current event code can be stored here prior to component calls and cleared after the component returns
|
||||
+static THREAD_LOCAL_STORAGE_KEYWORD unsigned int papi_event_code = -1;
|
||||
+static THREAD_LOCAL_STORAGE_KEYWORD int papi_event_code_changed = -1;
|
||||
+
|
||||
void
|
||||
_papi_hwi_set_papi_event_code (unsigned int event_code, int update_flag) {
|
||||
- INTDBG("new event_code: %#x, update_flag: %d, previous event_code: %#x\n", event_code, update_flag, _papi_hwi_my_thread->tls_papi_event_code);
|
||||
+ INTDBG("new event_code: %#x, update_flag: %d, previous event_code: %#x\n", event_code, update_flag, papi_event_code);
|
||||
|
||||
// if call is just to reset and start over, set both flags to show nothing saved yet
|
||||
if (update_flag < 0) {
|
||||
- _papi_hwi_my_thread->tls_papi_event_code_changed = -1;
|
||||
- _papi_hwi_my_thread->tls_papi_event_code = -1;
|
||||
+ papi_event_code_changed = -1;
|
||||
+ papi_event_code = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
// if 0, it is being set prior to calling a component, if >0 it is being changed by the component
|
||||
- _papi_hwi_my_thread->tls_papi_event_code_changed = update_flag;
|
||||
+ papi_event_code_changed = update_flag;
|
||||
// save the event code passed in
|
||||
- _papi_hwi_my_thread->tls_papi_event_code = event_code;
|
||||
+ papi_event_code = event_code;
|
||||
return;
|
||||
}
|
||||
unsigned int
|
||||
_papi_hwi_get_papi_event_code () {
|
||||
- INTDBG("papi_event_code: %#x\n", _papi_hwi_my_thread->tls_papi_event_code);
|
||||
- return _papi_hwi_my_thread->tls_papi_event_code;
|
||||
+ INTDBG("papi_event_code: %#x\n", papi_event_code);
|
||||
+ return papi_event_code;
|
||||
}
|
||||
/* Get the index into the ESI->NativeInfoArray for the current PAPI event code */
|
||||
int
|
||||
@@ -560,7 +565,7 @@ _papi_hwi_native_to_eventcode(int cidx, int event_code, int ntv_idx, const char
|
||||
|
||||
int result;
|
||||
|
||||
- if (_papi_hwi_my_thread->tls_papi_event_code_changed > 0) {
|
||||
+ if (papi_event_code_changed > 0) {
|
||||
result = _papi_hwi_get_papi_event_code();
|
||||
INTDBG("EXIT: papi_event_code: %#x set by the component\n", result);
|
||||
return result;
|
||||
--
|
||||
2.26.2
|
||||
|
34
papi-a64fx.patch
Normal file
34
papi-a64fx.patch
Normal file
@ -0,0 +1,34 @@
|
||||
commit 9a44d82928ed17ba2ff21eb88b89c5829d0ea30e
|
||||
Author: Steve Kaufmann <steven.kaufmann@hpe.com>
|
||||
Date: Wed Jun 24 14:08:08 2020 -0400
|
||||
|
||||
Added PAPI preset support for Fujitsu A64FX.
|
||||
|
||||
Signed-off-by: Heike Jagode <jagode@icl.utk.edu>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 8e96adfbd..1b5c15542 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1877,6 +1877,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
|
||||
+#########################
|
||||
+# ARM Fujitsu A64FX #
|
||||
+#########################
|
||||
+CPU,arm_a64fx
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
348
papi-config.patch
Normal file
348
papi-config.patch
Normal file
@ -0,0 +1,348 @@
|
||||
commit 38290c41abbb105ca198411ec3c466ac027f5b8f
|
||||
Author: Frank Winkler <frankbook@Franks-MacBook-Air.local>
|
||||
Date: Fri Apr 24 16:18:22 2020 +0200
|
||||
|
||||
Fixed configure options for shared and static builds.
|
||||
|
||||
1) --with-static-lib=no (force PAPI to build shared libraries and tools)
|
||||
2) --with-shlib-tools (use internal libpfm via rpath-link)
|
||||
|
||||
diff --git a/src/configure.in b/src/configure.in
|
||||
index 3cf47edc1..1f58f7c8e 100644
|
||||
--- a/src/configure.in
|
||||
+++ b/src/configure.in
|
||||
@@ -200,9 +200,13 @@ else
|
||||
AC_MSG_ERROR([cannot find dlopen and dlerror symbols neither in the base system libraries nor in -ldl])
|
||||
fi
|
||||
fi
|
||||
+
|
||||
+# Disable LDL for static builds
|
||||
+# if test "x${STATIC}" = "x"; then
|
||||
+# LDL=""
|
||||
+# fi
|
||||
AC_SUBST(LDL)
|
||||
-
|
||||
-
|
||||
+
|
||||
if test "$OS" = "CLE"; then
|
||||
virtualtimer=times
|
||||
tls=__thread
|
||||
@@ -827,10 +831,6 @@ AC_ARG_WITH(static_tools,
|
||||
AC_MSG_RESULT(yes)],
|
||||
[AC_MSG_RESULT(no)])
|
||||
|
||||
-if test "$static_lib" = "no"; then
|
||||
- AC_MSG_ERROR(Building tests and utilities static but no static papi library to be built)
|
||||
-fi
|
||||
-
|
||||
AC_MSG_CHECKING(for linking with papi shared library of tests and utilities)
|
||||
AC_ARG_WITH(shlib_tools,
|
||||
[ --with-shlib-tools Specify linking with papi library of tests and utilities],
|
||||
@@ -839,6 +839,14 @@ AC_ARG_WITH(shlib_tools,
|
||||
[shlib_tools=no
|
||||
AC_MSG_RESULT(no)])
|
||||
|
||||
+if test "$static_lib" = "no"; then
|
||||
+ shlib_tools=yes
|
||||
+fi
|
||||
+
|
||||
+if test "$static_lib" = "no" -a "$shlib_tools" = "no"; then
|
||||
+ AC_MSG_ERROR(Building tests and utilities static but no static papi library to be built)
|
||||
+fi
|
||||
+
|
||||
if test "$shlib_tools" = "yes"; then
|
||||
if test "$shared_lib" != "yes"; then
|
||||
AC_MSG_ERROR(Building static but specified shared linking for tests and utilities)
|
||||
@@ -847,6 +855,8 @@ if test "$shlib_tools" = "yes"; then
|
||||
AC_MSG_ERROR([Building shared but specified static linking])
|
||||
fi
|
||||
LINKLIB='$(SHLIB)'
|
||||
+ #WORKAROUND: if libpfm cannot be found at link time
|
||||
+ LDFLAGS="$LDFLAGS -Wl,-rpath-link,$PWD/libpfm4/lib"
|
||||
elif test "$shlib_tools" = "no"; then
|
||||
if test "$static_lib" != "yes"; then
|
||||
AC_MSG_ERROR([Building shared but specified static linking for tests and utilities])
|
||||
commit d6f4e34d083f18cfdba38dd5e4bbfb2a580b8a9e
|
||||
Author: Frank Winkler <frankbook@Franks-MacBook-Air.local>
|
||||
Date: Fri Apr 24 16:38:18 2020 +0200
|
||||
|
||||
Another test for "--with-static-tools".
|
||||
|
||||
diff --git a/src/configure.in b/src/configure.in
|
||||
index 1f58f7c8e..e8d769578 100644
|
||||
--- a/src/configure.in
|
||||
+++ b/src/configure.in
|
||||
@@ -201,10 +201,6 @@ else
|
||||
fi
|
||||
fi
|
||||
|
||||
-# Disable LDL for static builds
|
||||
-# if test "x${STATIC}" = "x"; then
|
||||
-# LDL=""
|
||||
-# fi
|
||||
AC_SUBST(LDL)
|
||||
|
||||
if test "$OS" = "CLE"; then
|
||||
@@ -831,6 +827,11 @@ AC_ARG_WITH(static_tools,
|
||||
AC_MSG_RESULT(yes)],
|
||||
[AC_MSG_RESULT(no)])
|
||||
|
||||
+# Disable LDL for static builds
|
||||
+# if test "$STATIC" = "-static"; then
|
||||
+# LDL=""
|
||||
+# fi
|
||||
+
|
||||
AC_MSG_CHECKING(for linking with papi shared library of tests and utilities)
|
||||
AC_ARG_WITH(shlib_tools,
|
||||
[ --with-shlib-tools Specify linking with papi library of tests and utilities],
|
||||
commit 1c333c9954b872cda1b4d873fa81b14ec58a58a7
|
||||
Author: Frank Winkler <frankbook@Franks-MacBook-Air.local>
|
||||
Date: Thu Apr 30 18:51:34 2020 +0200
|
||||
|
||||
Fixed static build.
|
||||
- SDE component is disabled
|
||||
- "ctest" shlib is disabled
|
||||
|
||||
diff --git a/src/configure.in b/src/configure.in
|
||||
index e8d769578..0eee98ea1 100644
|
||||
--- a/src/configure.in
|
||||
+++ b/src/configure.in
|
||||
@@ -827,10 +827,11 @@ AC_ARG_WITH(static_tools,
|
||||
AC_MSG_RESULT(yes)],
|
||||
[AC_MSG_RESULT(no)])
|
||||
|
||||
-# Disable LDL for static builds
|
||||
-# if test "$STATIC" = "-static"; then
|
||||
-# LDL=""
|
||||
-# fi
|
||||
+# Disable LDL AND SDE for static builds
|
||||
+if test "$STATIC" = "-static"; then
|
||||
+ LDL=""
|
||||
+ SDE_ENABLED=
|
||||
+fi
|
||||
|
||||
AC_MSG_CHECKING(for linking with papi shared library of tests and utilities)
|
||||
AC_ARG_WITH(shlib_tools,
|
||||
@@ -1768,6 +1769,7 @@ for comp in $components; do
|
||||
if test "x$comp" = "xsde" ; then
|
||||
LDFLAGS="$LDFLAGS $LRT"
|
||||
LIBS="$LIBS $LRT"
|
||||
+ SDE_ENABLED=1
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -1862,6 +1864,7 @@ AC_SUBST(BGP_SYSDIR)
|
||||
AC_SUBST(BITFLAGS)
|
||||
AC_SUBST(COMPONENT_RULES)
|
||||
AC_SUBST(COMPONENTS)
|
||||
+AC_SUBST(SDE_ENABLED)
|
||||
AC_SUBST(FTEST_TARGETS)
|
||||
AC_SUBST(HAVE_NO_OVERRIDE_INIT)
|
||||
AC_SUBST(BGPM_INSTALL_DIR)
|
||||
diff --git a/src/ctests/Makefile.recipies b/src/ctests/Makefile.recipies
|
||||
index b7c1963d7..44e19b398 100644
|
||||
--- a/src/ctests/Makefile.recipies
|
||||
+++ b/src/ctests/Makefile.recipies
|
||||
@@ -11,7 +11,11 @@ MPX = max_multiplex multiplex1 multiplex2 mendes-alt sdsc-mpx sdsc2-mpx \
|
||||
MPXPTHR = multiplex1_pthreads multiplex3_pthreads kufrin
|
||||
MPI = mpi_hl mpi_omp_hl \
|
||||
mpifirst
|
||||
+
|
||||
+ifeq ($(STATIC),)
|
||||
SHARED = shlib
|
||||
+endif
|
||||
+
|
||||
SERIAL = serial_hl serial_hl_ll_comb\
|
||||
all_events all_native_events branches calibrate case1 case2 \
|
||||
cmpinfo code2name derived describe destroy disable_component \
|
||||
@@ -344,8 +348,10 @@ case2: case2.c $(TESTLIB) $(PAPILIB)
|
||||
low-level: low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB)
|
||||
$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o low-level
|
||||
|
||||
+ifeq ($(STATIC),)
|
||||
shlib: shlib.c $(TESTLIB) $(PAPILIB)
|
||||
$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) shlib.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o shlib $(LDL)
|
||||
+endif
|
||||
|
||||
exeinfo: exeinfo.c $(TESTLIB) $(PAPILIB)
|
||||
-$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) exeinfo.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o exeinfo
|
||||
diff --git a/src/ctests/Makefile.target.in b/src/ctests/Makefile.target.in
|
||||
index edc04f1b7..af64e157c 100644
|
||||
--- a/src/ctests/Makefile.target.in
|
||||
+++ b/src/ctests/Makefile.target.in
|
||||
@@ -10,6 +10,7 @@ INCLUDE = -I. -I@includedir@ -I$(testlibdir) -I$(validationlibdir)
|
||||
LIBDIR = @libdir@
|
||||
LIBRARY = @LIBRARY@
|
||||
SHLIB = @SHLIB@
|
||||
+STATIC = @STATIC@
|
||||
PAPILIB = ../@LINKLIB@
|
||||
TESTLIB = $(testlibdir)/libtestlib.a
|
||||
LDFLAGS = @LDFLAGS@ @LDL@ @STATIC@
|
||||
diff --git a/src/utils/Makefile b/src/utils/Makefile
|
||||
index 4abfd6cb8..64a2b8f9f 100644
|
||||
--- a/src/utils/Makefile
|
||||
+++ b/src/utils/Makefile
|
||||
@@ -48,8 +48,13 @@ papi_mem_info: papi_mem_info.o $(PAPILIB)
|
||||
papi_multiplex_cost: papi_multiplex_cost.o $(PAPILIB) cost_utils.o
|
||||
$(CC) -o papi_multiplex_cost papi_multiplex_cost.o cost_utils.o $(PAPILIB) -lm $(LDFLAGS)
|
||||
|
||||
+ifneq ($(SDE_ENABLED),)
|
||||
papi_native_avail: papi_native_avail.o $(PAPILIB) print_header.o papi_sde_interface.o
|
||||
$(CC) -o papi_native_avail papi_native_avail.o $(PAPILIB) print_header.o $(LDFLAGS) papi_sde_interface.o
|
||||
+else
|
||||
+papi_native_avail: papi_native_avail.o $(PAPILIB) print_header.o
|
||||
+ $(CC) -o papi_native_avail papi_native_avail.o $(PAPILIB) print_header.o $(LDFLAGS)
|
||||
+endif
|
||||
|
||||
papi_version: papi_version.o $(PAPILIB)
|
||||
$(CC) -o papi_version papi_version.o $(PAPILIB) $(LDFLAGS)
|
||||
@@ -65,8 +70,10 @@ cost_utils.o: ../testlib/papi_test.h cost_utils.c
|
||||
print_header.o: print_header.h print_header.c
|
||||
$(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c print_header.c
|
||||
|
||||
+ifneq ($(SDE_ENABLED),)
|
||||
papi_sde_interface.o: papi_sde_interface.c
|
||||
$(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c papi_sde_interface.c
|
||||
+endif
|
||||
|
||||
clean:
|
||||
rm -f *.o *.stderr *.stdout core *~ $(ALL)
|
||||
diff --git a/src/utils/Makefile.target.in b/src/utils/Makefile.target.in
|
||||
index bcdbe94e9..9c76b37af 100644
|
||||
--- a/src/utils/Makefile.target.in
|
||||
+++ b/src/utils/Makefile.target.in
|
||||
@@ -9,6 +9,7 @@ INCLUDE = -I. -I@includedir@ -I$(testlibdir)
|
||||
LIBDIR = @libdir@
|
||||
LIBRARY = @LIBRARY@
|
||||
SHLIB = @SHLIB@
|
||||
+SDE_ENABLED = @SDE_ENABLED@
|
||||
PAPILIB = ../@LINKLIB@
|
||||
TESTLIB = $(testlibdir)/libtestlib.a
|
||||
LDFLAGS = @LDFLAGS@ @LDL@ @STATIC@
|
||||
diff --git a/src/utils/papi_native_avail.c b/src/utils/papi_native_avail.c
|
||||
index ae6dbb9e5..902ed7996 100644
|
||||
--- a/src/utils/papi_native_avail.c
|
||||
+++ b/src/utils/papi_native_avail.c
|
||||
@@ -51,8 +51,9 @@
|
||||
|
||||
#include "papi.h"
|
||||
#include "print_header.h"
|
||||
+#ifdef SDE_ENABLED
|
||||
#include "components/sde/interface/papi_sde_interface.h"
|
||||
-
|
||||
+#endif
|
||||
#define EVT_LINE 80
|
||||
#define EVT_LINE_BUF_SIZE 4096
|
||||
|
||||
@@ -84,7 +85,9 @@ print_help( char **argv )
|
||||
printf( "\nGeneral command options:\n" );
|
||||
printf( "\t-h, --help print this help message\n" );
|
||||
printf( "\t-c, --check attempts to add each event\n");
|
||||
+#ifdef SDE_ENABLED
|
||||
printf( "\t-sde FILE lists SDEs that are registered by the library or executable in FILE\n" );
|
||||
+#endif
|
||||
printf( "\t-e EVENTNAME display detailed information about named native event\n" );
|
||||
printf( "\t-i EVENTSTR include only event names that contain EVENTSTR\n" );
|
||||
printf( "\t-x EVENTSTR exclude any event names that contain EVENTSTR\n" );
|
||||
@@ -368,6 +371,7 @@ parse_event_qualifiers( PAPI_event_info_t * info )
|
||||
return ( 1 );
|
||||
}
|
||||
|
||||
+#ifdef SDE_ENABLED
|
||||
void
|
||||
invoke_hook_fptr( char *lib_path )
|
||||
{
|
||||
@@ -394,6 +398,7 @@ invoke_hook_fptr( char *lib_path )
|
||||
dlclose(dl_handle);
|
||||
return;
|
||||
}
|
||||
+#endif
|
||||
|
||||
int
|
||||
main( int argc, char **argv )
|
||||
@@ -444,6 +449,7 @@ main( int argc, char **argv )
|
||||
return 2;
|
||||
}
|
||||
|
||||
+#ifdef SDE_ENABLED
|
||||
/*
|
||||
The following code will execute if the user wants to list the SDEs in the
|
||||
library (or executable) stored in flags.path. This code will not list the
|
||||
@@ -514,6 +520,7 @@ skip_lib:
|
||||
if( NULL != cmd ) free(cmd);
|
||||
}
|
||||
no_sdes:
|
||||
+#endif //SDE_ENABLED
|
||||
|
||||
/* Do this code if the event name option was specified on the commandline */
|
||||
if ( flags.named ) {
|
||||
commit b5111efaf1b234541c94b8ef7e5791bf8eb094b3
|
||||
Author: Frank Winkler <frankbook@franks-air.localdomain>
|
||||
Date: Thu May 7 09:00:53 2020 +0200
|
||||
|
||||
Added CFLAG -DSDE.
|
||||
|
||||
diff --git a/src/configure.in b/src/configure.in
|
||||
index 0eee98ea1..781148e5b 100644
|
||||
--- a/src/configure.in
|
||||
+++ b/src/configure.in
|
||||
@@ -1767,6 +1767,7 @@ tests="$tests comp_tests"
|
||||
# check for SDE component to determine if we need -lrt in LDFLAGS
|
||||
for comp in $components; do
|
||||
if test "x$comp" = "xsde" ; then
|
||||
+ CFLAGS="$CFLAGS -DSDE"
|
||||
LDFLAGS="$LDFLAGS $LRT"
|
||||
LIBS="$LIBS $LRT"
|
||||
SDE_ENABLED=1
|
||||
diff --git a/src/utils/papi_native_avail.c b/src/utils/papi_native_avail.c
|
||||
index 902ed7996..7d90c4064 100644
|
||||
--- a/src/utils/papi_native_avail.c
|
||||
+++ b/src/utils/papi_native_avail.c
|
||||
@@ -51,9 +51,10 @@
|
||||
|
||||
#include "papi.h"
|
||||
#include "print_header.h"
|
||||
-#ifdef SDE_ENABLED
|
||||
+#if SDE
|
||||
#include "components/sde/interface/papi_sde_interface.h"
|
||||
#endif
|
||||
+
|
||||
#define EVT_LINE 80
|
||||
#define EVT_LINE_BUF_SIZE 4096
|
||||
|
||||
@@ -85,7 +86,7 @@ print_help( char **argv )
|
||||
printf( "\nGeneral command options:\n" );
|
||||
printf( "\t-h, --help print this help message\n" );
|
||||
printf( "\t-c, --check attempts to add each event\n");
|
||||
-#ifdef SDE_ENABLED
|
||||
+#if SDE
|
||||
printf( "\t-sde FILE lists SDEs that are registered by the library or executable in FILE\n" );
|
||||
#endif
|
||||
printf( "\t-e EVENTNAME display detailed information about named native event\n" );
|
||||
@@ -371,7 +372,7 @@ parse_event_qualifiers( PAPI_event_info_t * info )
|
||||
return ( 1 );
|
||||
}
|
||||
|
||||
-#ifdef SDE_ENABLED
|
||||
+#if SDE
|
||||
void
|
||||
invoke_hook_fptr( char *lib_path )
|
||||
{
|
||||
@@ -449,7 +450,7 @@ main( int argc, char **argv )
|
||||
return 2;
|
||||
}
|
||||
|
||||
-#ifdef SDE_ENABLED
|
||||
+#if SDE
|
||||
/*
|
||||
The following code will execute if the user wants to list the SDEs in the
|
||||
library (or executable) stored in flags.path. This code will not list the
|
||||
@@ -520,7 +521,7 @@ skip_lib:
|
||||
if( NULL != cmd ) free(cmd);
|
||||
}
|
||||
no_sdes:
|
||||
-#endif //SDE_ENABLED
|
||||
+#endif //SDE
|
||||
|
||||
/* Do this code if the event name option was specified on the commandline */
|
||||
if ( flags.named ) {
|
34628
papi-no-iozone.patch
Normal file
34628
papi-no-iozone.patch
Normal file
File diff suppressed because it is too large
Load Diff
30
papi-nostatic.patch
Normal file
30
papi-nostatic.patch
Normal file
@ -0,0 +1,30 @@
|
||||
commit cc34c978778adb40df1a200059a31c8d628b10ee
|
||||
Author: William Cohen <wcohen@redhat.com>
|
||||
Date: Thu Jan 21 14:48:01 2021 -0500
|
||||
|
||||
Only check for libpfm.a if static libraries are being used.
|
||||
|
||||
Even when static libraries are not be used papi was checking for
|
||||
libpfm.a, this would cause a failure if libpfm.a wasn't installed.
|
||||
Exclude checking for libpfm.a if no static libpfm library is needed.
|
||||
|
||||
diff --git a/src/Rules.pfm4_pe b/src/Rules.pfm4_pe
|
||||
index 61eedc8a3..65a9635c6 100644
|
||||
--- a/src/Rules.pfm4_pe
|
||||
+++ b/src/Rules.pfm4_pe
|
||||
@@ -32,6 +32,7 @@ ifeq (yes,$(MIC))
|
||||
FORCE_PFM_ARCH="CONFIG_PFMLIB_ARCH_X86=y"
|
||||
endif
|
||||
|
||||
+ifneq (,$(STATIC))
|
||||
ifeq (,$(PFM_OBJS))
|
||||
$(PFM_LIB_PATH)/libpfm.a:
|
||||
ifneq (,${PFM_ROOT})
|
||||
@@ -49,6 +50,7 @@ else
|
||||
endif
|
||||
$(MAKE)
|
||||
endif
|
||||
+endif
|
||||
|
||||
include Makefile.inc
|
||||
|
10
papi-python3.patch
Normal file
10
papi-python3.patch
Normal file
@ -0,0 +1,10 @@
|
||||
diff --git a/src/high-level/scripts/papi_hl_output_writer.py b/src/high-level/scripts/papi_hl_output_writer.py
|
||||
index 123d2cd0..34bfbd73 100755
|
||||
--- a/src/high-level/scripts/papi_hl_output_writer.py
|
||||
+++ b/src/high-level/scripts/papi_hl_output_writer.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/python
|
||||
+#!/usr/bin/python3
|
||||
from __future__ import division
|
||||
from collections import OrderedDict
|
||||
|
@ -1,3 +1,5 @@
|
||||
# Default to no static libraries
|
||||
%{!?with_static: %global with_static 0}
|
||||
%bcond_with bundled_libpfm
|
||||
# rdma is not available
|
||||
%ifarch %{arm}
|
||||
@ -5,30 +7,21 @@
|
||||
%else
|
||||
%{!?with_rdma: %global with_rdma 1}
|
||||
%endif
|
||||
%{!?with_pcp: %global with_pcp 1}
|
||||
Summary: Performance Application Programming Interface
|
||||
Name: papi
|
||||
Version: 5.6.0
|
||||
Release: 20%{?dist}
|
||||
Version: 6.0.0
|
||||
Release: 8%{?dist}
|
||||
License: BSD
|
||||
Group: Development/System
|
||||
Requires: papi-libs = %{version}-%{release}
|
||||
URL: http://icl.cs.utk.edu/papi/
|
||||
Source0: http://icl.cs.utk.edu/projects/papi/downloads/%{name}-%{version}.tar.gz
|
||||
Patch1: papi-ldflags.patch
|
||||
Patch2: papi-divzero.patch
|
||||
Patch3: papi-rhbz1807346.patch
|
||||
Patch4: papi-thread_init.patch
|
||||
Patch5: papi-mx.patch
|
||||
Patch6: papi-bz1908126.patch
|
||||
Patch7: papi-rhbz1918721.patch
|
||||
Patch9: papi-rhbz2037426.patch
|
||||
Patch10: papi-rhbz2037427.patch
|
||||
Patch20: papi-fastread.patch
|
||||
Patch21: papi-arm64fastread.patch
|
||||
Patch30: papi-560_600eventupdate.patch
|
||||
Patch31: papi-701eventupdate.patch
|
||||
Patch40: papi-granularity.patch
|
||||
Patch41: papi-71eventupdate.patch
|
||||
Patch1: papi-python3.patch
|
||||
Patch2: papi-a64fx.patch
|
||||
Patch3: papi-no-iozone.patch
|
||||
Patch4: papi-config.patch
|
||||
Patch5: papi-nostatic.patch
|
||||
BuildRequires: make
|
||||
BuildRequires: autoconf
|
||||
BuildRequires: doxygen
|
||||
BuildRequires: ncurses-devel
|
||||
@ -37,8 +30,10 @@ BuildRequires: kernel-headers >= 2.6.32
|
||||
BuildRequires: chrpath
|
||||
BuildRequires: lm_sensors-devel
|
||||
%if %{without bundled_libpfm}
|
||||
BuildRequires: libpfm-devel >= 4.13.0-1
|
||||
BuildRequires: libpfm-static >= 4.13.0-1
|
||||
BuildRequires: libpfm-devel >= 4.6.0-1
|
||||
%if %{with_static}
|
||||
BuildRequires: libpfm-static >= 4.6.0-1
|
||||
%endif
|
||||
%endif
|
||||
# Following required for net component
|
||||
BuildRequires: net-tools
|
||||
@ -47,6 +42,9 @@ BuildRequires: net-tools
|
||||
BuildRequires: rdma-core-devel
|
||||
BuildRequires: infiniband-diags-devel
|
||||
%endif
|
||||
%if %{with_pcp}
|
||||
BuildRequires: pcp-libs-devel
|
||||
%endif
|
||||
BuildRequires: perl-generators
|
||||
#Right now libpfm does not know anything about s390 and will fail
|
||||
ExcludeArch: s390 s390x
|
||||
@ -57,14 +55,12 @@ running programs.
|
||||
|
||||
%package libs
|
||||
Summary: Libraries for PAPI clients
|
||||
Group: Development/System
|
||||
%description libs
|
||||
This package contains the run-time libraries for any application that wishes
|
||||
to use PAPI.
|
||||
|
||||
%package devel
|
||||
Summary: Header files for the compiling programs with PAPI
|
||||
Group: Development/System
|
||||
Requires: papi = %{version}-%{release}
|
||||
Requires: papi-libs = %{version}-%{release}
|
||||
Requires: pkgconfig
|
||||
@ -75,68 +71,72 @@ that uses PAPI.
|
||||
|
||||
%package testsuite
|
||||
Summary: Set of tests for checking PAPI functionality
|
||||
Group: Development/System
|
||||
Requires: papi = %{version}-%{release}
|
||||
Requires: papi-libs = %{version}-%{release}
|
||||
%description testsuite
|
||||
PAPI-testsuite includes compiled versions of papi tests to ensure
|
||||
that PAPI functions on particular hardware.
|
||||
|
||||
%if %{with_static}
|
||||
%package static
|
||||
Summary: Static libraries for the compiling programs with PAPI
|
||||
Group: Development/System
|
||||
Requires: papi = %{version}-%{release}
|
||||
%description static
|
||||
PAPI-static includes the static versions of the library files for
|
||||
the PAPI user-space libraries and interfaces.
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%setup -q
|
||||
%patch1 -p1 -b .ldflags
|
||||
%patch2 -p1 -b .divzero
|
||||
%patch3 -p1 -b .rhbz1807346
|
||||
%patch4 -p1 -b .thread_init
|
||||
%patch1 -p1 -b .python3
|
||||
%patch2 -p1 -b .a64fx
|
||||
%patch3 -p1
|
||||
%patch4 -p1
|
||||
%patch5 -p1
|
||||
%patch6 -p1
|
||||
%patch7 -p1
|
||||
%patch9 -p1
|
||||
%patch10 -p1
|
||||
%patch20 -p1
|
||||
%patch21 -p1
|
||||
%patch30 -p1
|
||||
%patch31 -p1
|
||||
%patch40 -p1
|
||||
%patch41 -p1
|
||||
|
||||
%build
|
||||
# This package fails to build with LTO due to undefined symbols. LTO
|
||||
# was disabled in OpenSuSE as well, but with no real explanation why
|
||||
# beyond the undefined symbols. It really shold be investigated further.
|
||||
# Disable LTO
|
||||
%define _lto_cflags %{nil}
|
||||
|
||||
%if %{without bundled_libpfm}
|
||||
# Build our own copy of libpfm.
|
||||
%global libpfm_config --with-pfm-incdir=%{_includedir} --with-pfm-libdir=%{_libdir}
|
||||
%endif
|
||||
|
||||
%if %{with_static}
|
||||
%global static_lib_config --with-static-lib=yes
|
||||
%else
|
||||
%global static_lib_config --with-static-lib=no
|
||||
%endif
|
||||
|
||||
# set up environment variable for the various components
|
||||
# cuda
|
||||
# host_micpower
|
||||
%if %{with_rdma}
|
||||
export PAPI_INFINIBAND_UMAD_ROOT=/usr
|
||||
%endif
|
||||
# lmsensors
|
||||
export PAPI_LMSENSORS_ROOT=/usr
|
||||
#pushd vmware; ./configure; popd
|
||||
%if %{with_pcp}
|
||||
%global pcp_enable pcp
|
||||
export PAPI_PCP_ROOT=/usr
|
||||
%endif
|
||||
|
||||
cd src
|
||||
autoconf
|
||||
%configure --with-perf-events \
|
||||
%{?libpfm_config} \
|
||||
--with-static-lib=yes --with-shared-lib=yes --with-shlib --with-shlib-tools \
|
||||
--with-components="appio coretemp example infiniband lmsensors lustre micpower mx net rapl stealtime"
|
||||
%{?static_lib_config} \
|
||||
--with-shared-lib=yes --with-shlib --with-shlib-tools \
|
||||
--with-components="appio coretemp example infiniband lmsensors lustre micpower mx net %{?pcp_enable} rapl stealtime"
|
||||
# implicit enabled components: perf_event perf_event_uncore
|
||||
#components currently left out because of build configure/build issues
|
||||
# --with-components="bgpm coretemp_freebsd cuda host_micpower nvml vmware"
|
||||
|
||||
pushd components
|
||||
#pushd cuda; ./configure; popd
|
||||
#pushd host_micpower; ./configure; popd
|
||||
%if %{with_rdma}
|
||||
pushd infiniband_umad; %configure; popd
|
||||
%endif
|
||||
pushd lmsensors; \
|
||||
%configure --with-sensors_incdir=/usr/include/sensors \
|
||||
--with-sensors_libdir=%{_libdir}; \
|
||||
popd
|
||||
#pushd vmware; ./configure; popd
|
||||
popd
|
||||
|
||||
#DBG workaround to make sure libpfm just uses the normal CFLAGS
|
||||
DBG="" make %{?_smp_mflags}
|
||||
|
||||
@ -158,15 +158,14 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so*
|
||||
%{_bindir}/*
|
||||
%dir /usr/share/papi
|
||||
/usr/share/papi/papi_events.csv
|
||||
%doc INSTALL.txt README LICENSE.txt RELEASENOTES.txt
|
||||
%doc INSTALL.txt README.md LICENSE.txt RELEASENOTES.txt
|
||||
%doc %{_mandir}/man1/*
|
||||
|
||||
%post libs -p /sbin/ldconfig
|
||||
%postun libs -p /sbin/ldconfig
|
||||
%ldconfig_scriptlets libs
|
||||
|
||||
%files libs
|
||||
%{_libdir}/*.so.*
|
||||
%doc INSTALL.txt README LICENSE.txt RELEASENOTES.txt
|
||||
%doc INSTALL.txt README.md LICENSE.txt RELEASENOTES.txt
|
||||
|
||||
%files devel
|
||||
%{_includedir}/*.h
|
||||
@ -185,50 +184,61 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so*
|
||||
/usr/share/papi/components
|
||||
/usr/share/papi/testlib
|
||||
|
||||
%if %{with_static}
|
||||
%files static
|
||||
%{_libdir}/*.a
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Fri Nov 17 2023 William Cohen <wcohen@redhat.com> - 5.6.0-20
|
||||
- Update papi event presets (RHEL-9320, RHEL-9336, RHEL-9337)
|
||||
* Fri Apr 16 2021 Mohan Boddu <mboddu@redhat.com> - 6.0.0-8
|
||||
- Rebuilt for RHEL 9 BETA on Apr 15th 2021. Related: rhbz#1947937
|
||||
|
||||
* Wed Jul 19 2023 William Cohen <wcohen@redhat.com> - 5.6.0-19
|
||||
- Fix granularity setting (rhbz2221846)
|
||||
* Thu Jan 28 2021 William Cohen <wcohen@redhat.com> - 6.0.0-7
|
||||
- By default disable genaration of static libraries.
|
||||
|
||||
* Fri May 5 2023 William Cohen <wcohen@redhat.com> - 5.6.0-18
|
||||
- Add event presets for Arm Neoverse processors (rhbz2111982, rhbz2111988)
|
||||
* Tue Jan 26 2021 Fedora Release Engineering <releng@fedoraproject.org> - 6.0.0-6
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild
|
||||
|
||||
* Thu Apr 27 2023 William Cohen <wcohen@redhat.com> - 5.6.0-17
|
||||
- Improve aarch64 read speed. (rhbz2161146)
|
||||
* Thu Dec 17 2020 William Cohen <wcohen@redhat.com> - 6.0.0-5
|
||||
- Remove iozone source code. (#1901077)
|
||||
|
||||
* Fri May 6 2022 William Cohen <wcohen@redhat.com> - 5.6.0-16
|
||||
- AMD Zen2/3 support (rhbz2071558)
|
||||
* Mon Nov 09 2020 William Cohen <wcohen@redhat.com> - 6.0.0-4
|
||||
- Add Fujitsu A64FX presets.
|
||||
|
||||
* Tue Apr 19 2022 William Cohen <wcohen@redhat.com> - 5.6.0-15
|
||||
- Fujitsu A64FX improvements. (rhbz2037417,rhbz2037426,rhbz2037427)
|
||||
* Tue Jul 28 2020 Fedora Release Engineering <releng@fedoraproject.org> - 6.0.0-3
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild
|
||||
|
||||
* Tue May 25 2021 William Cohen <wcohen@redhat.com> - 5.6.0-14
|
||||
- Disable problematic IBM Power9 events.
|
||||
* Wed Jul 01 2020 Jeff Law <law@redhat.com> - 6.0.0-2
|
||||
- Disable LTO
|
||||
|
||||
* Tue May 25 2021 William Cohen <wcohen@redhat.com> - 5.6.0-13
|
||||
- Add Fujitsu A64FX support.
|
||||
* Wed Mar 04 2020 William Cohen <wcohen@redhat.com> - 6.0.0-1
|
||||
- Rebase to official papi-6.0.0.
|
||||
|
||||
* Tue May 18 2021 William Cohen <wcohen@redhat.com> - 5.6.0-12
|
||||
- Improvements to mx component.
|
||||
* Wed Jan 29 2020 Fedora Release Engineering <releng@fedoraproject.org> - 5.7.0-4
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_32_Mass_Rebuild
|
||||
|
||||
* Fri Aug 21 2020 William Cohen <wcohen@redhat.com> - 5.6.0-11
|
||||
- Correct the handling of multiple threads. (rhbz1807346)
|
||||
* Fri Jul 26 2019 Fedora Release Engineering <releng@fedoraproject.org> - 5.7.0-3
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_31_Mass_Rebuild
|
||||
|
||||
* Wed May 27 2020 William Cohen <wcohen@redhat.com> - 5.6.0-10
|
||||
- Rebuild with current libpfm-4.10.1.
|
||||
* Mon Mar 04 2019 William Cohen <wcohen@redhat.com> - 5.7.0-2
|
||||
- Rebase to official papi-5.7.0.
|
||||
|
||||
* Tue May 26 2020 William Cohen <wcohen@redhat.com> - 5.6.0-9
|
||||
* Mon Feb 18 2019 William Cohen <wcohen@redhat.com> - 5.7.0-1
|
||||
- Rebase to papi-5.7.0.
|
||||
|
||||
* Fri Feb 01 2019 Fedora Release Engineering <releng@fedoraproject.org> - 5.6.0-10
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_30_Mass_Rebuild
|
||||
|
||||
* Mon Jan 7 2019 William Cohen <wcohen@redhat.com> - 5.6.0-9
|
||||
- Correct typo in papi-testsuite description.
|
||||
- Add papi-libs for papi-testsuite and papi-devel.
|
||||
|
||||
* Fri Nov 2 2018 Fedora Release Engineering <releng@fedoraproject.org> - 5.6.0-8
|
||||
- Pull in patch to avoid division-by-0.
|
||||
|
||||
* Fri Jul 13 2018 Fedora Release Engineering <releng@fedoraproject.org> - 5.6.0-7
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_29_Mass_Rebuild
|
||||
|
||||
* Thu May 17 2018 William Cohen <wcohen@redhat.com> - 5.6.0-6
|
||||
- Dynamically link utilities and tests to papi libraries.
|
||||
|
Loading…
Reference in New Issue
Block a user