322 lines
19 KiB
Diff
322 lines
19 KiB
Diff
|
commit b969d25f2a87a53365e3e9a040533b093544a05d
|
||
|
Author: John Linford <jlinford@nvidia.com>
|
||
|
Date: Mon Apr 3 22:30:14 2023 +0000
|
||
|
|
||
|
Update Neoverse V2 events
|
||
|
|
||
|
Add/remove PAPI events to match available hardware counters
|
||
|
All tests pass on NVIDIA Grace
|
||
|
|
||
|
Disclaimer:
|
||
|
The PAPI team was not able to verify the functionality included in this
|
||
|
commit.
|
||
|
|
||
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||
|
index 549e337c..3089d2d4 100644
|
||
|
--- a/src/papi_events.csv
|
||
|
+++ b/src/papi_events.csv
|
||
|
@@ -2170,34 +2170,113 @@ PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||
|
CPU,arm_v2
|
||
|
#
|
||
|
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||
|
+PRESET,PAPI_INT_INS,NOT_DERIVED,DP_SPEC
|
||
|
+#NOT_IMPLEMENTED,PAPI_TOT_IIS,Instructions issued
|
||
|
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||
|
-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||
|
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CNT_CYCLES
|
||
|
+PRESET,PAPI_STL_CCY,NOT_DERIVED,STALL
|
||
|
+#NOT_IMPLEMENTED,PAPI_FUL_CCY,Cycles with maximum instructions completed
|
||
|
+#NOT_IMPLEMENTED,PAPI_FUL_ICY,Cycles with maximum instruction issue
|
||
|
+#NOT_IMPLEMENTED,PAPI_FXU_IDL,Cycles integer units are idle
|
||
|
+#NOT_IMPLEMENTED,PAPI_LSU_IDL,Cycles load/store units are idle
|
||
|
+#NOT_IMPLEMENTED,PAPI_MEM_RCY,Cycles Stalled Waiting for memory Reads
|
||
|
+#NOT_IMPLEMENTED,PAPI_MEM_SCY,Cycles Stalled Waiting for memory accesses
|
||
|
+#NOT_IMPLEMENTED,PAPI_MEM_WCY,Cycles Stalled Waiting for memory writes
|
||
|
+#NOT_IMPLEMENTED,PAPI_FP_STAL,Cycles the FP unit(s) are stalled
|
||
|
+#NOT_IMPLEMENTED,PAPI_FPU_IDL,Cycles floating point units are idle
|
||
|
+#NOT_IMPLEMENTED,PAPI_BRU_IDL,Cycles branch units are idle
|
||
|
+PRESET,PAPI_STL_ICY,NOT_DERIVED,STALL
|
||
|
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||
|
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
|
||
|
+#NOT_IMPLEMENTED,PAPI_SP_OPS,Floating point operations; optimized to count scaled single precision vector operations
|
||
|
+#NOT_IMPLEMENTED,PAPI_DP_OPS,Floating point operations; optimized to count scaled double precision vector operations
|
||
|
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_HP_SPEC,FP_SP_SPEC,FP_DP_SPEC
|
||
|
+#NOT_IMPLEMENTED,PAPI_FAD_INS,Floating point add instructions
|
||
|
+#NOT_IMPLEMENTED,PAPI_FDV_INS,Floating point divide instructions
|
||
|
+#NOT_IMPLEMENTED,PAPI_FMA_INS,FMA instructions completed
|
||
|
+#NOT_IMPLEMENTED,PAPI_FML_INS,Floating point multiply instructions
|
||
|
+#NOT_IMPLEMENTED,PAPI_FNV_INS,Floating point inverse instructions
|
||
|
+#NOT_IMPLEMENTED,PAPI_FSQ_INS,Floating point square root instructions
|
||
|
PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||
|
+#NOT_IMPLEMENTED,PAPI_VEC_DP,Double precision vector/SIMD instructions
|
||
|
+#NOT_IMPLEMENTED,PAPI_VEC_SP,Single precision vector/SIMD instructions
|
||
|
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||
|
-PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||
|
-PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||
|
-PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||
|
+#NOT_IMPLEMENTED,PAPI_BR_CN,Conditional branch instructions
|
||
|
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_RETIRED,BR_MIS_PRED_RETIRED
|
||
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED_RETIRED
|
||
|
+#NOT_IMPLEMENTED,PAPI_BR_NTK,Conditional branch instructions not taken
|
||
|
+#NOT_IMPLEMENTED,PAPI_BR_TKN,Conditional branch instructions taken
|
||
|
+#NOT_IMPLEMENTED,PAPI_BR_UCN,Unconditional branch instructions
|
||
|
+#NOT_IMPLEMENTED,PAPI_BTAC_M,Branch target address cache misses
|
||
|
PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||
|
PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||
|
PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||
|
PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||
|
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||
|
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||
|
PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||
|
PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||
|
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||
|
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||
|
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||
|
-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||
|
-PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||
|
+#NOT_IMPLEMENTED,PAPI_L1_ICR,Level 1 instruction cache reads
|
||
|
+#NOT_IMPLEMENTED,PAPI_L1_ICW,Level 1 instruction cache writes
|
||
|
+#NOT_IMPLEMENTED,PAPI_L1_LDM,Level 1 load misses
|
||
|
+#NOT_IMPLEMENTED,PAPI_L1_STM,Level 1 store misses
|
||
|
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE_ACCESS
|
||
|
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||
|
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||
|
+#NOT_IMPLEMENTED,PAPI_L1_TCR,Level 1 total cache reads
|
||
|
+#NOT_IMPLEMENTED,PAPI_L1_TCW,Level 1 total cache writes
|
||
|
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
|
||
|
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||
|
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||
|
PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||
|
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||
|
+PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
|
||
|
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||
|
-PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||
|
-PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||
|
+PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_WR
|
||
|
+#NOT_IMPLEMENTED,PAPI_L2_ICA,Level 2 instruction cache accesses
|
||
|
+#NOT_IMPLEMENTED,PAPI_L2_ICH,Level 2 instruction cache hits
|
||
|
+#NOT_IMPLEMENTED,PAPI_L2_ICM,Level 2 instruction cache misses
|
||
|
+#NOT_IMPLEMENTED,PAPI_L2_ICR,Level 2 instruction cache reads
|
||
|
+#NOT_IMPLEMENTED,PAPI_L2_ICW,Level 2 instruction cache writes
|
||
|
+PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
|
||
|
+PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||
|
+PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD
|
||
|
+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR
|
||
|
+PRESET,PAPI_L3_TCA,NOT_DERIVED,L3D_CACHE
|
||
|
+PRESET,PAPI_L3_DCA,NOT_DERIVED,L3D_CACHE
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_DCH,Level 3 data cache hits
|
||
|
+PRESET,PAPI_L3_DCM,NOT_DERIVED,L3D_CACHE_REFILL
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_DCR,Level 3 data cache reads
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_DCW,Level 3 data cache writes
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_ICA,Level 3 instruction cache accesses
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_ICH,Level 3 instruction cache hits
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_ICM,Level 3 instruction cache misses
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_ICR,Level 3 instruction cache reads
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_ICW,Level 3 instruction cache writes
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_LDM,Level 3 load misses
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_STM,Level 3 store misses
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_TCH,Level 3 total cache hits
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_TCM,Level 3 cache misses
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_TCR,Level 3 total cache reads
|
||
|
+#NOT_IMPLEMENTED,PAPI_L3_TCW,Level 3 total cache writes
|
||
|
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||
|
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||
|
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||
|
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL
|
||
|
+#NOT_IMPLEMENTED,PAPI_TLB_SD,Translation lookaside buffer shootdowns
|
||
|
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L1D_TLB_REFILL,L2D_TLB_REFILL
|
||
|
+#NOT_IMPLEMENTED,PAPI_CA_CLN,Requests for exclusive access to clean cache line
|
||
|
+#NOT_IMPLEMENTED,PAPI_CA_INV,Requests for cache line invalidation
|
||
|
+#NOT_IMPLEMENTED,PAPI_CA_ITV,Requests for cache line intervention
|
||
|
+#NOT_IMPLEMENTED,PAPI_CA_SHR,Requests for exclusive access to shared cache line
|
||
|
+#NOT_IMPLEMENTED,PAPI_CA_SNP,Requests for a snoop
|
||
|
+#NOT_IMPLEMENTED,PAPI_CSR_FAL,Failed store conditional instructions
|
||
|
+#NOT_IMPLEMENTED,PAPI_CSR_SUC,Successful store conditional instructions
|
||
|
+#NOT_IMPLEMENTED,PAPI_CSR_TOT,Total store conditional instructions
|
||
|
+#NOT_IMPLEMENTED,PAPI_PRF_DM,Data prefetch cache misses
|
||
|
|
||
|
#
|
||
|
CPU,mips_74k
|
||
|
|
||
|
commit 15f32cb3a2e6bdd9e51aa4043842f0130e9dcf24
|
||
|
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||
|
Date: Wed Jun 7 14:38:39 2023 +0000
|
||
|
|
||
|
add branch presets for Zen3 and Zen4
|
||
|
|
||
|
These changes include all branching preset events for Zen3 and Zen4,
|
||
|
validated using the Counter Analysis Toolkit.
|
||
|
|
||
|
For Zen3, PAPI_BR_TKN was modified to exclude unconditional branches
|
||
|
taken, in order to adhere to the preset's meaning.
|
||
|
|
||
|
These changes have been tested on the AMD Zen3 and Zen4 architectures.
|
||
|
|
||
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||
|
index 3089d2d4..319cf82c 100644
|
||
|
--- a/src/papi_events.csv
|
||
|
+++ b/src/papi_events.csv
|
||
|
@@ -488,8 +488,12 @@ CPU,amd64_fam19h_zen3
|
||
|
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||
|
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||
|
PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||
|
-PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||
|
+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||
|
+PRESET,PAPI_BR_UCN,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||
|
+PRESET,PAPI_BR_TKN,DERIVED_POSTFIX,N0|N1|-|N2|+|,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||
|
+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||
|
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||
|
+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||
|
PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||
|
PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
|
||
|
PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
|
||
|
@@ -509,6 +513,16 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
|
||
|
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
|
||
|
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||
|
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||
|
+#
|
||
|
+#
|
||
|
+CPU,amd64_fam19h_zen4
|
||
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||
|
+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||
|
+PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS
|
||
|
+PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS
|
||
|
+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||
|
+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||
|
|
||
|
|
||
|
CPU,Intel architectural PMU
|
||
|
|
||
|
commit da93ed4dd1fadb70ccee62a976597ff431c9f58c
|
||
|
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||
|
Date: Mon Jun 12 17:27:59 2023 +0000
|
||
|
|
||
|
add flops presets for Zen4
|
||
|
|
||
|
These changes include FLOPs presets for Zen4, validated using the
|
||
|
Counter Analysis Toolkit.
|
||
|
|
||
|
These changes have been tested on the AMD Zen4 architecture.
|
||
|
|
||
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||
|
index 319cf82c..f6a40a35 100644
|
||
|
--- a/src/papi_events.csv
|
||
|
+++ b/src/papi_events.csv
|
||
|
@@ -523,6 +523,14 @@ PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDI
|
||
|
PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||
|
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||
|
PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||
|
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||
|
+PRESET,PAPI_FP_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL,RETIRED_FP_OPS_BY_TYPE:SCALAR_ALL
|
||
|
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL
|
||
|
+PRESET,PAPI_FMA_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MAC,RETIRED_FP_OPS_BY_TYPE:SCALAR_MAC
|
||
|
+PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS_BY_TYPE:SCALAR_MUL
|
||
|
+PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD
|
||
|
+PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV
|
||
|
+PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT
|
||
|
|
||
|
|
||
|
CPU,Intel architectural PMU
|
||
|
|
||
|
commit a31c3a4e9788e03fee113263a9f94bd638a66721
|
||
|
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||
|
Date: Wed Jun 21 15:13:47 2023 +0000
|
||
|
|
||
|
add cycles and instructions presets for Zen4
|
||
|
|
||
|
These changes include the 'total cycles' and 'instructions completed'
|
||
|
presets for Zen4, validated using the Counter Analysis Toolkit.
|
||
|
|
||
|
These changes have been tested on the AMD Zen4 architecture.
|
||
|
|
||
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||
|
index f6a40a35..86e11fe6 100644
|
||
|
--- a/src/papi_events.csv
|
||
|
+++ b/src/papi_events.csv
|
||
|
@@ -531,6 +531,8 @@ PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS
|
||
|
PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD
|
||
|
PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV
|
||
|
PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT
|
||
|
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||
|
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||
|
|
||
|
|
||
|
CPU,Intel architectural PMU
|
||
|
|
||
|
commit 94303410ce97a84408b0b2d727701a60c6f137aa
|
||
|
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||
|
Date: Sun Jul 23 15:38:36 2023 +0000
|
||
|
|
||
|
add various Sapphire Rapids presets
|
||
|
|
||
|
These changes include cycles, instructions, branching, and FLOPs presets
|
||
|
for Intel Sapphire Rapids, validated using the Counter Analysis Toolkit.
|
||
|
|
||
|
These changes have been tested on the Intel Sapphire Rapids architecture.
|
||
|
|
||
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||
|
index 86e11fe6..eac0855f 100644
|
||
|
--- a/src/papi_events.csv
|
||
|
+++ b/src/papi_events.csv
|
||
|
@@ -1010,6 +1010,29 @@ PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_
|
||
|
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||
|
# End of icx list
|
||
|
|
||
|
+# Intel Sapphire Rapids events
|
||
|
+CPU,spr
|
||
|
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
|
||
|
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
||
|
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||
|
+# FLOPs
|
||
|
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||
|
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE
|
||
|
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||
|
+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||
|
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||
|
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE
|
||
|
+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||
|
+# Branches
|
||
|
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
|
||
|
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
|
||
|
+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN
|
||
|
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
|
||
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
|
||
|
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
|
||
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||
|
+# End of spr list
|
||
|
+
|
||
|
#
|
||
|
# Intel MIC / Xeon-Phi / Knights Landing
|
||
|
# Intel Knights Mill
|
||
|
|
||
|
commit 42b14987ca1a7028b6cf6fdc190a2fa6a0fd8e18
|
||
|
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||
|
Date: Tue Jul 25 12:16:56 2023 +0000
|
||
|
|
||
|
add more Ice Lake FLOPs presets
|
||
|
|
||
|
Since there are enough counters available to monitor both single- and
|
||
|
double-precision floating-point events, PAPI_FP_OPS, PAPI_FP_INS, and
|
||
|
PAPI_VEC_INS are all defined.
|
||
|
These presets have been validated using the Counter Analysis Toolkit.
|
||
|
|
||
|
These changes have been tested on the Intel Ice Lake architecture.
|
||
|
|
||
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||
|
index eac0855f..df82ac1c 100644
|
||
|
--- a/src/papi_events.csv
|
||
|
+++ b/src/papi_events.csv
|
||
|
@@ -1006,8 +1006,11 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||
|
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||
|
# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*FP_ARITH:256B_PACKED_SINGLE + 16*FP_ARITH:512B_PACKED_SINGLE
|
||
|
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||
|
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||
|
+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||
|
PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||
|
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||
|
+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||
|
# End of icx list
|
||
|
|
||
|
# Intel Sapphire Rapids events
|