commit b969d25f2a87a53365e3e9a040533b093544a05d Author: John Linford Date: Mon Apr 3 22:30:14 2023 +0000 Update Neoverse V2 events Add/remove PAPI events to match available hardware counters All tests pass on NVIDIA Grace Disclaimer: The PAPI team was not able to verify the functionality included in this commit. diff --git a/src/papi_events.csv b/src/papi_events.csv index 549e337c..3089d2d4 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -2170,34 +2170,113 @@ PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL CPU,arm_v2 # PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED +PRESET,PAPI_INT_INS,NOT_DERIVED,DP_SPEC +#NOT_IMPLEMENTED,PAPI_TOT_IIS,Instructions issued PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES -PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC +PRESET,PAPI_REF_CYC,NOT_DERIVED,CNT_CYCLES +PRESET,PAPI_STL_CCY,NOT_DERIVED,STALL +#NOT_IMPLEMENTED,PAPI_FUL_CCY,Cycles with maximum instructions completed +#NOT_IMPLEMENTED,PAPI_FUL_ICY,Cycles with maximum instruction issue +#NOT_IMPLEMENTED,PAPI_FXU_IDL,Cycles integer units are idle +#NOT_IMPLEMENTED,PAPI_LSU_IDL,Cycles load/store units are idle +#NOT_IMPLEMENTED,PAPI_MEM_RCY,Cycles Stalled Waiting for memory Reads +#NOT_IMPLEMENTED,PAPI_MEM_SCY,Cycles Stalled Waiting for memory accesses +#NOT_IMPLEMENTED,PAPI_MEM_WCY,Cycles Stalled Waiting for memory writes +#NOT_IMPLEMENTED,PAPI_FP_STAL,Cycles the FP unit(s) are stalled +#NOT_IMPLEMENTED,PAPI_FPU_IDL,Cycles floating point units are idle +#NOT_IMPLEMENTED,PAPI_BRU_IDL,Cycles branch units are idle +PRESET,PAPI_STL_ICY,NOT_DERIVED,STALL +PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND +PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC +#NOT_IMPLEMENTED,PAPI_SP_OPS,Floating point operations; optimized to count scaled single precision vector operations +#NOT_IMPLEMENTED,PAPI_DP_OPS,Floating point operations; optimized to count scaled double precision vector operations +PRESET,PAPI_FP_INS,DERIVED_ADD,FP_HP_SPEC,FP_SP_SPEC,FP_DP_SPEC +#NOT_IMPLEMENTED,PAPI_FAD_INS,Floating point add instructions +#NOT_IMPLEMENTED,PAPI_FDV_INS,Floating point divide instructions +#NOT_IMPLEMENTED,PAPI_FMA_INS,FMA instructions completed +#NOT_IMPLEMENTED,PAPI_FML_INS,Floating point multiply instructions +#NOT_IMPLEMENTED,PAPI_FNV_INS,Floating point inverse instructions +#NOT_IMPLEMENTED,PAPI_FSQ_INS,Floating point square root instructions PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC +#NOT_IMPLEMENTED,PAPI_VEC_DP,Double precision vector/SIMD instructions +#NOT_IMPLEMENTED,PAPI_VEC_SP,Single precision vector/SIMD instructions PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED -PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED -PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED -PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED +#NOT_IMPLEMENTED,PAPI_BR_CN,Conditional branch instructions +PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_RETIRED,BR_MIS_PRED_RETIRED +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED_RETIRED +#NOT_IMPLEMENTED,PAPI_BR_NTK,Conditional branch instructions not taken +#NOT_IMPLEMENTED,PAPI_BR_TKN,Conditional branch instructions taken +#NOT_IMPLEMENTED,PAPI_BR_UCN,Unconditional branch instructions +#NOT_IMPLEMENTED,PAPI_BTAC_M,Branch target address cache misses PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE +PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL -PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS -PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR +#NOT_IMPLEMENTED,PAPI_L1_ICR,Level 1 instruction cache reads +#NOT_IMPLEMENTED,PAPI_L1_ICW,Level 1 instruction cache writes +#NOT_IMPLEMENTED,PAPI_L1_LDM,Level 1 load misses +#NOT_IMPLEMENTED,PAPI_L1_STM,Level 1 store misses +PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE_ACCESS +PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE_ACCESS,L1I_CACHE_REFILL +PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL +#NOT_IMPLEMENTED,PAPI_L1_TCR,Level 1 total cache reads +#NOT_IMPLEMENTED,PAPI_L1_TCW,Level 1 total cache writes +PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE +PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR +PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD -PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND -PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND +PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_WR +#NOT_IMPLEMENTED,PAPI_L2_ICA,Level 2 instruction cache accesses +#NOT_IMPLEMENTED,PAPI_L2_ICH,Level 2 instruction cache hits +#NOT_IMPLEMENTED,PAPI_L2_ICM,Level 2 instruction cache misses +#NOT_IMPLEMENTED,PAPI_L2_ICR,Level 2 instruction cache reads +#NOT_IMPLEMENTED,PAPI_L2_ICW,Level 2 instruction cache writes +PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL +PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL +PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD +PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR +PRESET,PAPI_L3_TCA,NOT_DERIVED,L3D_CACHE +PRESET,PAPI_L3_DCA,NOT_DERIVED,L3D_CACHE +#NOT_IMPLEMENTED,PAPI_L3_DCH,Level 3 data cache hits +PRESET,PAPI_L3_DCM,NOT_DERIVED,L3D_CACHE_REFILL +#NOT_IMPLEMENTED,PAPI_L3_DCR,Level 3 data cache reads +#NOT_IMPLEMENTED,PAPI_L3_DCW,Level 3 data cache writes +#NOT_IMPLEMENTED,PAPI_L3_ICA,Level 3 instruction cache accesses +#NOT_IMPLEMENTED,PAPI_L3_ICH,Level 3 instruction cache hits +#NOT_IMPLEMENTED,PAPI_L3_ICM,Level 3 instruction cache misses +#NOT_IMPLEMENTED,PAPI_L3_ICR,Level 3 instruction cache reads +#NOT_IMPLEMENTED,PAPI_L3_ICW,Level 3 instruction cache writes +#NOT_IMPLEMENTED,PAPI_L3_LDM,Level 3 load misses +#NOT_IMPLEMENTED,PAPI_L3_STM,Level 3 store misses +#NOT_IMPLEMENTED,PAPI_L3_TCH,Level 3 total cache hits +#NOT_IMPLEMENTED,PAPI_L3_TCM,Level 3 cache misses +#NOT_IMPLEMENTED,PAPI_L3_TCR,Level 3 total cache reads +#NOT_IMPLEMENTED,PAPI_L3_TCW,Level 3 total cache writes PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL +PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL +#NOT_IMPLEMENTED,PAPI_TLB_SD,Translation lookaside buffer shootdowns +PRESET,PAPI_TLB_TL,DERIVED_ADD,L1D_TLB_REFILL,L2D_TLB_REFILL +#NOT_IMPLEMENTED,PAPI_CA_CLN,Requests for exclusive access to clean cache line +#NOT_IMPLEMENTED,PAPI_CA_INV,Requests for cache line invalidation +#NOT_IMPLEMENTED,PAPI_CA_ITV,Requests for cache line intervention +#NOT_IMPLEMENTED,PAPI_CA_SHR,Requests for exclusive access to shared cache line +#NOT_IMPLEMENTED,PAPI_CA_SNP,Requests for a snoop +#NOT_IMPLEMENTED,PAPI_CSR_FAL,Failed store conditional instructions +#NOT_IMPLEMENTED,PAPI_CSR_SUC,Successful store conditional instructions +#NOT_IMPLEMENTED,PAPI_CSR_TOT,Total store conditional instructions +#NOT_IMPLEMENTED,PAPI_PRF_DM,Data prefetch cache misses # CPU,mips_74k commit 15f32cb3a2e6bdd9e51aa4043842f0130e9dcf24 Author: Daniel Barry Date: Wed Jun 7 14:38:39 2023 +0000 add branch presets for Zen3 and Zen4 These changes include all branching preset events for Zen3 and Zen4, validated using the Counter Analysis Toolkit. For Zen3, PAPI_BR_TKN was modified to exclude unconditional branches taken, in order to adhere to the preset's meaning. These changes have been tested on the AMD Zen3 and Zen4 architectures. diff --git a/src/papi_events.csv b/src/papi_events.csv index 3089d2d4..319cf82c 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -488,8 +488,12 @@ CPU,amd64_fam19h_zen3 PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS -PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_UCN,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_TKN,DERIVED_POSTFIX,N0|N1|-|N2|+|,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED +PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH @@ -509,6 +513,16 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS +# +# +CPU,amd64_fam19h_zen4 +PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED +PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED CPU,Intel architectural PMU commit da93ed4dd1fadb70ccee62a976597ff431c9f58c Author: Daniel Barry Date: Mon Jun 12 17:27:59 2023 +0000 add flops presets for Zen4 These changes include FLOPs presets for Zen4, validated using the Counter Analysis Toolkit. These changes have been tested on the AMD Zen4 architecture. diff --git a/src/papi_events.csv b/src/papi_events.csv index 319cf82c..f6a40a35 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -523,6 +523,14 @@ PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDI PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED +PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +PRESET,PAPI_FP_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL,RETIRED_FP_OPS_BY_TYPE:SCALAR_ALL +PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL +PRESET,PAPI_FMA_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MAC,RETIRED_FP_OPS_BY_TYPE:SCALAR_MAC +PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS_BY_TYPE:SCALAR_MUL +PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD +PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV +PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT CPU,Intel architectural PMU commit a31c3a4e9788e03fee113263a9f94bd638a66721 Author: Daniel Barry Date: Wed Jun 21 15:13:47 2023 +0000 add cycles and instructions presets for Zen4 These changes include the 'total cycles' and 'instructions completed' presets for Zen4, validated using the Counter Analysis Toolkit. These changes have been tested on the AMD Zen4 architecture. diff --git a/src/papi_events.csv b/src/papi_events.csv index f6a40a35..86e11fe6 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -531,6 +531,8 @@ PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT +PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT CPU,Intel architectural PMU commit 94303410ce97a84408b0b2d727701a60c6f137aa Author: Daniel Barry Date: Sun Jul 23 15:38:36 2023 +0000 add various Sapphire Rapids presets These changes include cycles, instructions, branching, and FLOPs presets for Intel Sapphire Rapids, validated using the Counter Analysis Toolkit. These changes have been tested on the Intel Sapphire Rapids architecture. diff --git a/src/papi_events.csv b/src/papi_events.csv index 86e11fe6..eac0855f 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1010,6 +1010,29 @@ PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE # End of icx list +# Intel Sapphire Rapids events +CPU,spr +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P +PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P +PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES +# FLOPs +PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE +PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE +PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE +PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE +PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE +PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE +PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE +# Branches +PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND +PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND +PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN +PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND +PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES +# End of spr list + # # Intel MIC / Xeon-Phi / Knights Landing # Intel Knights Mill commit 42b14987ca1a7028b6cf6fdc190a2fa6a0fd8e18 Author: Daniel Barry Date: Tue Jul 25 12:16:56 2023 +0000 add more Ice Lake FLOPs presets Since there are enough counters available to monitor both single- and double-precision floating-point events, PAPI_FP_OPS, PAPI_FP_INS, and PAPI_VEC_INS are all defined. These presets have been validated using the Counter Analysis Toolkit. These changes have been tested on the Intel Ice Lake architecture. diff --git a/src/papi_events.csv b/src/papi_events.csv index eac0855f..df82ac1c 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1006,8 +1006,11 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE # PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE +PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE +PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE +PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE # End of icx list # Intel Sapphire Rapids events