commit ae449f73abd0849f05ab3e1f3a64bde0c670c645 Author: Anthony Date: Fri Jul 17 12:05:14 2020 -0400 Separated the cache preset events of AMD Zen1 and Zen2 and added some more. diff --git a/src/papi_events.csv b/src/papi_events.csv index 8e96adfbd..2325bd4dc 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE # CPU,amd64_fam17h CPU,amd64_fam17h_zen1 -CPU,amd64_fam17h_zen2 # PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT @@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions" PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions" +# Events discovered via CAT +PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C +PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L +PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X +# +# +CPU,amd64_fam17h_zen2 +# Events copied from zen1 that also exist on zen2 +PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT +PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K +PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED +PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS 
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT +# Events discovered via CAT +PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS +PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C +PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L +PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X + # # CPU,Intel architectural PMU @@ -1877,6 +1897,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD +######################### +# ARM Fujitsu A64FX # +######################### +CPU,arm_a64fx +# +PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES +PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC +PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC +PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL +PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE +PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL + # CPU,mips_74k # commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d Author: Heike Jagode Date: Thu Sep 24 13:33:38 2020 -0400 Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS) for AMD zen2. PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) explains that FLOP events require MergeEvent support, which was included in the 5.6 kernel. ===>>> Hence, a kernel version 5.6 or greater is required. NOTE: without the MergeEvent support in the kernel, there is no guarantee that the SSE/AVX FLOP events produce any useful data whatsoever. These events have been tested and verified for scalar flops, SSE, AVX, and FMA: (1) for one AVX instruction (e.g. 
_mm256_add_pd()), the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns a count of 4 (in the case of double precision), and a count of 8 (in the case of single precision). (2) for one AVX FMA instruction (e.g. _mm256_macc_pd()), the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns a count of 8 (in the case of double precision), and a count of 16 (in the case of single precision). (3) for one SSE instruction (e.g. _mm_mul_pd()), the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns a count of 2 (in the case of double precision), and a count of 4 (in the case of single precision). diff --git a/src/papi_events.csv b/src/papi_events.csv index 2325bd4dc..2ff3e4d16 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X -# -# +# New FLOP event on zen2 +# PPR (under section 2.1.15.3. -- +# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) +# explains that FLOP events require MergeEvent support, which was included +# in the 5.6 kernel. +# Hence, a kernel version 5.6 or greater is required. +# NOTE: without the MergeEvent support in the kernel, there is no guarantee +# that this SSE/AVX FLOP event produces any useful data whatsoever. +PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + + CPU,Intel architectural PMU CPU,ix86arch # commit 35f93252a6e222299c03f2c94912334488e76b02 Author: Heike Jagode Date: Thu Sep 24 18:40:59 2020 -0400 Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP) for AMD zen2. For unoptimized code (like native MMM), these events may include non-numeric floating-point instructions, e.g. 
MOVSD: move or merge scalar double-precision floating-point value instructions. Tested with: 1) SSE double: _mm_mul_pd / _mm_add_pd 2) SSE single: _mm_mul_ps / _mm_add_ps 3) AVX double: _mm256_mul_pd / _mm256_add_pd 4) AVX single: _mm256_mul_ps / _mm256_add_ps 5) FMA double: _mm256_macc_pd 6) FMA single: _mm256_macc_ps diff --git a/src/papi_events.csv b/src/papi_events.csv index 2ff3e4d16..60a64564d 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +# Floating-point instructions (including non-numeric floating-point instructions, +# e.g. Move or Merge Scalar Double-Precision Floating-Point values) +PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR CPU,Intel architectural PMU commit 344f6493425d865577508ff32b6f65516b1b4394 Author: Heike Jagode Date: Thu Sep 24 19:03:31 2020 -0400 Added missing 'PRESET' to csv file. diff --git a/src/papi_events.csv b/src/papi_events.csv index 60a64564d..724d520f0 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY # Floating-point instructions (including non-numeric floating-point instructions, # e.g. 
Move or Merge Scalar Double-Precision Floating-Point values) -PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR CPU,Intel architectural PMU commit 4616aa717c5301a9a478876661eb8ac1f18c0333 Author: Heike Jagode Date: Thu Oct 8 11:36:23 2020 -0400 For zen2, since FP_OPS counts both single- and double-prec operations correctly, we don't need to confuse the user with additional DP_OPS and SP_OPS events. So, I'm taking them out. Same applies for events counting FP instructions. diff --git a/src/papi_events.csv b/src/papi_events.csv index 724d520f0..9ebf557e1 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ # NOTE: without the MergeEvent support in the kernel, there is no guarantee # that this SSE/AVX FLOP event produces any useful data whatsoever. PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +# Since FP_OPS counts both single- and double-prec operations +# correctly, we don't need to confuse the user with additional +# DP_OPS and SP_OPS events. So, I'm taking them out. +#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +# # Floating-point instructions (including non-numeric floating-point instructions, # e.g. 
Move or Merge Scalar Double-Precision Floating-Point values) PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +# Since FP_INS counts both single- and double-prec instuctions +# correctly, we don't need to confuse the user with additional +# VEC_DP and VEC_SP events. So, I'm taking them out. +#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR CPU,Intel architectural PMU commit 274219e85ba8adcd2e9c78507adf7edb05b71daa Author: Sebastian Mobo Date: Thu Oct 8 13:40:21 2020 -0400 Added instruction-cache preset events for the Zen2. Signed-off-by: Anthony diff --git a/src/papi_events.csv b/src/papi_events.csv index 9ebf557e1..fd75f9371 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X - +# +PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ +# +PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ +PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS +PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S # New FLOP event on zen2 # PPR (under section 2.1.15.3. 
-- # https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) commit b87ac4beda096086e0040f8ec1b44c4791a9739c Author: Masahiko, Yamada Date: Mon Dec 14 14:06:22 2020 +0900 Corrected typo for A64FX support (PAPI_L2_DCH is a typo of PAPI_L2_DCA) diff --git a/src/papi_events.csv b/src/papi_events.csv index fd75f9371..164f05641 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1937,7 +1937,7 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL -PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE +PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL # commit 869864f813f0681b5c9a4b65de2135c8708a2afb Author: Masahiko, Yamada Date: Mon Dec 14 19:34:59 2020 +0900 Add or modify various A64FX support events, including floating point events (PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS). diff --git a/src/papi_events.csv b/src/papi_events.csv index 164f05641..9192b1041 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1930,15 +1930,46 @@ PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD ######################### CPU,arm_a64fx # +PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF +PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS +PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND +PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT +PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT +PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED +PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC +PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC +PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC 
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC +PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND +PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC +PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC +PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE +PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE +PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE +PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL +PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE -PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL +PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF +PRESET,PAPI_L2_DCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF +PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE +PRESET,PAPI_L2_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF +PRESET,PAPI_L2_TCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF +PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL +PRESET,PAPI_TLB_IM,NOT_DERIVED,L2I_TLB_REFILL +PRESET,PAPI_TLB_TL,DERIVED_ADD,L2D_TLB_REFILL,L2I_TLB_REFILL +PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC +PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC +PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC # CPU,mips_74k commit 7a3c22763ef2ba00a2b8cb069c3501f35ecb13de Author: Masahiko, Yamada Date: Tue Dec 15 13:43:43 2020 +0900 modify PAPI_FP_INS and PAPI_VEC_INS for A64FX supports diff --git a/src/papi_events.csv b/src/papi_events.csv index 9192b1041..7b4ceb674 100644 
--- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1941,11 +1941,11 @@ PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES -PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC +PRESET,PAPI_FP_INS,NOT_DERIVED,FP_SPEC PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED -PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC +PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC commit 530d4763fb8e6dd52109387bd58c8c1305fd6b63 Author: Masahiko, Yamada Date: Fri Feb 12 15:01:21 2021 +0900 remove PAPI_L1_DCA and PAPI_L1_DCH for a64fx There seems to be a problem with PAPI_L1_DCA and PAPI_L1_DCH for a64fx: prefetches cause them to overcount. I have deleted (commented out) PAPI_L1_DCA and PAPI_L1_DCH for a64fx from the papi_events.csv file. I will issue the pull request again once I have identified how to handle the overcount. 
diff --git a/src/papi_events.csv b/src/papi_events.csv index 7b4ceb674..0f5ec8344 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1949,8 +1949,8 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC -PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE -PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL +#PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE +#PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL commit 340f68940234f2db181147fc249907b4f1293e62 Author: Masahiko, Yamada Date: Tue Feb 16 17:16:24 2021 +0900 remove PAPI_L1_TCA and PAPI_L1_TCH for a64fx PAPI_L1_TCA and PAPI_L1_TCH for a64fx measure L1D_CACHE just like PAPI_L1_DCA and PAPI_L1_DCH, so I delete (comment out) PAPI_L1_TCA and PAPI_L1_TCH for a64fx from the papi_events.csv file. 
diff --git a/src/papi_events.csv b/src/papi_events.csv index 0f5ec8344..4ef647959 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1955,8 +1955,8 @@ PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL -PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE -PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL +#PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE +#PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF commit 02f34baafb868d183f21bebfd3c46574847b9929 Author: Swarup Sahoo Date: Tue May 18 02:51:56 2021 +0530 Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip Signed-off-by: Swarup Sahoo diff --git a/src/papi_events.csv b/src/papi_events.csv index 4ef647959..d9e9da8a3 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X # VEC_DP and VEC_SP events. So, I'm taking them out. 
#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR #PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +# +# +CPU,amd64_fam19h_zen3 +PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT +PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS +PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED +PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT +PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K +PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH +PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X +PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C +PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X +PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X +PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ +PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ +PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS +PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S +# RETIRED_SSE_AVX_FLOPS requires MergeEvent support. 
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS +PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS +PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS +PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS CPU,Intel architectural PMU commit 6964aa356fa606f320c7b871123aceb5c1f21999 Author: Masahiko, Yamada Date: Tue Aug 24 14:17:29 2021 +0900 Fix the PAPI_FUL_CCY setting for a64fx In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect: PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT The correct setting is: PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT diff --git a/src/papi_events.csv b/src/papi_events.csv index 4ef647959..74deb712f 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT -PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT +PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f Author: Masahiko, Yamada Date: Fri Mar 4 15:41:30 2022 +0900 Add PAPI idle-related preset events for a64fx For a64fx, add four PAPI idle-related preset events (PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL). 
PAPI_BRU_IDL = BR_COMP_WAIT PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT PAPI_FPU_IDL = FL_COMP_WAIT PAPI_LSU_IDL = LD_COMP_WAIT The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT, and LD_COMP_WAIT can be found in section "14.4. Cycle Accounting" of A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL: https://github.com/fujitsu/A64FX/blob/master/doc Signed-off-by: Masahiko, Yamada diff --git a/src/papi_events.csv b/src/papi_events.csv index 74deb712f..1cd498e91 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT +PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT +PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT +PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT +PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED commit 3c5364839f583185c1e8dca58d5fe36c9ec82876 Author: Daniel Barry Date: Tue Aug 30 23:17:30 2022 +0000 papi_avail: add presets for Intel Ice Lake SP Define preset events for the Intel Ice Lake SP processor. These presets have been verified using the Counter Analysis Toolkit benchmarks. These changes have been tested on the Intel Ice Lake architecture. 
diff --git a/src/papi_events.csv b/src/papi_events.csv index a013f58af..8f23e030c 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -929,6 +929,63 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD # End of hsw,bdw,skl,clx list # + +# Intel Ice Lake SP events +CPU,icx +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P +PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P +PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES +# Loads and stores +PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_LOADS +PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_STORES +PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_INST_RETIRED:ALL_LOADS,MEM_INST_RETIRED:ALL_STORES +# L1 cache +PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD +PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT +PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD +# L2 cache +PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES +PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD +PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT +PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS +PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD +#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT +#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS +PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS +PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD +#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT +PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS +PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD +PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES +PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD +# L3 cache +PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS +PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD +PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS 
+PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS +#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT +PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_RETIRED:L3_MISS +PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES +PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES +# SMP +PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD +# Branches +PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND +PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND +PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN +PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND +PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES +#FLOPs +# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE +PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE +# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE +PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE +PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE +PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE +# End of icx list + # # Intel MIC / Xeon-Phi / Knights Landing # Intel Knights Mill commit d4da29b07befb9f7c11e351dbfef835b74cdd67a Author: John Linford Date: Mon Mar 20 17:11:37 2023 -0500 Add minimal events for Arm Neoverse N1 diff --git a/src/papi_events.csv b/src/papi_events.csv index 
8f23e030c..a4d5a9756 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -2059,6 +2059,41 @@ PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIX PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC +######################### +# ARM Neoverse N1 # +######################### +CPU,arm_n1 +# +PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES +PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC +PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED +PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED +PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC +PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC +PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC +PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE +PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL +PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD +PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR +PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS +PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS +PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR +PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL +PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD +PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR +PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD +PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND +PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND +PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ +PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC +PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + # CPU,mips_74k # commit 88e686f877abcf19c5f50d4e23cbf8ea920a40b6 Author: John Linford Date: Mon Mar 20 14:54:41 2023 -0500 Add minimal 
events for Arm Neoverse V1 diff --git a/src/papi_events.csv b/src/papi_events.csv index a4d5a9756..207d6d1db 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL +######################### +# ARM Neoverse V1 # +######################### +CPU,arm_v1 +# +PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES +PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC +PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED +PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED +PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC +PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC +PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC +PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE +PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL +PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD +PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR +PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS +PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS +PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR +PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL +PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD +PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR +PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD +PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND +PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND +PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ +PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC +PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + # CPU,mips_74k # commit e911f951115bb551925c5b07e7f5b721d5fe3bbe Author: John Linford Date: Mon Mar 20 17:14:18 2023 -0500 Add minimal events for Arm 
Neoverse N2 diff --git a/src/papi_events.csv b/src/papi_events.csv index 207d6d1db..d27d956c1 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL +######################### +# ARM Neoverse N2 # +######################### +CPU,arm_n2 +# +PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES +PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC +PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED +PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED +PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC +PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC +PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC +PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE +PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL +PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD +PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR +PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS +PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS +PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR +PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL +PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD +PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR +PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD +PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND +PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND +PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ +PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC +PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + ######################### # ARM Neoverse V1 # ######################### commit 05dc580247cb18fca882a33d8e356d79032d2ed1 Author: John Linford Date: Mon Mar 20 17:08:35 2023 
-0500 Add minimal events for Arm Neoverse V2 diff --git a/src/papi_events.csv b/src/papi_events.csv index d27d956c1..549e337c7 100644 --- a/src/papi_events.csv +++ b/src/papi_events.csv @@ -2164,6 +2164,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL +######################### +# ARM Neoverse V2 # +######################### +CPU,arm_v2 +# +PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES +PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC +PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED +PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED +PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC +PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC +PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC +PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE +PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL +PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD +PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR +PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS +PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS +PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR +PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL +PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD +PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR +PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD +PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND +PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND +PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ +PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC +PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + # CPU,mips_74k #