From 08f087241b6ac68947f9ef54cef7d28497eaebac Mon Sep 17 00:00:00 2001 From: William Cohen Date: Thu, 4 May 2023 13:52:28 -0400 Subject: [PATCH] Update PAPI event presets Resolves: RHBZ#2111923, RHBZ#2111942, RHBZ#2111947 --- papi-701eventupdate.patch | 796 ++++++++++++++++++++++++++++++++++++++ papi-a64fx.patch | 34 -- papi.spec | 9 +- 3 files changed, 802 insertions(+), 37 deletions(-) create mode 100644 papi-701eventupdate.patch delete mode 100644 papi-a64fx.patch diff --git a/papi-701eventupdate.patch b/papi-701eventupdate.patch new file mode 100644 index 0000000..46d592b --- /dev/null +++ b/papi-701eventupdate.patch @@ -0,0 +1,796 @@ +commit ae449f73abd0849f05ab3e1f3a64bde0c670c645 +Author: Anthony +Date: Fri Jul 17 12:05:14 2020 -0400 + + Separated the cache preset events of AMD Zen1 and Zen2 and added some more. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 8e96adfbd..2325bd4dc 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE + # + CPU,amd64_fam17h + CPU,amd64_fam17h_zen1 +-CPU,amd64_fam17h_zen2 + # + PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT +@@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT + PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS + PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions" + PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions" ++# Events discovered via CAT ++PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C ++PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L 
++PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X ++# ++# ++CPU,amd64_fam17h_zen2 ++# Events copied from zen1 that also exist on zen2 ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT ++PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K ++PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS ++PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT ++# Events discovered via CAT ++PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS ++PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C ++PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L ++PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X ++ + # + # + CPU,Intel architectural PMU +@@ -1877,6 +1897,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD + PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR + PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD + ++######################### ++# ARM Fujitsu A64FX # ++######################### ++CPU,arm_a64fx ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++ + # + CPU,mips_74k + # +commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d +Author: Heike Jagode +Date: Thu 
Sep 24 13:33:38 2020 -0400 + + Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS) + for AMD zen2. + + PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) + explains that FLOP events require MergeEvent support, which was included + in the 5.6 kernel. + + ===>>> Hence, a kernel version 5.6 or greater is required. + + NOTE: without the MergeEvent support in the kernel, + there is no guarantee that the SSE/AVX FLOP + events produce any useful data whatsoever. + + These events have been tested and verified for + scalar flops, SSE, AVX, and FMA: + + (1) for one AVX instruction (e.g. _mm256_add_pd()), + the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns + a count of 4 (in the case of double precision), and + a count of 8 (in the case of single precision). + + (2) for one AVX FMA instruction (e.g. _mm256_macc_pd()), + the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns + a count of 8 (in the case of double precision), and + a count of 16 (in the case of single precision). + + (3) for one SSE instruction (e.g. _mm_mul_pd()), + the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns + a count of 2 (in the case of double precision), and + a count of 4 (in the case of single precision). + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2325bd4dc..2ff3e4d16 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ + PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L + PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X + +-# +-# ++# New FLOP event on zen2 ++# PPR (under section 2.1.15.3. -- ++# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) ++# explains that FLOP events require MergeEvent support, which was included ++# in the 5.6 kernel. ++# Hence, a kernel version 5.6 or greater is required. 
++# NOTE: without the MergeEvent support in the kernel, there is no guarantee ++# that this SSE/AVX FLOP event produces any useful data whatsoever. ++PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++ ++ + CPU,Intel architectural PMU + CPU,ix86arch + # +commit 35f93252a6e222299c03f2c94912334488e76b02 +Author: Heike Jagode +Date: Thu Sep 24 18:40:59 2020 -0400 + + Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP) + for AMD zen2. + + For unoptimized code (like native MMM), these events may include + non-numeric floating-point instructions, e.g. MOVSD: move or merge + scalar double-precision floating-point value instructions. + + Tested with: + 1) SSE double: _mm_mul_pd / _mm_add_pd + 2) SSE single: _mm_mul_ps / _mm_add_ps + 3) AVX double: _mm256_mul_pd / _mm256_add_pd + 4) AVX single: _mm256_mul_ps / _mm256_add_ps + 5) FMA double: _mm256_macc_pd + 6) FMA single: _mm256_macc_ps + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2ff3e4d16..60a64564d 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ + PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++# Floating-point instructions (including non-numeric floating-point instructions, ++# e.g. 
Move or Merge Scalar Double-Precision Floating-Point values) ++PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + + + CPU,Intel architectural PMU +commit 344f6493425d865577508ff32b6f65516b1b4394 +Author: Heike Jagode +Date: Thu Sep 24 19:03:31 2020 -0400 + + Added missing 'PRESET' to csv file. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 60a64564d..724d520f0 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + # Floating-point instructions (including non-numeric floating-point instructions, + # e.g. Move or Merge Scalar Double-Precision Floating-Point values) +-PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + + + CPU,Intel architectural PMU +commit 4616aa717c5301a9a478876661eb8ac1f18c0333 +Author: Heike Jagode +Date: Thu Oct 8 11:36:23 2020 -0400 + + For zen2, since FP_OPS counts both single- and double-prec operations + correctly, we don't need to confuse the user with additional + DP_OPS and SP_OPS events. So, I'm taking them out. + + Same applies for events counting FP instructions. 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 724d520f0..9ebf557e1 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ + # NOTE: without the MergeEvent support in the kernel, there is no guarantee + # that this SSE/AVX FLOP event produces any useful data whatsoever. + PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +-PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +-PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++# Since FP_OPS counts both single- and double-prec operations ++# correctly, we don't need to confuse the user with additional ++# DP_OPS and SP_OPS events. So, I'm taking them out. ++#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++# + # Floating-point instructions (including non-numeric floating-point instructions, + # e.g. Move or Merge Scalar Double-Precision Floating-Point values) + PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++# Since FP_INS counts both single- and double-prec instructions ++# correctly, we don't need to confuse the user with additional ++# VEC_DP and VEC_SP events. So, I'm taking them out. ++#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + + + CPU,Intel architectural PMU +commit 274219e85ba8adcd2e9c78507adf7edb05b71daa +Author: Sebastian Mobo +Date: Thu Oct 8 13:40:21 2020 -0400 + + Added instruction-cache preset events for the Zen2. 
+ + Signed-off-by: Anthony + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 9ebf557e1..fd75f9371 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS + PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C + PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L + PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X +- ++# ++PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++# ++PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS ++PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S + # New FLOP event on zen2 + # PPR (under section 2.1.15.3. -- + # https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) +commit b87ac4beda096086e0040f8ec1b44c4791a9739c +Author: Masahiko, Yamada +Date: Mon Dec 14 14:06:22 2020 +0900 + + Corrected typo for A64FX support (PAPI_L2_DCH is a typo of PAPI_L2_DCA) + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index fd75f9371..164f05641 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1937,7 +1937,7 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE ++PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE + PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL + + # +commit 869864f813f0681b5c9a4b65de2135c8708a2afb +Author: Masahiko, Yamada +Date: Mon Dec 14 19:34:59 2020 +0900 + + Add or modify various A64FX support events, including floating point events (PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS). 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 164f05641..9192b1041 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1930,15 +1930,46 @@ PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD + ######################### + CPU,arm_a64fx + # ++PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF ++PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS ++PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND ++PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT ++PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT ++PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED ++PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES + PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC ++PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED + PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC ++PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC ++PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE ++PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL ++PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE +-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL 
++PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF ++PRESET,PAPI_L2_DCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE ++PRESET,PAPI_L2_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF ++PRESET,PAPI_L2_TCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++PRESET,PAPI_TLB_IM,NOT_DERIVED,L2I_TLB_REFILL ++PRESET,PAPI_TLB_TL,DERIVED_ADD,L2D_TLB_REFILL,L2I_TLB_REFILL ++PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC + + # + CPU,mips_74k +commit 7a3c22763ef2ba00a2b8cb069c3501f35ecb13de +Author: Masahiko, Yamada +Date: Tue Dec 15 13:43:43 2020 +0900 + + modify PAPI_FP_INS and PAPI_VEC_INS for A64FX supports + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 9192b1041..7b4ceb674 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1941,11 +1941,11 @@ PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED + PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES +-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_FP_INS,NOT_DERIVED,FP_SPEC + PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC + PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC + PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED +-PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC ++PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED + PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND + PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC +commit 530d4763fb8e6dd52109387bd58c8c1305fd6b63 +Author: Masahiko, Yamada +Date: Fri Feb 12 
15:01:21 2021 +0900 + + remove PAPI_L1_DCA and PAPI_L1_DCH for a64fx + + There seems to be a problem with PAPI_L1_DCA and PAPI_L1_DCH for a64fx that prefetch overcounts. + I delete (comment out) PAPI_L1_DCA and PAPI_L1_DCH for a64fx from the papi_events.csv file. + I will issue the pullrequest again once I have identified how to handle the overcount. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 7b4ceb674..0f5ec8344 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1949,8 +1949,8 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED + PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND + PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC +-PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE +-PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL ++#PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++#PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE + PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL +commit 340f68940234f2db181147fc249907b4f1293e62 +Author: Masahiko, Yamada +Date: Tue Feb 16 17:16:24 2021 +0900 + + remove PAPI_L1_TCA and PAPI_L1_TCH for a64fx + + PAPI_L1_TCA and PAPI_L1_TCH for a64fx measure L1D_CACHE just like PAPI_L1_DCA and PAPI_L1_DCH, + so I delete (comment out) PAPI_L1_TCA and PAPI_L1_TCH for a64fx from the papi_events.csv file. 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 0f5ec8344..4ef647959 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1955,8 +1955,8 @@ PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE + PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +-PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE +-PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL ++#PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE ++#PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL + PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL + PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE + PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF +commit 02f34baafb868d183f21bebfd3c46574847b9929 +Author: Swarup Sahoo +Date: Tue May 18 02:51:56 2021 +0530 + + Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip + + Signed-off-by: Swarup Sahoo + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 4ef647959..d9e9da8a3 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X + # VEC_DP and VEC_SP events. So, I'm taking them out. 
+ #PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + #PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++# ++# ++CPU,amd64_fam19h_zen3 ++PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT ++PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT ++PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K ++PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH ++PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X ++PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C ++PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X ++PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X ++PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS ++PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S ++# RETIRED_SSE_AVX_FLOPS requires MergeEvent support. 
++PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS ++PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS ++PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS ++PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS + + + CPU,Intel architectural PMU +commit 6964aa356fa606f320c7b871123aceb5c1f21999 +Author: Masahiko, Yamada +Date: Tue Aug 24 14:17:29 2021 +0900 + + Fix the PAPI_FUL_CCY setting for a64fx + + In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect. + PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT + + The correct settings are:. + PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 4ef647959..74deb712f 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF + PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS + PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND + PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT +-PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT ++PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT + PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED + PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f +Author: Masahiko, Yamada +Date: Fri Mar 4 15:41:30 2022 +0900 + + Add PAPI idle-related preset events for a64fx + + For a64fx, add four PAPI idle-related preset events + (PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL). 
+ + PAPI_BRU_IDL = BR_COMP_WAIT + PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT + PAPI_FPU_IDL = FL_COMP_WAIT + PAPI_LSU_IDL = LD_COMP_WAIT + + The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT, + and LD_COMP_WAIT can be found in the "14.4. Cycle Accounting" + on A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL:. + https://github.com/fujitsu/A64FX/blob/master/doc + + Signed-off-by: Masahiko, Yamada + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 74deb712f..1cd498e91 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS + PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND + PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT + PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT ++PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT ++PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT ++PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT ++PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT + PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED + PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +commit 3c5364839f583185c1e8dca58d5fe36c9ec82876 +Author: Daniel Barry +Date: Tue Aug 30 23:17:30 2022 +0000 + + papi_avail: add presets for Intel Ice Lake SP + + Define preset events for the Intel Ice Lake SP processor. + These presets have been verified using the Counter Analysis Toolkit benchmarks. + + These changes have been tested on the Intel Ice Lake architecture. 
+ +diff --git a/src/papi_events.csv b/src/papi_events.csv +index a013f58af..8f23e030c 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -929,6 +929,63 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD + + # End of hsw,bdw,skl,clx list + # ++ ++# Intel Ice Lake SP events ++CPU,icx ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P ++PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES ++# Loads and stores ++PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_LOADS ++PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_STORES ++PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_INST_RETIRED:ALL_LOADS,MEM_INST_RETIRED:ALL_STORES ++# L1 cache ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT ++PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD ++# L2 cache ++PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD ++PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT ++PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD ++#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT ++#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS ++PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD ++#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS ++PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD ++PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES ++PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD ++# L3 cache ++PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS ++PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD ++PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS 
++PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS ++#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT ++PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_RETIRED:L3_MISS ++PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES ++PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES ++# SMP ++PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD ++# Branches ++PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND ++PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND ++PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN ++PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES ++#FLOPs ++# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE ++# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE ++PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++# End of icx list ++ + # + # Intel MIC / Xeon-Phi / Knights Landing + # Intel Knights Mill +commit d4da29b07befb9f7c11e351dbfef835b74cdd67a +Author: John Linford +Date: Mon Mar 20 17:11:37 2023 -0500 + + Add minimal events for Arm Neoverse N1 + +diff --git 
a/src/papi_events.csv b/src/papi_events.csv +index 8f23e030c..a4d5a9756 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2059,6 +2059,41 @@ PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIX + PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC + PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC + ++######################### ++# ARM Neoverse N1 # ++######################### ++CPU,arm_n1 ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED ++PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC ++PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC ++PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC ++PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL ++PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD ++PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS ++PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD ++PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ ++PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++ + # + CPU,mips_74k + # +commit 
88e686f877abcf19c5f50d4e23cbf8ea920a40b6 +Author: John Linford +Date: Mon Mar 20 14:54:41 2023 -0500 + + Add minimal events for Arm Neoverse V1 + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index a4d5a9756..207d6d1db 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC + PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + ++######################### ++# ARM Neoverse V1 # ++######################### ++CPU,arm_v1 ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED ++PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC ++PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC ++PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC ++PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL ++PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD ++PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS ++PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD ++PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ ++PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC 
++PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++ + # + CPU,mips_74k + # +commit e911f951115bb551925c5b07e7f5b721d5fe3bbe +Author: John Linford +Date: Mon Mar 20 17:14:18 2023 -0500 + + Add minimal events for Arm Neoverse N2 + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 207d6d1db..d27d956c1 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC + PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + ++######################### ++# ARM Neoverse N2 # ++######################### ++CPU,arm_n2 ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED ++PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC ++PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC ++PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC ++PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL ++PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD ++PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS ++PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD ++PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ 
++PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++ + ######################### + # ARM Neoverse V1 # + ######################### +commit 05dc580247cb18fca882a33d8e356d79032d2ed1 +Author: John Linford +Date: Mon Mar 20 17:08:35 2023 -0500 + + Add minimal events for Arm Neoverse V2 + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index d27d956c1..549e337c7 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2164,6 +2164,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC + PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + ++######################### ++# ARM Neoverse V2 # ++######################### ++CPU,arm_v2 ++# ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES ++PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED ++PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC ++PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC ++PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC ++PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL ++PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD ++PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS ++PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR ++PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD ++PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD ++PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND 
++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ ++PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++ + # + CPU,mips_74k + # diff --git a/papi-a64fx.patch b/papi-a64fx.patch deleted file mode 100644 index f4d1132..0000000 --- a/papi-a64fx.patch +++ /dev/null @@ -1,34 +0,0 @@ -commit 9a44d82928ed17ba2ff21eb88b89c5829d0ea30e -Author: Steve Kaufmann -Date: Wed Jun 24 14:08:08 2020 -0400 - - Added PAPI preset support for Fujitsu A64FX. - - Signed-off-by: Heike Jagode - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 8e96adfbd..1b5c15542 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -1877,6 +1877,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD - PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR - PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD - -+######################### -+# ARM Fujitsu A64FX # -+######################### -+CPU,arm_a64fx -+# -+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES -+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC -+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC -+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL -+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE -+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL -+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE -+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL -+ - # - CPU,mips_74k - # diff --git a/papi.spec b/papi.spec index cdb1c3a..3f06408 100644 --- a/papi.spec +++ b/papi.spec @@ -11,7 +11,7 @@ Summary: Performance Application Programming Interface Name: papi Version: 6.0.0 -Release: 13%{?dist} +Release: 14%{?dist} License: BSD Requires: papi-libs = %{version}-%{release} URL: http://icl.cs.utk.edu/papi/ @@ -22,12 +22,12 @@ URL: http://icl.cs.utk.edu/papi/ # so when papi is rebased to a newer version it can be used as is. 
Source0: http://icl.cs.utk.edu/projects/papi/downloads/%{name}-%{version}-noiozone.tar.gz Patch1: papi-python3.patch -Patch2: papi-a64fx.patch Patch4: papi-config.patch Patch5: papi-nostatic.patch Patch6: papi-lto.patch Patch7: papi-rhbz1923967.patch Patch21: papi-arm64fastread.patch +Patch31: papi-701eventupdate.patch BuildRequires: make BuildRequires: autoconf BuildRequires: doxygen @@ -96,12 +96,12 @@ the PAPI user-space libraries and interfaces. %prep %setup -q %patch1 -p1 -b .python3 -%patch2 -p1 -b .a64fx %patch4 -p1 %patch5 -p1 %patch6 -p1 %patch7 -p1 %patch21 -p1 +%patch31 -p1 %build @@ -194,6 +194,9 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* %endif %changelog +* Thu May 4 2023 William Cohen - 6.0.0-14 +- Update papi event presets (RHBZ#2111923, RHBZ#2111942, RHBZ#2111947) + * Thu Apr 27 2023 William Cohen - 6.0.0-13 - Improve aarch64 read speed. (rhbz2186927)