From a3a2eb07f7aa20b21d5b9206093b1aabce6805fe Mon Sep 17 00:00:00 2001 From: William Cohen Date: Fri, 17 Nov 2023 16:58:55 -0500 Subject: [PATCH] Resolves: RHEL-9333, RHEL-9334, RHEL-9335 --- papi-71eventupdate.patch | 321 +++++++++++++++++++++++++++++++++++++++ papi.spec | 7 +- 2 files changed, 327 insertions(+), 1 deletion(-) create mode 100644 papi-71eventupdate.patch diff --git a/papi-71eventupdate.patch b/papi-71eventupdate.patch new file mode 100644 index 0000000..aeb576d --- /dev/null +++ b/papi-71eventupdate.patch @@ -0,0 +1,321 @@ +commit b969d25f2a87a53365e3e9a040533b093544a05d +Author: John Linford +Date: Mon Apr 3 22:30:14 2023 +0000 + + Update Neoverse V2 events + + Add/remove PAPI events to match available hardware counters + All tests pass on NVIDIA Grace + + Disclaimer: + The PAPI team was not able to verify the functionality included in this + commit. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 549e337c..3089d2d4 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2170,34 +2170,113 @@ PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL + CPU,arm_v2 + # + PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED ++PRESET,PAPI_INT_INS,NOT_DERIVED,DP_SPEC ++#NOT_IMPLEMENTED,PAPI_TOT_IIS,Instructions issued + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES +-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC ++PRESET,PAPI_REF_CYC,NOT_DERIVED,CNT_CYCLES ++PRESET,PAPI_STL_CCY,NOT_DERIVED,STALL ++#NOT_IMPLEMENTED,PAPI_FUL_CCY,Cycles with maximum instructions completed ++#NOT_IMPLEMENTED,PAPI_FUL_ICY,Cycles with maximum instruction issue ++#NOT_IMPLEMENTED,PAPI_FXU_IDL,Cycles integer units are idle ++#NOT_IMPLEMENTED,PAPI_LSU_IDL,Cycles load/store units are idle ++#NOT_IMPLEMENTED,PAPI_MEM_RCY,Cycles Stalled Waiting for memory Reads ++#NOT_IMPLEMENTED,PAPI_MEM_SCY,Cycles Stalled Waiting for memory accesses ++#NOT_IMPLEMENTED,PAPI_MEM_WCY,Cycles Stalled Waiting for memory writes ++#NOT_IMPLEMENTED,PAPI_FP_STAL,Cycles the FP unit(s) are stalled ++#NOT_IMPLEMENTED,PAPI_FPU_IDL,Cycles floating point units are idle ++#NOT_IMPLEMENTED,PAPI_BRU_IDL,Cycles branch units are idle ++PRESET,PAPI_STL_ICY,NOT_DERIVED,STALL ++PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC ++#NOT_IMPLEMENTED,PAPI_SP_OPS,Floating point operations; optimized to count scaled single precision vector operations ++#NOT_IMPLEMENTED,PAPI_DP_OPS,Floating point operations; optimized to count scaled double precision vector operations ++PRESET,PAPI_FP_INS,DERIVED_ADD,FP_HP_SPEC,FP_SP_SPEC,FP_DP_SPEC ++#NOT_IMPLEMENTED,PAPI_FAD_INS,Floating point add instructions ++#NOT_IMPLEMENTED,PAPI_FDV_INS,Floating point divide instructions ++#NOT_IMPLEMENTED,PAPI_FMA_INS,FMA instructions completed ++#NOT_IMPLEMENTED,PAPI_FML_INS,Floating point multiply instructions ++#NOT_IMPLEMENTED,PAPI_FNV_INS,Floating point inverse instructions ++#NOT_IMPLEMENTED,PAPI_FSQ_INS,Floating point square root instructions + PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC ++#NOT_IMPLEMENTED,PAPI_VEC_DP,Double precision vector/SIMD instructions ++#NOT_IMPLEMENTED,PAPI_VEC_SP,Single precision vector/SIMD instructions + PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED +-PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +-PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED +-PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED ++#NOT_IMPLEMENTED,PAPI_BR_CN,Conditional branch instructions ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_RETIRED,BR_MIS_PRED_RETIRED ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED_RETIRED ++#NOT_IMPLEMENTED,PAPI_BR_NTK,Conditional branch instructions not taken ++#NOT_IMPLEMENTED,PAPI_BR_TKN,Conditional branch instructions taken ++#NOT_IMPLEMENTED,PAPI_BR_UCN,Unconditional branch instructions ++#NOT_IMPLEMENTED,PAPI_BTAC_M,Branch target address cache misses + PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC + PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC + PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC + PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE ++PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD + PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR + PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS + PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL +-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS +-PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR ++#NOT_IMPLEMENTED,PAPI_L1_ICR,Level 1 instruction cache reads ++#NOT_IMPLEMENTED,PAPI_L1_ICW,Level 1 instruction cache writes ++#NOT_IMPLEMENTED,PAPI_L1_LDM,Level 1 load misses ++#NOT_IMPLEMENTED,PAPI_L1_STM,Level 1 store misses ++PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE_ACCESS,L1I_CACHE_REFILL ++PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL ++#NOT_IMPLEMENTED,PAPI_L1_TCR,Level 1 total cache reads ++#NOT_IMPLEMENTED,PAPI_L1_TCW,Level 1 total cache writes ++PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE ++PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE + PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL + PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD + PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL + PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD +-PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND +-PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND ++PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_WR ++#NOT_IMPLEMENTED,PAPI_L2_ICA,Level 2 instruction cache accesses ++#NOT_IMPLEMENTED,PAPI_L2_ICH,Level 2 instruction cache hits ++#NOT_IMPLEMENTED,PAPI_L2_ICM,Level 2 instruction cache misses ++#NOT_IMPLEMENTED,PAPI_L2_ICR,Level 2 instruction cache reads ++#NOT_IMPLEMENTED,PAPI_L2_ICW,Level 2 instruction cache writes ++PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL ++PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL ++PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD ++PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR ++PRESET,PAPI_L3_TCA,NOT_DERIVED,L3D_CACHE ++PRESET,PAPI_L3_DCA,NOT_DERIVED,L3D_CACHE ++#NOT_IMPLEMENTED,PAPI_L3_DCH,Level 3 data cache hits ++PRESET,PAPI_L3_DCM,NOT_DERIVED,L3D_CACHE_REFILL ++#NOT_IMPLEMENTED,PAPI_L3_DCR,Level 3 data cache reads ++#NOT_IMPLEMENTED,PAPI_L3_DCW,Level 3 data cache writes ++#NOT_IMPLEMENTED,PAPI_L3_ICA,Level 3 instruction cache accesses ++#NOT_IMPLEMENTED,PAPI_L3_ICH,Level 3 instruction cache hits ++#NOT_IMPLEMENTED,PAPI_L3_ICM,Level 3 instruction cache misses ++#NOT_IMPLEMENTED,PAPI_L3_ICR,Level 3 instruction cache reads ++#NOT_IMPLEMENTED,PAPI_L3_ICW,Level 3 instruction cache writes ++#NOT_IMPLEMENTED,PAPI_L3_LDM,Level 3 load misses ++#NOT_IMPLEMENTED,PAPI_L3_STM,Level 3 store misses ++#NOT_IMPLEMENTED,PAPI_L3_TCH,Level 3 total cache hits ++#NOT_IMPLEMENTED,PAPI_L3_TCM,Level 3 cache misses ++#NOT_IMPLEMENTED,PAPI_L3_TCR,Level 3 total cache reads ++#NOT_IMPLEMENTED,PAPI_L3_TCW,Level 3 total cache writes + PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC + PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL ++PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL ++#NOT_IMPLEMENTED,PAPI_TLB_SD,Translation lookaside buffer shootdowns ++PRESET,PAPI_TLB_TL,DERIVED_ADD,L1D_TLB_REFILL,L2D_TLB_REFILL ++#NOT_IMPLEMENTED,PAPI_CA_CLN,Requests for exclusive access to clean cache line ++#NOT_IMPLEMENTED,PAPI_CA_INV,Requests for cache line invalidation ++#NOT_IMPLEMENTED,PAPI_CA_ITV,Requests for cache line intervention ++#NOT_IMPLEMENTED,PAPI_CA_SHR,Requests for exclusive access to shared cache line ++#NOT_IMPLEMENTED,PAPI_CA_SNP,Requests for a snoop ++#NOT_IMPLEMENTED,PAPI_CSR_FAL,Failed store conditional instructions ++#NOT_IMPLEMENTED,PAPI_CSR_SUC,Successful store conditional instructions ++#NOT_IMPLEMENTED,PAPI_CSR_TOT,Total store conditional instructions ++#NOT_IMPLEMENTED,PAPI_PRF_DM,Data prefetch cache misses + + # + CPU,mips_74k + +commit 15f32cb3a2e6bdd9e51aa4043842f0130e9dcf24 +Author: Daniel Barry +Date: Wed Jun 7 14:38:39 2023 +0000 + + add branch presets for Zen3 and Zen4 + + These changes include all branching preset events for Zen3 and Zen4, + validated using the Counter Analysis Toolkit. + + For Zen3, PAPI_BR_TKN was modified to exclude unconditional branches + taken, in order to adhere to the preset's meaning. + + These changes have been tested on the AMD Zen3 and Zen4 architectures. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 3089d2d4..319cf82c 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -488,8 +488,12 @@ CPU,amd64_fam19h_zen3 + PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT + PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS +-PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_UCN,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_TKN,DERIVED_POSTFIX,N0|N1|-|N2|+|,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS + PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED + PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT + PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K + PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH +@@ -509,6 +513,16 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS + PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS + PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS + PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS ++# ++# ++CPU,amd64_fam19h_zen4 ++PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED + + + CPU,Intel architectural PMU + +commit da93ed4dd1fadb70ccee62a976597ff431c9f58c +Author: Daniel Barry +Date: Mon Jun 12 17:27:59 2023 +0000 + + add flops presets for Zen4 + + These changes include FLOPs presets for Zen4, validated using the + Counter Analysis Toolkit. + + These changes have been tested on the AMD Zen4 architecture. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 319cf82c..f6a40a35 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -523,6 +523,14 @@ PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDI + PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS + PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED + PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FP_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL,RETIRED_FP_OPS_BY_TYPE:SCALAR_ALL ++PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL ++PRESET,PAPI_FMA_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MAC,RETIRED_FP_OPS_BY_TYPE:SCALAR_MAC ++PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS_BY_TYPE:SCALAR_MUL ++PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD ++PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV ++PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT + + + CPU,Intel architectural PMU + +commit a31c3a4e9788e03fee113263a9f94bd638a66721 +Author: Daniel Barry +Date: Wed Jun 21 15:13:47 2023 +0000 + + add cycles and instructions presets for Zen4 + + These changes include the 'total cycles' and 'instructions completed' + presets for Zen4, validated using the Counter Analysis Toolkit. + + These changes have been tested on the AMD Zen4 architecture. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index f6a40a35..86e11fe6 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -531,6 +531,8 @@ PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS + PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD + PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV + PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT ++PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT + + + CPU,Intel architectural PMU + +commit 94303410ce97a84408b0b2d727701a60c6f137aa +Author: Daniel Barry +Date: Sun Jul 23 15:38:36 2023 +0000 + + add various Sapphire Rapids presets + + These changes include cycles, instructions, branching, and FLOPs presets + for Intel Sapphire Rapids, validated using the Counter Analysis Toolkit. + + These changes have been tested on the Intel Sapphire Rapids architecture. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 86e11fe6..eac0855f 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1010,6 +1010,29 @@ PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ + PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE + # End of icx list + ++# Intel Sapphire Rapids events ++CPU,spr ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P ++PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P ++PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES ++# FLOPs ++PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE ++PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE ++PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++# Branches ++PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND ++PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND ++PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN ++PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN ++PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND ++PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND ++PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES ++# End of spr list ++ + # + # Intel MIC / Xeon-Phi / Knights Landing + # Intel Knights Mill + +commit 42b14987ca1a7028b6cf6fdc190a2fa6a0fd8e18 +Author: Daniel Barry +Date: Tue Jul 25 12:16:56 2023 +0000 + + add more Ice Lake FLOPs presets + + Since there are enough counters available to monitor both single- and + double-precision floating-point events, PAPI_FP_OPS, PAPI_FP_INS, and + PAPI_VEC_INS are all defined. + These presets have been validated using the Counter Analysis Toolkit. + + These changes have been tested on the Intel Ice Lake architecture. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index eac0855f..df82ac1c 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1006,8 +1006,11 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES + PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE + # PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE + PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE ++PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE + PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE + PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE ++PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE + # End of icx list + + # Intel Sapphire Rapids events diff --git a/papi.spec b/papi.spec index 80dd15c..c4eeb46 100644 --- a/papi.spec +++ b/papi.spec @@ -11,7 +11,7 @@ Summary: Performance Application Programming Interface Name: papi Version: 6.0.0 -Release: 15%{?dist} +Release: 16%{?dist} License: BSD Requires: papi-libs = %{version}-%{release} URL: http://icl.cs.utk.edu/papi/ @@ -29,6 +29,7 @@ Patch7: papi-rhbz1923967.patch Patch21: papi-arm64fastread.patch Patch31: papi-701eventupdate.patch Patch40: papi-thread_init.patch +Patch41: papi-71eventupdate.patch BuildRequires: make BuildRequires: autoconf BuildRequires: doxygen @@ -104,6 +105,7 @@ the PAPI user-space libraries and interfaces. %patch21 -p1 %patch31 -p1 %patch40 -p1 +%patch41 -p1 %build @@ -196,6 +198,9 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* %endif %changelog +* Fri Nov 17 2023 William Cohen - 6.0.0-16 +- Update papi event presets (RHEL-9333, RHEL-9334, RHEL-9335) + * Fri Jun 16 2023 William Cohen - 6.0.0-15 - Address thread initialization order. (RHBZ#2215582)