Resolves: RHEL-9333, RHEL-9334, RHEL-9335

This commit is contained in:
William Cohen 2023-11-17 16:58:55 -05:00
parent bcbcf49b7f
commit a3a2eb07f7
2 changed files with 327 additions and 1 deletions

321
papi-71eventupdate.patch Normal file
View File

@ -0,0 +1,321 @@
commit b969d25f2a87a53365e3e9a040533b093544a05d
Author: John Linford <jlinford@nvidia.com>
Date: Mon Apr 3 22:30:14 2023 +0000
Update Neoverse V2 events
Add/remove PAPI events to match available hardware counters
All tests pass on NVIDIA Grace
Disclaimer:
The PAPI team was not able to verify the functionality included in this
commit.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 549e337c..3089d2d4 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -2170,34 +2170,113 @@ PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
CPU,arm_v2
#
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
+PRESET,PAPI_INT_INS,NOT_DERIVED,DP_SPEC
+#NOT_IMPLEMENTED,PAPI_TOT_IIS,Instructions issued
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CNT_CYCLES
+PRESET,PAPI_STL_CCY,NOT_DERIVED,STALL
+#NOT_IMPLEMENTED,PAPI_FUL_CCY,Cycles with maximum instructions completed
+#NOT_IMPLEMENTED,PAPI_FUL_ICY,Cycles with maximum instruction issue
+#NOT_IMPLEMENTED,PAPI_FXU_IDL,Cycles integer units are idle
+#NOT_IMPLEMENTED,PAPI_LSU_IDL,Cycles load/store units are idle
+#NOT_IMPLEMENTED,PAPI_MEM_RCY,Cycles Stalled Waiting for memory Reads
+#NOT_IMPLEMENTED,PAPI_MEM_SCY,Cycles Stalled Waiting for memory accesses
+#NOT_IMPLEMENTED,PAPI_MEM_WCY,Cycles Stalled Waiting for memory writes
+#NOT_IMPLEMENTED,PAPI_FP_STAL,Cycles the FP unit(s) are stalled
+#NOT_IMPLEMENTED,PAPI_FPU_IDL,Cycles floating point units are idle
+#NOT_IMPLEMENTED,PAPI_BRU_IDL,Cycles branch units are idle
+PRESET,PAPI_STL_ICY,NOT_DERIVED,STALL
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
+#NOT_IMPLEMENTED,PAPI_SP_OPS,Floating point operations; optimized to count scaled single precision vector operations
+#NOT_IMPLEMENTED,PAPI_DP_OPS,Floating point operations; optimized to count scaled double precision vector operations
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_HP_SPEC,FP_SP_SPEC,FP_DP_SPEC
+#NOT_IMPLEMENTED,PAPI_FAD_INS,Floating point add instructions
+#NOT_IMPLEMENTED,PAPI_FDV_INS,Floating point divide instructions
+#NOT_IMPLEMENTED,PAPI_FMA_INS,FMA instructions completed
+#NOT_IMPLEMENTED,PAPI_FML_INS,Floating point multiply instructions
+#NOT_IMPLEMENTED,PAPI_FNV_INS,Floating point inverse instructions
+#NOT_IMPLEMENTED,PAPI_FSQ_INS,Floating point square root instructions
PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
+#NOT_IMPLEMENTED,PAPI_VEC_DP,Double precision vector/SIMD instructions
+#NOT_IMPLEMENTED,PAPI_VEC_SP,Single precision vector/SIMD instructions
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
-PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
-PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
-PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
+#NOT_IMPLEMENTED,PAPI_BR_CN,Conditional branch instructions
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_RETIRED,BR_MIS_PRED_RETIRED
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED_RETIRED
+#NOT_IMPLEMENTED,PAPI_BR_NTK,Conditional branch instructions not taken
+#NOT_IMPLEMENTED,PAPI_BR_TKN,Conditional branch instructions taken
+#NOT_IMPLEMENTED,PAPI_BR_UCN,Unconditional branch instructions
+#NOT_IMPLEMENTED,PAPI_BTAC_M,Branch target address cache misses
PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
-PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
+#NOT_IMPLEMENTED,PAPI_L1_ICR,Level 1 instruction cache reads
+#NOT_IMPLEMENTED,PAPI_L1_ICW,Level 1 instruction cache writes
+#NOT_IMPLEMENTED,PAPI_L1_LDM,Level 1 load misses
+#NOT_IMPLEMENTED,PAPI_L1_STM,Level 1 store misses
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE_ACCESS
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
+#NOT_IMPLEMENTED,PAPI_L1_TCR,Level 1 total cache reads
+#NOT_IMPLEMENTED,PAPI_L1_TCW,Level 1 total cache writes
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
+PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
-PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
-PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
+PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_WR
+#NOT_IMPLEMENTED,PAPI_L2_ICA,Level 2 instruction cache accesses
+#NOT_IMPLEMENTED,PAPI_L2_ICH,Level 2 instruction cache hits
+#NOT_IMPLEMENTED,PAPI_L2_ICM,Level 2 instruction cache misses
+#NOT_IMPLEMENTED,PAPI_L2_ICR,Level 2 instruction cache reads
+#NOT_IMPLEMENTED,PAPI_L2_ICW,Level 2 instruction cache writes
+PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
+PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL
+PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD
+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR
+PRESET,PAPI_L3_TCA,NOT_DERIVED,L3D_CACHE
+PRESET,PAPI_L3_DCA,NOT_DERIVED,L3D_CACHE
+#NOT_IMPLEMENTED,PAPI_L3_DCH,Level 3 data cache hits
+PRESET,PAPI_L3_DCM,NOT_DERIVED,L3D_CACHE_REFILL
+#NOT_IMPLEMENTED,PAPI_L3_DCR,Level 3 data cache reads
+#NOT_IMPLEMENTED,PAPI_L3_DCW,Level 3 data cache writes
+#NOT_IMPLEMENTED,PAPI_L3_ICA,Level 3 instruction cache accesses
+#NOT_IMPLEMENTED,PAPI_L3_ICH,Level 3 instruction cache hits
+#NOT_IMPLEMENTED,PAPI_L3_ICM,Level 3 instruction cache misses
+#NOT_IMPLEMENTED,PAPI_L3_ICR,Level 3 instruction cache reads
+#NOT_IMPLEMENTED,PAPI_L3_ICW,Level 3 instruction cache writes
+#NOT_IMPLEMENTED,PAPI_L3_LDM,Level 3 load misses
+#NOT_IMPLEMENTED,PAPI_L3_STM,Level 3 store misses
+#NOT_IMPLEMENTED,PAPI_L3_TCH,Level 3 total cache hits
+#NOT_IMPLEMENTED,PAPI_L3_TCM,Level 3 cache misses
+#NOT_IMPLEMENTED,PAPI_L3_TCR,Level 3 total cache reads
+#NOT_IMPLEMENTED,PAPI_L3_TCW,Level 3 total cache writes
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL
+#NOT_IMPLEMENTED,PAPI_TLB_SD,Translation lookaside buffer shootdowns
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L1D_TLB_REFILL,L2D_TLB_REFILL
+#NOT_IMPLEMENTED,PAPI_CA_CLN,Requests for exclusive access to clean cache line
+#NOT_IMPLEMENTED,PAPI_CA_INV,Requests for cache line invalidation
+#NOT_IMPLEMENTED,PAPI_CA_ITV,Requests for cache line intervention
+#NOT_IMPLEMENTED,PAPI_CA_SHR,Requests for exclusive access to shared cache line
+#NOT_IMPLEMENTED,PAPI_CA_SNP,Requests for a snoop
+#NOT_IMPLEMENTED,PAPI_CSR_FAL,Failed store conditional instructions
+#NOT_IMPLEMENTED,PAPI_CSR_SUC,Successful store conditional instructions
+#NOT_IMPLEMENTED,PAPI_CSR_TOT,Total store conditional instructions
+#NOT_IMPLEMENTED,PAPI_PRF_DM,Data prefetch cache misses
#
CPU,mips_74k
commit 15f32cb3a2e6bdd9e51aa4043842f0130e9dcf24
Author: Daniel Barry <dbarry@vols.utk.edu>
Date: Wed Jun 7 14:38:39 2023 +0000
add branch presets for Zen3 and Zen4
These changes include all branching preset events for Zen3 and Zen4,
validated using the Counter Analysis Toolkit.
For Zen3, PAPI_BR_TKN was modified to exclude unconditional branches
taken, in order to adhere to the preset's meaning.
These changes have been tested on the AMD Zen3 and Zen4 architectures.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 3089d2d4..319cf82c 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -488,8 +488,12 @@ CPU,amd64_fam19h_zen3
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
-PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_UCN,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_TKN,DERIVED_POSTFIX,N0|N1|-|N2|+|,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
@@ -509,6 +513,16 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
+#
+#
+CPU,amd64_fam19h_zen4
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
CPU,Intel architectural PMU
commit da93ed4dd1fadb70ccee62a976597ff431c9f58c
Author: Daniel Barry <dbarry@vols.utk.edu>
Date: Mon Jun 12 17:27:59 2023 +0000
add flops presets for Zen4
These changes include FLOPs presets for Zen4, validated using the
Counter Analysis Toolkit.
These changes have been tested on the AMD Zen4 architecture.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 319cf82c..f6a40a35 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -523,6 +523,14 @@ PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDI
PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+PRESET,PAPI_FP_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL,RETIRED_FP_OPS_BY_TYPE:SCALAR_ALL
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL
+PRESET,PAPI_FMA_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MAC,RETIRED_FP_OPS_BY_TYPE:SCALAR_MAC
+PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS_BY_TYPE:SCALAR_MUL
+PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD
+PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV
+PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT
CPU,Intel architectural PMU
commit a31c3a4e9788e03fee113263a9f94bd638a66721
Author: Daniel Barry <dbarry@vols.utk.edu>
Date: Wed Jun 21 15:13:47 2023 +0000
add cycles and instructions presets for Zen4
These changes include the 'total cycles' and 'instructions completed'
presets for Zen4, validated using the Counter Analysis Toolkit.
These changes have been tested on the AMD Zen4 architecture.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index f6a40a35..86e11fe6 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -531,6 +531,8 @@ PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS
PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD
PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV
PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
CPU,Intel architectural PMU
commit 94303410ce97a84408b0b2d727701a60c6f137aa
Author: Daniel Barry <dbarry@vols.utk.edu>
Date: Sun Jul 23 15:38:36 2023 +0000
add various Sapphire Rapids presets
These changes include cycles, instructions, branching, and FLOPs presets
for Intel Sapphire Rapids, validated using the Counter Analysis Toolkit.
These changes have been tested on the Intel Sapphire Rapids architecture.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 86e11fe6..eac0855f 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -1010,6 +1010,29 @@ PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
# End of icx list
+# Intel Sapphire Rapids events
+CPU,spr
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
+# FLOPs
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE
+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
+# Branches
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
+# End of spr list
+
#
# Intel MIC / Xeon-Phi / Knights Landing
# Intel Knights Mill
commit 42b14987ca1a7028b6cf6fdc190a2fa6a0fd8e18
Author: Daniel Barry <dbarry@vols.utk.edu>
Date: Tue Jul 25 12:16:56 2023 +0000
add more Ice Lake FLOPs presets
Since there are enough counters available to monitor both single- and
double-precision floating-point events, PAPI_FP_OPS, PAPI_FP_INS, and
PAPI_VEC_INS are all defined.
These presets have been validated using the Counter Analysis Toolkit.
These changes have been tested on the Intel Ice Lake architecture.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index eac0855f..df82ac1c 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -1006,8 +1006,11 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
# End of icx list
# Intel Sapphire Rapids events

View File

@ -11,7 +11,7 @@
Summary: Performance Application Programming Interface
Name: papi
Version: 6.0.0
Release: 15%{?dist}
Release: 16%{?dist}
License: BSD
Requires: papi-libs = %{version}-%{release}
URL: http://icl.cs.utk.edu/papi/
@ -29,6 +29,7 @@ Patch7: papi-rhbz1923967.patch
Patch21: papi-arm64fastread.patch
Patch31: papi-701eventupdate.patch
Patch40: papi-thread_init.patch
Patch41: papi-71eventupdate.patch
BuildRequires: make
BuildRequires: autoconf
BuildRequires: doxygen
@ -104,6 +105,7 @@ the PAPI user-space libraries and interfaces.
%patch21 -p1
%patch31 -p1
%patch40 -p1
%patch41 -p1
%build
@ -196,6 +198,9 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so*
%endif
%changelog
* Fri Nov 17 2023 William Cohen <wcohen@redhat.com> - 6.0.0-16
- Update papi event presets for Arm Neoverse V2, AMD Zen3/Zen4, Intel Ice Lake and Sapphire Rapids. (RHEL-9333, RHEL-9334, RHEL-9335)
* Fri Jun 16 2023 William Cohen <wcohen@redhat.com> - 6.0.0-15
- Address thread initialization order. (RHBZ#2215582)