From 67be98067832dc3ad86cdc8aeacfd9c1931e51fe Mon Sep 17 00:00:00 2001 From: AlmaLinux RelEng Bot Date: Mon, 30 Mar 2026 10:51:21 -0400 Subject: [PATCH] import CS papi-7.2.0-1.el9 --- .gitignore | 2 +- .papi.metadata | 2 +- SOURCES/papi-701eventupdate.patch | 796 --------------------------- SOURCES/papi-71eventupdate.patch | 321 ----------- SOURCES/papi-arm64fastread.patch | 637 --------------------- SOURCES/papi-avail-path-fix.patch | 34 ++ SOURCES/papi-config.patch | 348 ------------ SOURCES/papi-lto.patch | 124 ----- SOURCES/papi-python3.patch | 10 - SOURCES/papi-revert-arm-test.patch | 267 +++++++++ SOURCES/papi-revert-event-depr.patch | 71 +++ SOURCES/papi-rhbz1923967.patch | 23 - SOURCES/papi-thread_init.patch | 106 ---- SPECS/papi.spec | 56 +- 14 files changed, 401 insertions(+), 2396 deletions(-) delete mode 100644 SOURCES/papi-701eventupdate.patch delete mode 100644 SOURCES/papi-71eventupdate.patch delete mode 100644 SOURCES/papi-arm64fastread.patch create mode 100644 SOURCES/papi-avail-path-fix.patch delete mode 100644 SOURCES/papi-config.patch delete mode 100644 SOURCES/papi-lto.patch delete mode 100644 SOURCES/papi-python3.patch create mode 100644 SOURCES/papi-revert-arm-test.patch create mode 100644 SOURCES/papi-revert-event-depr.patch delete mode 100644 SOURCES/papi-rhbz1923967.patch delete mode 100644 SOURCES/papi-thread_init.patch diff --git a/.gitignore b/.gitignore index 866657f..1074b8d 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/papi-6.0.0-noiozone.tar.gz +SOURCES/papi-7.2.0.tar.gz diff --git a/.papi.metadata b/.papi.metadata index 090cd0f..e407d13 100644 --- a/.papi.metadata +++ b/.papi.metadata @@ -1 +1 @@ -96415af51f021dcbb71c04ac236037941cf5babc SOURCES/papi-6.0.0-noiozone.tar.gz +f1769503bf3e9f10c6467c4fe6322121cfdb7b79 SOURCES/papi-7.2.0.tar.gz diff --git a/SOURCES/papi-701eventupdate.patch b/SOURCES/papi-701eventupdate.patch deleted file mode 100644 index 46d592b..0000000 --- a/SOURCES/papi-701eventupdate.patch +++ /dev/null @@ -1,796 +0,0 @@ -commit ae449f73abd0849f05ab3e1f3a64bde0c670c645 -Author: Anthony -Date: Fri Jul 17 12:05:14 2020 -0400 - - Separated the cache preset events of AMD Zen1 and Zen2 and added some more. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 8e96adfbd..2325bd4dc 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE - # - CPU,amd64_fam17h - CPU,amd64_fam17h_zen1 --CPU,amd64_fam17h_zen2 - # - PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS - PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT -@@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT - PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS - PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions" - PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions" -+# Events discovered via CAT -+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C -+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L -+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X -+# -+# -+CPU,amd64_fam17h_zen2 -+# Events copied from zen1 that also exist on zen2 -+PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT -+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K -+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED -+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS -+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT -+# Events discovered via CAT -+PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS -+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C -+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L -+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X -+ - # - # - CPU,Intel architectural PMU -@@ -1877,6 +1897,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD - PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR - PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD - -+######################### -+# ARM Fujitsu A64FX # -+######################### -+CPU,arm_a64fx -+# -+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES -+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC -+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC -+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL -+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE -+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL -+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE -+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL -+ - # - CPU,mips_74k - # -commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d -Author: Heike Jagode -Date: Thu Sep 24 13:33:38 2020 -0400 - - Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS) - for AMD zen2. - - PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) - explains that FLOP events require MergeEvent support, which was included - in the 5.6 kernel. - - ===>>> Hence, a kernel version 5.6 or greater is required. - - NOTE: without the MergeEvent support in the kernel, - there is no guarantee that the SSE/AVX FLOP - events produce any useful data whatsoever. - - These events have been tested and verified for - scalar flops, SSE, AVX, and FMA: - - (1) for one AVX instruction (e.g. _mm256_add_pd()), - the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns - a count of 4 (in the case of double precision), and - a count of 8 (in the case of single precision). - - (2) for one AVX FMA instruction (e.g. _mm256_macc_pd()), - the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns - a count of 8 (in the case of double precision), and - a count of 16 (in the case of single precision). - - (3) for one SSE instruction (e.g. _mm_mul_pd()), - the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns - a count of 2 (in the case of double precision), and - a count of 4 (in the case of single precision). - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 2325bd4dc..2ff3e4d16 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ - PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L - PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X - --# --# -+# New FLOP event on zen2 -+# PPR (under section 2.1.15.3. -- -+# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) -+# explains that FLOP events require MergeEvent support, which was included -+# in the 5.6 kernel. -+# Hence, a kernel version 5.6 or greater is required. -+# NOTE: without the MergeEvent support in the kernel, there is no guarantee -+# that this SSE/AVX FLOP event produces any useful data whatsoever. -+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+ -+ - CPU,Intel architectural PMU - CPU,ix86arch - # -commit 35f93252a6e222299c03f2c94912334488e76b02 -Author: Heike Jagode -Date: Thu Sep 24 18:40:59 2020 -0400 - - Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP) - for AMD zen2. - - For unoptimized code (like native MMM), these events may include - non-numeric floating-point instructions, e.g. MOVSD: move or merge - scalar double-precision floating-point value instructions. - - Tested with: - 1) SSE double: _mm_mul_pd / _mm_add_pd - 2) SSE single: _mm_mul_ps / _mm_add_ps - 3) AVX double: _mm256_mul_pd / _mm256_add_pd - 4) AVX single: _mm256_mul_ps / _mm256_add_ps - 5) FMA double: _mm256_macc_pd - 6) FMA single: _mm256_macc_pd - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 2ff3e4d16..60a64564d 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ - PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY - PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY - PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+# Floating-point instructions (including non-numeric floating-point instructions, -+# e.g. Move or Merge Scalar Double-Precision Floating-Point values) -+PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -+PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -+PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR - - - CPU,Intel architectural PMU -commit 344f6493425d865577508ff32b6f65516b1b4394 -Author: Heike Jagode -Date: Thu Sep 24 19:03:31 2020 -0400 - - Added missing 'PRESET' to csv file. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 60a64564d..724d520f0 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY - PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY - # Floating-point instructions (including non-numeric floating-point instructions, - # e.g. Move or Merge Scalar Double-Precision Floating-Point values) --PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR --PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR --PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -+PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -+PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR - - - CPU,Intel architectural PMU -commit 4616aa717c5301a9a478876661eb8ac1f18c0333 -Author: Heike Jagode -Date: Thu Oct 8 11:36:23 2020 -0400 - - For zen2, since FP_OPS counts both single- and double-prec operations - correctly, we don't need to confuse the user with additional - DP_OPS and SP_OPS events. So, I'm taking them out. - - Same applies for events counting FP instructions. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 724d520f0..9ebf557e1 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ - # NOTE: without the MergeEvent support in the kernel, there is no guarantee - # that this SSE/AVX FLOP event produces any useful data whatsoever. - PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY --PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY --PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+# Since FP_OPS counts both single- and double-prec operations -+# correctly, we don't need to confuse the user with additional -+# DP_OPS and SP_OPS events. So, I'm taking them out. -+#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+# - # Floating-point instructions (including non-numeric floating-point instructions, - # e.g. Move or Merge Scalar Double-Precision Floating-Point values) - PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR --PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR --PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -+# Since FP_INS counts both single- and double-prec instuctions -+# correctly, we don't need to confuse the user with additional -+# VEC_DP and VEC_SP events. So, I'm taking them out. -+#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -+#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR - - - CPU,Intel architectural PMU -commit 274219e85ba8adcd2e9c78507adf7edb05b71daa -Author: Sebastian Mobo -Date: Thu Oct 8 13:40:21 2020 -0400 - - Added instruction-cache preset events for the Zen2. - - Signed-off-by: Anthony - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 9ebf557e1..fd75f9371 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS - PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C - PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L - PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X -- -+# -+PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ -+# -+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ -+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS -+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S - # New FLOP event on zen2 - # PPR (under section 2.1.15.3. -- - # https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) -commit b87ac4beda096086e0040f8ec1b44c4791a9739c -Author: Masahiko, Yamada -Date: Mon Dec 14 14:06:22 2020 +0900 - - Corrected typo for A64FX support (PAPI_L2_DCH is a typo of PAPI_L2_DCA) - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index fd75f9371..164f05641 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -1937,7 +1937,7 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC - PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL - PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE - PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL --PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE -+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE - PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL - - # -commit 869864f813f0681b5c9a4b65de2135c8708a2afb -Author: Masahiko, Yamada -Date: Mon Dec 14 19:34:59 2020 +0900 - - Add or modify various A64FX support events, including floating point events (PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS). - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 164f05641..9192b1041 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -1930,15 +1930,46 @@ PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD - ######################### - CPU,arm_a64fx - # -+PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF -+PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS -+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND -+PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT -+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT -+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ -+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED -+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED -+PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC - PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED - PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES - PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC -+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC -+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED - PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC -+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND -+PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC -+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC -+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE -+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL - PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL - PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE -+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL - PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL -+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE -+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL -+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL - PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE --PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL -+PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF -+PRESET,PAPI_L2_DCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF -+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE -+PRESET,PAPI_L2_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF -+PRESET,PAPI_L2_TCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF -+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL -+PRESET,PAPI_TLB_IM,NOT_DERIVED,L2I_TLB_REFILL -+PRESET,PAPI_TLB_TL,DERIVED_ADD,L2D_TLB_REFILL,L2I_TLB_REFILL -+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC -+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC -+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC - - # - CPU,mips_74k -commit 7a3c22763ef2ba00a2b8cb069c3501f35ecb13de -Author: Masahiko, Yamada -Date: Tue Dec 15 13:43:43 2020 +0900 - - modify PAPI_FP_INS and PAPI_VEC_INS for A64FX supports - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 9192b1041..7b4ceb674 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -1941,11 +1941,11 @@ PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED - PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC - PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED - PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES --PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC -+PRESET,PAPI_FP_INS,NOT_DERIVED,FP_SPEC - PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC - PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC - PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED --PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC -+PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED - PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND - PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC - PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC -commit 530d4763fb8e6dd52109387bd58c8c1305fd6b63 -Author: Masahiko, Yamada -Date: Fri Feb 12 15:01:21 2021 +0900 - - remove PAPI_L1_DCA and PAPI_L1_DCH for a64fx - - There seems to be a problem with PAPI_L1_DCA and PAPI_L1_DCH for a64fx that prefetch overcounts. - I delete (comment out) PAPI_L1_DCA and PAPI_L1_DCH for a64fx from the papi_events.csv file. - I will issue the pullrequest again once I have identified how to handle the overcount. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 7b4ceb674..0f5ec8344 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -1949,8 +1949,8 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED - PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND - PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC - PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC --PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE --PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL -+#PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE -+#PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL - PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL - PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE - PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL -commit 340f68940234f2db181147fc249907b4f1293e62 -Author: Masahiko, Yamada -Date: Tue Feb 16 17:16:24 2021 +0900 - - remove PAPI_L1_TCA and PAPI_L1_TCH for a64fx - - PAPI_L1_TCA and PAPI_L1_TCH for a64fx measure L1D_CACHE just like PAPI_L1_DCA and PAPI_L1_DCH, - so I delete (comment out) PAPI_L1_TCA and PAPI_L1_TCH for a64fx from the papi_events.csv file. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 0f5ec8344..4ef647959 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -1955,8 +1955,8 @@ PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL - PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE - PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL - PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL --PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE --PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL -+#PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE -+#PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL - PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL - PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE - PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF -commit 02f34baafb868d183f21bebfd3c46574847b9929 -Author: Swarup Sahoo -Date: Tue May 18 02:51:56 2021 +0530 - - Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip - - Signed-off-by: Swarup Sahoo - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 4ef647959..d9e9da8a3 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X - # VEC_DP and VEC_SP events. So, I'm taking them out. - #PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR - #PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR -+# -+# -+CPU,amd64_fam19h_zen3 -+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT -+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED -+PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT -+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K -+PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH -+PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X -+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C -+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X -+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X -+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ -+PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ -+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS -+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S -+# RETIRED_SSE_AVX_FLOPS requires MergeEvent support. -+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS -+PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS -+PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS -+PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS - - - CPU,Intel architectural PMU -commit 6964aa356fa606f320c7b871123aceb5c1f21999 -Author: Masahiko, Yamada -Date: Tue Aug 24 14:17:29 2021 +0900 - - Fix the PAPI_FUL_CCY setting for a64fx - - In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect. - PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT - - The correct settings are:. - PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 4ef647959..74deb712f 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF - PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS - PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND - PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT --PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT -+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT - PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ - PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED - PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED -commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f -Author: Masahiko, Yamada -Date: Fri Mar 4 15:41:30 2022 +0900 - - Add PAPI idle-related preset events for a64fx - - For a64fx, add four PAPI idle-related preset events - (PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL). - - PAPI_BRU_IDL = BR_COMP_WAIT - PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT - PAPI_FPU_IDL = FL_COMP_WAIT - PAPI_LSU_IDL = LD_COMP_WAIT - - The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT, - and LD_COMP_WAIT can be found in the "14.4. Cycle Accounting" - on A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL:. - https://github.com/fujitsu/A64FX/blob/master/doc - - Signed-off-by: Masahiko, Yamada - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 74deb712f..1cd498e91 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS - PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND - PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT - PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT -+PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT -+PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT -+PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT -+PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT - PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ - PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED - PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED -commit 3c5364839f583185c1e8dca58d5fe36c9ec82876 -Author: Daniel Barry -Date: Tue Aug 30 23:17:30 2022 +0000 - - papi_avail: add presets for Intel Ice Lake SP - - Define preset events for the Intel Ice Lake SP processor. - These presets have been verified using the Counter Analysis Toolkit benchmarks. - - These changes have been tested on the Intel Ice Lake architecture. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index a013f58af..8f23e030c 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -929,6 +929,63 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD - - # End of hsw,bdw,skl,clx list - # -+ -+# Intel Ice Lake SP events -+CPU,icx -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P -+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P -+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES -+# Loads and stores -+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_LOADS -+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_STORES -+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_INST_RETIRED:ALL_LOADS,MEM_INST_RETIRED:ALL_STORES -+# L1 cache -+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD -+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT -+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD -+# L2 cache -+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES -+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD -+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT -+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS -+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD -+#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT -+#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS -+PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS -+PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD -+#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT -+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS -+PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD -+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES -+PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD -+# L3 cache -+PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS -+PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD -+PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS -+PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS -+#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT -+PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_RETIRED:L3_MISS -+PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES -+PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES -+# SMP -+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD -+# Branches -+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND -+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND -+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN -+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN -+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND -+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES -+#FLOPs -+# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE -+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE -+# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE -+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE -+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE -+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE -+# End of icx list -+ - # - # Intel MIC / Xeon-Phi / Knights Landing - # Intel Knights Mill -commit d4da29b07befb9f7c11e351dbfef835b74cdd67a -Author: John Linford -Date: Mon Mar 20 17:11:37 2023 -0500 - - Add minimal events for Arm Neoverse N1 - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 8f23e030c..a4d5a9756 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -2059,6 +2059,41 @@ PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIX - PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC - PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC - -+######################### -+# ARM Neoverse N1 # -+######################### -+CPU,arm_n1 -+# -+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES -+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC -+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED -+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED -+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED -+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC -+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC -+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC -+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE -+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL -+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD -+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR -+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS -+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL -+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL -+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS -+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR -+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL -+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD -+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR -+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD -+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND -+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND -+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ -+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC -+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL -+ - # - CPU,mips_74k - # -commit 88e686f877abcf19c5f50d4e23cbf8ea920a40b6 -Author: John Linford -Date: Mon Mar 20 14:54:41 2023 -0500 - - Add minimal events for Arm Neoverse V1 - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index a4d5a9756..207d6d1db 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ - PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC - PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL - -+######################### -+# ARM Neoverse V1 # -+######################### -+CPU,arm_v1 -+# -+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES -+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC -+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED -+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED -+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED -+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC -+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC -+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC -+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE -+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL -+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD -+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR -+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS -+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL -+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL -+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS -+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR -+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL -+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD -+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR -+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD -+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND -+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND -+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ -+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC -+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL -+ - # - CPU,mips_74k - # -commit e911f951115bb551925c5b07e7f5b721d5fe3bbe -Author: John Linford -Date: Mon Mar 20 17:14:18 2023 -0500 - - Add minimal events for Arm Neoverse N2 - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 207d6d1db..d27d956c1 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ - PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC - PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL - -+######################### -+# ARM Neoverse N2 # -+######################### -+CPU,arm_n2 -+# -+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES -+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC -+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED -+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED -+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED -+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC -+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC -+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC -+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE -+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL -+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD -+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR -+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS -+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL -+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL -+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS -+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR -+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL -+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD -+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR -+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD -+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND -+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND -+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ -+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC -+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL -+ - ######################### - # ARM Neoverse V1 # - ######################### -commit 05dc580247cb18fca882a33d8e356d79032d2ed1 -Author: John Linford -Date: Mon Mar 20 17:08:35 2023 -0500 - - Add minimal events for Arm Neoverse V2 - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index d27d956c1..549e337c7 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -2164,6 +2164,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ - PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC - PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL - -+######################### -+# ARM Neoverse V2 # -+######################### -+CPU,arm_v2 -+# -+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES -+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC -+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED -+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED -+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED -+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC -+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC -+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC -+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE -+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL -+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD -+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR -+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS -+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL -+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL -+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS -+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR -+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL -+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD -+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR -+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD -+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND -+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND -+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ -+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC -+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL -+ - # - CPU,mips_74k - # diff --git a/SOURCES/papi-71eventupdate.patch b/SOURCES/papi-71eventupdate.patch deleted file mode 100644 index aeb576d..0000000 --- a/SOURCES/papi-71eventupdate.patch +++ /dev/null @@ -1,321 +0,0 @@ -commit b969d25f2a87a53365e3e9a040533b093544a05d -Author: John Linford -Date: Mon Apr 3 22:30:14 2023 +0000 - - Update Neoverse V2 events - - Add/remove PAPI events to match available hardware counters - All tests pass on NVIDIA Grace - - Disclaimer: - The PAPI team was not able to verify the functionality included in this - commit. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 549e337c..3089d2d4 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -2170,34 +2170,113 @@ PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL - CPU,arm_v2 - # - PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED -+PRESET,PAPI_INT_INS,NOT_DERIVED,DP_SPEC -+#NOT_IMPLEMENTED,PAPI_TOT_IIS,Instructions issued - PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES --PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC -+PRESET,PAPI_REF_CYC,NOT_DERIVED,CNT_CYCLES -+PRESET,PAPI_STL_CCY,NOT_DERIVED,STALL -+#NOT_IMPLEMENTED,PAPI_FUL_CCY,Cycles with maximum instructions completed -+#NOT_IMPLEMENTED,PAPI_FUL_ICY,Cycles with maximum instruction issue -+#NOT_IMPLEMENTED,PAPI_FXU_IDL,Cycles integer units are idle -+#NOT_IMPLEMENTED,PAPI_LSU_IDL,Cycles load/store units are idle -+#NOT_IMPLEMENTED,PAPI_MEM_RCY,Cycles Stalled Waiting for memory Reads -+#NOT_IMPLEMENTED,PAPI_MEM_SCY,Cycles Stalled Waiting for memory accesses -+#NOT_IMPLEMENTED,PAPI_MEM_WCY,Cycles Stalled Waiting for memory writes -+#NOT_IMPLEMENTED,PAPI_FP_STAL,Cycles the FP unit(s) are stalled -+#NOT_IMPLEMENTED,PAPI_FPU_IDL,Cycles floating point units are idle -+#NOT_IMPLEMENTED,PAPI_BRU_IDL,Cycles branch units are idle -+PRESET,PAPI_STL_ICY,NOT_DERIVED,STALL -+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND -+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC -+#NOT_IMPLEMENTED,PAPI_SP_OPS,Floating point operations; optimized to count scaled single precision vector operations -+#NOT_IMPLEMENTED,PAPI_DP_OPS,Floating point operations; optimized to count scaled double precision vector operations -+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_HP_SPEC,FP_SP_SPEC,FP_DP_SPEC -+#NOT_IMPLEMENTED,PAPI_FAD_INS,Floating point add instructions -+#NOT_IMPLEMENTED,PAPI_FDV_INS,Floating point divide instructions -+#NOT_IMPLEMENTED,PAPI_FMA_INS,FMA instructions completed -+#NOT_IMPLEMENTED,PAPI_FML_INS,Floating point multiply instructions -+#NOT_IMPLEMENTED,PAPI_FNV_INS,Floating point inverse instructions -+#NOT_IMPLEMENTED,PAPI_FSQ_INS,Floating point square root instructions - PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC -+#NOT_IMPLEMENTED,PAPI_VEC_DP,Double precision vector/SIMD instructions -+#NOT_IMPLEMENTED,PAPI_VEC_SP,Single precision vector/SIMD instructions - PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED --PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED --PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED --PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED -+#NOT_IMPLEMENTED,PAPI_BR_CN,Conditional branch instructions -+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_RETIRED,BR_MIS_PRED_RETIRED -+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED_RETIRED -+#NOT_IMPLEMENTED,PAPI_BR_NTK,Conditional branch instructions not taken -+#NOT_IMPLEMENTED,PAPI_BR_TKN,Conditional branch instructions taken -+#NOT_IMPLEMENTED,PAPI_BR_UCN,Unconditional branch instructions -+#NOT_IMPLEMENTED,PAPI_BTAC_M,Branch target address cache misses - PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC - PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC - PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC - PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE -+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL - PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL - PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD - PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR - PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS - PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL - PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL --PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS --PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR -+#NOT_IMPLEMENTED,PAPI_L1_ICR,Level 1 instruction cache reads -+#NOT_IMPLEMENTED,PAPI_L1_ICW,Level 1 instruction cache writes -+#NOT_IMPLEMENTED,PAPI_L1_LDM,Level 1 load misses -+#NOT_IMPLEMENTED,PAPI_L1_STM,Level 1 store misses -+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE_ACCESS -+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE_ACCESS,L1I_CACHE_REFILL -+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL -+#NOT_IMPLEMENTED,PAPI_L1_TCR,Level 1 total cache reads -+#NOT_IMPLEMENTED,PAPI_L1_TCW,Level 1 total cache writes -+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE -+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE - PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL - PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD - PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR -+PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL - PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD --PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND --PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND -+PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_WR -+#NOT_IMPLEMENTED,PAPI_L2_ICA,Level 2 instruction cache accesses -+#NOT_IMPLEMENTED,PAPI_L2_ICH,Level 2 instruction cache hits -+#NOT_IMPLEMENTED,PAPI_L2_ICM,Level 2 instruction cache misses -+#NOT_IMPLEMENTED,PAPI_L2_ICR,Level 2 instruction cache reads -+#NOT_IMPLEMENTED,PAPI_L2_ICW,Level 2 instruction cache writes -+PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL -+PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL -+PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD -+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR -+PRESET,PAPI_L3_TCA,NOT_DERIVED,L3D_CACHE -+PRESET,PAPI_L3_DCA,NOT_DERIVED,L3D_CACHE -+#NOT_IMPLEMENTED,PAPI_L3_DCH,Level 3 data cache hits -+PRESET,PAPI_L3_DCM,NOT_DERIVED,L3D_CACHE_REFILL -+#NOT_IMPLEMENTED,PAPI_L3_DCR,Level 3 data cache reads -+#NOT_IMPLEMENTED,PAPI_L3_DCW,Level 3 data cache writes -+#NOT_IMPLEMENTED,PAPI_L3_ICA,Level 3 instruction cache accesses -+#NOT_IMPLEMENTED,PAPI_L3_ICH,Level 3 instruction cache hits -+#NOT_IMPLEMENTED,PAPI_L3_ICM,Level 3 instruction cache misses -+#NOT_IMPLEMENTED,PAPI_L3_ICR,Level 3 instruction cache reads -+#NOT_IMPLEMENTED,PAPI_L3_ICW,Level 3 instruction cache writes -+#NOT_IMPLEMENTED,PAPI_L3_LDM,Level 3 load misses -+#NOT_IMPLEMENTED,PAPI_L3_STM,Level 3 store misses -+#NOT_IMPLEMENTED,PAPI_L3_TCH,Level 3 total cache hits -+#NOT_IMPLEMENTED,PAPI_L3_TCM,Level 3 cache misses -+#NOT_IMPLEMENTED,PAPI_L3_TCR,Level 3 total cache reads -+#NOT_IMPLEMENTED,PAPI_L3_TCW,Level 3 total cache writes - PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ - PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC - PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL -+PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL -+#NOT_IMPLEMENTED,PAPI_TLB_SD,Translation lookaside buffer shootdowns -+PRESET,PAPI_TLB_TL,DERIVED_ADD,L1D_TLB_REFILL,L2D_TLB_REFILL -+#NOT_IMPLEMENTED,PAPI_CA_CLN,Requests for exclusive access to clean cache line -+#NOT_IMPLEMENTED,PAPI_CA_INV,Requests for cache line invalidation -+#NOT_IMPLEMENTED,PAPI_CA_ITV,Requests for cache line intervention -+#NOT_IMPLEMENTED,PAPI_CA_SHR,Requests for exclusive access to shared cache line -+#NOT_IMPLEMENTED,PAPI_CA_SNP,Requests for a snoop -+#NOT_IMPLEMENTED,PAPI_CSR_FAL,Failed store conditional instructions -+#NOT_IMPLEMENTED,PAPI_CSR_SUC,Successful store conditional instructions -+#NOT_IMPLEMENTED,PAPI_CSR_TOT,Total store conditional instructions -+#NOT_IMPLEMENTED,PAPI_PRF_DM,Data prefetch cache misses - - # - CPU,mips_74k - -commit 15f32cb3a2e6bdd9e51aa4043842f0130e9dcf24 -Author: Daniel Barry -Date: Wed Jun 7 14:38:39 2023 +0000 - - add branch presets for Zen3 and Zen4 - - These changes include all branching preset events for Zen3 and Zen4, - validated using the Counter Analysis Toolkit. - - For Zen3, PAPI_BR_TKN was modified to exclude unconditional branches - taken, in order to adhere to the preset's meaning. - - These changes have been tested on the AMD Zen3 and Zen4 architectures. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 3089d2d4..319cf82c 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -488,8 +488,12 @@ CPU,amd64_fam19h_zen3 - PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS - PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT - PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS --PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_UCN,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_TKN,DERIVED_POSTFIX,N0|N1|-|N2|+|,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS - PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED -+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED - PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT - PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K - PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH -@@ -509,6 +513,16 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS - PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS - PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS - PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS -+# -+# -+CPU,amd64_fam19h_zen4 -+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS -+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED -+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED - - - CPU,Intel architectural PMU - -commit da93ed4dd1fadb70ccee62a976597ff431c9f58c -Author: Daniel Barry -Date: Mon Jun 12 17:27:59 2023 +0000 - - add flops presets for Zen4 - - These changes include FLOPs presets for Zen4, validated using the - Counter Analysis Toolkit. - - These changes have been tested on the AMD Zen4 architecture. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 319cf82c..f6a40a35 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -523,6 +523,14 @@ PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDI - PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS - PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED - PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED -+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY -+PRESET,PAPI_FP_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL,RETIRED_FP_OPS_BY_TYPE:SCALAR_ALL -+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL -+PRESET,PAPI_FMA_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MAC,RETIRED_FP_OPS_BY_TYPE:SCALAR_MAC -+PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS_BY_TYPE:SCALAR_MUL -+PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD -+PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV -+PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT - - - CPU,Intel architectural PMU - -commit a31c3a4e9788e03fee113263a9f94bd638a66721 -Author: Daniel Barry -Date: Wed Jun 21 15:13:47 2023 +0000 - - add cycles and instructions presets for Zen4 - - These changes include the 'total cycles' and 'instructions completed' - presets for Zen4, validated using the Counter Analysis Toolkit. - - These changes have been tested on the AMD Zen4 architecture. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index f6a40a35..86e11fe6 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -531,6 +531,8 @@ PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS - PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD - PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV - PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT -+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT - - - CPU,Intel architectural PMU - -commit 94303410ce97a84408b0b2d727701a60c6f137aa -Author: Daniel Barry -Date: Sun Jul 23 15:38:36 2023 +0000 - - add various Sapphire Rapids presets - - These changes include cycles, instructions, branching, and FLOPs presets - for Intel Sapphire Rapids, validated using the Counter Analysis Toolkit. - - These changes have been tested on the Intel Sapphire Rapids architecture. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index 86e11fe6..eac0855f 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -1010,6 +1010,29 @@ PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ - PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE - # End of icx list - -+# Intel Sapphire Rapids events -+CPU,spr -+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P -+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P -+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES -+# FLOPs -+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE -+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE -+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE -+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE -+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE -+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE -+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE -+# Branches -+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND -+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND -+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN -+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN -+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND -+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND -+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES -+# End of spr list -+ - # - # Intel MIC / Xeon-Phi / Knights Landing - # Intel Knights Mill - -commit 42b14987ca1a7028b6cf6fdc190a2fa6a0fd8e18 -Author: Daniel Barry -Date: Tue Jul 25 12:16:56 2023 +0000 - - add more Ice Lake FLOPs presets - - Since there are enough counters available to monitor both single- and - double-precision floating-point events, PAPI_FP_OPS, PAPI_FP_INS, and - PAPI_VEC_INS are all defined. - These presets have been validated using the Counter Analysis Toolkit. - - These changes have been tested on the Intel Ice Lake architecture. - -diff --git a/src/papi_events.csv b/src/papi_events.csv -index eac0855f..df82ac1c 100644 ---- a/src/papi_events.csv -+++ b/src/papi_events.csv -@@ -1006,8 +1006,11 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES - PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE - # PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE - PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE -+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE -+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE - PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE - PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE -+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE - # End of icx list - - # Intel Sapphire Rapids events diff --git a/SOURCES/papi-arm64fastread.patch b/SOURCES/papi-arm64fastread.patch deleted file mode 100644 index 986743b..0000000 --- a/SOURCES/papi-arm64fastread.patch +++ /dev/null @@ -1,637 +0,0 @@ -commit 9a1f2d897f4086bc1d60102de984c849445b5e97 -Author: Masahiko, Yamada -Date: Tue Feb 21 19:18:40 2023 +0900 - - PAPI_read performance improvement for the arm64 processor - - We developed PAPI_read performance improvements for the arm64 processor - with a plan to port direct user space PMU register access processing from - libperf to the papi library without using libperf. - - The workaround has been implemented that stores the counter value at the - time of reset and subtracts the counter value at the time of reset from - the read counter value at the next read. - When reset processing is called, the value of pc->offset is cleared to 0, - and only the counter value read from the PMU counter is referenced. - There was no problem with the counters FAILED with negative values during - the multiplex+reset test, except for sdsc2-mpx and sdsc4-mpx. - To apply the workaround only during reset, the _pe_reset function call sets - the reset_flag and the next _pe_start function call clears the reset_flag. - The workaround works if the mmap_read_self function is called between calls - to the _pe_reset function and the next call to the _pe_start function. - - Switching PMU register direct access from user space from OFF to ON is done by - changing the setting of the kernel variable "/proc/sys/kernel/perf_user_access". - - Setting PMU Register Direct Access from User Space Off - $ echo 0 > /proc/sys/kernel/perf_user_access - $ cat /proc/sys/kernel/perf_user_access - 0 - - Setting PMU Register Direct Access from User Space ON - $ echo 1 > /proc/sys/kernel/perf_user_access - $ cat /proc/sys/kernel/perf_user_access - 1 - - Performance of PAPI_read has been improved as expected from the execution - result of the papi_cost command. - - Improvement effect of switching PMU register direct access from user space - from OFF to ON - - Total cost for PAPI_read (2 counters) over 1000000 iterations - min cycles: 689 -> 28 - max cycles: 3876 -> 1323 - mean cycles: 724.471979 -> 28.888076 - - Total cost for PAPI_read_ts (2 counters) over 1000000 iterations - min cycles: 693 -> 29 - max cycles: 4066 -> 3718 - mean cycles: 726.753003 -> 29.977226 - - Total cost for PAPI_read (1 derived_[add|sub] counter) over 1000000 iterations - min cycles: 698 -> 28 - max cycles: 7406 -> 2346 - mean cycles: 728.527079 -> 28.880691 - - Signed-off-by: Masahiko, Yamada - -diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c -index b4877d18e..331288c55 100644 ---- a/src/components/perf_event/perf_event.c -+++ b/src/components/perf_event/perf_event.c -@@ -682,6 +682,12 @@ set_up_mmap( pe_control_t *ctl, int evt_idx) - - - -+/* Request user access for arm64 */ -+static inline void arm64_request_user_access(struct perf_event_attr *hw_event) -+{ -+ hw_event->config1=0x2; /* Request user access */ -+} -+ - /* Open all events in the control state */ - static int - open_pe_events( pe_context_t *ctx, pe_control_t *ctl ) -@@ -735,6 +741,11 @@ open_pe_events( pe_context_t *ctx, pe_control_t *ctl ) - if (( i == 0 ) || (ctl->multiplexed)) { - ctl->events[i].attr.pinned = !ctl->multiplexed; - ctl->events[i].attr.disabled = 1; -+#if defined(__aarch64__) -+ if (_perf_event_vector.cmp_info.fast_counter_read) { -+ arm64_request_user_access(&ctl->events[i].attr); -+ } -+#endif - ctl->events[i].group_leader_fd=-1; - ctl->events[i].attr.read_format = get_read_format( - ctl->multiplexed, -@@ -743,6 +754,11 @@ open_pe_events( pe_context_t *ctx, pe_control_t *ctl ) - } else { - ctl->events[i].attr.pinned=0; - ctl->events[i].attr.disabled = 0; -+#if defined(__aarch64__) -+ if (_perf_event_vector.cmp_info.fast_counter_read) { -+ arm64_request_user_access(&ctl->events[i].attr); -+ } -+#endif - ctl->events[i].group_leader_fd=ctl->events[0].event_fd; - ctl->events[i].attr.read_format = get_read_format( - ctl->multiplexed, -@@ -1047,8 +1063,16 @@ _pe_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) - - /* We need to reset all of the events, not just the group leaders */ - for( i = 0; i < pe_ctl->num_events; i++ ) { -- ret = ioctl( pe_ctl->events[i].event_fd, -- PERF_EVENT_IOC_RESET, NULL ); -+ if (_perf_event_vector.cmp_info.fast_counter_read) { -+ ret = ioctl( pe_ctl->events[i].event_fd, -+ PERF_EVENT_IOC_RESET, NULL ); -+ pe_ctl->reset_counts[i] = mmap_read_reset_count( -+ pe_ctl->events[i].mmap_buf); -+ pe_ctl->reset_flag = 1; -+ } else { -+ ret = ioctl( pe_ctl->events[i].event_fd, -+ PERF_EVENT_IOC_RESET, NULL ); -+ } - if ( ret == -1 ) { - PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) " - "returned error, Linux says: %s", -@@ -1119,6 +1143,8 @@ _pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl, - for ( i = 0; i < pe_ctl->num_events; i++ ) { - - count = mmap_read_self(pe_ctl->events[i].mmap_buf, -+ pe_ctl->reset_flag, -+ pe_ctl->reset_counts[i], - &enabled,&running); - - if (count==0xffffffffffffffffULL) { -@@ -1438,6 +1464,10 @@ _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) - pe_ctl->events[i].event_fd); - ret=ioctl( pe_ctl->events[i].event_fd, - PERF_EVENT_IOC_ENABLE, NULL) ; -+ if (_perf_event_vector.cmp_info.fast_counter_read) { -+ pe_ctl->reset_counts[i] = 0LL; -+ pe_ctl->reset_flag = 0; -+ } - - /* ioctls always return -1 on failure */ - if (ret == -1) { -@@ -2297,6 +2327,29 @@ _pe_shutdown_component( void ) { - } - - -+#if defined(__aarch64__) -+/* Check access PMU counter from User space for arm64 support */ -+static int _pe_detect_arm64_access(void) { -+ -+ FILE *fff; -+ int perf_user_access; -+ int retval; -+ -+ fff=fopen("/proc/sys/kernel/perf_user_access","r"); -+ if (fff==NULL) { -+ return 0; -+ } -+ -+ /* 1 means you can access PMU counter from User space */ -+ /* 0 means you can not access PMU counter from User space */ -+ retval=fscanf(fff,"%d",&perf_user_access); -+ if (retval!=1) fprintf(stderr,"Error reading /proc/sys/kernel/perf_user_access\n"); -+ fclose(fff); -+ -+ return perf_user_access; -+} -+#endif -+ - /* Check the mmap page for rdpmc support */ - static int _pe_detect_rdpmc(void) { - -@@ -2305,10 +2358,13 @@ static int _pe_detect_rdpmc(void) { - void *addr; - struct perf_event_mmap_page *our_mmap; - int page_size=getpagesize(); -+#if defined(__aarch64__) -+ int retval; -+#endif - --#if defined(__i386__) || defined (__x86_64__) -+#if defined(__i386__) || defined (__x86_64__) || defined(__aarch64__) - #else -- /* We only support rdpmc on x86 for now */ -+ /* We support rdpmc on x86 and arm64 for now */ - return 0; - #endif - -@@ -2318,12 +2374,23 @@ static int _pe_detect_rdpmc(void) { - return 0; - } - -+#if defined(__aarch64__) -+ /* Detect if we can use PMU counter from User space for arm64 */ -+ retval = _pe_detect_arm64_access(); -+ if (retval == 0) { -+ return 0; -+ } -+#endif -+ - /* Create a fake instructions event so we can read a mmap page */ - memset(&pe,0,sizeof(struct perf_event_attr)); - - pe.type=PERF_TYPE_HARDWARE; - pe.size=sizeof(struct perf_event_attr); - pe.config=PERF_COUNT_HW_INSTRUCTIONS; -+#if defined(__aarch64__) -+ arm64_request_user_access(&pe); -+#endif - pe.exclude_kernel=1; - pe.disabled=1; - -diff --git a/src/components/perf_event/perf_event_lib.h b/src/components/perf_event/perf_event_lib.h -index 0c50ab9f0..cfba8ac49 100644 ---- a/src/components/perf_event/perf_event_lib.h -+++ b/src/components/perf_event/perf_event_lib.h -@@ -36,6 +36,8 @@ typedef struct { - pid_t tid; /* thread we are monitoring */ - pe_event_info_t events[PERF_EVENT_MAX_MPX_COUNTERS]; - long long counts[PERF_EVENT_MAX_MPX_COUNTERS]; -+ unsigned int reset_flag; -+ long long reset_counts[PERF_EVENT_MAX_MPX_COUNTERS]; - } pe_control_t; - - -diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h -index 92dca4fd0..097286865 100644 ---- a/src/components/perf_event/perf_helpers.h -+++ b/src/components/perf_event/perf_helpers.h -@@ -29,6 +29,74 @@ sys_perf_event_open( struct perf_event_attr *hw_event, - return ret; - } - -+ -+/* -+ * We define u64 as uint64_t for every architecture -+ * so that we can print it with "%"PRIx64 without getting warnings. -+ * -+ * typedef __u64 u64; -+ * typedef __s64 s64; -+ */ -+typedef uint64_t u64; -+typedef int64_t s64; -+ -+typedef __u32 u32; -+typedef __s32 s32; -+ -+typedef __u16 u16; -+typedef __s16 s16; -+ -+typedef __u8 u8; -+typedef __s8 s8; -+ -+ -+#ifdef __SIZEOF_INT128__ -+static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) -+{ -+ return (u64)(((unsigned __int128)a * b) >> shift); -+} -+ -+#else -+ -+#ifdef __i386__ -+static inline u64 mul_u32_u32(u32 a, u32 b) -+{ -+ u32 high, low; -+ -+ asm ("mull %[b]" : "=a" (low), "=d" (high) -+ : [a] "a" (a), [b] "rm" (b) ); -+ -+ return low | ((u64)high) << 32; -+} -+#else -+static inline u64 mul_u32_u32(u32 a, u32 b) -+{ -+ return (u64)a * b; -+} -+#endif -+ -+static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) -+{ -+ u32 ah, al; -+ u64 ret; -+ -+ al = a; -+ ah = a >> 32; -+ -+ ret = mul_u32_u32(al, b) >> shift; -+ if (ah) -+ ret += mul_u32_u32(ah, b) << (32 - shift); -+ -+ return ret; -+} -+ -+#endif /* __SIZEOF_INT128__ */ -+ -+#ifndef ARRAY_SIZE -+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -+#endif -+ -+ - #if defined(__x86_64__) || defined(__i386__) - - -@@ -52,19 +120,140 @@ static inline unsigned long long rdpmc(unsigned int counter) { - - #define barrier() __asm__ volatile("" ::: "memory") - -+ -+#elif defined(__aarch64__) -+ -+/* Indirect stringification. Doing two levels allows the parameter to be a -+ * macro itself. For example, compile with -DFOO=bar, __stringify(FOO) -+ * converts to "bar". -+ */ -+ -+#define __stringify_1(x...) #x -+#define __stringify(x...) __stringify_1(x) -+ -+#define read_sysreg(r) ({ \ -+ u64 __val; \ -+ asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ -+ __val; \ -+}) -+ -+static u64 read_pmccntr(void) -+{ -+ return read_sysreg(pmccntr_el0); -+} -+ -+#define PMEVCNTR_READ(idx) \ -+ static u64 read_pmevcntr_##idx(void) { \ -+ return read_sysreg(pmevcntr##idx##_el0); \ -+ } -+ -+PMEVCNTR_READ(0); -+PMEVCNTR_READ(1); -+PMEVCNTR_READ(2); -+PMEVCNTR_READ(3); -+PMEVCNTR_READ(4); -+PMEVCNTR_READ(5); -+PMEVCNTR_READ(6); -+PMEVCNTR_READ(7); -+PMEVCNTR_READ(8); -+PMEVCNTR_READ(9); -+PMEVCNTR_READ(10); -+PMEVCNTR_READ(11); -+PMEVCNTR_READ(12); -+PMEVCNTR_READ(13); -+PMEVCNTR_READ(14); -+PMEVCNTR_READ(15); -+PMEVCNTR_READ(16); -+PMEVCNTR_READ(17); -+PMEVCNTR_READ(18); -+PMEVCNTR_READ(19); -+PMEVCNTR_READ(20); -+PMEVCNTR_READ(21); -+PMEVCNTR_READ(22); -+PMEVCNTR_READ(23); -+PMEVCNTR_READ(24); -+PMEVCNTR_READ(25); -+PMEVCNTR_READ(26); -+PMEVCNTR_READ(27); -+PMEVCNTR_READ(28); -+PMEVCNTR_READ(29); -+PMEVCNTR_READ(30); -+ -+/* -+ * Read a value direct from PMEVCNTR -+ */ -+static u64 rdpmc(unsigned int counter) -+{ -+ static u64 (* const read_f[])(void) = { -+ read_pmevcntr_0, -+ read_pmevcntr_1, -+ read_pmevcntr_2, -+ read_pmevcntr_3, -+ read_pmevcntr_4, -+ read_pmevcntr_5, -+ read_pmevcntr_6, -+ read_pmevcntr_7, -+ read_pmevcntr_8, -+ read_pmevcntr_9, -+ read_pmevcntr_10, -+ read_pmevcntr_11, -+ read_pmevcntr_13, -+ read_pmevcntr_12, -+ read_pmevcntr_14, -+ read_pmevcntr_15, -+ read_pmevcntr_16, -+ read_pmevcntr_17, -+ read_pmevcntr_18, -+ read_pmevcntr_19, -+ read_pmevcntr_20, -+ read_pmevcntr_21, -+ read_pmevcntr_22, -+ read_pmevcntr_23, -+ read_pmevcntr_24, -+ read_pmevcntr_25, -+ read_pmevcntr_26, -+ read_pmevcntr_27, -+ read_pmevcntr_28, -+ read_pmevcntr_29, -+ read_pmevcntr_30, -+ read_pmccntr -+ }; -+ -+ if (counter < ARRAY_SIZE(read_f)) -+ return (read_f[counter])(); -+ -+ return 0; -+} -+ -+static u64 rdtsc(void) { return read_sysreg(cntvct_el0); } -+ -+#define barrier() asm volatile("dmb ish" : : : "memory") -+ -+#endif -+ -+#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) -+ -+static inline u64 adjust_cap_usr_time_short(u64 a, u64 b, u64 c) -+{ -+ u64 ret; -+ ret = b + ((a - b) & c); -+ return ret; -+} -+ - /* based on the code in include/uapi/linux/perf_event.h */ - static inline unsigned long long mmap_read_self(void *addr, -+ int user_reset_flag, -+ unsigned long long reset, - unsigned long long *en, - unsigned long long *ru) { - - struct perf_event_mmap_page *pc = addr; - -- uint32_t seq, time_mult, time_shift, index, width; -+ uint32_t seq, time_mult = 0, time_shift = 0, index, width; - int64_t count; - uint64_t enabled, running; -- uint64_t cyc, time_offset; -+ uint64_t cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL; - int64_t pmc = 0; -- uint64_t quot, rem; - uint64_t delta = 0; - - -@@ -96,12 +285,11 @@ static inline unsigned long long mmap_read_self(void *addr, - time_mult = pc->time_mult; - time_shift = pc->time_shift; - -- quot=(cyc>>time_shift); -- rem = cyc & (((uint64_t)1 << time_shift) - 1); -- delta = time_offset + (quot * time_mult) + -- ((rem * time_mult) >> time_shift); -+ if (pc->cap_user_time_short) { -+ time_cycles = pc->time_cycles; -+ time_mask = pc->time_mask; -+ } - } -- enabled+=delta; - - /* actually do the measurement */ - -@@ -116,8 +304,9 @@ static inline unsigned long long mmap_read_self(void *addr, - /* numbers which break if an IOC_RESET is done */ - width = pc->pmc_width; - count = pc->offset; -- count<<=(64-width); -- count>>=(64-width); -+ if (user_reset_flag == 1) { -+ count = 0; -+ } - - /* Ugh, libpfm4 perf_event.h has cap_usr_rdpmc */ - /* while actual perf_event.h has cap_user_rdpmc */ -@@ -130,14 +319,14 @@ static inline unsigned long long mmap_read_self(void *addr, - pmc = rdpmc(index-1); - - /* sign extend result */ -+ if (user_reset_flag == 1) { -+ pmc-=reset; -+ } - pmc<<=(64-width); - pmc>>=(64-width); - - /* add current count into the existing kernel count */ - count+=pmc; -- -- /* Only adjust if index is valid */ -- running+=delta; - } else { - /* Falling back because rdpmc not supported */ - /* for this event. */ -@@ -148,14 +337,66 @@ static inline unsigned long long mmap_read_self(void *addr, - - } while (pc->lock != seq); - -+ if (enabled != running) { -+ -+ /* Adjust for cap_usr_time_short, a nop if not */ -+ cyc = adjust_cap_usr_time_short(cyc, time_cycles, time_mask); -+ -+ delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift); -+ -+ enabled+=delta; -+ if (index) -+ /* Only adjust if index is valid */ -+ running+=delta; -+ } -+ - if (en) *en=enabled; - if (ru) *ru=running; - - return count; - } - -+static inline unsigned long long mmap_read_reset_count(void *addr) { -+ -+ struct perf_event_mmap_page *pc = addr; -+ uint32_t seq, index; -+ uint64_t count = 0; -+ -+ if (pc == NULL) { -+ return count; -+ } -+ -+ do { -+ /* The barrier ensures we get the most up to date */ -+ /* version of the pc->lock variable */ -+ -+ seq=pc->lock; -+ barrier(); -+ -+ /* actually do the measurement */ -+ -+ /* Ugh, libpfm4 perf_event.h has cap_usr_rdpmc */ -+ /* while actual perf_event.h has cap_user_rdpmc */ -+ -+ /* Index of register to read */ -+ /* 0 means stopped/not-active */ -+ /* Need to subtract 1 to get actual index to rdpmc() */ -+ index = pc->index; -+ -+ if (pc->cap_usr_rdpmc && index) { -+ /* Read counter value */ -+ count = rdpmc(index-1); -+ } -+ barrier(); -+ -+ } while (pc->lock != seq); -+ -+ return count; -+} -+ - #else - static inline unsigned long long mmap_read_self(void *addr, -+ int user_reset_flag, - unsigned long long *en, - unsigned long long *ru) { - -commit 693dd5c014d1f0b9a3eae63de051389ed8eb338b -Author: Giuseppe Congiu -Date: Tue Feb 21 07:46:14 2023 -0500 - - perf_event: bug fix in mmap_read_self - - Commit 9a1f2d897 broke the perf_event component for power cpus. The - mmap_read_self function is missing one argument. This patch restores the - missing argument in the function. - -diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h -index 097286865..7ad3524f0 100644 ---- a/src/components/perf_event/perf_helpers.h -+++ b/src/components/perf_event/perf_helpers.h -@@ -397,6 +397,7 @@ static inline unsigned long long mmap_read_reset_count(void *addr) { - #else - static inline unsigned long long mmap_read_self(void *addr, - int user_reset_flag, -+ unsigned long long reset, - unsigned long long *en, - unsigned long long *ru) { - -commit 1b3e75b7f11c7e2b7c590948216d6aaeec299010 -Author: Giuseppe Congiu -Date: Tue Feb 21 14:21:03 2023 +0100 - - perf_event: add missing mmap_read_reset_count for non default cpus - - Power cpus do not have a version of mmap_read_reset_count. Implement the - missing function. - -diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h -index 7ad3524f0..73e82c8ae 100644 ---- a/src/components/perf_event/perf_helpers.h -+++ b/src/components/perf_event/perf_helpers.h -@@ -409,6 +409,11 @@ static inline unsigned long long mmap_read_self(void *addr, - return (unsigned long long)(-1); - } - -+static inline unsigned long long mmap_read_reset_count(void *addr __attribute__((unused))) { -+ -+ return (unsigned long long)(-1); -+} -+ - #endif - - /* These functions are based on builtin-record.c in the */ -commit 37d0c77b7b4d00a958dff50dc715cf63e0cd6084 -Author: Giuseppe Congiu -Date: Tue Feb 21 14:22:53 2023 +0100 - - perf_event: used unused attribute in mmap_read_self - -diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h -index 73e82c8ae..59c8a2fc8 100644 ---- a/src/components/perf_event/perf_helpers.h -+++ b/src/components/perf_event/perf_helpers.h -@@ -395,16 +395,11 @@ static inline unsigned long long mmap_read_reset_count(void *addr) { - } - - #else --static inline unsigned long long mmap_read_self(void *addr, -- int user_reset_flag, -- unsigned long long reset, -- unsigned long long *en, -- unsigned long long *ru) { -- -- (void)addr; -- -- *en=0; -- *ru=0; -+static inline unsigned long long mmap_read_self(void *addr __attribute__((unused)), -+ int user_reset_flag __attribute__((unused)), -+ unsigned long long reset __attribute__((unused)), -+ unsigned long long *en __attribute__((unused)), -+ unsigned long long *ru __attribute__((unused))) { - - return (unsigned long long)(-1); - } diff --git a/SOURCES/papi-avail-path-fix.patch b/SOURCES/papi-avail-path-fix.patch new file mode 100644 index 0000000..a1718f6 --- /dev/null +++ b/SOURCES/papi-avail-path-fix.patch @@ -0,0 +1,34 @@ +From fd36e088d4fc883a2ab8979fcbeb8c669553f1d2 Mon Sep 17 00:00:00 2001 +From: Aaron Merey +Date: Tue, 8 Jul 2025 15:45:09 -0400 +Subject: [PATCH] Avoid using srcdir path for papi_component_avail + +--- + src/run_tests.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/run_tests.sh b/src/run_tests.sh +index 1b11e82..16b44e9 100755 +--- a/src/run_tests.sh ++++ b/src/run_tests.sh +@@ -48,7 +48,7 @@ if [ "x$VALGRIND" != "x" ]; then + fi + + # Check for active 'perf_event' component +-PERF_EVENT_ACTIVE=$(utils/papi_component_avail | awk '/Active components:/{flag=1; next} flag' | grep -q "perf_event" && echo "true" || echo "false") ++PERF_EVENT_ACTIVE=$(papi_component_avail | awk '/Active components:/{flag=1; next} flag' | grep -q "perf_event" && echo "true" || echo "false") + + if [ "$PERF_EVENT_ACTIVE" = "true" ]; then + VTESTS=`find validation_tests/* -prune -perm -u+x -type f ! -name "*.[c|h]"`; +@@ -60,7 +60,7 @@ else + fi + + # List of active components +-ACTIVE_COMPONENTS_PATTERN=$(utils/papi_component_avail | awk '/Active components:/{flag=1; next} flag' | grep "Name:" | sed 's/Name: //' | awk '{print $1}' | paste -sd'|' -) ++ACTIVE_COMPONENTS_PATTERN=$(papi_component_avail | awk '/Active components:/{flag=1; next} flag' | grep "Name:" | sed 's/Name: //' | awk '{print $1}' | paste -sd'|' -) + + # Find the test files, filtering for only the active components + COMPTESTS=$(find components/*/tests -perm -u+x -type f ! \( -name "*.[c|h]" -o -name "*.cu" -o -name "*.so" \) | grep -E "components/($ACTIVE_COMPONENTS_PATTERN)/") +-- +2.50.0 + diff --git a/SOURCES/papi-config.patch b/SOURCES/papi-config.patch deleted file mode 100644 index 9b34cf5..0000000 --- a/SOURCES/papi-config.patch +++ /dev/null @@ -1,348 +0,0 @@ -commit 38290c41abbb105ca198411ec3c466ac027f5b8f -Author: Frank Winkler -Date: Fri Apr 24 16:18:22 2020 +0200 - - Fixed configure options for shared and static builds. - - 1) --with-static-lib=no (force PAPI to build shared libraries and tools) - 2) --with-shlib-tools (use internal libpfm via rpath-link) - -diff --git a/src/configure.in b/src/configure.in -index 3cf47edc1..1f58f7c8e 100644 ---- a/src/configure.in -+++ b/src/configure.in -@@ -200,9 +200,13 @@ else - AC_MSG_ERROR([cannot find dlopen and dlerror symbols neither in the base system libraries nor in -ldl]) - fi - fi -+ -+# Disable LDL for static builds -+# if test "x${STATIC}" = "x"; then -+# LDL="" -+# fi - AC_SUBST(LDL) -- -- -+ - if test "$OS" = "CLE"; then - virtualtimer=times - tls=__thread -@@ -827,10 +831,6 @@ AC_ARG_WITH(static_tools, - AC_MSG_RESULT(yes)], - [AC_MSG_RESULT(no)]) - --if test "$static_lib" = "no"; then -- AC_MSG_ERROR(Building tests and utilities static but no static papi library to be built) --fi -- - AC_MSG_CHECKING(for linking with papi shared library of tests and utilities) - AC_ARG_WITH(shlib_tools, - [ --with-shlib-tools Specify linking with papi library of tests and utilities], -@@ -839,6 +839,14 @@ AC_ARG_WITH(shlib_tools, - [shlib_tools=no - AC_MSG_RESULT(no)]) - -+if test "$static_lib" = "no"; then -+ shlib_tools=yes -+fi -+ -+if test "$static_lib" = "no" -a "$shlib_tools" = "no"; then -+ AC_MSG_ERROR(Building tests and utilities static but no static papi library to be built) -+fi -+ - if test "$shlib_tools" = "yes"; then - if test "$shared_lib" != "yes"; then - AC_MSG_ERROR(Building static but specified shared linking for tests and utilities) -@@ -847,6 +855,8 @@ if test "$shlib_tools" = "yes"; then - AC_MSG_ERROR([Building shared but specified static linking]) - fi - LINKLIB='$(SHLIB)' -+ #WORKAROUND: if libpfm cannot be found at link time -+ LDFLAGS="$LDFLAGS -Wl,-rpath-link,$PWD/libpfm4/lib" - elif test "$shlib_tools" = "no"; then - if test "$static_lib" != "yes"; then - AC_MSG_ERROR([Building shared but specified static linking for tests and utilities]) -commit d6f4e34d083f18cfdba38dd5e4bbfb2a580b8a9e -Author: Frank Winkler -Date: Fri Apr 24 16:38:18 2020 +0200 - - Another test for "--with-static-tools". - -diff --git a/src/configure.in b/src/configure.in -index 1f58f7c8e..e8d769578 100644 ---- a/src/configure.in -+++ b/src/configure.in -@@ -201,10 +201,6 @@ else - fi - fi - --# Disable LDL for static builds --# if test "x${STATIC}" = "x"; then --# LDL="" --# fi - AC_SUBST(LDL) - - if test "$OS" = "CLE"; then -@@ -831,6 +827,11 @@ AC_ARG_WITH(static_tools, - AC_MSG_RESULT(yes)], - [AC_MSG_RESULT(no)]) - -+# Disable LDL for static builds -+# if test "$STATIC" = "-static"; then -+# LDL="" -+# fi -+ - AC_MSG_CHECKING(for linking with papi shared library of tests and utilities) - AC_ARG_WITH(shlib_tools, - [ --with-shlib-tools Specify linking with papi library of tests and utilities], -commit 1c333c9954b872cda1b4d873fa81b14ec58a58a7 -Author: Frank Winkler -Date: Thu Apr 30 18:51:34 2020 +0200 - - Fixed static build. - - SDE component is disabled - - "ctest" shlib is disabled - -diff --git a/src/configure.in b/src/configure.in -index e8d769578..0eee98ea1 100644 ---- a/src/configure.in -+++ b/src/configure.in -@@ -827,10 +827,11 @@ AC_ARG_WITH(static_tools, - AC_MSG_RESULT(yes)], - [AC_MSG_RESULT(no)]) - --# Disable LDL for static builds --# if test "$STATIC" = "-static"; then --# LDL="" --# fi -+# Disable LDL AND SDE for static builds -+if test "$STATIC" = "-static"; then -+ LDL="" -+ SDE_ENABLED= -+fi - - AC_MSG_CHECKING(for linking with papi shared library of tests and utilities) - AC_ARG_WITH(shlib_tools, -@@ -1768,6 +1769,7 @@ for comp in $components; do - if test "x$comp" = "xsde" ; then - LDFLAGS="$LDFLAGS $LRT" - LIBS="$LIBS $LRT" -+ SDE_ENABLED=1 - fi - done - -@@ -1862,6 +1864,7 @@ AC_SUBST(BGP_SYSDIR) - AC_SUBST(BITFLAGS) - AC_SUBST(COMPONENT_RULES) - AC_SUBST(COMPONENTS) -+AC_SUBST(SDE_ENABLED) - AC_SUBST(FTEST_TARGETS) - AC_SUBST(HAVE_NO_OVERRIDE_INIT) - AC_SUBST(BGPM_INSTALL_DIR) -diff --git a/src/ctests/Makefile.recipies b/src/ctests/Makefile.recipies -index b7c1963d7..44e19b398 100644 ---- a/src/ctests/Makefile.recipies -+++ b/src/ctests/Makefile.recipies -@@ -11,7 +11,11 @@ MPX = max_multiplex multiplex1 multiplex2 mendes-alt sdsc-mpx sdsc2-mpx \ - MPXPTHR = multiplex1_pthreads multiplex3_pthreads kufrin - MPI = mpi_hl mpi_omp_hl \ - mpifirst -+ -+ifeq ($(STATIC),) - SHARED = shlib -+endif -+ - SERIAL = serial_hl serial_hl_ll_comb\ - all_events all_native_events branches calibrate case1 case2 \ - cmpinfo code2name derived describe destroy disable_component \ -@@ -344,8 +348,10 @@ case2: case2.c $(TESTLIB) $(PAPILIB) - low-level: low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) - $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o low-level - -+ifeq ($(STATIC),) - shlib: shlib.c $(TESTLIB) $(PAPILIB) - $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) shlib.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o shlib $(LDL) -+endif - - exeinfo: exeinfo.c $(TESTLIB) $(PAPILIB) - -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) exeinfo.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o exeinfo -diff --git a/src/ctests/Makefile.target.in b/src/ctests/Makefile.target.in -index edc04f1b7..af64e157c 100644 ---- a/src/ctests/Makefile.target.in -+++ b/src/ctests/Makefile.target.in -@@ -10,6 +10,7 @@ INCLUDE = -I. -I@includedir@ -I$(testlibdir) -I$(validationlibdir) - LIBDIR = @libdir@ - LIBRARY = @LIBRARY@ - SHLIB = @SHLIB@ -+STATIC = @STATIC@ - PAPILIB = ../@LINKLIB@ - TESTLIB = $(testlibdir)/libtestlib.a - LDFLAGS = @LDFLAGS@ @LDL@ @STATIC@ -diff --git a/src/utils/Makefile b/src/utils/Makefile -index 4abfd6cb8..64a2b8f9f 100644 ---- a/src/utils/Makefile -+++ b/src/utils/Makefile -@@ -48,8 +48,13 @@ papi_mem_info: papi_mem_info.o $(PAPILIB) - papi_multiplex_cost: papi_multiplex_cost.o $(PAPILIB) cost_utils.o - $(CC) -o papi_multiplex_cost papi_multiplex_cost.o cost_utils.o $(PAPILIB) -lm $(LDFLAGS) - -+ifneq ($(SDE_ENABLED),) - papi_native_avail: papi_native_avail.o $(PAPILIB) print_header.o papi_sde_interface.o - $(CC) -o papi_native_avail papi_native_avail.o $(PAPILIB) print_header.o $(LDFLAGS) papi_sde_interface.o -+else -+papi_native_avail: papi_native_avail.o $(PAPILIB) print_header.o -+ $(CC) -o papi_native_avail papi_native_avail.o $(PAPILIB) print_header.o $(LDFLAGS) -+endif - - papi_version: papi_version.o $(PAPILIB) - $(CC) -o papi_version papi_version.o $(PAPILIB) $(LDFLAGS) -@@ -65,8 +70,10 @@ cost_utils.o: ../testlib/papi_test.h cost_utils.c - print_header.o: print_header.h print_header.c - $(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c print_header.c - -+ifneq ($(SDE_ENABLED),) - papi_sde_interface.o: papi_sde_interface.c - $(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c papi_sde_interface.c -+endif - - clean: - rm -f *.o *.stderr *.stdout core *~ $(ALL) -diff --git a/src/utils/Makefile.target.in b/src/utils/Makefile.target.in -index bcdbe94e9..9c76b37af 100644 ---- a/src/utils/Makefile.target.in -+++ b/src/utils/Makefile.target.in -@@ -9,6 +9,7 @@ INCLUDE = -I. -I@includedir@ -I$(testlibdir) - LIBDIR = @libdir@ - LIBRARY = @LIBRARY@ - SHLIB = @SHLIB@ -+SDE_ENABLED = @SDE_ENABLED@ - PAPILIB = ../@LINKLIB@ - TESTLIB = $(testlibdir)/libtestlib.a - LDFLAGS = @LDFLAGS@ @LDL@ @STATIC@ -diff --git a/src/utils/papi_native_avail.c b/src/utils/papi_native_avail.c -index ae6dbb9e5..902ed7996 100644 ---- a/src/utils/papi_native_avail.c -+++ b/src/utils/papi_native_avail.c -@@ -51,8 +51,9 @@ - - #include "papi.h" - #include "print_header.h" -+#ifdef SDE_ENABLED - #include "components/sde/interface/papi_sde_interface.h" -- -+#endif - #define EVT_LINE 80 - #define EVT_LINE_BUF_SIZE 4096 - -@@ -84,7 +85,9 @@ print_help( char **argv ) - printf( "\nGeneral command options:\n" ); - printf( "\t-h, --help print this help message\n" ); - printf( "\t-c, --check attempts to add each event\n"); -+#ifdef SDE_ENABLED - printf( "\t-sde FILE lists SDEs that are registered by the library or executable in FILE\n" ); -+#endif - printf( "\t-e EVENTNAME display detailed information about named native event\n" ); - printf( "\t-i EVENTSTR include only event names that contain EVENTSTR\n" ); - printf( "\t-x EVENTSTR exclude any event names that contain EVENTSTR\n" ); -@@ -368,6 +371,7 @@ parse_event_qualifiers( PAPI_event_info_t * info ) - return ( 1 ); - } - -+#ifdef SDE_ENABLED - void - invoke_hook_fptr( char *lib_path ) - { -@@ -394,6 +398,7 @@ invoke_hook_fptr( char *lib_path ) - dlclose(dl_handle); - return; - } -+#endif - - int - main( int argc, char **argv ) -@@ -444,6 +449,7 @@ main( int argc, char **argv ) - return 2; - } - -+#ifdef SDE_ENABLED - /* - The following code will execute if the user wants to list the SDEs in the - library (or executable) stored in flags.path. This code will not list the -@@ -514,6 +520,7 @@ skip_lib: - if( NULL != cmd ) free(cmd); - } - no_sdes: -+#endif //SDE_ENABLED - - /* Do this code if the event name option was specified on the commandline */ - if ( flags.named ) { -commit b5111efaf1b234541c94b8ef7e5791bf8eb094b3 -Author: Frank Winkler -Date: Thu May 7 09:00:53 2020 +0200 - - Added CFLAG -DSDE. - -diff --git a/src/configure.in b/src/configure.in -index 0eee98ea1..781148e5b 100644 ---- a/src/configure.in -+++ b/src/configure.in -@@ -1767,6 +1767,7 @@ tests="$tests comp_tests" - # check for SDE component to determine if we need -lrt in LDFLAGS - for comp in $components; do - if test "x$comp" = "xsde" ; then -+ CFLAGS="$CFLAGS -DSDE" - LDFLAGS="$LDFLAGS $LRT" - LIBS="$LIBS $LRT" - SDE_ENABLED=1 -diff --git a/src/utils/papi_native_avail.c b/src/utils/papi_native_avail.c -index 902ed7996..7d90c4064 100644 ---- a/src/utils/papi_native_avail.c -+++ b/src/utils/papi_native_avail.c -@@ -51,9 +51,10 @@ - - #include "papi.h" - #include "print_header.h" --#ifdef SDE_ENABLED -+#if SDE - #include "components/sde/interface/papi_sde_interface.h" - #endif -+ - #define EVT_LINE 80 - #define EVT_LINE_BUF_SIZE 4096 - -@@ -85,7 +86,7 @@ print_help( char **argv ) - printf( "\nGeneral command options:\n" ); - printf( "\t-h, --help print this help message\n" ); - printf( "\t-c, --check attempts to add each event\n"); --#ifdef SDE_ENABLED -+#if SDE - printf( "\t-sde FILE lists SDEs that are registered by the library or executable in FILE\n" ); - #endif - printf( "\t-e EVENTNAME display detailed information about named native event\n" ); -@@ -371,7 +372,7 @@ parse_event_qualifiers( PAPI_event_info_t * info ) - return ( 1 ); - } - --#ifdef SDE_ENABLED -+#if SDE - void - invoke_hook_fptr( char *lib_path ) - { -@@ -449,7 +450,7 @@ main( int argc, char **argv ) - return 2; - } - --#ifdef SDE_ENABLED -+#if SDE - /* - The following code will execute if the user wants to list the SDEs in the - library (or executable) stored in flags.path. This code will not list the -@@ -520,7 +521,7 @@ skip_lib: - if( NULL != cmd ) free(cmd); - } - no_sdes: --#endif //SDE_ENABLED -+#endif //SDE - - /* Do this code if the event name option was specified on the commandline */ - if ( flags.named ) { diff --git a/SOURCES/papi-lto.patch b/SOURCES/papi-lto.patch deleted file mode 100644 index 17bd7cf..0000000 --- a/SOURCES/papi-lto.patch +++ /dev/null @@ -1,124 +0,0 @@ -commit cbca67dae5722d65590e33b8b885a561ac3fff5d -Author: William Cohen -Date: Tue Jun 15 21:48:15 2021 -0400 - - Use numeric local labels to allow compilation with LTO enabled - - Some assembly snippets in instructions_testcode.c used regular label - names. Unfortunately, when multiple copies of the snippets are - inlined in different places with LTO enabled the multiple copies of a - label by the same name cause the build to fail because of the - redefinition of the label. To avoid this problem all those labels - have been converted to numeric local labels to allow multiple copies - to peacefully coexist in the LTO enabled code. - -diff --git a/src/validation_tests/instructions_testcode.c b/src/validation_tests/instructions_testcode.c -index 3634b1f90..128127c25 100644 ---- a/src/validation_tests/instructions_testcode.c -+++ b/src/validation_tests/instructions_testcode.c -@@ -10,9 +10,9 @@ int instructions_million(void) { - #if defined(__i386__) || (defined __x86_64__) - asm( " xor %%ecx,%%ecx\n" - " mov $499999,%%ecx\n" -- "test_loop:\n" -+ "55:\n" - " dec %%ecx\n" -- " jnz test_loop\n" -+ " jnz 55b\n" - : /* no output registers */ - : /* no inputs */ - : "cc", "%ecx" /* clobbered */ -@@ -47,9 +47,9 @@ int instructions_million(void) { - #elif defined(__sparc__) - asm( " sethi %%hi(333333), %%l0\n" - " or %%l0,%%lo(333333),%%l0\n" -- "test_loop:\n" -+ "55:\n" - " deccc %%l0 ! decrement count\n" -- " bnz test_loop ! repeat until zero\n" -+ " bnz 55b ! repeat until zero\n" - " nop ! branch delay slot\n" - : /* no output registers */ - : /* no inputs */ -@@ -57,13 +57,13 @@ int instructions_million(void) { - ); - return 0; - #elif defined(__arm__) -- asm( " ldr r2,count @ set count\n" -- " b test_loop\n" -- "count: .word 333332\n" -- "test_loop:\n" -+ asm( " ldr r2,42f @ set count\n" -+ " b 55f\n" -+ "42: .word 333332\n" -+ "55:\n" - " add r2,r2,#-1\n" - " cmp r2,#0\n" -- " bne test_loop @ repeat till zero\n" -+ " bne 55b @ repeat till zero\n" - : /* no output registers */ - : /* no inputs */ - : "cc", "r2" /* clobbered */ -@@ -71,10 +71,10 @@ int instructions_million(void) { - return 0; - #elif defined(__aarch64__) - asm( " ldr x2,=333332 // set count\n" -- "test_loop:\n" -+ "55:\n" - " add x2,x2,#-1\n" - " cmp x2,#0\n" -- " bne test_loop // repeat till zero\n" -+ " bne 55b // repeat till zero\n" - : /* no output registers */ - : /* no inputs */ - : "cc", "r2" /* clobbered */ -@@ -97,7 +97,7 @@ int instructions_fldcw(void) { - double three=3.0; - - asm( " mov $100000,%%ecx\n" -- "big_loop:\n" -+ "44:\n" - " fldl %1 # load value onto fp stack\n" - " fnstcw %0 # store control word to mem\n" - " movzwl %0, %%eax # load cw from mem, zero extending\n" -@@ -106,7 +106,7 @@ int instructions_fldcw(void) { - " fldcw %3 # save new rounding mode\n" - " fistpl %2 # save stack value as integer to mem\n" - " fldcw %0 # restore old cw\n" -- " loop big_loop # loop to make the count more obvious\n" -+ " loop 44b # loop to make the count more obvious\n" - : /* no output registers */ - : "m"(saved_cw), "m"(three), "m"(result), "m"(cw) /* inputs */ - : "cc", "%ecx","%eax" /* clobbered */ -@@ -129,13 +129,13 @@ int instructions_rep(void) { - - asm( " mov $1000,%%edx\n" - " cld\n" -- "loadstore: # test 8-bit store\n" -+ "66: # test 8-bit store\n" - " mov $0xd, %%al # set eax to d\n" - " mov $16384, %%ecx\n" - " mov %0, %%edi # set destination\n" - " rep stosb # store d 16384 times, auto-increment\n" - " dec %%edx\n" -- " jnz loadstore\n" -+ " jnz 66b\n" - : /* outputs */ - : "rm" (buffer_out) /* inputs */ - : "cc", "%esi","%edi","%edx","%ecx","%eax","memory" /* clobbered */ -@@ -147,13 +147,13 @@ int instructions_rep(void) { - - asm( " mov $1000,%%edx\n" - " cld\n" -- "loadstore: # test 8-bit store\n" -+ "66: # test 8-bit store\n" - " mov $0xd, %%al # set eax to d\n" - " mov $16384, %%ecx\n" - " mov %0, %%rdi # set destination\n" - " rep stosb # store d 16384 times, auto-increment\n" - " dec %%edx\n" -- " jnz loadstore\n" -+ " jnz 66b\n" - : /* outputs */ - : "rm" (buffer_out) /* inputs */ - : "cc", "%esi","%edi","%edx","%ecx","%eax","memory" /* clobbered */ diff --git a/SOURCES/papi-python3.patch b/SOURCES/papi-python3.patch deleted file mode 100644 index c5e7509..0000000 --- a/SOURCES/papi-python3.patch +++ /dev/null @@ -1,10 +0,0 @@ -diff --git a/src/high-level/scripts/papi_hl_output_writer.py b/src/high-level/scripts/papi_hl_output_writer.py -index 123d2cd0..34bfbd73 100755 ---- a/src/high-level/scripts/papi_hl_output_writer.py -+++ b/src/high-level/scripts/papi_hl_output_writer.py -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - from __future__ import division - from collections import OrderedDict - diff --git a/SOURCES/papi-revert-arm-test.patch b/SOURCES/papi-revert-arm-test.patch new file mode 100644 index 0000000..905039c --- /dev/null +++ b/SOURCES/papi-revert-arm-test.patch @@ -0,0 +1,267 @@ +From fe1124b81a07290ff0c88e04cc6348b1c438eecd Mon Sep 17 00:00:00 2001 +From: Aaron Merey +Date: Wed, 16 Jul 2025 17:04:09 -0400 +Subject: [PATCH] Revert "validation_tests: Add load/store ARM assembly + testcode" + +This reverts commit 7710a20ed72bb984b52a1d575a2d707d548de35b. +--- + src/validation_tests/Makefile.recipies | 11 ++--- + src/validation_tests/load_store_testcode.c | 57 ---------------------- + src/validation_tests/papi_ld_ins.c | 29 +++-------- + src/validation_tests/papi_sr_ins.c | 28 +++-------- + src/validation_tests/testcode.h | 4 -- + 5 files changed, 19 insertions(+), 110 deletions(-) + delete mode 100644 src/validation_tests/load_store_testcode.c + +diff --git a/src/validation_tests/Makefile.recipies b/src/validation_tests/Makefile.recipies +index c5ea36582..8949e5e9c 100644 +--- a/src/validation_tests/Makefile.recipies ++++ b/src/validation_tests/Makefile.recipies +@@ -38,9 +38,6 @@ instructions_testcode.o: instructions_testcode.c testcode.h + matrix_multiply.o: matrix_multiply.c matrix_multiply.h + $(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -O1 -c matrix_multiply.c + +-load_store_testcode.o: load_store_testcode.c testcode.h +- $(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c load_store_testcode.c +- + fp_validation_hl: fp_validation_hl.o $(TESTLIB) $(PAPILIB) flops_testcode.o + $(CC) -o fp_validation_hl fp_validation_hl.o $(TESTLIB) flops_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB) -lpthread + +@@ -84,8 +81,8 @@ papi_fp_ops: papi_fp_ops.o $(TESTLIB) $(PAPILIB) display_error.o branches_testco + papi_hw_int: papi_hw_int.o $(TESTLIB) $(PAPILIB) + $(CC) -o papi_hw_int papi_hw_int.o $(TESTLIB) $(PAPILIB) $(LDFLAGS) $(EXTRALIB) + +-papi_ld_ins: papi_ld_ins.o $(TESTLIB) $(PAPILIB) display_error.o matrix_multiply.o load_store_testcode.o +- $(CC) -o papi_ld_ins papi_ld_ins.o $(TESTLIB) display_error.o matrix_multiply.o load_store_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB) ++papi_ld_ins: papi_ld_ins.o $(TESTLIB) $(PAPILIB) display_error.o matrix_multiply.o ++ $(CC) -o papi_ld_ins papi_ld_ins.o $(TESTLIB) display_error.o matrix_multiply.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB) + + papi_l1_dca: papi_l1_dca.o $(TESTLIB) $(PAPILIB) cache_testcode.o display_error.o matrix_multiply.o + $(CC) -o papi_l1_dca papi_l1_dca.o $(TESTLIB) cache_testcode.o display_error.o matrix_multiply.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB) +@@ -111,8 +108,8 @@ papi_ref_cyc: papi_ref_cyc.o $(TESTLIB) $(PAPILIB) display_error.o flops_testcod + papi_sp_ops: papi_sp_ops.o $(TESTLIB) $(PAPILIB) display_error.o branches_testcode.o flops_testcode.o + $(CC) -o papi_sp_ops papi_sp_ops.o $(TESTLIB) display_error.o branches_testcode.o flops_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB) + +-papi_sr_ins: papi_sr_ins.o $(TESTLIB) $(PAPILIB) display_error.o matrix_multiply.o load_store_testcode.o +- $(CC) -o papi_sr_ins papi_sr_ins.o $(TESTLIB) display_error.o matrix_multiply.o load_store_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB) ++papi_sr_ins: papi_sr_ins.o $(TESTLIB) $(PAPILIB) display_error.o matrix_multiply.o ++ $(CC) -o papi_sr_ins papi_sr_ins.o $(TESTLIB) display_error.o matrix_multiply.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB) + + papi_tot_cyc: papi_tot_cyc.o $(TESTLIB) $(PAPILIB) display_error.o matrix_multiply.o + $(CC) -o papi_tot_cyc papi_tot_cyc.o $(TESTLIB) display_error.o matrix_multiply.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB) +diff --git a/src/validation_tests/load_store_testcode.c b/src/validation_tests/load_store_testcode.c +deleted file mode 100644 +index 4b8f13c87..000000000 +--- a/src/validation_tests/load_store_testcode.c ++++ /dev/null +@@ -1,57 +0,0 @@ +-#include "testcode.h" +- +-/* Execute n stores */ +-int execute_stores(int n) { +- +-#if defined(__aarch64__) +- +- __asm( ".data\n" +- "stvar: .word 1 /* stvar in memory */\n" +- ".text\n" +- " ldr x2, =stvar /* address of stvar */\n" +- " mov x4, %0\n" +- " mov x1, #0\n" +- "str_loop:\n" +- " str x1, [x2] /* store into stvar */\n" +- " add x1, x1, #1\n" +- " cmp x1, x4\n" +- " bne str_loop\n" +- : +- : "r" (n) +- : "cc" /* clobbered */ +- ); +- +- return 0; +- +-#endif +- return CODE_UNIMPLEMENTED; +- +-} +- +-/* Execute n loads */ +-int execute_loads(int n) { +- +-#if defined(__aarch64__) +- +- __asm( ".data\n" +- "ldvar: .word 1 /* ldvar in memory */\n" +- ".text\n" +- " ldr x2, =ldvar /* address of ldvar */\n" +- " mov x4, %0\n" +- " mov x1, #0\n" +- "ldr_loop:\n" +- " ldr x3, [x2] /* load from ldvar */\n" +- " add x1, x1, x3\n" +- " cmp x1, x4\n" +- " bne ldr_loop\n" +- : +- : "r" (n) +- : "cc" /* clobbered */ +- ); +- +- return 0; +- +-#endif +- return CODE_UNIMPLEMENTED; +- +-} +diff --git a/src/validation_tests/papi_ld_ins.c b/src/validation_tests/papi_ld_ins.c +index 24935b290..6c92befd6 100644 +--- a/src/validation_tests/papi_ld_ins.c ++++ b/src/validation_tests/papi_ld_ins.c +@@ -23,7 +23,6 @@ + #include "display_error.h" + + #include "matrix_multiply.h" +-#include "testcode.h" + + #define SLEEP_RUNS 3 + +@@ -102,26 +101,18 @@ int main(int argc, char **argv) { + test_fail( __FILE__, __LINE__, "idle average", retval ); + } + +- /***********************************/ +- /* testing a large number of loads */ +- /***********************************/ ++ /*****************************/ ++ /* testing Matrix Matrix GHz */ ++ /*****************************/ + + if (!quiet) { +- printf("\nTesting a large number of loads\n"); ++ printf("\nTesting with matrix matrix multiply\n"); + } + +- expected=naive_matrix_multiply_estimated_loads(quiet); +- + PAPI_reset(eventset); + PAPI_start(eventset); + +- retval = execute_loads(expected); +- if (retval == CODE_UNIMPLEMENTED) { +- if (!quiet) { +- printf("\tNo asm test found for the current hardware. Testing matrix multiply\n"); +- } +- naive_matrix_multiply(quiet); +- } ++ naive_matrix_multiply(quiet); + + retval=PAPI_stop(eventset,&count); + +@@ -129,6 +120,8 @@ int main(int argc, char **argv) { + test_fail( __FILE__, __LINE__, "Problem stopping!", retval ); + } + ++ expected=naive_matrix_multiply_estimated_loads(quiet); ++ + if (!quiet) { + printf("\tActual measured loads = %lld\n",count); + } +@@ -161,13 +154,7 @@ int main(int argc, char **argv) { + PAPI_start(eventset); + + for(i=0;i +Date: Tue, 15 Jul 2025 20:20:20 -0400 +Subject: [PATCH] Revert "Updating papi_events.csv to support addition of + L1I_CACHE and deprecation of L1I_CACHE_ACCESS in libpfm4." + +This reverts commit 6b2745ddc924183f968f066b2d48eade8c816b29. +--- + src/papi_events.csv | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2706af696..5e095933a 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -2290,8 +2290,8 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD + PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR +-PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE +-PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL + PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS + PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR +@@ -2325,8 +2325,8 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD + PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR +-PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE +-PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL + PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS + PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR +@@ -2360,8 +2360,8 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD + PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR +-PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE +-PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL + PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS + PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR +@@ -2438,15 +2438,15 @@ PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL + PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD + PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR +-PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE +-PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL ++PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL + PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL + #NOT_IMPLEMENTED,PAPI_L1_ICR,Level 1 instruction cache reads + #NOT_IMPLEMENTED,PAPI_L1_ICW,Level 1 instruction cache writes + #NOT_IMPLEMENTED,PAPI_L1_LDM,Level 1 load misses + #NOT_IMPLEMENTED,PAPI_L1_STM,Level 1 store misses +-PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE +-PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL ++PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE_ACCESS ++PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE_ACCESS,L1I_CACHE_REFILL + PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL + #NOT_IMPLEMENTED,PAPI_L1_TCR,Level 1 total cache reads + #NOT_IMPLEMENTED,PAPI_L1_TCW,Level 1 total cache writes +-- +2.50.0 + diff --git a/SOURCES/papi-rhbz1923967.patch b/SOURCES/papi-rhbz1923967.patch deleted file mode 100644 index 9770f8e..0000000 --- a/SOURCES/papi-rhbz1923967.patch +++ /dev/null @@ -1,23 +0,0 @@ -diff -up papi-6.0.0/src/papi_events.csv.rhbz1923967 papi-6.0.0/src/papi_events.csv ---- papi-6.0.0/src/papi_events.csv.rhbz1923967 2022-05-26 11:20:59.138469200 -0400 -+++ papi-6.0.0/src/papi_events.csv 2022-05-26 11:23:30.686302618 -0400 -@@ -1588,8 +1588,8 @@ PRESET,PAPI_L1_DCR,DERIVED_SUB,PM_LD_REF - #PRESET,PAPI_L1_DCA,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-,PM_ST_FIN,PM_ST_MISS_L1,PM_LD_REF_L1,PM_LD_MISS_L1_ALT - PRESET,PAPI_L1_DCA,DERIVED_ADD,PM_LD_REF_L1,PM_ST_CMPL - PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS --PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS --PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS -+#PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS -+#PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS - PRESET,PAPI_L2_DCR,NOT_DERIVED,PM_DATA_FROM_L2 - PRESET,PAPI_L2_DCW,NOT_DERIVED,PM_L2_ST_HIT - PRESET,PAPI_L3_DCR,NOT_DERIVED,PM_DATA_FROM_L2MISS -@@ -1598,7 +1598,7 @@ PRESET,PAPI_L3_LDM,DERIVED_ADD,PM_DATA_F - PRESET,PAPI_L1_ICH,NOT_DERIVED,PM_INST_FROM_L1 - PRESET,PAPI_L1_ICM,NOT_DERIVED,PM_L1_ICACHE_MISS - PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_INST_FROM_L2MISS --PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_L2_INST_MISS -+#PRESET,PAPI_L2_ICM,NOT_DERIVED,PM_L2_INST_MISS - PRESET,PAPI_L2_ICH,NOT_DERIVED,PM_INST_FROM_L2 - PRESET,PAPI_L3_ICA,NOT_DERIVED,PM_INST_FROM_L2MISS - PRESET,PAPI_L3_ICH,NOT_DERIVED,PM_INST_FROM_L3 diff --git a/SOURCES/papi-thread_init.patch b/SOURCES/papi-thread_init.patch deleted file mode 100644 index 0e790c6..0000000 --- a/SOURCES/papi-thread_init.patch +++ /dev/null @@ -1,106 +0,0 @@ -commit 3625bdbad9fd57d1cdb1e5615854545167d4adcb -Author: Anthony Castaldo -Date: Wed Aug 26 17:18:29 2020 -0400 - - This modifies PAPI_library_init() to initialize components in two classes, - separated by the initialization of the papi thread structure. The first class - is those that need no thread structure, currently everything but perf_event and - perf_event_uncore. Following the init of the threading structure, we init the - second class (perf_event and perf_event_uncore) that DOES need the thread - structure to successfully init_component(). This required a change to - _papi_hwi_init_global(), to add an argument to distinguish which class it - should initialize. - -diff --git a/src/papi.c b/src/papi.c -index 33cc2993..ed75af49 100644 ---- a/src/papi.c -+++ b/src/papi.c -@@ -1151,7 +1151,23 @@ PAPI_library_init( int version ) - papi_return( init_retval ); - } - -- /* Initialize thread globals, including the main threads */ -+ /* Initialize component globals EXCEPT for perf_event, perf_event_uncore. -+ * To avoid race conditions, these components use the thread local storage -+ * construct initialized by _papi_hwi_init_global_threads(), from within -+ * their init_component(). So these must have init_component() run AFTER -+ * _papi_hwi_init_global_threads. Other components demand that init threads -+ * run AFTER init_component(), which sets up globals they need. -+ */ -+ -+ tmp = _papi_hwi_init_global( 0 ); /* Selector 0 to skip perf_event, perf_event_uncore */ -+ if ( tmp ) { -+ init_retval = tmp; -+ _papi_hwi_shutdown_global_internal( ); -+ _in_papi_library_init_cnt--; -+ papi_return( init_retval ); -+ } -+ -+ /* Initialize thread globals, including the main threads */ - - tmp = _papi_hwi_init_global_threads( ); - if ( tmp ) { -@@ -1161,9 +1177,9 @@ PAPI_library_init( int version ) - papi_return( init_retval ); - } - -- /* Initialize component globals */ -+ /* Initialize perf_event, perf_event_uncore components */ - -- tmp = _papi_hwi_init_global( ); -+ tmp = _papi_hwi_init_global( 1 ); /* Selector 1 for only perf_event, perf_event_uncore */ - if ( tmp ) { - init_retval = tmp; - _papi_hwi_shutdown_global_internal( ); -diff --git a/src/papi_internal.c b/src/papi_internal.c -index 5a1ccd43..e6dd319c 100644 ---- a/src/papi_internal.c -+++ b/src/papi_internal.c -@@ -1928,11 +1928,13 @@ int papi_num_components = ( sizeof ( _papi_hwd ) / sizeof ( *_papi_hwd ) ) - 1; - * Routine that initializes all available components. - * A component is available if a pointer to its info vector - * appears in the NULL terminated_papi_hwd table. -+ * Modified to accept an arg: 0=do not init perf_event or -+ * perf_event_uncore. 1=init ONLY perf_event or perf_event_uncore. - */ - int --_papi_hwi_init_global( void ) -+_papi_hwi_init_global( int PE_OR_PEU ) - { -- int retval, i = 0; -+ int retval, is_pe_peu, i = 0; - - retval = _papi_hwi_innoculate_os_vector( &_papi_os_vector ); - if ( retval != PAPI_OK ) { -@@ -1940,14 +1942,16 @@ _papi_hwi_init_global( void ) - } - - while ( _papi_hwd[i] ) { -- -+ is_pe_peu = 0; -+ if (strcmp(_papi_hwd[i]->cmp_info.name, "perf_event") == 0) is_pe_peu=1; -+ if (strcmp(_papi_hwd[i]->cmp_info.name, "perf_event_uncore") == 0) is_pe_peu=1; - retval = _papi_hwi_innoculate_vector( _papi_hwd[i] ); - if ( retval != PAPI_OK ) { - return retval; - } - - /* We can be disabled by user before init */ -- if (!_papi_hwd[i]->cmp_info.disabled) { -+ if (!_papi_hwd[i]->cmp_info.disabled && (PE_OR_PEU == is_pe_peu)) { - retval = _papi_hwd[i]->init_component( i ); - _papi_hwd[i]->cmp_info.disabled=retval; - -diff --git a/src/papi_internal.h b/src/papi_internal.h -index 6492fea4..e0f5acd7 100644 ---- a/src/papi_internal.h -+++ b/src/papi_internal.h -@@ -467,7 +467,7 @@ int _papi_hwi_read( hwd_context_t * context, EventSetInfo_t * ESI, - long long *values ); - int _papi_hwi_cleanup_eventset( EventSetInfo_t * ESI ); - int _papi_hwi_convert_eventset_to_multiplex( _papi_int_multiplex_t * mpx ); --int _papi_hwi_init_global( void ); -+int _papi_hwi_init_global( int PE_OR_PEU ); - int _papi_hwi_init_global_internal( void ); - int _papi_hwi_init_os(void); - void _papi_hwi_init_errors(void); diff --git a/SPECS/papi.spec b/SPECS/papi.spec index c4eeb46..320cae9 100644 --- a/SPECS/papi.spec +++ b/SPECS/papi.spec @@ -10,26 +10,16 @@ %{!?with_pcp: %global with_pcp 1} Summary: Performance Application Programming Interface Name: papi -Version: 6.0.0 -Release: 16%{?dist} -License: BSD +Version: 7.2.0 +Release: 1%{?dist} +License: BSD-3-Clause Requires: papi-libs = %{version}-%{release} URL: http://icl.cs.utk.edu/papi/ -# The upstream papi tar.gz file include iozone source code in it. -# The license for iozone source code is not compatible, so it needs -# to be eliminated from the srpm. -# The iozone code has been removed from the upstream papi git repo -# so when papi is rebased to a newer version it can be used as is. -Source0: http://icl.cs.utk.edu/projects/papi/downloads/%{name}-%{version}-noiozone.tar.gz -Patch1: papi-python3.patch -Patch4: papi-config.patch -Patch5: papi-nostatic.patch -Patch6: papi-lto.patch -Patch7: papi-rhbz1923967.patch -Patch21: papi-arm64fastread.patch -Patch31: papi-701eventupdate.patch -Patch40: papi-thread_init.patch -Patch41: papi-71eventupdate.patch +Source0: http://icl.cs.utk.edu/projects/papi/downloads/%{name}-%{version}.tar.gz +Patch1: papi-nostatic.patch +Patch2: papi-avail-path-fix.patch +Patch3: papi-revert-event-depr.patch +Patch4: papi-revert-arm-test.patch BuildRequires: make BuildRequires: autoconf BuildRequires: doxygen @@ -63,12 +53,14 @@ PAPI provides a programmer interface to monitor the performance of running programs. %package libs +License: BSD-3-Clause Summary: Libraries for PAPI clients %description libs This package contains the run-time libraries for any application that wishes to use PAPI. %package devel +License: BSD-3-Clause Summary: Header files for the compiling programs with PAPI Requires: papi = %{version}-%{release} Requires: papi-libs = %{version}-%{release} @@ -79,6 +71,7 @@ libraries and interfaces. This is required for rebuilding any program that uses PAPI. %package testsuite +License: BSD-3-Clause Summary: Set of tests for checking PAPI functionality Requires: papi = %{version}-%{release} Requires: papi-libs = %{version}-%{release} @@ -88,6 +81,7 @@ that PAPI functions on particular hardware. %if %{with_static} %package static +License: BSD-3-Clause Summary: Static libraries for the compiling programs with PAPI Requires: papi = %{version}-%{release} %description static @@ -97,15 +91,10 @@ the PAPI user-space libraries and interfaces. %prep %setup -q -%patch1 -p1 -b .python3 -%patch4 -p1 -%patch5 -p1 -%patch6 -p1 -%patch7 -p1 -%patch21 -p1 -%patch31 -p1 -%patch40 -p1 -%patch41 -p1 +%patch 1 -p1 -b papi-nostatic.patch +%patch 2 -p1 -b papi-avail-path-fix.patch +%patch 3 -p1 -b revert-event-depr.patch +%patch 4 -p1 -b revert-arm-test.patch %build @@ -139,7 +128,7 @@ autoconf %configure --with-perf-events \ %{?libpfm_config} \ %{?static_lib_config} \ ---with-shared-lib=yes --with-shlib --with-shlib-tools \ +--with-shared-lib=yes --with-shlib-tools \ --with-components="appio coretemp example infiniband lmsensors lustre micpower mx net %{?pcp_enable} rapl stealtime" # implicit enabled components: perf_event perf_event_uncore #components currently left out because of build configure/build issues @@ -160,7 +149,8 @@ rm -rf $RPM_BUILD_ROOT cd src make DESTDIR=$RPM_BUILD_ROOT LDCONFIG=/bin/true install-all -chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* +# Scrub the rpath/runpath from all the binaries. +find %{buildroot} -type f -executable ! -iname "*.py" ! -iname "*.sh" | xargs chrpath --delete %files %{_bindir}/* @@ -177,6 +167,7 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* %files devel %{_includedir}/*.h +%{_includedir}/*.hpp %if %{with bundled_libpfm} %{_includedir}/perfmon/*.h %endif @@ -198,6 +189,13 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* %endif %changelog +* Mon Nov 03 2025 RHEL Packaging Agent - 7.2.0-1 +- Rebase to papi 7.2.0 +- Update license tag to BSD-3-Clause +- Remove old patches and add new patches for 7.2.0 compatibility +- Update configure options and package files +- Resolves: RHEL-121673 + * Fri Nov 17 2023 William Cohen - 6.0.0-16 - Update papi event presets (RHEL-9333, RHEL-9334, RHEL-9335)