From 35b77c1f5a6ce73c5d404bbed3e17fd906631caf Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 28 Jun 2022 07:00:00 -0400 Subject: [PATCH] import papi-5.6.0-14.el8_6.1 --- SOURCES/papi-rhbz2037417.patch | 61 +++++++ SOURCES/papi-rhbz2037426.patch | 42 +++++ SOURCES/papi-rhbz2037427.patch | 140 +++++++++++++++ SOURCES/papi-zen.patch | 302 +++++++++++++++++++++++++++++++++ SPECS/papi.spec | 13 +- 5 files changed, 557 insertions(+), 1 deletion(-) create mode 100644 SOURCES/papi-rhbz2037417.patch create mode 100644 SOURCES/papi-rhbz2037426.patch create mode 100644 SOURCES/papi-rhbz2037427.patch create mode 100644 SOURCES/papi-zen.patch diff --git a/SOURCES/papi-rhbz2037417.patch b/SOURCES/papi-rhbz2037417.patch new file mode 100644 index 0000000..c6341c5 --- /dev/null +++ b/SOURCES/papi-rhbz2037417.patch @@ -0,0 +1,61 @@ +commit 6964aa356fa606f320c7b871123aceb5c1f21999 +Author: Masahiko, Yamada +Date: Tue Aug 24 14:17:29 2021 +0900 + + Fix the PAPI_FUL_CCY setting for a64fx + + In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect. + PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT + + The correct settings are:. + PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 4ef647959..74deb712f 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF + PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS + PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND + PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT +-PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT ++PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT + PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED + PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED +commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f +Author: Masahiko, Yamada +Date: Fri Mar 4 15:41:30 2022 +0900 + + Add PAPI idle-related preset events for a64fx + + For a64fx, add four PAPI idle-related preset events + (PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL). + + PAPI_BRU_IDL = BR_COMP_WAIT + PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT + PAPI_FPU_IDL = FL_COMP_WAIT + PAPI_LSU_IDL = LD_COMP_WAIT + + The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT, + and LD_COMP_WAIT can be found in the "14.4. Cycle Accounting" + on A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL:. + https://github.com/fujitsu/A64FX/blob/master/doc + + Signed-off-by: Masahiko, Yamada + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 74deb712f..1cd498e91 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS + PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND + PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT + PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT ++PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT ++PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT ++PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT ++PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT + PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ + PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED + PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED diff --git a/SOURCES/papi-rhbz2037426.patch b/SOURCES/papi-rhbz2037426.patch new file mode 100644 index 0000000..ed03cb3 --- /dev/null +++ b/SOURCES/papi-rhbz2037426.patch @@ -0,0 +1,42 @@ +commit b78d7665bc02a0ce17adc6c09ab052064a940937 +Author: Masahiko, Yamada +Date: Wed Dec 8 19:39:44 2021 +0900 + + Improve the papi_xml_event_info command. + + Modify the papi_xml_event_info command as follows:. + - Test only the event name even if the event has a unit mask. + - Test other unit masks in the event even if + there is an error in one unit mask in the event. + + Signed-off-by: Masahiko, Yamada + +diff --git a/src/utils/papi_xml_event_info.c b/src/utils/papi_xml_event_info.c +index 2a777a9fe..c024cc036 100644 +--- a/src/utils/papi_xml_event_info.c ++++ b/src/utils/papi_xml_event_info.c +@@ -226,9 +226,6 @@ enum_native_events( FILE * f, int cidx) + k = i; + if ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cidx ) == PAPI_OK ) { + +- /* Test if event can be added */ +- if ( test_event( k ) == PAPI_OK ) { +- + /* add the event */ + xmlize_event( f, &info, num ); + +@@ -237,13 +234,12 @@ enum_native_events( FILE * f, int cidx) + retval = PAPI_get_event_info( k, &info ); + if ( retval == PAPI_OK ) { + if ( test_event( k )!=PAPI_OK ) { +- break; ++ continue; + } + xmlize_event( f, &info, -1 ); + } + } while ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cidx ) == PAPI_OK); + fprintf( f, " \n" ); +- } + } else { + /* this event has no unit masks; test & write the event */ + if ( test_event( i ) == PAPI_OK ) { diff --git a/SOURCES/papi-rhbz2037427.patch b/SOURCES/papi-rhbz2037427.patch new file mode 100644 index 0000000..3213976 --- /dev/null +++ b/SOURCES/papi-rhbz2037427.patch @@ -0,0 +1,140 @@ +commit 2098e8656156084104ab8d1981b53c50d22b8f62 +Author: Masahiko, Yamada +Date: Fri Mar 4 13:34:20 2022 +0900 + + PAPI_get_hardware_info: improve PAPI_hw_info_t for ARM processors + + Currently, it is not possible to determine which company the ARM processor + was designed by from the PAPI_hw_info_t available in PAPI_get_hardware_info(). + On ARM processors, the PAPI_hw_info_t obtained with PAPI_get_hardware_info() + does not contain information indicating which company was designed. + For ARM processors, improve the vendor and vendor_string entries + in PAPI_hw_info_t, which can be retrieved with PAPI_get_hardware_info(), + to include information indicating which company was designed. + + Signed-off-by: Masahiko, Yamada + +diff --git a/src/components/perf_event/pe_libpfm4_events.c b/src/components/perf_event/pe_libpfm4_events.c +index 744851ff0..6dcb5e023 100644 +--- a/src/components/perf_event/pe_libpfm4_events.c ++++ b/src/components/perf_event/pe_libpfm4_events.c +@@ -1248,8 +1248,10 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx, + &pinfo,sizeof(pfm_pmu_info_t)); + found_default++; + } ++ ++ /* For ARM processors, */ + if ( (pinfo.type==PFM_PMU_TYPE_CORE) && +- ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM)) { ++ ( _papi_hwi_system_info.hw_info.vendor >= PAPI_VENDOR_ARM_ARM)) { + if (strlen(_papi_hwi_system_info.hw_info.model_string) == 0) { + strSize = sizeof(_papi_hwi_system_info.hw_info.model_string); + strncpy( _papi_hwi_system_info.hw_info.model_string, pinfo.desc, strSize - 1); +diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c +index a50194cf6..6985fc4cb 100644 +--- a/src/components/perf_event/perf_event.c ++++ b/src/components/perf_event/perf_event.c +@@ -137,7 +137,8 @@ pe_vendor_fixups(papi_vector_t *vector) + } + + /* ARM */ +- if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM) { ++ /* If implementer is ARM Limited. */ ++ if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM_ARM) { + + /* Some ARMv7 and earlier could not measure */ + /* KERNEL and USER separately. */ +diff --git a/src/linux-common.c b/src/linux-common.c +index 99601db86..2527981ad 100644 +--- a/src/linux-common.c ++++ b/src/linux-common.c +@@ -112,8 +112,20 @@ decode_vendor_string( char *s, int *vendor ) + *vendor = PAPI_VENDOR_IBM; + else if ( strcasecmp( s, "Cray" ) == 0 ) + *vendor = PAPI_VENDOR_CRAY; +- else if ( strcasecmp( s, "ARM" ) == 0 ) +- *vendor = PAPI_VENDOR_ARM; ++ else if ( strcasecmp( s, "ARM_ARM" ) == 0 ) ++ *vendor = PAPI_VENDOR_ARM_ARM; ++ else if ( strcasecmp( s, "ARM_BROADCOM" ) == 0 ) ++ *vendor = PAPI_VENDOR_ARM_BROADCOM; ++ else if ( strcasecmp( s, "ARM_CAVIUM" ) == 0 ) ++ *vendor = PAPI_VENDOR_ARM_CAVIUM; ++ else if ( strcasecmp( s, "ARM_FUJITSU" ) == 0 ) ++ *vendor = PAPI_VENDOR_ARM_FUJITSU; ++ else if ( strcasecmp( s, "ARM_HISILICON") == 0 ) ++ *vendor = PAPI_VENDOR_ARM_HISILICON; ++ else if ( strcasecmp( s, "ARM_APM" ) == 0 ) ++ *vendor = PAPI_VENDOR_ARM_APM; ++ else if ( strcasecmp( s, "ARM_QUALCOMM" ) == 0 ) ++ *vendor = PAPI_VENDOR_ARM_QUALCOMM; + else if ( strcasecmp( s, "MIPS" ) == 0 ) + *vendor = PAPI_VENDOR_MIPS; + else if ( strcasecmp( s, "SiCortex" ) == 0 ) +@@ -409,9 +421,38 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz ) + } + else { + /* "CPU implementer" indicates ARM */ ++ /* For ARM processors, hwinfo->vendor >= PAPI_VENDOR_ARM_ARM(0x41). */ ++ /* If implementer is ARM Limited., hwinfo->vendor == PAPI_VENDOR_ARM_ARM. */ ++ /* If implementer is Cavium Inc., hwinfo->vendor == PAPI_VENDOR_ARM_CAVIUM(0x43). */ + s = search_cpu_info( f, "CPU implementer"); + if ( s ) { +- strcpy( hwinfo->vendor_string, "ARM" ); ++ int tmp; ++ sscanf( s, "%x", &tmp ); ++ switch( tmp ) { ++ case PAPI_VENDOR_ARM_ARM: ++ strcpy( hwinfo->vendor_string, "ARM_ARM" ); ++ break; ++ case PAPI_VENDOR_ARM_BROADCOM: ++ strcpy( hwinfo->vendor_string, "ARM_BROADCOM" ); ++ break; ++ case PAPI_VENDOR_ARM_CAVIUM: ++ strcpy( hwinfo->vendor_string, "ARM_CAVIUM" ); ++ break; ++ case PAPI_VENDOR_ARM_FUJITSU: ++ strcpy( hwinfo->vendor_string, "ARM_FUJITSU" ); ++ break; ++ case PAPI_VENDOR_ARM_HISILICON: ++ strcpy( hwinfo->vendor_string, "ARM_HISILICON" ); ++ break; ++ case PAPI_VENDOR_ARM_APM: ++ strcpy( hwinfo->vendor_string, "ARM_APM" ); ++ break; ++ case PAPI_VENDOR_ARM_QUALCOMM: ++ strcpy( hwinfo->vendor_string, "ARM_QUALCOMM" ); ++ break; ++ default: ++ strcpy( hwinfo->vendor_string, "ARM_UNKNOWN" ); ++ } + } + } + } +@@ -438,7 +479,7 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz ) + decode_cpuinfo_power(f,hwinfo); + } + +- if (hwinfo->vendor==PAPI_VENDOR_ARM) { ++ if (hwinfo->vendor>=PAPI_VENDOR_ARM_ARM) { + + decode_cpuinfo_arm(f,hwinfo); + } +diff --git a/src/papi.h b/src/papi.h +index 14b05da1f..b05b368cb 100644 +--- a/src/papi.h ++++ b/src/papi.h +@@ -354,6 +354,13 @@ All of the functions in the PerfAPI should use the following set of constants. + #define PAPI_VENDOR_FREESCALE 6 + #define PAPI_VENDOR_ARM 7 + #define PAPI_VENDOR_MIPS 8 ++#define PAPI_VENDOR_ARM_ARM 0x41 ++#define PAPI_VENDOR_ARM_BROADCOM 0x42 ++#define PAPI_VENDOR_ARM_CAVIUM 0x43 ++#define PAPI_VENDOR_ARM_FUJITSU 0x46 ++#define PAPI_VENDOR_ARM_HISILICON 0x48 ++#define PAPI_VENDOR_ARM_APM 0x50 ++#define PAPI_VENDOR_ARM_QUALCOMM 0x51 + /** @} */ + + /** @internal diff --git a/SOURCES/papi-zen.patch b/SOURCES/papi-zen.patch new file mode 100644 index 0000000..5bdbe5c --- /dev/null +++ b/SOURCES/papi-zen.patch @@ -0,0 +1,302 @@ +commit 20890adcb59a1c1648cb70be65332c03a3781e1a +Author: Anthony Castaldo +Date: Thu Jan 16 16:43:51 2020 -0500 + + Added two machine types to papi_events.csv to be in line with + libpfm4 update to support amd64_fam17h_zen1 and zen2. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 97446ad2c..8e96adfbd 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -396,6 +396,8 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE + # + # + CPU,amd64_fam17h ++CPU,amd64_fam17h_zen1 ++CPU,amd64_fam17h_zen2 + # + PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT + +commit ae449f73abd0849f05ab3e1f3a64bde0c670c645 +Author: Anthony +Date: Fri Jul 17 12:05:14 2020 -0400 + + Separated the cache preset events of AMD Zen1 and Zen2 and added some more. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 8e96adfbd..2325bd4dc 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE + # + CPU,amd64_fam17h + CPU,amd64_fam17h_zen1 +-CPU,amd64_fam17h_zen2 + # + PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS + PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT +@@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT + PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS + PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions" + PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions" ++# Events discovered via CAT ++PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C ++PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L ++PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X ++# ++# ++CPU,amd64_fam17h_zen2 ++# Events copied from zen1 that also exist on zen2 ++PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT ++PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K ++PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS ++PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT ++# Events discovered via CAT ++PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS ++PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C ++PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L ++PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X ++ + # + # + CPU,Intel architectural PMU +commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d +Author: Heike Jagode +Date: Thu Sep 24 13:33:38 2020 -0400 + + Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS) + for AMD zen2. + + PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) + explains that FLOP events require MergeEvent support, which was included + in the 5.6 kernel. + + ===>>> Hence, a kernel version 5.6 or greater is required. + + NOTE: without the MergeEvent support in the kernel, + there is no guarantee that the SSE/AVX FLOP + events produce any useful data whatsoever. + + These events have been tested and verified for + scalar flops, SSE, AVX, and FMA: + + (1) for one AVX instruction (e.g. _mm256_add_pd()), + the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns + a count of 4 (in the case of double precision), and + a count of 8 (in the case of single precision). + + (2) for one AVX FMA instruction (e.g. _mm256_macc_pd()), + the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns + a count of 8 (in the case of double precision), and + a count of 16 (in the case of single precision). + + (3) for one SSE instruction (e.g. _mm_mul_pd()), + the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns + a count of 2 (in the case of double precision), and + a count of 4 (in the case of single precision). + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2325bd4dc..2ff3e4d16 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ + PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L + PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X + +-# +-# ++# New FLOP event on zen2 ++# PPR (under section 2.1.15.3. -- ++# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) ++# explains that FLOP events require MergeEvent support, which was included ++# in the 5.6 kernel. ++# Hence, a kernel version 5.6 or greater is required. ++# NOTE: without the MergeEvent support in the kernel, there is no guarantee ++# that this SSE/AVX FLOP event produces any useful data whatsoever. ++PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++ ++ + CPU,Intel architectural PMU + CPU,ix86arch + # + +commit 35f93252a6e222299c03f2c94912334488e76b02 +Author: Heike Jagode +Date: Thu Sep 24 18:40:59 2020 -0400 + + Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP) + for AMD zen2. + + For unoptimized code (like native MMM), these events may include + non-numeric floating-point instructions, e.g. MOVSD: move or merge + scalar double-precision floating-point value instructions. + + Tested with: + 1) SSE double: _mm_mul_pd / _mm_add_pd + 2) SSE single: _mm_mul_ps / _mm_add_ps + 3) AVX double: _mm256_mul_pd / _mm256_add_pd + 4) AVX single: _mm256_mul_ps / _mm256_add_ps + 5) FMA double: _mm256_macc_pd + 6) FMA single: _mm256_macc_pd + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 2ff3e4d16..60a64564d 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ + PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++# Floating-point instructions (including non-numeric floating-point instructions, ++# e.g. Move or Merge Scalar Double-Precision Floating-Point values) ++PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + + + CPU,Intel architectural PMU + +commit 344f6493425d865577508ff32b6f65516b1b4394 +Author: Heike Jagode +Date: Thu Sep 24 19:03:31 2020 -0400 + + Added missing 'PRESET' to csv file. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 60a64564d..724d520f0 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY + # Floating-point instructions (including non-numeric floating-point instructions, + # e.g. Move or Merge Scalar Double-Precision Floating-Point values) +-PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + + + CPU,Intel architectural PMU + +commit 4616aa717c5301a9a478876661eb8ac1f18c0333 +Author: Heike Jagode +Date: Thu Oct 8 11:36:23 2020 -0400 + + For zen2, since FP_OPS counts both single- and double-prec operations + correctly, we don't need to confuse the user with additional + DP_OPS and SP_OPS events. So, I'm taking them out. + + Same applies for events counting FP instructions. + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 724d520f0..9ebf557e1 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_ + # NOTE: without the MergeEvent support in the kernel, there is no guarantee + # that this SSE/AVX FLOP event produces any useful data whatsoever. + PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +-PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY +-PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++# Since FP_OPS counts both single- and double-prec operations ++# correctly, we don't need to confuse the user with additional ++# DP_OPS and SP_OPS events. So, I'm taking them out. ++#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++# + # Floating-point instructions (including non-numeric floating-point instructions, + # e.g. Move or Merge Scalar Double-Precision Floating-Point values) + PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR +-PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++# Since FP_INS counts both single- and double-prec instuctions ++# correctly, we don't need to confuse the user with additional ++# VEC_DP and VEC_SP events. So, I'm taking them out. ++#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + + + CPU,Intel architectural PMU + +commit 274219e85ba8adcd2e9c78507adf7edb05b71daa +Author: Sebastian Mobo +Date: Thu Oct 8 13:40:21 2020 -0400 + + Added instruction-cache preset events for the Zen2. + + Signed-off-by: Anthony + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 9ebf557e1..fd75f9371 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS + PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C + PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L + PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X +- ++# ++PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++# ++PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS ++PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S + # New FLOP event on zen2 + # PPR (under section 2.1.15.3. -- + # https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip) + +commit 02f34baafb868d183f21bebfd3c46574847b9929 +Author: Swarup Sahoo +Date: Tue May 18 02:51:56 2021 +0530 + + Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip + + Signed-off-by: Swarup Sahoo + +diff --git a/src/papi_events.csv b/src/papi_events.csv +index 4ef647959..d9e9da8a3 100644 +--- a/src/papi_events.csv ++++ b/src/papi_events.csv +@@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X + # VEC_DP and VEC_SP events. So, I'm taking them out. + #PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR + #PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR ++# ++# ++CPU,amd64_fam19h_zen3 ++PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS ++PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT ++PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS ++PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED ++PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT ++PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K ++PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH ++PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X ++PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C ++PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X ++PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X ++PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ ++PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS ++PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S ++# RETIRED_SSE_AVX_FLOPS requires MergeEvent support. ++PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY ++PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS ++PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS ++PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS ++PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS + + + CPU,Intel architectural PMU diff --git a/SPECS/papi.spec b/SPECS/papi.spec index 0602c4e..1ea1db8 100644 --- a/SPECS/papi.spec +++ b/SPECS/papi.spec @@ -8,7 +8,7 @@ Summary: Performance Application Programming Interface Name: papi Version: 5.6.0 -Release: 14%{?dist} +Release: 14%{?dist}.1 License: BSD Group: Development/System Requires: papi-libs = %{version}-%{release} @@ -21,6 +21,10 @@ Patch4: papi-thread_init.patch Patch5: papi-mx.patch Patch6: papi-bz1908126.patch Patch7: papi-rhbz1918721.patch +Patch8: papi-rhbz2037417.patch +Patch9: papi-rhbz2037426.patch +Patch10: papi-rhbz2037427.patch +Patch11: papi-zen.patch BuildRequires: autoconf BuildRequires: doxygen BuildRequires: ncurses-devel @@ -91,6 +95,10 @@ the PAPI user-space libraries and interfaces. %patch5 -p1 %patch6 -p1 %patch7 -p1 +%patch8 -p1 +%patch9 -p1 +%patch10 -p1 +%patch11 -p1 %build %if %{without bundled_libpfm} @@ -173,6 +181,9 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* %{_libdir}/*.a %changelog +* Thu May 19 2022 William Cohen - 5.6.0-14.1 +- AMD Zen2/3 support (RHBZ #2088284) + * Tue May 25 2021 William Cohen - 5.6.0-14 - Disable problematic IBM Power9 events.