import papi-5.6.0-14.el8_6.1

This commit is contained in:
CentOS Sources 2022-06-28 07:00:00 -04:00 committed by root
parent dfd9af71bc
commit 35b77c1f5a
5 changed files with 557 additions and 1 deletions

View File

@ -0,0 +1,61 @@
commit 6964aa356fa606f320c7b871123aceb5c1f21999
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
Date: Tue Aug 24 14:17:29 2021 +0900
Fix the PAPI_FUL_CCY setting for a64fx
In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect.
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT
The correct setting is:
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 4ef647959..74deb712f 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
-PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
Date: Fri Mar 4 15:41:30 2022 +0900
Add PAPI idle-related preset events for a64fx
For a64fx, add four PAPI idle-related preset events
(PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL).
PAPI_BRU_IDL = BR_COMP_WAIT
PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT
PAPI_FPU_IDL = FL_COMP_WAIT
PAPI_LSU_IDL = LD_COMP_WAIT
The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT,
and LD_COMP_WAIT can be found in section "14.4. Cycle Accounting"
of A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL:
https://github.com/fujitsu/A64FX/blob/master/doc
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 74deb712f..1cd498e91 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
+PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT
+PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT
+PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT
+PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED

View File

@ -0,0 +1,42 @@
commit b78d7665bc02a0ce17adc6c09ab052064a940937
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
Date: Wed Dec 8 19:39:44 2021 +0900
Improve the papi_xml_event_info command.
Modify the papi_xml_event_info command as follows:
- Test only the event name even if the event has a unit mask.
- Test other unit masks in the event even if
there is an error in one unit mask in the event.
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
diff --git a/src/utils/papi_xml_event_info.c b/src/utils/papi_xml_event_info.c
index 2a777a9fe..c024cc036 100644
--- a/src/utils/papi_xml_event_info.c
+++ b/src/utils/papi_xml_event_info.c
@@ -226,9 +226,6 @@ enum_native_events( FILE * f, int cidx)
k = i;
if ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cidx ) == PAPI_OK ) {
- /* Test if event can be added */
- if ( test_event( k ) == PAPI_OK ) {
-
/* add the event */
xmlize_event( f, &info, num );
@@ -237,13 +234,12 @@ enum_native_events( FILE * f, int cidx)
retval = PAPI_get_event_info( k, &info );
if ( retval == PAPI_OK ) {
if ( test_event( k )!=PAPI_OK ) {
- break;
+ continue;
}
xmlize_event( f, &info, -1 );
}
} while ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cidx ) == PAPI_OK);
fprintf( f, " </event>\n" );
- }
} else {
/* this event has no unit masks; test & write the event */
if ( test_event( i ) == PAPI_OK ) {

View File

@ -0,0 +1,140 @@
commit 2098e8656156084104ab8d1981b53c50d22b8f62
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
Date: Fri Mar 4 13:34:20 2022 +0900
PAPI_get_hardware_info: improve PAPI_hw_info_t for ARM processors
Currently, it is not possible to determine which company designed an ARM
processor from the PAPI_hw_info_t available in PAPI_get_hardware_info().
On ARM processors, the PAPI_hw_info_t obtained with PAPI_get_hardware_info()
does not contain information indicating which company designed the processor.
For ARM processors, improve the vendor and vendor_string entries
in PAPI_hw_info_t, which can be retrieved with PAPI_get_hardware_info(),
to include information indicating which company designed the processor.
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
diff --git a/src/components/perf_event/pe_libpfm4_events.c b/src/components/perf_event/pe_libpfm4_events.c
index 744851ff0..6dcb5e023 100644
--- a/src/components/perf_event/pe_libpfm4_events.c
+++ b/src/components/perf_event/pe_libpfm4_events.c
@@ -1248,8 +1248,10 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
&pinfo,sizeof(pfm_pmu_info_t));
found_default++;
}
+
+ /* For ARM processors, */
if ( (pinfo.type==PFM_PMU_TYPE_CORE) &&
- ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM)) {
+ ( _papi_hwi_system_info.hw_info.vendor >= PAPI_VENDOR_ARM_ARM)) {
if (strlen(_papi_hwi_system_info.hw_info.model_string) == 0) {
strSize = sizeof(_papi_hwi_system_info.hw_info.model_string);
strncpy( _papi_hwi_system_info.hw_info.model_string, pinfo.desc, strSize - 1);
diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
index a50194cf6..6985fc4cb 100644
--- a/src/components/perf_event/perf_event.c
+++ b/src/components/perf_event/perf_event.c
@@ -137,7 +137,8 @@ pe_vendor_fixups(papi_vector_t *vector)
}
/* ARM */
- if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM) {
+ /* If implementer is ARM Limited. */
+ if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM_ARM) {
/* Some ARMv7 and earlier could not measure */
/* KERNEL and USER separately. */
diff --git a/src/linux-common.c b/src/linux-common.c
index 99601db86..2527981ad 100644
--- a/src/linux-common.c
+++ b/src/linux-common.c
@@ -112,8 +112,20 @@ decode_vendor_string( char *s, int *vendor )
*vendor = PAPI_VENDOR_IBM;
else if ( strcasecmp( s, "Cray" ) == 0 )
*vendor = PAPI_VENDOR_CRAY;
- else if ( strcasecmp( s, "ARM" ) == 0 )
- *vendor = PAPI_VENDOR_ARM;
+ else if ( strcasecmp( s, "ARM_ARM" ) == 0 )
+ *vendor = PAPI_VENDOR_ARM_ARM;
+ else if ( strcasecmp( s, "ARM_BROADCOM" ) == 0 )
+ *vendor = PAPI_VENDOR_ARM_BROADCOM;
+ else if ( strcasecmp( s, "ARM_CAVIUM" ) == 0 )
+ *vendor = PAPI_VENDOR_ARM_CAVIUM;
+ else if ( strcasecmp( s, "ARM_FUJITSU" ) == 0 )
+ *vendor = PAPI_VENDOR_ARM_FUJITSU;
+ else if ( strcasecmp( s, "ARM_HISILICON") == 0 )
+ *vendor = PAPI_VENDOR_ARM_HISILICON;
+ else if ( strcasecmp( s, "ARM_APM" ) == 0 )
+ *vendor = PAPI_VENDOR_ARM_APM;
+ else if ( strcasecmp( s, "ARM_QUALCOMM" ) == 0 )
+ *vendor = PAPI_VENDOR_ARM_QUALCOMM;
else if ( strcasecmp( s, "MIPS" ) == 0 )
*vendor = PAPI_VENDOR_MIPS;
else if ( strcasecmp( s, "SiCortex" ) == 0 )
@@ -409,9 +421,38 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz )
}
else {
/* "CPU implementer" indicates ARM */
+ /* For ARM processors, hwinfo->vendor >= PAPI_VENDOR_ARM_ARM(0x41). */
+ /* If implementer is ARM Limited., hwinfo->vendor == PAPI_VENDOR_ARM_ARM. */
+ /* If implementer is Cavium Inc., hwinfo->vendor == PAPI_VENDOR_ARM_CAVIUM(0x43). */
s = search_cpu_info( f, "CPU implementer");
if ( s ) {
- strcpy( hwinfo->vendor_string, "ARM" );
+ int tmp;
+ sscanf( s, "%x", &tmp );
+ switch( tmp ) {
+ case PAPI_VENDOR_ARM_ARM:
+ strcpy( hwinfo->vendor_string, "ARM_ARM" );
+ break;
+ case PAPI_VENDOR_ARM_BROADCOM:
+ strcpy( hwinfo->vendor_string, "ARM_BROADCOM" );
+ break;
+ case PAPI_VENDOR_ARM_CAVIUM:
+ strcpy( hwinfo->vendor_string, "ARM_CAVIUM" );
+ break;
+ case PAPI_VENDOR_ARM_FUJITSU:
+ strcpy( hwinfo->vendor_string, "ARM_FUJITSU" );
+ break;
+ case PAPI_VENDOR_ARM_HISILICON:
+ strcpy( hwinfo->vendor_string, "ARM_HISILICON" );
+ break;
+ case PAPI_VENDOR_ARM_APM:
+ strcpy( hwinfo->vendor_string, "ARM_APM" );
+ break;
+ case PAPI_VENDOR_ARM_QUALCOMM:
+ strcpy( hwinfo->vendor_string, "ARM_QUALCOMM" );
+ break;
+ default:
+ strcpy( hwinfo->vendor_string, "ARM_UNKNOWN" );
+ }
}
}
}
@@ -438,7 +479,7 @@ _linux_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz )
decode_cpuinfo_power(f,hwinfo);
}
- if (hwinfo->vendor==PAPI_VENDOR_ARM) {
+ if (hwinfo->vendor>=PAPI_VENDOR_ARM_ARM) {
decode_cpuinfo_arm(f,hwinfo);
}
diff --git a/src/papi.h b/src/papi.h
index 14b05da1f..b05b368cb 100644
--- a/src/papi.h
+++ b/src/papi.h
@@ -354,6 +354,13 @@ All of the functions in the PerfAPI should use the following set of constants.
#define PAPI_VENDOR_FREESCALE 6
#define PAPI_VENDOR_ARM 7
#define PAPI_VENDOR_MIPS 8
+#define PAPI_VENDOR_ARM_ARM 0x41
+#define PAPI_VENDOR_ARM_BROADCOM 0x42
+#define PAPI_VENDOR_ARM_CAVIUM 0x43
+#define PAPI_VENDOR_ARM_FUJITSU 0x46
+#define PAPI_VENDOR_ARM_HISILICON 0x48
+#define PAPI_VENDOR_ARM_APM 0x50
+#define PAPI_VENDOR_ARM_QUALCOMM 0x51
/** @} */
/** @internal

302
SOURCES/papi-zen.patch Normal file
View File

@ -0,0 +1,302 @@
commit 20890adcb59a1c1648cb70be65332c03a3781e1a
Author: Anthony Castaldo <TonyCastaldo@icl.utk.edu>
Date: Thu Jan 16 16:43:51 2020 -0500
Added two machine types to papi_events.csv to be in line with
libpfm4 update to support amd64_fam17h_zen1 and zen2.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 97446ad2c..8e96adfbd 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -396,6 +396,8 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE
#
#
CPU,amd64_fam17h
+CPU,amd64_fam17h_zen1
+CPU,amd64_fam17h_zen2
#
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
commit ae449f73abd0849f05ab3e1f3a64bde0c670c645
Author: Anthony <adanalis@icl.utk.edu>
Date: Fri Jul 17 12:05:14 2020 -0400
Separated the cache preset events of AMD Zen1 and Zen2 and added some more.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 8e96adfbd..2325bd4dc 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE
#
CPU,amd64_fam17h
CPU,amd64_fam17h_zen1
-CPU,amd64_fam17h_zen2
#
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
@@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
+# Events discovered via CAT
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
+#
+#
+CPU,amd64_fam17h_zen2
+# Events copied from zen1 that also exist on zen2
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
+# Events discovered via CAT
+PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
+
#
#
CPU,Intel architectural PMU
commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d
Author: Heike Jagode <jagode@icl.utk.edu>
Date: Thu Sep 24 13:33:38 2020 -0400
Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS)
for AMD zen2.
PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
explains that FLOP events require MergeEvent support, which was included
in the 5.6 kernel.
===>>> Hence, a kernel version 5.6 or greater is required.
NOTE: without the MergeEvent support in the kernel,
there is no guarantee that the SSE/AVX FLOP
events produce any useful data whatsoever.
These events have been tested and verified for
scalar flops, SSE, AVX, and FMA:
(1) for one AVX instruction (e.g. _mm256_add_pd()),
the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns
a count of 4 (in the case of double precision), and
a count of 8 (in the case of single precision).
(2) for one AVX FMA instruction (e.g. _mm256_macc_pd()),
the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns
a count of 8 (in the case of double precision), and
a count of 16 (in the case of single precision).
(3) for one SSE instruction (e.g. _mm_mul_pd()),
the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns
a count of 2 (in the case of double precision), and
a count of 4 (in the case of single precision).
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 2325bd4dc..2ff3e4d16 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
-#
-#
+# New FLOP event on zen2
+# PPR (under section 2.1.15.3. --
+# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
+# explains that FLOP events require MergeEvent support, which was included
+# in the 5.6 kernel.
+# Hence, a kernel version 5.6 or greater is required.
+# NOTE: without the MergeEvent support in the kernel, there is no guarantee
+# that this SSE/AVX FLOP event produces any useful data whatsoever.
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+
+
CPU,Intel architectural PMU
CPU,ix86arch
#
commit 35f93252a6e222299c03f2c94912334488e76b02
Author: Heike Jagode <jagode@icl.utk.edu>
Date: Thu Sep 24 18:40:59 2020 -0400
Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP)
for AMD zen2.
For unoptimized code (like native MMM), these events may include
non-numeric floating-point instructions, e.g. MOVSD: move or merge
scalar double-precision floating-point value instructions.
Tested with:
1) SSE double: _mm_mul_pd / _mm_add_pd
2) SSE single: _mm_mul_ps / _mm_add_ps
3) AVX double: _mm256_mul_pd / _mm256_add_pd
4) AVX single: _mm256_mul_ps / _mm256_add_ps
5) FMA double: _mm256_macc_pd
6) FMA single: _mm256_macc_ps
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 2ff3e4d16..60a64564d 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+# Floating-point instructions (including non-numeric floating-point instructions,
+# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
+PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
+PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
+PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
CPU,Intel architectural PMU
commit 344f6493425d865577508ff32b6f65516b1b4394
Author: Heike Jagode <jagode@icl.utk.edu>
Date: Thu Sep 24 19:03:31 2020 -0400
Added missing 'PRESET' to csv file.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 60a64564d..724d520f0 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
# Floating-point instructions (including non-numeric floating-point instructions,
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
-PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
-PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
-PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
+PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
+PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
CPU,Intel architectural PMU
commit 4616aa717c5301a9a478876661eb8ac1f18c0333
Author: Heike Jagode <jagode@icl.utk.edu>
Date: Thu Oct 8 11:36:23 2020 -0400
For zen2, since FP_OPS counts both single- and double-prec operations
correctly, we don't need to confuse the user with additional
DP_OPS and SP_OPS events. So, I'm taking them out.
Same applies for events counting FP instructions.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 724d520f0..9ebf557e1 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
# NOTE: without the MergeEvent support in the kernel, there is no guarantee
# that this SSE/AVX FLOP event produces any useful data whatsoever.
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
-PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
-PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+# Since FP_OPS counts both single- and double-prec operations
+# correctly, we don't need to confuse the user with additional
+# DP_OPS and SP_OPS events. So, I'm taking them out.
+#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+#
# Floating-point instructions (including non-numeric floating-point instructions,
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
-PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
-PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
+# Since FP_INS counts both single- and double-prec instructions
+# correctly, we don't need to confuse the user with additional
+# VEC_DP and VEC_SP events. So, I'm taking them out.
+#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
+#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
CPU,Intel architectural PMU
commit 274219e85ba8adcd2e9c78507adf7edb05b71daa
Author: Sebastian Mobo <smobo@vols.utk.edu>
Date: Thu Oct 8 13:40:21 2020 -0400
Added instruction-cache preset events for the Zen2.
Signed-off-by: Anthony <adanalis@icl.utk.edu>
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 9ebf557e1..fd75f9371 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
-
+#
+PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
+#
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
# New FLOP event on zen2
# PPR (under section 2.1.15.3. --
# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
commit 02f34baafb868d183f21bebfd3c46574847b9929
Author: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
Date: Tue May 18 02:51:56 2021 +0530
Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip
Signed-off-by: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 4ef647959..d9e9da8a3 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X
# VEC_DP and VEC_SP events. So, I'm taking them out.
#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
+#
+#
+CPU,amd64_fam19h_zen3
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
+PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
+PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
+PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
+PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
+# RETIRED_SSE_AVX_FLOPS requires MergeEvent support.
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
+PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
+PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
+PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
+PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
CPU,Intel architectural PMU

View File

@ -8,7 +8,7 @@
Summary: Performance Application Programming Interface
Name: papi
Version: 5.6.0
Release: 14%{?dist}
Release: 14%{?dist}.1
License: BSD
Group: Development/System
Requires: papi-libs = %{version}-%{release}
@ -21,6 +21,10 @@ Patch4: papi-thread_init.patch
Patch5: papi-mx.patch
Patch6: papi-bz1908126.patch
Patch7: papi-rhbz1918721.patch
Patch8: papi-rhbz2037417.patch
Patch9: papi-rhbz2037426.patch
Patch10: papi-rhbz2037427.patch
Patch11: papi-zen.patch
BuildRequires: autoconf
BuildRequires: doxygen
BuildRequires: ncurses-devel
@ -91,6 +95,10 @@ the PAPI user-space libraries and interfaces.
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch9 -p1
%patch10 -p1
%patch11 -p1
%build
%if %{without bundled_libpfm}
@ -173,6 +181,9 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so*
%{_libdir}/*.a
%changelog
* Thu May 19 2022 William Cohen <wcohen@redhat.com> - 5.6.0-14.1
- AMD Zen2/3 support (RHBZ #2088284)
* Tue May 25 2021 William Cohen <wcohen@redhat.com> - 5.6.0-14
- Disable problematic IBM Power9 events.