From 8956deaa5552b2cf90a02be3f9083b7ef6c5dc22 Mon Sep 17 00:00:00 2001 From: William Cohen Date: Thu, 20 Apr 2023 11:35:12 -0400 Subject: [PATCH] Upgrade to upstream libpfm-4.13.0 Resolves: #2185653 routine rebase of libpfm for rhel 8.9 --- .gitignore | 1 + libpfm-a64fx.patch | 1470 ----------- libpfm-amd_merge.patch | 56 - libpfm-flags.patch | 184 -- libpfm-initp.patch | 31 + libpfm-tx2.patch | 1341 ---------- libpfm-zen23.patch | 5345 ---------------------------------------- libpfm-zseries.patch | 128 - libpfm.spec | 21 +- sources | 2 +- 10 files changed, 40 insertions(+), 8539 deletions(-) delete mode 100644 libpfm-a64fx.patch delete mode 100644 libpfm-amd_merge.patch delete mode 100644 libpfm-flags.patch create mode 100644 libpfm-initp.patch delete mode 100644 libpfm-tx2.patch delete mode 100644 libpfm-zen23.patch delete mode 100644 libpfm-zseries.patch diff --git a/.gitignore b/.gitignore index 1b5ac24..450aec0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ SOURCES/libpfm-4.10.1.tar.gz /libpfm-4.10.1.tar.gz +/libpfm-4.13.0.tar.gz diff --git a/libpfm-a64fx.patch b/libpfm-a64fx.patch deleted file mode 100644 index 740c757..0000000 --- a/libpfm-a64fx.patch +++ /dev/null @@ -1,1470 +0,0 @@ -commit 0cfc35f73e0e39d54ba48c24e663bec93d164211 -Author: Steve Kaufmann -Date: Mon May 18 09:33:57 2020 -0700 - - Enable support for Fujitsu A64FX core PMU - - This patch adds support for Fujitsu A64FX core PMU. This - includes ARMv8 generic core events and Fujitsu model - specfic events. - - Signed-off-by: Steve Kaufmann - -diff --git a/README b/README -index f40489e..c21fb28 100644 ---- a/README -+++ b/README -@@ -72,6 +72,7 @@ The library supports many PMUs. 
The current version can handle: - ARMV8 Cortex A57, A53 - Applied Micro X-Gene - Qualcomm Krait -+ Fujitsu A64FX - - - For SPARC - Ultra I, II -diff --git a/docs/Makefile b/docs/Makefile -index e39de6b..995ece0 100644 ---- a/docs/Makefile -+++ b/docs/Makefile -@@ -122,7 +122,8 @@ ARCH_MAN += libpfm_arm_xgene.3 \ - libpfm_arm_ac15.3 \ - libpfm_arm_ac8.3 \ - libpfm_arm_ac9.3 \ -- libpfm_arm_qcom_krait.3 -+ libpfm_arm_qcom_krait.3 \ -+ libpfm_arm_a64fx.3 - endif - - ifeq ($(CONFIG_PFMLIB_ARCH_ARM64),y) -diff --git a/docs/man3/libpfm_arm_a64fx.3 b/docs/man3/libpfm_arm_a64fx.3 -new file mode 100644 -index 0000000..6214eb7 ---- /dev/null -+++ b/docs/man3/libpfm_arm_a64fx.3 -@@ -0,0 +1,36 @@ -+.TH LIBPFM 3 "May, 2020" "" "Linux Programmer's Manual" -+.SH NAME -+libpfm_arm_a64fx - support for Fujitsu A64FX PMU -+.SH SYNOPSIS -+.nf -+.B #include -+.sp -+.B PMU name: arm_a64fx -+.B PMU desc: Fujitsu A64FX -+.sp -+.SH DESCRIPTION -+The library supports the Fujitsu A64FX core PMU. -+ -+This PMU supports 6 counters and privilege levels filtering. -+It can operate in both 32 and 64 bit modes. -+ -+.SH MODIFIERS -+The following modifiers are supported on Fujitsu A64FX: -+.TP -+.B u -+Measure at the user level. This corresponds to \fBPFM_PLM3\fR. -+This is a boolean modifier. -+.TP -+.B k -+Measure at the kernel level. This corresponds to \fBPFM_PLM0\fR. -+This is a boolean modifier. -+.TP -+.B hv -+Measure at the hypervisor level. This corresponds to \fBPFM_PLMH\fR. -+This is a boolean modifier. 
-+ -+.SH AUTHORS -+.nf -+Stephane Eranian -+.if -+.PP -diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h -index e19772a..caa93db 100644 ---- a/include/perfmon/pfmlib.h -+++ b/include/perfmon/pfmlib.h -@@ -559,6 +559,8 @@ typedef enum { - PFM_PMU_ARM_THUNDERX2_LLC1, /* Marvell ThunderX2 LLC unit 1 uncore */ - PFM_PMU_ARM_THUNDERX2_CCPI0, /* Marvell ThunderX2 Cross-Socket Interconnect unit 0 uncore */ - PFM_PMU_ARM_THUNDERX2_CCPI1, /* Marvell ThunderX2 Cross-Socket Interconnect unit 1 uncore */ -+ -+ PFM_PMU_ARM_A64FX, /* Fujitsu A64FX processor */ - /* MUST ADD NEW PMU MODELS HERE */ - - PFM_PMU_MAX /* end marker */ -diff --git a/lib/events/arm_fujitsu_a64fx_events.h b/lib/events/arm_fujitsu_a64fx_events.h -new file mode 100644 -index 0000000..3b555f0 ---- /dev/null -+++ b/lib/events/arm_fujitsu_a64fx_events.h -@@ -0,0 +1,1136 @@ -+/* -+ * Copyright 2020 Cray Inc. All Rights Reserved. -+ */ -+ -+/* -+ * Fujitsu A64FX processor -+ * -+ * A64FX® PMU Events -+ * Fujitsu Limited -+ * 1.2, 28 April 2020 -+ */ -+ -+static const arm_entry_t arm_a64fx_pe[ ] = { -+ { -+ .name = "SW_INCR", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0000, -+ .desc = "This event counts on writes to the PMSWINC register.", -+ }, -+ { -+ .name = "L1I_CACHE_REFILL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0001, -+ .desc = "This event counts operations that cause a refill of at least the L1I cache.", -+ }, -+ { -+ .name = "L1I_TLB_REFILL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0002, -+ .desc = "This event counts operations that cause a TLB refill of at least the L1I TLB.", -+ }, -+ { -+ .name = "L1D_CACHE_REFILL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0003, -+ .desc = "This event counts operations that cause a refill of at least the L1D cache.", -+ }, -+ { -+ .name = "L1D_CACHE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0004, -+ .desc = "This event counts operations that cause a cache access to at least the L1D cache.", -+ }, -+ { -+ .name = "L1D_TLB_REFILL", -+ .modmsk = ARMV8_ATTRS, -+ 
.code = 0x0005, -+ .desc = "This event counts operations that cause a TLB refill of at least the L1D TLB.", -+ }, -+ { -+ .name = "INST_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0008, -+ .desc = "This event counts every architecturally executed instruction.", -+ }, -+ { -+ .name = "EXC_TAKEN", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0009, -+ .desc = "This event counts each exception taken.", -+ }, -+ { -+ .name = "EXC_RETURN", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x000a, -+ .desc = "This event counts each executed exception return instruction.", -+ }, -+ { -+ .name = "CID_WRITE_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x000b, -+ .desc = "This event counts every write to CONTEXTIDR.", -+ }, -+ { -+ .name = "BR_MIS_PRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0010, -+ .desc = "This event counts each correction to the predicted program flow that occurs because of a misprediction from, or no prediction from, the branch prediction resources and that relates to instructions that the branch prediction resources are capable of predicting.", -+ }, -+ { -+ .name = "CPU_CYCLES", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0011, -+ .desc = "This event counts every cycle.", -+ }, -+ { -+ .name = "BR_PRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0012, -+ .desc = "This event counts every branch or other change in the program flow that the branch prediction resources are capable of predicting.", -+ }, -+ { -+ .name = "L1I_CACHE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0014, -+ .desc = "This event counts operations that cause a cache access to at least the L1I cache.", -+ }, -+ { -+ .name = "L1D_CACHE_WB", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0015, -+ .desc = "This event counts every write-back of data from the L1D cache.", -+ }, -+ { -+ .name = "L2D_CACHE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0016, -+ .desc = "This event counts operations that cause a cache access to at least the L2 cache.", -+ }, -+ { -+ .name = "L2D_CACHE_REFILL", -+ .modmsk = ARMV8_ATTRS, -+ .code 
= 0x0017, -+ .desc = "This event counts operations that cause a refill of at least the L2 cache.", -+ }, -+ { -+ .name = "L2D_CACHE_WB", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0018, -+ .desc = "This event counts every write-back of data from the L2 cache.", -+ }, -+ { -+ .name = "INST_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x001b, -+ .desc = "This event counts every architecturally executed instruction.", -+ }, -+ { -+ .name = "STALL_FRONTEND", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0023, -+ .desc = "This event counts every cycle counted by the CPU_CYCLES event on that no operations are issued because there are no operations available to issue for this PE from the frontend.", -+ }, -+ { -+ .name = "STALL_BACKEND", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0024, -+ .desc = "This event counts every cycle counted by the CPU_CYCLES event on that no operations are issued because the backend is unable to accept any operations.", -+ }, -+ { -+ .name = "L2D_TLB_REFILL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x002d, -+ .desc = "This event counts operations that cause a TLB refill of at least the L2D TLB.", -+ }, -+ { -+ .name = "L2I_TLB_REFILL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x002e, -+ .desc = "This event counts operations that cause a TLB refill of at least the L2I TLB.", -+ }, -+ { -+ .name = "L2D_TLB", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x002f, -+ .desc = "This event counts operations that cause a TLB access to at least the L2D TLB.", -+ }, -+ { -+ .name = "L2I_TLB", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0030, -+ .desc = "This event counts operations that cause a TLB access to at least the L2I TLB.", -+ }, -+ { -+ .name = "L1D_CACHE_REFILL_PRF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0049, -+ .desc = "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch.", -+ }, -+ { -+ .name = "L2D_CACHE_REFILL_PRF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0059, -+ .desc = "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch.", -+ 
}, -+ { -+ .name = "LDREX_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x006c, -+ .desc = "This event counts architecturally executed load-exclusive instructions.", -+ }, -+ { -+ .name = "STREX_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x006f, -+ .desc = "This event counts architecturally executed store-exclusive instructions.", -+ }, -+ { -+ .name = "LD_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0070, -+ .desc = "This event counts architecturally executed memory-reading instructions, as defined by the LD_RETIRED event.", -+ }, -+ { -+ .name = "ST_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0071, -+ .desc = "This event counts architecturally executed memory-writing instructions, as defined by the ST_RETIRED event. This event counts DCZVA as a store operation.", -+ }, -+ { -+ .name = "LDST_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0072, -+ .desc = "This event counts architecturally executed memory-reading instructions and memory-writing instructions, as defined by the LD_RETIRED and ST_RETIRED events.", -+ }, -+ { -+ .name = "DP_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0073, -+ .desc = "This event counts architecturally executed integer data-processing instructions.", -+ }, -+ { -+ .name = "ASE_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0074, -+ .desc = "This event counts architecturally executed Advanced SIMD data-processing instructions.", -+ }, -+ { -+ .name = "VFP_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0075, -+ .desc = "This event counts architecturally executed floating-point data-processing instructions.", -+ }, -+ { -+ .name = "PC_WRITE_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0076, -+ .desc = "This event counts only software changes of the PC that defined by the instruction architecturally executed, condition code check pass and software change of the PC event.", -+ }, -+ { -+ .name = "CRYPTO_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0077, -+ .desc = "This event counts architecturally executed cryptographic instructions, 
except PMULL and VMULL.", -+ }, -+ { -+ .name = "BR_IMMED_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0078, -+ .desc = "This event counts architecturally executed immediate branch instructions.", -+ }, -+ { -+ .name = "BR_RETURN_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0079, -+ .desc = "This event counts architecturally executed procedure return operations that defined by the BR_RETURN_RETIRED event.", -+ }, -+ { -+ .name = "BR_INDIRECT_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x007a, -+ .desc = "This event counts architecturally executed indirect branch instructions that includes software change of the PC other than exception-generating instructions and immediate branch instructions.", -+ }, -+ { -+ .name = "ISB_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x007c, -+ .desc = "This event counts architecturally executed Instruction Synchronization Barrier instructions.", -+ }, -+ { -+ .name = "DSB_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x007d, -+ .desc = "This event counts architecturally executed Data Synchronization Barrier instructions.", -+ }, -+ { -+ .name = "DMB_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x007e, -+ .desc = "This event counts architecturally executed Data Memory Barrier instructions, excluding the implied barrier operations of load/store operations with release consistency semantics.", -+ }, -+ { -+ .name = "EXC_UNDEF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0081, -+ .desc = "This event counts only other synchronous exceptions that are taken locally.", -+ }, -+ { -+ .name = "EXC_SVC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0082, -+ .desc = "This event counts only Supervisor Call exceptions that are taken locally.", -+ }, -+ { -+ .name = "EXC_PABORT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0083, -+ .desc = "This event counts only Instruction Abort exceptions that are taken locally.", -+ }, -+ { -+ .name = "EXC_DABORT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0084, -+ .desc = "This event counts only Data Abort or SError interrupt 
exceptions that are taken locally.", -+ }, -+ { -+ .name = "EXC_IRQ", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0086, -+ .desc = "This event counts only IRQ exceptions that are taken locally, including Virtual IRQ exceptions.", -+ }, -+ { -+ .name = "EXC_FIQ", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0087, -+ .desc = "This event counts only FIQ exceptions that are taken locally, including Virtual FIQ exceptions.", -+ }, -+ { -+ .name = "EXC_SMC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0088, -+ .desc = "This event counts only Secure Monitor Call exceptions. The counter does not increment on SMC instructions trapped as a Hyp Trap exception.", -+ }, -+ { -+ .name = "EXC_HVC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x008a, -+ .desc = "This event counts for both Hypervisor Call exceptions taken locally in the hypervisor and those taken as an exception from Non-secure EL1.", -+ }, -+ { -+ .name = "DCZVA_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x009f, -+ .desc = "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction.", -+ }, -+ { -+ .name = "FP_MV_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0105, -+ .desc = "This event counts architecturally executed floating-point move operations.", -+ }, -+ { -+ .name = "PRD_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0108, -+ .desc = "This event counts architecturally executed operations that using predicate register.", -+ }, -+ { -+ .name = "IEL_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0109, -+ .desc = "This event counts architecturally executed inter-element manipulation operations.", -+ }, -+ { -+ .name = "IREG_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x010a, -+ .desc = "This event counts architecturally executed inter-register manipulation operations.", -+ }, -+ { -+ .name = "FP_LD_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0112, -+ .desc = "This event counts architecturally executed NOSIMD load operations that using SIMD and FP registers.", -+ }, -+ { -+ .name = 
"FP_ST_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0113, -+ .desc = "This event counts architecturally executed NOSIMD store operations that using SIMD and FP registers.", -+ }, -+ { -+ .name = "BC_LD_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x011a, -+ .desc = "This event counts architecturally executed SIMD broadcast floating-point load operations.", -+ }, -+ { -+ .name = "EFFECTIVE_INST_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0121, -+ .desc = "This event counts architecturally executed instructions, excluding the MOVPRFX instruction.", -+ }, -+ { -+ .name = "PRE_INDEX_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0123, -+ .desc = "This event counts architecturally executed operations that uses 'pre-index' as its addressing mode.", -+ }, -+ { -+ .name = "POST_INDEX_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0124, -+ .desc = "This event counts architecturally executed operations that uses 'post-index' as its addressing mode.", -+ }, -+ { -+ .name = "UOP_SPLIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0139, -+ .desc = "This event counts the occurrence count of the micro-operation split.", -+ }, -+ { -+ .name = "LD_COMP_WAIT_L2_MISS", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0180, -+ .desc = "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store operation waits for memory access.", -+ }, -+ { -+ .name = "LD_COMP_WAIT_L2_MISS_EX", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0181, -+ .desc = "This event counts every cycle that no instructions are committed because the oldest and uncommitted integer load instruction waits for memory access.", -+ }, -+ { -+ .name = "LD_COMP_WAIT_L1_MISS", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0182, -+ .desc = "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store operation waits for L2 cache access.", -+ }, -+ { -+ .name = "LD_COMP_WAIT_L1_MISS_EX", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0183, -+ .desc = "This event counts 
every cycle that no instructions are committed because the oldest and uncommitted integer load instruction waits for L2 cache access.", -+ }, -+ { -+ .name = "LD_COMP_WAIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0184, -+ .desc = "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store operation waits for L1D, L2 and memory access.", -+ }, -+ { -+ .name = "LD_COMP_WAIT_EX", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0185, -+ .desc = "This event counts every cycle that no instructions are committed because the oldest and uncommitted integer load instruction waits for L1D, L2 and memory access.", -+ }, -+ { -+ .name = "LD_COMP_WAIT_PFP_BUSY", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0186, -+ .desc = "This event counts every cycle that no instructions are committed due to the lack of an available prefetch port.", -+ }, -+ { -+ .name = "LD_COMP_WAIT_PFP_BUSY_EX", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0187, -+ .desc = "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation.", -+ }, -+ { -+ .name = "LD_COMP_WAIT_PFP_BUSY_SWPF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0188, -+ .desc = "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction.", -+ }, -+ { -+ .name = "EU_COMP_WAIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0189, -+ .desc = "This event counts every cycle that no instructions are committed, and the oldest and uncommitted instruction is an integer or floating-point instruction.", -+ }, -+ { -+ .name = "FL_COMP_WAIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x018a, -+ .desc = "This event counts every cycle that no instructions are committed, and the oldest and uncommitted instruction is a floating-point instruction.", -+ }, -+ { -+ .name = "BR_COMP_WAIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x018b, -+ .desc = "This event counts every cycle that no instructions are committed, and the oldest and uncommitted instruction is a branch instruction.", -+ }, -+ { -+ .name 
= "ROB_EMPTY", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x018c, -+ .desc = "This event counts every cycle that no instructions are committed because the CSE is empty.", -+ }, -+ { -+ .name = "ROB_EMPTY_STQ_BUSY", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x018d, -+ .desc = "This event counts every cycle that no instructions are committed because the CSE is empty and the all store ports are full.", -+ }, -+ { -+ .name = "WFE_WFI_CYCLE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x018e, -+ .desc = "This event counts every cycle that the WFE/WFI instruction brings the instruction unit to a halt.", -+ }, -+ { -+ .name = "0INST_COMMIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0190, -+ .desc = "This event counts every cycle that no instructions are committed, but counts at the time when commits MOVPRFX only.", -+ }, -+ { -+ .name = "1INST_COMMIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0191, -+ .desc = "This event counts every cycle that one instruction is committed.", -+ }, -+ { -+ .name = "2INST_COMMIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0192, -+ .desc = "This event counts every cycle that two instructions are committed.", -+ }, -+ { -+ .name = "3INST_COMMIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0193, -+ .desc = "This event counts every cycle that three instructions are committed.", -+ }, -+ { -+ .name = "4INST_COMMIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0194, -+ .desc = "This event counts every cycle that four instructions are committed.", -+ }, -+ { -+ .name = "UOP_ONLY_COMMIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0198, -+ .desc = "This event counts every cycle that only any micro-operations are committed.", -+ }, -+ { -+ .name = "SINGLE_MOVPRFX_COMMIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0199, -+ .desc = "This event counts every cycle that only the MOVPRFX instruction is committed.", -+ }, -+ { -+ .name = "EAGA_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x01a0, -+ .desc = "This event counts valid cycles of EAGA pipeline.", -+ }, -+ { -+ .name = "EAGB_VAL", -+ 
.modmsk = ARMV8_ATTRS, -+ .code = 0x01a1, -+ .desc = "This event counts valid cycles of EAGB pipeline.", -+ }, -+ { -+ .name = "EXA_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x01a2, -+ .desc = "This event counts valid cycles of EXA pipeline.", -+ }, -+ { -+ .name = "EXB_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x01a3, -+ .desc = "This event counts valid cycles of EXB pipeline.", -+ }, -+ { -+ .name = "FLA_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x01a4, -+ .desc = "This event counts valid cycles of FLA pipeline.", -+ }, -+ { -+ .name = "FLB_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x01a5, -+ .desc = "This event counts valid cycles of FLB pipeline.", -+ }, -+ { -+ .name = "PRX_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x01a6, -+ .desc = "This event counts valid cycles of PRX pipeline.", -+ }, -+ { -+ .name = "FLA_VAL_PRD_CNT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x01b4, -+ .desc = "This event counts the number of 1 in the predicate bits of request in FLA pipeline, and corrects itself to be 16 when all bits are 1.", -+ }, -+ { -+ .name = "FLB_VAL_PRD_CNT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x01b5, -+ .desc = "This event counts the number of 1 in the predicate bits of request in FLB pipeline, and corrects itself to be 16 when all bits are 1.", -+ }, -+ { -+ .name = "EA_CORE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x01e0, -+ .desc = "This event counts energy consumption per cycle of core.", -+ }, -+ { -+ .name = "L1D_CACHE_REFILL_DM", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0200, -+ .desc = "This event counts L1D_CACHE_REFILL caused by demand access.", -+ }, -+ { -+ .name = "L1D_CACHE_REFILL_HWPRF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0202, -+ .desc = "This event counts L1D_CACHE_REFILL caused by hardware prefetch.", -+ }, -+ { -+ .name = "L1_MISS_WAIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0208, -+ .desc = "This event counts outstanding L1D cache miss requests per cycle.", -+ }, -+ { -+ .name = "L1I_MISS_WAIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 
0x0209, -+ .desc = "This event counts outstanding L1I cache miss requests per cycle.", -+ }, -+ { -+ .name = "L1HWPF_STREAM_PF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0230, -+ .desc = "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher.", -+ }, -+ { -+ .name = "L1HWPF_INJ_ALLOC_PF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0231, -+ .desc = "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.", -+ }, -+ { -+ .name = "L1HWPF_INJ_NOALLOC_PF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0232, -+ .desc = "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.", -+ }, -+ { -+ .name = "L2HWPF_STREAM_PF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0233, -+ .desc = "This event counts streaming prefetch requests to L2 cache generated by hardware prefecher.", -+ }, -+ { -+ .name = "L2HWPF_INJ_ALLOC_PF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0234, -+ .desc = "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.", -+ }, -+ { -+ .name = "L2HWPF_INJ_NOALLOC_PF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0235, -+ .desc = "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.", -+ }, -+ { -+ .name = "L2HWPF_OTHER", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0236, -+ .desc = "This event counts prefetch requests to L2 cache generated by the other causes.", -+ }, -+ { -+ .name = "L1_PIPE0_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0240, -+ .desc = "This event counts valid cycles of L1D cache pipeline#0.", -+ }, -+ { -+ .name = "L1_PIPE1_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0241, -+ .desc = "This event counts valid cycles of L1D cache pipeline#1.", -+ }, -+ { -+ .name = "L1_PIPE0_VAL_IU_TAG_ADRS_SCE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0250, -+ .desc = "This event counts requests in L1D cache pipeline#0 that its sce bit of 
tagged address is 1.", -+ }, -+ { -+ .name = "L1_PIPE0_VAL_IU_TAG_ADRS_PFE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0251, -+ .desc = "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1.", -+ }, -+ { -+ .name = "L1_PIPE1_VAL_IU_TAG_ADRS_SCE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0252, -+ .desc = "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1.", -+ }, -+ { -+ .name = "L1_PIPE1_VAL_IU_TAG_ADRS_PFE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0253, -+ .desc = "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1.", -+ }, -+ { -+ .name = "L1_PIPE0_COMP", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0260, -+ .desc = "This event counts completed requests in L1D cache pipeline#0.", -+ }, -+ { -+ .name = "L1_PIPE1_COMP", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0261, -+ .desc = "This event counts completed requests in L1D cache pipeline#1.", -+ }, -+ { -+ .name = "L1I_PIPE_COMP", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0268, -+ .desc = "This event counts completed requests in L1I cache pipeline.", -+ }, -+ { -+ .name = "L1I_PIPE_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0269, -+ .desc = "This event counts valid cycles of L1I cache pipeline.", -+ }, -+ { -+ .name = "L1_PIPE_ABORT_STLD_INTLK", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0274, -+ .desc = "This event counts aborted requests in L1D pipelines that due to store-load interlock.", -+ }, -+ { -+ .name = "L1_PIPE0_VAL_IU_NOT_SEC0", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x02a0, -+ .desc = "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0.", -+ }, -+ { -+ .name = "L1_PIPE1_VAL_IU_NOT_SEC0", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x02a1, -+ .desc = "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0.", -+ }, -+ { -+ .name = "L1_PIPE_COMP_GATHER_2FLOW", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x02b0, -+ .desc = "This event counts the number of times 
where 2 elements of the gather instructions became 2flows because 2 elements could not be combined.", -+ }, -+ { -+ .name = "L1_PIPE_COMP_GATHER_1FLOW", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x02b1, -+ .desc = "This event counts the number of times where 2 elements of the gather instructions became 1flow because 2 elements could be combined.", -+ }, -+ { -+ .name = "L1_PIPE_COMP_GATHER_0FLOW", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x02b2, -+ .desc = "This event counts the number of times where 2 elements of the gather instructions became 0flow because both predicate values are 0.", -+ }, -+ { -+ .name = "L1_PIPE_COMP_SCATTER_1FLOW", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x02b3, -+ .desc = "This event counts the number of flows of the scatter instructions.", -+ }, -+ { -+ .name = "L1_PIPE0_COMP_PRD_CNT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x02b8, -+ .desc = "This event counts the number of 1 in the predicate bits of request in L1D cache pipeline#0, and corrects itself to be 16 when all bits are 1.", -+ }, -+ { -+ .name = "L1_PIPE1_COMP_PRD_CNT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x02b9, -+ .desc = "This event counts the number of 1 in the predicate bits of request in L1D cache pipeline#1, and corrects itself to be 16 when all bits are 1.", -+ }, -+ { -+ .name = "L2D_CACHE_REFILL_DM", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0300, -+ .desc = "This event counts L2D_CACHE_REFILL caused by demand access.", -+ }, -+ { -+ .name = "L2D_CACHE_REFILL_HWPRF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0302, -+ .desc = "This event counts L2D_CACHE_REFILL caused by hardware prefetch.", -+ }, -+ { -+ .name = "L2_MISS_WAIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0308, -+ .desc = "This event counts outstanding L2 cache miss requests per cycle. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "L2_MISS_COUNT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0309, -+ .desc = "This event counts the number of times of L2 cache miss. 
It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_READ_TOTAL_CMG0", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0310, -+ .desc = "This event counts read requests from CMG0 to measured CMG, if measured CMG is not CMG0. Otherwise, this event counts read requests from CMG0 local memory to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_READ_TOTAL_CMG1", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0311, -+ .desc = "This event counts read requests from CMG1 to measured CMG, if measured CMG is not CMG1. Otherwise, this event counts read requests from CMG1 local memory to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_READ_TOTAL_CMG2", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0312, -+ .desc = "This event counts read requests from CMG2 to measured CMG, if measured CMG is not CMG2. Otherwise, this event counts read requests from CMG2 local memory to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_READ_TOTAL_CMG3", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0313, -+ .desc = "This event counts read requests from CMG3 to measured CMG, if measured CMG is not CMG3. Otherwise, this event counts read requests from CMG3 local memory to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_READ_TOTAL_TOFU", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0314, -+ .desc = "This event counts read requests from tofu controller to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_READ_TOTAL_PCI", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0315, -+ .desc = "This event counts read requests from PCI controller to measured CMG. 
It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_READ_TOTAL_MEM", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0316, -+ .desc = "This event counts read requests from measured CMG local memory to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_WRITE_TOTAL_CMG0", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0318, -+ .desc = "This event counts write requests from measured CMG to CMG0, if measured CMG is not CMG0. Otherwise, this event counts write requests from measured CMG to CMG0 local memory. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_WRITE_TOTAL_CMG1", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0319, -+ .desc = "This event counts write requests from measured CMG to CMG1, if measured CMG is not CMG1. Otherwise, this event counts write requests from measured CMG to CMG1 local memory. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_WRITE_TOTAL_CMG2", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x031a, -+ .desc = "This event counts write requests from measured CMG to CMG2, if measured CMG is not CMG2. Otherwise, this event counts write requests from measured CMG to CMG2 local memory. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_WRITE_TOTAL_CMG3", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x031b, -+ .desc = "This event counts write requests from measured CMG to CMG3, if measured CMG is not CMG3. Otherwise, this event counts write requests from measured CMG to CMG3 local memory. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_WRITE_TOTAL_TOFU", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x031c, -+ .desc = "This event counts write requests from measured CMG to tofu controller. 
It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_WRITE_TOTAL_PCI", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x031d, -+ .desc = "This event counts write requests from measured CMG to PCI controller. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "BUS_WRITE_TOTAL_MEM", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x031e, -+ .desc = "This event counts write requests from measured CMG to measured CMG local memory. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "L2D_SWAP_DM", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0325, -+ .desc = "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.", -+ }, -+ { -+ .name = "L2D_CACHE_MIBMCH_PRF", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0326, -+ .desc = "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.", -+ }, -+ { -+ .name = "L2_PIPE_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0330, -+ .desc = "This event counts valid cycles of L2 cache pipeline. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "L2_PIPE_COMP_ALL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0350, -+ .desc = "This event counts completed requests in L2 cache pipeline. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "L2_PIPE_COMP_PF_L2MIB_MCH", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0370, -+ .desc = "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access. 
It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "L2D_CACHE_SWAP_LOCAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x0396, -+ .desc = "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "EA_L2", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x03e0, -+ .desc = "This event counts energy consumption per cycle of L2 cache. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "EA_MEMORY", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x03e8, -+ .desc = "This event counts energy consumption per cycle of CMG local memory. It counts all events caused in measured CMG regardless of measured PE.", -+ }, -+ { -+ .name = "SIMD_INST_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8000, -+ .desc = "This event counts architecturally executed SIMD instructions, excluding the Advanced SIMD scalar instructions and the instructions listed in Non-SIMD SVE instructions section of SVE Reference Manual.", -+ }, -+ { -+ .name = "SVE_INST_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8002, -+ .desc = "This event counts architecturally executed Advanced SIMD instructions, including the instructions listed in Non-SIMD SVE instructions section of SVE Reference Manual.", -+ }, -+ { -+ .name = "UOP_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8008, -+ .desc = "This event counts all architecturally executed micro-operations.", -+ }, -+ { -+ .name = "SVE_MATH_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x800e, -+ .desc = "This event counts architecturally executed math function operations due to the SVE FTSMUL, FTMAD, FTSSEL, and FEXPA instructions.", -+ }, -+ { -+ .name = "FP_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8010, -+ .desc = "This event counts architecturally executed operations due to scalar, Advanced SIMD, and SVE instructions listed in 
Floating-point instructions section of SVE Reference Manual.", -+ }, -+ { -+ .name = "FP_FMA_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8028, -+ .desc = "This event counts architecturally executed floating-point fused multiply-add and multiply-subtract operations.", -+ }, -+ { -+ .name = "FP_RECPE_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8034, -+ .desc = "This event counts architecturally executed floating-point reciprocal estimate operations due to the Advanced SIMD scalar, Advanced SIMD vector, and SVE FRECPE and FRSQRTE instructions.", -+ }, -+ { -+ .name = "FP_CVT_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8038, -+ .desc = "This event counts architecturally executed floating-point convert operations due to the scalar, Advanced SIMD, and SVE floating-point conversion instructions listed in Floating-point conversions section of SVE Reference Manual.", -+ }, -+ { -+ .name = "ASE_SVE_INT_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8043, -+ .desc = "This event counts architecturally executed integer arithmetic operations due to Advanced SIMD and SVE data-processing instructions listed in Integer instructions section of SVE Reference Manual.", -+ }, -+ { -+ .name = "SVE_PRED_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8074, -+ .desc = "This event counts architecturally executed SIMD data-processing and load/store operations due to SVE instructions with a Governing predicate operand that determines the Active elements.", -+ }, -+ { -+ .name = "SVE_MOVPRFX_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x807c, -+ .desc = "This event counts architecturally executed operations due to MOVPRFX instructions, whether or not they are fused with the prefixed instruction.", -+ }, -+ { -+ .name = "SVE_MOVPRFX_U_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x807f, -+ .desc = "This event counts architecturally executed operations due to MOVPRFX instructions that are not fused with the prefixed instruction.", -+ }, -+ { -+ .name = "ASE_SVE_LD_SPEC", -+ .modmsk = 
ARMV8_ATTRS, -+ .code = 0x8085, -+ .desc = "This event counts architecturally executed operations that read from memory due to SVE and Advanced SIMD load instructions.", -+ }, -+ { -+ .name = "ASE_SVE_ST_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8086, -+ .desc = "This event counts architecturally executed operations that write to memory due to SVE and Advanced SIMD store instructions.", -+ }, -+ { -+ .name = "PRF_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8087, -+ .desc = "This event counts architecturally executed prefetch operations due to scalar PRFM and SVE PRF instructions.", -+ }, -+ { -+ .name = "BASE_LD_REG_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8089, -+ .desc = "This event counts architecturally executed operations that read from memory due to an instruction that loads a general-purpose register.", -+ }, -+ { -+ .name = "BASE_ST_REG_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x808a, -+ .desc = "This event counts architecturally executed operations that write to memory due to an instruction that stores a general-purpose register, excluding the 'DC ZVA' instruction.", -+ }, -+ { -+ .name = "SVE_LDR_REG_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8091, -+ .desc = "This event counts architecturally executed operations that read from memory due to an SVE LDR instruction.", -+ }, -+ { -+ .name = "SVE_STR_REG_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8092, -+ .desc = "This event counts architecturally executed operations that write to memory due to an SVE STR instruction.", -+ }, -+ { -+ .name = "SVE_LDR_PREG_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8095, -+ .desc = "This event counts architecturally executed operations that read from memory due to an SVE LDR (predicate) instruction.", -+ }, -+ { -+ .name = "SVE_STR_PREG_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x8096, -+ .desc = "This event counts architecturally executed operations that write to memory due to an SVE STR (predicate) instruction.", -+ }, -+ { -+ .name = 
"SVE_PRF_CONTIG_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x809f, -+ .desc = "This event counts architecturally executed operations that prefetch memory due to an SVE predicated single contiguous element prefetch instruction.", -+ }, -+ { -+ .name = "ASE_SVE_LD_MULTI_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80a5, -+ .desc = "This event counts architecturally executed operations that read from memory due to SVE and Advanced SIMD multiple vector contiguous structure load instructions.", -+ }, -+ { -+ .name = "ASE_SVE_ST_MULTI_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80a6, -+ .desc = "This event counts architecturally executed operations that write to memory due to SVE and Advanced SIMD multiple vector contiguous structure store instructions.", -+ }, -+ { -+ .name = "SVE_LD_GATHER_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80ad, -+ .desc = "This event counts architecturally executed operations that read from memory due to SVE noncontiguous gather-load instructions.", -+ }, -+ { -+ .name = "SVE_ST_SCATTER_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80ae, -+ .desc = "This event counts architecturally executed operations that write to memory due to SVE noncontiguous scatter-store instructions.", -+ }, -+ { -+ .name = "SVE_PRF_GATHER_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80af, -+ .desc = "This event counts architecturally executed operations that prefetch memory due to SVE noncontiguous gather-prefetch instructions.", -+ }, -+ { -+ .name = "SVE_LDFF_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80bc, -+ .desc = "This event counts architecturally executed memory read operations due to SVE First-fault and Non-fault load instructions.", -+ }, -+ { -+ .name = "FP_SCALE_OPS_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80c0, -+ .desc = "This event counts architecturally executed SVE arithmetic operations. 
This event counter is incremented by (128 / CSIZE) and by twice that amount for operations that would also be counted by SVE_FP_FMA_SPEC.", -+ }, -+ { -+ .name = "FP_FIXED_OPS_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80c1, -+ .desc = "This event counts architecturally executed v8SIMD and FP arithmetic operations. The event counter is incremented by the specified number of elements for Advanced SIMD operations or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC.", -+ }, -+ { -+ .name = "FP_HP_SCALE_OPS_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80c2, -+ .desc = "This event counts architecturally executed SVE half-precision arithmetic operations. This event counter is incremented by 8, or by 16 for operations that would also be counted by SVE_FP_FMA_SPEC.", -+ }, -+ { -+ .name = "FP_HP_FIXED_OPS_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80c3, -+ .desc = "This event counts architecturally executed v8SIMD and FP half-precision arithmetic operations. This event counter is incremented by the number of 16-bit elements for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC.", -+ }, -+ { -+ .name = "FP_SP_SCALE_OPS_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80c4, -+ .desc = "This event counts architecturally executed SVE single-precision arithmetic operations. This event counter is incremented by 4, or by 8 for operations that would also be counted by SVE_FP_FMA_SPEC.", -+ }, -+ { -+ .name = "FP_SP_FIXED_OPS_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80c5, -+ .desc = "This event counts architecturally executed v8SIMD and FP single-precision arithmetic operations. 
This event counter is incremented by the number of 32-bit elements for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC.", -+ }, -+ { -+ .name = "FP_DP_SCALE_OPS_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80c6, -+ .desc = "This event counts architecturally executed SVE double-precision arithmetic operations. This event counter is incremented by 2, or by 4 for operations that would also be counted by SVE_FP_FMA_SPEC.", -+ }, -+ { -+ .name = "FP_DP_FIXED_OPS_SPEC", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x80c7, -+ .desc = "This event counts architecturally executed v8SIMD and FP double-precision arithmetic operations. This event counter is incremented by 2 for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC.", -+ }, -+}; -diff --git a/lib/pfmlib_arm_armv8.c b/lib/pfmlib_arm_armv8.c -index a252951..26b3f8c 100644 ---- a/lib/pfmlib_arm_armv8.c -+++ b/lib/pfmlib_arm_armv8.c -@@ -35,6 +35,7 @@ - #include "events/arm_xgene_events.h" /* Applied Micro X-Gene tables */ - #include "events/arm_cavium_tx2_events.h" /* Marvell ThunderX2 tables */ - #include "events/arm_marvell_tx2_unc_events.h" /* Marvell ThunderX2 PMU tables */ -+#include "events/arm_fujitsu_a64fx_events.h" /* Fujitsu A64FX PMU tables */ - - static int - pfm_arm_detect_cortex_a57(void *this) -@@ -104,6 +105,22 @@ pfm_arm_detect_thunderx2(void *this) - return PFM_ERR_NOTSUPP; - } - -+static int -+pfm_arm_detect_a64fx(void *this) -+{ -+ int ret; -+ -+ ret = pfm_arm_detect(this); -+ if (ret != PFM_SUCCESS) -+ return PFM_ERR_NOTSUPP; -+ -+ if ((pfm_arm_cfg.implementer == 0x46) && /* Fujitsu */ -+ (pfm_arm_cfg.part == 0x001)) { /* a64fx */ -+ return PFM_SUCCESS; -+ } -+ return PFM_ERR_NOTSUPP; -+} -+ - /* ARM Cortex A57 support */ - pfmlib_pmu_t arm_cortex_a57_support={ - .desc = "ARM Cortex A57", -@@ -204,6 +221,31 @@ pfmlib_pmu_t 
arm_thunderx2_support={ - .get_event_nattrs = pfm_arm_get_event_nattrs, - }; - -+/* Fujitsu A64FX support */ -+pfmlib_pmu_t arm_fujitsu_a64fx_support={ -+ .desc = "Fujitsu A64FX", -+ .name = "arm_a64fx", -+ .pmu = PFM_PMU_ARM_A64FX, -+ .pme_count = LIBPFM_ARRAY_SIZE(arm_a64fx_pe), -+ .type = PFM_PMU_TYPE_CORE, -+ .pe = arm_a64fx_pe, -+ -+ .pmu_detect = pfm_arm_detect_a64fx, -+ .max_encoding = 1, -+ .num_cntrs = 6, -+ -+ .get_event_encoding[PFM_OS_NONE] = pfm_arm_get_encoding, -+ PFMLIB_ENCODE_PERF(pfm_arm_get_perf_encoding), -+ .get_event_first = pfm_arm_get_event_first, -+ .get_event_next = pfm_arm_get_event_next, -+ .event_is_valid = pfm_arm_event_is_valid, -+ .validate_table = pfm_arm_validate_table, -+ .get_event_info = pfm_arm_get_event_info, -+ .get_event_attr_info = pfm_arm_get_event_attr_info, -+ PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), -+ .get_event_nattrs = pfm_arm_get_event_nattrs, -+}; -+ - // For uncore, each socket has a separate perf name, otherwise they are the same, use macro - - #define DEFINE_TX2_DMC(n) \ -diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c -index 335155e..102db37 100644 ---- a/lib/pfmlib_common.c -+++ b/lib/pfmlib_common.c -@@ -494,6 +494,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= - &arm_thunderx2_llc1_support, - &arm_thunderx2_ccpi0_support, - &arm_thunderx2_ccpi1_support, -+ &arm_fujitsu_a64fx_support, - #endif - #ifdef CONFIG_PFMLIB_ARCH_ARM64 - &arm_cortex_a57_support, -@@ -506,6 +507,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= - &arm_thunderx2_llc1_support, - &arm_thunderx2_ccpi0_support, - &arm_thunderx2_ccpi1_support, -+ &arm_fujitsu_a64fx_support, - #endif - - #ifdef CONFIG_PFMLIB_ARCH_S390X -diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h -index 845a13e..237e8d6 100644 ---- a/lib/pfmlib_priv.h -+++ b/lib/pfmlib_priv.h -@@ -662,6 +662,8 @@ extern pfmlib_pmu_t arm_thunderx2_llc1_support; - extern pfmlib_pmu_t arm_thunderx2_ccpi0_support; - extern pfmlib_pmu_t arm_thunderx2_ccpi1_support; - -+extern 
pfmlib_pmu_t arm_fujitsu_a64fx_support; -+ - extern pfmlib_pmu_t mips_74k_support; - extern pfmlib_pmu_t s390x_cpum_cf_support; - extern pfmlib_pmu_t s390x_cpum_sf_support; -diff --git a/tests/validate_arm64.c b/tests/validate_arm64.c -index 5cb1966..96060bb 100644 ---- a/tests/validate_arm64.c -+++ b/tests/validate_arm64.c -@@ -198,6 +198,27 @@ static const test_event_t arm64_test_events[]={ - .codes[0] = 0xd, - .fstr = "tx2_llc0::UNC_LLC_READ", - }, -+ { SRC_LINE, -+ .name = "arm_a64fx::CPU_CYCLES", -+ .ret = PFM_SUCCESS, -+ .count = 1, -+ .codes[0] = 0x8000011, -+ .fstr = "arm_a64fx::CPU_CYCLES:k=1:u=1:hv=0", -+ }, -+ { SRC_LINE, -+ .name = "arm_a64fx::CPU_CYCLES:k", -+ .ret = PFM_SUCCESS, -+ .count = 1, -+ .codes[0] = 0x88000011, -+ .fstr = "arm_a64fx::CPU_CYCLES:k=1:u=0:hv=0", -+ }, -+ { SRC_LINE, -+ .name = "arm_a64fx::INST_RETIRED", -+ .ret = PFM_SUCCESS, -+ .count = 1, -+ .codes[0] = 0x8000008, -+ .fstr = "arm_a64fx::INST_RETIRED:k=1:u=1:hv=0", -+ }, - }; - #define NUM_TEST_EVENTS (int)(sizeof(arm64_test_events)/sizeof(test_event_t)) - -commit dfe30a72c18dc64ea8e55c469a9adcfec9c09340 -Author: Stephane Eranian -Date: Wed Sep 23 16:53:53 2020 -0700 - - install Fujitsu A64FX man page in ARM64 mode - - This patch corrects the documentation Makefile to install - the libpfm_a64fx.3 man page when bulding for ARM64. Otherwise - the man page woul only be installed in ARM (32-bit) mode. 
- - Reported-by: William Cohen - Signed-off-by: Stephane Eranian - -diff --git a/docs/Makefile b/docs/Makefile -index f8beebc..f5c0935 100644 ---- a/docs/Makefile -+++ b/docs/Makefile -@@ -131,6 +131,7 @@ ifeq ($(CONFIG_PFMLIB_ARCH_ARM64),y) - ARCH_MAN += libpfm_arm_xgene.3 \ - libpfm_arm_ac57.3 \ -- libpfm_arm_ac53.3 -+ libpfm_arm_ac53.3 \ -+ libpfm_arm_a64fx.3 - endif - - ifeq ($(CONFIG_PFMLIB_ARCH_MIPS),y) -commit f6500e77563e606c8510ff26f57d321328bd8157 -Author: Masahiko, Yamada -Date: Wed Jan 27 20:12:59 2021 +0900 - - Changing the number of PMU counters and deleting the ARM(32-bit) mode for A64FX - - The current libpfm4 implementation treats PMCR_EL0.N = 0x6 like other ARM Reference processors. - On an A64FX, PMCR_EL0.N = 0x8 (The number of PMU counters is 8.). - Therefore, only 6 counters are available in the current implementation. - The A64FX core also supports the AArch64 state and the A64 Instruction set. - The AArch32 state and the A32, T32 Instruction set are not supported and cannot be transitioned to this Execution state. - Currently, the libpfm manual(docs/man3/libpfm_arm_a64fx.3) states that A32/A64 can be used, but A32 cannot be used. - - I have created a patch with the above fixes, so please review and merge it. - - Originally, the specification of the A64FX which Fujitsu published should have described the above two points, - but the description was omitted. - A64FX Specification HPC Extension v1.1 will add:. - - On a A64FX, PMCR_EL0.N = 0x8 (The number of PMU counters is 8.). - - A64FX does not support the AArch32 state and the A32, T32 Instruction set and cannot transition to this Execution state. 
- -diff --git a/docs/Makefile b/docs/Makefile -index f5c0935..e124747 100644 ---- a/docs/Makefile -+++ b/docs/Makefile -@@ -123,8 +123,7 @@ ARCH_MAN += libpfm_arm_xgene.3 \ - libpfm_arm_ac15.3 \ - libpfm_arm_ac8.3 \ - libpfm_arm_ac9.3 \ -- libpfm_arm_qcom_krait.3 \ -- libpfm_arm_a64fx.3 -+ libpfm_arm_qcom_krait.3 - endif - - ifeq ($(CONFIG_PFMLIB_ARCH_ARM64),y) -diff --git a/docs/man3/libpfm_arm_a64fx.3 b/docs/man3/libpfm_arm_a64fx.3 -index 6214eb7..587eea6 100644 ---- a/docs/man3/libpfm_arm_a64fx.3 -+++ b/docs/man3/libpfm_arm_a64fx.3 -@@ -11,8 +11,8 @@ libpfm_arm_a64fx - support for Fujitsu A64FX PMU - .SH DESCRIPTION - The library supports the Fujitsu A64FX core PMU. - --This PMU supports 6 counters and privilege levels filtering. --It can operate in both 32 and 64 bit modes. -+This PMU supports 8 counters and privilege levels filtering. -+It can operate in 64 bit mode only. - - .SH MODIFIERS - The following modifiers are supported on Fujitsu A64FX: -diff --git a/lib/Makefile b/lib/Makefile -index 483e0e1..7afe411 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -371,7 +371,8 @@ INC_ARM64=events/arm_cortex_a57_events.h \ - INC_ARM64=events/arm_cortex_a57_events.h \ - events/arm_cortex_a53_events.h \ - events/arm_cavium_tx2_events.h \ -- events/arm_marvell_tx2_unc_events.h -+ events/arm_marvell_tx2_unc_events.h \ -+ events/arm_fujitsu_a64fx_events.h \ - - INCDEP=$(INC_COMMON) $(INCARCH) - -diff --git a/lib/pfmlib_arm_armv8.c b/lib/pfmlib_arm_armv8.c -index e147d6c..00e2e3f 100644 ---- a/lib/pfmlib_arm_armv8.c -+++ b/lib/pfmlib_arm_armv8.c -@@ -254,7 +254,7 @@ pfmlib_pmu_t arm_fujitsu_a64fx_support={ - - .pmu_detect = pfm_arm_detect_a64fx, - .max_encoding = 1, -- .num_cntrs = 6, -+ .num_cntrs = 8, - - .get_event_encoding[PFM_OS_NONE] = pfm_arm_get_encoding, - PFMLIB_ENCODE_PERF(pfm_arm_get_perf_encoding), diff --git a/libpfm-amd_merge.patch b/libpfm-amd_merge.patch deleted file mode 100644 index 40841a5..0000000 --- a/libpfm-amd_merge.patch +++ /dev/null @@ -1,56 
+0,0 @@ -commit afa8700ce726153fa1547ce118f7f9b029e675c9 -Author: Stephane Eranian -Date: Wed Jan 16 13:23:11 2019 -0800 - - clarify description of AMD Fam17h MERGE event - - This is a special event used when merging two counters together. - It needs to be programmed on the od-numbered performance counter. - - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h.h b/lib/events/amd64_events_fam17h.h -index 4f865b9..b0db898 100644 ---- a/lib/events/amd64_events_fam17h.h -+++ b/lib/events/amd64_events_fam17h.h -@@ -954,7 +954,7 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .umasks = amd64_fam17h_tablewalker_allocation, - }, - { .name = "MERGE", -- .desc = "See .", -+ .desc = "Merge two counters together. This event must be programmed on the odd performance counter", - .modmsk = AMD64_FAM17H_ATTRS, - .code = 0xfff, - .flags = 0, - -commit ac95d1957771d527f7bf8d11a97fb10ee223d6c0 -Author: Stephane Eranian -Date: Thu Mar 14 15:19:13 2019 -0700 - - remove MERGE event from AMD Fam17h table - - The MERGE event is not measuring an actual micro-architectural event. Instead - it is used to fuse two consecutive counters together to get a wider counter. - This should be used cautiously and is not currently compatible with Linux - perf_events interface and implementation. - - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h.h b/lib/events/amd64_events_fam17h.h -index b0db898..deadbed 100644 ---- a/lib/events/amd64_events_fam17h.h -+++ b/lib/events/amd64_events_fam17h.h -@@ -953,13 +953,6 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_tablewalker_allocation), - .umasks = amd64_fam17h_tablewalker_allocation, - }, -- { .name = "MERGE", -- .desc = "Merge two counters together. 
This event must be programmed on the odd performance counter", -- .modmsk = AMD64_FAM17H_ATTRS, -- .code = 0xfff, -- .flags = 0, -- .ngrp = 0, -- }, - { .name = "L1_BTB_CORRECTION", - .desc = "TBD", - .modmsk = AMD64_FAM17H_ATTRS, diff --git a/libpfm-flags.patch b/libpfm-flags.patch deleted file mode 100644 index b68ab95..0000000 --- a/libpfm-flags.patch +++ /dev/null @@ -1,184 +0,0 @@ -commit 20bd642e0ebc2c1d29e39417ee4665271df43d93 -Author: Stephane Eranian -Date: Thu Nov 7 10:47:02 2019 -0800 - - add support for speculation event information - - This patch extends the information returned by get_event_info() - with speculation information. Some events can include occurrences - happening during speculative execution. This is important information - because it impacts the cost associated with such event. The - pfm_event_info_t struct is extended with a 2-bit field describing - whether or not the event counts during speculation. - - Given that the speculation information is not always available from - hardware vendors, the field can have 3 values: - - PFM_EVENT_SPEC_INFO_NA: no information available (default) - - PFM_EVENT_INFO_SPEC_TRUE: event includes speculative execution - - PFM_EVENT_INFO_SPEC_FALSE: evnet does not include speculative execution - - Signed-off-by: Stephane Eranian - -diff --git a/docs/man3/pfm_get_event_info.3 b/docs/man3/pfm_get_event_info.3 -index 7eaa6cf..a8c7546 100644 ---- a/docs/man3/pfm_get_event_info.3 -+++ b/docs/man3/pfm_get_event_info.3 -@@ -85,6 +85,16 @@ field means that at least one umask supports precise sampling. On Intel X86 - processors, this indicates whether the event supports Precise Event-Based - Sampling (PEBS). - .PP -+.TP -+.B is_speculative -+This bitfield indicates whether or not the event includes occurrences happening -+during speculative execution for both wrong and correct path. Given that this -+kind of event information is not always available from vendors, this field uses -+multiple bits. 
A value of \fBPFM_EVENT_INFO_SPEC_NA\fR indicates that speculation -+information is not available. A value of \fBPFM_EVENT_INFO_SPEC_TRUE\fR indicates -+that the event count during speculative execution. A value of \fBPFM_EVENT_INFO_SPEC_FALS\fR -+indicates that the event does not count during speculative execution. -+.PP - - The \fBpfm_os_t\fR enumeration provides the following choices: - .TP -diff --git a/examples/showevtinfo.c b/examples/showevtinfo.c -index 40966ac..44e958e 100644 ---- a/examples/showevtinfo.c -+++ b/examples/showevtinfo.c -@@ -376,11 +376,16 @@ static void - print_event_flags(pfm_event_info_t *info) - { - int n = 0; -+ int spec = info->is_speculative; - - if (info->is_precise) { - printf("[precise] "); - n++; - } -+ if (spec > PFM_EVENT_INFO_SPEC_NA) { -+ printf("[%s] ", spec == PFM_EVENT_INFO_SPEC_TRUE ? "speculative" : "non-speculative"); -+ n++; -+ } - if (!n) - printf("None"); - } -diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h -index bd6f935..09c673d 100644 ---- a/include/perfmon/pfmlib.h -+++ b/include/perfmon/pfmlib.h -@@ -645,6 +645,12 @@ typedef struct { - } SWIG_NAME(flags); - } pfm_pmu_info_t; - -+typedef enum { -+ PFM_EVENT_INFO_SPEC_NA = 0, -+ PFM_EVENT_INFO_SPEC_TRUE = 1, -+ PFM_EVENT_INFO_SPEC_FALSE = 2, -+} pfm_event_info_spec_t; -+ - typedef struct { - const char *name; /* event name */ - const char *desc; /* event description */ -@@ -657,8 +663,9 @@ typedef struct { - int nattrs; /* number of attributes */ - int reserved; /* for future use */ - struct { -- unsigned int is_precise:1; /* precise sampling (Intel X86=PEBS) */ -- unsigned int reserved_bits:31; -+ unsigned int is_precise:1; /* precise sampling (Intel X86=PEBS) */ -+ unsigned int is_speculative:2;/* count correct and wrong path occurrences */ -+ unsigned int reserved_bits:29; - } SWIG_NAME(flags); - } pfm_event_info_t; - -diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c -index 688edb6..2b6cbb4 100644 ---- a/lib/pfmlib_common.c -+++ 
b/lib/pfmlib_common.c -@@ -1951,7 +1951,8 @@ pfm_get_event_info(int idx, pfm_os_t os, pfm_event_info_t *uinfo) - info.dtype = PFM_DTYPE_UINT64; - - /* reset flags */ -- info.is_precise = 0; -+ info.is_precise = 0; -+ info.is_speculative = PFM_EVENT_INFO_SPEC_NA; - - ret = pmu->get_event_info(pmu, pidx, &info); - if (ret != PFM_SUCCESS) -diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h -index fe13351..b0070a6 100644 ---- a/lib/pfmlib_priv.h -+++ b/lib/pfmlib_priv.h -@@ -186,6 +186,7 @@ typedef struct { - #define PFMLIB_PMU_FL_RAW_UMASK 0x4 /* PMU supports PFM_ATTR_RAW_UMASKS */ - #define PFMLIB_PMU_FL_ARCH_DFL 0x8 /* PMU is arch default */ - #define PFMLIB_PMU_FL_NO_SMPL 0x10 /* PMU does not support sampling */ -+#define PFMLIB_PMU_FL_SPEC 0x20 /* PMU provides event speculation info */ - - typedef struct { - int initdone; -commit fb31170eab2d62d6cb182f14df3a6d8e065303d2 -Author: Stephane Eranian -Date: Thu Dec 19 16:13:16 2019 -0800 - - add PFMLIB_PMU_FL_DEPR flag - - To mark a PMU model as deprecated. This is useful when a PMU model - is superseded by another one, yet the obsolete model must remain - for backward compatibility reason. - - The ensures that a fully qualified event string with the old pmu - name will still be accepted. But when running on the matching - CPU model, the new PMU model will be selected by default when - the pmu model name is not specified. 
- - Example: when running on pmu_old PMU model: - - pmu_old::cycles is still accepted - - pmu_new::cycles is accepted - - cycles is mapped to pmu_new::cycles - - Signed-off-by: Stephane Eranian - -diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c -index 8cb8998..31d16e9 100644 ---- a/lib/pfmlib_common.c -+++ b/lib/pfmlib_common.c -@@ -712,6 +712,12 @@ pfmlib_pmu_active(pfmlib_pmu_t *pmu) - return !!(pmu->flags & PFMLIB_PMU_FL_ACTIVE); - } - -+static inline int -+pfmlib_pmu_deprecated(pfmlib_pmu_t *pmu) -+{ -+ return !!(pmu->flags & PFMLIB_PMU_FL_DEPR); -+} -+ - static inline int - pfmlib_pmu_initialized(pfmlib_pmu_t *pmu) - { -@@ -1495,6 +1501,14 @@ pfmlib_parse_event(const char *event, pfmlib_event_desc_t *d) - */ - if (!pname && !pfmlib_pmu_active(pmu)) - continue; -+ -+ /* -+ * if the PMU name is not passed, then if -+ * the pmu is deprecated, then skip it. It means -+ * there is a better candidate in the active list -+ */ -+ if (!pname && pfmlib_pmu_deprecated(pmu)) -+ continue; - /* - * check for requested PMU name, - */ -diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h -index 1340a6b..5cddc9c 100644 ---- a/lib/pfmlib_priv.h -+++ b/lib/pfmlib_priv.h -@@ -187,6 +187,7 @@ typedef struct { - #define PFMLIB_PMU_FL_ARCH_DFL 0x8 /* PMU is arch default */ - #define PFMLIB_PMU_FL_NO_SMPL 0x10 /* PMU does not support sampling */ - #define PFMLIB_PMU_FL_SPEC 0x20 /* PMU provides event speculation info */ -+#define PFMLIB_PMU_FL_DEPR 0x40 /* PMU model is deprecated */ - - typedef struct { - int initdone; diff --git a/libpfm-initp.patch b/libpfm-initp.patch new file mode 100644 index 0000000..6796191 --- /dev/null +++ b/libpfm-initp.patch @@ -0,0 +1,31 @@ +commit 874feacbbe97fe567d3d8b1582d881d1b424dd5e +Author: William Cohen +Date: Fri Apr 14 16:07:07 2023 -0400 + + Make sure that p is set to a known value before using. 
+ + Need to ensure that p was initialized at the start of function + gen_tracepoint_table otherwise on some architectures such as s390x + will get the following error when compiling with -Werror: + + make[1]: Entering directory '/root/rpmbuild/BUILD/libpfm-4.13.0/lib' + cc -O2 -flto=auto -ffat-lto-objects -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -march=z14 -mtune=z15 -fasynchronous-unwind-tables -fstack-clash-protection -g -Wall -Werror -Wextra -Wno-unused-parameter -I. -I/root/rpmbuild/BUILD/libpfm-4.13.0/lib/../include -DCONFIG_PFMLIB_DEBUG -DCONFIG_PFMLIB_OS_LINUX -D_REENTRANT -I. -fvisibility=hidden -DCONFIG_PFMLIB_ARCH_S390X -I. -c pfmlib_perf_event_pmu.c + pfmlib_perf_event_pmu.c: In function 'gen_tracepoint_table': + pfmlib_perf_event_pmu.c:434:35: error: 'p' may be used uninitialized in this function [-Werror=maybe-uninitialized] + 434 | p->modmsk = 0; + | ~~~~~~~~~~^~~ + cc1: all warnings being treated as errors + +diff --git a/lib/pfmlib_perf_event_pmu.c b/lib/pfmlib_perf_event_pmu.c +index 637c5b1..8f7d7d1 100644 +--- a/lib/pfmlib_perf_event_pmu.c ++++ b/lib/pfmlib_perf_event_pmu.c +@@ -361,7 +361,7 @@ gen_tracepoint_table(void) + { + DIR *dir1, *dir2; + struct dirent *d1, *d2; +- perf_event_t *p; ++ perf_event_t *p = NULL; + perf_umask_t *um; + char d2path[MAXPATHLEN]; + char idpath[MAXPATHLEN]; diff --git a/libpfm-tx2.patch b/libpfm-tx2.patch deleted file mode 100644 index d730c43..0000000 --- a/libpfm-tx2.patch +++ /dev/null @@ -1,1341 +0,0 @@ -commit 6c9e44b95a55b8bf62cbd64009c4c9b30964a66c -Author: Steve Walk -Date: Tue Mar 20 09:37:56 2018 -0700 - - update Cavium ThunderX2 with now public events - - This patch adds new model specific events to the - Cavium Thunder X2 core PMU. 
The updated list is based - on publicly available documentation from Cavium which - is available at: - https://cavium.com/resources.html - - Signed-off-by: Steve Walk - -diff --git a/lib/events/arm_cavium_tx2_events.h b/lib/events/arm_cavium_tx2_events.h -index 67de9f8..198d33d 100644 ---- a/lib/events/arm_cavium_tx2_events.h -+++ b/lib/events/arm_cavium_tx2_events.h -@@ -23,6 +23,9 @@ - * - * ARM Architecture Reference Manual, ARMv8, for ARMv8-A architecture profile, - * ARM DDI 0487B.a (ID033117) -+ * -+ * Cavium ThunderX2 C99XX PMU Events (Abridged), July 31, 2018 -+ * https://cavium.com/resources.html - */ - - static const arm_entry_t arm_thunderx2_pe[]={ -@@ -161,6 +164,11 @@ static const arm_entry_t arm_thunderx2_pe[]={ - .code = 0x1C, - .desc = "Instruction architecturally executed (condition check pass) Write to translation table base" - }, -+ {.name = "CHAIN", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x1E, -+ .desc = "For odd-numbered counters, increments the count by one for each overflow of the proceeding even counter" -+ }, - {.name = "L1D_CACHE_ALLOCATE", - .modmsk = ARMV8_ATTRS, - .code = 0x1F, -@@ -556,6 +564,274 @@ static const arm_entry_t arm_thunderx2_pe[]={ - .code = 0x91, - .desc = "Release consistency instruction speculatively executed (store-release)" - }, -- -- /* END Cavium ThunderX2 specific events */ -+ {.name = "L1D_LHS_VANOTP", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xC1, -+ .desc = "A Load hit store retry" -+ }, -+ {.name = "L1D_LHS_OVRLAP", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xC2, -+ .desc = "A Load hit store retry, VA match, PA mismatch" -+ }, -+ {.name = "L1D_LHS_VANOSD", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xC3, -+ .desc = "A Load hit store retry, VA match, store data not issued" -+ }, -+ {.name = "L1D_LHS_FWD", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xC4, -+ .desc = "A Load hit store forwarding. 
Load completes" -+ }, -+ {.name = "L1D_BNKCFL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xC6, -+ .desc = "Bank conflict load retry" -+ }, -+ {.name = "L1D_LSMQ_FULL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xC7, -+ .desc = "LSMQ retry" -+ }, -+ {.name = "L1D_LSMQ_HIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xC8, -+ .desc = "LSMQ hit retry" -+ }, -+ {.name = "L1D_EXPB_MISS", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xC9, -+ .desc = "An external probe missed the L1" -+ }, -+ {.name = "L1D_L2EV_MISS", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xCA, -+ .desc = "An L2 evict operation missed the L1" -+ }, -+ {.name = "L1D_EXPB_HITM", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xCB, -+ .desc = "An external probe hit a modified line in the L1" -+ }, -+ {.name = "L1D_L2EV_HITM", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xCC, -+ .desc = "An L2 evict operation hit a modified line in the L1" -+ }, -+ {.name = "L1D_EXPB_HIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xCD, -+ .desc = "An external probe hit in the L1" -+ }, -+ {.name = "L1D_L2EV_HIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xCE, -+ .desc = "An L2 evict operation hit in the L1" -+ }, -+ {.name = "L1D_EXPB_RETRY", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xCF, -+ .desc = "An external probe hit was retried" -+ }, -+ {.name = "L1D_L2EV_RETRY", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xD0, -+ .desc = "An L2 evict operation was retried" -+ }, -+ {.name = "L1D_ST_RMW", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xD1, -+ .desc = "A read modify write store was drained and updated the L1" -+ }, -+ {.name = "L1D_LSMQ00_LDREQ", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xD2, -+ .desc = "A load has allocated LSMQ entry 0" -+ }, -+ {.name = "L1D_LSMQ00_LDVLD", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xD3, -+ .desc = "LSMQ entry 0 was initiated by a load" -+ }, -+ {.name = "L1D_LSMQ15_STREQ", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xD4, -+ .desc = "A store was allocated LSMQ entry 15" -+ }, -+ {.name = "L1D_LSMQ15_STVLD", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xD5, -+ .desc = "LSMQ 
entry 15 was initiated by a store" -+ }, -+ {.name = "L1D_PB_FLUSH", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xD6, -+ .desc = "LRQ ordering flush" -+ }, -+ {.name = "BR_COND_MIS_PRED_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xE0, -+ .desc = "Conditional branch instruction executed, but mis-predicted" -+ }, -+ {.name = "BR_IND_MIS_PRED_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xE1, -+ .desc = "Indirect branch instruction executed, but mis-predicted" -+ }, -+ {.name = "BR_RETURN_MIS_PRED_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xE2, -+ .desc = "Return branch instruction executed, but mis-predicted" -+ }, -+ {.name = "OP_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xE8, -+ .desc = "Uops executed" -+ }, -+ {.name = "LD_OP_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xE9, -+ .desc = "Load uops executed" -+ }, -+ {.name = "ST_OP_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xEA, -+ .desc = "Store uops executed" -+ }, -+ {.name = "FUSED_OP_RETIRED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xEB, -+ .desc = "Fused uops executed" -+ }, -+ {.name = "IRQ_MASK", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xF8, -+ .desc = "Cumulative duration of a PSTATE.I interrupt mask set to 1" -+ }, -+ {.name = "FIQ_MASK", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xF9, -+ .desc = "Cumulative duration of a PSTATE.F interrupt mask set to 1" -+ }, -+ {.name = "SERROR_MASK", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0xFA, -+ .desc = "Cumulative duration of PSTATE.A interrupt mask set to 1" -+ }, -+ {.name = "WFIWFE_SLEEP", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x108, -+ .desc = "Number of cycles in which CPU is in low power mode due to WFI/WFE instruction" -+ }, -+ {.name = "L2TLB_4K_PAGE_MISS", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x127, -+ .desc = "L2 TLB lookup miss using 4K page size" -+ }, -+ {.name = "L2TLB_64K_PAGE_MISS", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x128, -+ .desc = "L2 TLB lookup miss using 64K page size" -+ }, -+ {.name = "L2TLB_2M_PAGE_MISS", -+ .modmsk = ARMV8_ATTRS, -+ 
.code = 0x129, -+ .desc = "L2 TLB lookup miss using 2M page size" -+ }, -+ {.name = "L2TLB_512M_PAGE_MISS", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x12A, -+ .desc = "L2 TLB lookup miss using 512M page size" -+ }, -+ {.name = "ISB_EMPTY", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x150, -+ .desc = "Number of cycles during which micro-op skid-buffer is empty" -+ }, -+ {.name = "ISB_FULL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x151, -+ .desc = "Number of cycles during which micro-op skid-buffer is back-pressuring decode" -+ }, -+ {.name = "STALL_NOTSELECTED", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x152, -+ .desc = "Number of cycles during which thread was available for dispatch but not selected" -+ }, -+ {.name = "ROB_RECYCLE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x153, -+ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to ROB full" -+ }, -+ {.name = "ISSQ_RECYCLE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x154, -+ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to ISSQ full" -+ }, -+ {.name = "GPR_RECYCLE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x155, -+ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to GPR full" -+ }, -+ {.name = "FPR_RECYCLE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x156, -+ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to FPR full" -+ }, -+ {.name = "LRQ_RECYCLE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x158, -+ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to LRQ full" -+ }, -+ {.name = "SRQ_RECYCLE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x159, -+ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to SRQ full" -+ }, -+ {.name = "BSR_RECYCLE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x15B, -+ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to BSR full" -+ }, -+ {.name = "UOPSFUSED", -+ .modmsk = ARMV8_ATTRS, -+ 
.code = 0x164, -+ .desc = "Number of fused micro-ops dispatched" -+ }, -+ {.name = "L2D_TLBI_INT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x20B, -+ .desc = "Internal mmu tlbi cacheops" -+ }, -+ {.name = "L2D_TLBI_EXT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x20C, -+ .desc = "External mmu tlbi cacheops" -+ }, -+ {.name = "L2D_HWPF_DMD_HIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x218, -+ .desc = "Scu ld/st requests that hit cache or msg for lines brought in by the hardware prefetcher" -+ }, -+ {.name = "L2D_HWPF_REQ_VAL", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x219, -+ .desc = "Scu hwpf requests into the pipeline" -+ }, -+ {.name = "L2D_HWPF_REQ_LD", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x21A, -+ .desc = "Scu hwpf ld requests into the pipeline" -+ }, -+ {.name = "L2D_HWPF_REQ_MISS", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x21B, -+ .desc = "Scu hwpf ld requests that miss" -+ }, -+ {.name = "L2D_HWPF_NEXT_LINE", -+ .modmsk = ARMV8_ATTRS, -+ .code = 0x21C, -+ .desc = "Scu hwpf next line requests generated" -+ }, - }; -From 0b050ca9ba2a2bf74f87fa3a8b4ed8aec9d1dfa8 Mon Sep 17 00:00:00 2001 -From: Shay Gal-On -Date: Wed, 23 Oct 2019 18:58:03 -0700 -Subject: [PATCH 1/4] ThunderX2 uncore support - -This patch adds ThundeX2 uncore PMUs support. 
- -The following uncore PMUs are added: -- tx2_llc0, tx2_llc1 (last level cache) -- tx2_dmc0, tx2_dmc1 (memory controller) - -Based on documentation available at: -https://www.marvell.com/documents/hrur6mybdvk5uki1w0z7/ - -Signed-off-by: Shay Gal-On ---- - include/perfmon/pfmlib.h | 5 ++ - lib/Makefile | 2 +- - lib/events/arm_cavium_tx2_events.h | 61 +++++++++++++ - lib/pfmlib_arm_armv8.c | 55 ++++++++++++ - lib/pfmlib_common.c | 4 + - lib/pfmlib_priv.h | 6 ++ - lib/pfmlib_tx2_unc_perf_event.c | 139 +++++++++++++++++++++++++++++ - tests/validate_arm64.c | 6 ++ - 8 files changed, 277 insertions(+), 1 deletion(-) - create mode 100644 lib/pfmlib_tx2_unc_perf_event.c - -diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h -index 09c673d..20d5feb 100644 ---- a/include/perfmon/pfmlib.h -+++ b/include/perfmon/pfmlib.h -@@ -546,6 +546,11 @@ typedef enum { - PFM_PMU_INTEL_KNM_UNC_UBOX, /* Intel Knights Mill Ubox uncore */ - PFM_PMU_INTEL_KNM_UNC_M2PCIE, /* Intel Knights Mill M2PCIe uncore */ - PFM_PMU_ARM_THUNDERX2, /* Cavium ThunderX2 */ -+ -+ PFM_PMU_ARM_THUNDERX2_DMC0, /* Cavium ThunderX2 DMC unit 0 uncore */ -+ PFM_PMU_ARM_THUNDERX2_DMC1, /* Cavium ThunderX2 DMC unit 1 uncore */ -+ PFM_PMU_ARM_THUNDERX2_LLC0, /* Cavium ThunderX2 LLC unit 0 uncore */ -+ PFM_PMU_ARM_THUNDERX2_LLC1, /* Cavium ThunderX2 LLC unit 1 uncore */ - /* MUST ADD NEW PMU MODELS HERE */ - - PFM_PMU_MAX /* end marker */ -diff --git a/lib/Makefile b/lib/Makefile -index 2eb3ebb..f45515d 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -188,7 +188,7 @@ SRCS += pfmlib_arm_perf_event.c - endif - - INCARCH = $(INC_ARM64) --SRCS += pfmlib_arm.c pfmlib_arm_armv8.c -+SRCS += pfmlib_arm.c pfmlib_arm_armv8.c pfmlib_tx2_unc_perf_event.c - CFLAGS += -DCONFIG_PFMLIB_ARCH_ARM64 - endif - -diff --git a/lib/events/arm_cavium_tx2_events.h b/lib/events/arm_cavium_tx2_events.h -index 198d33d..18d8931 100644 ---- a/lib/events/arm_cavium_tx2_events.h -+++ b/lib/events/arm_cavium_tx2_events.h -@@ -835,3 
+835,64 @@ static const arm_entry_t arm_thunderx2_pe[]={ - .desc = "Scu hwpf next line requests generated" - }, - }; -+ -+#define ARM_TX2_CORE_EVENT_COUNT (sizeof(arm_thunderx2_pe)/sizeof(arm_entry_t)) -+ -+/* L3C event IDs */ -+#define L3_EVENT_READ_REQ 0xD -+#define L3_EVENT_WRITEBACK_REQ 0xE -+#define L3_EVENT_EVICT_REQ 0x13 -+#define L3_EVENT_READ_HIT 0x17 -+#define L3_EVENT_MAX 0x18 -+ -+/* DMC event IDs */ -+#define DMC_EVENT_COUNT_CYCLES 0x1 -+#define DMC_EVENT_WRITE_TXNS 0xB -+#define DMC_EVENT_DATA_TRANSFERS 0xD -+#define DMC_EVENT_READ_TXNS 0xF -+#define DMC_EVENT_MAX 0x10 -+ -+static const arm_entry_t arm_thunderx2_unc_dmc_pe[]={ -+ {.name = "UNC_DMC_READS", -+ .modmsk = ARMV8_ATTRS, -+ .code = DMC_EVENT_READ_TXNS, -+ .desc = "Memory read transactions" -+ }, -+ {.name = "UNC_DMC_WRITES", -+ .modmsk = ARMV8_ATTRS, -+ .code = DMC_EVENT_WRITE_TXNS, -+ .desc = "Memory write transactions" -+ }, -+}; -+ -+#define ARM_TX2_CORE_DMC_COUNT (sizeof(arm_thunderx2_unc_dmc_pe)/sizeof(arm_entry_t)) -+ -+static const arm_entry_t arm_thunderx2_unc_llc_pe[]={ -+ {.name = "UNC_LLC_READ", -+ .modmsk = ARMV8_ATTRS, -+ .code = L3_EVENT_READ_REQ, -+ .desc = "Read requests to LLC" -+ }, -+ {.name = "UNC_LLC_EVICT", -+ .modmsk = ARMV8_ATTRS, -+ .code = L3_EVENT_EVICT_REQ, -+ .desc = "Evict requests to LLC" -+ }, -+ {.name = "UNC_LLC_READ_HIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = L3_EVENT_READ_HIT, -+ .desc = "Read requests to LLC which hit" -+ }, -+ {.name = "UNC_LLC_WB", -+ .modmsk = ARMV8_ATTRS, -+ .code = L3_EVENT_WRITEBACK_REQ, -+ .desc = "Writeback requests to LLC" -+ } -+}; -+ -+#define ARM_TX2_CORE_LLC_COUNT (sizeof(arm_thunderx2_unc_llc_pe)/sizeof(arm_entry_t)) -+//Uncore accessor functions -+int -+pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e); -+int -+pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); -diff --git a/lib/pfmlib_arm_armv8.c b/lib/pfmlib_arm_armv8.c -index 0a3313f..35ff70f 100644 ---- a/lib/pfmlib_arm_armv8.c -+++ 
b/lib/pfmlib_arm_armv8.c -@@ -203,3 +203,58 @@ pfmlib_pmu_t arm_thunderx2_support={ - .get_event_nattrs = pfm_arm_get_event_nattrs, - }; - -+// For uncore, each socket has a separate perf name, otherwise they are the same, use macro -+ -+#define DEFINE_TX2_DMC(n) \ -+pfmlib_pmu_t arm_thunderx2_dmc##n##_support={ \ -+ .desc = "Cavium ThunderX2 Node"#n" DMC", \ -+ .name = "tx2_dmc"#n, \ -+ .perf_name = "uncore_dmc_"#n, \ -+ .pmu = PFM_PMU_ARM_THUNDERX2_DMC##n, \ -+ .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_dmc_pe), \ -+ .type = PFM_PMU_TYPE_UNCORE, \ -+ .pe = arm_thunderx2_unc_dmc_pe, \ -+ .pmu_detect = pfm_arm_detect_thunderx2, \ -+ .max_encoding = 1, \ -+ .num_cntrs = 4, \ -+ .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ -+ PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ -+ .get_event_first = pfm_arm_get_event_first, \ -+ .get_event_next = pfm_arm_get_event_next, \ -+ .event_is_valid = pfm_arm_event_is_valid, \ -+ .validate_table = pfm_arm_validate_table, \ -+ .get_event_info = pfm_arm_get_event_info, \ -+ .get_event_attr_info = pfm_arm_get_event_attr_info, \ -+ PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ -+ .get_event_nattrs = pfm_arm_get_event_nattrs, \ -+}; -+ -+DEFINE_TX2_DMC(0); -+DEFINE_TX2_DMC(1); -+ -+#define DEFINE_TX2_LLC(n) \ -+pfmlib_pmu_t arm_thunderx2_llc##n##_support={ \ -+ .desc = "Cavium ThunderX2 node "#n" LLC", \ -+ .name = "tx2_llc"#n, \ -+ .perf_name = "uncore_l3c_"#n, \ -+ .pmu = PFM_PMU_ARM_THUNDERX2_LLC##n, \ -+ .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_llc_pe), \ -+ .type = PFM_PMU_TYPE_UNCORE, \ -+ .pe = arm_thunderx2_unc_llc_pe, \ -+ .pmu_detect = pfm_arm_detect_thunderx2, \ -+ .max_encoding = 1, \ -+ .num_cntrs = 4, \ -+ .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ -+ PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ -+ .get_event_first = pfm_arm_get_event_first, \ -+ .get_event_next = pfm_arm_get_event_next, \ -+ .event_is_valid = pfm_arm_event_is_valid, \ 
-+ .validate_table = pfm_arm_validate_table, \ -+ .get_event_info = pfm_arm_get_event_info, \ -+ .get_event_attr_info = pfm_arm_get_event_attr_info, \ -+ PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ -+ .get_event_nattrs = pfm_arm_get_event_nattrs, \ -+}; -+ -+DEFINE_TX2_LLC(0); -+DEFINE_TX2_LLC(1); -diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c -index 2b6cbb4..8314d4b 100644 ---- a/lib/pfmlib_common.c -+++ b/lib/pfmlib_common.c -@@ -490,6 +490,10 @@ static pfmlib_pmu_t *pfmlib_pmus[]= - &arm_cortex_a53_support, - &arm_xgene_support, - &arm_thunderx2_support, -+ &arm_thunderx2_dmc0_support, -+ &arm_thunderx2_dmc1_support, -+ &arm_thunderx2_llc0_support, -+ &arm_thunderx2_llc1_support, - #endif - - #ifdef CONFIG_PFMLIB_ARCH_S390X -diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h -index b0070a6..cb83f43 100644 ---- a/lib/pfmlib_priv.h -+++ b/lib/pfmlib_priv.h -@@ -644,7 +644,13 @@ extern pfmlib_pmu_t arm_qcom_krait_support; - extern pfmlib_pmu_t arm_cortex_a57_support; - extern pfmlib_pmu_t arm_cortex_a53_support; - extern pfmlib_pmu_t arm_xgene_support; -+ - extern pfmlib_pmu_t arm_thunderx2_support; -+extern pfmlib_pmu_t arm_thunderx2_dmc0_support; -+extern pfmlib_pmu_t arm_thunderx2_dmc1_support; -+extern pfmlib_pmu_t arm_thunderx2_llc0_support; -+extern pfmlib_pmu_t arm_thunderx2_llc1_support; -+ - extern pfmlib_pmu_t mips_74k_support; - extern pfmlib_pmu_t s390x_cpum_cf_support; - extern pfmlib_pmu_t s390x_cpum_sf_support; -diff --git a/lib/pfmlib_tx2_unc_perf_event.c b/lib/pfmlib_tx2_unc_perf_event.c -new file mode 100644 -index 0000000..1a04e1d ---- /dev/null -+++ b/lib/pfmlib_tx2_unc_perf_event.c -@@ -0,0 +1,139 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* private headers */ -+#include "pfmlib_priv.h" -+#include "pfmlib_perf_event_priv.h" -+#include "pfmlib_arm_priv.h" -+ -+typedef union { -+ uint64_t val; -+ struct { -+ unsigned long unc_event:8; /* event code */ -+ unsigned long unc_umask:8; /* unit mask 
*/ -+ unsigned long unc_res1:1; /* reserved */ -+ unsigned long unc_rst:1; /* reset */ -+ unsigned long unc_edge:1; /* edge detect */ -+ unsigned long unc_res2:3; /* reserved */ -+ unsigned long unc_en:1; /* enable */ -+ unsigned long unc_inv:1; /* invert counter mask */ -+ unsigned long unc_thres:8; /* counter mask */ -+ unsigned long unc_res3:32; /* reserved */ -+ } com; /* covers common fields for DMC/L3C */ -+} tx2_unc_data_t; -+ -+static void -+display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg); -+static void -+display_com(void *this, pfmlib_event_desc_t *e, void *val); -+static int -+find_pmu_type_by_name(const char *name); -+ -+int -+pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e) -+{ -+ //from pe field in for the uncore, get the array with all the event defs -+ const arm_entry_t *event_list = this_pe(this); -+ tx2_unc_data_t reg; -+ //get code for the event from the table -+ reg.val = event_list[e->event].code; -+ //pass the data back to the caller -+ e->codes[0] = reg.val; -+ e->count = 1; -+ evt_strcat(e->fstr, "%s", event_list[e->event].name); -+ display_reg(this, e, reg); -+ return PFM_SUCCESS; -+} -+ -+int -+pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e) -+{ -+ pfmlib_pmu_t *pmu = this; -+ struct perf_event_attr *attr = e->os_data; -+ tx2_unc_data_t reg; -+ int ret; -+ -+ if (!pmu->get_event_encoding[PFM_OS_NONE]) -+ return PFM_ERR_NOTSUPP; -+ -+ ret = pmu->get_event_encoding[PFM_OS_NONE](this, e); -+ if (ret != PFM_SUCCESS) -+ return ret; -+ //get pmu type to probe -+ ret = find_pmu_type_by_name(pmu->perf_name); -+ if (ret < 0) -+ return ret; -+ -+ attr->type = ret; -+ //get code to provide to the uncore pmu probe -+ reg.val = e->codes[0]; -+ attr->config = reg.val; -+ -+ // if needed, can use attr->config1 or attr->config2 for extra info from event structure defines e->codes[i] -+ -+ // uncore measures at all priv levels -+ attr->exclude_hv = 0; -+ attr->exclude_kernel = 0; -+ attr->exclude_user 
= 0; -+ -+ return PFM_SUCCESS; -+} -+ -+ -+static void -+display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg) -+{ -+ pfmlib_pmu_t *pmu = this; -+ if (pmu->display_reg) -+ pmu->display_reg(this, e, ®); -+ else -+ display_com(this, e, ®); -+} -+ -+static void -+display_com(void *this, pfmlib_event_desc_t *e, void *val) -+{ -+ const arm_entry_t *pe = this_pe(this); -+ tx2_unc_data_t *reg = val; -+ -+ __pfm_vbprintf("[UNC=0x%"PRIx64" event=0x%x umask=0x%x en=%d " -+ "inv=%d edge=%d thres=%d] %s\n", -+ reg->val, -+ reg->com.unc_event, -+ reg->com.unc_umask, -+ reg->com.unc_en, -+ reg->com.unc_inv, -+ reg->com.unc_edge, -+ reg->com.unc_thres, -+ pe[e->event].name); -+} -+ -+static int -+find_pmu_type_by_name(const char *name) -+{ -+ char filename[PATH_MAX]; -+ FILE *fp; -+ int ret, type; -+ -+ if (!name) -+ return PFM_ERR_NOTSUPP; -+ -+ sprintf(filename, "/sys/bus/event_source/devices/%s/type", name); -+ -+ fp = fopen(filename, "r"); -+ if (!fp) -+ return PFM_ERR_NOTSUPP; -+ -+ ret = fscanf(fp, "%d", &type); -+ if (ret != 1) -+ type = PFM_ERR_NOTSUPP; -+ -+ fclose(fp); -+ -+ return type; -+} -+ -diff --git a/tests/validate_arm64.c b/tests/validate_arm64.c -index f7f021a..35eb6ef 100644 ---- a/tests/validate_arm64.c -+++ b/tests/validate_arm64.c -@@ -177,6 +177,12 @@ static const test_event_t arm64_test_events[]={ - .codes[0] = 0x8000008, - .fstr = "arm_thunderx2::INST_RETIRED:k=1:u=1:hv=0", - }, -+ { SRC_LINE, -+ .name = "tx2_dmc1::UNC_DMC_READS", -+ .ret = PFM_SUCCESS, -+ .count = 1, -+ .codes[0] = 0xf, -+ }, - }; - #define NUM_TEST_EVENTS (int)(sizeof(arm64_test_events)/sizeof(test_event_t)) - --- -2.21.0 - -From 6641952170c23c5ab69c1af19197a9d8284c1e53 Mon Sep 17 00:00:00 2001 -From: Shay Gal-On -Date: Thu, 21 Nov 2019 10:41:26 -0800 -Subject: [PATCH 2/4] Moved TX2 uncore event to separate file - -To make event files cleaner. -Also added link to marvell doc publishing the uncore event lists. 
- -Signed-off-by: Shay Gal-On ---- - lib/Makefile | 8 ++- - lib/events/arm_cavium_tx2_events.h | 61 ----------------- - lib/events/arm_marvell_tx2_unc_events.h | 90 +++++++++++++++++++++++++ - lib/pfmlib_arm_armv8.c | 3 +- - 4 files changed, 97 insertions(+), 65 deletions(-) - create mode 100755 lib/events/arm_marvell_tx2_unc_events.h - -diff --git a/lib/Makefile b/lib/Makefile -index f45515d..686264b 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -360,11 +360,13 @@ INC_ARM=pfmlib_arm_priv.h \ - events/arm_cortex_a15_events.h \ - events/arm_cortex_a57_events.h \ - events/arm_cortex_a53_events.h \ -- events/arm_cavium_tx2_events.h -+ events/arm_cavium_tx2_events.h \ -+ events/arm_marvell_tx2_unc_events.h - - INC_ARM64=events/arm_cortex_a57_events.h \ -- events/arm_cortex_a53_events.h \ -- events/arm_cavium_tx2_events.h -+ events/arm_cortex_a53_events.h \ -+ events/arm_cavium_tx2_events.h \ -+ events/arm_marvell_tx2_unc_events.h - - INCDEP=$(INC_COMMON) $(INCARCH) - -diff --git a/lib/events/arm_cavium_tx2_events.h b/lib/events/arm_cavium_tx2_events.h -index 18d8931..198d33d 100644 ---- a/lib/events/arm_cavium_tx2_events.h -+++ b/lib/events/arm_cavium_tx2_events.h -@@ -835,64 +835,3 @@ static const arm_entry_t arm_thunderx2_pe[]={ - .desc = "Scu hwpf next line requests generated" - }, - }; -- --#define ARM_TX2_CORE_EVENT_COUNT (sizeof(arm_thunderx2_pe)/sizeof(arm_entry_t)) -- --/* L3C event IDs */ --#define L3_EVENT_READ_REQ 0xD --#define L3_EVENT_WRITEBACK_REQ 0xE --#define L3_EVENT_EVICT_REQ 0x13 --#define L3_EVENT_READ_HIT 0x17 --#define L3_EVENT_MAX 0x18 -- --/* DMC event IDs */ --#define DMC_EVENT_COUNT_CYCLES 0x1 --#define DMC_EVENT_WRITE_TXNS 0xB --#define DMC_EVENT_DATA_TRANSFERS 0xD --#define DMC_EVENT_READ_TXNS 0xF --#define DMC_EVENT_MAX 0x10 -- --static const arm_entry_t arm_thunderx2_unc_dmc_pe[]={ -- {.name = "UNC_DMC_READS", -- .modmsk = ARMV8_ATTRS, -- .code = DMC_EVENT_READ_TXNS, -- .desc = "Memory read transactions" -- }, -- {.name = 
"UNC_DMC_WRITES", -- .modmsk = ARMV8_ATTRS, -- .code = DMC_EVENT_WRITE_TXNS, -- .desc = "Memory write transactions" -- }, --}; -- --#define ARM_TX2_CORE_DMC_COUNT (sizeof(arm_thunderx2_unc_dmc_pe)/sizeof(arm_entry_t)) -- --static const arm_entry_t arm_thunderx2_unc_llc_pe[]={ -- {.name = "UNC_LLC_READ", -- .modmsk = ARMV8_ATTRS, -- .code = L3_EVENT_READ_REQ, -- .desc = "Read requests to LLC" -- }, -- {.name = "UNC_LLC_EVICT", -- .modmsk = ARMV8_ATTRS, -- .code = L3_EVENT_EVICT_REQ, -- .desc = "Evict requests to LLC" -- }, -- {.name = "UNC_LLC_READ_HIT", -- .modmsk = ARMV8_ATTRS, -- .code = L3_EVENT_READ_HIT, -- .desc = "Read requests to LLC which hit" -- }, -- {.name = "UNC_LLC_WB", -- .modmsk = ARMV8_ATTRS, -- .code = L3_EVENT_WRITEBACK_REQ, -- .desc = "Writeback requests to LLC" -- } --}; -- --#define ARM_TX2_CORE_LLC_COUNT (sizeof(arm_thunderx2_unc_llc_pe)/sizeof(arm_entry_t)) --//Uncore accessor functions --int --pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e); --int --pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); -diff --git a/lib/events/arm_marvell_tx2_unc_events.h b/lib/events/arm_marvell_tx2_unc_events.h -new file mode 100755 -index 0000000..9b0a1b4 ---- /dev/null -+++ b/lib/events/arm_marvell_tx2_unc_events.h -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (c) 2019 Marvell Technology Group Ltd -+ * Contributed by Shay Gal-On -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -+ * of the Software, and to permit persons to whom the Software is furnished to do so, -+ * subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in all -+ * copies or substantial portions of the Software. 
-+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ * -+ * Marvell ThunderX2 -+ * -+ * ARM Architecture Reference Manual, ARMv8, for ARMv8-A architecture profile, -+ * ARM DDI 0487B.a (ID033117) -+ * -+ * Marvell ThunderX2 C99XX Core and Uncore PMU Events (Abridged) can be found at -+ * https://www.marvell.com/documents/hrur6mybdvk5uki1w0z7/ -+ * -+ */ -+ -+ -+/* L3C event IDs */ -+#define L3_EVENT_READ_REQ 0xD -+#define L3_EVENT_WRITEBACK_REQ 0xE -+#define L3_EVENT_EVICT_REQ 0x13 -+#define L3_EVENT_READ_HIT 0x17 -+#define L3_EVENT_MAX 0x18 -+ -+/* DMC event IDs */ -+#define DMC_EVENT_COUNT_CYCLES 0x1 -+#define DMC_EVENT_WRITE_TXNS 0xB -+#define DMC_EVENT_DATA_TRANSFERS 0xD -+#define DMC_EVENT_READ_TXNS 0xF -+#define DMC_EVENT_MAX 0x10 -+ -+static const arm_entry_t arm_thunderx2_unc_dmc_pe[]={ -+ {.name = "UNC_DMC_READS", -+ .modmsk = ARMV8_ATTRS, -+ .code = DMC_EVENT_READ_TXNS, -+ .desc = "Memory read transactions" -+ }, -+ {.name = "UNC_DMC_WRITES", -+ .modmsk = ARMV8_ATTRS, -+ .code = DMC_EVENT_WRITE_TXNS, -+ .desc = "Memory write transactions" -+ }, -+}; -+ -+#define ARM_TX2_CORE_DMC_COUNT (sizeof(arm_thunderx2_unc_dmc_pe)/sizeof(arm_entry_t)) -+ -+static const arm_entry_t arm_thunderx2_unc_llc_pe[]={ -+ {.name = "UNC_LLC_READ", -+ .modmsk = ARMV8_ATTRS, -+ .code = L3_EVENT_READ_REQ, -+ .desc = "Read requests to LLC" -+ }, -+ {.name = "UNC_LLC_EVICT", -+ .modmsk = ARMV8_ATTRS, -+ .code = L3_EVENT_EVICT_REQ, -+ .desc = "Evict requests to LLC" -+ }, -+ {.name = "UNC_LLC_READ_HIT", -+ .modmsk = ARMV8_ATTRS, -+ .code = 
L3_EVENT_READ_HIT, -+ .desc = "Read requests to LLC which hit" -+ }, -+ {.name = "UNC_LLC_WB", -+ .modmsk = ARMV8_ATTRS, -+ .code = L3_EVENT_WRITEBACK_REQ, -+ .desc = "Writeback requests to LLC" -+ } -+}; -+ -+#define ARM_TX2_CORE_LLC_COUNT (sizeof(arm_thunderx2_unc_llc_pe)/sizeof(arm_entry_t)) -+//Uncore accessor functions -+int -+pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e); -+int -+pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); -diff --git a/lib/pfmlib_arm_armv8.c b/lib/pfmlib_arm_armv8.c -index 35ff70f..291ac60 100644 ---- a/lib/pfmlib_arm_armv8.c -+++ b/lib/pfmlib_arm_armv8.c -@@ -33,7 +33,8 @@ - #include "events/arm_cortex_a57_events.h" /* A57 event tables */ - #include "events/arm_cortex_a53_events.h" /* A53 event tables */ - #include "events/arm_xgene_events.h" /* Applied Micro X-Gene tables */ --#include "events/arm_cavium_tx2_events.h" /* Cavium ThunderX2 tables */ -+#include "events/arm_cavium_tx2_events.h" /* Marvell ThunderX2 tables */ -+#include "events/arm_marvell_tx2_unc_events.h" /* Marvell ThunderX2 PMU tables */ - - static int - pfm_arm_detect_cortex_a57(void *this) --- -2.21.0 - -From dc1da4573eb8d24bdf64b9bb5e04ed956075d712 Mon Sep 17 00:00:00 2001 -From: Shay Gal-On -Date: Mon, 25 Nov 2019 12:00:15 -0800 -Subject: [PATCH 3/4] Add ThunderX2 DMC events and CCPI events - -This patch adds missing 2 DMC events for ThunderX2 -and adds support for the Cross Core Complex Interconnect -(CCPI) PMU and events. 
- -The following PMU models are added: - - tx2_ccpi0, tx2_ccpi1 - - tx2_dmc0, tx2_dmc1 - -Signed-off-by: Shay Gal-On ---- - include/perfmon/pfmlib.h | 12 +-- - lib/Makefile | 2 +- - lib/events/arm_marvell_tx2_unc_events.h | 42 ++++++++++ - lib/pfmlib_arm_armv8.c | 33 +++++++- - lib/pfmlib_common.c | 8 ++ - lib/pfmlib_priv.h | 2 + - lib/pfmlib_tx2_unc_perf_event.c | 101 ++++++++++-------------- - tests/validate_arm64.c | 15 ++++ - 8 files changed, 148 insertions(+), 67 deletions(-) - -diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h -index 20d5feb..3f1d2f5 100644 ---- a/include/perfmon/pfmlib.h -+++ b/include/perfmon/pfmlib.h -@@ -543,12 +543,14 @@ typedef enum { - - PFM_PMU_INTEL_KNM_UNC_UBOX, /* Intel Knights Mill Ubox uncore */ - PFM_PMU_INTEL_KNM_UNC_M2PCIE, /* Intel Knights Mill M2PCIe uncore */ -- PFM_PMU_ARM_THUNDERX2, /* Cavium ThunderX2 */ -+ PFM_PMU_ARM_THUNDERX2, /* Marvell ThunderX2 */ - -- PFM_PMU_ARM_THUNDERX2_DMC0, /* Cavium ThunderX2 DMC unit 0 uncore */ -- PFM_PMU_ARM_THUNDERX2_DMC1, /* Cavium ThunderX2 DMC unit 1 uncore */ -- PFM_PMU_ARM_THUNDERX2_LLC0, /* Cavium ThunderX2 LLC unit 0 uncore */ -- PFM_PMU_ARM_THUNDERX2_LLC1, /* Cavium ThunderX2 LLC unit 1 uncore */ -+ PFM_PMU_ARM_THUNDERX2_DMC0, /* Marvell ThunderX2 DMC unit 0 uncore */ -+ PFM_PMU_ARM_THUNDERX2_DMC1, /* Marvell ThunderX2 DMC unit 1 uncore */ -+ PFM_PMU_ARM_THUNDERX2_LLC0, /* Marvell ThunderX2 LLC unit 0 uncore */ -+ PFM_PMU_ARM_THUNDERX2_LLC1, /* Marvell ThunderX2 LLC unit 1 uncore */ -+ PFM_PMU_ARM_THUNDERX2_CCPI0, /* Marvell ThunderX2 Cross-Socket Interconnect unit 0 uncore */ -+ PFM_PMU_ARM_THUNDERX2_CCPI1, /* Marvell ThunderX2 Cross-Socket Interconnect unit 1 uncore */ - /* MUST ADD NEW PMU MODELS HERE */ - - PFM_PMU_MAX /* end marker */ -diff --git a/lib/Makefile b/lib/Makefile -index 686264b..4a4dc3b 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -177,7 +177,7 @@ SRCS += pfmlib_arm_perf_event.c - endif - - INCARCH = $(INC_ARM) --SRCS += pfmlib_arm.c 
pfmlib_arm_armv7_pmuv1.c pfmlib_arm_armv6.c pfmlib_arm_armv8.c -+SRCS += pfmlib_arm.c pfmlib_arm_armv7_pmuv1.c pfmlib_arm_armv6.c pfmlib_arm_armv8.c pfmlib_tx2_unc_perf_event.c - CFLAGS += -DCONFIG_PFMLIB_ARCH_ARM - endif - -diff --git a/lib/events/arm_marvell_tx2_unc_events.h b/lib/events/arm_marvell_tx2_unc_events.h -index 9b0a1b4..51e6b4d 100755 ---- a/lib/events/arm_marvell_tx2_unc_events.h -+++ b/lib/events/arm_marvell_tx2_unc_events.h -@@ -44,6 +44,13 @@ - #define DMC_EVENT_READ_TXNS 0xF - #define DMC_EVENT_MAX 0x10 - -+/* CCPI event IDs */ -+#define CCPI2_EVENT_REQ_PKT_SENT 0x3D -+#define CCPI2_EVENT_SNOOP_PKT_SENT 0x65 -+#define CCPI2_EVENT_DATA_PKT_SENT 0x105 -+#define CCPI2_EVENT_GIC_PKT_SENT 0x12D -+ -+ - static const arm_entry_t arm_thunderx2_unc_dmc_pe[]={ - {.name = "UNC_DMC_READS", - .modmsk = ARMV8_ATTRS, -@@ -55,10 +62,45 @@ static const arm_entry_t arm_thunderx2_unc_dmc_pe[]={ - .code = DMC_EVENT_WRITE_TXNS, - .desc = "Memory write transactions" - }, -+ {.name = "UNC_DMC_DATA_TRANSFERS", -+ .modmsk = ARMV8_ATTRS, -+ .code = DMC_EVENT_DATA_TRANSFERS, -+ .desc = "Memory data transfers" -+ }, -+ {.name = "UNC_DMC_CYCLES", -+ .modmsk = ARMV8_ATTRS, -+ .code = DMC_EVENT_COUNT_CYCLES, -+ .desc = "Clocks at the DMC clock rate" -+ } - }; - - #define ARM_TX2_CORE_DMC_COUNT (sizeof(arm_thunderx2_unc_dmc_pe)/sizeof(arm_entry_t)) - -+static const arm_entry_t arm_thunderx2_unc_ccpi_pe[]={ -+ {.name = "UNC_CCPI_REQ", -+ .modmsk = ARMV8_ATTRS, -+ .code = CCPI2_EVENT_REQ_PKT_SENT, -+ .desc = "Request packets sent from this node" -+ }, -+ {.name = "UNC_CCPI_SNOOP", -+ .modmsk = ARMV8_ATTRS, -+ .code = CCPI2_EVENT_SNOOP_PKT_SENT, -+ .desc = "Snoop packets sent from this node" -+ }, -+ {.name = "UNC_CCPI_DATA", -+ .modmsk = ARMV8_ATTRS, -+ .code = CCPI2_EVENT_DATA_PKT_SENT , -+ .desc = "Data packets sent from this node" -+ }, -+ {.name = "UNC_CCPI_GIC", -+ .modmsk = ARMV8_ATTRS, -+ .code = CCPI2_EVENT_GIC_PKT_SENT, -+ .desc = "Interrupt related packets sent from 
this node" -+ } -+}; -+ -+#define ARM_TX2_CORE_CCPI_COUNT (sizeof(arm_thunderx2_unc_ccpi_pe)/sizeof(arm_entry_t)) -+ - static const arm_entry_t arm_thunderx2_unc_llc_pe[]={ - {.name = "UNC_LLC_READ", - .modmsk = ARMV8_ATTRS, -diff --git a/lib/pfmlib_arm_armv8.c b/lib/pfmlib_arm_armv8.c -index 291ac60..a252951 100644 ---- a/lib/pfmlib_arm_armv8.c -+++ b/lib/pfmlib_arm_armv8.c -@@ -179,7 +179,7 @@ pfmlib_pmu_t arm_xgene_support={ - .get_event_nattrs = pfm_arm_get_event_nattrs, - }; - --/* Cavium ThunderX2 support */ -+/* Marvell ThunderX2 support */ - pfmlib_pmu_t arm_thunderx2_support={ - .desc = "Cavium ThunderX2", - .name = "arm_thunderx2", -@@ -208,7 +208,7 @@ pfmlib_pmu_t arm_thunderx2_support={ - - #define DEFINE_TX2_DMC(n) \ - pfmlib_pmu_t arm_thunderx2_dmc##n##_support={ \ -- .desc = "Cavium ThunderX2 Node"#n" DMC", \ -+ .desc = "Marvell ThunderX2 Node"#n" DMC", \ - .name = "tx2_dmc"#n, \ - .perf_name = "uncore_dmc_"#n, \ - .pmu = PFM_PMU_ARM_THUNDERX2_DMC##n, \ -@@ -235,7 +235,7 @@ DEFINE_TX2_DMC(1); - - #define DEFINE_TX2_LLC(n) \ - pfmlib_pmu_t arm_thunderx2_llc##n##_support={ \ -- .desc = "Cavium ThunderX2 node "#n" LLC", \ -+ .desc = "Marvell ThunderX2 node "#n" LLC", \ - .name = "tx2_llc"#n, \ - .perf_name = "uncore_l3c_"#n, \ - .pmu = PFM_PMU_ARM_THUNDERX2_LLC##n, \ -@@ -259,3 +259,30 @@ pfmlib_pmu_t arm_thunderx2_llc##n##_support={ \ - - DEFINE_TX2_LLC(0); - DEFINE_TX2_LLC(1); -+ -+#define DEFINE_TX2_CCPI(n) \ -+pfmlib_pmu_t arm_thunderx2_ccpi##n##_support={ \ -+ .desc = "Marvell ThunderX2 node "#n" Cross-Socket Interconnect", \ -+ .name = "tx2_ccpi"#n, \ -+ .perf_name = "uncore_ccpi_"#n, \ -+ .pmu = PFM_PMU_ARM_THUNDERX2_CCPI##n, \ -+ .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_ccpi_pe), \ -+ .type = PFM_PMU_TYPE_UNCORE, \ -+ .pe = arm_thunderx2_unc_ccpi_pe, \ -+ .pmu_detect = pfm_arm_detect_thunderx2, \ -+ .max_encoding = 1, \ -+ .num_cntrs = 4, \ -+ .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ -+ 
PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ -+ .get_event_first = pfm_arm_get_event_first, \ -+ .get_event_next = pfm_arm_get_event_next, \ -+ .event_is_valid = pfm_arm_event_is_valid, \ -+ .validate_table = pfm_arm_validate_table, \ -+ .get_event_info = pfm_arm_get_event_info, \ -+ .get_event_attr_info = pfm_arm_get_event_attr_info, \ -+ PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ -+ .get_event_nattrs = pfm_arm_get_event_nattrs, \ -+}; -+ -+DEFINE_TX2_CCPI(0); -+DEFINE_TX2_CCPI(1); -diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c -index 8314d4b..8cb8998 100644 ---- a/lib/pfmlib_common.c -+++ b/lib/pfmlib_common.c -@@ -484,6 +484,12 @@ static pfmlib_pmu_t *pfmlib_pmus[]= - &arm_cortex_a53_support, - &arm_xgene_support, - &arm_thunderx2_support, -+ &arm_thunderx2_dmc0_support, -+ &arm_thunderx2_dmc1_support, -+ &arm_thunderx2_llc0_support, -+ &arm_thunderx2_llc1_support, -+ &arm_thunderx2_ccpi0_support, -+ &arm_thunderx2_ccpi1_support, - #endif - #ifdef CONFIG_PFMLIB_ARCH_ARM64 - &arm_cortex_a57_support, -@@ -494,6 +500,8 @@ static pfmlib_pmu_t *pfmlib_pmus[]= - &arm_thunderx2_dmc1_support, - &arm_thunderx2_llc0_support, - &arm_thunderx2_llc1_support, -+ &arm_thunderx2_ccpi0_support, -+ &arm_thunderx2_ccpi1_support, - #endif - - #ifdef CONFIG_PFMLIB_ARCH_S390X -diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h -index cb83f43..1340a6b 100644 ---- a/lib/pfmlib_priv.h -+++ b/lib/pfmlib_priv.h -@@ -650,6 +650,8 @@ extern pfmlib_pmu_t arm_thunderx2_dmc0_support; - extern pfmlib_pmu_t arm_thunderx2_dmc1_support; - extern pfmlib_pmu_t arm_thunderx2_llc0_support; - extern pfmlib_pmu_t arm_thunderx2_llc1_support; -+extern pfmlib_pmu_t arm_thunderx2_ccpi0_support; -+extern pfmlib_pmu_t arm_thunderx2_ccpi1_support; - - extern pfmlib_pmu_t mips_74k_support; - extern pfmlib_pmu_t s390x_cpum_cf_support; -diff --git a/lib/pfmlib_tx2_unc_perf_event.c b/lib/pfmlib_tx2_unc_perf_event.c -index 1a04e1d..7dc2372 100644 ---- a/lib/pfmlib_tx2_unc_perf_event.c 
-+++ b/lib/pfmlib_tx2_unc_perf_event.c -@@ -27,11 +27,51 @@ typedef union { - } tx2_unc_data_t; - - static void --display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg); -+display_com(void *this, pfmlib_event_desc_t *e, void *val) -+{ -+ const arm_entry_t *pe = this_pe(this); -+ tx2_unc_data_t *reg = val; -+ -+ __pfm_vbprintf("[UNC=0x%"PRIx64"] %s\n", -+ reg->val, -+ pe[e->event].name); -+} -+ - static void --display_com(void *this, pfmlib_event_desc_t *e, void *val); -+display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg) -+{ -+ pfmlib_pmu_t *pmu = this; -+ if (pmu->display_reg) -+ pmu->display_reg(this, e, ®); -+ else -+ display_com(this, e, ®); -+} -+ -+ - static int --find_pmu_type_by_name(const char *name); -+find_pmu_type_by_name(const char *name) -+{ -+ char filename[PATH_MAX]; -+ FILE *fp; -+ int ret, type; -+ -+ if (!name) -+ return PFM_ERR_NOTSUPP; -+ -+ sprintf(filename, "/sys/bus/event_source/devices/%s/type", name); -+ -+ fp = fopen(filename, "r"); -+ if (!fp) -+ return PFM_ERR_NOTSUPP; -+ -+ ret = fscanf(fp, "%d", &type); -+ if (ret != 1) -+ type = PFM_ERR_NOTSUPP; -+ -+ fclose(fp); -+ -+ return type; -+} - - int - pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e) -@@ -82,58 +122,3 @@ pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e) - - return PFM_SUCCESS; - } -- -- --static void --display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg) --{ -- pfmlib_pmu_t *pmu = this; -- if (pmu->display_reg) -- pmu->display_reg(this, e, ®); -- else -- display_com(this, e, ®); --} -- --static void --display_com(void *this, pfmlib_event_desc_t *e, void *val) --{ -- const arm_entry_t *pe = this_pe(this); -- tx2_unc_data_t *reg = val; -- -- __pfm_vbprintf("[UNC=0x%"PRIx64" event=0x%x umask=0x%x en=%d " -- "inv=%d edge=%d thres=%d] %s\n", -- reg->val, -- reg->com.unc_event, -- reg->com.unc_umask, -- reg->com.unc_en, -- reg->com.unc_inv, -- reg->com.unc_edge, -- reg->com.unc_thres, -- 
pe[e->event].name); --} -- --static int --find_pmu_type_by_name(const char *name) --{ -- char filename[PATH_MAX]; -- FILE *fp; -- int ret, type; -- -- if (!name) -- return PFM_ERR_NOTSUPP; -- -- sprintf(filename, "/sys/bus/event_source/devices/%s/type", name); -- -- fp = fopen(filename, "r"); -- if (!fp) -- return PFM_ERR_NOTSUPP; -- -- ret = fscanf(fp, "%d", &type); -- if (ret != 1) -- type = PFM_ERR_NOTSUPP; -- -- fclose(fp); -- -- return type; --} -- -diff --git a/tests/validate_arm64.c b/tests/validate_arm64.c -index 35eb6ef..5cb1966 100644 ---- a/tests/validate_arm64.c -+++ b/tests/validate_arm64.c -@@ -182,6 +182,21 @@ static const test_event_t arm64_test_events[]={ - .ret = PFM_SUCCESS, - .count = 1, - .codes[0] = 0xf, -+ .fstr = "tx2_dmc1::UNC_DMC_READS", -+ }, -+ { SRC_LINE, -+ .name = "tx2_ccpi0::UNC_CCPI_GIC", -+ .ret = PFM_SUCCESS, -+ .count = 1, -+ .codes[0] = 0x12d, -+ .fstr = "tx2_ccpi0::UNC_CCPI_GIC", -+ }, -+ { SRC_LINE, -+ .name = "tx2_llc0::UNC_LLC_READ", -+ .ret = PFM_SUCCESS, -+ .count = 1, -+ .codes[0] = 0xd, -+ .fstr = "tx2_llc0::UNC_LLC_READ", - }, - }; - #define NUM_TEST_EVENTS (int)(sizeof(arm64_test_events)/sizeof(test_event_t)) --- -2.21.0 - -From e401d29e89b92e999615e11ea17808e90eda93fd Mon Sep 17 00:00:00 2001 -From: Shay Gal-On -Date: Tue, 3 Dec 2019 09:54:37 -0800 -Subject: [PATCH 4/4] Removed extra fields from tx2_unc_data_t - -Removed useless fields from tx2_unc_data_t. 
- -Signed-off-by: Shay Gal-On ---- - lib/events/arm_marvell_tx2_unc_events.h | 0 - lib/pfmlib_tx2_unc_perf_event.c | 13 ++----------- - 2 files changed, 2 insertions(+), 11 deletions(-) - mode change 100755 => 100644 lib/events/arm_marvell_tx2_unc_events.h - -diff --git a/lib/events/arm_marvell_tx2_unc_events.h b/lib/events/arm_marvell_tx2_unc_events.h -old mode 100755 -new mode 100644 -diff --git a/lib/pfmlib_tx2_unc_perf_event.c b/lib/pfmlib_tx2_unc_perf_event.c -index 7dc2372..154cb0a 100644 ---- a/lib/pfmlib_tx2_unc_perf_event.c -+++ b/lib/pfmlib_tx2_unc_perf_event.c -@@ -13,17 +13,8 @@ - typedef union { - uint64_t val; - struct { -- unsigned long unc_event:8; /* event code */ -- unsigned long unc_umask:8; /* unit mask */ -- unsigned long unc_res1:1; /* reserved */ -- unsigned long unc_rst:1; /* reset */ -- unsigned long unc_edge:1; /* edge detect */ -- unsigned long unc_res2:3; /* reserved */ -- unsigned long unc_en:1; /* enable */ -- unsigned long unc_inv:1; /* invert counter mask */ -- unsigned long unc_thres:8; /* counter mask */ -- unsigned long unc_res3:32; /* reserved */ -- } com; /* covers common fields for DMC/L3C */ -+ unsigned long unc_res1:32; /* reserved */ -+ } com; /* reserved space for future extensions */ - } tx2_unc_data_t; - - static void --- -2.21.0 - diff --git a/libpfm-zen23.patch b/libpfm-zen23.patch deleted file mode 100644 index 6aeee95..0000000 --- a/libpfm-zen23.patch +++ /dev/null @@ -1,5345 +0,0 @@ -commit 32af524f03eefac249d51d138b9a3065a1d07960 -Author: Stephane Eranian -Date: Tue Dec 17 22:19:50 2019 -0800 - - prepare for AMD Zen2 support - - Rename Fam17h event file to amd_fam17h_zen1.h. - - Because we used amd64_fam17h as the PMU name for AMD Fam17h Zen1, - we keep it for backward compatibility reasons. However we mark it - as deprecated. Instead we introduce amd_fam17h_zen1 which provides - the same events. Both pmu names are still valid, though, amd_fam17h_zen1 - is now the preferred choice. 
- - Signed-off-by: Stephane Eranian - -diff --git a/README b/README -index 9dea749..b588353 100644 ---- a/README -+++ b/README -@@ -39,7 +39,7 @@ The library supports many PMUs. The current version can handle: - AMD64 Fam14h (Bobcat) - AMD64 Fam15h (Bulldozer) (core and uncore) - AMD64 Fam16h (Jaguar) -- AMD64 Fam17h (Zen) -+ AMD64 Fam17h (Zen1) - - - For Intel X86: - Intel P6 (Pentium II, Pentium Pro, Pentium III, Pentium M) -diff --git a/docs/man3/libpfm_amd64_fam17h.3 b/docs/man3/libpfm_amd64_fam17h.3 -index a19653c..7925db2 100644 ---- a/docs/man3/libpfm_amd64_fam17h.3 -+++ b/docs/man3/libpfm_amd64_fam17h.3 -@@ -5,14 +5,16 @@ libpfm_amd64_fam17h - support for AMD64 Family 17h processors - .nf - .B #include - .sp --.B PMU name: amd64_fam15h --.B PMU desc: AMD64 Fam17h Zen -+.B PMU name: amd64_fam17h (deprecated), amd_fam17h_zen1 -+.B PMU desc: AMD64 Fam17h Zen1 - .sp - .SH DESCRIPTION --The library supports AMD Family 17h processors core PMU in both 32 and 64-bit modes. -+The library supports AMD Family 17h processors Zen1 core PMU in both 32 and 64-bit modes. -+The amd64_fam17h PMU model name has been deprecated in favor of amd_fam17_zen1. The old -+name is maintained for backward compatibility reasons, but should not be used anymore. - - .SH MODIFIERS --The following modifiers are supported on AMD64 Family 17h core PMU: -+The following modifiers are supported on AMD64 Family 17h Zen1 core PMU: - .TP - .B u - Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. 
-diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h -index 3f1d2f5..c8dc719 100644 ---- a/include/perfmon/pfmlib.h -+++ b/include/perfmon/pfmlib.h -@@ -419,7 +419,7 @@ typedef enum { - PFM_PMU_INTEL_BDX_UNC_SB2, /* Intel Broadwell-X S-Box 2 uncore */ - PFM_PMU_INTEL_BDX_UNC_SB3, /* Intel Broadwell-X S-Box 3 uncore */ - -- PFM_PMU_AMD64_FAM17H, /* AMD AMD64 Fam17h Zen */ -+ PFM_PMU_AMD64_FAM17H, /* AMD AMD64 Fam17h Zen1 (deprecated) */ - PFM_PMU_AMD64_FAM16H, /* AMD AMD64 Fam16h Jaguar */ - - PFM_PMU_INTEL_SKX, /* Intel Skylake-X */ -@@ -553,6 +553,8 @@ typedef enum { - PFM_PMU_ARM_THUNDERX2_CCPI1, /* Marvell ThunderX2 Cross-Socket Interconnect unit 1 uncore */ - - PFM_PMU_ARM_A64FX, /* Fujitsu A64FX processor */ -+ -+ PFM_PMU_AMD64_FAM17H_ZEN1, /* AMD AMD64 Fam17h Zen1 */ - /* MUST ADD NEW PMU MODELS HERE */ - - PFM_PMU_MAX /* end marker */ -diff --git a/lib/Makefile b/lib/Makefile -index 4a4dc3b..9610cc5 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -248,7 +248,7 @@ INC_X86= pfmlib_intel_x86_priv.h \ - events/amd64_events_fam12h.h \ - events/amd64_events_fam14h.h \ - events/amd64_events_fam15h.h \ -- events/amd64_events_fam17h.h \ -+ events/amd64_events_fam17h_zen1.h \ - events/amd64_events_fam16h.h \ - events/intel_p6_events.h \ - events/intel_netburst_events.h \ -diff --git a/lib/events/amd64_events_fam17h.h b/lib/events/amd64_events_fam17h_zen1.h -similarity index 83% -rename from lib/events/amd64_events_fam17h.h -rename to lib/events/amd64_events_fam17h_zen1.h -index e02559b..218ee8f 100644 ---- a/lib/events/amd64_events_fam17h.h -+++ b/lib/events/amd64_events_fam17h_zen1.h -@@ -24,7 +24,7 @@ - * PMU: amd64_fam17h (AMD64 Fam17h)) - */ - --static const amd64_umask_t amd64_fam17h_l1_itlb_miss_l2_itlb_miss[]={ -+static const amd64_umask_t amd64_fam17h_zen1_l1_itlb_miss_l2_itlb_miss[]={ - { .uname = "IF1G", - .udesc = "TBD", - .ucode = 0x4, -@@ -39,7 +39,7 @@ static const amd64_umask_t amd64_fam17h_l1_itlb_miss_l2_itlb_miss[]={ - }, - }; - 
--static const amd64_umask_t amd64_fam17h_retired_mmx_fp_instructions[]={ -+static const amd64_umask_t amd64_fam17h_zen1_retired_mmx_fp_instructions[]={ - { .uname = "SSE_INSTR", - .udesc = "TBD", - .ucode = 0x4, -@@ -54,7 +54,7 @@ static const amd64_umask_t amd64_fam17h_retired_mmx_fp_instructions[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_tagged_ibs_ops[]={ -+static const amd64_umask_t amd64_fam17h_zen1_tagged_ibs_ops[]={ - { .uname = "IBS_COUNT_ROLLOVER", - .udesc = "Number of times a uop could not be tagged by IBS because of a previous tagged uop that has not retired.", - .ucode = 0x4, -@@ -69,7 +69,7 @@ static const amd64_umask_t amd64_fam17h_tagged_ibs_ops[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_number_of_move_elimination_and_scalar_op_optimization[]={ -+static const amd64_umask_t amd64_fam17h_zen1_number_of_move_elimination_and_scalar_op_optimization[]={ - { .uname = "OPTIMIZED", - .udesc = "Number of scalar ops optimized.", - .ucode = 0x8, -@@ -88,7 +88,7 @@ static const amd64_umask_t amd64_fam17h_number_of_move_elimination_and_scalar_op - }, - }; - --static const amd64_umask_t amd64_fam17h_retired_sse_avx_operations[]={ -+static const amd64_umask_t amd64_fam17h_zen1_retired_sse_avx_operations[]={ - { .uname = "DP_MULT_ADD_FLOPS", - .udesc = "Double precision multiply-add flops.", - .ucode = 0x80, -@@ -123,7 +123,7 @@ static const amd64_umask_t amd64_fam17h_retired_sse_avx_operations[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_retired_serializing_ops[]={ -+static const amd64_umask_t amd64_fam17h_zen1_retired_serializing_ops[]={ - { .uname = "X87_CTRL_RET", - .udesc = "X87 control word mispredict traps due to mispredction in RC or PC, or changes in mask bits.", - .ucode = 0x8, -@@ -142,7 +142,7 @@ static const amd64_umask_t amd64_fam17h_retired_serializing_ops[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_retired_x87_floating_point_operations[]={ -+static const amd64_umask_t 
amd64_fam17h_zen1_retired_x87_floating_point_operations[]={ - { .uname = "DIV_SQR_R_OPS", - .udesc = "Divide and square root ops", - .ucode = 0x4, -@@ -157,7 +157,7 @@ static const amd64_umask_t amd64_fam17h_retired_x87_floating_point_operations[]= - }, - }; - --static const amd64_umask_t amd64_fam17h_fpu_pipe_assignment[]={ -+static const amd64_umask_t amd64_fam17h_zen1_fpu_pipe_assignment[]={ - { .uname = "DUAL3", - .udesc = "Total number of multi-pipe uops assigned to pipe3", - .ucode = 0x80, -@@ -192,7 +192,7 @@ static const amd64_umask_t amd64_fam17h_fpu_pipe_assignment[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_instruction_cache_lines_invalidated[]={ -+static const amd64_umask_t amd64_fam17h_zen1_instruction_cache_lines_invalidated[]={ - { .uname = "L2_INVALIDATING_PROBE", - .udesc = "IC line invalidated due to L2 invalidating probe (external or LS).", - .ucode = 0x2, -@@ -203,7 +203,7 @@ static const amd64_umask_t amd64_fam17h_instruction_cache_lines_invalidated[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_instruction_pipe_stall[]={ -+static const amd64_umask_t amd64_fam17h_zen1_instruction_pipe_stall[]={ - { .uname = "IC_STALL_ANY", - .udesc = "IC pipe was stalled during this clock cycle for any reason (nothing valud in pipe ICM1).", - .ucode = 0x4, -@@ -218,7 +218,7 @@ static const amd64_umask_t amd64_fam17h_instruction_pipe_stall[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_core_to_l2_cacheable_request_access_status[]={ -+static const amd64_umask_t amd64_fam17h_zen1_core_to_l2_cacheable_request_access_status[]={ - { .uname = "LS_RD_BLK_C_S", - .udesc = "Load/Store ReadBlock C/S hit", - .ucode = 0x80, -@@ -253,7 +253,7 @@ static const amd64_umask_t amd64_fam17h_core_to_l2_cacheable_request_access_stat - }, - }; - --static const amd64_umask_t amd64_fam17h_cycles_with_fill_pending_from_l2[]={ -+static const amd64_umask_t amd64_fam17h_zen1_cycles_with_fill_pending_from_l2[]={ - { .uname = "L2_FILL_BUSY", - .udesc = 
"TBD", - .ucode = 0x1, -@@ -261,7 +261,7 @@ static const amd64_umask_t amd64_fam17h_cycles_with_fill_pending_from_l2[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_l2_latency[]={ -+static const amd64_umask_t amd64_fam17h_zen1_l2_latency[]={ - { .uname = "L2_CYCLES_WAITING_ON_FILLS", - .udesc = "TBD", - .ucode = 0x1, -@@ -269,7 +269,7 @@ static const amd64_umask_t amd64_fam17h_l2_latency[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_requests_to_l2_group1[]={ -+static const amd64_umask_t amd64_fam17h_zen1_requests_to_l2_group1[]={ - { .uname = "RD_BLK_L", - .udesc = "TBD", - .ucode = 0x80, -@@ -304,7 +304,7 @@ static const amd64_umask_t amd64_fam17h_requests_to_l2_group1[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_requests_to_l2_group2[]={ -+static const amd64_umask_t amd64_fam17h_zen1_requests_to_l2_group2[]={ - { .uname = "GROUP1", - .udesc = "TBD", - .ucode = 0x80, -@@ -339,7 +339,7 @@ static const amd64_umask_t amd64_fam17h_requests_to_l2_group2[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_ls_to_l2_wbc_requests[]={ -+static const amd64_umask_t amd64_fam17h_zen1_ls_to_l2_wbc_requests[]={ - { .uname = "WCB_WRITE", - .udesc = "TBD", - .ucode = 0x40, -@@ -370,7 +370,7 @@ static const amd64_umask_t amd64_fam17h_ls_to_l2_wbc_requests[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_ls_dispatch[]={ -+static const amd64_umask_t amd64_fam17h_zen1_ls_dispatch[]={ - { .uname = "LD_ST_DISPATCH", - .udesc = "Load/Store uops dispatched.", - .ucode = 0x4, -@@ -385,7 +385,7 @@ static const amd64_umask_t amd64_fam17h_ls_dispatch[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_ineffective_software_prefetch[]={ -+static const amd64_umask_t amd64_fam17h_zen1_ineffective_software_prefetch[]={ - { .uname = "MAB_MCH_CNT", - .udesc = "TBD", - .ucode = 0x2, -@@ -396,7 +396,7 @@ static const amd64_umask_t amd64_fam17h_ineffective_software_prefetch[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_l1_dtlb_miss[]={ 
-+static const amd64_umask_t amd64_fam17h_zen1_l1_dtlb_miss[]={ - { .uname = "TLB_RELOAD_1G_L2_MISS", - .udesc = "TBD", - .ucode = 0x80, -@@ -431,7 +431,7 @@ static const amd64_umask_t amd64_fam17h_l1_dtlb_miss[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_locks[]={ -+static const amd64_umask_t amd64_fam17h_zen1_locks[]={ - { .uname = "SPEC_LOCK_MAP_COMMIT", - .udesc = "TBD", - .ucode = 0x8, -@@ -450,7 +450,7 @@ static const amd64_umask_t amd64_fam17h_locks[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_mab_allocation_by_pipe[]={ -+static const amd64_umask_t amd64_fam17h_zen1_mab_allocation_by_pipe[]={ - { .uname = "TLB_PIPE_EARLY", - .udesc = "TBD", - .ucode = 0x10, -@@ -473,7 +473,7 @@ static const amd64_umask_t amd64_fam17h_mab_allocation_by_pipe[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_prefetch_instructions_dispatched[]={ -+static const amd64_umask_t amd64_fam17h_zen1_prefetch_instructions_dispatched[]={ - { .uname = "PREFETCH_NTA", - .udesc = "Non-temporal prefetches.", - .ucode = 0x4, -@@ -488,7 +488,7 @@ static const amd64_umask_t amd64_fam17h_prefetch_instructions_dispatched[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_tablewalker_allocation[]={ -+static const amd64_umask_t amd64_fam17h_zen1_tablewalker_allocation[]={ - { .uname = "ALLOC_ISIDE1", - .udesc = "TBD", - .ucode = 0x8, -@@ -507,7 +507,7 @@ static const amd64_umask_t amd64_fam17h_tablewalker_allocation[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_oc_mode_switch[]={ -+static const amd64_umask_t amd64_fam17h_zen1_oc_mode_switch[]={ - { .uname = "OC_IC_MODE_SWITCH", - .udesc = "TBD", - .ucode = 0x2, -@@ -518,7 +518,7 @@ static const amd64_umask_t amd64_fam17h_oc_mode_switch[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_dynamic_tokens_dispatch_stall_cycles_0[]={ -+static const amd64_umask_t amd64_fam17h_zen1_dynamic_tokens_dispatch_stall_cycles_0[]={ - { .uname = "RETIRE_TOKEN_STALL", - .udesc = "Retire tokens unavailable", 
- .ucode = 0x40, -@@ -549,7 +549,7 @@ static const amd64_umask_t amd64_fam17h_dynamic_tokens_dispatch_stall_cycles_0[] - }, - }; - --static const amd64_entry_t amd64_fam17h_pe[]={ -+static const amd64_entry_t amd64_fam17h_zen1_pe[]={ - { .name = "L1_ITLB_MISS_L2_ITLB_HIT", - .desc = "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB.", - .modmsk = AMD64_FAM17H_ATTRS, -@@ -563,8 +563,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x85, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_l1_itlb_miss_l2_itlb_miss), -- .umasks = amd64_fam17h_l1_itlb_miss_l2_itlb_miss, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_l1_itlb_miss_l2_itlb_miss), -+ .umasks = amd64_fam17h_zen1_l1_itlb_miss_l2_itlb_miss, - }, - { .name = "PIPELINE_RESTART_DUE_TO_INSTRUCTION_STREAM_PROBE", - .desc = "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event.", -@@ -684,8 +684,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0xcb, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_retired_mmx_fp_instructions), -- .umasks = amd64_fam17h_retired_mmx_fp_instructions, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_retired_mmx_fp_instructions), -+ .umasks = amd64_fam17h_zen1_retired_mmx_fp_instructions, - }, - { .name = "RETIRED_NEAR_RETURNS", - .desc = "The number of near return instructions (RET or RETI) retired.", -@@ -707,8 +707,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x1cf, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_tagged_ibs_ops), -- .umasks = amd64_fam17h_tagged_ibs_ops, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_tagged_ibs_ops), -+ .umasks = amd64_fam17h_zen1_tagged_ibs_ops, - }, - { .name = 
"NUMBER_OF_MOVE_ELIMINATION_AND_SCALAR_OP_OPTIMIZATION", - .desc = "This is a dispatch based speculative event. It is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", -@@ -716,8 +716,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x4, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_number_of_move_elimination_and_scalar_op_optimization), -- .umasks = amd64_fam17h_number_of_move_elimination_and_scalar_op_optimization, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_number_of_move_elimination_and_scalar_op_optimization), -+ .umasks = amd64_fam17h_zen1_number_of_move_elimination_and_scalar_op_optimization, - }, - { .name = "RETIRED_SSE_AVX_OPERATIONS", - .desc = "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", -@@ -725,8 +725,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x3, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_retired_sse_avx_operations), -- .umasks = amd64_fam17h_retired_sse_avx_operations, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_retired_sse_avx_operations), -+ .umasks = amd64_fam17h_zen1_retired_sse_avx_operations, - }, - { .name = "RETIRED_SERIALIZING_OPS", - .desc = "The number of serializing Ops retired.", -@@ -734,8 +734,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x5, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_retired_serializing_ops), -- .umasks = amd64_fam17h_retired_serializing_ops, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_retired_serializing_ops), -+ .umasks = amd64_fam17h_zen1_retired_serializing_ops, - }, - { .name = "RETIRED_X87_FLOATING_POINT_OPERATIONS", - .desc = "The number of x87 floating-point Ops that have retired. 
The number of events logged per cycle can vary from 0 to 8.", -@@ -743,8 +743,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x2, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_retired_x87_floating_point_operations), -- .umasks = amd64_fam17h_retired_x87_floating_point_operations, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_retired_x87_floating_point_operations), -+ .umasks = amd64_fam17h_zen1_retired_x87_floating_point_operations, - }, - { .name = "FP_SCHEDULER_EMPTY", - .desc = "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler. Invert this to count cycles in which at least one FPU operation is present in the FPU.", -@@ -759,8 +759,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x0, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_fpu_pipe_assignment), -- .umasks = amd64_fam17h_fpu_pipe_assignment, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_fpu_pipe_assignment), -+ .umasks = amd64_fam17h_zen1_fpu_pipe_assignment, - }, - { .name = "INSTRUCTION_CACHE_REFILLS_FROM_L2", - .desc = "The number of 64-byte instruction cachelines that was fulfilled by the L2 cache.", -@@ -782,8 +782,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x8c, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_instruction_cache_lines_invalidated), -- .umasks = amd64_fam17h_instruction_cache_lines_invalidated, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_instruction_cache_lines_invalidated), -+ .umasks = amd64_fam17h_zen1_instruction_cache_lines_invalidated, - }, - { .name = "INSTRUCTION_PIPE_STALL", - .desc = "TBD", -@@ -791,8 +791,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x87, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_instruction_pipe_stall), -- .umasks = amd64_fam17h_instruction_pipe_stall, -+ .numasks = 
LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_instruction_pipe_stall), -+ .umasks = amd64_fam17h_zen1_instruction_pipe_stall, - }, - { .name = "32_BYTE_INSTRUCTION_CACHE_FETCH", - .desc = "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses).", -@@ -814,8 +814,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x64, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_core_to_l2_cacheable_request_access_status), -- .umasks = amd64_fam17h_core_to_l2_cacheable_request_access_status, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_core_to_l2_cacheable_request_access_status), -+ .umasks = amd64_fam17h_zen1_core_to_l2_cacheable_request_access_status, - }, - { .name = "CYCLES_WITH_FILL_PENDING_FROM_L2", - .desc = "Total cycles spent with one or more fill requests in flight from L2.", -@@ -823,8 +823,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x6d, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_cycles_with_fill_pending_from_l2), -- .umasks = amd64_fam17h_cycles_with_fill_pending_from_l2, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_cycles_with_fill_pending_from_l2), -+ .umasks = amd64_fam17h_zen1_cycles_with_fill_pending_from_l2, - }, - { .name = "L2_LATENCY", - .desc = "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. This may be used to calculate average latency by multiplying this count by four and then dividing by the total number of L2 fills (umask L2RequestG1). Event counts are for both threads. 
To calculate average latency, the number of fills from both threads must be used.", -@@ -832,8 +832,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x62, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_l2_latency), -- .umasks = amd64_fam17h_l2_latency, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_l2_latency), -+ .umasks = amd64_fam17h_zen1_l2_latency, - }, - { .name = "REQUESTS_TO_L2_GROUP1", - .desc = "TBD", -@@ -841,8 +841,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x60, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_requests_to_l2_group1), -- .umasks = amd64_fam17h_requests_to_l2_group1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_requests_to_l2_group1), -+ .umasks = amd64_fam17h_zen1_requests_to_l2_group1, - }, - { .name = "REQUESTS_TO_L2_GROUP2", - .desc = "Multi-events in that LS and IF requests can be received simultaneous.", -@@ -850,8 +850,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x61, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_requests_to_l2_group2), -- .umasks = amd64_fam17h_requests_to_l2_group2, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_requests_to_l2_group2), -+ .umasks = amd64_fam17h_zen1_requests_to_l2_group2, - }, - { .name = "LS_TO_L2_WBC_REQUESTS", - .desc = "TBD", -@@ -859,8 +859,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x63, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_ls_to_l2_wbc_requests), -- .umasks = amd64_fam17h_ls_to_l2_wbc_requests, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_ls_to_l2_wbc_requests), -+ .umasks = amd64_fam17h_zen1_ls_to_l2_wbc_requests, - }, - { .name = "DATA_CACHE_ACCESSES", - .desc = "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. 
Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event.", -@@ -875,8 +875,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x29, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_ls_dispatch), -- .umasks = amd64_fam17h_ls_dispatch, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_ls_dispatch), -+ .umasks = amd64_fam17h_zen1_ls_dispatch, - }, - { .name = "INEFFECTIVE_SOFTWARE_PREFETCH", - .desc = "The number of software prefetches that did not fetch data outside of the processor core.", -@@ -884,8 +884,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x52, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_ineffective_software_prefetch), -- .umasks = amd64_fam17h_ineffective_software_prefetch, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_ineffective_software_prefetch), -+ .umasks = amd64_fam17h_zen1_ineffective_software_prefetch, - }, - { .name = "L1_DTLB_MISS", - .desc = "L1 Data TLB misses.", -@@ -893,8 +893,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x45, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_l1_dtlb_miss), -- .umasks = amd64_fam17h_l1_dtlb_miss, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_l1_dtlb_miss), -+ .umasks = amd64_fam17h_zen1_l1_dtlb_miss, - }, - { .name = "LOCKS", - .desc = "Lock operations. 
Unit masks ORed", -@@ -902,8 +902,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x25, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_locks), -- .umasks = amd64_fam17h_locks, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_locks), -+ .umasks = amd64_fam17h_zen1_locks, - }, - { .name = "MAB_ALLOCATION_BY_PIPE", - .desc = "TBD", -@@ -911,8 +911,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x41, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_mab_allocation_by_pipe), -- .umasks = amd64_fam17h_mab_allocation_by_pipe, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_mab_allocation_by_pipe), -+ .umasks = amd64_fam17h_zen1_mab_allocation_by_pipe, - }, - { .name = "MISALIGNED_LOADS", - .desc = "TBD", -@@ -934,8 +934,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x4b, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_prefetch_instructions_dispatched), -- .umasks = amd64_fam17h_prefetch_instructions_dispatched, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_prefetch_instructions_dispatched), -+ .umasks = amd64_fam17h_zen1_prefetch_instructions_dispatched, - }, - { .name = "STORE_TO_LOAD_FORWARD", - .desc = "Number of STore Lad Forward hits.", -@@ -950,8 +950,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x46, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_tablewalker_allocation), -- .umasks = amd64_fam17h_tablewalker_allocation, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_tablewalker_allocation), -+ .umasks = amd64_fam17h_zen1_tablewalker_allocation, - }, - { .name = "L1_BTB_CORRECTION", - .desc = "TBD", -@@ -973,8 +973,8 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0x28a, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_oc_mode_switch), -- .umasks = amd64_fam17h_oc_mode_switch, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_oc_mode_switch), -+ .umasks = 
amd64_fam17h_zen1_oc_mode_switch, - }, - { .name = "DYNAMIC_TOKENS_DISPATCH_STALLS_CYCLES_0", - .desc = "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", -@@ -982,7 +982,7 @@ static const amd64_entry_t amd64_fam17h_pe[]={ - .code = 0xaf, - .flags = 0, - .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_dynamic_tokens_dispatch_stall_cycles_0), -- .umasks = amd64_fam17h_dynamic_tokens_dispatch_stall_cycles_0, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_dynamic_tokens_dispatch_stall_cycles_0), -+ .umasks = amd64_fam17h_zen1_dynamic_tokens_dispatch_stall_cycles_0, - }, - }; -diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c -index a701d78..29efd08 100644 ---- a/lib/pfmlib_amd64.c -+++ b/lib/pfmlib_amd64.c -@@ -174,7 +174,7 @@ amd64_get_revision(pfm_amd64_config_t *cfg) - } else if (cfg->family == 21) { /* family 15h */ - rev = PFM_PMU_AMD64_FAM15H_INTERLAGOS; - } else if (cfg->family == 23) { /* family 17h */ -- rev = PFM_PMU_AMD64_FAM17H; -+ rev = PFM_PMU_AMD64_FAM17H_ZEN1; - } else if (cfg->family == 22) { /* family 16h */ - rev = PFM_PMU_AMD64_FAM16H; - } -diff --git a/lib/pfmlib_amd64_fam17h.c b/lib/pfmlib_amd64_fam17h.c -index 443b25f..ea67f79 100644 ---- a/lib/pfmlib_amd64_fam17h.c -+++ b/lib/pfmlib_amd64_fam17h.c -@@ -26,22 +26,70 @@ - /* private headers */ - #include "pfmlib_priv.h" - #include "pfmlib_amd64_priv.h" --#include "events/amd64_events_fam17h.h" -+#include "events/amd64_events_fam17h_zen1.h" - --pfmlib_pmu_t amd64_fam17h_support={ -- .desc = "AMD64 Fam17h Zen", -+/* -+ * This function detects ZEN1 for the deprecated -+ * amd_fam17h pmu model name. -+ */ -+static int -+pfm_amd64_family_detect_zen1(void *this) -+{ -+ int ret, rev; -+ -+ ret = pfm_amd64_detect(this); -+ if (ret != PFM_SUCCESS) -+ return ret; -+ -+ rev = pfm_amd64_cfg.revision; -+ return rev == PFM_PMU_AMD64_FAM17H_ZEN1 ? PFM_SUCCESS: PFM_ERR_NOTSUPP; -+} -+ -+/* -+ * Deprecated PMU model, kept here for backward compatibility. 
-+ * Should use amd_fam17h_zen1 instead. -+ */ -+pfmlib_pmu_t amd64_fam17h_deprecated_support={ -+ .desc = "AMD64 Fam17h Zen1 (deprecated - use amd_fam17_zen1 instead)", - .name = "amd64_fam17h", - .pmu = PFM_PMU_AMD64_FAM17H, - .pmu_rev = 0, -- .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam17h_pe), -+ .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_pe), - .type = PFM_PMU_TYPE_CORE, - .supported_plm = AMD64_FAM10H_PLM, - .num_cntrs = 6, - .max_encoding = 1, -- .pe = amd64_fam17h_pe, -+ .pe = amd64_fam17h_zen1_pe, - .atdesc = amd64_mods, -- .flags = PFMLIB_PMU_FL_RAW_UMASK, -+ .flags = PFMLIB_PMU_FL_RAW_UMASK | PFMLIB_PMU_FL_DEPR, - .cpu_family = PFM_PMU_AMD64_FAM17H, -+ .pmu_detect = pfm_amd64_family_detect_zen1, -+ .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding, -+ PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), -+ .get_event_first = pfm_amd64_get_event_first, -+ .get_event_next = pfm_amd64_get_event_next, -+ .event_is_valid = pfm_amd64_event_is_valid, -+ .validate_table = pfm_amd64_validate_table, -+ .get_event_info = pfm_amd64_get_event_info, -+ .get_event_attr_info = pfm_amd64_get_event_attr_info, -+ PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs), -+ .get_event_nattrs = pfm_amd64_get_event_nattrs, -+}; -+ -+pfmlib_pmu_t amd64_fam17h_zen1_support={ -+ .desc = "AMD64 Fam17h Zen1", -+ .name = "amd64_fam17h_zen1", -+ .pmu = PFM_PMU_AMD64_FAM17H_ZEN1, -+ .pmu_rev = 0, -+ .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_pe), -+ .type = PFM_PMU_TYPE_CORE, -+ .supported_plm = AMD64_FAM10H_PLM, -+ .num_cntrs = 6, -+ .max_encoding = 1, -+ .pe = amd64_fam17h_zen1_pe, -+ .atdesc = amd64_mods, -+ .flags = PFMLIB_PMU_FL_RAW_UMASK, -+ .cpu_family = PFM_PMU_AMD64_FAM17H_ZEN1, - .pmu_detect = pfm_amd64_family_detect, - .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding, - PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), -diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c -index 31d16e9..3f2d689 100644 ---- a/lib/pfmlib_common.c -+++ 
b/lib/pfmlib_common.c -@@ -78,7 +78,8 @@ static pfmlib_pmu_t *pfmlib_pmus[]= - &amd64_fam15h_interlagos_support, - &amd64_fam15h_nb_support, - &amd64_fam16h_support, -- &amd64_fam17h_support, -+ &amd64_fam17h_deprecated_support, -+ &amd64_fam17h_zen1_support, - &intel_core_support, - &intel_atom_support, - &intel_nhm_support, -diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h -index 5cddc9c..ea618ae 100644 ---- a/lib/pfmlib_priv.h -+++ b/lib/pfmlib_priv.h -@@ -248,7 +248,8 @@ extern pfmlib_pmu_t amd64_fam14h_bobcat_support; - extern pfmlib_pmu_t amd64_fam15h_interlagos_support; - extern pfmlib_pmu_t amd64_fam15h_nb_support; - extern pfmlib_pmu_t amd64_fam16h_support; --extern pfmlib_pmu_t amd64_fam17h_support; -+extern pfmlib_pmu_t amd64_fam17h_deprecated_support; -+extern pfmlib_pmu_t amd64_fam17h_zen1_support; - extern pfmlib_pmu_t intel_p6_support; - extern pfmlib_pmu_t intel_ppro_support; - extern pfmlib_pmu_t intel_pii_support; -diff --git a/tests/validate_x86.c b/tests/validate_x86.c -index b32cf22..af5f664 100644 ---- a/tests/validate_x86.c -+++ b/tests/validate_x86.c -@@ -5641,6 +5641,30 @@ static const test_event_t x86_test_events[]={ - .codes[0] = 0x510845ull, - .fstr = "amd64_fam17h::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:k=0:u=1:e=0:i=0:c=0:h=0:g=0", - }, -+ { SRC_LINE, -+ .name = "amd64_fam17h_zen1::retired_uops", -+ .count = 1, -+ .codes[0] = 0x5300c1ull, -+ .fstr = "amd64_fam17h_zen1::RETIRED_UOPS:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam17h_zen1::cycles_not_in_halt", -+ .count = 1, -+ .codes[0] = 0x530076ull, -+ .fstr = "amd64_fam17h_zen1::CYCLES_NOT_IN_HALT:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam17h_zen1::locks:spec_lock", -+ .count = 1, -+ .codes[0] = 0x530425ull, -+ .fstr = "amd64_fam17h_zen1::LOCKS:SPEC_LOCK:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam17h::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:u", -+ .count = 1, -+ .codes[0] = 0x510845ull, -+ .fstr = 
"amd64_fam17h::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:k=0:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, - { SRC_LINE, - .name = "amd64_fam16h::RETIRED_INSTRUCTIONS", - .ret = PFM_SUCCESS, - -commit ddafe9d7c187b48950095c283435f47b59b953e9 -Author: Stephane Eranian -Date: Tue Dec 17 22:39:53 2019 -0800 - - add AMD Fam17h Zen2 core PMU support - - This patch adds AMD Fam17h Zen2 core PMU support. This is based on the - public specifications PPR (#55803) Rev 0.54 - Sep 12, 2019. - - Available at: https://developer.amd.com/wp-content/resources/55803_0.54-PUB.pdf - - The patch adds a new PMU model: amd_fam17_zen2 - The amd_fam17h is still assigned to zen1. - - Signed-off-by: Stephane Eranian - -diff --git a/README b/README -index b588353..55efedf 100644 ---- a/README -+++ b/README -@@ -40,6 +40,7 @@ The library supports many PMUs. The current version can handle: - AMD64 Fam15h (Bulldozer) (core and uncore) - AMD64 Fam16h (Jaguar) - AMD64 Fam17h (Zen1) -+ AMD64 Fam17h (Zen2) - - - For Intel X86: - Intel P6 (Pentium II, Pentium Pro, Pentium III, Pentium M) -diff --git a/docs/Makefile b/docs/Makefile -index 4862d22..1c57881 100644 ---- a/docs/Makefile -+++ b/docs/Makefile -@@ -40,6 +40,7 @@ ARCH_MAN=libpfm_intel_core.3 \ - libpfm_amd64_fam15h.3 \ - libpfm_amd64_fam16h.3 \ - libpfm_amd64_fam17h.3 \ -+ libpfm_amd64_fam17h_zen2.3 \ - libpfm_intel_atom.3 \ - libpfm_intel_nhm.3 \ - libpfm_intel_nhm_unc.3 \ -diff --git a/docs/man3/libpfm_amd64_fam17h_zen2.3 b/docs/man3/libpfm_amd64_fam17h_zen2.3 -new file mode 100644 -index 0000000..05a14f6 ---- /dev/null -+++ b/docs/man3/libpfm_amd64_fam17h_zen2.3 -@@ -0,0 +1,49 @@ -+.TH LIBPFM 3 "December, 2019" "" "Linux Programmer's Manual" -+.SH NAME -+libpfm_amd64_fam17h_zen2 - support for AMD64 Family 17h model 31h processors -+.SH SYNOPSIS -+.nf -+.B #include -+.sp -+.B PMU name: amd64_fam17h_zen2 -+.B PMU desc: AMD64 Fam17h Zen2 -+.sp -+.SH DESCRIPTION -+The library supports AMD Family 17h processors Zen2 core PMU in both 32 and 64-bit modes. 
-+ -+.SH MODIFIERS -+The following modifiers are supported on AMD64 Family 17h Zen2 core PMU: -+.TP -+.B u -+Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. -+This is a boolean modifier. -+.TP -+.B k -+Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. -+This is a boolean modifier. -+.TP -+.B h -+Measure at while executing in host mode (when using virtualization). This corresponds to \fBPFM_PLMH\fR. -+This modifier is available starting with Fam10h. This is a boolean modifier. -+.TP -+.B g -+Measure at while executing in guest mode (when using virtualization). This modifier is available -+starting with Fam10h. This is a boolean modifier. -+.TP -+.B i -+Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR -+occurring. This is a boolean modifier -+.TP -+.B e -+Enable edge detection, i.e., count only when there is a state transition. This is a boolean modifier. -+.TP -+.B c -+Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles -+in which the number of occurrences of the event is greater or equal to the threshold. This is an integer -+modifier with values in the range [0:255]. 
-+ -+.SH AUTHORS -+.nf -+Stephane Eranian -+.if -+.PP -diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h -index c8dc719..c214f17 100644 ---- a/include/perfmon/pfmlib.h -+++ b/include/perfmon/pfmlib.h -@@ -555,6 +555,7 @@ typedef enum { - PFM_PMU_ARM_A64FX, /* Fujitsu A64FX processor */ - - PFM_PMU_AMD64_FAM17H_ZEN1, /* AMD AMD64 Fam17h Zen1 */ -+ PFM_PMU_AMD64_FAM17H_ZEN2, /* AMD AMD64 Fam17h Zen2 */ - /* MUST ADD NEW PMU MODELS HERE */ - - PFM_PMU_MAX /* end marker */ -diff --git a/lib/Makefile b/lib/Makefile -index 9610cc5..0556967 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -249,6 +249,7 @@ INC_X86= pfmlib_intel_x86_priv.h \ - events/amd64_events_fam14h.h \ - events/amd64_events_fam15h.h \ - events/amd64_events_fam17h_zen1.h \ -+ events/amd64_events_fam17h_zen2.h \ - events/amd64_events_fam16h.h \ - events/intel_p6_events.h \ - events/intel_netburst_events.h \ -diff --git a/lib/events/amd64_events_fam17h_zen2.h b/lib/events/amd64_events_fam17h_zen2.h -new file mode 100644 -index 0000000..87dfff0 ---- /dev/null -+++ b/lib/events/amd64_events_fam17h_zen2.h -@@ -0,0 +1,1056 @@ -+/* -+ * Contributed by Stephane Eranian -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -+ * of the Software, and to permit persons to whom the Software is furnished to do so, -+ * subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in all -+ * copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ * -+ * This file is part of libpfm, a performance monitoring support library for -+ * applications on Linux. -+ * -+ * PMU: amd64_fam17h_zen2_zen2 (AMD64 Fam17h Zen2)) -+ */ -+ -+static const amd64_umask_t amd64_fam17h_zen2_l1_itlb_miss_l2_itlb_miss[]={ -+ { .uname = "IF1G", -+ .udesc = "Number of instruction fetches to a 1GB page", -+ .ucode = 0x4, -+ }, -+ { .uname = "IF2M", -+ .udesc = "Number of instruction fetches to a 2MB page", -+ .ucode = 0x2, -+ }, -+ { .uname = "IF4K", -+ .udesc = "Number of instruction fetches to a 4KB page", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_itlb_fetch_hit[]={ -+ { .uname = "IF1G", -+ .udesc = "L1 instruction fetch that hit a 1GB page.", -+ .ucode = 0x4, -+ }, -+ { .uname = "IF2M", -+ .udesc = "L1 instruction fetch that hit a 2MB page.", -+ .ucode = 0x2, -+ }, -+ { .uname = "IF4K", -+ .udesc = "L1 instruction fetch that hit a 4KB page.", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_retired_mmx_fp_instructions[]={ -+ { .uname = "SSE_INSTR", -+ .udesc = "Number of SSE instructions (SSE, SSE2, SSE3, SSE$, SSE4A, SSE41, SSE42, AVX).", -+ .ucode = 0x4, -+ }, -+ { .uname = "MMX_INSTR", -+ .udesc = "Number of MMX instructions.", -+ .ucode = 0x2, -+ }, -+ { .uname = "X87_INSTR", -+ .udesc = "Number of X87 instructions.", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_tagged_ibs_ops[]={ -+ { .uname = "IBS_COUNT_ROLLOVER", -+ .udesc = "Number of times a uop could not be tagged by IBS because of a previous tagged uop that has not retired.", -+ .ucode = 0x4, -+ }, -+ { .uname = "IBS_TAGGED_OPS_RET", -+ .udesc = "Number of uops tagged by IBS that retired.", -+ .ucode = 
0x2, -+ }, -+ { .uname = "IBS_TAGGED_OPS", -+ .udesc = "Number of uops tagged by IBS.", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_number_of_move_elimination_and_scalar_op_optimization[]={ -+ { .uname = "OPTIMIZED", -+ .udesc = "Number of scalar ops optimized.", -+ .ucode = 0x8, -+ }, -+ { .uname = "OPT_POTENTIAL", -+ .udesc = "Number of ops that are candidates for optimization (have z-bit either set or pass.", -+ .ucode = 0x4, -+ }, -+ { .uname = "SSE_MOV_OPS_ELIM", -+ .udesc = "Number of SSE move ops eliminated.", -+ .ucode = 0x2, -+ }, -+ { .uname = "SSE_MOV_OPS", -+ .udesc = "Number of SSE move ops.", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_retired_sse_avx_operations[]={ -+ { .uname = "MAC_FLOPS", -+ .udesc = "Mac flops. MAC FLOPS count as 2 FLOPS.", -+ .ucode = 0x8, -+ }, -+ { .uname = "DIV_FLOPS", -+ .udesc = "Divide/square root flops.", -+ .ucode = 0x4, -+ }, -+ { .uname = "MULT_FLOPS", -+ .udesc = "Multiply flops.", -+ .ucode = 0x2, -+ }, -+ { .uname = "ADD_SUB_FLOPS", -+ .udesc = "Add/subtract flops.", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_retired_serializing_ops[]={ -+ { .uname = "X87_CTRL_RET", -+ .udesc = "X87 control word mispredict traps due to mispredction in RC or PC, or changes in mask bits.", -+ .ucode = 0x1, -+ }, -+ { .uname = "X87_BOT_RET", -+ .udesc = "X87 bottom-executing uops retired.", -+ .ucode = 0x2, -+ }, -+ { .uname = "SSE_CTRL_RET", -+ .udesc = "SSE control word mispreduct traps due to mispredctions in RC, FTZ or DAZ or changes in mask bits.", -+ .ucode = 0x4, -+ }, -+ { .uname = "SSE_BOT_RET", -+ .udesc = "SSE bottom-executing uops retired.", -+ .ucode = 0x8, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_fp_dispatch_faults[]={ -+ { .uname = "X87_FULL_FAULT", -+ .udesc = "X87 fill faults", -+ .ucode = 0x1, -+ }, -+ { .uname = "XMM_FILL_FAULT", -+ .udesc = "XMM fill faults", -+ .ucode = 0x2, -+ }, -+ { .uname = 
"YMM_FILL_FAULT", -+ .udesc = "YMM fill faults", -+ .ucode = 0x4, -+ }, -+ { .uname = "YMM_SPILL_FAULT", -+ .udesc = "YMM spill faults", -+ .ucode = 0x8, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_retired_x87_floating_point_operations[]={ -+ { .uname = "DIV_SQR_R_OPS", -+ .udesc = "Divide and square root ops", -+ .ucode = 0x4, -+ }, -+ { .uname = "MUL_OPS", -+ .udesc = "Multiple ops", -+ .ucode = 0x2, -+ }, -+ { .uname = "ADD_SUB_OPS", -+ .udesc = "Add/subtract ops", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_fpu_pipe_assignment[]={ -+ { .uname = "DUAL3", -+ .udesc = "Total number of multi-pipe uops assigned to pipe3", -+ .ucode = 0x80, -+ }, -+ { .uname = "DUAL2", -+ .udesc = "Total number of multi-pipe uops assigned to pipe2", -+ .ucode = 0x40, -+ }, -+ { .uname = "DUAL1", -+ .udesc = "Total number of multi-pipe uops assigned to pipe1", -+ .ucode = 0x20, -+ }, -+ { .uname = "DUAL0", -+ .udesc = "Total number of multi-pipe uops assigned to pipe0", -+ .ucode = 0x10, -+ }, -+ { .uname = "TOTAL3", -+ .udesc = "Total number of uops assigned to pipe3", -+ .ucode = 0x8, -+ }, -+ { .uname = "TOTAL2", -+ .udesc = "Total number of uops assigned to pipe2", -+ .ucode = 0x4, -+ }, -+ { .uname = "TOTAL1", -+ .udesc = "Total number of uops assigned to pipe1", -+ .ucode = 0x2, -+ }, -+ { .uname = "TOTAL0", -+ .udesc = "Total number of uops assigned to pipe0", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_instruction_cache_lines_invalidated[]={ -+ { .uname = "L2_INVALIDATING_PROBE", -+ .udesc = "IC line invalidated due to L2 invalidating probe (external or LS).", -+ .ucode = 0x2, -+ }, -+ { .uname = "FILL_INVALIDATED", -+ .udesc = "IC line invalidated due to overwriting fill response.", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_instruction_pipe_stall[]={ -+ { .uname = "IC_STALL_ANY", -+ .udesc = "IC pipe was stalled during this clock cycle for any reason 
(nothing valud in pipe ICM1).", -+ .ucode = 0x4, -+ }, -+ { .uname = "IC_STALL_DQ_EMPTY", -+ .udesc = "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", -+ .ucode = 0x2, -+ }, -+ { .uname = "IC_STALL_BACK_PRESSURE", -+ .udesc = "IC pipe was stalled during this clock cycle (ncluding IC to OC fetches) due to back pressure.", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_core_to_l2_cacheable_request_access_status[]={ -+ { .uname = "LS_RD_BLK_C_S", -+ .udesc = "Number of data cache shared read hitting in the L2.", -+ .ucode = 0x80, -+ }, -+ { .uname = "LS_RD_BLK_L_HIT_X", -+ .udesc = "Number of data cache reads hitting in the L2.", -+ .ucode = 0x40, -+ }, -+ { .uname = "LS_RD_BLK_L_HIT_S", -+ .udesc = "Number of data cache reads hitting a shared in line in the L2.", -+ .ucode = 0x20, -+ }, -+ { .uname = "LS_RD_BLK_X", -+ .udesc = "Number of data cache store or state change (to exclusive) requests hitting in the L2.", -+ .ucode = 0x10, -+ }, -+ { .uname = "LS_RD_BLK_C", -+ .udesc = "Number of data cache fill requests missing in the L2 (all types).", -+ .ucode = 0x8, -+ }, -+ { .uname = "IC_FILL_HIT_X", -+ .udesc = "Number of I-cache fill requests hitting a modifiable (exclusive) line in the L2.", -+ .ucode = 0x4, -+ }, -+ { .uname = "IC_FILL_HIT_S", -+ .udesc = "Number of I-cache fill requests hitting a clean line in the L2.", -+ .ucode = 0x2, -+ }, -+ { .uname = "IC_FILL_MISS", -+ .udesc = "Number of I-cache fill requests missing the L2.", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_l2_prefetch_hit_l2[]={ -+ { .uname = "ANY", -+ .udesc = "Any L2 prefetch requests", -+ .ucode = 0x1f, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_cycles_with_fill_pending_from_l2[]={ -+ { .uname = "L2_FILL_BUSY", -+ .udesc = "TBD", -+ .ucode = 0x1, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t 
amd64_fam17h_zen2_l2_latency[]={ -+ { .uname = "L2_CYCLES_WAITING_ON_FILLS", -+ .udesc = "TBD", -+ .ucode = 0x1, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_requests_to_l2_group1[]={ -+ { .uname = "RD_BLK_L", -+ .udesc = "Number of data cache reads (including software and hardware prefetches).", -+ .ucode = 0x80, -+ }, -+ { .uname = "RD_BLK_X", -+ .udesc = "Number of data cache stores", -+ .ucode = 0x40, -+ }, -+ { .uname = "LS_RD_BLK_C_S", -+ .udesc = "Number of data cache shared reads.", -+ .ucode = 0x20, -+ }, -+ { .uname = "CACHEABLE_IC_READ", -+ .udesc = "Number of instruction cache reads.", -+ .ucode = 0x10, -+ }, -+ { .uname = "CHANGE_TO_X", -+ .udesc = "Number of requests change to writable. Check L2 for current state.", -+ .ucode = 0x8, -+ }, -+ { .uname = "PREFETCH_L2", -+ .udesc = "TBD", -+ .ucode = 0x4, -+ }, -+ { .uname = "L2_HW_PF", -+ .udesc = "Number of prefetches accepted by L2 pipeline, hit or miss.", -+ .ucode = 0x2, -+ }, -+ { .uname = "GROUP2", -+ .udesc = "Number of miscellaneous requests covered in more details by REQUESTS_TO_L2_GROUP1", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_requests_to_l2_group2[]={ -+ { .uname = "GROUP1", -+ .udesc = "Number of miscellaneous requests covered in more details by REQUESTS_TO_L2_GROUP2", -+ .ucode = 0x80, -+ }, -+ { .uname = "LS_RD_SIZED", -+ .udesc = "Number of data cache reads sized.", -+ .ucode = 0x40, -+ }, -+ { .uname = "LS_RD_SIZED_N_C", -+ .udesc = "Number of data cache reads sized non-cacheable.", -+ .ucode = 0x20, -+ }, -+ { .uname = "IC_RD_SIZED", -+ .udesc = "Number of instruction cache reads sized.", -+ .ucode = 0x10, -+ }, -+ { .uname = "IC_RD_SIZED_N_C", -+ .udesc = "Number of instruction cache reads sized non-cacheable.", -+ .ucode = 0x8, -+ }, -+ { .uname = "SMC_INVAL", -+ .udesc = "Number of self-modifying code invalidates.", -+ .ucode = 0x4, -+ }, -+ { .uname = "BUS_LOCKS_ORIGINATOR", -+ .udesc = "Number of bus 
locks.", -+ .ucode = 0x2, -+ }, -+ { .uname = "BUS_LOCKS_RESPONSES", -+ .udesc = "Number of bus lock responses.", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_ls_to_l2_wbc_requests[]={ -+ { .uname = "WCB_WRITE", -+ .udesc = "TBD", -+ .ucode = 0x40, -+ }, -+ { .uname = "WCB_CLOSE", -+ .udesc = "TBD", -+ .ucode = 0x20, -+ }, -+ { .uname = "CACHE_LINE_FLUSH", -+ .udesc = "TBD", -+ .ucode = 0x10, -+ }, -+ { .uname = "I_LINE_FLUSH", -+ .udesc = "TBD", -+ .ucode = 0x8, -+ }, -+ { .uname = "ZERO_BYTE_STORE", -+ .udesc = "TBD", -+ .ucode = 0x4, -+ }, -+ { .uname = "LOCAL_IC_CLR", -+ .udesc = "TBD", -+ .ucode = 0x2, -+ }, -+ { .uname = "C_L_ZERO", -+ .udesc = "TBD", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_bad_status_2[]={ -+ { .uname = "STLI_OTHER", -+ .udesc = "Store-to-load conflicts. A load was unable to complete due to a non-forwardable conflict with an older store.", -+ .ucode = 0x2, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_retired_lock_instructions[]={ -+ { .uname = "CACHEABLE_LOCKS", -+ .udesc = "Lock in cacheable memory region.", -+ .ucode = 0xe, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_tlb_flushes[]={ -+ { .uname = "ANY", -+ .udesc = "ANY TLB flush.", -+ .ucode = 0xff, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_ls_dispatch[]={ -+ { .uname = "LD_ST_DISPATCH", -+ .udesc = "Load/Store single uops dispatched (compare-and-exchange).", -+ .ucode = 0x4, -+ }, -+ { .uname = "STORE_DISPATCH", -+ .udesc = "Store uops dispatched.", -+ .ucode = 0x2, -+ }, -+ { .uname = "LD_DISPATCH", -+ .udesc = "Load uops dispatched.", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_ineffective_software_prefetch[]={ -+ { .uname = "MAB_MCH_CNT", -+ .udesc = "Software prefetch instructions saw a match on an already allocated miss request buffer.", -+ .ucode = 
0x2, -+ }, -+ { .uname = "DATA_PIPE_SW_PF_DC_HIT", -+ .udesc = "Software Prefetch instruction saw a DC hit", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_software_prefetch_data_cache_fills[]={ -+ { .uname = "MABRESP_LCL_L2", -+ .udesc = "Fill from local L2.", -+ .ucode = 0x1, -+ }, -+ { .uname = "LS_MABRESP_LCL_CACHE", -+ .udesc = "Fill from another cache (home node local).", -+ .ucode = 0x2, -+ }, -+ { .uname = "LS_MABRESP_LCL_DRAM", -+ .udesc = "Fill from DRAM (home node local).", -+ .ucode = 0x8, -+ }, -+ { .uname = "LS_MABRESP_LCL_RMT_CACHE", -+ .udesc = "Fill from another cache (home node remote).", -+ .ucode = 0x10, -+ }, -+ { .uname = "LS_MABRESP_LCL_RMT_DRAM", -+ .udesc = "Fill from DRAM (home node remote).", -+ .ucode = 0x40, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_store_commit_cancels_2[]={ -+ { .uname = "WCB_FULL", -+ .udesc = "Non cacheable store and the non-cacheable commit buffer is full.", -+ .ucode = 0x1, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_l1_dtlb_miss[]={ -+ { .uname = "TLB_RELOAD_1G_L2_MISS", -+ .udesc = "Data TLB reload to a 1GB page that missed in the L2 TLB", -+ .ucode = 0x80, -+ }, -+ { .uname = "TLB_RELOAD_2M_L2_MISS", -+ .udesc = "Data TLB reload to a 2MB page that missed in the L2 TLB", -+ .ucode = 0x40, -+ }, -+ { .uname = "TLB_RELOAD_COALESCED_PAGE_MISS", -+ .udesc = "Data TLB reload to coalesced pages that missed", -+ .ucode = 0x20, -+ }, -+ { .uname = "TLB_RELOAD_4K_L2_MISS", -+ .udesc = "Data TLB reload to a 4KB page that missed in the L2 TLB", -+ .ucode = 0x10, -+ }, -+ { .uname = "TLB_RELOAD_1G_L2_HIT", -+ .udesc = "Data TLB reload to a 1GB page that hit in the L2 TLB", -+ .ucode = 0x8, -+ }, -+ { .uname = "TLB_RELOAD_2M_L2_HIT", -+ .udesc = "Data TLB reload to a 2MB page that hit in the L2 TLB", -+ .ucode = 0x4, -+ }, -+ { .uname = "TLB_RELOAD_COALESCED_PAGE_HIT", -+ .udesc = "Data TLB reload to coalesced pages that hit", -+ 
.ucode = 0x2, -+ }, -+ { .uname = "TLB_RELOAD_4K_L2_HIT", -+ .udesc = "Data TLB reload to a 4KB page thta hit in the L2 TLB", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_locks[]={ -+ { .uname = "SPEC_LOCK_MAP_COMMIT", -+ .udesc = "TBD", -+ .ucode = 0x8, -+ }, -+ { .uname = "SPEC_LOCK", -+ .udesc = "TBD", -+ .ucode = 0x4, -+ }, -+ { .uname = "NON_SPEC_LOCK", -+ .udesc = "TBD", -+ .ucode = 0x2, -+ }, -+ { .uname = "BUS_LOCK", -+ .udesc = "TBD", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_mab_allocation_by_pipe[]={ -+ { .uname = "TLB_PIPE_EARLY", -+ .udesc = "TBD", -+ .ucode = 0x10, -+ }, -+ { .uname = "HW_PF", -+ .udesc = "hw_pf", -+ .ucode = 0x8, -+ }, -+ { .uname = "TLB_PIPE_LATE", -+ .udesc = "TBD", -+ .ucode = 0x4, -+ }, -+ { .uname = "ST_PIPE", -+ .udesc = "TBD", -+ .ucode = 0x2, -+ }, -+ { .uname = "DATA_PIPE", -+ .udesc = "TBD", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_prefetch_instructions_dispatched[]={ -+ { .uname = "ANY", -+ .udesc = "Any prefetch", -+ .ucode = 0xff, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_tablewalker_allocation[]={ -+ { .uname = "ALLOC_ISIDE1", -+ .udesc = "TBD", -+ .ucode = 0x8, -+ }, -+ { .uname = "ALLOC_ISIDE0", -+ .udesc = "TBD", -+ .ucode = 0x4, -+ }, -+ { .uname = "ALLOC_DSIDE1", -+ .udesc = "TBD", -+ .ucode = 0x2, -+ }, -+ { .uname = "ALLOC_DSIDE0", -+ .udesc = "TBD", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_oc_mode_switch[]={ -+ { .uname = "OC_IC_MODE_SWITCH", -+ .udesc = "TBD", -+ .ucode = 0x2, -+ }, -+ { .uname = "IC_OC_MODE_SWITCH", -+ .udesc = "TBD", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_uops_dispatched_from_decoder[]={ -+ { .uname = "DECODER_DISPATCHED", -+ .udesc = "Number of uops dispatched from the Decoder", -+ .ucode = 0x1, -+ }, -+ { .uname = "OPCACHE_DISPATCHED", -+ .udesc = "Number of uops 
dispatched from the OpCache", -+ .ucode = 0x2, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_dispatch_resource_stall_cycles_1[]={ -+ { .uname = "INT_PHY_REG_FILE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to integer physical register file resource stalls. Applies to all uops that have integer destination register.", -+ .ucode = 0x1, -+ }, -+ { .uname = "LOAD_QUEUE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to load queue resource stalls. Applies to all uops with load semantics.", -+ .ucode = 0x2, -+ }, -+ { .uname = "STORE_QUEUE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to store queue resource stalls. Applies to all uops with store semantics.", -+ .ucode = 0x4, -+ }, -+ { .uname = "INT_SCHEDULER_MISC_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to integer scheduler miscellaneous resource stalls.", -+ .ucode = 0x8, -+ }, -+ { .uname = "TAKEN_BRANCH_BUFFER_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to taken branch buffer resource stalls.", -+ .ucode = 0x10, -+ }, -+ { .uname = "FP_REG_FILE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to floating-point register file resource stalls.", -+ .ucode = 0x20, -+ }, -+ { .uname = "FP_SCHEDULER_FILE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to floating-point scheduler resource stalls.", -+ .ucode = 0x40, -+ }, -+ { .uname = "FP_MISC_FILE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to floating-point miscellaneous resource unavailable.", -+ .ucode = 0x80, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen2_dispatch_resource_stall_cycles_0[]={ -+ { .uname = "ALU_TOKEN_STALL", -+ .udesc = "Number of cycles ALU tokens total unavailable.", -+ .ucode = 0x8, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_entry_t amd64_fam17h_zen2_pe[]={ -+ { .name = "L1_ITLB_MISS_L2_ITLB_HIT", -+ .desc = "Number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ 
.code = 0x84, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "L1_ITLB_MISS_L2_ITLB_MISS", -+ .desc = "Number of instruction fetches that miss in both the L1 and L2 TLBs.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x85, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_l1_itlb_miss_l2_itlb_miss), -+ .umasks = amd64_fam17h_zen2_l1_itlb_miss_l2_itlb_miss, -+ }, -+ { .name = "DIV_CYCLES_BUSY_COUNT", -+ .desc = "Number of cycles when the divider is busy.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xd3, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "DIV_OP_COUNT", -+ .desc = "Number of divide uops.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xd4, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_BRANCH_INSTRUCTIONS", -+ .desc = "Number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xc2, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_FAR_CONTROL_TRANSFERS", -+ .desc = "Number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xc6, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_INDIRECT_BRANCH_INSTRUCTIONS_MISPREDICTED", -+ .desc = "Number of indirect branches retired there were not correctly predicted. Each such mispredict incurs the same penalty as a mispredicted condition branch instruction. Only EX mispredicts are counted.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xca, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED", -+ .desc = "Number of branch instructions retired, of any type, that were not correctly predicted. 
This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts).", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xc3, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS", -+ .desc = "Number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xc4, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED", -+ .desc = "Number of retired taken branch instructions that were mispredicted.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xc5, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS", -+ .desc = "Number of retired conditional branch instructions.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xd1, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_UOPS", -+ .desc = "Number of uops retired. This includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 8.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xc1, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_FUSED_INSTRUCTIONS", -+ .desc = "Number of fused retired branch instructions retired per cycle. The number of events logged per cycle can vary from 0 to 3.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x1d0, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_INSTRUCTIONS", -+ .desc = "Instructions Retired.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xc0, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_MMX_FP_INSTRUCTIONS", -+ .desc = "Number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. 
Since this event includes non-numeric instructions, it is not suitable for measuring MFLOPS.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xcb, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_retired_mmx_fp_instructions), -+ .umasks = amd64_fam17h_zen2_retired_mmx_fp_instructions, -+ }, -+ { .name = "RETIRED_NEAR_RETURNS", -+ .desc = "Number of near return instructions (RET or RETI) retired.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xc8, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_NEAR_RETURNS_MISPREDICTED", -+ .desc = "Number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xc9, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "TAGGED_IBS_OPS", -+ .desc = "TBD", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x1cf, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_tagged_ibs_ops), -+ .umasks = amd64_fam17h_zen2_tagged_ibs_ops, -+ }, -+ { .name = "INSTRUCTION_CACHE_REFILLS_FROM_L2", -+ .desc = "Number of 64-byte instruction cachelines that was fulfilled by the L2 cache.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x82, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM", -+ .desc = "Number of 64-byte instruction cachelines fulfilled from system memory or another cache.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x83, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS", -+ .desc = "L2 cache request outcomes. 
This event does not count accesses to the L2 cache by the L2 prefetcher.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x64, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_core_to_l2_cacheable_request_access_status), -+ .umasks = amd64_fam17h_zen2_core_to_l2_cacheable_request_access_status, -+ }, -+ { .name = "L2_PREFETCH_HIT_L2", -+ .desc = "Number of L2 prefetcher hits in the L2", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x70, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_l2_prefetch_hit_l2), -+ .umasks = amd64_fam17h_zen2_l2_prefetch_hit_l2, -+ }, -+ { .name = "L2_PREFETCH_HIT_L3", -+ .desc = "Number of L2 prefetcher hits in the L3", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x71, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_l2_prefetch_hit_l2), -+ .umasks = amd64_fam17h_zen2_l2_prefetch_hit_l2, /* shared */ -+ }, -+ { .name = "REQUESTS_TO_L2_GROUP1", -+ .desc = "TBD", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x60, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_requests_to_l2_group1), -+ .umasks = amd64_fam17h_zen2_requests_to_l2_group1, -+ }, -+ { .name = "REQUESTS_TO_L2_GROUP2", -+ .desc = "Multi-events in that LS and IF requests can be received simultaneous.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x61, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_requests_to_l2_group2), -+ .umasks = amd64_fam17h_zen2_requests_to_l2_group2, -+ }, -+ { .name = "BAD_STATUS_2", -+ .desc = "TBD", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x24, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_bad_status_2), -+ .umasks = amd64_fam17h_zen2_bad_status_2, -+ }, -+ { .name = "LS_DISPATCH", -+ .desc = "Counts the number of operations dispatched to the LS unit. 
Unit Masks ADDed.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x29, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_ls_dispatch), -+ .umasks = amd64_fam17h_zen2_ls_dispatch, -+ }, -+ { .name = "INEFFECTIVE_SOFTWARE_PREFETCH", -+ .desc = "Number of software prefetches that did not fetch data outside of the processor core.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x52, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_ineffective_software_prefetch), -+ .umasks = amd64_fam17h_zen2_ineffective_software_prefetch, -+ }, -+ { .name = "SOFTWARE_PREFETCH_DATA_CACHE_FILLS", -+ .desc = "Number of software prefetches fills by data source", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x59, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_software_prefetch_data_cache_fills), -+ .umasks = amd64_fam17h_zen2_software_prefetch_data_cache_fills, -+ }, -+ { .name = "HARDWARE_PREFETCH_DATA_CACHE_FILLS", -+ .desc = "Number of hardware prefetches fills by data source", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x5a, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_software_prefetch_data_cache_fills), -+ .umasks = amd64_fam17h_zen2_software_prefetch_data_cache_fills, /* shared */ -+ }, -+ { .name = "L1_DTLB_MISS", -+ .desc = "L1 Data TLB misses.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x45, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_l1_dtlb_miss), -+ .umasks = amd64_fam17h_zen2_l1_dtlb_miss, -+ }, -+ { .name = "RETIRED_LOCK_INSTRUCTIONS", -+ .desc = "Counts the number of retired locked instructions", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x25, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_retired_lock_instructions), -+ .umasks = amd64_fam17h_zen2_retired_lock_instructions, -+ }, -+ { .name = "RETIRED_CLFLUSH_INSTRUCTIONS", -+ .desc = "Counts the number of retired non-speculative 
clflush instructions", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x26, -+ .flags = 0, -+ }, -+ { .name = "RETIRED_CPUID_INSTRUCTIONS", -+ .desc = "Counts the number of retired cpuid instructions", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x27, -+ .flags = 0, -+ }, -+ { .name = "SMI_RECEIVED", -+ .desc = "Counts the number system management interrupts (SMI) received", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x2b, -+ .flags = 0, -+ }, -+ { .name = "INTERRUPT_TAKEN", -+ .desc = "Counts the number of interrupts taken", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x2c, -+ .flags = 0, -+ }, -+ { .name = "MAB_ALLOCATION_BY_PIPE", -+ .desc = "TBD", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x41, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_mab_allocation_by_pipe), -+ .umasks = amd64_fam17h_zen2_mab_allocation_by_pipe, -+ }, -+ { .name = "MISALIGNED_LOADS", -+ .desc = "Misaligned loads retired", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x47, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "CYCLES_NOT_IN_HALT", -+ .desc = "Number of core cycles not in halted state", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x76, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "TLB_FLUSHES", -+ .desc = "Number of TLB flushes", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x78, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_tlb_flushes), -+ .umasks = amd64_fam17h_zen2_tlb_flushes, -+ }, -+ { .name = "PREFETCH_INSTRUCTIONS_DISPATCHED", -+ .desc = "Software Prefetch Instructions Dispatched. 
This is a speculative event", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x4b, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_prefetch_instructions_dispatched), -+ .umasks = amd64_fam17h_zen2_prefetch_instructions_dispatched, -+ }, -+ { .name = "STORE_TO_LOAD_FORWARD", -+ .desc = "Number of STore Lad Forward hits.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x35, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "STORE_COMMIT_CANCELS_2", -+ .desc = "Number of store commit cancellations", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x37, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_store_commit_cancels_2), -+ .umasks = amd64_fam17h_zen2_store_commit_cancels_2, -+ }, -+ { .name = "L1_BTB_CORRECTION", -+ .desc = "Number of L1 branch prediction overrides of existing prediction. This is a speculative event.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x8a, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "L2_BTB_CORRECTION", -+ .desc = "Number of L2 branch prediction overrides of existing prediction. This is a speculative event.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x8b, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "DYNAMIC_INDIRECT_PREDICTIONS", -+ .desc = "Number of indirect branch prediction for potential multi-target branch. This is a speculative event.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x8e, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "DECODER_OVERRIDE_BRANCH_PRED", -+ .desc = "Numbner of decoder overrides of existing brnach prediction. 
This is a speculative event.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x91, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "ITLB_FETCH_HIT", -+ .desc = "Instruction fetches that hit in the L1 ITLB", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x94, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_itlb_fetch_hit), -+ .umasks = amd64_fam17h_zen2_itlb_fetch_hit, -+ }, -+ { .name = "UOPS_QUEUE_EMPTY", -+ .desc = "Cycles where the uops queue is empty", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xa9, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "UOPS_DISPATCHED_FROM_DECODER", -+ .desc = "Number of uops dispatched from either the Decoder, OpCache or both", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xaa, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_uops_dispatched_from_decoder), -+ .umasks = amd64_fam17h_zen2_uops_dispatched_from_decoder, -+ }, -+ { .name = "DISPATCH_RESOURCE_STALL_CYCLES_1", -+ .desc = "Number of cycles where a dispatch group is valid but does not get dispatched due to a Token Stall", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xae, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_dispatch_resource_stall_cycles_1), -+ .umasks = amd64_fam17h_zen2_dispatch_resource_stall_cycles_1, -+ }, -+ { .name = "DISPATCH_RESOURCE_STALL_CYCLES_0", -+ .desc = "Number of cycles where a dispatch group is valid but does not get dispatched due to a Token Stall", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xaf, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_dispatch_resource_stall_cycles_0), -+ .umasks = amd64_fam17h_zen2_dispatch_resource_stall_cycles_0, -+ }, -+}; -diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c -index 29efd08..3d97c98 100644 ---- a/lib/pfmlib_amd64.c -+++ b/lib/pfmlib_amd64.c -@@ -174,7 +174,13 @@ amd64_get_revision(pfm_amd64_config_t *cfg) - } else if (cfg->family == 21) { /* family 15h */ - rev = 
PFM_PMU_AMD64_FAM15H_INTERLAGOS; - } else if (cfg->family == 23) { /* family 17h */ -- rev = PFM_PMU_AMD64_FAM17H_ZEN1; -+ switch (cfg->model) { -+ case 49: -+ rev = PFM_PMU_AMD64_FAM17H_ZEN2; -+ break; -+ default: -+ rev = PFM_PMU_AMD64_FAM17H_ZEN1; -+ } - } else if (cfg->family == 22) { /* family 16h */ - rev = PFM_PMU_AMD64_FAM16H; - } -diff --git a/lib/pfmlib_amd64_fam17h.c b/lib/pfmlib_amd64_fam17h.c -index ea67f79..f35af92 100644 ---- a/lib/pfmlib_amd64_fam17h.c -+++ b/lib/pfmlib_amd64_fam17h.c -@@ -27,6 +27,7 @@ - #include "pfmlib_priv.h" - #include "pfmlib_amd64_priv.h" - #include "events/amd64_events_fam17h_zen1.h" -+#include "events/amd64_events_fam17h_zen2.h" - - /* - * This function detects ZEN1 for the deprecated -@@ -102,3 +103,30 @@ pfmlib_pmu_t amd64_fam17h_zen1_support={ - PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs), - .get_event_nattrs = pfm_amd64_get_event_nattrs, - }; -+ -+pfmlib_pmu_t amd64_fam17h_zen2_support={ -+ .desc = "AMD64 Fam17h Zen2", -+ .name = "amd64_fam17h_zen2", -+ .pmu = PFM_PMU_AMD64_FAM17H_ZEN2, -+ .pmu_rev = 0, -+ .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_pe), -+ .type = PFM_PMU_TYPE_CORE, -+ .supported_plm = AMD64_FAM10H_PLM, -+ .num_cntrs = 6, -+ .max_encoding = 1, -+ .pe = amd64_fam17h_zen2_pe, -+ .atdesc = amd64_mods, -+ .flags = PFMLIB_PMU_FL_RAW_UMASK, -+ .cpu_family = PFM_PMU_AMD64_FAM17H_ZEN2, -+ .pmu_detect = pfm_amd64_family_detect, -+ .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding, -+ PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), -+ .get_event_first = pfm_amd64_get_event_first, -+ .get_event_next = pfm_amd64_get_event_next, -+ .event_is_valid = pfm_amd64_event_is_valid, -+ .validate_table = pfm_amd64_validate_table, -+ .get_event_info = pfm_amd64_get_event_info, -+ .get_event_attr_info = pfm_amd64_get_event_attr_info, -+ PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs), -+ .get_event_nattrs = pfm_amd64_get_event_nattrs, -+}; -diff --git a/lib/pfmlib_common.c 
b/lib/pfmlib_common.c -index 3f2d689..19e2060 100644 ---- a/lib/pfmlib_common.c -+++ b/lib/pfmlib_common.c -@@ -80,6 +80,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= - &amd64_fam16h_support, - &amd64_fam17h_deprecated_support, - &amd64_fam17h_zen1_support, -+ &amd64_fam17h_zen2_support, - &intel_core_support, - &intel_atom_support, - &intel_nhm_support, -diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h -index ea618ae..75b935a 100644 ---- a/lib/pfmlib_priv.h -+++ b/lib/pfmlib_priv.h -@@ -250,6 +250,7 @@ extern pfmlib_pmu_t amd64_fam15h_nb_support; - extern pfmlib_pmu_t amd64_fam16h_support; - extern pfmlib_pmu_t amd64_fam17h_deprecated_support; - extern pfmlib_pmu_t amd64_fam17h_zen1_support; -+extern pfmlib_pmu_t amd64_fam17h_zen2_support; - extern pfmlib_pmu_t intel_p6_support; - extern pfmlib_pmu_t intel_ppro_support; - extern pfmlib_pmu_t intel_pii_support; -diff --git a/tests/validate_x86.c b/tests/validate_x86.c -index af5f664..36896df 100644 ---- a/tests/validate_x86.c -+++ b/tests/validate_x86.c -@@ -6932,6 +6932,36 @@ static const test_event_t x86_test_events[]={ - .codes[0] = 0x0825, - .fstr = "knm_unc_m2pcie::UNC_M2P_EGRESS_CYCLES_FULL:AD_1", - }, -+ { SRC_LINE, -+ .name = "amd64_fam17h_zen2::retired_uops", -+ .count = 1, -+ .codes[0] = 0x5300c1ull, -+ .fstr = "amd64_fam17h_zen2::RETIRED_UOPS:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam17h_zen2::cycles_not_in_halt", -+ .count = 1, -+ .codes[0] = 0x530076ull, -+ .fstr = "amd64_fam17h_zen2::CYCLES_NOT_IN_HALT:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam17h_zen2::L2_PREFETCH_HIT_L2", -+ .count = 1, -+ .codes[0] = 0x531f70ull, -+ .fstr = "amd64_fam17h_zen2::L2_PREFETCH_HIT_L2:ANY:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam17h_zen2::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:u", -+ .count = 1, -+ .codes[0] = 0x510845ull, -+ .fstr = "amd64_fam17h_zen2::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:k=0:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ 
.name = "amd64_fam17h_zen2::RETIRED_FUSED_INSTRUCTIONS", -+ .count = 1, -+ .codes[0] = 0x1005300d0ull, -+ .fstr = "amd64_fam17h_zen2::RETIRED_FUSED_INSTRUCTIONS:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, - }; - - #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) - -commit 9982ae5470c20caa716a243bee6bc98a2fba4bfe -Author: Stephane Eranian -Date: Fri Jan 3 08:27:14 2020 -0800 - - various AMD Fam17h fixes - - This patches fixes: - - typos in the new AMD Zen1/Zen2 support man page for libpfm_amd64_fam17.3 - - typos in lib/pfmlib_amd64_fam17.c - - obsolete umask_t arrays for AMD Zen2 event table - - Signed-off-by: Stephane Eranian - -diff --git a/docs/man3/libpfm_amd64_fam17h.3 b/docs/man3/libpfm_amd64_fam17h.3 -index 7925db2..5029669 100644 ---- a/docs/man3/libpfm_amd64_fam17h.3 -+++ b/docs/man3/libpfm_amd64_fam17h.3 -@@ -10,7 +10,7 @@ libpfm_amd64_fam17h - support for AMD64 Family 17h processors - .sp - .SH DESCRIPTION - The library supports AMD Family 17h processors Zen1 core PMU in both 32 and 64-bit modes. --The amd64_fam17h PMU model name has been deprecated in favor of amd_fam17_zen1. The old -+The amd64_fam17h PMU model name has been deprecated in favor of amd_fam17h_zen1. The old - name is maintained for backward compatibility reasons, but should not be used anymore. 
- - .SH MODIFIERS -diff --git a/lib/events/amd64_events_fam17h_zen2.h b/lib/events/amd64_events_fam17h_zen2.h -index 87dfff0..c832bce 100644 ---- a/lib/events/amd64_events_fam17h_zen2.h -+++ b/lib/events/amd64_events_fam17h_zen2.h -@@ -84,157 +84,6 @@ static const amd64_umask_t amd64_fam17h_zen2_tagged_ibs_ops[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_zen2_number_of_move_elimination_and_scalar_op_optimization[]={ -- { .uname = "OPTIMIZED", -- .udesc = "Number of scalar ops optimized.", -- .ucode = 0x8, -- }, -- { .uname = "OPT_POTENTIAL", -- .udesc = "Number of ops that are candidates for optimization (have z-bit either set or pass.", -- .ucode = 0x4, -- }, -- { .uname = "SSE_MOV_OPS_ELIM", -- .udesc = "Number of SSE move ops eliminated.", -- .ucode = 0x2, -- }, -- { .uname = "SSE_MOV_OPS", -- .udesc = "Number of SSE move ops.", -- .ucode = 0x1, -- }, --}; -- --static const amd64_umask_t amd64_fam17h_zen2_retired_sse_avx_operations[]={ -- { .uname = "MAC_FLOPS", -- .udesc = "Mac flops. 
MAC FLOPS count as 2 FLOPS.", -- .ucode = 0x8, -- }, -- { .uname = "DIV_FLOPS", -- .udesc = "Divide/square root flops.", -- .ucode = 0x4, -- }, -- { .uname = "MULT_FLOPS", -- .udesc = "Multiply flops.", -- .ucode = 0x2, -- }, -- { .uname = "ADD_SUB_FLOPS", -- .udesc = "Add/subtract flops.", -- .ucode = 0x1, -- }, --}; -- --static const amd64_umask_t amd64_fam17h_zen2_retired_serializing_ops[]={ -- { .uname = "X87_CTRL_RET", -- .udesc = "X87 control word mispredict traps due to mispredction in RC or PC, or changes in mask bits.", -- .ucode = 0x1, -- }, -- { .uname = "X87_BOT_RET", -- .udesc = "X87 bottom-executing uops retired.", -- .ucode = 0x2, -- }, -- { .uname = "SSE_CTRL_RET", -- .udesc = "SSE control word mispreduct traps due to mispredctions in RC, FTZ or DAZ or changes in mask bits.", -- .ucode = 0x4, -- }, -- { .uname = "SSE_BOT_RET", -- .udesc = "SSE bottom-executing uops retired.", -- .ucode = 0x8, -- }, --}; -- --static const amd64_umask_t amd64_fam17h_zen2_fp_dispatch_faults[]={ -- { .uname = "X87_FULL_FAULT", -- .udesc = "X87 fill faults", -- .ucode = 0x1, -- }, -- { .uname = "XMM_FILL_FAULT", -- .udesc = "XMM fill faults", -- .ucode = 0x2, -- }, -- { .uname = "YMM_FILL_FAULT", -- .udesc = "YMM fill faults", -- .ucode = 0x4, -- }, -- { .uname = "YMM_SPILL_FAULT", -- .udesc = "YMM spill faults", -- .ucode = 0x8, -- }, --}; -- --static const amd64_umask_t amd64_fam17h_zen2_retired_x87_floating_point_operations[]={ -- { .uname = "DIV_SQR_R_OPS", -- .udesc = "Divide and square root ops", -- .ucode = 0x4, -- }, -- { .uname = "MUL_OPS", -- .udesc = "Multiple ops", -- .ucode = 0x2, -- }, -- { .uname = "ADD_SUB_OPS", -- .udesc = "Add/subtract ops", -- .ucode = 0x1, -- }, --}; -- --static const amd64_umask_t amd64_fam17h_zen2_fpu_pipe_assignment[]={ -- { .uname = "DUAL3", -- .udesc = "Total number of multi-pipe uops assigned to pipe3", -- .ucode = 0x80, -- }, -- { .uname = "DUAL2", -- .udesc = "Total number of multi-pipe uops assigned to pipe2", -- .ucode = 
0x40, -- }, -- { .uname = "DUAL1", -- .udesc = "Total number of multi-pipe uops assigned to pipe1", -- .ucode = 0x20, -- }, -- { .uname = "DUAL0", -- .udesc = "Total number of multi-pipe uops assigned to pipe0", -- .ucode = 0x10, -- }, -- { .uname = "TOTAL3", -- .udesc = "Total number of uops assigned to pipe3", -- .ucode = 0x8, -- }, -- { .uname = "TOTAL2", -- .udesc = "Total number of uops assigned to pipe2", -- .ucode = 0x4, -- }, -- { .uname = "TOTAL1", -- .udesc = "Total number of uops assigned to pipe1", -- .ucode = 0x2, -- }, -- { .uname = "TOTAL0", -- .udesc = "Total number of uops assigned to pipe0", -- .ucode = 0x1, -- }, --}; -- --static const amd64_umask_t amd64_fam17h_zen2_instruction_cache_lines_invalidated[]={ -- { .uname = "L2_INVALIDATING_PROBE", -- .udesc = "IC line invalidated due to L2 invalidating probe (external or LS).", -- .ucode = 0x2, -- }, -- { .uname = "FILL_INVALIDATED", -- .udesc = "IC line invalidated due to overwriting fill response.", -- .ucode = 0x1, -- }, --}; -- --static const amd64_umask_t amd64_fam17h_zen2_instruction_pipe_stall[]={ -- { .uname = "IC_STALL_ANY", -- .udesc = "IC pipe was stalled during this clock cycle for any reason (nothing valud in pipe ICM1).", -- .ucode = 0x4, -- }, -- { .uname = "IC_STALL_DQ_EMPTY", -- .udesc = "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", -- .ucode = 0x2, -- }, -- { .uname = "IC_STALL_BACK_PRESSURE", -- .udesc = "IC pipe was stalled during this clock cycle (ncluding IC to OC fetches) due to back pressure.", -- .ucode = 0x1, -- }, --}; - - static const amd64_umask_t amd64_fam17h_zen2_core_to_l2_cacheable_request_access_status[]={ - { .uname = "LS_RD_BLK_C_S", -@@ -279,21 +128,6 @@ static const amd64_umask_t amd64_fam17h_zen2_l2_prefetch_hit_l2[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_zen2_cycles_with_fill_pending_from_l2[]={ -- { .uname = "L2_FILL_BUSY", -- .udesc = "TBD", -- .ucode = 0x1, -- .uflags = AMD64_FL_DFL, -- }, --}; 
-- --static const amd64_umask_t amd64_fam17h_zen2_l2_latency[]={ -- { .uname = "L2_CYCLES_WAITING_ON_FILLS", -- .udesc = "TBD", -- .ucode = 0x1, -- .uflags = AMD64_FL_DFL, -- }, --}; - - static const amd64_umask_t amd64_fam17h_zen2_requests_to_l2_group1[]={ - { .uname = "RD_BLK_L", -@@ -365,37 +199,6 @@ static const amd64_umask_t amd64_fam17h_zen2_requests_to_l2_group2[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_zen2_ls_to_l2_wbc_requests[]={ -- { .uname = "WCB_WRITE", -- .udesc = "TBD", -- .ucode = 0x40, -- }, -- { .uname = "WCB_CLOSE", -- .udesc = "TBD", -- .ucode = 0x20, -- }, -- { .uname = "CACHE_LINE_FLUSH", -- .udesc = "TBD", -- .ucode = 0x10, -- }, -- { .uname = "I_LINE_FLUSH", -- .udesc = "TBD", -- .ucode = 0x8, -- }, -- { .uname = "ZERO_BYTE_STORE", -- .udesc = "TBD", -- .ucode = 0x4, -- }, -- { .uname = "LOCAL_IC_CLR", -- .udesc = "TBD", -- .ucode = 0x2, -- }, -- { .uname = "C_L_ZERO", -- .udesc = "TBD", -- .ucode = 0x1, -- }, --}; -- - static const amd64_umask_t amd64_fam17h_zen2_bad_status_2[]={ - { .uname = "STLI_OTHER", - .udesc = "Store-to-load conflicts. 
A load was unable to complete due to a non-forwardable conflict with an older store.", -@@ -512,25 +315,6 @@ static const amd64_umask_t amd64_fam17h_zen2_l1_dtlb_miss[]={ - }, - }; - --static const amd64_umask_t amd64_fam17h_zen2_locks[]={ -- { .uname = "SPEC_LOCK_MAP_COMMIT", -- .udesc = "TBD", -- .ucode = 0x8, -- }, -- { .uname = "SPEC_LOCK", -- .udesc = "TBD", -- .ucode = 0x4, -- }, -- { .uname = "NON_SPEC_LOCK", -- .udesc = "TBD", -- .ucode = 0x2, -- }, -- { .uname = "BUS_LOCK", -- .udesc = "TBD", -- .ucode = 0x1, -- }, --}; -- - static const amd64_umask_t amd64_fam17h_zen2_mab_allocation_by_pipe[]={ - { .uname = "TLB_PIPE_EARLY", - .udesc = "TBD", -@@ -562,36 +346,6 @@ static const amd64_umask_t amd64_fam17h_zen2_prefetch_instructions_dispatched[]= - }, - }; - --static const amd64_umask_t amd64_fam17h_zen2_tablewalker_allocation[]={ -- { .uname = "ALLOC_ISIDE1", -- .udesc = "TBD", -- .ucode = 0x8, -- }, -- { .uname = "ALLOC_ISIDE0", -- .udesc = "TBD", -- .ucode = 0x4, -- }, -- { .uname = "ALLOC_DSIDE1", -- .udesc = "TBD", -- .ucode = 0x2, -- }, -- { .uname = "ALLOC_DSIDE0", -- .udesc = "TBD", -- .ucode = 0x1, -- }, --}; -- --static const amd64_umask_t amd64_fam17h_zen2_oc_mode_switch[]={ -- { .uname = "OC_IC_MODE_SWITCH", -- .udesc = "TBD", -- .ucode = 0x2, -- }, -- { .uname = "IC_OC_MODE_SWITCH", -- .udesc = "TBD", -- .ucode = 0x1, -- }, --}; -- - static const amd64_umask_t amd64_fam17h_zen2_uops_dispatched_from_decoder[]={ - { .uname = "DECODER_DISPATCHED", - .udesc = "Number of uops dispatched from the Decoder", -diff --git a/lib/pfmlib_amd64_fam17h.c b/lib/pfmlib_amd64_fam17h.c -index f35af92..000c902 100644 ---- a/lib/pfmlib_amd64_fam17h.c -+++ b/lib/pfmlib_amd64_fam17h.c -@@ -51,7 +51,7 @@ pfm_amd64_family_detect_zen1(void *this) - * Should use amd_fam17h_zen1 instead. 
- */ - pfmlib_pmu_t amd64_fam17h_deprecated_support={ -- .desc = "AMD64 Fam17h Zen1 (deprecated - use amd_fam17_zen1 instead)", -+ .desc = "AMD64 Fam17h Zen1 (deprecated - use amd_fam17h_zen1 instead)", - .name = "amd64_fam17h", - .pmu = PFM_PMU_AMD64_FAM17H, - .pmu_rev = 0, - -commit fc49676f3f5315b43ae8016151c3d5ba030567f8 -Author: Stephane Eranian -Date: Wed Jan 8 14:03:19 2020 -0800 - - fix some AMD Zen1 and Zen2 comment typos - - The events files for Zen1 and Zen2 had some typos in comments, so fix that. - - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h_zen1.h b/lib/events/amd64_events_fam17h_zen1.h -index 218ee8f..c6a5852 100644 ---- a/lib/events/amd64_events_fam17h_zen1.h -+++ b/lib/events/amd64_events_fam17h_zen1.h -@@ -21,7 +21,7 @@ - * This file is part of libpfm, a performance monitoring support library for - * applications on Linux. - * -- * PMU: amd64_fam17h (AMD64 Fam17h)) -+ * PMU: amd64_fam17h_zen1 (AMD64 Fam17h Zen1)) - */ - - static const amd64_umask_t amd64_fam17h_zen1_l1_itlb_miss_l2_itlb_miss[]={ -diff --git a/lib/events/amd64_events_fam17h_zen2.h b/lib/events/amd64_events_fam17h_zen2.h -index c832bce..b269268 100644 ---- a/lib/events/amd64_events_fam17h_zen2.h -+++ b/lib/events/amd64_events_fam17h_zen2.h -@@ -21,7 +21,7 @@ - * This file is part of libpfm, a performance monitoring support library for - * applications on Linux. - * -- * PMU: amd64_fam17h_zen2_zen2 (AMD64 Fam17h Zen2)) -+ * PMU: amd64_fam17h_zen2 (AMD64 Fam17h Zen2)) - */ - - static const amd64_umask_t amd64_fam17h_zen2_l1_itlb_miss_l2_itlb_miss[]={ - -commit ca5321232c20201b3b91b335a104397c13054c51 -Author: Stephane Eranian -Date: Thu Jan 9 12:12:48 2020 -0800 - - fix encoding of L1_DTLB_MISS in Zen1 validation - - Was using amd64_fam17h instead of amd64_fam17h_zen1. - It was sill encoding with the deprecated pmu name. 
- - Signed-off-by: Stephane Eranian - -diff --git a/tests/validate_x86.c b/tests/validate_x86.c -index 36896df..563350e 100644 ---- a/tests/validate_x86.c -+++ b/tests/validate_x86.c -@@ -5660,10 +5660,10 @@ static const test_event_t x86_test_events[]={ - .fstr = "amd64_fam17h_zen1::LOCKS:SPEC_LOCK:k=1:u=1:e=0:i=0:c=0:h=0:g=0", - }, - { SRC_LINE, -- .name = "amd64_fam17h::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:u", -+ .name = "amd64_fam17h_zen1::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:u", - .count = 1, - .codes[0] = 0x510845ull, -- .fstr = "amd64_fam17h::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:k=0:u=1:e=0:i=0:c=0:h=0:g=0", -+ .fstr = "amd64_fam17h_zen1::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:k=0:u=1:e=0:i=0:c=0:h=0:g=0", - }, - { SRC_LINE, - .name = "amd64_fam16h::RETIRED_INSTRUCTIONS", - -commit e06078dba3b373225c61fe682880f35ee8db9651 -Author: Stephane Eranian -Date: Fri Jan 17 14:00:54 2020 -0800 - - add RETIRED_SERIALIZING_OPS to AMD Fam17h Zen2 - - Was missing from initial event table. - - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h_zen2.h b/lib/events/amd64_events_fam17h_zen2.h -index b269268..8c81cca 100644 ---- a/lib/events/amd64_events_fam17h_zen2.h -+++ b/lib/events/amd64_events_fam17h_zen2.h -@@ -400,6 +400,25 @@ static const amd64_umask_t amd64_fam17h_zen2_dispatch_resource_stall_cycles_0[]= - }, - }; - -+static const amd64_umask_t amd64_fam17h_zen2_retired_serializing_ops[]={ -+ { .uname = "X87_CTRL_RET", -+ .udesc = "X87 control word mispredict traps due to mispredction in RC or PC, or changes in mask bits.", -+ .ucode = 0x1, -+ }, -+ { .uname = "X87_BOT_RET", -+ .udesc = "X87 bottom-executing uops retired.", -+ .ucode = 0x2, -+ }, -+ { .uname = "SSE_CTRL_RET", -+ .udesc = "SSE control word mispreduct traps due to mispredctions in RC, FTZ or DAZ or changes in mask bits.", -+ .ucode = 0x4, -+ }, -+ { .uname = "SSE_BOT_RET", -+ .udesc = "SSE bottom-executing uops retired.", -+ .ucode = 0x8, -+ }, -+}; -+ - static const amd64_entry_t 
amd64_fam17h_zen2_pe[]={ - { .name = "L1_ITLB_MISS_L2_ITLB_HIT", - .desc = "Number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB.", -@@ -807,4 +826,13 @@ static const amd64_entry_t amd64_fam17h_zen2_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_dispatch_resource_stall_cycles_0), - .umasks = amd64_fam17h_zen2_dispatch_resource_stall_cycles_0, - }, -+ { .name = "RETIRED_SERIALIZING_OPS", -+ .desc = "The number of serializing Ops retired.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x5, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_retired_serializing_ops), -+ .umasks = amd64_fam17h_zen2_retired_serializing_ops, -+ }, - }; - -commit 42c1857c7694cec1a4750a340381d49dd84ca8ff -Author: Stephane Eranian -Date: Mon Mar 16 16:24:31 2020 -0700 - - add RETIRED_SSE_AVX_FLOPS event for AMD64 Fam17h Zen2 - - Was missing from initial commit. Added as PPR rev 0.54. - - Note that this event by itself does not count correctly. - It needs large increment support, which means merging - of two consecutive counters. This is handled by the Linux - kernel starting with 5.6-rc4. The library simply encodes - the event as if it was like any other normal event. 
- - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h_zen2.h b/lib/events/amd64_events_fam17h_zen2.h -index 8c81cca..134b81e 100644 ---- a/lib/events/amd64_events_fam17h_zen2.h -+++ b/lib/events/amd64_events_fam17h_zen2.h -@@ -419,6 +419,31 @@ static const amd64_umask_t amd64_fam17h_zen2_retired_serializing_ops[]={ - }, - }; - -+static const amd64_umask_t amd64_fam17h_zen2_retired_sse_avx_flops[]={ -+ { .uname = "ADD_SUB_FLOPS", -+ .udesc = "Addition/subtraction FLOPS", -+ .ucode = 0x1, -+ }, -+ { .uname = "MULT_FLOPS", -+ .udesc = "Multiplication FLOPS", -+ .ucode = 0x2, -+ }, -+ { .uname = "DIV_FLOPS", -+ .udesc = "Division FLOPS.", -+ .ucode = 0x4, -+ }, -+ { .uname = "MAC_FLOPS", -+ .udesc = "Double precision add/subtract flops.", -+ .ucode = 0x8, -+ }, -+ { .uname = "ANY", -+ .udesc = "Double precision add/subtract flops.", -+ .ucode = 0xf, -+ .uflags = AMD64_FL_DFL | AMD64_FL_NCOMBO, -+ }, -+}; -+ -+ - static const amd64_entry_t amd64_fam17h_zen2_pe[]={ - { .name = "L1_ITLB_MISS_L2_ITLB_HIT", - .desc = "Number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB.", -@@ -436,6 +461,15 @@ static const amd64_entry_t amd64_fam17h_zen2_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_l1_itlb_miss_l2_itlb_miss), - .umasks = amd64_fam17h_zen2_l1_itlb_miss_l2_itlb_miss, - }, -+ { .name = "RETIRED_SSE_AVX_FLOPS", -+ .desc = "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15 and therefore requires the MergeEvent. 
On Linux, the kernel handles this case without the need to pass the merge event.", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x3, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_retired_sse_avx_flops), -+ .umasks = amd64_fam17h_zen2_retired_sse_avx_flops, -+ }, - { .name = "DIV_CYCLES_BUSY_COUNT", - .desc = "Number of cycles when the divider is busy.", - .modmsk = AMD64_FAM17H_ATTRS, -diff --git a/tests/validate_x86.c b/tests/validate_x86.c -index e0beefa..1b7ff6d 100644 ---- a/tests/validate_x86.c -+++ b/tests/validate_x86.c -@@ -6962,6 +6962,18 @@ static const test_event_t x86_test_events[]={ - .codes[0] = 0x1005300d0ull, - .fstr = "amd64_fam17h_zen2::RETIRED_FUSED_INSTRUCTIONS:k=1:u=1:e=0:i=0:c=0:h=0:g=0", - }, -+ { SRC_LINE, -+ .name = "amd64_fam17h_zen2::RETIRED_SSE_AVX_FLOPS", -+ .count = 1, -+ .codes[0] = 0x530f03, -+ .fstr = "amd64_fam17h_zen2::RETIRED_SSE_AVX_FLOPS:ANY:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam17h_zen2::RETIRED_SSE_AVX_FLOPS:MULT_FLOPS:u", -+ .count = 1, -+ .codes[0] = 0x510203, -+ .fstr = "amd64_fam17h_zen2::RETIRED_SSE_AVX_FLOPS:MULT_FLOPS:k=0:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, - }; - - #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) - -commit c99ed181402b21e74744d5f602aceb6a320c7ded -Author: Stephane Eranian -Date: Sat May 30 18:08:52 2020 -0700 - - update AMD64 Fam17h Zen1 event table - - Add a few missing events. Thanks to Emmanuel for tracking them down. 
- - Based on AMD Fam17h model 01,08h B2 PPR version 3.03 Jun 14, 2019 - - Reported-by: Emmanuel Oseret - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h_zen1.h b/lib/events/amd64_events_fam17h_zen1.h -index c6a5852..8fb0551 100644 ---- a/lib/events/amd64_events_fam17h_zen1.h -+++ b/lib/events/amd64_events_fam17h_zen1.h -@@ -549,6 +549,115 @@ static const amd64_umask_t amd64_fam17h_zen1_dynamic_tokens_dispatch_stall_cycle - }, - }; - -+static const amd64_umask_t amd64_fam17h_zen1_software_prefetch_data_cache_fills[]={ -+ { .uname = "MABRESP_LCL_L2", -+ .udesc = "Fill from local L2.", -+ .ucode = 0x1, -+ }, -+ { .uname = "LS_MABRESP_LCL_CACHE", -+ .udesc = "Fill from another cache (home node local).", -+ .ucode = 0x2, -+ }, -+ { .uname = "LS_MABRESP_LCL_DRAM", -+ .udesc = "Fill from DRAM (home node local).", -+ .ucode = 0x8, -+ }, -+ { .uname = "LS_MABRESP_LCL_RMT_CACHE", -+ .udesc = "Fill from another cache (home node remote).", -+ .ucode = 0x10, -+ }, -+ { .uname = "LS_MABRESP_LCL_RMT_DRAM", -+ .udesc = "Fill from DRAM (home node remote).", -+ .ucode = 0x40, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen1_uops_dispatched_from_decoder[]={ -+ { .uname = "DECODER_DISPATCHED", -+ .udesc = "Number of uops dispatched from the Decoder", -+ .ucode = 0x1, -+ }, -+ { .uname = "OPCACHE_DISPATCHED", -+ .udesc = "Number of uops dispatched from the OpCache", -+ .ucode = 0x2, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen1_dispatch_resource_stall_cycles_1[]={ -+ { .uname = "INT_PHY_REG_FILE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to integer physical register file resource stalls. Applies to all uops that have integer destination register.", -+ .ucode = 0x1, -+ }, -+ { .uname = "LOAD_QUEUE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to load queue resource stalls. 
Applies to all uops with load semantics.", -+ .ucode = 0x2, -+ }, -+ { .uname = "STORE_QUEUE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to store queue resource stalls. Applies to all uops with store semantics.", -+ .ucode = 0x4, -+ }, -+ { .uname = "INT_SCHEDULER_MISC_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to integer scheduler miscellaneous resource stalls.", -+ .ucode = 0x8, -+ }, -+ { .uname = "TAKEN_BRANCH_BUFFER_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to taken branch buffer resource stalls.", -+ .ucode = 0x10, -+ }, -+ { .uname = "FP_REG_FILE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to floating-point register file resource stalls.", -+ .ucode = 0x20, -+ }, -+ { .uname = "FP_SCHEDULER_FILE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to floating-point scheduler resource stalls.", -+ .ucode = 0x40, -+ }, -+ { .uname = "FP_MISC_FILE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to floating-point miscellaneous resource unavailable.", -+ .ucode = 0x80, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen1_dispatch_resource_stall_cycles_0[]={ -+ { .uname = "ALSQ1_RSRC_STALL", -+ .udesc = "ALSQ1 resources unavailable.", -+ .ucode = 0x1, -+ }, -+ { .uname = "ALSQ2_RSRC_STALL", -+ .udesc = "ALSQ2 resources unavailable.", -+ .ucode = 0x2, -+ }, -+ { .uname = "ALSQ3_RSRC_STALL", -+ .udesc = "ALSQ3 resources unavailable.", -+ .ucode = 0x4, -+ }, -+ { .uname = "ALSQ3_0_RSRC_STALL", -+ .udesc = "TBD", -+ .ucode = 0x8, -+ }, -+ { .uname = "ALU_RSRC_STALL", -+ .udesc = "ALU resource total unavailable", -+ .ucode = 0x10, -+ }, -+ { .uname = "AGSQ_RSRC_STALL", -+ .udesc = "AGSQ resource unavailable", -+ .ucode = 0x20, -+ }, -+ { .uname = "RETIRE_RSRC_STALL", -+ .udesc = "RETIRE resource unavailable", -+ .ucode = 0x40, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam17h_zen1_l2_prefetch_hit_l2[]={ -+ { .uname = "ANY", -+ .udesc = "Any L2 prefetch requests", -+ .ucode = 0x3f, -+ .uflags = 
AMD64_FL_DFL, -+ }, -+}; -+ -+ - static const amd64_entry_t amd64_fam17h_zen1_pe[]={ - { .name = "L1_ITLB_MISS_L2_ITLB_HIT", - .desc = "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB.", -@@ -887,6 +996,24 @@ static const amd64_entry_t amd64_fam17h_zen1_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_ineffective_software_prefetch), - .umasks = amd64_fam17h_zen1_ineffective_software_prefetch, - }, -+ { .name = "SOFTWARE_PREFETCH_DATA_CACHE_FILLS", -+ .desc = "Number of software prefetches fills by data source", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x59, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_software_prefetch_data_cache_fills), -+ .umasks = amd64_fam17h_zen1_software_prefetch_data_cache_fills, -+ }, -+ { .name = "HARDWARE_PREFETCH_DATA_CACHE_FILLS", -+ .desc = "Number of hardware prefetches fills by data source", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x5a, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_software_prefetch_data_cache_fills), -+ .umasks = amd64_fam17h_zen1_software_prefetch_data_cache_fills, /* shared */ -+ }, - { .name = "L1_DTLB_MISS", - .desc = "L1 Data TLB misses.", - .modmsk = AMD64_FAM17H_ATTRS, -@@ -985,4 +1112,58 @@ static const amd64_entry_t amd64_fam17h_zen1_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_dynamic_tokens_dispatch_stall_cycles_0), - .umasks = amd64_fam17h_zen1_dynamic_tokens_dispatch_stall_cycles_0, - }, -+ { .name = "UOPS_DISPATCHED_FROM_DECODER", -+ .desc = "Number of uops dispatched from either the Decoder, OpCache or both", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xaa, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_uops_dispatched_from_decoder), -+ .umasks = amd64_fam17h_zen1_uops_dispatched_from_decoder, -+ }, -+ { .name = "DISPATCH_RESOURCE_STALL_CYCLES_1", -+ .desc = "Number of cycles where a dispatch group is valid but does not get dispatched due to a 
Token Stall", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xae, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_dispatch_resource_stall_cycles_1), -+ .umasks = amd64_fam17h_zen1_dispatch_resource_stall_cycles_1, -+ }, -+ { .name = "DISPATCH_RESOURCE_STALL_CYCLES_0", -+ .desc = "Number of cycles where a dispatch group is valid but does not get dispatched due to a Token Stall", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xaf, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_dispatch_resource_stall_cycles_0), -+ .umasks = amd64_fam17h_zen1_dispatch_resource_stall_cycles_0, -+ }, -+ { .name = "L2_PREFETCH_HIT_L2", -+ .desc = "Number of L2 prefetcher hits in the L2", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x70, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_l2_prefetch_hit_l2), -+ .umasks = amd64_fam17h_zen1_l2_prefetch_hit_l2, -+ }, -+ { .name = "L2_PREFETCH_HIT_L3", -+ .desc = "Number of L2 prefetcher hits in the L3", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x71, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_l2_prefetch_hit_l2), -+ .umasks = amd64_fam17h_zen1_l2_prefetch_hit_l2, /* shared */ -+ }, -+ { .name = "L2_PREFETCH_MISS_L3", -+ .desc = "Number of L2 prefetcher misses in the L3", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x72, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_l2_prefetch_hit_l2), -+ .umasks = amd64_fam17h_zen1_l2_prefetch_hit_l2, /* shared */ -+ }, - }; - -commit 17e622e9539e1f8faf3c0c27889963a537e95537 -Author: Stephane Eranian -Date: Sat Jun 13 00:39:58 2020 -0700 - - add L2_PREFETCH_MISS_L3 for AMD Fam17h Zen2 - - Add missing L2_PREFETCH_MISS_L3 event for AMD Fam17h Zen2. 
- - Reported-by: Emmanuel Oseret - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h_zen2.h b/lib/events/amd64_events_fam17h_zen2.h -index 134b81e..18d71d5 100644 ---- a/lib/events/amd64_events_fam17h_zen2.h -+++ b/lib/events/amd64_events_fam17h_zen2.h -@@ -128,7 +128,6 @@ static const amd64_umask_t amd64_fam17h_zen2_l2_prefetch_hit_l2[]={ - }, - }; - -- - static const amd64_umask_t amd64_fam17h_zen2_requests_to_l2_group1[]={ - { .uname = "RD_BLK_L", - .udesc = "Number of data cache reads (including software and hardware prefetches).", -@@ -627,6 +626,15 @@ static const amd64_entry_t amd64_fam17h_zen2_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_l2_prefetch_hit_l2), - .umasks = amd64_fam17h_zen2_l2_prefetch_hit_l2, /* shared */ - }, -+ { .name = "L2_PREFETCH_MISS_L3", -+ .desc = "Number of L2 prefetcher misses in the L3", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x72, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_l2_prefetch_hit_l2), -+ .umasks = amd64_fam17h_zen2_l2_prefetch_hit_l2, /* shared */ -+ }, - { .name = "REQUESTS_TO_L2_GROUP1", - .desc = "TBD", - .modmsk = AMD64_FAM17H_ATTRS, - -commit 5a623727cf7111afd09df2cdb0ff4b294d31efa7 -Author: Stephane Eranian -Date: Fri Jun 19 15:07:01 2020 -0700 - - update AMD Fam17h Zen2 event table - - Added: - - FP_DISPATCH_FAULT - - DATA_CACHE_REFILLS_FROM_SYSTEM - - Fixed typos in umask for SOFTWARE_PREFETCH_DATA_CACHE_FILLS - which are shared with DATA_CACHE_REFILLS_FROM_SYSTEM. 
- - Reported-by: Steve Kaufmann - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h_zen2.h b/lib/events/amd64_events_fam17h_zen2.h -index 18d71d5..71616e5 100644 ---- a/lib/events/amd64_events_fam17h_zen2.h -+++ b/lib/events/amd64_events_fam17h_zen2.h -@@ -261,11 +261,11 @@ static const amd64_umask_t amd64_fam17h_zen2_software_prefetch_data_cache_fills[ - .udesc = "Fill from DRAM (home node local).", - .ucode = 0x8, - }, -- { .uname = "LS_MABRESP_LCL_RMT_CACHE", -+ { .uname = "LS_MABRESP_RMT_CACHE", - .udesc = "Fill from another cache (home node remote).", - .ucode = 0x10, - }, -- { .uname = "LS_MABRESP_LCL_RMT_DRAM", -+ { .uname = "LS_MABRESP_RMT_DRAM", - .udesc = "Fill from DRAM (home node remote).", - .ucode = 0x40, - }, -@@ -442,6 +442,29 @@ static const amd64_umask_t amd64_fam17h_zen2_retired_sse_avx_flops[]={ - }, - }; - -+static const amd64_umask_t amd64_fam17h_zen2_fp_dispatch_faults[]={ -+ { .uname = "X87_FILL_FAULT", -+ .udesc = "x87 fill faults", -+ .ucode = 0x1, -+ }, -+ { .uname = "XMM_FILL_FAULT", -+ .udesc = "XMM fill faults", -+ .ucode = 0x2, -+ }, -+ { .uname = "YMM_FILL_FAULT", -+ .udesc = "YMM fill faults", -+ .ucode = 0x4, -+ }, -+ { .uname = "YMM_SPILL_FAULT", -+ .udesc = "YMM spill faults", -+ .ucode = 0x8, -+ }, -+ { .uname = "ANY", -+ .udesc = "Any FP dispatch faults", -+ .ucode = 0xf, -+ .uflags = AMD64_FL_DFL | AMD64_FL_NCOMBO, -+ }, -+}; - - static const amd64_entry_t amd64_fam17h_zen2_pe[]={ - { .name = "L1_ITLB_MISS_L2_ITLB_HIT", -@@ -877,4 +900,22 @@ static const amd64_entry_t amd64_fam17h_zen2_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_retired_serializing_ops), - .umasks = amd64_fam17h_zen2_retired_serializing_ops, - }, -+ { .name = "FP_DISPATCH_FAULTS", -+ .desc = "Floating-point dispatch faults", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0xe, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_fp_dispatch_faults), -+ .umasks = 
amd64_fam17h_zen2_fp_dispatch_faults, -+ }, -+ { .name = "DATA_CACHE_REFILLS_FROM_SYSTEM", -+ .desc = "Demand Data Cache fills by data source", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x43, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_software_prefetch_data_cache_fills), -+ .umasks = amd64_fam17h_zen2_software_prefetch_data_cache_fills, /* shared */ -+ }, - }; - -commit ea9752f3fee76798010093c2f35cbf719980997d -Author: Stephane Eranian -Date: Sat Jun 20 12:27:26 2020 -0700 - - more updates to AMD Fam17h Zen1 event table - - Added: - - DYNAMIC_INDIRECT_PREDICTIONS - - DECODER_OVERRIDES_PREDICTION - - Reported-by: Emmanuel Oseret - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h_zen1.h b/lib/events/amd64_events_fam17h_zen1.h -index 8fb0551..242091a 100644 ---- a/lib/events/amd64_events_fam17h_zen1.h -+++ b/lib/events/amd64_events_fam17h_zen1.h -@@ -657,7 +657,6 @@ static const amd64_umask_t amd64_fam17h_zen1_l2_prefetch_hit_l2[]={ - }, - }; - -- - static const amd64_entry_t amd64_fam17h_zen1_pe[]={ - { .name = "L1_ITLB_MISS_L2_ITLB_HIT", - .desc = "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB.", -@@ -1166,4 +1165,16 @@ static const amd64_entry_t amd64_fam17h_zen1_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_l2_prefetch_hit_l2), - .umasks = amd64_fam17h_zen1_l2_prefetch_hit_l2, /* shared */ - }, -+ { .name = "DYNAMIC_INDIRECT_PREDICTIONS", -+ .desc = "Indirect Branch Prediction for potential multi-target branch (speculative)", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x8e, -+ .flags = 0, -+ }, -+ { .name = "DECODER_OVERRIDES_PREDICTION", -+ .desc = "Decoder Overrides Existing Branch Prediction (speculative)", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x91, -+ .flags = 0, -+ }, - }; - -commit e162519d26d313860a9e69889bcc67406f92edc9 -Author: Stephane Eranian -Date: Wed Aug 12 15:23:27 2020 -0700 - - fix duplicate event code on AMD Fam17h Zen1 - - 
Removed DISPATCH_RESOURCE_STALL_CYCLES_0 which is not an AMD Fam17h event - but rather a Zen2 event with the same event code. - - Reported-by: Kaufmann, Steve - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h_zen1.h b/lib/events/amd64_events_fam17h_zen1.h -index 242091a..315f8b5 100644 ---- a/lib/events/amd64_events_fam17h_zen1.h -+++ b/lib/events/amd64_events_fam17h_zen1.h -@@ -1129,15 +1129,6 @@ static const amd64_entry_t amd64_fam17h_zen1_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_dispatch_resource_stall_cycles_1), - .umasks = amd64_fam17h_zen1_dispatch_resource_stall_cycles_1, - }, -- { .name = "DISPATCH_RESOURCE_STALL_CYCLES_0", -- .desc = "Number of cycles where a dispatch group is valid but does not get dispatched due to a Token Stall", -- .modmsk = AMD64_FAM17H_ATTRS, -- .code = 0xaf, -- .flags = 0, -- .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen1_dispatch_resource_stall_cycles_0), -- .umasks = amd64_fam17h_zen1_dispatch_resource_stall_cycles_0, -- }, - { .name = "L2_PREFETCH_HIT_L2", - .desc = "Number of L2 prefetcher hits in the L2", - .modmsk = AMD64_FAM17H_ATTRS, - -commit de4beb0da7530bc1dcd2f19582dfeca2ecb1d185 -Author: Stephane Eranian -Date: Fri Sep 25 11:41:56 2020 -0700 - - update AMD Fam17h Zen2 event table - - Based on PPR version 0.91 Sep1, 2020. - - Thanks to Emmanuel for tracking the diffs. 
- - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam17h_zen2.h b/lib/events/amd64_events_fam17h_zen2.h -index 71616e5..f44bd77 100644 ---- a/lib/events/amd64_events_fam17h_zen2.h -+++ b/lib/events/amd64_events_fam17h_zen2.h -@@ -210,12 +210,15 @@ static const amd64_umask_t amd64_fam17h_zen2_retired_lock_instructions[]={ - { .uname = "CACHEABLE_LOCKS", - .udesc = "Lock in cacheable memory region.", - .ucode = 0xe, -- .uflags = AMD64_FL_DFL, -+ }, -+ { .uname = "BUS_LOCK", -+ .udesc = "Number of bus locks", -+ .ucode = 0x1, - }, - }; - - static const amd64_umask_t amd64_fam17h_zen2_tlb_flushes[]={ -- { .uname = "ANY", -+ { .uname = "ALL", - .udesc = "ANY TLB flush.", - .ucode = 0xff, - .uflags = AMD64_FL_DFL, -@@ -338,10 +341,22 @@ static const amd64_umask_t amd64_fam17h_zen2_mab_allocation_by_pipe[]={ - }; - - static const amd64_umask_t amd64_fam17h_zen2_prefetch_instructions_dispatched[]={ -+ { .uname = "PREFETCH_T0_T1_T2", -+ .udesc = "Number of prefetcht0, perfetcht1, prefetcht2 instructions dispatched", -+ .ucode = 0x1, -+ }, -+ { .uname = "PREFETCHW", -+ .udesc = "Number of prefetchtw instructions dispatched", -+ .ucode = 0x2, -+ }, -+ { .uname = "PREFETCHNTA", -+ .udesc = "Number of prefetchtnta instructions dispatched", -+ .ucode = 0x4, -+ }, - { .uname = "ANY", - .udesc = "Any prefetch", -- .ucode = 0xff, -- .uflags = AMD64_FL_DFL, -+ .ucode = 0x7, -+ .uflags = AMD64_FL_DFL | AMD64_FL_NCOMBO, - }, - }; - -@@ -608,6 +623,13 @@ static const amd64_entry_t amd64_fam17h_zen2_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam17h_zen2_tagged_ibs_ops), - .umasks = amd64_fam17h_zen2_tagged_ibs_ops, - }, -+ { .name = "RETIRED_BRANCH_MISPREDICTED_DIRECTION_MISMATCH", -+ .desc = "Number of retired conditional branch instructions that were not correctly predicted because of branch direction mismatch", -+ .modmsk = AMD64_FAM17H_ATTRS, -+ .code = 0x1c7, -+ .flags = 0, -+ .ngrp = 0, -+ }, - { .name = "INSTRUCTION_CACHE_REFILLS_FROM_L2", - .desc = 
"Number of 64-byte instruction cachelines that was fulfilled by the L2 cache.", - .modmsk = AMD64_FAM17H_ATTRS, - -commit cc4ba27e55440f87359bee5176380db1ba4ef8af -Author: Swarup Sahoo -Date: Tue Mar 2 01:49:51 2021 +0530 - - Add AMD64 Fam19h Zen3 core PMU support - - The patch adds a core PMU support for AMD Fam19h Zen3. - - new PMU model: amd64_fam19h_zen3 - - Based on the public specifications PPR (#55898) Rev 0.35 - Feb 5, 2021. - Available at: https://www.amd.com/system/files/TechDocs/55898_pub.zip - - Signed-off-by: Swarup Sahoo - -diff --git a/README b/README -index f8cb866..3bc3a68 100644 ---- a/README -+++ b/README -@@ -41,6 +41,7 @@ The library supports many PMUs. The current version can handle: - AMD64 Fam16h (Jaguar) - AMD64 Fam17h (Zen1) - AMD64 Fam17h (Zen2) -+ AMD64 Fam19h (Zen3) - - - For Intel X86: - Intel P6 (Pentium II, Pentium Pro, Pentium III, Pentium M) -diff --git a/docs/Makefile b/docs/Makefile -index e124747..df51a3a 100644 ---- a/docs/Makefile -+++ b/docs/Makefile -@@ -41,6 +41,7 @@ ARCH_MAN=libpfm_intel_core.3 \ - libpfm_amd64_fam16h.3 \ - libpfm_amd64_fam17h.3 \ - libpfm_amd64_fam17h_zen2.3 \ -+ libpfm_amd64_fam19h_zen3.3 \ - libpfm_intel_atom.3 \ - libpfm_intel_nhm.3 \ - libpfm_intel_nhm_unc.3 \ -diff --git a/docs/man3/libpfm_amd64_fam19h_zen3.3 b/docs/man3/libpfm_amd64_fam19h_zen3.3 -new file mode 100644 -index 0000000..5faeb18 ---- /dev/null -+++ b/docs/man3/libpfm_amd64_fam19h_zen3.3 -@@ -0,0 +1,49 @@ -+.TH LIBPFM 3 "February, 2021" "" "Linux Programmer's Manual" -+.SH NAME -+libpfm_amd64_fam19h_zen3 - support for AMD64 Family 19h processors -+.SH SYNOPSIS -+.nf -+.B #include -+.sp -+.B PMU name: amd64_fam19h_zen3 -+.B PMU desc: AMD64 Fam19h Zen3 -+.sp -+.SH DESCRIPTION -+The library supports AMD Family 19h processors Zen3 core PMU in both 32 and 64-bit modes. -+ -+.SH MODIFIERS -+The following modifiers are supported on AMD64 Family 19h Zen3 core PMU: -+.TP -+.B u -+Measure at user level which includes privilege levels 1, 2, 3. 
This corresponds to \fBPFM_PLM3\fR. -+This is a boolean modifier. -+.TP -+.B k -+Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. -+This is a boolean modifier. -+.TP -+.B h -+Measure at while executing in host mode (when using virtualization). This corresponds to \fBPFM_PLMH\fR. -+This modifier is available starting with Fam10h. This is a boolean modifier. -+.TP -+.B g -+Measure at while executing in guest mode (when using virtualization). This modifier is available -+starting with Fam10h. This is a boolean modifier. -+.TP -+.B i -+Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR -+occurring. This is a boolean modifier -+.TP -+.B e -+Enable edge detection, i.e., count only when there is a state transition. This is a boolean modifier. -+.TP -+.B c -+Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles -+in which the number of occurrences of the event is greater or equal to the threshold. This is an integer -+modifier with values in the range [0:255]. 
-+ -+.SH AUTHORS -+.nf -+Swarup Sahoo -+.if -+.PP -diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h -index f943ae9..b0ca262 100644 ---- a/include/perfmon/pfmlib.h -+++ b/include/perfmon/pfmlib.h -@@ -562,6 +562,7 @@ typedef enum { - - PFM_PMU_AMD64_FAM17H_ZEN1, /* AMD AMD64 Fam17h Zen1 */ - PFM_PMU_AMD64_FAM17H_ZEN2, /* AMD AMD64 Fam17h Zen2 */ -+ PFM_PMU_AMD64_FAM19H_ZEN3, /* AMD AMD64 Fam19h Zen3 */ - /* MUST ADD NEW PMU MODELS HERE */ - - PFM_PMU_MAX /* end marker */ -diff --git a/lib/Makefile b/lib/Makefile -index 7afe411..b9088e9 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -124,7 +124,8 @@ SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \ - pfmlib_amd64_k7.c pfmlib_amd64_k8.c pfmlib_amd64_fam10h.c \ - pfmlib_amd64_fam11h.c pfmlib_amd64_fam12h.c \ - pfmlib_amd64_fam14h.c pfmlib_amd64_fam15h.c \ -- pfmlib_amd64_fam17h.c pfmlib_amd64_fam16h.c -+ pfmlib_amd64_fam17h.c pfmlib_amd64_fam16h.c \ -+ pfmlib_amd64_fam19h.c - - CFLAGS += -DCONFIG_PFMLIB_ARCH_X86 - -@@ -252,6 +253,7 @@ INC_X86= pfmlib_intel_x86_priv.h \ - events/amd64_events_fam15h.h \ - events/amd64_events_fam17h_zen1.h \ - events/amd64_events_fam17h_zen2.h \ -+ events/amd64_events_fam19h_zen3.h \ - events/amd64_events_fam16h.h \ - events/intel_p6_events.h \ - events/intel_netburst_events.h \ -diff --git a/lib/events/amd64_events_fam19h_zen3.h b/lib/events/amd64_events_fam19h_zen3.h -new file mode 100644 -index 0000000..e95ac69 ---- /dev/null -+++ b/lib/events/amd64_events_fam19h_zen3.h -@@ -0,0 +1,999 @@ -+/* -+ * Contributed by Swarup Sahoo -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -+ * of the Software, and to permit persons to whom the Software is furnished to do so, -+ * 
subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in all -+ * copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ * -+ * This file is part of libpfm, a performance monitoring support library for -+ * applications on Linux. -+ * -+ * PMU: amd64_fam19h_zen3 (AMD64 Fam19h Zen3) -+ */ -+ -+static const amd64_umask_t amd64_fam19h_zen3_retired_sse_avx_flops[]={ -+ { .uname = "ADD_SUB_FLOPS", -+ .udesc = "Addition/subtraction FLOPS", -+ .ucode = 0x1, -+ }, -+ { .uname = "MULT_FLOPS", -+ .udesc = "Multiplication FLOPS", -+ .ucode = 0x2, -+ }, -+ { .uname = "DIV_FLOPS", -+ .udesc = "Division/Square-root FLOPS", -+ .ucode = 0x4, -+ }, -+ { .uname = "MAC_FLOPS", -+ .udesc = "Multiply-Accumulate flops. 
Each MAC operation is counted as 2 FLOPS", -+ .ucode = 0x8, -+ }, -+ { .uname = "ANY", -+ .udesc = "Double precision add/subtract flops", -+ .ucode = 0xf, -+ .uflags = AMD64_FL_DFL | AMD64_FL_NCOMBO, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_retired_serializing_ops[]={ -+ { .uname = "X87_CTRL_RET", -+ .udesc = "x87 control word mispredict traps due to mispredction in RC or PC, or changes in Exception Mask bits", -+ .ucode = 0x1, -+ }, -+ { .uname = "X87_BOT_RET", -+ .udesc = "x87 bottom-executing ops retired", -+ .ucode = 0x2, -+ }, -+ { .uname = "SSE_CTRL_RET", -+ .udesc = "SSE/AVX control word mispredict traps", -+ .ucode = 0x4, -+ }, -+ { .uname = "SSE_BOT_RET", -+ .udesc = "SSE/AVX bottom-executing ops retired", -+ .ucode = 0x8, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_fp_dispatch_faults[]={ -+ { .uname = "X87_FILL_FAULT", -+ .udesc = "x87 fill faults", -+ .ucode = 0x1, -+ }, -+ { .uname = "XMM_FILL_FAULT", -+ .udesc = "XMM fill faults", -+ .ucode = 0x2, -+ }, -+ { .uname = "YMM_FILL_FAULT", -+ .udesc = "YMM fill faults", -+ .ucode = 0x4, -+ }, -+ { .uname = "YMM_SPILL_FAULT", -+ .udesc = "YMM spill faults", -+ .ucode = 0x8, -+ }, -+ { .uname = "ANY", -+ .udesc = "Any FP dispatch faults", -+ .ucode = 0xf, -+ .uflags = AMD64_FL_DFL | AMD64_FL_NCOMBO, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_bad_status_2[]={ -+ { .uname = "STLI_OTHER", -+ .udesc = "Store-to-load conflicts. 
A load was unable to complete due to a non-forwardable conflict with an older store", -+ .ucode = 0x2, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_retired_lock_instructions[]={ -+ { .uname = "BUS_LOCK", -+ .udesc = "Number of bus locks", -+ .ucode = 0x1, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_ls_dispatch[]={ -+ { .uname = "LD_ST_DISPATCH", -+ .udesc = "Dispatched op that performs a load from and store to the same memory address", -+ .ucode = 0x4, -+ }, -+ { .uname = "STORE_DISPATCH", -+ .udesc = "Store ops dispatched", -+ .ucode = 0x2, -+ }, -+ { .uname = "LD_DISPATCH", -+ .udesc = "Load ops dispatched", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_store_commit_cancels_2[]={ -+ { .uname = "WCB_FULL", -+ .udesc = "Non cacheable store and the non-cacheable commit buffer is full", -+ .ucode = 0x1, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_mab_allocation_by_type[]={ -+ { .uname = "LS", -+ .udesc = "Load store allocations", -+ .ucode = 0x3f, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+ { .uname = "HW_PF", -+ .udesc = "Hardware prefetcher allocations", -+ .ucode = 0x40, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+ { .uname = "ALL", -+ .udesc = "All allocations", -+ .ucode = 0x7f, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_software_prefetch_data_cache_fills[]={ -+ { .uname = "LCL_L2", -+ .udesc = "Fill from local L2 to the core", -+ .ucode = 0x1, -+ }, -+ { .uname = "INT_CACHE", -+ .udesc = "Fill from L3 or different L2 in same CCX", -+ .ucode = 0x2, -+ }, -+ { .uname = "EXT_CACHE_LCL", -+ .udesc = "Fill from cache of different CCX in same node", -+ .ucode = 0x4, -+ }, -+ { .uname = "MEM_IO_LCL", -+ .udesc = "Fill from DRAM or IO connected in same node", -+ .ucode = 0x8, -+ }, -+ { .uname = "EXT_CACHE_RMT", -+ .udesc = "Fill from CCX cache in different node", -+ 
.ucode = 0x10, -+ }, -+ { .uname = "MEM_IO_RMT", -+ .udesc = "Fill from DRAM or IO connected in different node", -+ .ucode = 0x40, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_l1_dtlb_miss[]={ -+ { .uname = "TLB_RELOAD_1G_L2_MISS", -+ .udesc = "Data TLB reload to a 1GB page that missed in the L2 TLB", -+ .ucode = 0x80, -+ }, -+ { .uname = "TLB_RELOAD_2M_L2_MISS", -+ .udesc = "Data TLB reload to a 2MB page that missed in the L2 TLB", -+ .ucode = 0x40, -+ }, -+ { .uname = "TLB_RELOAD_COALESCED_PAGE_MISS", -+ .udesc = "Data TLB reload to a coalesced page that also missed in the L2 TLB", -+ .ucode = 0x20, -+ }, -+ { .uname = "TLB_RELOAD_4K_L2_MISS", -+ .udesc = "Data TLB reload to a 4KB page that missed in the L2 TLB", -+ .ucode = 0x10, -+ }, -+ { .uname = "TLB_RELOAD_1G_L2_HIT", -+ .udesc = "Data TLB reload to a 1GB page that hit in the L2 TLB", -+ .ucode = 0x8, -+ }, -+ { .uname = "TLB_RELOAD_2M_L2_HIT", -+ .udesc = "Data TLB reload to a 2MB page that hit in the L2 TLB", -+ .ucode = 0x4, -+ }, -+ { .uname = "TLB_RELOAD_COALESCED_PAGE_HIT", -+ .udesc = "Data TLB reload to a coalesced page that hit in the L2 TLB", -+ .ucode = 0x2, -+ }, -+ { .uname = "TLB_RELOAD_4K_L2_HIT", -+ .udesc = "Data TLB reload to a 4KB page that hit in the L2 TLB", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_misaligned_loads[]={ -+ { .uname = "MA4K", -+ .udesc = "The number of 4KB misaligned (page crossing) loads", -+ .ucode = 0x2, -+ }, -+ { .uname = "MA64", -+ .udesc = "The number of 64B misaligned (cacheline crossing) loads", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_prefetch_instructions_dispatched[]={ -+ { .uname = "PREFETCH_T0_T1_T2", -+ .udesc = "Number of prefetcht0, perfetcht1, prefetcht2 instructions dispatched", -+ .ucode = 0x1, -+ }, -+ { .uname = "PREFETCHW", -+ .udesc = "Number of prefetchtw instructions dispatched", -+ .ucode = 0x2, -+ }, -+ { .uname = "PREFETCHNTA", -+ .udesc = "Number of 
prefetchtnta instructions dispatched", -+ .ucode = 0x4, -+ }, -+ { .uname = "ANY", -+ .udesc = "Any prefetch", -+ .ucode = 0x7, -+ .uflags = AMD64_FL_DFL | AMD64_FL_NCOMBO, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_ineffective_software_prefetch[]={ -+ { .uname = "MAB_MCH_CNT", -+ .udesc = "Software prefetch instructions saw a match on an already allocated miss request buffer", -+ .ucode = 0x2, -+ }, -+ { .uname = "DATA_PIPE_SW_PF_DC_HIT", -+ .udesc = "Software Prefetch instruction saw a DC hit", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_tlb_flushes[]={ -+ { .uname = "ALL", -+ .udesc = "Any TLB flush", -+ .ucode = 0xff, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_l1_itlb_miss_l2_itlb_miss[]={ -+ { .uname = "COALESCED4K", -+ .udesc = "Number of instruction fetches to a >4K coalesced page", -+ .ucode = 0x8, -+ }, -+ { .uname = "IF1G", -+ .udesc = "Number of instruction fetches to a 1GB page", -+ .ucode = 0x4, -+ }, -+ { .uname = "IF2M", -+ .udesc = "Number of instruction fetches to a 2MB page", -+ .ucode = 0x2, -+ }, -+ { .uname = "IF4K", -+ .udesc = "Number of instruction fetches to a 4KB page", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_itlb_fetch_hit[]={ -+ { .uname = "IF1G", -+ .udesc = "L1 instruction fetch TLB hit a 1GB page size", -+ .ucode = 0x4, -+ }, -+ { .uname = "IF2M", -+ .udesc = "L1 instruction fetch TLB hit a 2MB page size", -+ .ucode = 0x2, -+ }, -+ { .uname = "IF4K", -+ .udesc = "L1 instruction fetch TLB hit a 4KB or 16KB page size", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_ic_tag_hit_miss[]={ -+ { .uname = "IC_HIT", -+ .udesc = "Instruction cache hit", -+ .ucode = 0x7, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+ { .uname = "IC_MISS", -+ .udesc = "Instruction cache miss", -+ .ucode = 0x18, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+ { .uname = "ALL_IC_ACCESS", -+ .udesc = "All instruction 
cache accesses", -+ .ucode = 0x1f, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_op_cache_hit_miss[]={ -+ { .uname = "OC_HIT", -+ .udesc = "Op cache hit", -+ .ucode = 0x3, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+ { .uname = "OC_MISS", -+ .udesc = "Op cache miss", -+ .ucode = 0x4, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+ { .uname = "ALL_OC_ACCESS", -+ .udesc = "All op cache accesses", -+ .ucode = 0x7, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_ops_source_dispatched_from_decoder[]={ -+ { .uname = "X86DECODER_DISPATCHED", -+ .udesc = "Number of ops fetched from Instruction Cache and dispatched", -+ .ucode = 0x1, -+ }, -+ { .uname = "OPCACHE_DISPATCHED", -+ .udesc = "Number of ops fetched from Op Cache and dispatched", -+ .ucode = 0x2, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_ops_type_dispatched_from_decoder[]={ -+ { .uname = "FP_DISP_IBS_MODE", -+ .udesc = "Any FP dispatch. Count aligns with IBS count", -+ .ucode = 0x04, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+ { .uname = "INT_DISP_IBS_MODE", -+ .udesc = "Any Integer dispatch. Count aligns with IBS count", -+ .ucode = 0x08, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+ { .uname = "FP_DISP_RETIRE_MODE", -+ .udesc = "Any FP dispatch. Count aligns with RETIRED_OPS count", -+ .ucode = 0x84, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+ { .uname = "INT_DISP_RETIRE_MODE", -+ .udesc = "Any Integer dispatch. Count aligns with RETIRED_OPS count", -+ .ucode = 0x88, -+ .uflags = AMD64_FL_NCOMBO, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_dispatch_resource_stall_cycles_1[]={ -+ { .uname = "INT_PHY_REG_FILE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to integer physical register file resource stalls. Applies to all ops that have integer destination register", -+ .ucode = 0x1, -+ }, -+ { .uname = "LOAD_QUEUE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to load queue resource stalls. 
Applies to all ops with load semantics", -+ .ucode = 0x2, -+ }, -+ { .uname = "STORE_QUEUE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to store queue resource stalls. Applies to all ops with store semantics", -+ .ucode = 0x4, -+ }, -+ { .uname = "TAKEN_BRANCH_BUFFER_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to taken branch buffer resource stalls", -+ .ucode = 0x10, -+ }, -+ { .uname = "FP_REG_FILE_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to floating-point register file resource stalls. Applies to all FP ops that have a destination register", -+ .ucode = 0x20, -+ }, -+ { .uname = "FP_SCHEDULER_RSRC_STALL", -+ .udesc = "Number of cycles stalled due to floating-point scheduler resource stalls. Applies to ops that use the FP scheduler", -+ .ucode = 0x40, -+ }, -+ { .uname = "FP_FLUSH_RECOVERY_STALL", -+ .udesc = "Number of cycles stalled due to floating-point flush recovery", -+ .ucode = 0x80, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_dispatch_resource_stall_cycles_2[]={ -+ { .uname = "INT_SCHEDULER_0_TOKEN_STALL", -+ .udesc = "Number of cycles stalled due to no tokens available for Integer Scheduler Queue 0", -+ .ucode = 0x1, -+ }, -+ { .uname = "INT_SCHEDULER_1_TOKEN_STALL", -+ .udesc = "Number of cycles stalled due to no tokens available for Integer Scheduler Queue 1", -+ .ucode = 0x2, -+ }, -+ { .uname = "INT_SCHEDULER_2_TOKEN_STALL", -+ .udesc = "Number of cycles stalled due to no tokens available for Integer Scheduler Queue 2", -+ .ucode = 0x4, -+ }, -+ { .uname = "INT_SCHEDULER_3_TOKEN_STALL", -+ .udesc = "Number of cycles stalled due to no tokens available for Integer Scheduler Queue 3", -+ .ucode = 0x8, -+ }, -+ { .uname = "RETIRE_TOKEN_STALL", -+ .udesc = "Number of cycles stalled due to insufficient tokens available for Retire Queue", -+ .ucode = 0x20, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_retired_mmx_fp_instructions[]={ -+ { .uname = "SSE_INSTR", -+ .udesc = "Number of SSE 
instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX)", -+ .ucode = 0x4, -+ }, -+ { .uname = "MMX_INSTR", -+ .udesc = "Number of MMX instructions", -+ .ucode = 0x2, -+ }, -+ { .uname = "X87_INSTR", -+ .udesc = "Number of x87 instructions", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_tagged_ibs_ops[]={ -+ { .uname = "IBS_COUNT_ROLLOVER", -+ .udesc = "Number of times a op could not be tagged by IBS because of a previous tagged op that has not retired", -+ .ucode = 0x4, -+ }, -+ { .uname = "IBS_TAGGED_OPS_RET", -+ .udesc = "Number of ops tagged by IBS that retired", -+ .ucode = 0x2, -+ }, -+ { .uname = "IBS_TAGGED_OPS", -+ .udesc = "Number of ops tagged by IBS", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_requests_to_l2_group1[]={ -+ { .uname = "RD_BLK_L", -+ .udesc = "Number of data cache reads (including software and hardware prefetches)", -+ .ucode = 0x80, -+ }, -+ { .uname = "RD_BLK_X", -+ .udesc = "Number of data cache stores", -+ .ucode = 0x40, -+ }, -+ { .uname = "LS_RD_BLK_C_S", -+ .udesc = "Number of data cache shared reads", -+ .ucode = 0x20, -+ }, -+ { .uname = "CACHEABLE_IC_READ", -+ .udesc = "Number of instruction cache reads", -+ .ucode = 0x10, -+ }, -+ { .uname = "CHANGE_TO_X", -+ .udesc = "Number of requests change to writable, check L2 for current state", -+ .ucode = 0x8, -+ }, -+ { .uname = "PREFETCH_L2", -+ .udesc = "TBD", -+ .ucode = 0x4, -+ }, -+ { .uname = "L2_HW_PF", -+ .udesc = "Number of prefetches accepted by L2 pipeline, hit or miss", -+ .ucode = 0x2, -+ }, -+}; -+ -+static const amd64_umask_t amd64_fam19h_zen3_core_to_l2_cacheable_request_access_status[]={ -+ { .uname = "LS_RD_BLK_C_S", -+ .udesc = "Number of data cache shared read hitting in the L2", -+ .ucode = 0x80, -+ }, -+ { .uname = "LS_RD_BLK_L_HIT_X", -+ .udesc = "Number of data cache reads hitting in the L2", -+ .ucode = 0x40, -+ }, -+ { .uname = "LS_RD_BLK_L_HIT_S", -+ .udesc = "Number of data cache 
reads hitting a non-modifiable line in the L2", -+ .ucode = 0x20, -+ }, -+ { .uname = "LS_RD_BLK_X", -+ .udesc = "Number of data cache store or state change requests hitting in the L2", -+ .ucode = 0x10, -+ }, -+ { .uname = "LS_RD_BLK_C", -+ .udesc = "Number of data cache requests missing in the L2 (all types)", -+ .ucode = 0x8, -+ }, -+ { .uname = "IC_FILL_HIT_X", -+ .udesc = "Number of instruction cache fill requests hitting a modifiable line in the L2", -+ .ucode = 0x4, -+ }, -+ { .uname = "IC_FILL_HIT_S", -+ .udesc = "Number of instruction cache fill requests hitting a non-modifiable line in the L2", -+ .ucode = 0x2, -+ }, -+ { .uname = "IC_FILL_MISS", -+ .udesc = "Number of instruction cache fill requests missing the L2", -+ .ucode = 0x1, -+ }, -+}; -+ -+static const amd64_entry_t amd64_fam19h_zen3_pe[]={ -+ { .name = "RETIRED_SSE_AVX_FLOPS", -+ .desc = "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15 and therefore requires the MergeEvent", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x3, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_retired_sse_avx_flops), -+ .umasks = amd64_fam19h_zen3_retired_sse_avx_flops, -+ }, -+ { .name = "RETIRED_SERIALIZING_OPS", -+ .desc = "The number of serializing Ops retired", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x5, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_retired_serializing_ops), -+ .umasks = amd64_fam19h_zen3_retired_serializing_ops, -+ }, -+ { .name = "FP_DISPATCH_FAULTS", -+ .desc = "Floating-point dispatch faults", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xe, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_fp_dispatch_faults), -+ .umasks = amd64_fam19h_zen3_fp_dispatch_faults, -+ }, -+ { .name = "BAD_STATUS_2", -+ .desc = "TBD", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x24, -+ .flags = 0, -+ .ngrp = 
1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_bad_status_2), -+ .umasks = amd64_fam19h_zen3_bad_status_2, -+ }, -+ { .name = "RETIRED_LOCK_INSTRUCTIONS", -+ .desc = "Counts the number of retired locked instructions", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x25, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_retired_lock_instructions), -+ .umasks = amd64_fam19h_zen3_retired_lock_instructions, -+ }, -+ { .name = "RETIRED_CLFLUSH_INSTRUCTIONS", -+ .desc = "Counts the number of retired non-speculative clflush instructions", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x26, -+ .flags = 0, -+ }, -+ { .name = "RETIRED_CPUID_INSTRUCTIONS", -+ .desc = "Counts the number of retired cpuid instructions", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x27, -+ .flags = 0, -+ }, -+ { .name = "LS_DISPATCH", -+ .desc = "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x29, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_ls_dispatch), -+ .umasks = amd64_fam19h_zen3_ls_dispatch, -+ }, -+ { .name = "SMI_RECEIVED", -+ .desc = "Counts the number system management interrupts (SMI) received", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x2b, -+ .flags = 0, -+ }, -+ { .name = "INTERRUPT_TAKEN", -+ .desc = "Counts the number of interrupts taken", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x2c, -+ .flags = 0, -+ }, -+ { .name = "STORE_TO_LOAD_FORWARD", -+ .desc = "Number of STore to Load Forward hits", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x35, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "STORE_COMMIT_CANCELS_2", -+ .desc = "TBD", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x37, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_store_commit_cancels_2), -+ .umasks = amd64_fam19h_zen3_store_commit_cancels_2, -+ }, -+ { .name = "MAB_ALLOCATION_BY_TYPE", -+ .desc = "Counts when a LS pipe allocates a MAB 
entry", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x41, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_mab_allocation_by_type), -+ .umasks = amd64_fam19h_zen3_mab_allocation_by_type, -+ }, -+ { .name = "DEMAND_DATA_CACHE_FILLS_FROM_SYSTEM", -+ .desc = "Demand Data Cache fills by data source", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x43, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_software_prefetch_data_cache_fills), -+ .umasks = amd64_fam19h_zen3_software_prefetch_data_cache_fills, /* shared */ -+ }, -+ { .name = "ANY_DATA_CACHE_FILLS_FROM_SYSTEM", -+ .desc = "Any Data Cache fills by data source", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x44, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_software_prefetch_data_cache_fills), -+ .umasks = amd64_fam19h_zen3_software_prefetch_data_cache_fills, /* shared */ -+ }, -+ { .name = "L1_DTLB_MISS", -+ .desc = "L1 Data TLB misses", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x45, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l1_dtlb_miss), -+ .umasks = amd64_fam19h_zen3_l1_dtlb_miss, -+ }, -+ { .name = "MISALIGNED_LOADS", -+ .desc = "Misaligned loads retired", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x47, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_misaligned_loads), -+ .umasks = amd64_fam19h_zen3_misaligned_loads, -+ }, -+ { .name = "PREFETCH_INSTRUCTIONS_DISPATCHED", -+ .desc = "Software Prefetch Instructions Dispatched. 
This is a speculative event", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x4b, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_prefetch_instructions_dispatched), -+ .umasks = amd64_fam19h_zen3_prefetch_instructions_dispatched, -+ }, -+ { .name = "INEFFECTIVE_SOFTWARE_PREFETCH", -+ .desc = "Number of software prefetches that did not fetch data outside of the processor core", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x52, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_ineffective_software_prefetch), -+ .umasks = amd64_fam19h_zen3_ineffective_software_prefetch, -+ }, -+ { .name = "SOFTWARE_PREFETCH_DATA_CACHE_FILLS", -+ .desc = "Number of software prefetches fills by data source", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x59, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_software_prefetch_data_cache_fills), -+ .umasks = amd64_fam19h_zen3_software_prefetch_data_cache_fills, /* shared */ -+ }, -+ { .name = "HARDWARE_PREFETCH_DATA_CACHE_FILLS", -+ .desc = "Number of hardware prefetches fills by data source", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x5a, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_software_prefetch_data_cache_fills), -+ .umasks = amd64_fam19h_zen3_software_prefetch_data_cache_fills, /* shared */ -+ }, -+ { .name = "ALLOC_MAB_COUNT", -+ .desc = "Counts the in-flight L1 data cache misses (allocated Miss Address Buffers) divided by 4 and rounded down each cycle unless used with the MergeEvent functionality. 
If the MergeEvent is used, it counts the exact number of outstanding L1 data cache misses", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x5f, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "CYCLES_NOT_IN_HALT", -+ .desc = "Number of core cycles not in halted state", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x76, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "TLB_FLUSHES", -+ .desc = "Number of TLB flushes", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x78, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_tlb_flushes), -+ .umasks = amd64_fam19h_zen3_tlb_flushes, -+ }, -+ { .name = "INSTRUCTION_CACHE_REFILLS_FROM_L2", -+ .desc = "Number of 64-byte instruction cachelines that was fulfilled by the L2 cache", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x82, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM", -+ .desc = "Number of 64-byte instruction cachelines fulfilled from system memory or another cache", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x83, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "L1_ITLB_MISS_L2_ITLB_HIT", -+ .desc = "Number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x84, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "L1_ITLB_MISS_L2_ITLB_MISS", -+ .desc = "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x85, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l1_itlb_miss_l2_itlb_miss), -+ .umasks = amd64_fam19h_zen3_l1_itlb_miss_l2_itlb_miss, -+ }, -+ { .name = "L2_BTB_CORRECTION", -+ .desc = "Number of L2 branch prediction overrides of existing prediction. 
This is a speculative event", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x8b, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "DYNAMIC_INDIRECT_PREDICTIONS", -+ .desc = "Number of times a branch used the indirect predictor to make a prediction", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x8e, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "DECODER_OVERRIDE_BRANCH_PRED", -+ .desc = "Number of decoder overrides of existing branch prediction", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x91, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "L1_ITLB_FETCH_HIT", -+ .desc = "Instruction fetches that hit in the L1 ITLB", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x94, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_itlb_fetch_hit), -+ .umasks = amd64_fam19h_zen3_itlb_fetch_hit, -+ }, -+ { .name = "IC_TAG_HIT_MISS", -+ .desc = "Counts various IC tag related hit and miss events", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x18e, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_ic_tag_hit_miss), -+ .umasks = amd64_fam19h_zen3_ic_tag_hit_miss, -+ }, -+ { .name = "OP_CACHE_HIT_MISS", -+ .desc = "Counts op cache micro-tag hit/miss events", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x28f, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_op_cache_hit_miss), -+ .umasks = amd64_fam19h_zen3_op_cache_hit_miss, -+ }, -+ { .name = "OPS_SOURCE_DISPATCHED_FROM_DECODER", -+ .desc = "Number of ops dispatched from the decoder classified by op source", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xaa, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_ops_source_dispatched_from_decoder), -+ .umasks = amd64_fam19h_zen3_ops_source_dispatched_from_decoder, -+ }, -+ { .name = "OPS_TYPE_DISPATCHED_FROM_DECODER", -+ .desc = "Number of ops dispatched from the decoder classified by op type", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xab, -+ .flags = 0, -+ .ngrp = 1, -+ 
.numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_ops_type_dispatched_from_decoder), -+ .umasks = amd64_fam19h_zen3_ops_type_dispatched_from_decoder, -+ }, -+ { .name = "DISPATCH_RESOURCE_STALL_CYCLES_1", -+ .desc = "Number of cycles where a dispatch group is valid but does not get dispatched due to a Token Stall", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xae, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_dispatch_resource_stall_cycles_1), -+ .umasks = amd64_fam19h_zen3_dispatch_resource_stall_cycles_1, -+ }, -+ { .name = "DISPATCH_RESOURCE_STALL_CYCLES_2", -+ .desc = "Number of cycles where a dispatch group is valid but does not get dispatched due to a Token Stall", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xaf, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_dispatch_resource_stall_cycles_2), -+ .umasks = amd64_fam19h_zen3_dispatch_resource_stall_cycles_2, -+ }, -+ { .name = "RETIRED_INSTRUCTIONS", -+ .desc = "Number of instructions retired", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xc0, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_OPS", -+ .desc = "Number of macro-ops retired", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xc1, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_BRANCH_INSTRUCTIONS", -+ .desc = "Number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xc2, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED", -+ .desc = "Number of retired branch instructions, that were mispredicted", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xc3, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS", -+ .desc = "Number of taken branches that were retired. 
This includes all types of architectural control flow changes, including exceptions and interrupts", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xc4, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED", -+ .desc = "Number of retired taken branch instructions that were mispredicted", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xc5, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_FAR_CONTROL_TRANSFERS", -+ .desc = "Number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xc6, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_NEAR_RETURNS", -+ .desc = "Number of near return instructions (RET or RET Iw) retired", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xc8, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_NEAR_RETURNS_MISPREDICTED", -+ .desc = "Number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xc9, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_INDIRECT_BRANCH_INSTRUCTIONS_MISPREDICTED", -+ .desc = "Number of indirect branches retired there were not correctly predicted. Each such mispredict incurs the same penalty as a mispredicted condition branch instruction. Only EX mispredicts are counted", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xca, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_MMX_FP_INSTRUCTIONS", -+ .desc = "Number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. 
Since this event includes non-numeric instructions, it is not suitable for measuring MFLOPS", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xcb, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_retired_mmx_fp_instructions), -+ .umasks = amd64_fam19h_zen3_retired_mmx_fp_instructions, -+ }, -+ { .name = "RETIRED_INDIRECT_BRANCH_INSTRUCTIONS", -+ .desc = "Number of indirect branches retired", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xcc, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS", -+ .desc = "Number of retired conditional branch instructions", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xd1, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "DIV_CYCLES_BUSY_COUNT", -+ .desc = "Number of cycles when the divider is busy", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xd3, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "DIV_OP_COUNT", -+ .desc = "Number of divide ops", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xd4, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "RETIRED_BRANCH_MISPREDICTED_DIRECTION_MISMATCH", -+ .desc = "Number of retired conditional branch instructions that were not correctly predicted because of branch direction mismatch", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x1c7, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "TAGGED_IBS_OPS", -+ .desc = "Counts Op IBS related events", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x1cf, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_tagged_ibs_ops), -+ .umasks = amd64_fam19h_zen3_tagged_ibs_ops, -+ }, -+ { .name = "RETIRED_FUSED_INSTRUCTIONS", -+ .desc = "Counts retired fused instructions", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x1d0, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "REQUESTS_TO_L2_GROUP1", -+ .desc = "All L2 cache requests", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x60, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = 
LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_requests_to_l2_group1), -+ .umasks = amd64_fam19h_zen3_requests_to_l2_group1, -+ }, -+ { .name = "CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS", -+ .desc = "L2 cache request outcomes. This event does not count accesses to the L2 cache by the L2 prefetcher", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x64, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_core_to_l2_cacheable_request_access_status), -+ .umasks = amd64_fam19h_zen3_core_to_l2_cacheable_request_access_status, -+ }, -+ { .name = "L2_PREFETCH_HIT_L2", -+ .desc = "Number of L2 prefetches that hit in the L2", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x70, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "L2_PREFETCH_HIT_L3", -+ .desc = "Number of L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x71, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+ { .name = "L2_PREFETCH_MISS_L3", -+ .desc = "Number of L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x72, -+ .flags = 0, -+ .ngrp = 0, -+ }, -+}; -diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c -index 16384b8..c4497ea 100644 ---- a/lib/pfmlib_amd64.c -+++ b/lib/pfmlib_amd64.c -@@ -183,7 +183,12 @@ amd64_get_revision(pfm_amd64_config_t *cfg) - } - } else if (cfg->family == 22) { /* family 16h */ - rev = PFM_PMU_AMD64_FAM16H; -- } -+ } else if (cfg->family == 25) { /* family 19h */ -+ switch (cfg->model) { -+ default: -+ rev = PFM_PMU_AMD64_FAM19H_ZEN3; -+ } -+ } - - cfg->revision = rev; - } -diff --git a/lib/pfmlib_amd64_fam19h.c b/lib/pfmlib_amd64_fam19h.c -new file mode 100644 -index 0000000..dd3ce1a ---- /dev/null -+++ b/lib/pfmlib_amd64_fam19h.c -@@ -0,0 +1,56 @@ -+/* -+ * pfmlib_amd64_fam19h.c : AMD64 Fam19h core PMU support -+ * -+ * Contributed by Swarup Sahoo -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a 
copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -+ * of the Software, and to permit persons to whom the Software is furnished to do so, -+ * subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in all -+ * copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-+ * -+ */ -+ -+/* private headers */ -+#include "pfmlib_priv.h" -+#include "pfmlib_amd64_priv.h" -+#include "events/amd64_events_fam19h_zen3.h" -+ -+ -+pfmlib_pmu_t amd64_fam19h_zen3_support={ -+ .desc = "AMD64 Fam19h Zen3", -+ .name = "amd64_fam19h_zen3", -+ .pmu = PFM_PMU_AMD64_FAM19H_ZEN3, -+ .pmu_rev = 0, -+ .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_pe), -+ .type = PFM_PMU_TYPE_CORE, -+ .supported_plm = AMD64_FAM10H_PLM, -+ .num_cntrs = 6, -+ .max_encoding = 1, -+ .pe = amd64_fam19h_zen3_pe, -+ .atdesc = amd64_mods, -+ .flags = PFMLIB_PMU_FL_RAW_UMASK, -+ .cpu_family = PFM_PMU_AMD64_FAM19H_ZEN3, -+ .pmu_detect = pfm_amd64_family_detect, -+ .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding, -+ PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), -+ .get_event_first = pfm_amd64_get_event_first, -+ .get_event_next = pfm_amd64_get_event_next, -+ .event_is_valid = pfm_amd64_event_is_valid, -+ .validate_table = pfm_amd64_validate_table, -+ .get_event_info = pfm_amd64_get_event_info, -+ .get_event_attr_info = pfm_amd64_get_event_attr_info, -+ PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs), -+ .get_event_nattrs = pfm_amd64_get_event_nattrs, -+}; -diff --git a/lib/pfmlib_amd64_priv.h b/lib/pfmlib_amd64_priv.h -index e863d08..6fadc17 100644 ---- a/lib/pfmlib_amd64_priv.h -+++ b/lib/pfmlib_amd64_priv.h -@@ -129,6 +129,7 @@ extern pfm_amd64_config_t pfm_amd64_cfg; - #define AMD64_FAM14H_ATTRS AMD64_FAM10H_ATTRS - #define AMD64_FAM15H_ATTRS AMD64_FAM10H_ATTRS - #define AMD64_FAM17H_ATTRS AMD64_FAM10H_ATTRS -+#define AMD64_FAM19H_ATTRS AMD64_FAM10H_ATTRS - - #define AMD64_FAM10H_PLM (PFM_PLM0|PFM_PLM3|PFM_PLMH) - #define AMD64_K7_PLM (PFM_PLM0|PFM_PLM3) -diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c -index 5cfd87f..54aa9c8 100644 ---- a/lib/pfmlib_common.c -+++ b/lib/pfmlib_common.c -@@ -81,6 +81,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= - &amd64_fam17h_deprecated_support, - &amd64_fam17h_zen1_support, - &amd64_fam17h_zen2_support, -+ 
&amd64_fam19h_zen3_support, - &intel_core_support, - &intel_atom_support, - &intel_nhm_support, -diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h -index db8b0fd..e6fb49a 100644 ---- a/lib/pfmlib_priv.h -+++ b/lib/pfmlib_priv.h -@@ -255,6 +255,7 @@ extern pfmlib_pmu_t amd64_fam16h_support; - extern pfmlib_pmu_t amd64_fam17h_deprecated_support; - extern pfmlib_pmu_t amd64_fam17h_zen1_support; - extern pfmlib_pmu_t amd64_fam17h_zen2_support; -+extern pfmlib_pmu_t amd64_fam19h_zen3_support; - extern pfmlib_pmu_t intel_p6_support; - extern pfmlib_pmu_t intel_ppro_support; - extern pfmlib_pmu_t intel_pii_support; -diff --git a/tests/validate_x86.c b/tests/validate_x86.c -index ad224be..c920509 100644 ---- a/tests/validate_x86.c -+++ b/tests/validate_x86.c -@@ -7279,6 +7279,48 @@ static const test_event_t x86_test_events[]={ - .codes[0] = 0x510203, - .fstr = "amd64_fam17h_zen2::RETIRED_SSE_AVX_FLOPS:MULT_FLOPS:k=0:u=1:e=0:i=0:c=0:h=0:g=0", - }, -+ { SRC_LINE, -+ .name = "amd64_fam19h_zen3::retired_ops", -+ .count = 1, -+ .codes[0] = 0x5300c1ull, -+ .fstr = "amd64_fam19h_zen3::RETIRED_OPS:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam19h_zen3::cycles_not_in_halt", -+ .count = 1, -+ .codes[0] = 0x530076ull, -+ .fstr = "amd64_fam19h_zen3::CYCLES_NOT_IN_HALT:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam19h_zen3::L2_PREFETCH_HIT_L2", -+ .count = 1, -+ .codes[0] = 0x530070ull, -+ .fstr = "amd64_fam19h_zen3::L2_PREFETCH_HIT_L2:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam19h_zen3::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:u", -+ .count = 1, -+ .codes[0] = 0x510845ull, -+ .fstr = "amd64_fam19h_zen3::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:k=0:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam19h_zen3::RETIRED_FUSED_INSTRUCTIONS", -+ .count = 1, -+ .codes[0] = 0x1005300d0ull, -+ .fstr = "amd64_fam19h_zen3::RETIRED_FUSED_INSTRUCTIONS:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = 
"amd64_fam19h_zen3::RETIRED_SSE_AVX_FLOPS", -+ .count = 1, -+ .codes[0] = 0x530f03ull, -+ .fstr = "amd64_fam19h_zen3::RETIRED_SSE_AVX_FLOPS:ANY:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam19h_zen3::RETIRED_SSE_AVX_FLOPS:MULT_FLOPS:u", -+ .count = 1, -+ .codes[0] = 0x510203ull, -+ .fstr = "amd64_fam19h_zen3::RETIRED_SSE_AVX_FLOPS:MULT_FLOPS:k=0:u=1:e=0:i=0:c=0:h=0:g=0", -+ }, - }; - - #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) - -commit e2afb6186dab2419a4b6f79a6adf7cd9bb0f2340 -Author: Stephane Eranian -Date: Mon Mar 15 12:04:48 2021 -0700 - - Add AMD64 Fam17h Zen2 RAPL support - - This patch adds RAPL support for AMD64 Fam17h Zen2 - processors. On Zen2, only the RAPL_ENERGY_PKGS event is supported. - - Signed-off-by: Stephane Eranian - -diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h -index b0ca262..ccf3967 100644 ---- a/include/perfmon/pfmlib.h -+++ b/include/perfmon/pfmlib.h -@@ -564,6 +564,8 @@ typedef enum { - PFM_PMU_AMD64_FAM17H_ZEN1, /* AMD AMD64 Fam17h Zen1 */ - PFM_PMU_AMD64_FAM17H_ZEN2, /* AMD AMD64 Fam17h Zen2 */ - PFM_PMU_AMD64_FAM19H_ZEN3, /* AMD AMD64 Fam19h Zen3 */ -+ PFM_PMU_AMD64_RAPL, /* AMD64 RAPL */ -+ - /* MUST ADD NEW PMU MODELS HERE */ - - PFM_PMU_MAX /* end marker */ -diff --git a/lib/Makefile b/lib/Makefile -index b9088e9..ab1bc0a 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -125,7 +125,7 @@ SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \ - pfmlib_amd64_fam11h.c pfmlib_amd64_fam12h.c \ - pfmlib_amd64_fam14h.c pfmlib_amd64_fam15h.c \ - pfmlib_amd64_fam17h.c pfmlib_amd64_fam16h.c \ -- pfmlib_amd64_fam19h.c -+ pfmlib_amd64_fam19h.c pfmlib_amd64_rapl.c - - CFLAGS += -DCONFIG_PFMLIB_ARCH_X86 - -diff --git a/lib/pfmlib_amd64_rapl.c b/lib/pfmlib_amd64_rapl.c -new file mode 100644 -index 0000000..2a65e32 ---- /dev/null -+++ b/lib/pfmlib_amd64_rapl.c -@@ -0,0 +1,118 @@ -+/* -+ * pfmlib_amd64_rapl.c : AMD RAPL PMU -+ * -+ * Copyright 2021 Google LLC -+ 
* Contributed by Stephane Eranian -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -+ * of the Software, and to permit persons to whom the Software is furnished to do so, -+ * subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in all -+ * copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ * -+ * AMD RAPL PMU (AMD Zen2) -+ */ -+ -+/* private headers */ -+#include "pfmlib_priv.h" -+/* -+ * for now, we reuse the x86 table entry format and callback to avoid duplicating -+ * code. We may revisit this later on -+ */ -+#include "pfmlib_amd64_priv.h" -+ -+extern pfmlib_pmu_t amd64_rapl_support; -+ -+static const amd64_entry_t amd64_rapl_zen2[]={ -+ { .name = "RAPL_ENERGY_PKG", -+ .desc = "Number of Joules consumed by all cores and Last level cache on the package. 
Unit is 2^-32 Joules", -+ .code = 0x2, -+ } -+}; -+ -+static int -+pfm_amd64_rapl_detect(void *this) -+{ -+ int ret, rev; -+ -+ ret = pfm_amd64_detect(this); -+ if (ret != PFM_SUCCESS) -+ return ret; -+ -+ rev = pfm_amd64_cfg.revision; -+ switch(rev) { -+ case PFM_PMU_AMD64_FAM17H_ZEN2: -+ ret = PFM_SUCCESS; -+ break; -+ default: -+ ret = PFM_ERR_NOTSUPP; -+ } -+ return ret; -+} -+ -+static int -+pfm_amd64_rapl_get_encoding(void *this, pfmlib_event_desc_t *e) -+ -+{ -+ const amd64_entry_t *pe; -+ -+ pe = this_pe(this); -+ -+ e->fstr[0] = '\0'; -+ -+ e->codes[0] = pe[e->event].code; -+ e->count = 1; -+ evt_strcat(e->fstr, "%s", pe[e->event].name); -+ -+ __pfm_vbprintf("[0x%"PRIx64" event=0x%x] %s\n", -+ e->codes[0], -+ e->codes[0], e->fstr); -+ -+ return PFM_SUCCESS; -+} -+ -+/* -+ * number modifiers for RAPL -+ * define an empty modifier to avoid firing the -+ * sanity pfm_amd64_validate_table(). We are -+ * using this function to avoid duplicating code. -+ */ -+static const pfmlib_attr_desc_t amd64_rapl_mods[]= -+{ { 0, } }; -+ -+pfmlib_pmu_t amd64_rapl_support={ -+ .desc = "AMD64 RAPL", -+ .name = "amd64_rapl", -+ .perf_name = "power", -+ .pmu = PFM_PMU_AMD64_RAPL, -+ .pme_count = LIBPFM_ARRAY_SIZE(amd64_rapl_zen2), -+ .type = PFM_PMU_TYPE_UNCORE, -+ .num_cntrs = 0, -+ .num_fixed_cntrs = 3, -+ .max_encoding = 1, -+ .pe = amd64_rapl_zen2, -+ .pmu_detect = pfm_amd64_rapl_detect, -+ .atdesc = amd64_rapl_mods, -+ -+ .get_event_encoding[PFM_OS_NONE] = pfm_amd64_rapl_get_encoding, -+ PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), -+ .get_event_first = pfm_amd64_get_event_first, -+ .get_event_next = pfm_amd64_get_event_next, -+ .event_is_valid = pfm_amd64_event_is_valid, -+ .validate_table = pfm_amd64_validate_table, -+ .get_event_info = pfm_amd64_get_event_info, -+ .get_event_attr_info = pfm_amd64_get_event_attr_info, -+ PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs), -+ .get_event_nattrs = pfm_amd64_get_event_nattrs, -+}; -diff --git a/lib/pfmlib_common.c 
b/lib/pfmlib_common.c -index 54aa9c8..ba2522e 100644 ---- a/lib/pfmlib_common.c -+++ b/lib/pfmlib_common.c -@@ -82,6 +82,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= - &amd64_fam17h_zen1_support, - &amd64_fam17h_zen2_support, - &amd64_fam19h_zen3_support, -+ &amd64_rapl_support, - &intel_core_support, - &intel_atom_support, - &intel_nhm_support, -diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h -index e6fb49a..24ca1c5 100644 ---- a/lib/pfmlib_priv.h -+++ b/lib/pfmlib_priv.h -@@ -256,6 +256,7 @@ extern pfmlib_pmu_t amd64_fam17h_deprecated_support; - extern pfmlib_pmu_t amd64_fam17h_zen1_support; - extern pfmlib_pmu_t amd64_fam17h_zen2_support; - extern pfmlib_pmu_t amd64_fam19h_zen3_support; -+extern pfmlib_pmu_t amd64_rapl_support; - extern pfmlib_pmu_t intel_p6_support; - extern pfmlib_pmu_t intel_ppro_support; - extern pfmlib_pmu_t intel_pii_support; - -commit 315941fc05f5a487e4eb5efd36ea10438336944b -Author: Stephane Eranian -Date: Thu Mar 18 23:13:57 2021 -0700 - - add AMD64 Fam19h Zen3 L3 PMU support - - This patch adds the AMD Fam19h (Zen3) L3 PMU support consisting of - 3 published events. - - new PMU model: amd64_fam19h_zen3_l3 - - Based on the public specifications PPR (#55898) Rev 0.35 - Feb 5, 2021. - Available at: https://www.amd.com/system/files/TechDocs/55898_pub.zip - - Signed-off-by: Stephane Eranian - -diff --git a/README b/README -index 3bc3a68..227fa99 100644 ---- a/README -+++ b/README -@@ -41,7 +41,7 @@ The library supports many PMUs. 
The current version can handle: - AMD64 Fam16h (Jaguar) - AMD64 Fam17h (Zen1) - AMD64 Fam17h (Zen2) -- AMD64 Fam19h (Zen3) -+ AMD64 Fam19h (Zen3) (core and L3) - - - For Intel X86: - Intel P6 (Pentium II, Pentium Pro, Pentium III, Pentium M) -diff --git a/docs/Makefile b/docs/Makefile -index df51a3a..349149c 100644 ---- a/docs/Makefile -+++ b/docs/Makefile -@@ -42,6 +42,7 @@ ARCH_MAN=libpfm_intel_core.3 \ - libpfm_amd64_fam17h.3 \ - libpfm_amd64_fam17h_zen2.3 \ - libpfm_amd64_fam19h_zen3.3 \ -+ libpfm_amd64_fam19h_zen3_l3.3 \ - libpfm_intel_atom.3 \ - libpfm_intel_nhm.3 \ - libpfm_intel_nhm_unc.3 \ -diff --git a/docs/man3/libpfm_amd64_fam19h_zen3_l3.3 b/docs/man3/libpfm_amd64_fam19h_zen3_l3.3 -new file mode 100644 -index 0000000..a727455 ---- /dev/null -+++ b/docs/man3/libpfm_amd64_fam19h_zen3_l3.3 -@@ -0,0 +1,19 @@ -+.TH LIBPFM 3 "March, 2021" "" "Linux Programmer's Manual" -+.SH NAME -+libpfm_amd64_fam19h_zen3_l3 - support for AMD64 Family 19h L3 PMU -+.SH SYNOPSIS -+.nf -+.B #include -+.sp -+.B PMU name: amd64_fam19h_zen3_l3 -+.B PMU desc: AMD64 Fam19h Zen3 L3 -+.sp -+.SH DESCRIPTION -+The library supports AMD Family 19h processors Zen3 L3 PMU in both 32 and 64-bit modes. -+ -+At this point, there is no modifier supported. 
-+.SH AUTHORS -+.nf -+Stephane Eranian -+.if -+.PP -diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h -index ccf3967..44d6afe 100644 ---- a/include/perfmon/pfmlib.h -+++ b/include/perfmon/pfmlib.h -@@ -565,6 +565,7 @@ typedef enum { - PFM_PMU_AMD64_FAM17H_ZEN2, /* AMD AMD64 Fam17h Zen2 */ - PFM_PMU_AMD64_FAM19H_ZEN3, /* AMD AMD64 Fam19h Zen3 */ - PFM_PMU_AMD64_RAPL, /* AMD64 RAPL */ -+ PFM_PMU_AMD64_FAM19H_ZEN3_L3, /* AMD64 Fam17h Zen3 L3 */ - - /* MUST ADD NEW PMU MODELS HERE */ - -diff --git a/lib/Makefile b/lib/Makefile -index ab1bc0a..fdc628d 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -125,7 +125,8 @@ SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c \ - pfmlib_amd64_fam11h.c pfmlib_amd64_fam12h.c \ - pfmlib_amd64_fam14h.c pfmlib_amd64_fam15h.c \ - pfmlib_amd64_fam17h.c pfmlib_amd64_fam16h.c \ -- pfmlib_amd64_fam19h.c pfmlib_amd64_rapl.c -+ pfmlib_amd64_fam19h.c pfmlib_amd64_rapl.c \ -+ pfmlib_amd64_fam19h_l3.c - - CFLAGS += -DCONFIG_PFMLIB_ARCH_X86 - -@@ -254,6 +255,7 @@ INC_X86= pfmlib_intel_x86_priv.h \ - events/amd64_events_fam17h_zen1.h \ - events/amd64_events_fam17h_zen2.h \ - events/amd64_events_fam19h_zen3.h \ -+ events/amd64_events_fam19h_zen3_l3.h \ - events/amd64_events_fam16h.h \ - events/intel_p6_events.h \ - events/intel_netburst_events.h \ -diff --git a/lib/events/amd64_events_fam19h_zen3_l3.h b/lib/events/amd64_events_fam19h_zen3_l3.h -new file mode 100644 -index 0000000..b57223c ---- /dev/null -+++ b/lib/events/amd64_events_fam19h_zen3_l3.h -@@ -0,0 +1,55 @@ -+/* -+ * Copyright 2021 Google LLC -+ * Contributed by Stephane Eranian -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -+ * of the Software, and to permit persons to 
whom the Software is furnished to do so, -+ * subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in all -+ * copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ * -+ * This file is part of libpfm, a performance monitoring support library for -+ * applications on Linux. -+ * -+ * PMU: amd64_fam19h_zen3_l3 (AMD64 Fam19h Zen3 L3) -+ */ -+ -+static const amd64_umask_t amd64_fam19h_zen3_l3_requests[]={ -+ { .uname = "ALL", -+ .udesc = "All types of requests", -+ .ucode = 0xff, -+ .uflags = AMD64_FL_DFL, -+ }, -+}; -+ -+static const amd64_entry_t amd64_fam19h_zen3_l3_pe[]={ -+ { .name = "REQUESTS", -+ .desc = "Number of requests to L3 cache", -+ .code = 0x04, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l3_requests), -+ .umasks = amd64_fam19h_zen3_l3_requests, -+ }, -+ { .name = "MISS_LATENCY", -+ .desc = "Accumulated miss latency in cycles - occupancy event", -+ .code = 0x90, -+ }, -+ { .name = "MISSES", -+ .desc = "Number of L3 cache misses", -+ .code = 0x9a, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l3_requests), -+ .umasks = amd64_fam19h_zen3_l3_requests, /* shared */ -+ }, -+}; -diff --git a/lib/pfmlib_amd64_fam19h_l3.c b/lib/pfmlib_amd64_fam19h_l3.c -new file mode 100644 -index 0000000..acf8b0c ---- /dev/null -+++ b/lib/pfmlib_amd64_fam19h_l3.c -@@ -0,0 +1,75 @@ -+/* -+ * pfmlib_amd64_fam19h_zen3_l3.c : AMD Fam19h Zen3 L3 PMU -+ * -+ * Copyright 2021 Google LLC -+ * 
Contributed by Stephane Eranian -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -+ * of the Software, and to permit persons to whom the Software is furnished to do so, -+ * subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in all -+ * copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE -+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-+ */ -+#include -+#include -+#include -+#include -+#include -+ -+/* private headers */ -+#include "pfmlib_priv.h" -+#include "pfmlib_amd64_priv.h" -+#include "events/amd64_events_fam19h_zen3_l3.h" -+ -+static void -+display_l3(void *this, pfmlib_event_desc_t *e, void *val) -+{ -+ pfm_amd64_reg_t *reg = val; -+ -+ __pfm_vbprintf("[L3=0x%"PRIx64" event=0x%x umask=0x%x\n", -+ reg->val, -+ reg->l3.event, -+ reg->l3.umask); -+} -+ -+const pfmlib_attr_desc_t l3_mods[]={ -+ PFM_ATTR_NULL -+}; -+ -+pfmlib_pmu_t amd64_fam19h_zen3_l3_support = { -+ .desc = "AMD64 Fam19h Zen3 L3", -+ .name = "amd64_fam19h_zen3_l3", -+ .pmu = PFM_PMU_AMD64_FAM19H_ZEN3_L3, -+ .pmu_rev = 0, -+ .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l3_pe), -+ .type = PFM_PMU_TYPE_UNCORE, -+ .num_cntrs = 4, -+ .max_encoding = 1, -+ .pe = amd64_fam19h_zen3_l3_pe, -+ .atdesc = l3_mods, -+ .flags = PFMLIB_PMU_FL_RAW_UMASK, -+ .cpu_family = PFM_PMU_AMD64_FAM19H_ZEN3, -+ .pmu_detect = pfm_amd64_family_detect, -+ .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding, -+ PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), -+ .get_event_first = pfm_amd64_get_event_first, -+ .get_event_next = pfm_amd64_get_event_next, -+ .event_is_valid = pfm_amd64_event_is_valid, -+ .validate_table = pfm_amd64_validate_table, -+ .get_event_info = pfm_amd64_get_event_info, -+ .get_event_attr_info = pfm_amd64_get_event_attr_info, -+ PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs), -+ .get_event_nattrs = pfm_amd64_get_event_nattrs, -+ .display_reg = display_l3, -+}; -diff --git a/lib/pfmlib_amd64_priv.h b/lib/pfmlib_amd64_priv.h -index 6fadc17..5783904 100644 ---- a/lib/pfmlib_amd64_priv.h -+++ b/lib/pfmlib_amd64_priv.h -@@ -180,6 +180,21 @@ typedef union { - uint64_t val:1; - uint64_t reserved2:45; - } ibsop; -+ struct { /* Zen3 L3 */ -+ uint64_t event:8; /* event mask */ -+ uint64_t umask:8; /* unit mask */ -+ uint64_t reserved1:6; /* reserved */ -+ uint64_t en:1; /* enable */ -+ uint64_t reserved2:19; /* reserved */ 
-+ uint64_t core_id:3; /* Core ID */ -+ uint64_t reserved3:1; /* reserved */ -+ uint64_t en_all_slices:1; /* enable all slices */ -+ uint64_t en_all_cores:1; /* enable all cores */ -+ uint64_t slice_id:3; /* Slice ID */ -+ uint64_t reserved4:5; /* reserved */ -+ uint64_t thread_id:4; /* reserved */ -+ uint64_t reserved5:4; /* reserved */ -+ } l3; - } pfm_amd64_reg_t; /* MSR 0xc001000-0xc001003 */ - - /* let's define some handy shortcuts! */ -diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c -index ba2522e..45d92df 100644 ---- a/lib/pfmlib_common.c -+++ b/lib/pfmlib_common.c -@@ -82,6 +82,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= - &amd64_fam17h_zen1_support, - &amd64_fam17h_zen2_support, - &amd64_fam19h_zen3_support, -+ &amd64_fam19h_zen3_l3_support, - &amd64_rapl_support, - &intel_core_support, - &intel_atom_support, -diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h -index 24ca1c5..46d7ab6 100644 ---- a/lib/pfmlib_priv.h -+++ b/lib/pfmlib_priv.h -@@ -256,6 +256,7 @@ extern pfmlib_pmu_t amd64_fam17h_deprecated_support; - extern pfmlib_pmu_t amd64_fam17h_zen1_support; - extern pfmlib_pmu_t amd64_fam17h_zen2_support; - extern pfmlib_pmu_t amd64_fam19h_zen3_support; -+extern pfmlib_pmu_t amd64_fam19h_zen3_l3_support; - extern pfmlib_pmu_t amd64_rapl_support; - extern pfmlib_pmu_t intel_p6_support; - extern pfmlib_pmu_t intel_ppro_support; -diff --git a/tests/validate_x86.c b/tests/validate_x86.c -index c920509..9aa7dd1 100644 ---- a/tests/validate_x86.c -+++ b/tests/validate_x86.c -@@ -7321,6 +7321,22 @@ static const test_event_t x86_test_events[]={ - .codes[0] = 0x510203ull, - .fstr = "amd64_fam19h_zen3::RETIRED_SSE_AVX_FLOPS:MULT_FLOPS:k=0:u=1:e=0:i=0:c=0:h=0:g=0", - }, -+ { SRC_LINE, -+ .name = "amd64_fam19h_zen3_l3::REQUESTS", -+ .count = 1, -+ .codes[0] = 0x53ff04ull, -+ .fstr = "amd64_fam19h_zen3_l3::REQUESTS:ALL", -+ }, -+ { SRC_LINE, -+ .name = "amd64_fam19h_zen3_l3::REQUESTS:u", -+ .ret = PFM_ERR_ATTR, -+ }, -+ { SRC_LINE, -+ .name = 
"amd64_fam19h_zen3_l3::MISSES", -+ .count = 1, -+ .codes[0] = 0x53ff9aull, -+ .fstr = "amd64_fam19h_zen3_l3::MISSES:ALL", -+ }, - }; - - #define NUM_TEST_EVENTS (int)(sizeof(x86_test_events)/sizeof(test_event_t)) - -commit c132ab4948a828334a8fef00303a4b47f59bb4d9 -Author: Stephane Eranian -Date: Tue Mar 23 10:11:40 2021 -0700 - - Add prefix to AMD Fam19h Zen3 L3 events - - To avoid potential conflict with other core PMU events and make it - more explicit these are uncore L3 events following the model of - Intel uncore PMUs. - - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam19h_zen3_l3.h b/lib/events/amd64_events_fam19h_zen3_l3.h -index b57223c..523509e 100644 ---- a/lib/events/amd64_events_fam19h_zen3_l3.h -+++ b/lib/events/amd64_events_fam19h_zen3_l3.h -@@ -34,18 +34,18 @@ static const amd64_umask_t amd64_fam19h_zen3_l3_requests[]={ - }; - - static const amd64_entry_t amd64_fam19h_zen3_l3_pe[]={ -- { .name = "REQUESTS", -+ { .name = "UNC_L3_REQUESTS", - .desc = "Number of requests to L3 cache", - .code = 0x04, - .ngrp = 1, - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l3_requests), - .umasks = amd64_fam19h_zen3_l3_requests, - }, -- { .name = "MISS_LATENCY", -+ { .name = "UNC_L3_MISS_LATENCY", - .desc = "Accumulated miss latency in cycles - occupancy event", - .code = 0x90, - }, -- { .name = "MISSES", -+ { .name = "UNC_L3_MISSES", - .desc = "Number of L3 cache misses", - .code = 0x9a, - .ngrp = 1, -diff --git a/tests/validate_x86.c b/tests/validate_x86.c -index 9aa7dd1..dfeb213 100644 ---- a/tests/validate_x86.c -+++ b/tests/validate_x86.c -@@ -7322,20 +7322,20 @@ static const test_event_t x86_test_events[]={ - .fstr = "amd64_fam19h_zen3::RETIRED_SSE_AVX_FLOPS:MULT_FLOPS:k=0:u=1:e=0:i=0:c=0:h=0:g=0", - }, - { SRC_LINE, -- .name = "amd64_fam19h_zen3_l3::REQUESTS", -+ .name = "amd64_fam19h_zen3_l3::UNC_L3_REQUESTS", - .count = 1, - .codes[0] = 0x53ff04ull, -- .fstr = "amd64_fam19h_zen3_l3::REQUESTS:ALL", -+ .fstr = 
"amd64_fam19h_zen3_l3::UNC_L3_REQUESTS:ALL", - }, - { SRC_LINE, -- .name = "amd64_fam19h_zen3_l3::REQUESTS:u", -+ .name = "amd64_fam19h_zen3_l3::UNC_L3_REQUESTS:u", - .ret = PFM_ERR_ATTR, - }, - { SRC_LINE, -- .name = "amd64_fam19h_zen3_l3::MISSES", -+ .name = "amd64_fam19h_zen3_l3::UNC_L3_MISSES", - .count = 1, - .codes[0] = 0x53ff9aull, -- .fstr = "amd64_fam19h_zen3_l3::MISSES:ALL", -+ .fstr = "amd64_fam19h_zen3_l3::UNC_L3_MISSES:ALL", - }, - }; - - -commit 06197c0543476d40fad1c94d240e46a5d114f887 -Author: Stephane Eranian -Date: Mon May 3 21:45:59 2021 -0700 - - enable RAPL for AMD64 Fam19h Zen3 processor - - As per AMD64 PPR for Fam19h model 01h, RAPL Package is supported, so - enable it. - - Signed-off-by: Stephane Eranian - -diff --git a/lib/pfmlib_amd64_rapl.c b/lib/pfmlib_amd64_rapl.c -index 2a65e32..885704a 100644 ---- a/lib/pfmlib_amd64_rapl.c -+++ b/lib/pfmlib_amd64_rapl.c -@@ -55,6 +55,9 @@ pfm_amd64_rapl_detect(void *this) - case PFM_PMU_AMD64_FAM17H_ZEN2: - ret = PFM_SUCCESS; - break; -+ case PFM_PMU_AMD64_FAM19H_ZEN3: -+ ret = PFM_SUCCESS; -+ break; - default: - ret = PFM_ERR_NOTSUPP; - } - -commit 9c3e9c025efc06f4ac4422d5e87a05d9776cbb94 -Author: Vince Weaver -Date: Wed May 26 22:00:27 2021 -0700 - - fix detection of AMD64 Zen1 vs. Zen2 - - This patch fixes the test checking the model number - for AMD64 Fam17h processors. There was a bug where it - would detect some Zen1 processors as Zen2. Zen2 processors - start at model number 48 and up. 
- - Signed-off-by: Vince Weaver - -diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c -index c4497ea..8c85565 100644 ---- a/lib/pfmlib_amd64.c -+++ b/lib/pfmlib_amd64.c -@@ -174,13 +174,10 @@ amd64_get_revision(pfm_amd64_config_t *cfg) - } else if (cfg->family == 21) { /* family 15h */ - rev = PFM_PMU_AMD64_FAM15H_INTERLAGOS; - } else if (cfg->family == 23) { /* family 17h */ -- switch (cfg->model) { -- case 49: -+ if (cfg->model >= 48) - rev = PFM_PMU_AMD64_FAM17H_ZEN2; -- break; -- default: -+ else - rev = PFM_PMU_AMD64_FAM17H_ZEN1; -- } - } else if (cfg->family == 22) { /* family 16h */ - rev = PFM_PMU_AMD64_FAM16H; - } else if (cfg->family == 25) { /* family 19h */ - -commit 7970a2513cc077cc5d76db470d679ff948e3ff55 -Author: Stephane Eranian -Date: Fri Feb 18 00:51:33 2022 -0800 - - fix perf_events raw encoding handling of event strings - - The pfm_perf_raw_match_event() was relying on sscanf to convert the - raw hexadecimal string into integer. - The problem with sscanf is that it stops at the first non hex character - but does not tell you where. This caused problems with wrongly named - events starting with 'r'. For instance, on AMD Zen3, the event - retired_uops does not exist, yet libpfm4 would encode at 0xe. - That is because the event starts with 'r' and therefore if libpfm4 fails - to find a match in the AMD Zen3 PMU, it will use the raw perf_event syntax - and the seocnd letter is 'e'. Then sscanf stops at the 't' and does not - report any error. - - Fix the problem by using strtoull() instead. It returns an end pointer. - In case the string contains non hex, that pointer will point to it. - - With this fix in place: - $ examples check_events retired_uops - fails on AMD Zen3. 
- - Signed-off-by: Stephane Eranian - -diff --git a/lib/pfmlib_perf_event_raw.c b/lib/pfmlib_perf_event_raw.c -index 4e7176c..4aab75f 100644 ---- a/lib/pfmlib_perf_event_raw.c -+++ b/lib/pfmlib_perf_event_raw.c -@@ -27,6 +27,8 @@ - #include - #include - #include -+#include -+#include - - #include "pfmlib_priv.h" - #include "pfmlib_perf_event_priv.h" -@@ -137,14 +139,15 @@ static int - pfm_perf_raw_match_event(void *this, pfmlib_event_desc_t *d, const char *e, const char *s) - { - uint64_t code; -- int ret; -+ char *endptr = NULL; - - if (*s != 'r' || !isxdigit(*(s+1))) - return 1; - -- ret = sscanf(s+1, "%"PRIx64, &code); -- if (ret != 1) -+ code = strtoull(s+1, &endptr, 16); -+ if (code == ULLONG_MAX || errno == ERANGE|| (endptr && *endptr)) - return 1; -+ - /* - * stash code in final position - */ - -commit 1770c118887aa21374ad8d9f816e660f2e809115 -Author: Stephane Eranian -Date: Tue Apr 19 15:33:10 2022 -0700 - - Update AMD Zen3 core and L3 PMU event tables - - Based on PPR v0.50: - https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - - Thanks to Emmanuel @ UVSQ for pointing out the changes. - - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam19h_zen3.h b/lib/events/amd64_events_fam19h_zen3.h -index e95ac69..10e0c20 100644 ---- a/lib/events/amd64_events_fam19h_zen3.h -+++ b/lib/events/amd64_events_fam19h_zen3.h -@@ -514,6 +514,17 @@ static const amd64_umask_t amd64_fam19h_zen3_core_to_l2_cacheable_request_access - }, - }; - -+static const amd64_umask_t amd64_fam19h_zen3_l2_prefetch_hit_l2[]={ -+ { .uname = "L2_HW_PREFETCHER", -+ .udesc = "Number of requests generated by L2 hardware prefetcher", -+ .ucode = 0x1f, -+ }, -+ { .uname = "L1_HW_PREFETCHER", -+ .udesc = "Number of requests generated by L1 hardware prefetcher", -+ .ucode = 0xe0, -+ }, -+}; -+ - static const amd64_entry_t amd64_fam19h_zen3_pe[]={ - { .name = "RETIRED_SSE_AVX_FLOPS", - .desc = "This is a retire-based event. The number of retired SSE/AVX FLOPS. 
The number of events logged per cycle can vary from 0 to 64. This event can count above 15 and therefore requires the MergeEvent", -@@ -980,20 +991,42 @@ static const amd64_entry_t amd64_fam19h_zen3_pe[]={ - .modmsk = AMD64_FAM19H_ATTRS, - .code = 0x70, - .flags = 0, -- .ngrp = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l2_prefetch_hit_l2), -+ .umasks = amd64_fam19h_zen3_l2_prefetch_hit_l2, - }, - { .name = "L2_PREFETCH_HIT_L3", - .desc = "Number of L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3", - .modmsk = AMD64_FAM19H_ATTRS, - .code = 0x71, - .flags = 0, -- .ngrp = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l2_prefetch_hit_l2), -+ .umasks = amd64_fam19h_zen3_l2_prefetch_hit_l2, -+ }, -+ { .name = "L2_PREFETCH_MISS_L3", -+ .desc = "Number of L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0x72, -+ .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l2_prefetch_hit_l2), -+ .umasks = amd64_fam19h_zen3_l2_prefetch_hit_l2, - }, - { .name = "L2_PREFETCH_MISS_L3", - .desc = "Number of L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches", - .modmsk = AMD64_FAM19H_ATTRS, - .code = 0x72, - .flags = 0, -+ .ngrp = 1, -+ .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l2_prefetch_hit_l2), -+ .umasks = amd64_fam19h_zen3_l2_prefetch_hit_l2, -+ }, -+ { .name = "OP_QUEUE_EMPTY", -+ .desc = "Counts cycles where the OP queue is empty", -+ .modmsk = AMD64_FAM19H_ATTRS, -+ .code = 0xa9, -+ .flags = 0, - .ngrp = 0, - }, - }; -diff --git a/lib/events/amd64_events_fam19h_zen3_l3.h b/lib/events/amd64_events_fam19h_zen3_l3.h -index 523509e..7ae09ef 100644 ---- a/lib/events/amd64_events_fam19h_zen3_l3.h -+++ b/lib/events/amd64_events_fam19h_zen3_l3.h -@@ -42,7 +42,7 @@ static const amd64_entry_t amd64_fam19h_zen3_l3_pe[]={ - .umasks = amd64_fam19h_zen3_l3_requests, - }, - { .name = 
"UNC_L3_MISS_LATENCY", -- .desc = "Accumulated miss latency in cycles - occupancy event", -+ .desc = "Each cycle, this event increments by the total number of read requests outstanding from the CCX divided by XiSysFillLatencyDivider. The user can calculate the average system fill latency in cycles by multiplying by XiSysFillLatencyDivider and dividing by the total number of fill requests over the same period (counted by event 0x9A UserMask 0x1F). XiSysFillLatencyDivider is 16 for this product, but may change for future products", - .code = 0x90, - }, - { .name = "UNC_L3_MISSES", -diff --git a/tests/validate_x86.c b/tests/validate_x86.c -index f1ac929..0155c54 100644 ---- a/tests/validate_x86.c -+++ b/tests/validate_x86.c -@@ -7315,10 +7315,10 @@ static const test_event_t x86_test_events[]={ - .fstr = "amd64_fam19h_zen3::CYCLES_NOT_IN_HALT:k=1:u=1:e=0:i=0:c=0:h=0:g=0", - }, - { SRC_LINE, -- .name = "amd64_fam19h_zen3::L2_PREFETCH_HIT_L2", -+ .name = "amd64_fam19h_zen3::L2_PREFETCH_HIT_L2:L2_HW_PREFETCHER", - .count = 1, -- .codes[0] = 0x530070ull, -- .fstr = "amd64_fam19h_zen3::L2_PREFETCH_HIT_L2:k=1:u=1:e=0:i=0:c=0:h=0:g=0", -+ .codes[0] = 0x531f70ull, -+ .fstr = "amd64_fam19h_zen3::L2_PREFETCH_HIT_L2:L2_HW_PREFETCHER:k=1:u=1:e=0:i=0:c=0:h=0:g=0", - }, - { SRC_LINE, - .name = "amd64_fam19h_zen3::L1_DTLB_MISS:TLB_RELOAD_1G_L2_HIT:u", - -commit dfc6a6574b1d4447fb6742169dddbad0d46c5ede -Author: Stephane Eranian -Date: Wed Apr 20 00:35:43 2022 -0700 - - Add duplicate detection to AMD64 event validation - - Was missing compared to Intel X86. 
- - Signed-off-by: Stephane Eranian - -diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c -index 0d9fb92..60d4ded 100644 ---- a/lib/pfmlib_amd64.c -+++ b/lib/pfmlib_amd64.c -@@ -713,8 +713,8 @@ pfm_amd64_validate_table(void *this, FILE *fp) - pfmlib_pmu_t *pmu = this; - const amd64_entry_t *pe = this_pe(this); - const char *name = pmu->name; -- unsigned int j, k; -- int i, ndfl; -+ unsigned int i, j, k; -+ int ndfl; - int error = 0; - - if (!pmu->atdesc) { -@@ -727,7 +727,7 @@ pfm_amd64_validate_table(void *this, FILE *fp) - error++; - } - -- for(i=0; i < pmu->pme_count; i++) { -+ for(i=0; i < (unsigned int)pmu->pme_count; i++) { - - if (!pe[i].name) { - fprintf(fp, "pmu: %s event%d: :: no name (prev event was %s)\n", pmu->name, i, -@@ -822,6 +822,12 @@ pfm_amd64_validate_table(void *this, FILE *fp) - } - } - } -+ for (j=i+1; j < (unsigned int)pmu->pme_count; j++) { -+ if (pe[i].code == pe[j].code) { -+ fprintf(fp, "pmu: %s events %s and %s have the same code 0x%x\n", pmu->name, pe[i].name, pe[j].name, pe[i].code); -+ error++; -+ } -+ } - } - return error ? 
PFM_ERR_INVAL : PFM_SUCCESS; - } - -commit 3e19de455109672c5384e4defba9ba5b541b3915 -Author: Stephane Eranian -Date: Wed Apr 20 00:37:25 2022 -0700 - - Fix duplicate L2_PREFETCH_MISS_L3 for AMD Zen3 - - Was introduced by commit: - 1770c118887a "Update AMD Zen3 core and L3 PMU event tables" - - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam19h_zen3.h b/lib/events/amd64_events_fam19h_zen3.h -index 10e0c20..2d76da9 100644 ---- a/lib/events/amd64_events_fam19h_zen3.h -+++ b/lib/events/amd64_events_fam19h_zen3.h -@@ -1013,15 +1013,6 @@ static const amd64_entry_t amd64_fam19h_zen3_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l2_prefetch_hit_l2), - .umasks = amd64_fam19h_zen3_l2_prefetch_hit_l2, - }, -- { .name = "L2_PREFETCH_MISS_L3", -- .desc = "Number of L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches", -- .modmsk = AMD64_FAM19H_ATTRS, -- .code = 0x72, -- .flags = 0, -- .ngrp = 1, -- .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l2_prefetch_hit_l2), -- .umasks = amd64_fam19h_zen3_l2_prefetch_hit_l2, -- }, - { .name = "OP_QUEUE_EMPTY", - .desc = "Counts cycles where the OP queue is empty", - .modmsk = AMD64_FAM19H_ATTRS, - -commit 9580a003d83900569db3f2c7bc41e0e2ea7b88ef -Author: Stephane Eranian -Date: Wed Apr 20 19:56:03 2022 -0700 - - Fix amd64 duplicate event detection logic - - Must check flags as well as code otherwise false positive duplicate - are detected on AMD Fam10h Barcelona where some events appears as duplicate - when in fact they are for different revisions of the CPU. 
- - Signed-off-by: Stephane Eranian - -diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c -index 60d4ded..aad8f26 100644 ---- a/lib/pfmlib_amd64.c -+++ b/lib/pfmlib_amd64.c -@@ -823,7 +823,7 @@ pfm_amd64_validate_table(void *this, FILE *fp) - } - } - for (j=i+1; j < (unsigned int)pmu->pme_count; j++) { -- if (pe[i].code == pe[j].code) { -+ if (pe[i].code == pe[j].code && pe[i].flags == pe[j].flags) { - fprintf(fp, "pmu: %s events %s and %s have the same code 0x%x\n", pmu->name, pe[i].name, pe[j].name, pe[i].code); - error++; - } - -commit c7798469063288ca5829ab96c7c174dad5a08e74 -Author: Stephane Eranian -Date: Thu Apr 21 15:01:07 2022 -0700 - - Rename OP_QUEUE_EMPTY to UOPS_QUEUE_EMPTY on AMD Zen3 - - To be comptible with AMD Zen2. - - Signed-off-by: Stephane Eranian - -diff --git a/lib/events/amd64_events_fam19h_zen3.h b/lib/events/amd64_events_fam19h_zen3.h -index 2d76da9..d56164e 100644 ---- a/lib/events/amd64_events_fam19h_zen3.h -+++ b/lib/events/amd64_events_fam19h_zen3.h -@@ -1013,8 +1013,8 @@ static const amd64_entry_t amd64_fam19h_zen3_pe[]={ - .numasks = LIBPFM_ARRAY_SIZE(amd64_fam19h_zen3_l2_prefetch_hit_l2), - .umasks = amd64_fam19h_zen3_l2_prefetch_hit_l2, - }, -- { .name = "OP_QUEUE_EMPTY", -- .desc = "Counts cycles where the OP queue is empty", -+ { .name = "UOPS_QUEUE_EMPTY", -+ .desc = "Counts cycles where the decoded uops queue is empty", - .modmsk = AMD64_FAM19H_ATTRS, - .code = 0xa9, - .flags = 0, diff --git a/libpfm-zseries.patch b/libpfm-zseries.patch deleted file mode 100644 index e85419f..0000000 --- a/libpfm-zseries.patch +++ /dev/null @@ -1,128 +0,0 @@ -commit dc643f4a460063a628b4bfba57dbbb36673a0789 -Author: Thomas Richter -Date: Thu Jul 11 17:48:03 2019 -0700 - - s390/cpumf: add IBM z14 ZR1 support - - Add support for machine type 0x3907. 
- - Signed-off-by: Thomas Richter - -diff --git a/lib/pfmlib_s390x_cpumf.c b/lib/pfmlib_s390x_cpumf.c -index 4e03fc4..7566aa0 100644 ---- a/lib/pfmlib_s390x_cpumf.c -+++ b/lib/pfmlib_s390x_cpumf.c -@@ -192,6 +192,7 @@ static int pfm_cpumcf_init(void *this) - ext_set_count = LIBPFM_ARRAY_SIZE(cpumcf_z13_counters); - break; - case 3906: /* IBM z14 */ -+ case 3907: /* IBM z14 ZR1 */ - ext_set = cpumcf_z14_counters; - ext_set_count = LIBPFM_ARRAY_SIZE(cpumcf_z14_counters); - break; -commit c08003fb085e23f2229e58cc176fcfcf58a3b238 -Author: Thomas Richter -Date: Thu Jul 11 17:49:42 2019 -0700 - - s390/cpumf: add support for counter second version number 6 - - IBM added secound version number 6 in the CPU Measurement - Counter facility to indicate additional counters in the - crypto counter set. - - Signed-off-by: Thomas Richter - -diff --git a/lib/events/s390x_cpumf_events.h b/lib/events/s390x_cpumf_events.h -index 8be1d55..8587a3b 100644 ---- a/lib/events/s390x_cpumf_events.h -+++ b/lib/events/s390x_cpumf_events.h -@@ -13,6 +13,8 @@ - #define CPUMF_CTRSET_EXTENDED 1 - #define CPUMF_CTRSET_MT_DIAG 32 - -+#define CPUMF_SVN6_ECC 4 -+ - - static const pme_cpumf_ctr_t cpumcf_fvn1_counters[] = { - { -@@ -270,6 +272,43 @@ static const pme_cpumf_ctr_t cpumcf_svn_generic_counters[] = { - " coprocessor is busy performing a function issued by" - " another CPU", - }, -+ { -+ .ctrnum = 80, -+ .ctrset = CPUMF_CTRSET_CRYPTO, -+ .name = "ECC_FUNCTION_COUNT", -+ .desc = "This counter counts the" -+ " total number of the elliptic-curve cryptography (ECC)" -+ " functions issued by the CPU.", -+ }, -+ { -+ .ctrnum = 81, -+ .ctrset = CPUMF_CTRSET_CRYPTO, -+ .name = "ECC_CYCLES_COUNT", -+ .desc = "This counter counts the total" -+ " number of CPU cycles when the ECC coprocessor is" -+ " busy performing the elliptic-curve cryptography" -+ " (ECC) functions issued by the CPU.", -+ }, -+ { -+ .ctrnum = 82, -+ .ctrset = CPUMF_CTRSET_CRYPTO, -+ .name = "ECC_BLOCKED_FUNCTION_COUNT", -+ .desc = 
"This counter counts the total number of the" -+ " elliptic-curve cryptography (ECC) functions that are" -+ " issued by the CPU and are blocked because the ECC" -+ " coprocessor is busy performing a function issued by" -+ " another CPU.", -+ }, -+ { -+ .ctrnum = 83, -+ .ctrset = CPUMF_CTRSET_CRYPTO, -+ .name = "ECC_BLOCKED_CYCLES_COUNT", -+ .desc = "This counter counts the total number of CPU cycles " -+ " blocked for the elliptic-curve cryptography (ECC) " -+ " functions issued by the CPU because the ECC" -+ " coprocessor is busy perform ing a function issued" -+ " by another CPU.", -+ }, - }; - - static const pme_cpumf_ctr_t cpumcf_z10_counters[] = { -diff --git a/lib/pfmlib_s390x_cpumf.c b/lib/pfmlib_s390x_cpumf.c -index 7566aa0..e68b000 100644 ---- a/lib/pfmlib_s390x_cpumf.c -+++ b/lib/pfmlib_s390x_cpumf.c -@@ -168,6 +168,8 @@ static int pfm_cpumcf_init(void *this) - /* counters based on second version number */ - csvn_set = cpumcf_svn_generic_counters; - csvn_set_count = LIBPFM_ARRAY_SIZE(cpumcf_svn_generic_counters); -+ if (csvn < 6) /* Crypto counter set enlarged for SVN == 6 */ -+ csvn_set_count -= CPUMF_SVN6_ECC; - - /* check and assign a machine-specific extended counter set */ - switch (get_machine_type()) { -commit b831b04254ac7fd1700b20b349799810952ff66e -Author: Thomas Richter -Date: Thu Jul 11 17:51:03 2019 -0700 - - s390/cpumf: add support for machine type 8561 - - Add extended counter set support for s390 machine types - 8561 and 8562. 
- - Signed-off-by: Thomas Richter - -diff --git a/lib/pfmlib_s390x_cpumf.c b/lib/pfmlib_s390x_cpumf.c -index e68b000..c11f9d9 100644 ---- a/lib/pfmlib_s390x_cpumf.c -+++ b/lib/pfmlib_s390x_cpumf.c -@@ -195,6 +195,8 @@ static int pfm_cpumcf_init(void *this) - break; - case 3906: /* IBM z14 */ - case 3907: /* IBM z14 ZR1 */ -+ case 8561: /* IBM Machine types 8561 and 8562 */ -+ case 8562: - ext_set = cpumcf_z14_counters; - ext_set_count = LIBPFM_ARRAY_SIZE(cpumcf_z14_counters); - break; diff --git a/libpfm.spec b/libpfm.spec index 0bf8f7f..bb1abbe 100644 --- a/libpfm.spec +++ b/libpfm.spec @@ -9,8 +9,8 @@ %endif Name: libpfm -Version: 4.10.1 -Release: 5%{?dist} +Version: 4.13.0 +Release: 1%{?dist} Summary: Library to encode performance events for use by perf tool @@ -19,12 +19,7 @@ License: MIT URL: http://perfmon2.sourceforge.net/ Source0: http://sourceforge.net/projects/perfmon2/files/libpfm4/%{name}-%{version}.tar.gz Patch2: libpfm-python3-setup.patch -Patch3: libpfm-zseries.patch -Patch4: libpfm-tx2.patch -Patch5: libpfm-a64fx.patch -Patch6: libpfm-flags.patch -Patch7: libpfm-amd_merge.patch -Patch8: libpfm-zen23.patch +Patch3: libpfm-initp.patch %if %{with python} BuildRequires: python3 @@ -75,12 +70,7 @@ Python bindings for libpfm4 and perf_event_open system call. 
%prep %setup -q %patch2 -p1 -b .python3 -%patch3 -p1 -b .zseries -%patch4 -p1 -b .tx2 -%patch5 -p1 -b .a64fx -%patch6 -p1 -%patch7 -p1 -%patch8 -p1 +%patch3 -p1 -b .test %build %if %{with python} @@ -129,6 +119,9 @@ make \ %endif %changelog +* Fri Apr 14 2023 William Cohen - 4.13.0-1 +- Rebase to libpfm-4.13.0 (RHBZ #2185653) + * Sun May 8 2022 William Cohen - 4.10.1-5 - Add AMD Zen 2/3 support (RHBZ #2067218) diff --git a/sources b/sources index 2f0b0d2..b2d4ea3 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (libpfm-4.10.1.tar.gz) = 33d99824216b4d83784e0db9f1aae3b39f84c2ba42dee64dde0b3c43cc5ee3a18c206d9044e75bf10867768add7cb9967c6318f7aa196f178f334271fa05e1aa +SHA512 (libpfm-4.13.0.tar.gz) = e61b210aa2ce80f0e47603c88eee2e4f2fe30ca2c0e194a5472b6a8de3bf9dc1085e5261bbb9ddbe5b6531c4b391fb34f20d038e5ebd8e6f4c14c2112aee508f