diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fc0a146 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/libpfm-4.10.1.tar.gz diff --git a/EMPTY b/EMPTY deleted file mode 100644 index 0519ecb..0000000 --- a/EMPTY +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/libpfm-a64fx.patch b/libpfm-a64fx.patch new file mode 100644 index 0000000..740c757 --- /dev/null +++ b/libpfm-a64fx.patch @@ -0,0 +1,1470 @@ +commit 0cfc35f73e0e39d54ba48c24e663bec93d164211 +Author: Steve Kaufmann +Date: Mon May 18 09:33:57 2020 -0700 + + Enable support for Fujitsu A64FX core PMU + + This patch adds support for Fujitsu A64FX core PMU. This + includes ARMv8 generic core events and Fujitsu model + specific events. + + Signed-off-by: Steve Kaufmann + +diff --git a/README b/README +index f40489e..c21fb28 100644 +--- a/README ++++ b/README +@@ -72,6 +72,7 @@ The library supports many PMUs. The current version can handle: + ARMV8 Cortex A57, A53 + Applied Micro X-Gene + Qualcomm Krait ++ Fujitsu A64FX + + - For SPARC + Ultra I, II +diff --git a/docs/Makefile b/docs/Makefile +index e39de6b..995ece0 100644 +--- a/docs/Makefile ++++ b/docs/Makefile +@@ -122,7 +122,8 @@ ARCH_MAN += libpfm_arm_xgene.3 \ + libpfm_arm_ac15.3 \ + libpfm_arm_ac8.3 \ + libpfm_arm_ac9.3 \ +- libpfm_arm_qcom_krait.3 ++ libpfm_arm_qcom_krait.3 \ ++ libpfm_arm_a64fx.3 + endif + + ifeq ($(CONFIG_PFMLIB_ARCH_ARM64),y) +diff --git a/docs/man3/libpfm_arm_a64fx.3 b/docs/man3/libpfm_arm_a64fx.3 +new file mode 100644 +index 0000000..6214eb7 +--- /dev/null ++++ b/docs/man3/libpfm_arm_a64fx.3 +@@ -0,0 +1,36 @@ ++.TH LIBPFM 3 "May, 2020" "" "Linux Programmer's Manual" ++.SH NAME ++libpfm_arm_a64fx - support for Fujitsu A64FX PMU ++.SH SYNOPSIS ++.nf ++.B #include <perfmon/pfmlib.h> ++.sp ++.B PMU name: arm_a64fx ++.B PMU desc: Fujitsu A64FX ++.sp ++.SH DESCRIPTION ++The library supports the Fujitsu A64FX core PMU. ++ ++This PMU supports 6 counters and privilege level filtering. 
++It can operate in both 32 and 64 bit modes. ++ ++.SH MODIFIERS ++The following modifiers are supported on Fujitsu A64FX: ++.TP ++.B u ++Measure at the user level. This corresponds to \fBPFM_PLM3\fR. ++This is a boolean modifier. ++.TP ++.B k ++Measure at the kernel level. This corresponds to \fBPFM_PLM0\fR. ++This is a boolean modifier. ++.TP ++.B hv ++Measure at the hypervisor level. This corresponds to \fBPFM_PLMH\fR. ++This is a boolean modifier. ++ ++.SH AUTHORS ++.nf ++Stephane Eranian ++.fi ++.PP +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index e19772a..caa93db 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -559,6 +559,8 @@ typedef enum { + PFM_PMU_ARM_THUNDERX2_LLC1, /* Marvell ThunderX2 LLC unit 1 uncore */ + PFM_PMU_ARM_THUNDERX2_CCPI0, /* Marvell ThunderX2 Cross-Socket Interconnect unit 0 uncore */ + PFM_PMU_ARM_THUNDERX2_CCPI1, /* Marvell ThunderX2 Cross-Socket Interconnect unit 1 uncore */ ++ ++ PFM_PMU_ARM_A64FX, /* Fujitsu A64FX processor */ + /* MUST ADD NEW PMU MODELS HERE */ + + PFM_PMU_MAX /* end marker */ +diff --git a/lib/events/arm_fujitsu_a64fx_events.h b/lib/events/arm_fujitsu_a64fx_events.h +new file mode 100644 +index 0000000..3b555f0 +--- /dev/null ++++ b/lib/events/arm_fujitsu_a64fx_events.h +@@ -0,0 +1,1136 @@ ++/* ++ * Copyright 2020 Cray Inc. All Rights Reserved. 
++ */ ++ ++/* ++ * Fujitsu A64FX processor ++ * ++ * A64FX® PMU Events ++ * Fujitsu Limited ++ * 1.2, 28 April 2020 ++ */ ++ ++static const arm_entry_t arm_a64fx_pe[ ] = { ++ { ++ .name = "SW_INCR", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0000, ++ .desc = "This event counts on writes to the PMSWINC register.", ++ }, ++ { ++ .name = "L1I_CACHE_REFILL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0001, ++ .desc = "This event counts operations that cause a refill of at least the L1I cache.", ++ }, ++ { ++ .name = "L1I_TLB_REFILL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0002, ++ .desc = "This event counts operations that cause a TLB refill of at least the L1I TLB.", ++ }, ++ { ++ .name = "L1D_CACHE_REFILL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0003, ++ .desc = "This event counts operations that cause a refill of at least the L1D cache.", ++ }, ++ { ++ .name = "L1D_CACHE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0004, ++ .desc = "This event counts operations that cause a cache access to at least the L1D cache.", ++ }, ++ { ++ .name = "L1D_TLB_REFILL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0005, ++ .desc = "This event counts operations that cause a TLB refill of at least the L1D TLB.", ++ }, ++ { ++ .name = "INST_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0008, ++ .desc = "This event counts every architecturally executed instruction.", ++ }, ++ { ++ .name = "EXC_TAKEN", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0009, ++ .desc = "This event counts each exception taken.", ++ }, ++ { ++ .name = "EXC_RETURN", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x000a, ++ .desc = "This event counts each executed exception return instruction.", ++ }, ++ { ++ .name = "CID_WRITE_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x000b, ++ .desc = "This event counts every write to CONTEXTIDR.", ++ }, ++ { ++ .name = "BR_MIS_PRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0010, ++ .desc = "This event counts each correction to the predicted program flow that occurs because of a misprediction from, or 
no prediction from, the branch prediction resources and that relates to instructions that the branch prediction resources are capable of predicting.", ++ }, ++ { ++ .name = "CPU_CYCLES", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0011, ++ .desc = "This event counts every cycle.", ++ }, ++ { ++ .name = "BR_PRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0012, ++ .desc = "This event counts every branch or other change in the program flow that the branch prediction resources are capable of predicting.", ++ }, ++ { ++ .name = "L1I_CACHE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0014, ++ .desc = "This event counts operations that cause a cache access to at least the L1I cache.", ++ }, ++ { ++ .name = "L1D_CACHE_WB", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0015, ++ .desc = "This event counts every write-back of data from the L1D cache.", ++ }, ++ { ++ .name = "L2D_CACHE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0016, ++ .desc = "This event counts operations that cause a cache access to at least the L2 cache.", ++ }, ++ { ++ .name = "L2D_CACHE_REFILL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0017, ++ .desc = "This event counts operations that cause a refill of at least the L2 cache.", ++ }, ++ { ++ .name = "L2D_CACHE_WB", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0018, ++ .desc = "This event counts every write-back of data from the L2 cache.", ++ }, ++ { ++ .name = "INST_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x001b, ++ .desc = "This event counts every architecturally executed instruction.", ++ }, ++ { ++ .name = "STALL_FRONTEND", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0023, ++ .desc = "This event counts every cycle counted by the CPU_CYCLES event on that no operations are issued because there are no operations available to issue for this PE from the frontend.", ++ }, ++ { ++ .name = "STALL_BACKEND", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0024, ++ .desc = "This event counts every cycle counted by the CPU_CYCLES event on that no operations are issued because the backend is unable 
to accept any operations.", ++ }, ++ { ++ .name = "L2D_TLB_REFILL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x002d, ++ .desc = "This event counts operations that cause a TLB refill of at least the L2D TLB.", ++ }, ++ { ++ .name = "L2I_TLB_REFILL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x002e, ++ .desc = "This event counts operations that cause a TLB refill of at least the L2I TLB.", ++ }, ++ { ++ .name = "L2D_TLB", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x002f, ++ .desc = "This event counts operations that cause a TLB access to at least the L2D TLB.", ++ }, ++ { ++ .name = "L2I_TLB", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0030, ++ .desc = "This event counts operations that cause a TLB access to at least the L2I TLB.", ++ }, ++ { ++ .name = "L1D_CACHE_REFILL_PRF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0049, ++ .desc = "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch.", ++ }, ++ { ++ .name = "L2D_CACHE_REFILL_PRF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0059, ++ .desc = "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch.", ++ }, ++ { ++ .name = "LDREX_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x006c, ++ .desc = "This event counts architecturally executed load-exclusive instructions.", ++ }, ++ { ++ .name = "STREX_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x006f, ++ .desc = "This event counts architecturally executed store-exclusive instructions.", ++ }, ++ { ++ .name = "LD_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0070, ++ .desc = "This event counts architecturally executed memory-reading instructions, as defined by the LD_RETIRED event.", ++ }, ++ { ++ .name = "ST_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0071, ++ .desc = "This event counts architecturally executed memory-writing instructions, as defined by the ST_RETIRED event. 
This event counts DCZVA as a store operation.", ++ }, ++ { ++ .name = "LDST_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0072, ++ .desc = "This event counts architecturally executed memory-reading instructions and memory-writing instructions, as defined by the LD_RETIRED and ST_RETIRED events.", ++ }, ++ { ++ .name = "DP_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0073, ++ .desc = "This event counts architecturally executed integer data-processing instructions.", ++ }, ++ { ++ .name = "ASE_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0074, ++ .desc = "This event counts architecturally executed Advanced SIMD data-processing instructions.", ++ }, ++ { ++ .name = "VFP_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0075, ++ .desc = "This event counts architecturally executed floating-point data-processing instructions.", ++ }, ++ { ++ .name = "PC_WRITE_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0076, ++ .desc = "This event counts only software changes of the PC that defined by the instruction architecturally executed, condition code check pass and software change of the PC event.", ++ }, ++ { ++ .name = "CRYPTO_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0077, ++ .desc = "This event counts architecturally executed cryptographic instructions, except PMULL and VMULL.", ++ }, ++ { ++ .name = "BR_IMMED_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0078, ++ .desc = "This event counts architecturally executed immediate branch instructions.", ++ }, ++ { ++ .name = "BR_RETURN_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0079, ++ .desc = "This event counts architecturally executed procedure return operations that defined by the BR_RETURN_RETIRED event.", ++ }, ++ { ++ .name = "BR_INDIRECT_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x007a, ++ .desc = "This event counts architecturally executed indirect branch instructions that includes software change of the PC other than exception-generating instructions and immediate branch instructions.", ++ }, ++ { ++ .name = 
"ISB_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x007c, ++ .desc = "This event counts architecturally executed Instruction Synchronization Barrier instructions.", ++ }, ++ { ++ .name = "DSB_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x007d, ++ .desc = "This event counts architecturally executed Data Synchronization Barrier instructions.", ++ }, ++ { ++ .name = "DMB_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x007e, ++ .desc = "This event counts architecturally executed Data Memory Barrier instructions, excluding the implied barrier operations of load/store operations with release consistency semantics.", ++ }, ++ { ++ .name = "EXC_UNDEF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0081, ++ .desc = "This event counts only other synchronous exceptions that are taken locally.", ++ }, ++ { ++ .name = "EXC_SVC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0082, ++ .desc = "This event counts only Supervisor Call exceptions that are taken locally.", ++ }, ++ { ++ .name = "EXC_PABORT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0083, ++ .desc = "This event counts only Instruction Abort exceptions that are taken locally.", ++ }, ++ { ++ .name = "EXC_DABORT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0084, ++ .desc = "This event counts only Data Abort or SError interrupt exceptions that are taken locally.", ++ }, ++ { ++ .name = "EXC_IRQ", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0086, ++ .desc = "This event counts only IRQ exceptions that are taken locally, including Virtual IRQ exceptions.", ++ }, ++ { ++ .name = "EXC_FIQ", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0087, ++ .desc = "This event counts only FIQ exceptions that are taken locally, including Virtual FIQ exceptions.", ++ }, ++ { ++ .name = "EXC_SMC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0088, ++ .desc = "This event counts only Secure Monitor Call exceptions. 
The counter does not increment on SMC instructions trapped as a Hyp Trap exception.", ++ }, ++ { ++ .name = "EXC_HVC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x008a, ++ .desc = "This event counts for both Hypervisor Call exceptions taken locally in the hypervisor and those taken as an exception from Non-secure EL1.", ++ }, ++ { ++ .name = "DCZVA_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x009f, ++ .desc = "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction.", ++ }, ++ { ++ .name = "FP_MV_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0105, ++ .desc = "This event counts architecturally executed floating-point move operations.", ++ }, ++ { ++ .name = "PRD_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0108, ++ .desc = "This event counts architecturally executed operations that using predicate register.", ++ }, ++ { ++ .name = "IEL_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0109, ++ .desc = "This event counts architecturally executed inter-element manipulation operations.", ++ }, ++ { ++ .name = "IREG_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x010a, ++ .desc = "This event counts architecturally executed inter-register manipulation operations.", ++ }, ++ { ++ .name = "FP_LD_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0112, ++ .desc = "This event counts architecturally executed NOSIMD load operations that using SIMD and FP registers.", ++ }, ++ { ++ .name = "FP_ST_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0113, ++ .desc = "This event counts architecturally executed NOSIMD store operations that using SIMD and FP registers.", ++ }, ++ { ++ .name = "BC_LD_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x011a, ++ .desc = "This event counts architecturally executed SIMD broadcast floating-point load operations.", ++ }, ++ { ++ .name = "EFFECTIVE_INST_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0121, ++ .desc = "This event counts architecturally executed instructions, excluding the MOVPRFX instruction.", ++ }, ++ { ++ 
.name = "PRE_INDEX_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0123, ++ .desc = "This event counts architecturally executed operations that uses 'pre-index' as its addressing mode.", ++ }, ++ { ++ .name = "POST_INDEX_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0124, ++ .desc = "This event counts architecturally executed operations that uses 'post-index' as its addressing mode.", ++ }, ++ { ++ .name = "UOP_SPLIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0139, ++ .desc = "This event counts the occurrence count of the micro-operation split.", ++ }, ++ { ++ .name = "LD_COMP_WAIT_L2_MISS", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0180, ++ .desc = "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store operation waits for memory access.", ++ }, ++ { ++ .name = "LD_COMP_WAIT_L2_MISS_EX", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0181, ++ .desc = "This event counts every cycle that no instructions are committed because the oldest and uncommitted integer load instruction waits for memory access.", ++ }, ++ { ++ .name = "LD_COMP_WAIT_L1_MISS", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0182, ++ .desc = "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store operation waits for L2 cache access.", ++ }, ++ { ++ .name = "LD_COMP_WAIT_L1_MISS_EX", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0183, ++ .desc = "This event counts every cycle that no instructions are committed because the oldest and uncommitted integer load instruction waits for L2 cache access.", ++ }, ++ { ++ .name = "LD_COMP_WAIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0184, ++ .desc = "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store operation waits for L1D, L2 and memory access.", ++ }, ++ { ++ .name = "LD_COMP_WAIT_EX", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0185, ++ .desc = "This event counts every cycle that no instructions are committed because the 
oldest and uncommitted integer load instruction waits for L1D, L2 and memory access.", ++ }, ++ { ++ .name = "LD_COMP_WAIT_PFP_BUSY", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0186, ++ .desc = "This event counts every cycle that no instructions are committed due to the lack of an available prefetch port.", ++ }, ++ { ++ .name = "LD_COMP_WAIT_PFP_BUSY_EX", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0187, ++ .desc = "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation.", ++ }, ++ { ++ .name = "LD_COMP_WAIT_PFP_BUSY_SWPF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0188, ++ .desc = "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction.", ++ }, ++ { ++ .name = "EU_COMP_WAIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0189, ++ .desc = "This event counts every cycle that no instructions are committed, and the oldest and uncommitted instruction is an integer or floating-point instruction.", ++ }, ++ { ++ .name = "FL_COMP_WAIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x018a, ++ .desc = "This event counts every cycle that no instructions are committed, and the oldest and uncommitted instruction is a floating-point instruction.", ++ }, ++ { ++ .name = "BR_COMP_WAIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x018b, ++ .desc = "This event counts every cycle that no instructions are committed, and the oldest and uncommitted instruction is a branch instruction.", ++ }, ++ { ++ .name = "ROB_EMPTY", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x018c, ++ .desc = "This event counts every cycle that no instructions are committed because the CSE is empty.", ++ }, ++ { ++ .name = "ROB_EMPTY_STQ_BUSY", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x018d, ++ .desc = "This event counts every cycle that no instructions are committed because the CSE is empty and the all store ports are full.", ++ }, ++ { ++ .name = "WFE_WFI_CYCLE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x018e, ++ .desc = "This event counts every cycle that the WFE/WFI instruction brings the 
instruction unit to a halt.", ++ }, ++ { ++ .name = "0INST_COMMIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0190, ++ .desc = "This event counts every cycle that no instructions are committed, but counts at the time when commits MOVPRFX only.", ++ }, ++ { ++ .name = "1INST_COMMIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0191, ++ .desc = "This event counts every cycle that one instruction is committed.", ++ }, ++ { ++ .name = "2INST_COMMIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0192, ++ .desc = "This event counts every cycle that two instructions are committed.", ++ }, ++ { ++ .name = "3INST_COMMIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0193, ++ .desc = "This event counts every cycle that three instructions are committed.", ++ }, ++ { ++ .name = "4INST_COMMIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0194, ++ .desc = "This event counts every cycle that four instructions are committed.", ++ }, ++ { ++ .name = "UOP_ONLY_COMMIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0198, ++ .desc = "This event counts every cycle that only any micro-operations are committed.", ++ }, ++ { ++ .name = "SINGLE_MOVPRFX_COMMIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0199, ++ .desc = "This event counts every cycle that only the MOVPRFX instruction is committed.", ++ }, ++ { ++ .name = "EAGA_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x01a0, ++ .desc = "This event counts valid cycles of EAGA pipeline.", ++ }, ++ { ++ .name = "EAGB_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x01a1, ++ .desc = "This event counts valid cycles of EAGB pipeline.", ++ }, ++ { ++ .name = "EXA_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x01a2, ++ .desc = "This event counts valid cycles of EXA pipeline.", ++ }, ++ { ++ .name = "EXB_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x01a3, ++ .desc = "This event counts valid cycles of EXB pipeline.", ++ }, ++ { ++ .name = "FLA_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x01a4, ++ .desc = "This event counts valid cycles of FLA pipeline.", ++ }, ++ { ++ .name = "FLB_VAL", ++ 
.modmsk = ARMV8_ATTRS, ++ .code = 0x01a5, ++ .desc = "This event counts valid cycles of FLB pipeline.", ++ }, ++ { ++ .name = "PRX_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x01a6, ++ .desc = "This event counts valid cycles of PRX pipeline.", ++ }, ++ { ++ .name = "FLA_VAL_PRD_CNT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x01b4, ++ .desc = "This event counts the number of 1 in the predicate bits of request in FLA pipeline, and corrects itself to be 16 when all bits are 1.", ++ }, ++ { ++ .name = "FLB_VAL_PRD_CNT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x01b5, ++ .desc = "This event counts the number of 1 in the predicate bits of request in FLB pipeline, and corrects itself to be 16 when all bits are 1.", ++ }, ++ { ++ .name = "EA_CORE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x01e0, ++ .desc = "This event counts energy consumption per cycle of core.", ++ }, ++ { ++ .name = "L1D_CACHE_REFILL_DM", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0200, ++ .desc = "This event counts L1D_CACHE_REFILL caused by demand access.", ++ }, ++ { ++ .name = "L1D_CACHE_REFILL_HWPRF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0202, ++ .desc = "This event counts L1D_CACHE_REFILL caused by hardware prefetch.", ++ }, ++ { ++ .name = "L1_MISS_WAIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0208, ++ .desc = "This event counts outstanding L1D cache miss requests per cycle.", ++ }, ++ { ++ .name = "L1I_MISS_WAIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0209, ++ .desc = "This event counts outstanding L1I cache miss requests per cycle.", ++ }, ++ { ++ .name = "L1HWPF_STREAM_PF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0230, ++ .desc = "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher.", ++ }, ++ { ++ .name = "L1HWPF_INJ_ALLOC_PF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0231, ++ .desc = "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.", ++ }, ++ { ++ .name = "L1HWPF_INJ_NOALLOC_PF", ++ .modmsk = ARMV8_ATTRS, 
++ .code = 0x0232, ++ .desc = "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.", ++ }, ++ { ++ .name = "L2HWPF_STREAM_PF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0233, ++ .desc = "This event counts streaming prefetch requests to L2 cache generated by hardware prefecher.", ++ }, ++ { ++ .name = "L2HWPF_INJ_ALLOC_PF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0234, ++ .desc = "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.", ++ }, ++ { ++ .name = "L2HWPF_INJ_NOALLOC_PF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0235, ++ .desc = "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.", ++ }, ++ { ++ .name = "L2HWPF_OTHER", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0236, ++ .desc = "This event counts prefetch requests to L2 cache generated by the other causes.", ++ }, ++ { ++ .name = "L1_PIPE0_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0240, ++ .desc = "This event counts valid cycles of L1D cache pipeline#0.", ++ }, ++ { ++ .name = "L1_PIPE1_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0241, ++ .desc = "This event counts valid cycles of L1D cache pipeline#1.", ++ }, ++ { ++ .name = "L1_PIPE0_VAL_IU_TAG_ADRS_SCE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0250, ++ .desc = "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1.", ++ }, ++ { ++ .name = "L1_PIPE0_VAL_IU_TAG_ADRS_PFE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0251, ++ .desc = "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1.", ++ }, ++ { ++ .name = "L1_PIPE1_VAL_IU_TAG_ADRS_SCE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0252, ++ .desc = "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1.", ++ }, ++ { ++ .name = "L1_PIPE1_VAL_IU_TAG_ADRS_PFE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0253, ++ .desc = "This event counts 
requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1.", ++ }, ++ { ++ .name = "L1_PIPE0_COMP", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0260, ++ .desc = "This event counts completed requests in L1D cache pipeline#0.", ++ }, ++ { ++ .name = "L1_PIPE1_COMP", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0261, ++ .desc = "This event counts completed requests in L1D cache pipeline#1.", ++ }, ++ { ++ .name = "L1I_PIPE_COMP", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0268, ++ .desc = "This event counts completed requests in L1I cache pipeline.", ++ }, ++ { ++ .name = "L1I_PIPE_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0269, ++ .desc = "This event counts valid cycles of L1I cache pipeline.", ++ }, ++ { ++ .name = "L1_PIPE_ABORT_STLD_INTLK", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0274, ++ .desc = "This event counts aborted requests in L1D pipelines that due to store-load interlock.", ++ }, ++ { ++ .name = "L1_PIPE0_VAL_IU_NOT_SEC0", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x02a0, ++ .desc = "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0.", ++ }, ++ { ++ .name = "L1_PIPE1_VAL_IU_NOT_SEC0", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x02a1, ++ .desc = "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0.", ++ }, ++ { ++ .name = "L1_PIPE_COMP_GATHER_2FLOW", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x02b0, ++ .desc = "This event counts the number of times where 2 elements of the gather instructions became 2flows because 2 elements could not be combined.", ++ }, ++ { ++ .name = "L1_PIPE_COMP_GATHER_1FLOW", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x02b1, ++ .desc = "This event counts the number of times where 2 elements of the gather instructions became 1flow because 2 elements could be combined.", ++ }, ++ { ++ .name = "L1_PIPE_COMP_GATHER_0FLOW", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x02b2, ++ .desc = "This event counts the number of times where 2 elements of the gather instructions became 0flow because both 
predicate values are 0.", ++ }, ++ { ++ .name = "L1_PIPE_COMP_SCATTER_1FLOW", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x02b3, ++ .desc = "This event counts the number of flows of the scatter instructions.", ++ }, ++ { ++ .name = "L1_PIPE0_COMP_PRD_CNT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x02b8, ++ .desc = "This event counts the number of 1 in the predicate bits of request in L1D cache pipeline#0, and corrects itself to be 16 when all bits are 1.", ++ }, ++ { ++ .name = "L1_PIPE1_COMP_PRD_CNT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x02b9, ++ .desc = "This event counts the number of 1 in the predicate bits of request in L1D cache pipeline#1, and corrects itself to be 16 when all bits are 1.", ++ }, ++ { ++ .name = "L2D_CACHE_REFILL_DM", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0300, ++ .desc = "This event counts L2D_CACHE_REFILL caused by demand access.", ++ }, ++ { ++ .name = "L2D_CACHE_REFILL_HWPRF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0302, ++ .desc = "This event counts L2D_CACHE_REFILL caused by hardware prefetch.", ++ }, ++ { ++ .name = "L2_MISS_WAIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0308, ++ .desc = "This event counts outstanding L2 cache miss requests per cycle. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "L2_MISS_COUNT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0309, ++ .desc = "This event counts the number of times of L2 cache miss. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_READ_TOTAL_CMG0", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0310, ++ .desc = "This event counts read requests from CMG0 to measured CMG, if measured CMG is not CMG0. Otherwise, this event counts read requests from CMG0 local memory to measured CMG. 
It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_READ_TOTAL_CMG1", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0311, ++ .desc = "This event counts read requests from CMG1 to measured CMG, if measured CMG is not CMG1. Otherwise, this event counts read requests from CMG1 local memory to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_READ_TOTAL_CMG2", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0312, ++ .desc = "This event counts read requests from CMG2 to measured CMG, if measured CMG is not CMG2. Otherwise, this event counts read requests from CMG2 local memory to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_READ_TOTAL_CMG3", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0313, ++ .desc = "This event counts read requests from CMG3 to measured CMG, if measured CMG is not CMG3. Otherwise, this event counts read requests from CMG3 local memory to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_READ_TOTAL_TOFU", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0314, ++ .desc = "This event counts read requests from tofu controller to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_READ_TOTAL_PCI", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0315, ++ .desc = "This event counts read requests from PCI controller to measured CMG. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_READ_TOTAL_MEM", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0316, ++ .desc = "This event counts read requests from measured CMG local memory to measured CMG. 
It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_WRITE_TOTAL_CMG0", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0318, ++ .desc = "This event counts write requests from measured CMG to CMG0, if measured CMG is not CMG0. Otherwise, this event counts write requests from measured CMG to CMG0 local memory. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_WRITE_TOTAL_CMG1", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0319, ++ .desc = "This event counts write requests from measured CMG to CMG1, if measured CMG is not CMG1. Otherwise, this event counts write requests from measured CMG to CMG1 local memory. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_WRITE_TOTAL_CMG2", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x031a, ++ .desc = "This event counts write requests from measured CMG to CMG2, if measured CMG is not CMG2. Otherwise, this event counts write requests from measured CMG to CMG2 local memory. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_WRITE_TOTAL_CMG3", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x031b, ++ .desc = "This event counts write requests from measured CMG to CMG3, if measured CMG is not CMG3. Otherwise, this event counts write requests from measured CMG to CMG3 local memory. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_WRITE_TOTAL_TOFU", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x031c, ++ .desc = "This event counts write requests from measured CMG to tofu controller. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_WRITE_TOTAL_PCI", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x031d, ++ .desc = "This event counts write requests from measured CMG to PCI controller. 
It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "BUS_WRITE_TOTAL_MEM", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x031e, ++ .desc = "This event counts write requests from measured CMG to measured CMG local memory. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "L2D_SWAP_DM", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0325, ++ .desc = "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.", ++ }, ++ { ++ .name = "L2D_CACHE_MIBMCH_PRF", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0326, ++ .desc = "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.", ++ }, ++ { ++ .name = "L2_PIPE_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0330, ++ .desc = "This event counts valid cycles of L2 cache pipeline. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "L2_PIPE_COMP_ALL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0350, ++ .desc = "This event counts completed requests in L2 cache pipeline. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "L2_PIPE_COMP_PF_L2MIB_MCH", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0370, ++ .desc = "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "L2D_CACHE_SWAP_LOCAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x0396, ++ .desc = "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "EA_L2", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x03e0, ++ .desc = "This event counts energy consumption per cycle of L2 cache. 
It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "EA_MEMORY", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x03e8, ++ .desc = "This event counts energy consumption per cycle of CMG local memory. It counts all events caused in measured CMG regardless of measured PE.", ++ }, ++ { ++ .name = "SIMD_INST_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8000, ++ .desc = "This event counts architecturally executed SIMD instructions, excluding the Advanced SIMD scalar instructions and the instructions listed in Non-SIMD SVE instructions section of SVE Reference Manual.", ++ }, ++ { ++ .name = "SVE_INST_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8002, ++ .desc = "This event counts architecturally executed Advanced SIMD instructions, including the instructions listed in Non-SIMD SVE instructions section of SVE Reference Manual.", ++ }, ++ { ++ .name = "UOP_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8008, ++ .desc = "This event counts all architecturally executed micro-operations.", ++ }, ++ { ++ .name = "SVE_MATH_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x800e, ++ .desc = "This event counts architecturally executed math function operations due to the SVE FTSMUL, FTMAD, FTSSEL, and FEXPA instructions.", ++ }, ++ { ++ .name = "FP_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8010, ++ .desc = "This event counts architecturally executed operations due to scalar, Advanced SIMD, and SVE instructions listed in Floating-point instructions section of SVE Reference Manual.", ++ }, ++ { ++ .name = "FP_FMA_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8028, ++ .desc = "This event counts architecturally executed floating-point fused multiply-add and multiply-subtract operations.", ++ }, ++ { ++ .name = "FP_RECPE_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8034, ++ .desc = "This event counts architecturally executed floating-point reciprocal estimate operations due to the Advanced SIMD scalar, Advanced SIMD vector, and SVE FRECPE and 
FRSQRTE instructions.", ++ }, ++ { ++ .name = "FP_CVT_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8038, ++ .desc = "This event counts architecturally executed floating-point convert operations due to the scalar, Advanced SIMD, and SVE floating-point conversion instructions listed in Floating-point conversions section of SVE Reference Manual.", ++ }, ++ { ++ .name = "ASE_SVE_INT_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8043, ++ .desc = "This event counts architecturally executed integer arithmetic operations due to Advanced SIMD and SVE data-processing instructions listed in Integer instructions section of SVE Reference Manual.", ++ }, ++ { ++ .name = "SVE_PRED_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8074, ++ .desc = "This event counts architecturally executed SIMD data-processing and load/store operations due to SVE instructions with a Governing predicate operand that determines the Active elements.", ++ }, ++ { ++ .name = "SVE_MOVPRFX_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x807c, ++ .desc = "This event counts architecturally executed operations due to MOVPRFX instructions, whether or not they are fused with the prefixed instruction.", ++ }, ++ { ++ .name = "SVE_MOVPRFX_U_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x807f, ++ .desc = "This event counts architecturally executed operations due to MOVPRFX instructions that are not fused with the prefixed instruction.", ++ }, ++ { ++ .name = "ASE_SVE_LD_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8085, ++ .desc = "This event counts architecturally executed operations that read from memory due to SVE and Advanced SIMD load instructions.", ++ }, ++ { ++ .name = "ASE_SVE_ST_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8086, ++ .desc = "This event counts architecturally executed operations that write to memory due to SVE and Advanced SIMD store instructions.", ++ }, ++ { ++ .name = "PRF_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8087, ++ .desc = "This event counts architecturally executed prefetch 
operations due to scalar PRFM and SVE PRF instructions.", ++ }, ++ { ++ .name = "BASE_LD_REG_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8089, ++ .desc = "This event counts architecturally executed operations that read from memory due to an instruction that loads a general-purpose register.", ++ }, ++ { ++ .name = "BASE_ST_REG_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x808a, ++ .desc = "This event counts architecturally executed operations that write to memory due to an instruction that stores a general-purpose register, excluding the 'DC ZVA' instruction.", ++ }, ++ { ++ .name = "SVE_LDR_REG_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8091, ++ .desc = "This event counts architecturally executed operations that read from memory due to an SVE LDR instruction.", ++ }, ++ { ++ .name = "SVE_STR_REG_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8092, ++ .desc = "This event counts architecturally executed operations that write to memory due to an SVE STR instruction.", ++ }, ++ { ++ .name = "SVE_LDR_PREG_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8095, ++ .desc = "This event counts architecturally executed operations that read from memory due to an SVE LDR (predicate) instruction.", ++ }, ++ { ++ .name = "SVE_STR_PREG_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x8096, ++ .desc = "This event counts architecturally executed operations that write to memory due to an SVE STR (predicate) instruction.", ++ }, ++ { ++ .name = "SVE_PRF_CONTIG_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x809f, ++ .desc = "This event counts architecturally executed operations that prefetch memory due to an SVE predicated single contiguous element prefetch instruction.", ++ }, ++ { ++ .name = "ASE_SVE_LD_MULTI_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80a5, ++ .desc = "This event counts architecturally executed operations that read from memory due to SVE and Advanced SIMD multiple vector contiguous structure load instructions.", ++ }, ++ { ++ .name = "ASE_SVE_ST_MULTI_SPEC", ++ 
.modmsk = ARMV8_ATTRS, ++ .code = 0x80a6, ++ .desc = "This event counts architecturally executed operations that write to memory due to SVE and Advanced SIMD multiple vector contiguous structure store instructions.", ++ }, ++ { ++ .name = "SVE_LD_GATHER_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80ad, ++ .desc = "This event counts architecturally executed operations that read from memory due to SVE noncontiguous gather-load instructions.", ++ }, ++ { ++ .name = "SVE_ST_SCATTER_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80ae, ++ .desc = "This event counts architecturally executed operations that write to memory due to SVE noncontiguous scatter-store instructions.", ++ }, ++ { ++ .name = "SVE_PRF_GATHER_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80af, ++ .desc = "This event counts architecturally executed operations that prefetch memory due to SVE noncontiguous gather-prefetch instructions.", ++ }, ++ { ++ .name = "SVE_LDFF_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80bc, ++ .desc = "This event counts architecturally executed memory read operations due to SVE First-fault and Non-fault load instructions.", ++ }, ++ { ++ .name = "FP_SCALE_OPS_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80c0, ++ .desc = "This event counts architecturally executed SVE arithmetic operations. This event counter is incremented by (128 / CSIZE) and by twice that amount for operations that would also be counted by SVE_FP_FMA_SPEC.", ++ }, ++ { ++ .name = "FP_FIXED_OPS_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80c1, ++ .desc = "This event counts architecturally executed v8SIMD and FP arithmetic operations. 
The event counter is incremented by the specified number of elements for Advanced SIMD operations or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC.", ++ }, ++ { ++ .name = "FP_HP_SCALE_OPS_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80c2, ++ .desc = "This event counts architecturally executed SVE half-precision arithmetic operations. This event counter is incremented by 8, or by 16 for operations that would also be counted by SVE_FP_FMA_SPEC.", ++ }, ++ { ++ .name = "FP_HP_FIXED_OPS_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80c3, ++ .desc = "This event counts architecturally executed v8SIMD and FP half-precision arithmetic operations. This event counter is incremented by the number of 16-bit elements for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC.", ++ }, ++ { ++ .name = "FP_SP_SCALE_OPS_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80c4, ++ .desc = "This event counts architecturally executed SVE single-precision arithmetic operations. This event counter is incremented by 4, or by 8 for operations that would also be counted by SVE_FP_FMA_SPEC.", ++ }, ++ { ++ .name = "FP_SP_FIXED_OPS_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80c5, ++ .desc = "This event counts architecturally executed v8SIMD and FP single-precision arithmetic operations. This event counter is incremented by the number of 32-bit elements for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC.", ++ }, ++ { ++ .name = "FP_DP_SCALE_OPS_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80c6, ++ .desc = "This event counts architecturally executed SVE double-precision arithmetic operations. 
This event counter is incremented by 2, or by 4 for operations that would also be counted by SVE_FP_FMA_SPEC.", ++ }, ++ { ++ .name = "FP_DP_FIXED_OPS_SPEC", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x80c7, ++ .desc = "This event counts architecturally executed v8SIMD and FP double-precision arithmetic operations. This event counter is incremented by 2 for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC.", ++ }, ++}; +diff --git a/lib/pfmlib_arm_armv8.c b/lib/pfmlib_arm_armv8.c +index a252951..26b3f8c 100644 +--- a/lib/pfmlib_arm_armv8.c ++++ b/lib/pfmlib_arm_armv8.c +@@ -35,6 +35,7 @@ + #include "events/arm_xgene_events.h" /* Applied Micro X-Gene tables */ + #include "events/arm_cavium_tx2_events.h" /* Marvell ThunderX2 tables */ + #include "events/arm_marvell_tx2_unc_events.h" /* Marvell ThunderX2 PMU tables */ ++#include "events/arm_fujitsu_a64fx_events.h" /* Fujitsu A64FX PMU tables */ + + static int + pfm_arm_detect_cortex_a57(void *this) +@@ -104,6 +105,22 @@ pfm_arm_detect_thunderx2(void *this) + return PFM_ERR_NOTSUPP; + } + ++static int ++pfm_arm_detect_a64fx(void *this) ++{ ++ int ret; ++ ++ ret = pfm_arm_detect(this); ++ if (ret != PFM_SUCCESS) ++ return PFM_ERR_NOTSUPP; ++ ++ if ((pfm_arm_cfg.implementer == 0x46) && /* Fujitsu */ ++ (pfm_arm_cfg.part == 0x001)) { /* a64fx */ ++ return PFM_SUCCESS; ++ } ++ return PFM_ERR_NOTSUPP; ++} ++ + /* ARM Cortex A57 support */ + pfmlib_pmu_t arm_cortex_a57_support={ + .desc = "ARM Cortex A57", +@@ -204,6 +221,31 @@ pfmlib_pmu_t arm_thunderx2_support={ + .get_event_nattrs = pfm_arm_get_event_nattrs, + }; + ++/* Fujitsu A64FX support */ ++pfmlib_pmu_t arm_fujitsu_a64fx_support={ ++ .desc = "Fujitsu A64FX", ++ .name = "arm_a64fx", ++ .pmu = PFM_PMU_ARM_A64FX, ++ .pme_count = LIBPFM_ARRAY_SIZE(arm_a64fx_pe), ++ .type = PFM_PMU_TYPE_CORE, ++ .pe = arm_a64fx_pe, ++ ++ .pmu_detect = pfm_arm_detect_a64fx, ++ .max_encoding = 1, ++ 
.num_cntrs = 6, ++ ++ .get_event_encoding[PFM_OS_NONE] = pfm_arm_get_encoding, ++ PFMLIB_ENCODE_PERF(pfm_arm_get_perf_encoding), ++ .get_event_first = pfm_arm_get_event_first, ++ .get_event_next = pfm_arm_get_event_next, ++ .event_is_valid = pfm_arm_event_is_valid, ++ .validate_table = pfm_arm_validate_table, ++ .get_event_info = pfm_arm_get_event_info, ++ .get_event_attr_info = pfm_arm_get_event_attr_info, ++ PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), ++ .get_event_nattrs = pfm_arm_get_event_nattrs, ++}; ++ + // For uncore, each socket has a separate perf name, otherwise they are the same, use macro + + #define DEFINE_TX2_DMC(n) \ +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index 335155e..102db37 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -494,6 +494,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= + &arm_thunderx2_llc1_support, + &arm_thunderx2_ccpi0_support, + &arm_thunderx2_ccpi1_support, ++ &arm_fujitsu_a64fx_support, + #endif + #ifdef CONFIG_PFMLIB_ARCH_ARM64 + &arm_cortex_a57_support, +@@ -506,6 +507,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= + &arm_thunderx2_llc1_support, + &arm_thunderx2_ccpi0_support, + &arm_thunderx2_ccpi1_support, ++ &arm_fujitsu_a64fx_support, + #endif + + #ifdef CONFIG_PFMLIB_ARCH_S390X +diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h +index 845a13e..237e8d6 100644 +--- a/lib/pfmlib_priv.h ++++ b/lib/pfmlib_priv.h +@@ -662,6 +662,8 @@ extern pfmlib_pmu_t arm_thunderx2_llc1_support; + extern pfmlib_pmu_t arm_thunderx2_ccpi0_support; + extern pfmlib_pmu_t arm_thunderx2_ccpi1_support; + ++extern pfmlib_pmu_t arm_fujitsu_a64fx_support; ++ + extern pfmlib_pmu_t mips_74k_support; + extern pfmlib_pmu_t s390x_cpum_cf_support; + extern pfmlib_pmu_t s390x_cpum_sf_support; +diff --git a/tests/validate_arm64.c b/tests/validate_arm64.c +index 5cb1966..96060bb 100644 +--- a/tests/validate_arm64.c ++++ b/tests/validate_arm64.c +@@ -198,6 +198,27 @@ static const test_event_t arm64_test_events[]={ + .codes[0] = 
0xd, + .fstr = "tx2_llc0::UNC_LLC_READ", + }, ++ { SRC_LINE, ++ .name = "arm_a64fx::CPU_CYCLES", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x8000011, ++ .fstr = "arm_a64fx::CPU_CYCLES:k=1:u=1:hv=0", ++ }, ++ { SRC_LINE, ++ .name = "arm_a64fx::CPU_CYCLES:k", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x88000011, ++ .fstr = "arm_a64fx::CPU_CYCLES:k=1:u=0:hv=0", ++ }, ++ { SRC_LINE, ++ .name = "arm_a64fx::INST_RETIRED", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x8000008, ++ .fstr = "arm_a64fx::INST_RETIRED:k=1:u=1:hv=0", ++ }, + }; + #define NUM_TEST_EVENTS (int)(sizeof(arm64_test_events)/sizeof(test_event_t)) + +commit dfe30a72c18dc64ea8e55c469a9adcfec9c09340 +Author: Stephane Eranian +Date: Wed Sep 23 16:53:53 2020 -0700 + + install Fujitsu A64FX man page in ARM64 mode + + This patch corrects the documentation Makefile to install + the libpfm_arm_a64fx.3 man page when building for ARM64. Otherwise + the man page would only be installed in ARM (32-bit) mode. + + Reported-by: William Cohen + Signed-off-by: Stephane Eranian + +diff --git a/docs/Makefile b/docs/Makefile +index f8beebc..f5c0935 100644 +--- a/docs/Makefile ++++ b/docs/Makefile +@@ -131,6 +131,7 @@ ifeq ($(CONFIG_PFMLIB_ARCH_ARM64),y) + ARCH_MAN += libpfm_arm_xgene.3 \ + libpfm_arm_ac57.3 \ +- libpfm_arm_ac53.3 ++ libpfm_arm_ac53.3 \ ++ libpfm_arm_a64fx.3 + endif + + ifeq ($(CONFIG_PFMLIB_ARCH_MIPS),y) +commit f6500e77563e606c8510ff26f57d321328bd8157 +Author: Masahiko, Yamada +Date: Wed Jan 27 20:12:59 2021 +0900 + + Changing the number of PMU counters and deleting the ARM(32-bit) mode for A64FX + + The current libpfm4 implementation treats PMCR_EL0.N = 0x6 like other ARM Reference processors. + On an A64FX, PMCR_EL0.N = 0x8 (The number of PMU counters is 8.). + Therefore, only 6 counters are available in the current implementation. + The A64FX core also supports the AArch64 state and the A64 Instruction set. 
+ The AArch32 state and the A32, T32 Instruction set are not supported and cannot be transitioned to this Execution state. + Currently, the libpfm manual(docs/man3/libpfm_arm_a64fx.3) states that A32/A64 can be used, but A32 cannot be used. + + I have created a patch with the above fixes, so please review and merge it. + + Originally, the specification of the A64FX which Fujitsu published should have described the above two points, + but the description was omitted. + A64FX Specification HPC Extension v1.1 will add: + - On an A64FX, PMCR_EL0.N = 0x8 (The number of PMU counters is 8.). + - A64FX does not support the AArch32 state and the A32, T32 Instruction set and cannot transition to this Execution state. + +diff --git a/docs/Makefile b/docs/Makefile +index f5c0935..e124747 100644 +--- a/docs/Makefile ++++ b/docs/Makefile +@@ -123,8 +123,7 @@ ARCH_MAN += libpfm_arm_xgene.3 \ + libpfm_arm_ac15.3 \ + libpfm_arm_ac8.3 \ + libpfm_arm_ac9.3 \ +- libpfm_arm_qcom_krait.3 \ +- libpfm_arm_a64fx.3 ++ libpfm_arm_qcom_krait.3 + endif + + ifeq ($(CONFIG_PFMLIB_ARCH_ARM64),y) +diff --git a/docs/man3/libpfm_arm_a64fx.3 b/docs/man3/libpfm_arm_a64fx.3 +index 6214eb7..587eea6 100644 +--- a/docs/man3/libpfm_arm_a64fx.3 ++++ b/docs/man3/libpfm_arm_a64fx.3 +@@ -11,8 +11,8 @@ libpfm_arm_a64fx - support for Fujitsu A64FX PMU + .SH DESCRIPTION + The library supports the Fujitsu A64FX core PMU. + +-This PMU supports 6 counters and privilege levels filtering. +-It can operate in both 32 and 64 bit modes. ++This PMU supports 8 counters and privilege levels filtering. ++It can operate in 64 bit mode only. 
+ + .SH MODIFIERS + The following modifiers are supported on Fujitsu A64FX: +diff --git a/lib/Makefile b/lib/Makefile +index 483e0e1..7afe411 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -371,7 +371,8 @@ INC_ARM64=events/arm_cortex_a57_events.h \ + INC_ARM64=events/arm_cortex_a57_events.h \ + events/arm_cortex_a53_events.h \ + events/arm_cavium_tx2_events.h \ +- events/arm_marvell_tx2_unc_events.h ++ events/arm_marvell_tx2_unc_events.h \ ++ events/arm_fujitsu_a64fx_events.h \ + + INCDEP=$(INC_COMMON) $(INCARCH) + +diff --git a/lib/pfmlib_arm_armv8.c b/lib/pfmlib_arm_armv8.c +index e147d6c..00e2e3f 100644 +--- a/lib/pfmlib_arm_armv8.c ++++ b/lib/pfmlib_arm_armv8.c +@@ -254,7 +254,7 @@ pfmlib_pmu_t arm_fujitsu_a64fx_support={ + + .pmu_detect = pfm_arm_detect_a64fx, + .max_encoding = 1, +- .num_cntrs = 6, ++ .num_cntrs = 8, + + .get_event_encoding[PFM_OS_NONE] = pfm_arm_get_encoding, + PFMLIB_ENCODE_PERF(pfm_arm_get_perf_encoding), diff --git a/libpfm-python3-setup.patch b/libpfm-python3-setup.patch new file mode 100644 index 0000000..f43187a --- /dev/null +++ b/libpfm-python3-setup.patch @@ -0,0 +1,10 @@ +diff --git a/python/setup.py b/python/setup.py +index eda8fa5..dff0f27 100755 +--- a/python/setup.py ++++ b/python/setup.py +@@ -1,4 +1,4 @@ +-#!/usr/bin/env python ++#!/usr/bin/env python3 + + from distutils.core import setup, Extension + from distutils.command.install_data import install_data diff --git a/libpfm-tx2.patch b/libpfm-tx2.patch new file mode 100644 index 0000000..d730c43 --- /dev/null +++ b/libpfm-tx2.patch @@ -0,0 +1,1341 @@ +commit 6c9e44b95a55b8bf62cbd64009c4c9b30964a66c +Author: Steve Walk +Date: Tue Mar 20 09:37:56 2018 -0700 + + update Cavium ThunderX2 with now public events + + This patch adds new model specific events to the + Cavium Thunder X2 core PMU. 
The updated list is based + on publicly available documentation from Cavium which + is available at: + https://cavium.com/resources.html + + Signed-off-by: Steve Walk + +diff --git a/lib/events/arm_cavium_tx2_events.h b/lib/events/arm_cavium_tx2_events.h +index 67de9f8..198d33d 100644 +--- a/lib/events/arm_cavium_tx2_events.h ++++ b/lib/events/arm_cavium_tx2_events.h +@@ -23,6 +23,9 @@ + * + * ARM Architecture Reference Manual, ARMv8, for ARMv8-A architecture profile, + * ARM DDI 0487B.a (ID033117) ++ * ++ * Cavium ThunderX2 C99XX PMU Events (Abridged), July 31, 2018 ++ * https://cavium.com/resources.html + */ + + static const arm_entry_t arm_thunderx2_pe[]={ +@@ -161,6 +164,11 @@ static const arm_entry_t arm_thunderx2_pe[]={ + .code = 0x1C, + .desc = "Instruction architecturally executed (condition check pass) Write to translation table base" + }, ++ {.name = "CHAIN", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x1E, ++ .desc = "For odd-numbered counters, increments the count by one for each overflow of the proceeding even counter" ++ }, + {.name = "L1D_CACHE_ALLOCATE", + .modmsk = ARMV8_ATTRS, + .code = 0x1F, +@@ -556,6 +564,274 @@ static const arm_entry_t arm_thunderx2_pe[]={ + .code = 0x91, + .desc = "Release consistency instruction speculatively executed (store-release)" + }, +- +- /* END Cavium ThunderX2 specific events */ ++ {.name = "L1D_LHS_VANOTP", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xC1, ++ .desc = "A Load hit store retry" ++ }, ++ {.name = "L1D_LHS_OVRLAP", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xC2, ++ .desc = "A Load hit store retry, VA match, PA mismatch" ++ }, ++ {.name = "L1D_LHS_VANOSD", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xC3, ++ .desc = "A Load hit store retry, VA match, store data not issued" ++ }, ++ {.name = "L1D_LHS_FWD", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xC4, ++ .desc = "A Load hit store forwarding. 
Load completes" ++ }, ++ {.name = "L1D_BNKCFL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xC6, ++ .desc = "Bank conflict load retry" ++ }, ++ {.name = "L1D_LSMQ_FULL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xC7, ++ .desc = "LSMQ retry" ++ }, ++ {.name = "L1D_LSMQ_HIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xC8, ++ .desc = "LSMQ hit retry" ++ }, ++ {.name = "L1D_EXPB_MISS", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xC9, ++ .desc = "An external probe missed the L1" ++ }, ++ {.name = "L1D_L2EV_MISS", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xCA, ++ .desc = "An L2 evict operation missed the L1" ++ }, ++ {.name = "L1D_EXPB_HITM", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xCB, ++ .desc = "An external probe hit a modified line in the L1" ++ }, ++ {.name = "L1D_L2EV_HITM", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xCC, ++ .desc = "An L2 evict operation hit a modified line in the L1" ++ }, ++ {.name = "L1D_EXPB_HIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xCD, ++ .desc = "An external probe hit in the L1" ++ }, ++ {.name = "L1D_L2EV_HIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xCE, ++ .desc = "An L2 evict operation hit in the L1" ++ }, ++ {.name = "L1D_EXPB_RETRY", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xCF, ++ .desc = "An external probe hit was retried" ++ }, ++ {.name = "L1D_L2EV_RETRY", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xD0, ++ .desc = "An L2 evict operation was retried" ++ }, ++ {.name = "L1D_ST_RMW", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xD1, ++ .desc = "A read modify write store was drained and updated the L1" ++ }, ++ {.name = "L1D_LSMQ00_LDREQ", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xD2, ++ .desc = "A load has allocated LSMQ entry 0" ++ }, ++ {.name = "L1D_LSMQ00_LDVLD", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xD3, ++ .desc = "LSMQ entry 0 was initiated by a load" ++ }, ++ {.name = "L1D_LSMQ15_STREQ", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xD4, ++ .desc = "A store was allocated LSMQ entry 15" ++ }, ++ {.name = "L1D_LSMQ15_STVLD", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xD5, ++ .desc = "LSMQ 
entry 15 was initiated by a store" ++ }, ++ {.name = "L1D_PB_FLUSH", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xD6, ++ .desc = "LRQ ordering flush" ++ }, ++ {.name = "BR_COND_MIS_PRED_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xE0, ++ .desc = "Conditional branch instruction executed, but mis-predicted" ++ }, ++ {.name = "BR_IND_MIS_PRED_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xE1, ++ .desc = "Indirect branch instruction executed, but mis-predicted" ++ }, ++ {.name = "BR_RETURN_MIS_PRED_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xE2, ++ .desc = "Return branch instruction executed, but mis-predicted" ++ }, ++ {.name = "OP_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xE8, ++ .desc = "Uops executed" ++ }, ++ {.name = "LD_OP_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xE9, ++ .desc = "Load uops executed" ++ }, ++ {.name = "ST_OP_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xEA, ++ .desc = "Store uops executed" ++ }, ++ {.name = "FUSED_OP_RETIRED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xEB, ++ .desc = "Fused uops executed" ++ }, ++ {.name = "IRQ_MASK", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xF8, ++ .desc = "Cumulative duration of a PSTATE.I interrupt mask set to 1" ++ }, ++ {.name = "FIQ_MASK", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xF9, ++ .desc = "Cumulative duration of a PSTATE.F interrupt mask set to 1" ++ }, ++ {.name = "SERROR_MASK", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0xFA, ++ .desc = "Cumulative duration of PSTATE.A interrupt mask set to 1" ++ }, ++ {.name = "WFIWFE_SLEEP", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x108, ++ .desc = "Number of cycles in which CPU is in low power mode due to WFI/WFE instruction" ++ }, ++ {.name = "L2TLB_4K_PAGE_MISS", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x127, ++ .desc = "L2 TLB lookup miss using 4K page size" ++ }, ++ {.name = "L2TLB_64K_PAGE_MISS", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x128, ++ .desc = "L2 TLB lookup miss using 64K page size" ++ }, ++ {.name = "L2TLB_2M_PAGE_MISS", ++ .modmsk = ARMV8_ATTRS, ++ 
.code = 0x129, ++ .desc = "L2 TLB lookup miss using 2M page size" ++ }, ++ {.name = "L2TLB_512M_PAGE_MISS", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x12A, ++ .desc = "L2 TLB lookup miss using 512M page size" ++ }, ++ {.name = "ISB_EMPTY", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x150, ++ .desc = "Number of cycles during which micro-op skid-buffer is empty" ++ }, ++ {.name = "ISB_FULL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x151, ++ .desc = "Number of cycles during which micro-op skid-buffer is back-pressuring decode" ++ }, ++ {.name = "STALL_NOTSELECTED", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x152, ++ .desc = "Number of cycles during which thread was available for dispatch but not selected" ++ }, ++ {.name = "ROB_RECYCLE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x153, ++ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to ROB full" ++ }, ++ {.name = "ISSQ_RECYCLE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x154, ++ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to ISSQ full" ++ }, ++ {.name = "GPR_RECYCLE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x155, ++ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to GPR full" ++ }, ++ {.name = "FPR_RECYCLE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x156, ++ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to FPR full" ++ }, ++ {.name = "LRQ_RECYCLE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x158, ++ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to LRQ full" ++ }, ++ {.name = "SRQ_RECYCLE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x159, ++ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to SRQ full" ++ }, ++ {.name = "BSR_RECYCLE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x15B, ++ .desc = "Number of cycles in which one or more valid micro-ops did not dispatch due to BSR full" ++ }, ++ {.name = "UOPSFUSED", ++ .modmsk = ARMV8_ATTRS, ++ 
.code = 0x164, ++ .desc = "Number of fused micro-ops dispatched" ++ }, ++ {.name = "L2D_TLBI_INT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x20B, ++ .desc = "Internal mmu tlbi cacheops" ++ }, ++ {.name = "L2D_TLBI_EXT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x20C, ++ .desc = "External mmu tlbi cacheops" ++ }, ++ {.name = "L2D_HWPF_DMD_HIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x218, ++ .desc = "Scu ld/st requests that hit cache or msg for lines brought in by the hardware prefetcher" ++ }, ++ {.name = "L2D_HWPF_REQ_VAL", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x219, ++ .desc = "Scu hwpf requests into the pipeline" ++ }, ++ {.name = "L2D_HWPF_REQ_LD", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x21A, ++ .desc = "Scu hwpf ld requests into the pipeline" ++ }, ++ {.name = "L2D_HWPF_REQ_MISS", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x21B, ++ .desc = "Scu hwpf ld requests that miss" ++ }, ++ {.name = "L2D_HWPF_NEXT_LINE", ++ .modmsk = ARMV8_ATTRS, ++ .code = 0x21C, ++ .desc = "Scu hwpf next line requests generated" ++ }, + }; +From 0b050ca9ba2a2bf74f87fa3a8b4ed8aec9d1dfa8 Mon Sep 17 00:00:00 2001 +From: Shay Gal-On +Date: Wed, 23 Oct 2019 18:58:03 -0700 +Subject: [PATCH 1/4] ThunderX2 uncore support + +This patch adds ThunderX2 uncore PMU support. 
+ +The following uncore PMUs are added: +- tx2_llc0, tx2_llc1 (last level cache) +- tx2_dmc0, tx2_dmc1 (memory controller) + +Based on documentation available at: +https://www.marvell.com/documents/hrur6mybdvk5uki1w0z7/ + +Signed-off-by: Shay Gal-On +--- + include/perfmon/pfmlib.h | 5 ++ + lib/Makefile | 2 +- + lib/events/arm_cavium_tx2_events.h | 61 +++++++++++++ + lib/pfmlib_arm_armv8.c | 55 ++++++++++++ + lib/pfmlib_common.c | 4 + + lib/pfmlib_priv.h | 6 ++ + lib/pfmlib_tx2_unc_perf_event.c | 139 +++++++++++++++++++++++++++++ + tests/validate_arm64.c | 6 ++ + 8 files changed, 277 insertions(+), 1 deletion(-) + create mode 100644 lib/pfmlib_tx2_unc_perf_event.c + +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index 09c673d..20d5feb 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -546,6 +546,11 @@ typedef enum { + PFM_PMU_INTEL_KNM_UNC_UBOX, /* Intel Knights Mill Ubox uncore */ + PFM_PMU_INTEL_KNM_UNC_M2PCIE, /* Intel Knights Mill M2PCIe uncore */ + PFM_PMU_ARM_THUNDERX2, /* Cavium ThunderX2 */ ++ ++ PFM_PMU_ARM_THUNDERX2_DMC0, /* Cavium ThunderX2 DMC unit 0 uncore */ ++ PFM_PMU_ARM_THUNDERX2_DMC1, /* Cavium ThunderX2 DMC unit 1 uncore */ ++ PFM_PMU_ARM_THUNDERX2_LLC0, /* Cavium ThunderX2 LLC unit 0 uncore */ ++ PFM_PMU_ARM_THUNDERX2_LLC1, /* Cavium ThunderX2 LLC unit 1 uncore */ + /* MUST ADD NEW PMU MODELS HERE */ + + PFM_PMU_MAX /* end marker */ +diff --git a/lib/Makefile b/lib/Makefile +index 2eb3ebb..f45515d 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -188,7 +188,7 @@ SRCS += pfmlib_arm_perf_event.c + endif + + INCARCH = $(INC_ARM64) +-SRCS += pfmlib_arm.c pfmlib_arm_armv8.c ++SRCS += pfmlib_arm.c pfmlib_arm_armv8.c pfmlib_tx2_unc_perf_event.c + CFLAGS += -DCONFIG_PFMLIB_ARCH_ARM64 + endif + +diff --git a/lib/events/arm_cavium_tx2_events.h b/lib/events/arm_cavium_tx2_events.h +index 198d33d..18d8931 100644 +--- a/lib/events/arm_cavium_tx2_events.h ++++ b/lib/events/arm_cavium_tx2_events.h +@@ -835,3 
+835,64 @@ static const arm_entry_t arm_thunderx2_pe[]={ + .desc = "Scu hwpf next line requests generated" + }, + }; ++ ++#define ARM_TX2_CORE_EVENT_COUNT (sizeof(arm_thunderx2_pe)/sizeof(arm_entry_t)) ++ ++/* L3C event IDs */ ++#define L3_EVENT_READ_REQ 0xD ++#define L3_EVENT_WRITEBACK_REQ 0xE ++#define L3_EVENT_EVICT_REQ 0x13 ++#define L3_EVENT_READ_HIT 0x17 ++#define L3_EVENT_MAX 0x18 ++ ++/* DMC event IDs */ ++#define DMC_EVENT_COUNT_CYCLES 0x1 ++#define DMC_EVENT_WRITE_TXNS 0xB ++#define DMC_EVENT_DATA_TRANSFERS 0xD ++#define DMC_EVENT_READ_TXNS 0xF ++#define DMC_EVENT_MAX 0x10 ++ ++static const arm_entry_t arm_thunderx2_unc_dmc_pe[]={ ++ {.name = "UNC_DMC_READS", ++ .modmsk = ARMV8_ATTRS, ++ .code = DMC_EVENT_READ_TXNS, ++ .desc = "Memory read transactions" ++ }, ++ {.name = "UNC_DMC_WRITES", ++ .modmsk = ARMV8_ATTRS, ++ .code = DMC_EVENT_WRITE_TXNS, ++ .desc = "Memory write transactions" ++ }, ++}; ++ ++#define ARM_TX2_CORE_DMC_COUNT (sizeof(arm_thunderx2_unc_dmc_pe)/sizeof(arm_entry_t)) ++ ++static const arm_entry_t arm_thunderx2_unc_llc_pe[]={ ++ {.name = "UNC_LLC_READ", ++ .modmsk = ARMV8_ATTRS, ++ .code = L3_EVENT_READ_REQ, ++ .desc = "Read requests to LLC" ++ }, ++ {.name = "UNC_LLC_EVICT", ++ .modmsk = ARMV8_ATTRS, ++ .code = L3_EVENT_EVICT_REQ, ++ .desc = "Evict requests to LLC" ++ }, ++ {.name = "UNC_LLC_READ_HIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = L3_EVENT_READ_HIT, ++ .desc = "Read requests to LLC which hit" ++ }, ++ {.name = "UNC_LLC_WB", ++ .modmsk = ARMV8_ATTRS, ++ .code = L3_EVENT_WRITEBACK_REQ, ++ .desc = "Writeback requests to LLC" ++ } ++}; ++ ++#define ARM_TX2_CORE_LLC_COUNT (sizeof(arm_thunderx2_unc_llc_pe)/sizeof(arm_entry_t)) ++//Uncore accessor functions ++int ++pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e); ++int ++pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); +diff --git a/lib/pfmlib_arm_armv8.c b/lib/pfmlib_arm_armv8.c +index 0a3313f..35ff70f 100644 +--- a/lib/pfmlib_arm_armv8.c ++++ 
b/lib/pfmlib_arm_armv8.c +@@ -203,3 +203,58 @@ pfmlib_pmu_t arm_thunderx2_support={ + .get_event_nattrs = pfm_arm_get_event_nattrs, + }; + ++// For uncore, each socket has a separate perf name, otherwise they are the same, use macro ++ ++#define DEFINE_TX2_DMC(n) \ ++pfmlib_pmu_t arm_thunderx2_dmc##n##_support={ \ ++ .desc = "Cavium ThunderX2 Node"#n" DMC", \ ++ .name = "tx2_dmc"#n, \ ++ .perf_name = "uncore_dmc_"#n, \ ++ .pmu = PFM_PMU_ARM_THUNDERX2_DMC##n, \ ++ .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_dmc_pe), \ ++ .type = PFM_PMU_TYPE_UNCORE, \ ++ .pe = arm_thunderx2_unc_dmc_pe, \ ++ .pmu_detect = pfm_arm_detect_thunderx2, \ ++ .max_encoding = 1, \ ++ .num_cntrs = 4, \ ++ .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ ++ PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ ++ .get_event_first = pfm_arm_get_event_first, \ ++ .get_event_next = pfm_arm_get_event_next, \ ++ .event_is_valid = pfm_arm_event_is_valid, \ ++ .validate_table = pfm_arm_validate_table, \ ++ .get_event_info = pfm_arm_get_event_info, \ ++ .get_event_attr_info = pfm_arm_get_event_attr_info, \ ++ PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ ++ .get_event_nattrs = pfm_arm_get_event_nattrs, \ ++}; ++ ++DEFINE_TX2_DMC(0); ++DEFINE_TX2_DMC(1); ++ ++#define DEFINE_TX2_LLC(n) \ ++pfmlib_pmu_t arm_thunderx2_llc##n##_support={ \ ++ .desc = "Cavium ThunderX2 node "#n" LLC", \ ++ .name = "tx2_llc"#n, \ ++ .perf_name = "uncore_l3c_"#n, \ ++ .pmu = PFM_PMU_ARM_THUNDERX2_LLC##n, \ ++ .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_llc_pe), \ ++ .type = PFM_PMU_TYPE_UNCORE, \ ++ .pe = arm_thunderx2_unc_llc_pe, \ ++ .pmu_detect = pfm_arm_detect_thunderx2, \ ++ .max_encoding = 1, \ ++ .num_cntrs = 4, \ ++ .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ ++ PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ ++ .get_event_first = pfm_arm_get_event_first, \ ++ .get_event_next = pfm_arm_get_event_next, \ ++ .event_is_valid = pfm_arm_event_is_valid, \ 
++ .validate_table = pfm_arm_validate_table, \ ++ .get_event_info = pfm_arm_get_event_info, \ ++ .get_event_attr_info = pfm_arm_get_event_attr_info, \ ++ PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ ++ .get_event_nattrs = pfm_arm_get_event_nattrs, \ ++}; ++ ++DEFINE_TX2_LLC(0); ++DEFINE_TX2_LLC(1); +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index 2b6cbb4..8314d4b 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -490,6 +490,10 @@ static pfmlib_pmu_t *pfmlib_pmus[]= + &arm_cortex_a53_support, + &arm_xgene_support, + &arm_thunderx2_support, ++ &arm_thunderx2_dmc0_support, ++ &arm_thunderx2_dmc1_support, ++ &arm_thunderx2_llc0_support, ++ &arm_thunderx2_llc1_support, + #endif + + #ifdef CONFIG_PFMLIB_ARCH_S390X +diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h +index b0070a6..cb83f43 100644 +--- a/lib/pfmlib_priv.h ++++ b/lib/pfmlib_priv.h +@@ -644,7 +644,13 @@ extern pfmlib_pmu_t arm_qcom_krait_support; + extern pfmlib_pmu_t arm_cortex_a57_support; + extern pfmlib_pmu_t arm_cortex_a53_support; + extern pfmlib_pmu_t arm_xgene_support; ++ + extern pfmlib_pmu_t arm_thunderx2_support; ++extern pfmlib_pmu_t arm_thunderx2_dmc0_support; ++extern pfmlib_pmu_t arm_thunderx2_dmc1_support; ++extern pfmlib_pmu_t arm_thunderx2_llc0_support; ++extern pfmlib_pmu_t arm_thunderx2_llc1_support; ++ + extern pfmlib_pmu_t mips_74k_support; + extern pfmlib_pmu_t s390x_cpum_cf_support; + extern pfmlib_pmu_t s390x_cpum_sf_support; +diff --git a/lib/pfmlib_tx2_unc_perf_event.c b/lib/pfmlib_tx2_unc_perf_event.c +new file mode 100644 +index 0000000..1a04e1d +--- /dev/null ++++ b/lib/pfmlib_tx2_unc_perf_event.c +@@ -0,0 +1,139 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* private headers */ ++#include "pfmlib_priv.h" ++#include "pfmlib_perf_event_priv.h" ++#include "pfmlib_arm_priv.h" ++ ++typedef union { ++ uint64_t val; ++ struct { ++ unsigned long unc_event:8; /* event code */ ++ unsigned long unc_umask:8; /* unit mask 
*/ ++ unsigned long unc_res1:1; /* reserved */ ++ unsigned long unc_rst:1; /* reset */ ++ unsigned long unc_edge:1; /* edge detect */ ++ unsigned long unc_res2:3; /* reserved */ ++ unsigned long unc_en:1; /* enable */ ++ unsigned long unc_inv:1; /* invert counter mask */ ++ unsigned long unc_thres:8; /* counter mask */ ++ unsigned long unc_res3:32; /* reserved */ ++ } com; /* covers common fields for DMC/L3C */ ++} tx2_unc_data_t; ++ ++static void ++display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg); ++static void ++display_com(void *this, pfmlib_event_desc_t *e, void *val); ++static int ++find_pmu_type_by_name(const char *name); ++ ++int ++pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e) ++{ ++ //from pe field in for the uncore, get the array with all the event defs ++ const arm_entry_t *event_list = this_pe(this); ++ tx2_unc_data_t reg; ++ //get code for the event from the table ++ reg.val = event_list[e->event].code; ++ //pass the data back to the caller ++ e->codes[0] = reg.val; ++ e->count = 1; ++ evt_strcat(e->fstr, "%s", event_list[e->event].name); ++ display_reg(this, e, reg); ++ return PFM_SUCCESS; ++} ++ ++int ++pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e) ++{ ++ pfmlib_pmu_t *pmu = this; ++ struct perf_event_attr *attr = e->os_data; ++ tx2_unc_data_t reg; ++ int ret; ++ ++ if (!pmu->get_event_encoding[PFM_OS_NONE]) ++ return PFM_ERR_NOTSUPP; ++ ++ ret = pmu->get_event_encoding[PFM_OS_NONE](this, e); ++ if (ret != PFM_SUCCESS) ++ return ret; ++ //get pmu type to probe ++ ret = find_pmu_type_by_name(pmu->perf_name); ++ if (ret < 0) ++ return ret; ++ ++ attr->type = ret; ++ //get code to provide to the uncore pmu probe ++ reg.val = e->codes[0]; ++ attr->config = reg.val; ++ ++ // if needed, can use attr->config1 or attr->config2 for extra info from event structure defines e->codes[i] ++ ++ // uncore measures at all priv levels ++ attr->exclude_hv = 0; ++ attr->exclude_kernel = 0; ++ attr->exclude_user 
= 0; ++ ++ return PFM_SUCCESS; ++} ++ ++ ++static void ++display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg) ++{ ++ pfmlib_pmu_t *pmu = this; ++ if (pmu->display_reg) ++ pmu->display_reg(this, e, &reg); ++ else ++ display_com(this, e, &reg); ++} ++ ++static void ++display_com(void *this, pfmlib_event_desc_t *e, void *val) ++{ ++ const arm_entry_t *pe = this_pe(this); ++ tx2_unc_data_t *reg = val; ++ ++ __pfm_vbprintf("[UNC=0x%"PRIx64" event=0x%x umask=0x%x en=%d " ++ "inv=%d edge=%d thres=%d] %s\n", ++ reg->val, ++ reg->com.unc_event, ++ reg->com.unc_umask, ++ reg->com.unc_en, ++ reg->com.unc_inv, ++ reg->com.unc_edge, ++ reg->com.unc_thres, ++ pe[e->event].name); ++} ++ ++static int ++find_pmu_type_by_name(const char *name) ++{ ++ char filename[PATH_MAX]; ++ FILE *fp; ++ int ret, type; ++ ++ if (!name) ++ return PFM_ERR_NOTSUPP; ++ ++ sprintf(filename, "/sys/bus/event_source/devices/%s/type", name); ++ ++ fp = fopen(filename, "r"); ++ if (!fp) ++ return PFM_ERR_NOTSUPP; ++ ++ ret = fscanf(fp, "%d", &type); ++ if (ret != 1) ++ type = PFM_ERR_NOTSUPP; ++ ++ fclose(fp); ++ ++ return type; ++} ++ +diff --git a/tests/validate_arm64.c b/tests/validate_arm64.c +index f7f021a..35eb6ef 100644 +--- a/tests/validate_arm64.c ++++ b/tests/validate_arm64.c +@@ -177,6 +177,12 @@ static const test_event_t arm64_test_events[]={ + .codes[0] = 0x8000008, + .fstr = "arm_thunderx2::INST_RETIRED:k=1:u=1:hv=0", + }, ++ { SRC_LINE, ++ .name = "tx2_dmc1::UNC_DMC_READS", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0xf, ++ }, + }; + #define NUM_TEST_EVENTS (int)(sizeof(arm64_test_events)/sizeof(test_event_t)) + +-- +2.21.0 + +From 6641952170c23c5ab69c1af19197a9d8284c1e53 Mon Sep 17 00:00:00 2001 +From: Shay Gal-On +Date: Thu, 21 Nov 2019 10:41:26 -0800 +Subject: [PATCH 2/4] Moved TX2 uncore event to separate file + +To make event files cleaner. +Also added link to marvell doc publishing the uncore event lists.
+ +Signed-off-by: Shay Gal-On +--- + lib/Makefile | 8 ++- + lib/events/arm_cavium_tx2_events.h | 61 ----------------- + lib/events/arm_marvell_tx2_unc_events.h | 90 +++++++++++++++++++++++++ + lib/pfmlib_arm_armv8.c | 3 +- + 4 files changed, 97 insertions(+), 65 deletions(-) + create mode 100755 lib/events/arm_marvell_tx2_unc_events.h + +diff --git a/lib/Makefile b/lib/Makefile +index f45515d..686264b 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -360,11 +360,13 @@ INC_ARM=pfmlib_arm_priv.h \ + events/arm_cortex_a15_events.h \ + events/arm_cortex_a57_events.h \ + events/arm_cortex_a53_events.h \ +- events/arm_cavium_tx2_events.h ++ events/arm_cavium_tx2_events.h \ ++ events/arm_marvell_tx2_unc_events.h + + INC_ARM64=events/arm_cortex_a57_events.h \ +- events/arm_cortex_a53_events.h \ +- events/arm_cavium_tx2_events.h ++ events/arm_cortex_a53_events.h \ ++ events/arm_cavium_tx2_events.h \ ++ events/arm_marvell_tx2_unc_events.h + + INCDEP=$(INC_COMMON) $(INCARCH) + +diff --git a/lib/events/arm_cavium_tx2_events.h b/lib/events/arm_cavium_tx2_events.h +index 18d8931..198d33d 100644 +--- a/lib/events/arm_cavium_tx2_events.h ++++ b/lib/events/arm_cavium_tx2_events.h +@@ -835,64 +835,3 @@ static const arm_entry_t arm_thunderx2_pe[]={ + .desc = "Scu hwpf next line requests generated" + }, + }; +- +-#define ARM_TX2_CORE_EVENT_COUNT (sizeof(arm_thunderx2_pe)/sizeof(arm_entry_t)) +- +-/* L3C event IDs */ +-#define L3_EVENT_READ_REQ 0xD +-#define L3_EVENT_WRITEBACK_REQ 0xE +-#define L3_EVENT_EVICT_REQ 0x13 +-#define L3_EVENT_READ_HIT 0x17 +-#define L3_EVENT_MAX 0x18 +- +-/* DMC event IDs */ +-#define DMC_EVENT_COUNT_CYCLES 0x1 +-#define DMC_EVENT_WRITE_TXNS 0xB +-#define DMC_EVENT_DATA_TRANSFERS 0xD +-#define DMC_EVENT_READ_TXNS 0xF +-#define DMC_EVENT_MAX 0x10 +- +-static const arm_entry_t arm_thunderx2_unc_dmc_pe[]={ +- {.name = "UNC_DMC_READS", +- .modmsk = ARMV8_ATTRS, +- .code = DMC_EVENT_READ_TXNS, +- .desc = "Memory read transactions" +- }, +- {.name = 
"UNC_DMC_WRITES", +- .modmsk = ARMV8_ATTRS, +- .code = DMC_EVENT_WRITE_TXNS, +- .desc = "Memory write transactions" +- }, +-}; +- +-#define ARM_TX2_CORE_DMC_COUNT (sizeof(arm_thunderx2_unc_dmc_pe)/sizeof(arm_entry_t)) +- +-static const arm_entry_t arm_thunderx2_unc_llc_pe[]={ +- {.name = "UNC_LLC_READ", +- .modmsk = ARMV8_ATTRS, +- .code = L3_EVENT_READ_REQ, +- .desc = "Read requests to LLC" +- }, +- {.name = "UNC_LLC_EVICT", +- .modmsk = ARMV8_ATTRS, +- .code = L3_EVENT_EVICT_REQ, +- .desc = "Evict requests to LLC" +- }, +- {.name = "UNC_LLC_READ_HIT", +- .modmsk = ARMV8_ATTRS, +- .code = L3_EVENT_READ_HIT, +- .desc = "Read requests to LLC which hit" +- }, +- {.name = "UNC_LLC_WB", +- .modmsk = ARMV8_ATTRS, +- .code = L3_EVENT_WRITEBACK_REQ, +- .desc = "Writeback requests to LLC" +- } +-}; +- +-#define ARM_TX2_CORE_LLC_COUNT (sizeof(arm_thunderx2_unc_llc_pe)/sizeof(arm_entry_t)) +-//Uncore accessor functions +-int +-pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e); +-int +-pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); +diff --git a/lib/events/arm_marvell_tx2_unc_events.h b/lib/events/arm_marvell_tx2_unc_events.h +new file mode 100755 +index 0000000..9b0a1b4 +--- /dev/null ++++ b/lib/events/arm_marvell_tx2_unc_events.h +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2019 Marvell Technology Group Ltd ++ * Contributed by Shay Gal-On ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies ++ * of the Software, and to permit persons to whom the Software is furnished to do so, ++ * subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in all ++ * copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, ++ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A ++ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF ++ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE ++ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Marvell ThunderX2 ++ * ++ * ARM Architecture Reference Manual, ARMv8, for ARMv8-A architecture profile, ++ * ARM DDI 0487B.a (ID033117) ++ * ++ * Marvell ThunderX2 C99XX Core and Uncore PMU Events (Abridged) can be found at ++ * https://www.marvell.com/documents/hrur6mybdvk5uki1w0z7/ ++ * ++ */ ++ ++ ++/* L3C event IDs */ ++#define L3_EVENT_READ_REQ 0xD ++#define L3_EVENT_WRITEBACK_REQ 0xE ++#define L3_EVENT_EVICT_REQ 0x13 ++#define L3_EVENT_READ_HIT 0x17 ++#define L3_EVENT_MAX 0x18 ++ ++/* DMC event IDs */ ++#define DMC_EVENT_COUNT_CYCLES 0x1 ++#define DMC_EVENT_WRITE_TXNS 0xB ++#define DMC_EVENT_DATA_TRANSFERS 0xD ++#define DMC_EVENT_READ_TXNS 0xF ++#define DMC_EVENT_MAX 0x10 ++ ++static const arm_entry_t arm_thunderx2_unc_dmc_pe[]={ ++ {.name = "UNC_DMC_READS", ++ .modmsk = ARMV8_ATTRS, ++ .code = DMC_EVENT_READ_TXNS, ++ .desc = "Memory read transactions" ++ }, ++ {.name = "UNC_DMC_WRITES", ++ .modmsk = ARMV8_ATTRS, ++ .code = DMC_EVENT_WRITE_TXNS, ++ .desc = "Memory write transactions" ++ }, ++}; ++ ++#define ARM_TX2_CORE_DMC_COUNT (sizeof(arm_thunderx2_unc_dmc_pe)/sizeof(arm_entry_t)) ++ ++static const arm_entry_t arm_thunderx2_unc_llc_pe[]={ ++ {.name = "UNC_LLC_READ", ++ .modmsk = ARMV8_ATTRS, ++ .code = L3_EVENT_READ_REQ, ++ .desc = "Read requests to LLC" ++ }, ++ {.name = "UNC_LLC_EVICT", ++ .modmsk = ARMV8_ATTRS, ++ .code = L3_EVENT_EVICT_REQ, ++ .desc = "Evict requests to LLC" ++ }, ++ {.name = "UNC_LLC_READ_HIT", ++ .modmsk = ARMV8_ATTRS, ++ .code = 
L3_EVENT_READ_HIT, ++ .desc = "Read requests to LLC which hit" ++ }, ++ {.name = "UNC_LLC_WB", ++ .modmsk = ARMV8_ATTRS, ++ .code = L3_EVENT_WRITEBACK_REQ, ++ .desc = "Writeback requests to LLC" ++ } ++}; ++ ++#define ARM_TX2_CORE_LLC_COUNT (sizeof(arm_thunderx2_unc_llc_pe)/sizeof(arm_entry_t)) ++//Uncore accessor functions ++int ++pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e); ++int ++pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); +diff --git a/lib/pfmlib_arm_armv8.c b/lib/pfmlib_arm_armv8.c +index 35ff70f..291ac60 100644 +--- a/lib/pfmlib_arm_armv8.c ++++ b/lib/pfmlib_arm_armv8.c +@@ -33,7 +33,8 @@ + #include "events/arm_cortex_a57_events.h" /* A57 event tables */ + #include "events/arm_cortex_a53_events.h" /* A53 event tables */ + #include "events/arm_xgene_events.h" /* Applied Micro X-Gene tables */ +-#include "events/arm_cavium_tx2_events.h" /* Cavium ThunderX2 tables */ ++#include "events/arm_cavium_tx2_events.h" /* Marvell ThunderX2 tables */ ++#include "events/arm_marvell_tx2_unc_events.h" /* Marvell ThunderX2 PMU tables */ + + static int + pfm_arm_detect_cortex_a57(void *this) +-- +2.21.0 + +From dc1da4573eb8d24bdf64b9bb5e04ed956075d712 Mon Sep 17 00:00:00 2001 +From: Shay Gal-On +Date: Mon, 25 Nov 2019 12:00:15 -0800 +Subject: [PATCH 3/4] Add ThunderX2 DMC events and CCPI events + +This patch adds missing 2 DMC events for ThunderX2 +and adds support for the Cross Core Complex Interconnect +(CCPI) PMU and events. 
+ +The following PMU models are added: + - tx2_ccpi0, tx2_ccpi1 + - tx2_dmc0, tx2_dmc1 + +Signed-off-by: Shay Gal-On +--- + include/perfmon/pfmlib.h | 12 +-- + lib/Makefile | 2 +- + lib/events/arm_marvell_tx2_unc_events.h | 42 ++++++++++ + lib/pfmlib_arm_armv8.c | 33 +++++++- + lib/pfmlib_common.c | 8 ++ + lib/pfmlib_priv.h | 2 + + lib/pfmlib_tx2_unc_perf_event.c | 101 ++++++++++-------------- + tests/validate_arm64.c | 15 ++++ + 8 files changed, 148 insertions(+), 67 deletions(-) + +diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h +index 20d5feb..3f1d2f5 100644 +--- a/include/perfmon/pfmlib.h ++++ b/include/perfmon/pfmlib.h +@@ -543,12 +543,14 @@ typedef enum { + + PFM_PMU_INTEL_KNM_UNC_UBOX, /* Intel Knights Mill Ubox uncore */ + PFM_PMU_INTEL_KNM_UNC_M2PCIE, /* Intel Knights Mill M2PCIe uncore */ +- PFM_PMU_ARM_THUNDERX2, /* Cavium ThunderX2 */ ++ PFM_PMU_ARM_THUNDERX2, /* Marvell ThunderX2 */ + +- PFM_PMU_ARM_THUNDERX2_DMC0, /* Cavium ThunderX2 DMC unit 0 uncore */ +- PFM_PMU_ARM_THUNDERX2_DMC1, /* Cavium ThunderX2 DMC unit 1 uncore */ +- PFM_PMU_ARM_THUNDERX2_LLC0, /* Cavium ThunderX2 LLC unit 0 uncore */ +- PFM_PMU_ARM_THUNDERX2_LLC1, /* Cavium ThunderX2 LLC unit 1 uncore */ ++ PFM_PMU_ARM_THUNDERX2_DMC0, /* Marvell ThunderX2 DMC unit 0 uncore */ ++ PFM_PMU_ARM_THUNDERX2_DMC1, /* Marvell ThunderX2 DMC unit 1 uncore */ ++ PFM_PMU_ARM_THUNDERX2_LLC0, /* Marvell ThunderX2 LLC unit 0 uncore */ ++ PFM_PMU_ARM_THUNDERX2_LLC1, /* Marvell ThunderX2 LLC unit 1 uncore */ ++ PFM_PMU_ARM_THUNDERX2_CCPI0, /* Marvell ThunderX2 Cross-Socket Interconnect unit 0 uncore */ ++ PFM_PMU_ARM_THUNDERX2_CCPI1, /* Marvell ThunderX2 Cross-Socket Interconnect unit 1 uncore */ + /* MUST ADD NEW PMU MODELS HERE */ + + PFM_PMU_MAX /* end marker */ +diff --git a/lib/Makefile b/lib/Makefile +index 686264b..4a4dc3b 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -177,7 +177,7 @@ SRCS += pfmlib_arm_perf_event.c + endif + + INCARCH = $(INC_ARM) +-SRCS += pfmlib_arm.c 
pfmlib_arm_armv7_pmuv1.c pfmlib_arm_armv6.c pfmlib_arm_armv8.c ++SRCS += pfmlib_arm.c pfmlib_arm_armv7_pmuv1.c pfmlib_arm_armv6.c pfmlib_arm_armv8.c pfmlib_tx2_unc_perf_event.c + CFLAGS += -DCONFIG_PFMLIB_ARCH_ARM + endif + +diff --git a/lib/events/arm_marvell_tx2_unc_events.h b/lib/events/arm_marvell_tx2_unc_events.h +index 9b0a1b4..51e6b4d 100755 +--- a/lib/events/arm_marvell_tx2_unc_events.h ++++ b/lib/events/arm_marvell_tx2_unc_events.h +@@ -44,6 +44,13 @@ + #define DMC_EVENT_READ_TXNS 0xF + #define DMC_EVENT_MAX 0x10 + ++/* CCPI event IDs */ ++#define CCPI2_EVENT_REQ_PKT_SENT 0x3D ++#define CCPI2_EVENT_SNOOP_PKT_SENT 0x65 ++#define CCPI2_EVENT_DATA_PKT_SENT 0x105 ++#define CCPI2_EVENT_GIC_PKT_SENT 0x12D ++ ++ + static const arm_entry_t arm_thunderx2_unc_dmc_pe[]={ + {.name = "UNC_DMC_READS", + .modmsk = ARMV8_ATTRS, +@@ -55,10 +62,45 @@ static const arm_entry_t arm_thunderx2_unc_dmc_pe[]={ + .code = DMC_EVENT_WRITE_TXNS, + .desc = "Memory write transactions" + }, ++ {.name = "UNC_DMC_DATA_TRANSFERS", ++ .modmsk = ARMV8_ATTRS, ++ .code = DMC_EVENT_DATA_TRANSFERS, ++ .desc = "Memory data transfers" ++ }, ++ {.name = "UNC_DMC_CYCLES", ++ .modmsk = ARMV8_ATTRS, ++ .code = DMC_EVENT_COUNT_CYCLES, ++ .desc = "Clocks at the DMC clock rate" ++ } + }; + + #define ARM_TX2_CORE_DMC_COUNT (sizeof(arm_thunderx2_unc_dmc_pe)/sizeof(arm_entry_t)) + ++static const arm_entry_t arm_thunderx2_unc_ccpi_pe[]={ ++ {.name = "UNC_CCPI_REQ", ++ .modmsk = ARMV8_ATTRS, ++ .code = CCPI2_EVENT_REQ_PKT_SENT, ++ .desc = "Request packets sent from this node" ++ }, ++ {.name = "UNC_CCPI_SNOOP", ++ .modmsk = ARMV8_ATTRS, ++ .code = CCPI2_EVENT_SNOOP_PKT_SENT, ++ .desc = "Snoop packets sent from this node" ++ }, ++ {.name = "UNC_CCPI_DATA", ++ .modmsk = ARMV8_ATTRS, ++ .code = CCPI2_EVENT_DATA_PKT_SENT , ++ .desc = "Data packets sent from this node" ++ }, ++ {.name = "UNC_CCPI_GIC", ++ .modmsk = ARMV8_ATTRS, ++ .code = CCPI2_EVENT_GIC_PKT_SENT, ++ .desc = "Interrupt related packets sent from 
this node" ++ } ++}; ++ ++#define ARM_TX2_CORE_CCPI_COUNT (sizeof(arm_thunderx2_unc_ccpi_pe)/sizeof(arm_entry_t)) ++ + static const arm_entry_t arm_thunderx2_unc_llc_pe[]={ + {.name = "UNC_LLC_READ", + .modmsk = ARMV8_ATTRS, +diff --git a/lib/pfmlib_arm_armv8.c b/lib/pfmlib_arm_armv8.c +index 291ac60..a252951 100644 +--- a/lib/pfmlib_arm_armv8.c ++++ b/lib/pfmlib_arm_armv8.c +@@ -179,7 +179,7 @@ pfmlib_pmu_t arm_xgene_support={ + .get_event_nattrs = pfm_arm_get_event_nattrs, + }; + +-/* Cavium ThunderX2 support */ ++/* Marvell ThunderX2 support */ + pfmlib_pmu_t arm_thunderx2_support={ + .desc = "Cavium ThunderX2", + .name = "arm_thunderx2", +@@ -208,7 +208,7 @@ pfmlib_pmu_t arm_thunderx2_support={ + + #define DEFINE_TX2_DMC(n) \ + pfmlib_pmu_t arm_thunderx2_dmc##n##_support={ \ +- .desc = "Cavium ThunderX2 Node"#n" DMC", \ ++ .desc = "Marvell ThunderX2 Node"#n" DMC", \ + .name = "tx2_dmc"#n, \ + .perf_name = "uncore_dmc_"#n, \ + .pmu = PFM_PMU_ARM_THUNDERX2_DMC##n, \ +@@ -235,7 +235,7 @@ DEFINE_TX2_DMC(1); + + #define DEFINE_TX2_LLC(n) \ + pfmlib_pmu_t arm_thunderx2_llc##n##_support={ \ +- .desc = "Cavium ThunderX2 node "#n" LLC", \ ++ .desc = "Marvell ThunderX2 node "#n" LLC", \ + .name = "tx2_llc"#n, \ + .perf_name = "uncore_l3c_"#n, \ + .pmu = PFM_PMU_ARM_THUNDERX2_LLC##n, \ +@@ -259,3 +259,30 @@ pfmlib_pmu_t arm_thunderx2_llc##n##_support={ \ + + DEFINE_TX2_LLC(0); + DEFINE_TX2_LLC(1); ++ ++#define DEFINE_TX2_CCPI(n) \ ++pfmlib_pmu_t arm_thunderx2_ccpi##n##_support={ \ ++ .desc = "Marvell ThunderX2 node "#n" Cross-Socket Interconnect", \ ++ .name = "tx2_ccpi"#n, \ ++ .perf_name = "uncore_ccpi_"#n, \ ++ .pmu = PFM_PMU_ARM_THUNDERX2_CCPI##n, \ ++ .pme_count = LIBPFM_ARRAY_SIZE(arm_thunderx2_unc_ccpi_pe), \ ++ .type = PFM_PMU_TYPE_UNCORE, \ ++ .pe = arm_thunderx2_unc_ccpi_pe, \ ++ .pmu_detect = pfm_arm_detect_thunderx2, \ ++ .max_encoding = 1, \ ++ .num_cntrs = 4, \ ++ .get_event_encoding[PFM_OS_NONE] = pfm_tx2_unc_get_event_encoding, \ ++ 
PFMLIB_ENCODE_PERF(pfm_tx2_unc_get_perf_encoding), \ ++ .get_event_first = pfm_arm_get_event_first, \ ++ .get_event_next = pfm_arm_get_event_next, \ ++ .event_is_valid = pfm_arm_event_is_valid, \ ++ .validate_table = pfm_arm_validate_table, \ ++ .get_event_info = pfm_arm_get_event_info, \ ++ .get_event_attr_info = pfm_arm_get_event_attr_info, \ ++ PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs),\ ++ .get_event_nattrs = pfm_arm_get_event_nattrs, \ ++}; ++ ++DEFINE_TX2_CCPI(0); ++DEFINE_TX2_CCPI(1); +diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c +index 8314d4b..8cb8998 100644 +--- a/lib/pfmlib_common.c ++++ b/lib/pfmlib_common.c +@@ -484,6 +484,12 @@ static pfmlib_pmu_t *pfmlib_pmus[]= + &arm_cortex_a53_support, + &arm_xgene_support, + &arm_thunderx2_support, ++ &arm_thunderx2_dmc0_support, ++ &arm_thunderx2_dmc1_support, ++ &arm_thunderx2_llc0_support, ++ &arm_thunderx2_llc1_support, ++ &arm_thunderx2_ccpi0_support, ++ &arm_thunderx2_ccpi1_support, + #endif + #ifdef CONFIG_PFMLIB_ARCH_ARM64 + &arm_cortex_a57_support, +@@ -494,6 +500,8 @@ static pfmlib_pmu_t *pfmlib_pmus[]= + &arm_thunderx2_dmc1_support, + &arm_thunderx2_llc0_support, + &arm_thunderx2_llc1_support, ++ &arm_thunderx2_ccpi0_support, ++ &arm_thunderx2_ccpi1_support, + #endif + + #ifdef CONFIG_PFMLIB_ARCH_S390X +diff --git a/lib/pfmlib_priv.h b/lib/pfmlib_priv.h +index cb83f43..1340a6b 100644 +--- a/lib/pfmlib_priv.h ++++ b/lib/pfmlib_priv.h +@@ -650,6 +650,8 @@ extern pfmlib_pmu_t arm_thunderx2_dmc0_support; + extern pfmlib_pmu_t arm_thunderx2_dmc1_support; + extern pfmlib_pmu_t arm_thunderx2_llc0_support; + extern pfmlib_pmu_t arm_thunderx2_llc1_support; ++extern pfmlib_pmu_t arm_thunderx2_ccpi0_support; ++extern pfmlib_pmu_t arm_thunderx2_ccpi1_support; + + extern pfmlib_pmu_t mips_74k_support; + extern pfmlib_pmu_t s390x_cpum_cf_support; +diff --git a/lib/pfmlib_tx2_unc_perf_event.c b/lib/pfmlib_tx2_unc_perf_event.c +index 1a04e1d..7dc2372 100644 +--- a/lib/pfmlib_tx2_unc_perf_event.c 
++++ b/lib/pfmlib_tx2_unc_perf_event.c +@@ -27,11 +27,51 @@ typedef union { + } tx2_unc_data_t; + + static void +-display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg); ++display_com(void *this, pfmlib_event_desc_t *e, void *val) ++{ ++ const arm_entry_t *pe = this_pe(this); ++ tx2_unc_data_t *reg = val; ++ ++ __pfm_vbprintf("[UNC=0x%"PRIx64"] %s\n", ++ reg->val, ++ pe[e->event].name); ++} ++ + static void +-display_com(void *this, pfmlib_event_desc_t *e, void *val); ++display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg) ++{ ++ pfmlib_pmu_t *pmu = this; ++ if (pmu->display_reg) ++ pmu->display_reg(this, e, &reg); ++ else ++ display_com(this, e, &reg); ++} ++ ++ + static int +-find_pmu_type_by_name(const char *name); ++find_pmu_type_by_name(const char *name) ++{ ++ char filename[PATH_MAX]; ++ FILE *fp; ++ int ret, type; ++ ++ if (!name) ++ return PFM_ERR_NOTSUPP; ++ ++ sprintf(filename, "/sys/bus/event_source/devices/%s/type", name); ++ ++ fp = fopen(filename, "r"); ++ if (!fp) ++ return PFM_ERR_NOTSUPP; ++ ++ ret = fscanf(fp, "%d", &type); ++ if (ret != 1) ++ type = PFM_ERR_NOTSUPP; ++ ++ fclose(fp); ++ ++ return type; ++} + + int + pfm_tx2_unc_get_event_encoding(void *this, pfmlib_event_desc_t *e) +@@ -82,58 +122,3 @@ pfm_tx2_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e) + + return PFM_SUCCESS; + } +- +- +-static void +-display_reg(void *this, pfmlib_event_desc_t *e, tx2_unc_data_t reg) +-{ +- pfmlib_pmu_t *pmu = this; +- if (pmu->display_reg) +- pmu->display_reg(this, e, &reg); +- else +- display_com(this, e, &reg); +-} +- +-static void +-display_com(void *this, pfmlib_event_desc_t *e, void *val) +-{ +- const arm_entry_t *pe = this_pe(this); +- tx2_unc_data_t *reg = val; +- +- __pfm_vbprintf("[UNC=0x%"PRIx64" event=0x%x umask=0x%x en=%d " +- "inv=%d edge=%d thres=%d] %s\n", +- reg->val, +- reg->com.unc_event, +- reg->com.unc_umask, +- reg->com.unc_en, +- reg->com.unc_inv, +- reg->com.unc_edge, +- reg->com.unc_thres, +-
pe[e->event].name); +-} +- +-static int +-find_pmu_type_by_name(const char *name) +-{ +- char filename[PATH_MAX]; +- FILE *fp; +- int ret, type; +- +- if (!name) +- return PFM_ERR_NOTSUPP; +- +- sprintf(filename, "/sys/bus/event_source/devices/%s/type", name); +- +- fp = fopen(filename, "r"); +- if (!fp) +- return PFM_ERR_NOTSUPP; +- +- ret = fscanf(fp, "%d", &type); +- if (ret != 1) +- type = PFM_ERR_NOTSUPP; +- +- fclose(fp); +- +- return type; +-} +- +diff --git a/tests/validate_arm64.c b/tests/validate_arm64.c +index 35eb6ef..5cb1966 100644 +--- a/tests/validate_arm64.c ++++ b/tests/validate_arm64.c +@@ -182,6 +182,21 @@ static const test_event_t arm64_test_events[]={ + .ret = PFM_SUCCESS, + .count = 1, + .codes[0] = 0xf, ++ .fstr = "tx2_dmc1::UNC_DMC_READS", ++ }, ++ { SRC_LINE, ++ .name = "tx2_ccpi0::UNC_CCPI_GIC", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0x12d, ++ .fstr = "tx2_ccpi0::UNC_CCPI_GIC", ++ }, ++ { SRC_LINE, ++ .name = "tx2_llc0::UNC_LLC_READ", ++ .ret = PFM_SUCCESS, ++ .count = 1, ++ .codes[0] = 0xd, ++ .fstr = "tx2_llc0::UNC_LLC_READ", + }, + }; + #define NUM_TEST_EVENTS (int)(sizeof(arm64_test_events)/sizeof(test_event_t)) +-- +2.21.0 + +From e401d29e89b92e999615e11ea17808e90eda93fd Mon Sep 17 00:00:00 2001 +From: Shay Gal-On +Date: Tue, 3 Dec 2019 09:54:37 -0800 +Subject: [PATCH 4/4] Removed extra fields from tx2_unc_data_t + +Removed useless fields from tx2_unc_data_t. 
+ +Signed-off-by: Shay Gal-On +--- + lib/events/arm_marvell_tx2_unc_events.h | 0 + lib/pfmlib_tx2_unc_perf_event.c | 13 ++----------- + 2 files changed, 2 insertions(+), 11 deletions(-) + mode change 100755 => 100644 lib/events/arm_marvell_tx2_unc_events.h + +diff --git a/lib/events/arm_marvell_tx2_unc_events.h b/lib/events/arm_marvell_tx2_unc_events.h +old mode 100755 +new mode 100644 +diff --git a/lib/pfmlib_tx2_unc_perf_event.c b/lib/pfmlib_tx2_unc_perf_event.c +index 7dc2372..154cb0a 100644 +--- a/lib/pfmlib_tx2_unc_perf_event.c ++++ b/lib/pfmlib_tx2_unc_perf_event.c +@@ -13,17 +13,8 @@ + typedef union { + uint64_t val; + struct { +- unsigned long unc_event:8; /* event code */ +- unsigned long unc_umask:8; /* unit mask */ +- unsigned long unc_res1:1; /* reserved */ +- unsigned long unc_rst:1; /* reset */ +- unsigned long unc_edge:1; /* edge detect */ +- unsigned long unc_res2:3; /* reserved */ +- unsigned long unc_en:1; /* enable */ +- unsigned long unc_inv:1; /* invert counter mask */ +- unsigned long unc_thres:8; /* counter mask */ +- unsigned long unc_res3:32; /* reserved */ +- } com; /* covers common fields for DMC/L3C */ ++ unsigned long unc_res1:32; /* reserved */ ++ } com; /* reserved space for future extensions */ + } tx2_unc_data_t; + + static void +-- +2.21.0 + diff --git a/libpfm-zseries.patch b/libpfm-zseries.patch new file mode 100644 index 0000000..e85419f --- /dev/null +++ b/libpfm-zseries.patch @@ -0,0 +1,128 @@ +commit dc643f4a460063a628b4bfba57dbbb36673a0789 +Author: Thomas Richter +Date: Thu Jul 11 17:48:03 2019 -0700 + + s390/cpumf: add IBM z14 ZR1 support + + Add support for machine type 0x3907. 
+ + Signed-off-by: Thomas Richter + +diff --git a/lib/pfmlib_s390x_cpumf.c b/lib/pfmlib_s390x_cpumf.c +index 4e03fc4..7566aa0 100644 +--- a/lib/pfmlib_s390x_cpumf.c ++++ b/lib/pfmlib_s390x_cpumf.c +@@ -192,6 +192,7 @@ static int pfm_cpumcf_init(void *this) + ext_set_count = LIBPFM_ARRAY_SIZE(cpumcf_z13_counters); + break; + case 3906: /* IBM z14 */ ++ case 3907: /* IBM z14 ZR1 */ + ext_set = cpumcf_z14_counters; + ext_set_count = LIBPFM_ARRAY_SIZE(cpumcf_z14_counters); + break; +commit c08003fb085e23f2229e58cc176fcfcf58a3b238 +Author: Thomas Richter +Date: Thu Jul 11 17:49:42 2019 -0700 + + s390/cpumf: add support for counter second version number 6 + + IBM added second version number 6 in the CPU Measurement + Counter facility to indicate additional counters in the + crypto counter set. + + Signed-off-by: Thomas Richter + +diff --git a/lib/events/s390x_cpumf_events.h b/lib/events/s390x_cpumf_events.h +index 8be1d55..8587a3b 100644 +--- a/lib/events/s390x_cpumf_events.h ++++ b/lib/events/s390x_cpumf_events.h +@@ -13,6 +13,8 @@ + #define CPUMF_CTRSET_EXTENDED 1 + #define CPUMF_CTRSET_MT_DIAG 32 + ++#define CPUMF_SVN6_ECC 4 ++ + + static const pme_cpumf_ctr_t cpumcf_fvn1_counters[] = { + { +@@ -270,6 +272,43 @@ static const pme_cpumf_ctr_t cpumcf_svn_generic_counters[] = { + " coprocessor is busy performing a function issued by" + " another CPU", + }, ++ { ++ .ctrnum = 80, ++ .ctrset = CPUMF_CTRSET_CRYPTO, ++ .name = "ECC_FUNCTION_COUNT", ++ .desc = "This counter counts the" ++ " total number of the elliptic-curve cryptography (ECC)" ++ " functions issued by the CPU.", ++ }, ++ { ++ .ctrnum = 81, ++ .ctrset = CPUMF_CTRSET_CRYPTO, ++ .name = "ECC_CYCLES_COUNT", ++ .desc = "This counter counts the total" ++ " number of CPU cycles when the ECC coprocessor is" ++ " busy performing the elliptic-curve cryptography" ++ " (ECC) functions issued by the CPU.", ++ }, ++ { ++ .ctrnum = 82, ++ .ctrset = CPUMF_CTRSET_CRYPTO, ++ .name = "ECC_BLOCKED_FUNCTION_COUNT", ++ .desc =
"This counter counts the total number of the" ++ " elliptic-curve cryptography (ECC) functions that are" ++ " issued by the CPU and are blocked because the ECC" ++ " coprocessor is busy performing a function issued by" ++ " another CPU.", ++ }, ++ { ++ .ctrnum = 83, ++ .ctrset = CPUMF_CTRSET_CRYPTO, ++ .name = "ECC_BLOCKED_CYCLES_COUNT", ++ .desc = "This counter counts the total number of CPU cycles " ++ " blocked for the elliptic-curve cryptography (ECC) " ++ " functions issued by the CPU because the ECC" ++ " coprocessor is busy perform ing a function issued" ++ " by another CPU.", ++ }, + }; + + static const pme_cpumf_ctr_t cpumcf_z10_counters[] = { +diff --git a/lib/pfmlib_s390x_cpumf.c b/lib/pfmlib_s390x_cpumf.c +index 7566aa0..e68b000 100644 +--- a/lib/pfmlib_s390x_cpumf.c ++++ b/lib/pfmlib_s390x_cpumf.c +@@ -168,6 +168,8 @@ static int pfm_cpumcf_init(void *this) + /* counters based on second version number */ + csvn_set = cpumcf_svn_generic_counters; + csvn_set_count = LIBPFM_ARRAY_SIZE(cpumcf_svn_generic_counters); ++ if (csvn < 6) /* Crypto counter set enlarged for SVN == 6 */ ++ csvn_set_count -= CPUMF_SVN6_ECC; + + /* check and assign a machine-specific extended counter set */ + switch (get_machine_type()) { +commit b831b04254ac7fd1700b20b349799810952ff66e +Author: Thomas Richter +Date: Thu Jul 11 17:51:03 2019 -0700 + + s390/cpumf: add support for machine type 8561 + + Add extended counter set support for s390 machine types + 8561 and 8562. 
+ + Signed-off-by: Thomas Richter + +diff --git a/lib/pfmlib_s390x_cpumf.c b/lib/pfmlib_s390x_cpumf.c +index e68b000..c11f9d9 100644 +--- a/lib/pfmlib_s390x_cpumf.c ++++ b/lib/pfmlib_s390x_cpumf.c +@@ -195,6 +195,8 @@ static int pfm_cpumcf_init(void *this) + break; + case 3906: /* IBM z14 */ + case 3907: /* IBM z14 ZR1 */ ++ case 8561: /* IBM Machine types 8561 and 8562 */ ++ case 8562: + ext_set = cpumcf_z14_counters; + ext_set_count = LIBPFM_ARRAY_SIZE(cpumcf_z14_counters); + break; diff --git a/libpfm.spec b/libpfm.spec new file mode 100644 index 0000000..46b5b61 --- /dev/null +++ b/libpfm.spec @@ -0,0 +1,263 @@ +%bcond_without python +%if %{with python} +%define python_sitearch %(python3 -c "from distutils.sysconfig import get_python_lib; print (get_python_lib(1))") +%define python_prefix %(python3 -c "import sys; print (sys.prefix)") +%{?filter_setup: +%filter_provides_in %{python_sitearch}/perfmon/.*\.so$ +%filter_setup +} +%endif + +Name: libpfm +Version: 4.10.1 +Release: 4%{?dist} + +Summary: Library to encode performance events for use by perf tool + +Group: System Environment/Libraries +License: MIT +URL: http://perfmon2.sourceforge.net/ +Source0: http://sourceforge.net/projects/perfmon2/files/libpfm4/%{name}-%{version}.tar.gz +Patch2: libpfm-python3-setup.patch +Patch3: libpfm-zseries.patch +Patch4: libpfm-tx2.patch +Patch5: libpfm-a64fx.patch + +%if %{with python} +BuildRequires: python3 +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: swig +%endif + +%description + +libpfm4 is a library to help encode events for use with operating system +kernels performance monitoring interfaces. The current version provides support +for the perf_events interface available in upstream Linux kernels since v2.6.31. 
+ +%package devel +Summary: Development library to encode performance events for perf_events based tools +Group: Development/Libraries +Requires: %{name}%{?_isa} = %{version}-%{release} + +%description devel +Development library and header files to create performance monitoring +applications for the perf_events interface. + +%package static +Summary: Static library to encode performance events for perf_events based tools +Group: Development/Libraries +Requires: %{name}%{?_isa} = %{version}-%{release} + +%description static +Static version of the libpfm library for performance monitoring +applications for the perf_events interface. + +%if %{with python} +%package -n python3-libpfm +%{?python_provide:%python_provide python3-libpfm} +# Remove before F30 +Provides: %{name}-python = %{version}-%{release} +Provides: %{name}-python%{?_isa} = %{version}-%{release} +Obsoletes: %{name}-python < %{version}-%{release} +Summary: Python bindings for libpfm and perf_event_open system call +Group: Development/Languages +Requires: %{name}%{?_isa} = %{version}-%{release} + +%description -n python3-libpfm +Python bindings for libpfm4 and perf_event_open system call. 
+%endif + +%prep +%setup -q +%patch2 -p1 -b .python3 +%patch3 -p1 -b .zseries +%patch4 -p1 -b .tx2 +%patch5 -p1 -b .a64fx + +%build +%if %{with python} +%global python_config CONFIG_PFMLIB_NOPYTHON=n +%else +%global python_config CONFIG_PFMLIB_NOPYTHON=y +%endif +make %{python_config} %{?_smp_mflags} \ + OPTIM="%{optflags}" LDFLAGS="%{build_ldflags}" + + +%install +rm -rf $RPM_BUILD_ROOT + +%if %{with python} +%global python_config CONFIG_PFMLIB_NOPYTHON=n PYTHON_PREFIX=$RPM_BUILD_ROOT/%{python_prefix} +%else +%global python_config CONFIG_PFMLIB_NOPYTHON=y +%endif + +make \ + PREFIX=$RPM_BUILD_ROOT%{_prefix} \ + LIBDIR=$RPM_BUILD_ROOT%{_libdir} \ + %{python_config} \ + LDCONFIG=/bin/true \ + install + +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig + +%files +%doc README +%{_libdir}/lib*.so.* + +%files devel +%{_includedir}/* +%{_mandir}/man3/* +%{_libdir}/lib*.so + +%files static +%{_libdir}/lib*.a + +%if %{with python} +%files -n python3-libpfm +%{python_sitearch}/* +%endif + +%changelog +* Thu May 20 2021 William Cohen - 4.10.1-4 +- Add Fujitsu A64FX support (RHBZ #1908126) + +* Wed May 27 2020 William Cohen - 4.10.1-3 +- Add Marvell ThunderX2 UNC support. (RHBZ #1726070) + +* Tue Oct 15 2019 William Cohen - 4.10.1-2 +- Add IBM zseries support. (RHBZ #1731019) + +* Fri Jun 15 2018 William Cohen - 4.10.1-1 +- Rebase on libpfm-4.10.1. + +* Tue Jun 12 2018 William Cohen - 4.10.0-2 +- Use upstream libpfm cavium patch. + +* Fri Jun 8 2018 William Cohen - 4.10.0-1 +- Rebase on libpfm-4.10.0. +- Use Python 3. + +* Mon Feb 26 2018 Fedora Release Engineering - 4.9.0-6 +- Pass in LDFLAGS for build. + +* Wed Feb 07 2018 Fedora Release Engineering - 4.9.0-5 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_28_Mass_Rebuild + +* Tue Jan 30 2018 William Cohen - 4.9.0-4 +- Address truncation issues. + +* Tue Jan 30 2018 William Cohen - 4.9.0-3 +- Use the RPM build flags.
(RHBZ #1540262) + +* Tue Jan 09 2018 Iryna Shcherbina - 4.9.0-2 +- Update Python 2 dependency declarations to new packaging standards + (See https://fedoraproject.org/wiki/FinalizingFedoraSwitchtoPython3) + +* Thu Jan 4 2018 William Cohen - 4.9.0-1 +- Rebase on libpfm-4.9.0. + +* Sun Aug 20 2017 Zbigniew Jędrzejewski-Szmek - 4.8.0-8 +- Add Provides for the old name without %%_isa + +* Sat Aug 19 2017 Zbigniew Jędrzejewski-Szmek - 4.8.0-7 +- Python 2 binary package renamed to python2-libpfm + See https://fedoraproject.org/wiki/FinalizingFedoraSwitchtoPython3 + +* Thu Aug 03 2017 Fedora Release Engineering - 4.8.0-6 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + +* Wed Jul 26 2017 Fedora Release Engineering - 4.8.0-5 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + +* Fri Jul 07 2017 Igor Gnatenko - 4.8.0-4 +- Rebuild due to bug in RPM (RHBZ #1468476) + +* Fri Feb 10 2017 Fedora Release Engineering - 4.8.0-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_26_Mass_Rebuild + +* Mon Nov 7 2016 William Cohen - 4.8.0-1 +- Rebase on libpfm-4.8.0. + +* Tue Jul 19 2016 Fedora Release Engineering - 4.7.0-2 +- https://fedoraproject.org/wiki/Changes/Automatic_Provides_for_Python_RPM_Packages + +* Thu Feb 11 2016 William Cohen - 4.7.0-1 +- Rebase on libpfm-4.7.0. + +* Thu Feb 04 2016 Fedora Release Engineering - 4.6.0-4 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_24_Mass_Rebuild + +* Wed Jun 17 2015 Fedora Release Engineering - 4.6.0-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild + +* Thu Jun 4 2015 William Cohen - 4.6.0-2 +- Correct requires for subpackages. + +* Thu Mar 5 2015 William Cohen - 4.6.0-1 +- Rebase on libpfm-4.6.0. + +* Wed Feb 11 2015 William Cohen - 4.5.0-6 +- Bump version and rebuild. 
+ +* Sun Aug 17 2014 Fedora Release Engineering - 4.5.0-5 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_22_Mass_Rebuild + +* Mon Jun 30 2014 Toshio Kuratomi - 4.5.0-4 +- Replace python-setuptools-devel BR with python-setuptools + +* Sat Jun 07 2014 Fedora Release Engineering - 4.5.0-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild + +* Fri May 23 2014 William Cohen 4.5.0-2 +- Add cortex a53 support. + +* Fri May 23 2014 William Cohen 4.5.0-1 +- Rebase on libpfm-4.5.0. + +* Sat Aug 03 2013 Fedora Release Engineering - 4.4.0-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_20_Mass_Rebuild + +* Fri Jul 19 2013 William Cohen 4.4.0-2 +- Add IBM power 8 support. + +* Mon Jun 17 2013 William Cohen 4.4.0-1 +- Rebase on libpfm-4.4.0. + +* Thu Feb 14 2013 Fedora Release Engineering - 4.3.0-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild + +* Tue Aug 28 2012 William Cohen 4.3.0-2 +- Turn off LDCONFIG and remove patch. + +* Tue Aug 28 2012 William Cohen 4.3.0-1 +- Rebase on libpfm-4.3.0. + +* Thu Jul 19 2012 Fedora Release Engineering - 4.2.0-8 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild + +* Fri Jun 8 2012 William Cohen 4.2.0-7 +- Eliminate swig error. + +* Thu Jun 7 2012 William Cohen 4.2.0-6 +- Eliminate rpm_build_root macro in build section. +- Correct location of shared library files. + +* Thu Jun 7 2012 William Cohen 4.2.0-5 +- Use siginfo_t for some examples. + +* Mon Jun 4 2012 William Cohen 4.2.0-4 +- Correct python files. + +* Wed Mar 28 2012 William Cohen 4.2.0-3 +- Additional spec file fixup for rhbz804666. + +* Wed Mar 14 2012 William Cohen 4.2.0-2 +- Some spec file fixup. 
+ +* Wed Jan 12 2011 Arun Sharma 4.2.0-0 +- Initial revision diff --git a/sources b/sources new file mode 100644 index 0000000..2f0b0d2 --- /dev/null +++ b/sources @@ -0,0 +1 @@ +SHA512 (libpfm-4.10.1.tar.gz) = 33d99824216b4d83784e0db9f1aae3b39f84c2ba42dee64dde0b3c43cc5ee3a18c206d9044e75bf10867768add7cb9967c6318f7aa196f178f334271fa05e1aa