import rasdaemon-0.6.1-7.el8
This commit is contained in:
parent
32e5540096
commit
0059188997
28
SOURCES/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch
Normal file
28
SOURCES/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
commit 28ea956acc2dab7c18b4701f9657afb9ab3ddc79
|
||||||
|
Author: Muralidhara M K <muralimk@amd.com>
|
||||||
|
Date: Mon Jul 12 05:18:43 2021 -0500
|
||||||
|
|
||||||
|
rasdaemon: set SMCA maximum number of banks to 64
|
||||||
|
|
||||||
|
Newer AMD systems with SMCA banks support up to 64 MCA banks per CPU.
|
||||||
|
|
||||||
|
This patch is based on the commit below upstreamed into the kernel:
|
||||||
|
a0bc32b3cacf ("x86/mce: Increase maximum number of banks to 64")
|
||||||
|
|
||||||
|
Signed-off-by: Muralidhara M K <muralimk@amd.com>
|
||||||
|
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
|
||||||
|
|
||||||
|
diff --git a/mce-amd-smca.c b/mce-amd-smca.c
|
||||||
|
index e0cf512..3c346f4 100644
|
||||||
|
--- a/mce-amd-smca.c
|
||||||
|
+++ b/mce-amd-smca.c
|
||||||
|
@@ -75,6 +75,9 @@ enum smca_bank_types {
|
||||||
|
N_SMCA_BANK_TYPES
|
||||||
|
};
|
||||||
|
|
||||||
|
+/* Maximum number of MCA banks per CPU. */
|
||||||
|
+#define MAX_NR_BANKS 64
|
||||||
|
+
|
||||||
|
/* SMCA Extended error strings */
|
||||||
|
/* Load Store */
|
||||||
|
static const char * const smca_ls_mce_desc[] = {
|
24
SOURCES/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch
Normal file
24
SOURCES/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
commit 7937f0d6c2aaaed096f3a3d306416743c0dcb7a4
|
||||||
|
Author: Muralidhara M K <muralimk@amd.com>
|
||||||
|
Date: Wed Jul 28 01:52:12 2021 -0500
|
||||||
|
|
||||||
|
rasdaemon: Support MCE for AMD CPU family 19h
|
||||||
|
|
||||||
|
Add support for family 19h x86 CPUs from AMD.
|
||||||
|
|
||||||
|
Signed-off-by: Muralidhara M K <muralimk@amd.com>
|
||||||
|
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
|
||||||
|
|
||||||
|
diff --git a/ras-mce-handler.c b/ras-mce-handler.c
|
||||||
|
index 805004a..f2b53d4 100644
|
||||||
|
--- a/ras-mce-handler.c
|
||||||
|
+++ b/ras-mce-handler.c
|
||||||
|
@@ -208,7 +208,7 @@ static int detect_cpu(struct ras_events *ras)
|
||||||
|
mce->cputype = CPU_AMD_SMCA;
|
||||||
|
goto ret;
|
||||||
|
}
|
||||||
|
- if (mce->family > 23) {
|
||||||
|
+ if (mce->family > 25) {
|
||||||
|
log(ALL, LOG_INFO,
|
||||||
|
"Can't parse MCE for this AMD CPU yet %d\n",
|
||||||
|
mce->family);
|
230
SOURCES/9acef39f13833f7d53ef96abc5a72e79384260f4.patch
Normal file
230
SOURCES/9acef39f13833f7d53ef96abc5a72e79384260f4.patch
Normal file
@ -0,0 +1,230 @@
|
|||||||
|
commit 9acef39f13833f7d53ef96abc5a72e79384260f4
|
||||||
|
Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
|
||||||
|
Date: Tue Jun 1 11:01:17 2021 +0530
|
||||||
|
|
||||||
|
rasdaemon: Add new SMCA bank types with error decoding
|
||||||
|
|
||||||
|
Upcoming systems with Scalable Machine Check Architecture (SMCA) have
|
||||||
|
new MCA banks added.
|
||||||
|
|
||||||
|
This patch adds the (HWID, MCATYPE) tuple, name and error decoding for
|
||||||
|
those new SMCA banks.
|
||||||
|
While at it, optimize the string names in smca_bank_name[].
|
||||||
|
|
||||||
|
Signed-off-by: Muralidhara M K <muralimk@amd.com>
|
||||||
|
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
|
||||||
|
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
|
||||||
|
|
||||||
|
diff --git a/mce-amd-smca.c b/mce-amd-smca.c
|
||||||
|
index 7c619fd..e0cf512 100644
|
||||||
|
--- a/mce-amd-smca.c
|
||||||
|
+++ b/mce-amd-smca.c
|
||||||
|
@@ -47,7 +47,7 @@
|
||||||
|
/* These may be used by multiple smca_hwid_mcatypes */
|
||||||
|
enum smca_bank_types {
|
||||||
|
SMCA_LS = 0, /* Load Store */
|
||||||
|
- SMCA_LS_V2, /* Load Store */
|
||||||
|
+ SMCA_LS_V2,
|
||||||
|
SMCA_IF, /* Instruction Fetch */
|
||||||
|
SMCA_L2_CACHE, /* L2 Cache */
|
||||||
|
SMCA_DE, /* Decoder Unit */
|
||||||
|
@@ -56,17 +56,22 @@ enum smca_bank_types {
|
||||||
|
SMCA_FP, /* Floating Point */
|
||||||
|
SMCA_L3_CACHE, /* L3 Cache */
|
||||||
|
SMCA_CS, /* Coherent Slave */
|
||||||
|
- SMCA_CS_V2, /* Coherent Slave V2 */
|
||||||
|
+ SMCA_CS_V2,
|
||||||
|
SMCA_PIE, /* Power, Interrupts, etc. */
|
||||||
|
SMCA_UMC, /* Unified Memory Controller */
|
||||||
|
+ SMCA_UMC_V2,
|
||||||
|
SMCA_PB, /* Parameter Block */
|
||||||
|
SMCA_PSP, /* Platform Security Processor */
|
||||||
|
- SMCA_PSP_V2, /* Platform Security Processor V2 */
|
||||||
|
+ SMCA_PSP_V2,
|
||||||
|
SMCA_SMU, /* System Management Unit */
|
||||||
|
- SMCA_SMU_V2, /* System Management Unit V2 */
|
||||||
|
+ SMCA_SMU_V2,
|
||||||
|
SMCA_MP5, /* Microprocessor 5 Unit */
|
||||||
|
SMCA_NBIO, /* Northbridge IO Unit */
|
||||||
|
SMCA_PCIE, /* PCI Express Unit */
|
||||||
|
+ SMCA_PCIE_V2,
|
||||||
|
+ SMCA_XGMI_PCS, /* xGMI PCS Unit */
|
||||||
|
+ SMCA_XGMI_PHY, /* xGMI PHY Unit */
|
||||||
|
+ SMCA_WAFL_PHY, /* WAFL PHY Unit */
|
||||||
|
N_SMCA_BANK_TYPES
|
||||||
|
};
|
||||||
|
|
||||||
|
@@ -237,6 +242,22 @@ static const char * const smca_umc_mce_desc[] = {
|
||||||
|
"Command/address parity error",
|
||||||
|
"Write data CRC error",
|
||||||
|
};
|
||||||
|
+
|
||||||
|
+static const char * const smca_umc2_mce_desc[] = {
|
||||||
|
+ "DRAM ECC error",
|
||||||
|
+ "Data poison error",
|
||||||
|
+ "SDP parity error",
|
||||||
|
+ "Reserved",
|
||||||
|
+ "Address/Command parity error",
|
||||||
|
+ "Write data parity error",
|
||||||
|
+ "DCQ SRAM ECC error",
|
||||||
|
+ "Reserved",
|
||||||
|
+ "Read data parity error",
|
||||||
|
+ "Rdb SRAM ECC error",
|
||||||
|
+ "RdRsp SRAM ECC error",
|
||||||
|
+ "LM32 MP errors",
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
/* Parameter Block */
|
||||||
|
static const char * const smca_pb_mce_desc[] = {
|
||||||
|
"Parameter Block RAM ECC error",
|
||||||
|
@@ -314,6 +335,55 @@ static const char * const smca_pcie_mce_desc[] = {
|
||||||
|
"CCIX Non-okay write response with data error",
|
||||||
|
};
|
||||||
|
|
||||||
|
+static const char * const smca_pcie2_mce_desc[] = {
|
||||||
|
+ "SDP Parity Error logging",
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static const char * const smca_xgmipcs_mce_desc[] = {
|
||||||
|
+ "Data Loss Error",
|
||||||
|
+ "Training Error",
|
||||||
|
+ "Flow Control Acknowledge Error",
|
||||||
|
+ "Rx Fifo Underflow Error",
|
||||||
|
+ "Rx Fifo Overflow Error",
|
||||||
|
+ "CRC Error",
|
||||||
|
+ "BER Exceeded Error",
|
||||||
|
+ "Tx Vcid Data Error",
|
||||||
|
+ "Replay Buffer Parity Error",
|
||||||
|
+ "Data Parity Error",
|
||||||
|
+ "Replay Fifo Overflow Error",
|
||||||
|
+ "Replay Fifo Underflow Error",
|
||||||
|
+ "Elastic Fifo Overflow Error",
|
||||||
|
+ "Deskew Error",
|
||||||
|
+ "Flow Control CRC Error",
|
||||||
|
+ "Data Startup Limit Error",
|
||||||
|
+ "FC Init Timeout Error",
|
||||||
|
+ "Recovery Timeout Error",
|
||||||
|
+ "Ready Serial Timeout Error",
|
||||||
|
+ "Ready Serial Attempt Error",
|
||||||
|
+ "Recovery Attempt Error",
|
||||||
|
+ "Recovery Relock Attempt Error",
|
||||||
|
+ "Replay Attempt Error",
|
||||||
|
+ "Sync Header Error",
|
||||||
|
+ "Tx Replay Timeout Error",
|
||||||
|
+ "Rx Replay Timeout Error",
|
||||||
|
+ "LinkSub Tx Timeout Error",
|
||||||
|
+ "LinkSub Rx Timeout Error",
|
||||||
|
+ "Rx CMD Pocket Error",
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static const char * const smca_xgmiphy_mce_desc[] = {
|
||||||
|
+ "RAM ECC Error",
|
||||||
|
+ "ARC instruction buffer parity error",
|
||||||
|
+ "ARC data buffer parity error",
|
||||||
|
+ "PHY APB error",
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static const char * const smca_waflphy_mce_desc[] = {
|
||||||
|
+ "RAM ECC Error",
|
||||||
|
+ "ARC instruction buffer parity error",
|
||||||
|
+ "ARC data buffer parity error",
|
||||||
|
+ "PHY APB error",
|
||||||
|
+};
|
||||||
|
|
||||||
|
struct smca_mce_desc {
|
||||||
|
const char * const *descs;
|
||||||
|
@@ -333,6 +403,7 @@ static struct smca_mce_desc smca_mce_descs[] = {
|
||||||
|
[SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
|
||||||
|
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
|
||||||
|
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
|
||||||
|
+ [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) },
|
||||||
|
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
|
||||||
|
[SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
|
||||||
|
[SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)},
|
||||||
|
@@ -341,6 +412,10 @@ static struct smca_mce_desc smca_mce_descs[] = {
|
||||||
|
[SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
|
||||||
|
[SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)},
|
||||||
|
[SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)},
|
||||||
|
+ [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) },
|
||||||
|
+ [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) },
|
||||||
|
+ [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
|
||||||
|
+ [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) },
|
||||||
|
};
|
||||||
|
|
||||||
|
struct smca_hwid {
|
||||||
|
@@ -369,6 +444,8 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
|
||||||
|
|
||||||
|
/* Unified Memory Controller MCA type */
|
||||||
|
{ SMCA_UMC, 0x00000096 },
|
||||||
|
+ /* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. */
|
||||||
|
+ { SMCA_UMC_V2, 0x00010096 },
|
||||||
|
|
||||||
|
/* Parameter Block MCA type */
|
||||||
|
{ SMCA_PB, 0x00000005 },
|
||||||
|
@@ -389,6 +466,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
|
||||||
|
|
||||||
|
/* PCI Express Unit MCA type */
|
||||||
|
{ SMCA_PCIE, 0x00000046 },
|
||||||
|
+ { SMCA_PCIE_V2, 0x00010046 },
|
||||||
|
+
|
||||||
|
+ /* Ext Global Memory Interconnect PCS MCA type */
|
||||||
|
+ { SMCA_XGMI_PCS, 0x00000050 },
|
||||||
|
+
|
||||||
|
+ /* Ext Global Memory Interconnect PHY MCA type */
|
||||||
|
+ { SMCA_XGMI_PHY, 0x00000259 },
|
||||||
|
+
|
||||||
|
+ /* WAFL PHY MCA type */
|
||||||
|
+ { SMCA_WAFL_PHY, 0x00000267 },
|
||||||
|
};
|
||||||
|
|
||||||
|
struct smca_bank_name {
|
||||||
|
@@ -396,27 +483,28 @@ struct smca_bank_name {
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct smca_bank_name smca_names[] = {
|
||||||
|
- [SMCA_LS] = { "Load Store Unit" },
|
||||||
|
- [SMCA_LS_V2] = { "Load Store Unit" },
|
||||||
|
- [SMCA_IF] = { "Instruction Fetch Unit" },
|
||||||
|
- [SMCA_L2_CACHE] = { "L2 Cache" },
|
||||||
|
- [SMCA_DE] = { "Decode Unit" },
|
||||||
|
- [SMCA_RESERVED] = { "Reserved" },
|
||||||
|
- [SMCA_EX] = { "Execution Unit" },
|
||||||
|
- [SMCA_FP] = { "Floating Point Unit" },
|
||||||
|
- [SMCA_L3_CACHE] = { "L3 Cache" },
|
||||||
|
- [SMCA_CS] = { "Coherent Slave" },
|
||||||
|
- [SMCA_CS_V2] = { "Coherent Slave" },
|
||||||
|
- [SMCA_PIE] = { "Power, Interrupts, etc." },
|
||||||
|
- [SMCA_UMC] = { "Unified Memory Controller" },
|
||||||
|
- [SMCA_PB] = { "Parameter Block" },
|
||||||
|
- [SMCA_PSP] = { "Platform Security Processor" },
|
||||||
|
- [SMCA_PSP_V2] = { "Platform Security Processor" },
|
||||||
|
- [SMCA_SMU] = { "System Management Unit" },
|
||||||
|
- [SMCA_SMU_V2] = { "System Management Unit" },
|
||||||
|
- [SMCA_MP5] = { "Microprocessor 5 Unit" },
|
||||||
|
- [SMCA_NBIO] = { "Northbridge IO Unit" },
|
||||||
|
- [SMCA_PCIE] = { "PCI Express Unit" },
|
||||||
|
+ [SMCA_LS ... SMCA_LS_V2] = { "Load Store Unit" },
|
||||||
|
+ [SMCA_IF] = { "Instruction Fetch Unit" },
|
||||||
|
+ [SMCA_L2_CACHE] = { "L2 Cache" },
|
||||||
|
+ [SMCA_DE] = { "Decode Unit" },
|
||||||
|
+ [SMCA_RESERVED] = { "Reserved" },
|
||||||
|
+ [SMCA_EX] = { "Execution Unit" },
|
||||||
|
+ [SMCA_FP] = { "Floating Point Unit" },
|
||||||
|
+ [SMCA_L3_CACHE] = { "L3 Cache" },
|
||||||
|
+ [SMCA_CS ... SMCA_CS_V2] = { "Coherent Slave" },
|
||||||
|
+ [SMCA_PIE] = { "Power, Interrupts, etc." },
|
||||||
|
+ [SMCA_UMC] = { "Unified Memory Controller" },
|
||||||
|
+ [SMCA_UMC_V2] = { "Unified Memory Controller V2" },
|
||||||
|
+ [SMCA_PB] = { "Parameter Block" },
|
||||||
|
+ [SMCA_PSP ... SMCA_PSP_V2] = { "Platform Security Processor" },
|
||||||
|
+ [SMCA_SMU ... SMCA_SMU_V2] = { "System Management Unit" },
|
||||||
|
+ [SMCA_MP5] = { "Microprocessor 5 Unit" },
|
||||||
|
+ [SMCA_NBIO] = { "Northbridge IO Unit" },
|
||||||
|
+ [SMCA_PCIE ... SMCA_PCIE_V2] = { "PCI Express Unit" },
|
||||||
|
+ [SMCA_XGMI_PCS] = { "Ext Global Memory Interconnect PCS Unit" },
|
||||||
|
+ [SMCA_XGMI_PHY] = { "Ext Global Memory Interconnect PHY Unit" },
|
||||||
|
+ [SMCA_WAFL_PHY] = { "WAFL PHY Unit" },
|
||||||
|
+
|
||||||
|
};
|
||||||
|
|
||||||
|
static void amd_decode_errcode(struct mce_event *e)
|
107
SOURCES/aecf33aa70331670c06db6b652712b476e24051c.patch
Normal file
107
SOURCES/aecf33aa70331670c06db6b652712b476e24051c.patch
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
commit aecf33aa70331670c06db6b652712b476e24051c
|
||||||
|
Author: Muralidhara M K <muralimk@amd.com>
|
||||||
|
Date: Mon Jul 12 05:40:46 2021 -0500
|
||||||
|
|
||||||
|
rasdaemon: Enumerate memory on noncpu nodes
|
||||||
|
|
||||||
|
Newer heterogeneous systems from AMD have GPU nodes (with HBM2 memory
|
||||||
|
banks) connected via xGMI links to the CPUs.
|
||||||
|
|
||||||
|
The node id information is available in the InstanceHI[47:44] of
|
||||||
|
the IPID register.
|
||||||
|
|
||||||
|
The UMC Phys on Aldebaran nodes are enumerated as csrows.
|
||||||
|
The UMC channels connected to HBMs are enumerated as ranks.
|
||||||
|
|
||||||
|
Signed-off-by: Muralidhara M K <muralimk@amd.com>
|
||||||
|
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
|
||||||
|
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
|
||||||
|
|
||||||
|
diff --git a/mce-amd-smca.c b/mce-amd-smca.c
|
||||||
|
index 3c346f4..f3379fc 100644
|
||||||
|
--- a/mce-amd-smca.c
|
||||||
|
+++ b/mce-amd-smca.c
|
||||||
|
@@ -78,6 +78,12 @@ enum smca_bank_types {
|
||||||
|
/* Maximum number of MCA banks per CPU. */
|
||||||
|
#define MAX_NR_BANKS 64
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ * On Newer heterogeneous systems from AMD with CPU and GPU nodes connected
|
||||||
|
+ * via xGMI links, the NON CPU Nodes are enumerated from index 8
|
||||||
|
+ */
|
||||||
|
+#define NONCPU_NODE_INDEX 8
|
||||||
|
+
|
||||||
|
/* SMCA Extended error strings */
|
||||||
|
/* Load Store */
|
||||||
|
static const char * const smca_ls_mce_desc[] = {
|
||||||
|
@@ -531,6 +537,26 @@ static int find_umc_channel(struct mce_event *e)
|
||||||
|
{
|
||||||
|
return EXTRACT(e->ipid, 0, 31) >> 20;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * The HBM memory managed by the UMCCH of the noncpu node
|
||||||
|
+ * can be calculated based on the [15:12]bits of IPID
|
||||||
|
+ */
|
||||||
|
+static int find_hbm_channel(struct mce_event *e)
|
||||||
|
+{
|
||||||
|
+ int umc, tmp;
|
||||||
|
+
|
||||||
|
+ umc = EXTRACT(e->ipid, 0, 31) >> 20;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * The HBM channel managed by the UMC of the noncpu node
|
||||||
|
+ * can be calculated based on the [15:12]bits of IPID as follows
|
||||||
|
+ */
|
||||||
|
+ tmp = ((e->ipid >> 12) & 0xf);
|
||||||
|
+
|
||||||
|
+ return (umc % 2) ? tmp + 4 : tmp;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Decode extended errors according to Scalable MCA specification */
|
||||||
|
static void decode_smca_error(struct mce_event *e)
|
||||||
|
{
|
||||||
|
@@ -539,6 +565,7 @@ static void decode_smca_error(struct mce_event *e)
|
||||||
|
unsigned short xec = (e->status >> 16) & 0x3f;
|
||||||
|
const struct smca_hwid *s_hwid;
|
||||||
|
uint32_t mcatype_hwid = EXTRACT(e->ipid, 32, 63);
|
||||||
|
+ uint8_t mcatype_instancehi = EXTRACT(e->ipid, 44, 47);
|
||||||
|
unsigned int csrow = -1, channel = -1;
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
|
@@ -548,14 +575,16 @@ static void decode_smca_error(struct mce_event *e)
|
||||||
|
bank_type = s_hwid->bank_type;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
+ if (mcatype_instancehi >= NONCPU_NODE_INDEX)
|
||||||
|
+ bank_type = SMCA_UMC_V2;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (i >= ARRAY_SIZE(smca_hwid_mcatypes)) {
|
||||||
|
+ if (i >= MAX_NR_BANKS) {
|
||||||
|
strcpy(e->mcastatus_msg, "Couldn't find bank type with IPID");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (bank_type >= N_SMCA_BANK_TYPES) {
|
||||||
|
+ if (bank_type >= MAX_NR_BANKS) {
|
||||||
|
strcpy(e->mcastatus_msg, "Don't know how to decode this bank");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
@@ -580,6 +609,16 @@ static void decode_smca_error(struct mce_event *e)
|
||||||
|
mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d",
|
||||||
|
channel, csrow);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ if (bank_type == SMCA_UMC_V2 && xec == 0) {
|
||||||
|
+ /* The UMCPHY is reported as csrow in case of noncpu nodes */
|
||||||
|
+ csrow = find_umc_channel(e) / 2;
|
||||||
|
+ /* UMCCH is managing the HBM memory */
|
||||||
|
+ channel = find_hbm_channel(e);
|
||||||
|
+ mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d",
|
||||||
|
+ channel, csrow);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
}
|
||||||
|
|
||||||
|
int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e)
|
@ -0,0 +1,47 @@
|
|||||||
|
From: Muralidhara M K <muralimk@amd.com>
|
||||||
|
|
||||||
|
This patch removes trailing spaces at the end of a line from
|
||||||
|
file location and fixes --layout option to parse dimm nodes
|
||||||
|
to get the size from ras-mc-ctl.
|
||||||
|
|
||||||
|
The issue is reported at https://github.com/mchehab/rasdaemon/issues/43
|
||||||
|
Where '> ras-mc-ctl --layout' reports all 0s
|
||||||
|
|
||||||
|
With this change the layout prints the correct dimm sizes
|
||||||
|
> sudo ras-mc-ctl --layout
|
||||||
|
+-----------------------------------------------+
|
||||||
|
| mc0 |
|
||||||
|
| csrow0 | csrow1 | csrow2 | csrow3 |
|
||||||
|
----------+-----------------------------------------------+
|
||||||
|
...
|
||||||
|
channel7: | 16384 MB | 0 MB | 0 MB | 0 MB |
|
||||||
|
channel6: | 16384 MB | 0 MB | 0 MB | 0 MB |
|
||||||
|
...
|
||||||
|
----------+-----------------------------------------------+
|
||||||
|
|
||||||
|
Signed-off-by: Muralidhara M K <muralimk@amd.com>
|
||||||
|
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
|
||||||
|
---
|
||||||
|
util/ras-mc-ctl.in | 2 ++
|
||||||
|
1 file changed, 2 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
|
||||||
|
index 1e3aeb7..b22dd60 100755
|
||||||
|
--- a/util/ras-mc-ctl.in
|
||||||
|
+++ b/util/ras-mc-ctl.in
|
||||||
|
@@ -246,6 +246,7 @@ sub parse_dimm_nodes
|
||||||
|
if (($file =~ /max_location$/)) {
|
||||||
|
open IN, $file;
|
||||||
|
my $location = <IN>;
|
||||||
|
+ $location =~ s/\s+$//;
|
||||||
|
close IN;
|
||||||
|
my @temp = split(/ /, $location);
|
||||||
|
|
||||||
|
@@ -288,6 +289,7 @@ sub parse_dimm_nodes
|
||||||
|
|
||||||
|
open IN, $file;
|
||||||
|
my $location = <IN>;
|
||||||
|
+ $location =~ s/\s+$//;
|
||||||
|
close IN;
|
||||||
|
|
||||||
|
my @pos;
|
@ -1,6 +1,6 @@
|
|||||||
Name: rasdaemon
|
Name: rasdaemon
|
||||||
Version: 0.6.1
|
Version: 0.6.1
|
||||||
Release: 5.1%{?dist}
|
Release: 7%{?dist}
|
||||||
Summary: Utility to receive RAS error tracings
|
Summary: Utility to receive RAS error tracings
|
||||||
Group: Applications/System
|
Group: Applications/System
|
||||||
License: GPLv2
|
License: GPLv2
|
||||||
@ -31,6 +31,11 @@ Patch5: 2a1d217660351c08eb2f8bccebf939abba2f7e69.patch
|
|||||||
Patch6: 8704a85d8dc3483423ec2934fee8132f85f8fdb6.patch
|
Patch6: 8704a85d8dc3483423ec2934fee8132f85f8fdb6.patch
|
||||||
Patch7: cc2ce5c65ed5a42eaa97aa3659854add6d808da5.patch
|
Patch7: cc2ce5c65ed5a42eaa97aa3659854add6d808da5.patch
|
||||||
Patch8: 854364ba44aee9bc5646f6537fc744b0b54aff37.patch
|
Patch8: 854364ba44aee9bc5646f6537fc744b0b54aff37.patch
|
||||||
|
Patch9: 9acef39f13833f7d53ef96abc5a72e79384260f4.patch
|
||||||
|
Patch10: 28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch
|
||||||
|
Patch11: aecf33aa70331670c06db6b652712b476e24051c.patch
|
||||||
|
Patch12: 7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch
|
||||||
|
Patch13: rasdaemon-ras-mc-ctl-Fix-script-to-parse-dimm-sizes.patch
|
||||||
|
|
||||||
%description
|
%description
|
||||||
%{name} is a RAS (Reliability, Availability and Serviceability) logging tool.
|
%{name} is a RAS (Reliability, Availability and Serviceability) logging tool.
|
||||||
@ -52,6 +57,11 @@ an utility for reporting current error counts from the EDAC sysfs files.
|
|||||||
%patch6 -p1
|
%patch6 -p1
|
||||||
%patch7 -p1
|
%patch7 -p1
|
||||||
%patch8 -p1
|
%patch8 -p1
|
||||||
|
%patch9 -p1
|
||||||
|
%patch10 -p1
|
||||||
|
%patch11 -p1
|
||||||
|
%patch12 -p1
|
||||||
|
%patch13 -p1
|
||||||
|
|
||||||
%build
|
%build
|
||||||
%ifarch %{arm} aarch64
|
%ifarch %{arm} aarch64
|
||||||
@ -78,8 +88,11 @@ rm INSTALL %{buildroot}/usr/include/*.h
|
|||||||
%{_sysconfdir}/ras/dimm_labels.d
|
%{_sysconfdir}/ras/dimm_labels.d
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
* Wed May 26 2021 Aristeu Rozanski <aris@redhat.com> 0.6.1-5.1
|
* Thu Aug 26 2021 Aristeu Rozanski <aris@redhat.com> 0.6.1-7
|
||||||
- Add support for AMD SMCA [1975506]
|
- Add support for AMD SMCA banks for family 19 [1991955]
|
||||||
|
|
||||||
|
* Wed May 26 2021 Aristeu Rozanski <aris@redhat.com> 0.6.1-6
|
||||||
|
- Add support for AMD SMCA [1965011]
|
||||||
|
|
||||||
* Wed Apr 08 2020 Aristeu Rozanski <aris@redhat.com> 0.6.1-5
|
* Wed Apr 08 2020 Aristeu Rozanski <aris@redhat.com> 0.6.1-5
|
||||||
- Fix high CPU usage when CPUs are offline [1683420]
|
- Fix high CPU usage when CPUs are offline [1683420]
|
||||||
|
Loading…
Reference in New Issue
Block a user