Add support for vendor-specific information
Resolves: RHEL-68673
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com>
This commit is contained in:
parent
09ea5ccc0c
commit
0184702bc0
95
83a3ced797256dcb1c93f8de4266fd7545fbfb3b.patch
Normal file
95
83a3ced797256dcb1c93f8de4266fd7545fbfb3b.patch
Normal file
@ -0,0 +1,95 @@
|
||||
commit 83a3ced797256dcb1c93f8de4266fd7545fbfb3b
|
||||
Author: Avadhut Naik <avadnaik@amd.com>
|
||||
Date: Tue Nov 21 14:04:19 2023 -0600
|
||||
|
||||
rasdaemon: Add support for vendor-specific machine check error information
|
||||
|
||||
Some CPU vendors may provide additional vendor-specific machine check
|
||||
error information. AMD, for example, provides FRU Text through SYND 1/2
|
||||
registers if BIT 9 of SMCA_CONFIG register is set.
|
||||
|
||||
Add support to display the additional vendor-specific error information,
|
||||
if any.
|
||||
|
||||
Signed-off-by: Avadhut Naik <Avadhut.Naik@amd.com>
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
|
||||
|
||||
---
|
||||
mce-amd-smca.c | 12 ++++++++++++
|
||||
ras-mce-handler.c | 22 ++++++++++++++++++++++
|
||||
ras-mce-handler.h | 3 +++
|
||||
3 files changed, 37 insertions(+)
|
||||
|
||||
--- rasdaemon-0.6.7.orig/mce-amd-smca.c 2024-11-27 10:18:13.765255836 -0500
|
||||
+++ rasdaemon-0.6.7/mce-amd-smca.c 2024-11-27 10:18:23.014169756 -0500
|
||||
@@ -999,6 +999,18 @@ if (bank_type == SMCA_UMC_V2 && xec == 0
|
||||
channel, csrow);
|
||||
}
|
||||
|
||||
+
|
||||
+ if (e->vdata_len) {
|
||||
+ uint64_t smca_config = e->vdata[2];
|
||||
+
|
||||
+ /*
|
||||
+ * BIT 9 of the CONFIG register of a few SMCA Bank types indicates
|
||||
+ * presence of FRU Text in SYND 1 / 2 registers
|
||||
+ */
|
||||
+ if (smca_config & BIT(9))
|
||||
+ memcpy(e->frutext, e->vdata, 16);
|
||||
+ }
|
||||
+
|
||||
}
|
||||
|
||||
int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e)
|
||||
--- rasdaemon-0.6.7.orig/ras-mce-handler.c 2024-11-27 10:18:23.014169756 -0500
|
||||
+++ rasdaemon-0.6.7/ras-mce-handler.c 2024-11-27 10:19:38.849463954 -0500
|
||||
@@ -375,6 +375,25 @@ #if 0
|
||||
if (e->microcode)
|
||||
trace_seq_printf(s, ", microcode= %x", e->microcode);
|
||||
|
||||
+ if (!e->vdata_len)
|
||||
+ return;
|
||||
+
|
||||
+ if (strlen(e->frutext)) {
|
||||
+ trace_seq_printf(s, ", FRU Text= %s", e->frutext);
|
||||
+ trace_seq_printf(s, ", Vendor Data= ");
|
||||
+ for (int i = 2; i < e->vdata_len/8; i++) {
|
||||
+ trace_seq_printf(s, "0x%lx", e->vdata[i]);
|
||||
+ trace_seq_printf(s, " ");
|
||||
+ }
|
||||
+ } else {
|
||||
+ trace_seq_printf(s, ", Vendor Data= ");
|
||||
+ for (int i = 0; i < e->vdata_len/8; i ++) {
|
||||
+ trace_seq_printf(s, "0x%lx", e->vdata[i]);
|
||||
+ trace_seq_printf(s, " ");
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+
|
||||
/*
|
||||
* FIXME: The original mcelog userspace tool uses DMI to map from
|
||||
* address to DIMM. From the comments there, the code there doesn't
|
||||
@@ -559,6 +578,9 @@ if (pevent_get_field_val(s, event, "ipid
|
||||
if (!pevent_get_field_val(s, event, "microcode", record, &val, 1))
|
||||
e.microcode = val;
|
||||
|
||||
+ /* Get Vendor-specific Data, if any */
|
||||
+ e.vdata = pevent_get_field_raw(s, event, "v_data", record, &e.vdata_len, 1);
|
||||
+
|
||||
switch (mce->cputype) {
|
||||
case CPU_GENERIC:
|
||||
break;
|
||||
--- rasdaemon-0.6.7.orig/ras-mce-handler.h 2024-11-27 10:18:23.014169756 -0500
|
||||
+++ rasdaemon-0.6.7/ras-mce-handler.h 2024-11-27 10:20:05.249218250 -0500
|
||||
@@ -76,8 +76,11 @@ struct mce_event {
|
||||
uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */
|
||||
uint64_t ppin;
|
||||
uint32_t microcode;
|
||||
+ int32_t vdata_len;
|
||||
+ const uint64_t *vdata;
|
||||
|
||||
/* Parsed data */
|
||||
+ char frutext[17];
|
||||
char timestamp[64];
|
||||
char bank_name[64];
|
||||
char error_msg[4096];
|
75
8b536321cc0679fb82d4ea7521f9375d88cec0cc.patch
Normal file
75
8b536321cc0679fb82d4ea7521f9375d88cec0cc.patch
Normal file
@ -0,0 +1,75 @@
|
||||
commit 8b536321cc0679fb82d4ea7521f9375d88cec0cc
|
||||
Author: Avadhut Naik <avadhut.naik@amd.com>
|
||||
Date: Thu Nov 7 06:24:44 2024 +0000
|
||||
|
||||
rasdaemon: Modify support for vendor-specific machine check error information
|
||||
|
||||
Commit 83a3ced797256d ("rasdaemon: Add support for vendor-specific
|
||||
machine check error information") assumes that MCA_CONFIG MSR will be
|
||||
exported as part of vendor-specific error information through the MCE
|
||||
tracepoint.
|
||||
|
||||
The same, however, is not true anymore. MCA_CONFIG MSR will not be
|
||||
exported through the MCE tracepoint. Instead, the data from MCA_SYND1/2
|
||||
MSRs, exported as vendor-specific error information on newer AMD SOCs,
|
||||
should always be interpreted as FRUText.
|
||||
|
||||
Modify the error decoding support accordingly.
|
||||
|
||||
Fixes: 83a3ced797256d ("rasdaemon: Add support for vendor-specific
|
||||
machine check error information")
|
||||
Signed-off-by: Avadhut Naik <avadhut.naik@amd.com>
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
|
||||
|
||||
---
|
||||
mce-amd-smca.c | 13 ++-----------
|
||||
ras-mce-handler.c | 15 +--------------
|
||||
2 files changed, 3 insertions(+), 25 deletions(-)
|
||||
|
||||
--- rasdaemon-0.6.7.orig/mce-amd-smca.c 2024-11-27 10:20:29.777989960 -0500
|
||||
+++ rasdaemon-0.6.7/mce-amd-smca.c 2024-11-27 10:21:28.731441278 -0500
|
||||
@@ -1000,17 +1000,8 @@ if (bank_type == SMCA_UMC_V2 && xec == 0
|
||||
}
|
||||
|
||||
|
||||
- if (e->vdata_len) {
|
||||
- uint64_t smca_config = e->vdata[2];
|
||||
-
|
||||
- /*
|
||||
- * BIT 9 of the CONFIG register of a few SMCA Bank types indicates
|
||||
- * presence of FRU Text in SYND 1 / 2 registers
|
||||
- */
|
||||
- if (smca_config & BIT(9))
|
||||
- memcpy(e->frutext, e->vdata, 16);
|
||||
- }
|
||||
-
|
||||
+ if (e->vdata_len)
|
||||
+ memcpy(e->frutext, e->vdata, 16);
|
||||
}
|
||||
|
||||
int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e)
|
||||
--- rasdaemon-0.6.7.orig/ras-mce-handler.c 2024-11-27 10:20:29.777989960 -0500
|
||||
+++ rasdaemon-0.6.7/ras-mce-handler.c 2024-11-27 10:21:01.517694557 -0500
|
||||
@@ -378,21 +378,8 @@ #if 0
|
||||
if (!e->vdata_len)
|
||||
return;
|
||||
|
||||
- if (strlen(e->frutext)) {
|
||||
+ if (strlen(e->frutext))
|
||||
trace_seq_printf(s, ", FRU Text= %s", e->frutext);
|
||||
- trace_seq_printf(s, ", Vendor Data= ");
|
||||
- for (int i = 2; i < e->vdata_len/8; i++) {
|
||||
- trace_seq_printf(s, "0x%lx", e->vdata[i]);
|
||||
- trace_seq_printf(s, " ");
|
||||
- }
|
||||
- } else {
|
||||
- trace_seq_printf(s, ", Vendor Data= ");
|
||||
- for (int i = 0; i < e->vdata_len/8; i ++) {
|
||||
- trace_seq_printf(s, "0x%lx", e->vdata[i]);
|
||||
- trace_seq_printf(s, " ");
|
||||
- }
|
||||
- }
|
||||
-
|
||||
|
||||
/*
|
||||
* FIXME: The original mcelog userspace tool uses DMI to map from
|
@ -1,6 +1,6 @@
|
||||
Name: rasdaemon
|
||||
Version: 0.6.7
|
||||
Release: 17%{?dist}
|
||||
Release: 18%{?dist}
|
||||
Summary: Utility to receive RAS error tracings
|
||||
License: GPL-2.0-only
|
||||
URL: http://git.infradead.org/users/mchehab/rasdaemon.git
|
||||
@ -44,6 +44,8 @@ Patch35: b1ace39286e287282a275b6edc90dc2f64e60a3c.patch
|
||||
Patch36: 045ab08eaa00172d50621df9502f6910f3fe3af4.patch
|
||||
Patch37: 79065939fc4bc1da72a3718937fab80e73a6dd75.patch
|
||||
Patch38: 794530fbf270eae9f6f43c6d0bbd3ec6f2b210f3.patch
|
||||
Patch39: 83a3ced797256dcb1c93f8de4266fd7545fbfb3b.patch
|
||||
Patch40: 8b536321cc0679fb82d4ea7521f9375d88cec0cc.patch
|
||||
|
||||
ExcludeArch: s390 s390x
|
||||
BuildRequires: make
|
||||
@ -117,6 +119,8 @@ an utility for reporting current error counts from the EDAC sysfs files.
|
||||
%patch36 -p1
|
||||
%patch37 -p1
|
||||
%patch38 -p1
|
||||
%patch39 -p1
|
||||
%patch40 -p1
|
||||
|
||||
# The tarball is locked in time the first time aclocal was ran and will keep
|
||||
# requiring an older version of automake
|
||||
@ -152,6 +156,9 @@ sed -i "s/^PAGE_CE_ACTION=.*/PAGE_CE_ACTION=account/" %{buildroot}/%{_sysconfdir
|
||||
%{_sysconfdir}/sysconfig/rasdaemon
|
||||
|
||||
%changelog
|
||||
* Wed Nov 27 2024 Aristeu Rozanski <aris@redhat.com> 0.6.7-18
|
||||
- Add support for vendor-specific information [RHEL-68673]
|
||||
|
||||
* Tue Nov 19 2024 Aristeu Rozanski <aris@redhat.com> 0.6.7-17
|
||||
- ras-events: quit loop in read_ras_event when kbuf data is broken [RHEL-68127]
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user