diff --git a/83a3ced797256dcb1c93f8de4266fd7545fbfb3b.patch b/83a3ced797256dcb1c93f8de4266fd7545fbfb3b.patch new file mode 100644 index 0000000..4ffb2aa --- /dev/null +++ b/83a3ced797256dcb1c93f8de4266fd7545fbfb3b.patch @@ -0,0 +1,95 @@ +commit 83a3ced797256dcb1c93f8de4266fd7545fbfb3b +Author: Avadhut Naik +Date: Tue Nov 21 14:04:19 2023 -0600 + + rasdaemon: Add support for vendor-specific machine check error information + + Some CPU vendors may provide additional vendor-specific machine check + error information. AMD, for example, provides FRU Text through SYND 1/2 + registers if BIT 9 of SMCA_CONFIG register is set. + + Add support to display the additional vendor-specific error information, + if any. + + Signed-off-by: Avadhut Naik + Signed-off-by: Mauro Carvalho Chehab + +--- + mce-amd-smca.c | 12 ++++++++++++ + ras-mce-handler.c | 22 ++++++++++++++++++++++ + ras-mce-handler.h | 3 +++ + 3 files changed, 37 insertions(+) + +--- rasdaemon-0.6.7.orig/mce-amd-smca.c 2024-11-27 10:18:13.765255836 -0500 ++++ rasdaemon-0.6.7/mce-amd-smca.c 2024-11-27 10:18:23.014169756 -0500 +@@ -999,6 +999,18 @@ if (bank_type == SMCA_UMC_V2 && xec == 0 + channel, csrow); + } + ++ ++ if (e->vdata_len) { ++ uint64_t smca_config = e->vdata[2]; ++ ++ /* ++ * BIT 9 of the CONFIG register of a few SMCA Bank types indicates ++ * presence of FRU Text in SYND 1 / 2 registers ++ */ ++ if (smca_config & BIT(9)) ++ memcpy(e->frutext, e->vdata, 16); ++ } ++ + } + + int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e) +--- rasdaemon-0.6.7.orig/ras-mce-handler.c 2024-11-27 10:18:23.014169756 -0500 ++++ rasdaemon-0.6.7/ras-mce-handler.c 2024-11-27 10:19:38.849463954 -0500 +@@ -375,6 +375,25 @@ #if 0 + if (e->microcode) + trace_seq_printf(s, ", microcode= %x", e->microcode); + ++ if (!e->vdata_len) ++ return; ++ ++ if (strlen(e->frutext)) { ++ trace_seq_printf(s, ", FRU Text= %s", e->frutext); ++ trace_seq_printf(s, ", Vendor Data= "); ++ for (int i = 2; i < e->vdata_len/8; i++) { ++ 
trace_seq_printf(s, "0x%lx", e->vdata[i]); ++ trace_seq_printf(s, " "); ++ } ++ } else { ++ trace_seq_printf(s, ", Vendor Data= "); ++ for (int i = 0; i < e->vdata_len/8; i ++) { ++ trace_seq_printf(s, "0x%lx", e->vdata[i]); ++ trace_seq_printf(s, " "); ++ } ++ } ++ ++ + /* + * FIXME: The original mcelog userspace tool uses DMI to map from + * address to DIMM. From the comments there, the code there doesn't +@@ -559,6 +578,9 @@ if (pevent_get_field_val(s, event, "ipid + if (!pevent_get_field_val(s, event, "microcode", record, &val, 1)) + e.microcode = val; + ++ /* Get Vendor-specific Data, if any */ ++ e.vdata = pevent_get_field_raw(s, event, "v_data", record, &e.vdata_len, 1); ++ + switch (mce->cputype) { + case CPU_GENERIC: + break; +--- rasdaemon-0.6.7.orig/ras-mce-handler.h 2024-11-27 10:18:23.014169756 -0500 ++++ rasdaemon-0.6.7/ras-mce-handler.h 2024-11-27 10:20:05.249218250 -0500 +@@ -76,8 +76,11 @@ struct mce_event { + uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + uint64_t ppin; + uint32_t microcode; ++ int32_t vdata_len; ++ const uint64_t *vdata; + + /* Parsed data */ ++ char frutext[17]; + char timestamp[64]; + char bank_name[64]; + char error_msg[4096]; diff --git a/8b536321cc0679fb82d4ea7521f9375d88cec0cc.patch b/8b536321cc0679fb82d4ea7521f9375d88cec0cc.patch new file mode 100644 index 0000000..d9c0b34 --- /dev/null +++ b/8b536321cc0679fb82d4ea7521f9375d88cec0cc.patch @@ -0,0 +1,75 @@ +commit 8b536321cc0679fb82d4ea7521f9375d88cec0cc +Author: Avadhut Naik +Date: Thu Nov 7 06:24:44 2024 +0000 + + rasdaemon: Modify support for vendor-specific machine check error information + + Commit 83a3ced797256d ("rasdaemon: Add support for vendor-specific + machine check error information") assumes that MCA_CONFIG MSR will be + exported as part of vendor-specific error information through the MCE + tracepoint. + + The same, however, is not true anymore. MCA_CONFIG MSR will not be + exported through the MCE tracepoint. 
Instead, the data from MCA_SYND1/2 + MSRs, exported as vendor-specific error information on newer AMD SOCs, + should always be interpreted as FRUText. + + Modify the error decoding support accordingly. + + Fixes: 83a3ced797256d ("rasdaemon: Add support for vendor-specific + machine check error information") + Signed-off-by: Avadhut Naik + Signed-off-by: Mauro Carvalho Chehab + +--- + mce-amd-smca.c | 13 ++----------- + ras-mce-handler.c | 15 +-------------- + 2 files changed, 3 insertions(+), 25 deletions(-) + +--- rasdaemon-0.6.7.orig/mce-amd-smca.c 2024-11-27 10:20:29.777989960 -0500 ++++ rasdaemon-0.6.7/mce-amd-smca.c 2024-11-27 10:21:28.731441278 -0500 +@@ -1000,17 +1000,8 @@ if (bank_type == SMCA_UMC_V2 && xec == 0 + } + + +- if (e->vdata_len) { +- uint64_t smca_config = e->vdata[2]; +- +- /* +- * BIT 9 of the CONFIG register of a few SMCA Bank types indicates +- * presence of FRU Text in SYND 1 / 2 registers +- */ +- if (smca_config & BIT(9)) +- memcpy(e->frutext, e->vdata, 16); +- } +- ++ if (e->vdata_len) ++ memcpy(e->frutext, e->vdata, 16); + } + + int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e) +--- rasdaemon-0.6.7.orig/ras-mce-handler.c 2024-11-27 10:20:29.777989960 -0500 ++++ rasdaemon-0.6.7/ras-mce-handler.c 2024-11-27 10:21:01.517694557 -0500 +@@ -378,21 +378,8 @@ #if 0 + if (!e->vdata_len) + return; + +- if (strlen(e->frutext)) { ++ if (strlen(e->frutext)) + trace_seq_printf(s, ", FRU Text= %s", e->frutext); +- trace_seq_printf(s, ", Vendor Data= "); +- for (int i = 2; i < e->vdata_len/8; i++) { +- trace_seq_printf(s, "0x%lx", e->vdata[i]); +- trace_seq_printf(s, " "); +- } +- } else { +- trace_seq_printf(s, ", Vendor Data= "); +- for (int i = 0; i < e->vdata_len/8; i ++) { +- trace_seq_printf(s, "0x%lx", e->vdata[i]); +- trace_seq_printf(s, " "); +- } +- } +- + + /* + * FIXME: The original mcelog userspace tool uses DMI to map from diff --git a/rasdaemon.spec b/rasdaemon.spec index a8641a5..12d37a6 100644 --- a/rasdaemon.spec 
+++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.7 -Release: 17%{?dist} +Release: 18%{?dist} Summary: Utility to receive RAS error tracings License: GPL-2.0-only URL: http://git.infradead.org/users/mchehab/rasdaemon.git @@ -44,6 +44,8 @@ Patch35: b1ace39286e287282a275b6edc90dc2f64e60a3c.patch Patch36: 045ab08eaa00172d50621df9502f6910f3fe3af4.patch Patch37: 79065939fc4bc1da72a3718937fab80e73a6dd75.patch Patch38: 794530fbf270eae9f6f43c6d0bbd3ec6f2b210f3.patch +Patch39: 83a3ced797256dcb1c93f8de4266fd7545fbfb3b.patch +Patch40: 8b536321cc0679fb82d4ea7521f9375d88cec0cc.patch ExcludeArch: s390 s390x BuildRequires: make @@ -117,6 +119,8 @@ an utility for reporting current error counts from the EDAC sysfs files. %patch36 -p1 %patch37 -p1 %patch38 -p1 +%patch39 -p1 +%patch40 -p1 # The tarball is locked in time the first time aclocal was ran and will keep # requiring an older version of automake @@ -152,6 +156,9 @@ sed -i "s/^PAGE_CE_ACTION=.*/PAGE_CE_ACTION=account/" %{buildroot}/%{_sysconfdir %{_sysconfdir}/sysconfig/rasdaemon %changelog +* Wed Nov 27 2024 Aristeu Rozanski 0.6.7-18 +- Add support for vendor-specific machine check error information [RHEL-68673] + * Tue Nov 19 2024 Aristeu Rozanski 0.6.7-17 - ras-events: quit loop in read_ras_event when kbuf data is broken [RHEL-68127]