From ec6406197f69c0c515e7d2581b4c7ba5bd010464 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Thu, 18 Jul 2024 11:18:50 -0400 Subject: [PATCH] rasdaemon: mce-amd-smca: Optimizing decoding of MCA_CTL_SMU bits Resolves: RHEL-48819 Signed-off-by: Aristeu Rozanski --- ...177ce0d2fcb7693cacee4778d0845ebd3788.patch | 93 +++++++++++++++++++ rasdaemon.spec | 7 +- 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 73d8177ce0d2fcb7693cacee4778d0845ebd3788.patch diff --git a/73d8177ce0d2fcb7693cacee4778d0845ebd3788.patch b/73d8177ce0d2fcb7693cacee4778d0845ebd3788.patch new file mode 100644 index 0000000..2655de8 --- /dev/null +++ b/73d8177ce0d2fcb7693cacee4778d0845ebd3788.patch @@ -0,0 +1,93 @@ +commit 73d8177ce0d2fcb7693cacee4778d0845ebd3788 +Author: sathya priya kumar +Date: Thu Jun 13 05:29:09 2024 +0000 + + rasdaemon: mce-amd-smca: Optimizing decoding of MCA_CTL_SMU bits + + Optimize smca_smu2_mce_desc in better way from the commit ced615c. + + Update existing array with extended error descriptions instead + of creating new array, simplifying the code. 
+ + Signed-off-by: Sathya Priya Kumar + Signed-off-by: Mauro Carvalho Chehab + +--- + mce-amd-smca.c | 29 +++-------------------------- + ras-mce-handler.h | 1 - + 2 files changed, 3 insertions(+), 27 deletions(-) + +--- rasdaemon-0.6.7.orig/mce-amd-smca.c 2024-07-18 11:14:26.008582740 -0400 ++++ rasdaemon-0.6.7/mce-amd-smca.c 2024-07-18 11:15:05.510270132 -0400 +@@ -397,7 +397,7 @@ static const char * const smca_smu_mce_d + "An ECC or parity error in an SMU RAM instance", + }; + +-static const char * smca_smu2_mce_desc[64] = { ++static const char * const smca_smu2_mce_desc[] = { + "High SRAM ECC or parity error", + "Low SRAM ECC or parity error", + "Data Cache Bank A ECC or parity error", +@@ -410,14 +410,13 @@ static const char * smca_smu2_mce_desc[6 + "Instruction Tag Cache Bank B ECC or parity error", + "System Hub Read Buffer ECC or parity error", + "PHY RAS ECC Error", +-}; +- +-static const char * smca_smu2_ext_mce_desc[] = { ++ [12 ... 57] = "Reserved", + "A correctable error from a GFX Sub-IP", + "A fatal error from a GFX Sub-IP", + "Reserved", + "Reserved", + "A poison error from a GFX Sub-IP", ++ "Reserved", + }; + + static const char * const smca_mp5_mce_desc[] = { +@@ -824,27 +823,6 @@ static struct smca_bank_name smca_names[ + [SMCA_GMI_PHY] = { "Global Memory Interconnect PHY Unit" }, + }; + +-void smca_smu2_ext_err_desc(void) +-{ +- int i, j; +- int smu2_bits = 62; +- +- /* +- * MCA_CTL_SMU error stings are defined for b'58:59 and b'62 +- * in MI300A AMD systems. See AMD PPR MCA::SMU::MCA_CTL_SMU +- * +- * b'0:11 can be decoded from existing array smca_smu2_mce_desc. +- * b'12:57 are Reserved and b'58:62 are appended to the +- * smca_smu2_mce_desc. 
+- */ +- for (i = 12, j = 0; i < smu2_bits || j < 5; i++, j++) { +- for ( ; i < 58; i++) +- smca_smu2_mce_desc[i] = "Reserved"; +- +- smca_smu2_mce_desc[i] = smca_smu2_ext_mce_desc[j]; +- } +-} +- + void amd_decode_errcode(struct mce_event *e) + { + +@@ -936,7 +914,6 @@ unsigned short xec = (e->status >> 16) & + mcatype_hwid = HWID_MCATYPE(ipid_high & MCI_IPID_HWID, + (ipid_high & MCI_IPID_MCATYPE) >> 16); + +- smca_smu2_ext_err_desc(); + fixup_hwid(m, &mcatype_hwid); + + for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { +--- rasdaemon-0.6.7.orig/ras-mce-handler.h 2024-07-18 11:14:26.008582740 -0400 ++++ rasdaemon-0.6.7/ras-mce-handler.h 2024-07-18 11:14:28.987559165 -0400 +@@ -121,7 +121,6 @@ int set_intel_imc_log(enum cputype cputy + /* Undertake AMD SMCA Error Decoding */ + void decode_smca_error(struct mce_event *e, struct mce_priv *m); + void amd_decode_errcode(struct mce_event *e); +-void smca_smu2_ext_err_desc(void); + + /* Per-CPU-type decoders for Intel CPUs */ + void p4_decode_model(struct mce_event *e); diff --git a/rasdaemon.spec b/rasdaemon.spec index d8eaa5d..07eecb5 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.7 -Release: 13%{?dist} +Release: 15%{?dist} Summary: Utility to receive RAS error tracings License: GPL-2.0-only URL: http://git.infradead.org/users/mchehab/rasdaemon.git @@ -38,6 +38,7 @@ Patch29: 9bd84aef87978b806178a73ed33c39d6c442fc1f.patch Patch30: 885e546add918457c453bd3f753ac7df90b39e36.patch Patch31: 7ed2da7aedf8bc8ad4c4efe7acbda60ba061be6e.patch Patch32: ced615cf8146f51b5d6fe7a29107a2adc77407ca.patch +Patch33: 73d8177ce0d2fcb7693cacee4778d0845ebd3788.patch ExcludeArch: s390 s390x BuildRequires: make @@ -105,6 +106,7 @@ an utility for reporting current error counts from the EDAC sysfs files. 
%patch30 -p1 %patch31 -p1 %patch32 -p1 +%patch33 -p1 # The tarball is locked in time the first time aclocal was ran and will keep # requiring an older version of automake @@ -140,6 +142,9 @@ sed -i "s/^PAGE_CE_ACTION=.*/PAGE_CE_ACTION=account/" %{buildroot}/%{_sysconfdir %{_sysconfdir}/sysconfig/rasdaemon %changelog +* Thu Jul 18 2024 Aristeu Rozanski 0.6.7-15 +- rasdaemon: mce-amd-smca: Optimizing decoding of MCA_CTL_SMU bits [RHEL-48819] + * Fri Jun 28 2024 Aristeu Rozanski 0.6.7-13 - rasdaemon: Add error decoding for MCA_CTL_SMU extended bits [RHEL-35718]