Merge branch 'arozansk-RHEL-35718' into 'c9s'
rasdaemon: Add error decoding for MCA_CTL_SMU extended bits See merge request redhat/centos-stream/rpms/rasdaemon!15
This commit is contained in:
commit
bb0ce16037
94
ced615cf8146f51b5d6fe7a29107a2adc77407ca.patch
Normal file
94
ced615cf8146f51b5d6fe7a29107a2adc77407ca.patch
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
commit ced615cf8146f51b5d6fe7a29107a2adc77407ca
|
||||||
|
Author: Sathya Priya Kumar <sathyapriya.k@amd.com>
|
||||||
|
Date: Thu Jan 11 01:20:07 2024 -0600
|
||||||
|
|
||||||
|
rasdaemon: Add error decoding for MCA_CTL_SMU extended bits
|
||||||
|
|
||||||
|
Enable error decoding support for the newly added extended
|
||||||
|
error bit descriptions from MCA_CTL_SMU.
|
||||||
|
b'0:11 can be decoded from existing array smca_smu2_mce_desc.
|
||||||
|
Define a function to append the newly defined b'58:62 to the
|
||||||
|
smca_smu2_mce_desc. This avoids having to maintain the Reserved bits
|
||||||
|
from b'12:57 in the code.
|
||||||
|
|
||||||
|
Signed-off-by: Sathya Priya Kumar <sathyapriya.k@amd.com>
|
||||||
|
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
|
||||||
|
|
||||||
|
---
|
||||||
|
mce-amd-smca.c | 33 ++++++++++++++++++++++++++++++++-
|
||||||
|
ras-mce-handler.h | 1 +
|
||||||
|
2 files changed, 33 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
--- rasdaemon-0.6.7.orig/mce-amd-smca.c 2024-06-28 10:34:16.453522865 -0400
|
||||||
|
+++ rasdaemon-0.6.7/mce-amd-smca.c 2024-06-28 10:34:46.049124270 -0400
|
||||||
|
@@ -397,7 +397,7 @@ static const char * const smca_smu_mce_d
|
||||||
|
"An ECC or parity error in an SMU RAM instance",
|
||||||
|
};
|
||||||
|
|
||||||
|
-static const char * const smca_smu2_mce_desc[] = {
|
||||||
|
+static const char * smca_smu2_mce_desc[64] = {
|
||||||
|
"High SRAM ECC or parity error",
|
||||||
|
"Low SRAM ECC or parity error",
|
||||||
|
"Data Cache Bank A ECC or parity error",
|
||||||
|
@@ -409,6 +409,15 @@ static const char * const smca_smu2_mce_
|
||||||
|
"Instruction Tag Cache Bank A ECC or parity error",
|
||||||
|
"Instruction Tag Cache Bank B ECC or parity error",
|
||||||
|
"System Hub Read Buffer ECC or parity error",
|
||||||
|
+ "PHY RAS ECC Error",
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static const char * smca_smu2_ext_mce_desc[] = {
|
||||||
|
+ "A correctable error from a GFX Sub-IP",
|
||||||
|
+ "A fatal error from a GFX Sub-IP",
|
||||||
|
+ "Reserved",
|
||||||
|
+ "Reserved",
|
||||||
|
+ "A poison error from a GFX Sub-IP",
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char * const smca_mp5_mce_desc[] = {
|
||||||
|
@@ -815,6 +824,27 @@ static struct smca_bank_name smca_names[
|
||||||
|
[SMCA_GMI_PHY] = { "Global Memory Interconnect PHY Unit" },
|
||||||
|
};
|
||||||
|
|
||||||
|
+void smca_smu2_ext_err_desc(void)
|
||||||
|
+{
|
||||||
|
+ int i, j;
|
||||||
|
+ int smu2_bits = 62;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * MCA_CTL_SMU error strings are defined for b'58:59 and b'62
|
||||||
|
+ * in MI300A AMD systems. See AMD PPR MCA::SMU::MCA_CTL_SMU
|
||||||
|
+ *
|
||||||
|
+ * b'0:11 can be decoded from existing array smca_smu2_mce_desc.
|
||||||
|
+ * b'12:57 are Reserved and b'58:62 are appended to the
|
||||||
|
+ * smca_smu2_mce_desc.
|
||||||
|
+ */
|
||||||
|
+ for (i = 12, j = 0; i < smu2_bits || j < 5; i++, j++) {
|
||||||
|
+ for ( ; i < 58; i++)
|
||||||
|
+ smca_smu2_mce_desc[i] = "Reserved";
|
||||||
|
+
|
||||||
|
+ smca_smu2_mce_desc[i] = smca_smu2_ext_mce_desc[j];
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
void amd_decode_errcode(struct mce_event *e)
|
||||||
|
{
|
||||||
|
|
||||||
|
@@ -906,6 +936,7 @@ unsigned short xec = (e->status >> 16) &
|
||||||
|
mcatype_hwid = HWID_MCATYPE(ipid_high & MCI_IPID_HWID,
|
||||||
|
(ipid_high & MCI_IPID_MCATYPE) >> 16);
|
||||||
|
|
||||||
|
+ smca_smu2_ext_err_desc();
|
||||||
|
fixup_hwid(m, &mcatype_hwid);
|
||||||
|
|
||||||
|
for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
|
||||||
|
--- rasdaemon-0.6.7.orig/ras-mce-handler.h 2024-06-28 10:34:16.453522865 -0400
|
||||||
|
+++ rasdaemon-0.6.7/ras-mce-handler.h 2024-06-28 10:34:17.795508302 -0400
|
||||||
|
@@ -121,6 +121,7 @@ int set_intel_imc_log(enum cputype cputy
|
||||||
|
/* Undertake AMD SMCA Error Decoding */
|
||||||
|
void decode_smca_error(struct mce_event *e, struct mce_priv *m);
|
||||||
|
void amd_decode_errcode(struct mce_event *e);
|
||||||
|
+void smca_smu2_ext_err_desc(void);
|
||||||
|
|
||||||
|
/* Per-CPU-type decoders for Intel CPUs */
|
||||||
|
void p4_decode_model(struct mce_event *e);
|
@ -1,6 +1,6 @@
|
|||||||
Name: rasdaemon
|
Name: rasdaemon
|
||||||
Version: 0.6.7
|
Version: 0.6.7
|
||||||
Release: 12%{?dist}
|
Release: 13%{?dist}
|
||||||
Summary: Utility to receive RAS error tracings
|
Summary: Utility to receive RAS error tracings
|
||||||
License: GPL-2.0-only
|
License: GPL-2.0-only
|
||||||
URL: http://git.infradead.org/users/mchehab/rasdaemon.git
|
URL: http://git.infradead.org/users/mchehab/rasdaemon.git
|
||||||
@ -37,6 +37,7 @@ Patch28: 9c86f6255f67a8bae28cd46c54500fc16bfc7a30.patch
|
|||||||
Patch29: 9bd84aef87978b806178a73ed33c39d6c442fc1f.patch
|
Patch29: 9bd84aef87978b806178a73ed33c39d6c442fc1f.patch
|
||||||
Patch30: 885e546add918457c453bd3f753ac7df90b39e36.patch
|
Patch30: 885e546add918457c453bd3f753ac7df90b39e36.patch
|
||||||
Patch31: 7ed2da7aedf8bc8ad4c4efe7acbda60ba061be6e.patch
|
Patch31: 7ed2da7aedf8bc8ad4c4efe7acbda60ba061be6e.patch
|
||||||
|
Patch32: ced615cf8146f51b5d6fe7a29107a2adc77407ca.patch
|
||||||
|
|
||||||
ExcludeArch: s390 s390x
|
ExcludeArch: s390 s390x
|
||||||
BuildRequires: make
|
BuildRequires: make
|
||||||
@ -103,6 +104,7 @@ an utility for reporting current error counts from the EDAC sysfs files.
|
|||||||
%patch29 -p1
|
%patch29 -p1
|
||||||
%patch30 -p1
|
%patch30 -p1
|
||||||
%patch31 -p1
|
%patch31 -p1
|
||||||
|
%patch32 -p1
|
||||||
|
|
||||||
# The tarball is locked in time the first time aclocal was run and will keep
|
# The tarball is locked in time the first time aclocal was run and will keep
|
||||||
# requiring an older version of automake
|
# requiring an older version of automake
|
||||||
@ -138,6 +140,9 @@ sed -i "s/^PAGE_CE_ACTION=.*/PAGE_CE_ACTION=account/" %{buildroot}/%{_sysconfdir
|
|||||||
%{_sysconfdir}/sysconfig/rasdaemon
|
%{_sysconfdir}/sysconfig/rasdaemon
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Fri Jun 28 2024 Aristeu Rozanski <aris@redhat.com> 0.6.7-13
|
||||||
|
- rasdaemon: Add error decoding for MCA_CTL_SMU extended bits [RHEL-35718]
|
||||||
|
|
||||||
* Thu Jun 20 2024 Aristeu Rozanski <aris@redhat.com> 0.6.7-12
|
* Thu Jun 20 2024 Aristeu Rozanski <aris@redhat.com> 0.6.7-12
|
||||||
- mce-amd-smca: update smca_hwid to use smca_bank_types [RHEL-24170]
|
- mce-amd-smca: update smca_hwid to use smca_bank_types [RHEL-24170]
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user