diff --git a/045ab08eaa00172d50621df9502f6910f3fe3af4.patch b/045ab08eaa00172d50621df9502f6910f3fe3af4.patch new file mode 100644 index 0000000..99887d4 --- /dev/null +++ b/045ab08eaa00172d50621df9502f6910f3fe3af4.patch @@ -0,0 +1,154 @@ +commit 045ab08eaa00172d50621df9502f6910f3fe3af4 +Author: Avadhut Naik +Date: Mon Apr 1 23:33:07 2024 -0500 + + rasdaemon: Add support to parse the PPIN field of mce tracepoint + + Support for exporting the PPIN (Protected Processor Inventory Number) + is being added to the mce_record tracepoint. + + Add the required, corresponding support in the rasdaemon for the field + to be parsed and logged or added to the database and viewed later through + ras-mc-ctl utility. + + Signed-off-by: Avadhut Naik + Signed-off-by: Mauro Carvalho Chehab + +--- + ras-mce-handler.c | 7 +++++++ + ras-mce-handler.h | 1 + + ras-record.c | 42 ++++++++++++++++++++++-------------------- + util/ras-mc-ctl.in | 7 ++++--- + 4 files changed, 34 insertions(+), 23 deletions(-) + +--- rasdaemon-0.6.7.orig/ras-mce-handler.c 2024-08-22 14:30:41.581276901 -0400 ++++ rasdaemon-0.6.7/ras-mce-handler.c 2024-08-22 14:33:19.940957855 -0400 +@@ -369,6 +369,9 @@ #if 0 + + trace_seq_printf(s, ", apicid= %x", e->apicid); + ++ if (e->ppin) ++ trace_seq_printf(s, ", ppin= %llx", (long long)e->ppin); ++ + /* + * FIXME: The original mcelog userspace tool uses DMI to map from + * address to DIMM. From the comments there, the code there doesn't +@@ -545,6 +548,10 @@ if (pevent_get_field_val(s, event, "ipid + return -1; + e.ipid = val; + ++ /* Get PPIN */ ++ if (!pevent_get_field_val(s, event, "ppin", record, &val, 1)) ++ e.ppin = val; ++ + switch (mce->cputype) { + case CPU_GENERIC: + break; +--- rasdaemon-0.6.7.orig/ras-mce-handler.h 2024-08-22 14:30:41.581276901 -0400 ++++ rasdaemon-0.6.7/ras-mce-handler.h 2024-08-22 14:33:47.312729865 -0400 +@@ -74,6 +74,7 @@ struct mce_event { + uint8_t cpuvendor; + uint64_t synd; /* MCA_SYND MSR: only valid on SMCA systems */ + uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */ ++ uint64_t ppin; + + /* Parsed data */ + char timestamp[64]; +--- rasdaemon-0.6.7.orig/ras-record.c 2024-08-22 14:30:41.581276901 -0400 ++++ rasdaemon-0.6.7/ras-record.c 2024-08-22 14:37:20.884941279 -0400 +@@ -330,19 +330,20 @@ static const struct db_fields mce_record + { .name="ip", .type="INTEGER" }, + { .name="tsc", .type="INTEGER" }, + { .name="walltime", .type="INTEGER" }, +- { .name="cpu", .type="INTEGER" }, // 10 ++ { .name = "ppin", .type = "INTEGER" }, // 10 ++ { .name="cpu", .type="INTEGER" }, + { .name="cpuid", .type="INTEGER" }, + { .name="apicid", .type="INTEGER" }, + { .name="socketid", .type="INTEGER" }, +- { .name="cs", .type="INTEGER" }, +- { .name="bank", .type="INTEGER" }, //15 ++ { .name="cs", .type="INTEGER" }, //15 ++ { .name="bank", .type="INTEGER" }, + { .name="cpuvendor", .type="INTEGER" }, + + /* Parsed data - will likely change */ + { .name="bank_name", .type="TEXT" }, + { .name="error_msg", .type="TEXT" }, +- { .name="mcgstatus_msg", .type="TEXT" }, +- { .name="mcistatus_msg", .type="TEXT" }, // 20 ++ { .name="mcgstatus_msg", .type="TEXT" }, // 20 ++ { .name="mcistatus_msg", .type="TEXT" }, + { .name="mcastatus_msg", .type="TEXT" }, + { .name="user_action", .type="TEXT" }, + { .name="mc_location", .type="TEXT" }, +@@ -372,21 +373,22 @@ return 0; + sqlite3_bind_int64 (priv->stmt_mce_record, 7, ev->ip); + sqlite3_bind_int64 (priv->stmt_mce_record, 8, ev->tsc); + sqlite3_bind_int64 (priv->stmt_mce_record, 9, ev->walltime); +- sqlite3_bind_int (priv->stmt_mce_record, 10, ev->cpu); +- sqlite3_bind_int (priv->stmt_mce_record, 11, ev->cpuid); +- sqlite3_bind_int (priv->stmt_mce_record, 12, ev->apicid); +- sqlite3_bind_int (priv->stmt_mce_record, 13, ev->socketid); +- sqlite3_bind_int (priv->stmt_mce_record, 14, ev->cs); +- sqlite3_bind_int (priv->stmt_mce_record, 15, ev->bank); +- sqlite3_bind_int (priv->stmt_mce_record, 16, ev->cpuvendor); +- +- sqlite3_bind_text(priv->stmt_mce_record, 17, ev->bank_name, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 18, ev->error_msg, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 19, ev->mcgstatus_msg, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 20, ev->mcistatus_msg, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 21, ev->mcastatus_msg, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 22, ev->user_action, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 23, ev->mc_location, -1, NULL); ++ sqlite3_bind_int64(priv->stmt_mce_record, 10, ev->ppin); ++ sqlite3_bind_int (priv->stmt_mce_record, 11, ev->cpu); ++ sqlite3_bind_int (priv->stmt_mce_record, 12, ev->cpuid); ++ sqlite3_bind_int (priv->stmt_mce_record, 13, ev->apicid); ++ sqlite3_bind_int (priv->stmt_mce_record, 14, ev->socketid); ++ sqlite3_bind_int (priv->stmt_mce_record, 15, ev->cs); ++ sqlite3_bind_int (priv->stmt_mce_record, 16, ev->bank); ++ sqlite3_bind_int (priv->stmt_mce_record, 17, ev->cpuvendor); ++ ++ sqlite3_bind_text(priv->stmt_mce_record, 18, ev->bank_name, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 19, ev->error_msg, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 20, ev->mcgstatus_msg, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 21, ev->mcistatus_msg, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 22, ev->mcastatus_msg, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 23, ev->user_action, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 24, ev->mc_location, -1, NULL); + + rc = sqlite3_step(priv->stmt_mce_record); + if (rc != SQLITE_OK && rc != SQLITE_DONE) +--- rasdaemon-0.6.7.orig/util/ras-mc-ctl.in 2024-08-22 14:29:51.058697724 -0400 ++++ rasdaemon-0.6.7/util/ras-mc-ctl.in 2024-08-22 14:34:53.803175632 -0400 +@@ -1317,7 +1317,7 @@ sub errors + { + require DBI; + my ($query, $query_handle, $id, $time, $devname, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); +- my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); ++ my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); + my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); + my ($bus_name, $dev_name, $driver_name, $reporter_name); + my ($dev, $sector, $nr_sector, $error, $rwbs, $cmd); +@@ -1485,10 +1485,10 @@ $out .= sprintf "address=0x%08x, ", $add + + # MCE mce_record errors + if ($has_mce == 1) { +- $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record order by id"; ++ $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, ppin, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +- $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); ++ $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); + $out = ""; + while($query_handle->fetch()) { + $out .= "$id $time error: $msg"; +@@ -1507,6 +1507,7 @@ $out .= sprintf ", misc=0x%08x", $misc i + $out .= sprintf ", ip=0x%08x", $ip if ($ip); + $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc); + $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime); ++ $out .= sprintf ", ppin=0x%08x", $ppin if ($ppin); + $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu); + $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid); + $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid); diff --git a/79065939fc4bc1da72a3718937fab80e73a6dd75.patch b/79065939fc4bc1da72a3718937fab80e73a6dd75.patch new file mode 100644 index 0000000..8f4e2a3 --- /dev/null +++ b/79065939fc4bc1da72a3718937fab80e73a6dd75.patch @@ -0,0 +1,128 @@ +commit 79065939fc4bc1da72a3718937fab80e73a6dd75 +Author: Avadhut Naik +Date: Tue Apr 2 00:07:38 2024 -0500 + + rasdaemon: Add support to parse microcode field of mce tracepoint + + Support for exporting the Microcode Revision is being added to the + mce_record tracepoint. + + Add the required, corresponding support in the rasdaemon for the field + to be parsed and logged or added to the database and viewed later through + ras-mc-ctl utility. + + Signed-off-by: Avadhut Naik + Signed-off-by: Mauro Carvalho Chehab + +--- + ras-mce-handler.c | 7 +++++++ + ras-mce-handler.h | 1 + + ras-record.c | 20 +++++++++++--------- + util/ras-mc-ctl.in | 7 ++++--- + 4 files changed, 23 insertions(+), 12 deletions(-) + +--- rasdaemon-0.6.7.orig/ras-mce-handler.c 2024-08-22 14:44:51.352160832 -0400 ++++ rasdaemon-0.6.7/ras-mce-handler.c 2024-08-22 14:44:51.361160757 -0400 +@@ -372,6 +372,9 @@ #if 0 + if (e->ppin) + trace_seq_printf(s, ", ppin= %llx", (long long)e->ppin); + ++ if (e->microcode) ++ trace_seq_printf(s, ", microcode= %x", e->microcode); ++ + /* + * FIXME: The original mcelog userspace tool uses DMI to map from + * address to DIMM. From the comments there, the code there doesn't +@@ -552,6 +555,10 @@ if (pevent_get_field_val(s, event, "ipid + if (!pevent_get_field_val(s, event, "ppin", record, &val, 1)) + e.ppin = val; + ++ /* Get Microcode Revision */ ++ if (!pevent_get_field_val(s, event, "microcode", record, &val, 1)) ++ e.microcode = val; ++ + switch (mce->cputype) { + case CPU_GENERIC: + break; +--- rasdaemon-0.6.7.orig/ras-mce-handler.h 2024-08-22 14:44:51.352160832 -0400 ++++ rasdaemon-0.6.7/ras-mce-handler.h 2024-08-22 14:44:51.361160757 -0400 +@@ -75,6 +75,7 @@ struct mce_event { + uint64_t synd; /* MCA_SYND MSR: only valid on SMCA systems */ + uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + uint64_t ppin; ++ uint32_t microcode; + + /* Parsed data */ + char timestamp[64]; +--- rasdaemon-0.6.7.orig/ras-record.c 2024-08-22 14:44:51.353160824 -0400 ++++ rasdaemon-0.6.7/ras-record.c 2024-08-22 14:44:51.362160748 -0400 +@@ -338,11 +338,12 @@ { .name = "ppin", .type = "INTEGER" }, + { .name="cs", .type="INTEGER" }, //15 + { .name="bank", .type="INTEGER" }, + { .name="cpuvendor", .type="INTEGER" }, ++ { .name = "microcode", .type = "INTEGER" }, + + /* Parsed data - will likely change */ + { .name="bank_name", .type="TEXT" }, +- { .name="error_msg", .type="TEXT" }, +- { .name="mcgstatus_msg", .type="TEXT" }, // 20 ++ { .name="error_msg", .type="TEXT" }, // 20 ++ { .name="mcgstatus_msg", .type="TEXT" }, + { .name="mcistatus_msg", .type="TEXT" }, + { .name="mcastatus_msg", .type="TEXT" }, + { .name="user_action", .type="TEXT" }, +@@ -381,14 +382,15 @@ sqlite3_bind_int64(priv->stmt_mce_record + sqlite3_bind_int (priv->stmt_mce_record, 15, ev->cs); + sqlite3_bind_int (priv->stmt_mce_record, 16, ev->bank); + sqlite3_bind_int (priv->stmt_mce_record, 17, ev->cpuvendor); ++ sqlite3_bind_int (priv->stmt_mce_record, 18, ev->microcode); + +- sqlite3_bind_text(priv->stmt_mce_record, 18, ev->bank_name, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 19, ev->error_msg, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 20, ev->mcgstatus_msg, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 21, ev->mcistatus_msg, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 22, ev->mcastatus_msg, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 23, ev->user_action, -1, NULL); +- sqlite3_bind_text(priv->stmt_mce_record, 24, ev->mc_location, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 19, ev->bank_name, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 20, ev->error_msg, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 21, ev->mcgstatus_msg, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 22, ev->mcistatus_msg, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 23, ev->mcastatus_msg, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 24, ev->user_action, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 25, ev->mc_location, -1, NULL); + + rc = sqlite3_step(priv->stmt_mce_record); + if (rc != SQLITE_OK && rc != SQLITE_DONE) +--- rasdaemon-0.6.7.orig/util/ras-mc-ctl.in 2024-08-22 14:44:51.353160824 -0400 ++++ rasdaemon-0.6.7/util/ras-mc-ctl.in 2024-08-22 14:44:51.362160748 -0400 +@@ -1317,7 +1317,7 @@ sub errors + { + require DBI; + my ($query, $query_handle, $id, $time, $devname, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); +- my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); ++ my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $microcode, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); + my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); + my ($bus_name, $dev_name, $driver_name, $reporter_name); + my ($dev, $sector, $nr_sector, $error, $rwbs, $cmd); +@@ -1485,10 +1485,10 @@ $out .= sprintf "address=0x%08x, ", $add + + # MCE mce_record errors + if ($has_mce == 1) { +- $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, ppin, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record order by id"; ++ $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, ppin, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, microcode, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +- $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); ++ $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $microcode, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); + $out = ""; + while($query_handle->fetch()) { + $out .= "$id $time error: $msg"; +@@ -1514,6 +1514,7 @@ $out .= sprintf ", apicid=0x%08x", $apic + $out .= sprintf ", socketid=0x%08x", $socketid if ($socketid); + $out .= sprintf ", cs=0x%08x", $cs if ($cs); + $out .= sprintf ", bank=0x%08x", $bank if ($bank); ++ $out .= sprintf ", microcode=0x%08x", $microcode if ($microcode); + + $out .= "\n"; + } diff --git a/ad0444190e02bca309a61a4bad51bc0e16c0aef5.patch b/ad0444190e02bca309a61a4bad51bc0e16c0aef5.patch new file mode 100644 index 0000000..d289ea4 --- /dev/null +++ b/ad0444190e02bca309a61a4bad51bc0e16c0aef5.patch @@ -0,0 +1,134 @@ +commit ad0444190e02bca309a61a4bad51bc0e16c0aef5 +Author: Avadhut Naik +Date: Fri May 10 13:20:19 2024 -0500 + + rasdaemon: Update SMCA bank error descriptions + + Update error descriptions of SMCA bank types to support AMD's new Family + 1Ah-based processors. + Also, modify some existing error descriptions to better reflect the error + received. + + Signed-off-by: Avadhut Naik + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 6632663..a55e013 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -108,7 +108,7 @@ static const char * const smca_ls_mce_desc[] = { + "Store queue parity", + "Miss address buffer payload parity", + "L1 TLB parity", +- "Reserved", ++ "DC Tag error type 5", + "DC tag error type 6", + "DC tag error type 1", + "Internal error type 1", +@@ -125,6 +125,12 @@ static const char * const smca_ls_mce_desc[] = { + "DC tag error type 3", + "DC tag error type 5", + "L2 fill data error", ++ "Error on SCB cacheline state or address field", ++ "Error on SCB data, commit pipe 0", ++ "Error on SCB data, commit pipe 1", ++ "Error on SCB data for non-cacheable DRAM or IO", ++ "System Read Data Error detected by write combine buffer", ++ "Hardware Asserts", + }; + + static const char * const smca_ls2_mce_desc[] = { +@@ -168,7 +174,7 @@ static const char * const smca_if_mce_desc[] = { + "BP L1-BTB Multi-Hit Error", + "BP L2-BTB Multi-Hit Error", + "L2 Cache Response Poison error", +- "L2 Cache Error Response", ++ "System Read Data error", + "Hardware Assertion Error", + "L1-TLB Multi-Hit", + "L2-TLB Multi-Hit", +@@ -182,6 +188,7 @@ static const char * const smca_l2_mce_desc[] = { + "L2M Data Array ECC Error", + "Hardware Assert Error", + "SDP Read Response Parity Error", ++ "Error initiated by programmable state machine", + }; + + static const char * const smca_de_mce_desc[] = { +@@ -193,7 +200,7 @@ static const char * const smca_de_mce_desc[] = { + "Fetch address FIFO parity error", + "Patch RAM data parity error", + "Patch RAM sequencer parity error", +- "Micro-op buffer parity error", ++ "Micro-op fetch queue parity error", + "Hardware Assertion MCA Error", + }; + +@@ -235,6 +242,7 @@ static const char * const smca_l3_mce_desc[] = { + "L3 victim queue Data Fabric error", + "L3 Hardware Assertion", + "XI WCB Parity Poison Creation event", ++ "Machine check error initiated by DSM action", + }; + + static const char * const smca_cs_mce_desc[] = { +@@ -268,6 +276,9 @@ static const char * const smca_cs2_mce_desc[] = { + "Address Violation on the no data channel", + "Security Violation on the no data channel", + "Hardware Assert Error", ++ "Shadow Tag Array Protocol Error", ++ "Shadow Tag ECC Error", ++ "Shadow Tag Transaction Error", + }; + + /* +@@ -303,6 +314,8 @@ static const char * const smca_pie_mce_desc[] = { + "A deferred error was detected in the DF", + "Watch Dog Timer", + "An SRAM ECC error was detected in the CNLI block", ++ "Register access during DF Cstate", ++ "DSM Error", + }; + + static const char * const smca_umc_mce_desc[] = { +@@ -318,6 +331,11 @@ static const char * const smca_umc_mce_desc[] = { + "ECS Error", + "UMC Throttling Error", + "Read CRC Error", ++ "Reserved", ++ "Reserved", ++ "Reserved", ++ "Reserved", ++ "RFM SRAM ECC error", + }; + + static const char * const smca_umc_quirk_mce_desc[] = { +@@ -391,6 +409,12 @@ static const char * const smca_psp2_mce_desc[] = { + "TLB Bank 0 parity error", + "TLB Bank 1 parity error", + "System Hub Read Buffer ECC or parity error", ++ "FUSE IP SRAM ECC or parity error", ++ "PCRU FUSE SRAM ECC or parity error", ++ "SIB SRAM parity error", ++ "mpASP SECEMC Error", ++ "mpASP A5 Hang", ++ "SIB WDT error", + }; + + static const char * const smca_smu_mce_desc[] = { +@@ -431,6 +455,7 @@ static const char * const smca_mp5_mce_desc[] = { + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", ++ "Fuse SRAM ECC or parity error", + }; + + static const char * const smca_mpdma_mce_desc[] = { +@@ -483,6 +508,7 @@ static const char * const smca_mpdma_mce_desc[] = { + "MPDMA PTE Internal Data FIFO ECC or parity error", + "MPDMA PTE Command Memory DMA ECC or parity error", + "MPDMA PTE Command Memory Internal ECC or parity error", ++ "MPDMA TVF SDP Master Memory 7 ECC or parity error", + }; + + static const char * const smca_nbio_mce_desc[] = { diff --git a/b1ace39286e287282a275b6edc90dc2f64e60a3c.patch b/b1ace39286e287282a275b6edc90dc2f64e60a3c.patch new file mode 100644 index 0000000..0f5280f --- /dev/null +++ b/b1ace39286e287282a275b6edc90dc2f64e60a3c.patch @@ -0,0 +1,56 @@ +commit b1ace39286e287282a275b6edc90dc2f64e60a3c +Author: Avadhut Naik +Date: Mon Mar 25 23:06:08 2024 -0500 + + rasdaemon: ras-mc-ctl: Add support to display mcastatus_msg string + + Currently, the mcastatus_msg string of struct mce_event is added to the + SQLite database by the rasdaemon when it is recording errors. The same + however, is not outputted by the ras-mc-ctl utility. + + The string provides important error information relating to the received + MCE. For example, on AMD SMCA systems, the string outputs extended error + code and description. As such, the string should be present in the + output of ras-mc-ctl utility. + + Add support to output the string through the ras-mc-ctl utility. + + Signed-off-by: Avadhut Naik + Signed-off-by: Mauro Carvalho Chehab + +--- + util/ras-mc-ctl.in | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- rasdaemon-0.6.7.orig/util/ras-mc-ctl.in 2024-08-19 15:08:29.246429487 -0400 ++++ rasdaemon-0.6.7/util/ras-mc-ctl.in 2024-08-19 15:10:55.478162148 -0400 +@@ -1317,7 +1317,7 @@ sub errors + { + require DBI; + my ($query, $query_handle, $id, $time, $devname, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); +- my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location); ++ my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); + my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); + my ($bus_name, $dev_name, $driver_name, $reporter_name); + my ($dev, $sector, $nr_sector, $error, $rwbs, $cmd); +@@ -1485,10 +1485,10 @@ $out .= sprintf "address=0x%08x, ", $add + + # MCE mce_record errors + if ($has_mce == 1) { +- $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; ++ $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +- $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); ++ $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); + $out = ""; + while($query_handle->fetch()) { + $out .= "$id $time error: $msg"; +@@ -1496,6 +1496,7 @@ $out .= sprintf "address=0x%08x, ", $add + $out .= ", bank $bank_name" if ($bank_name); + $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg); + $out .= ", mci $mcistatus_msg" if ($mcistatus_msg); ++ $out .= ", mca $mcastatus_msg" if ($mcastatus_msg); + $out .= ", $mc_location" if ($mc_location); + $out .= ", $user_action" if ($user_action); + $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap); diff --git a/rasdaemon.spec b/rasdaemon.spec index 07eecb5..470c385 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.7 -Release: 15%{?dist} +Release: 16%{?dist} Summary: Utility to receive RAS error tracings License: GPL-2.0-only URL: http://git.infradead.org/users/mchehab/rasdaemon.git @@ -39,6 +39,10 @@ Patch30: 885e546add918457c453bd3f753ac7df90b39e36.patch Patch31: 7ed2da7aedf8bc8ad4c4efe7acbda60ba061be6e.patch Patch32: ced615cf8146f51b5d6fe7a29107a2adc77407ca.patch Patch33: 73d8177ce0d2fcb7693cacee4778d0845ebd3788.patch +Patch34: ad0444190e02bca309a61a4bad51bc0e16c0aef5.patch +Patch35: b1ace39286e287282a275b6edc90dc2f64e60a3c.patch +Patch36: 045ab08eaa00172d50621df9502f6910f3fe3af4.patch +Patch37: 79065939fc4bc1da72a3718937fab80e73a6dd75.patch ExcludeArch: s390 s390x BuildRequires: make @@ -107,6 +111,10 @@ an utility for reporting current error counts from the EDAC sysfs files. %patch31 -p1 %patch32 -p1 %patch33 -p1 +%patch34 -p1 +%patch35 -p1 +%patch36 -p1 +%patch37 -p1 # The tarball is locked in time the first time aclocal was ran and will keep # requiring an older version of automake @@ -142,7 +150,13 @@ sed -i "s/^PAGE_CE_ACTION=.*/PAGE_CE_ACTION=account/" %{buildroot}/%{_sysconfdir %{_sysconfdir}/sysconfig/rasdaemon %changelog -* Thu Jul 18 2024 Aristeu Rozanski 0.6.7-14 +* Thu Sep 05 2024 Aristeu Rozanski 0.6.7-16 +- rasdaemon: Add support to parse the PPIN field of mce tracepoint [RHEL-52911] +- rasdaemon: Add support to parse microcode field of mce tracepoint [RHEL-52911] +- rasdaemon: Update SMCA bank error descriptions [RHEL-52911] +- rasdaemon: ras-mc-ctl: Add support to display mcastatus_msg string [RHEL-52911] + +* Thu Jul 18 2024 Aristeu Rozanski 0.6.7-15 - rasdaemon: mce-amd-smca: Optimizing decoding of MCA_CTL_SMU bits [RHEL-48819] * Fri Jun 28 2024 Aristeu Rozanski 0.6.7-13