117 lines
4.0 KiB
Diff
117 lines
4.0 KiB
Diff
commit 81b362f0412eb9769098c2f4317b84b9bd82cce9
|
|
Author: Shiju Jose <shiju.jose@huawei.com>
|
|
Date: Mon Feb 12 10:35:25 2024 +0000
|
|
|
|
rasdaemon: ras-mc-ctl: Add support for CXL AER correctable trace events
|
|
|
|
Add support for CXL AER correctable events to the ras-mc-ctl tool.
|
|
|
|
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
|
|
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
|
|
(cherry picked from commit ae1647624486fca0070b297d0e2fd4e53443c10b)
|
|
|
|
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
|
|
index c0a2ec6..9519279 100755
|
|
--- a/util/ras-mc-ctl.in
|
|
+++ b/util/ras-mc-ctl.in
|
|
@@ -1230,6 +1230,46 @@ sub get_cxl_ue_error_status_text
|
|
return join (", ", @out);
|
|
}
|
|
|
|
+use constant { # bit masks tested against error_status by get_cxl_ce_error_status_text
|
|
+ CXL_AER_CE_CACHE_DATA_ECC => 0x0001, # decoded as 'Cache Data ECC Error'
|
|
+ CXL_AER_CE_MEM_DATA_ECC => 0x0002, # decoded as 'Memory Data ECC Error'
|
|
+ CXL_AER_CE_CRC_THRESH => 0x0004, # decoded as 'CRC Threshold Hit'
|
|
+ CXL_AER_CE_RETRY_THRESH => 0x0008, # decoded as 'Retry Threshold'
|
|
+ CXL_AER_CE_CACHE_POISON => 0x0010, # decoded as 'Received Cache Poison From Peer'
|
|
+ CXL_AER_CE_MEM_POISON => 0x0020, # decoded as 'Received Memory Poison From Peer'
|
|
+ CXL_AER_CE_PHYS_LAYER_ERR => 0x0040, # decoded as 'Received Error From Physical Layer'
|
|
+};
|
|
+
|
|
+# Translate a CXL AER correctable error status value into readable text.
|
|
+#
|
|
+# Argument: the error_status value to decode. Every CXL_AER_CE_* mask that
|
|
+# ANDs non-zero with it contributes its quoted label, in the fixed order
|
|
+# listed below. Returns the labels joined with ", " (an empty string when
|
|
+# no mask matches).
|
|
+sub get_cxl_ce_error_status_text
|
|
+{
|
|
+    my ($status) = @_;
|
|
+
|
|
+    # Mask/label pairs; each label keeps its quotes and trailing space.
|
|
+    my @labels_by_mask = (
|
|
+        [CXL_AER_CE_CACHE_DATA_ECC, "\'Cache Data ECC Error\' "],
|
|
+        [CXL_AER_CE_MEM_DATA_ECC, "\'Memory Data ECC Error\' "],
|
|
+        [CXL_AER_CE_CRC_THRESH, "\'CRC Threshold Hit\' "],
|
|
+        [CXL_AER_CE_RETRY_THRESH, "\'Retry Threshold\' "],
|
|
+        [CXL_AER_CE_CACHE_POISON, "\'Received Cache Poison From Peer\' "],
|
|
+        [CXL_AER_CE_MEM_POISON, "\'Received Memory Poison From Peer\' "],
|
|
+        [CXL_AER_CE_PHYS_LAYER_ERR, "\'Received Error From Physical Layer\' "],
|
|
+    );
|
|
+
|
|
+    my @matched;
|
|
+    foreach my $entry (@labels_by_mask) {
|
|
+        my ($mask, $label) = @{$entry};
|
|
+        push @matched, $label if ($status & $mask);
|
|
+    }
|
|
+
|
|
+    return join(", ", @matched);
|
|
+}
|
|
+
|
|
sub summary
|
|
{
|
|
require DBI;
|
|
@@ -1310,6 +1350,22 @@ sub summary
|
|
print "No CXL AER uncorrectable errors.\n\n";
|
|
}
|
|
$query_handle->finish;
|
|
+
|
|
+ # CXL AER correctable errors
|
|
+ $query = "select memdev, count(*) from cxl_aer_ce_event$conf{opt}{since} group by memdev";
|
|
+ $query_handle = $dbh->prepare($query);
|
|
+ $query_handle->execute();
|
|
+ $query_handle->bind_columns(\($memdev, $count));
|
|
+ $out = "";
|
|
+ while($query_handle->fetch()) {
|
|
+ $out .= "\t$memdev errors: $count\n";
|
|
+ }
|
|
+ if ($out ne "") {
|
|
+ print "CXL AER correctable events summary:\n$out\n";
|
|
+ } else {
|
|
+ print "No CXL AER correctable errors.\n\n";
|
|
+ }
|
|
+ $query_handle->finish;
|
|
}
|
|
|
|
# extlog errors
|
|
@@ -1519,6 +1575,29 @@ sub errors
|
|
print "No CXL AER uncorrectable errors.\n\n";
|
|
}
|
|
$query_handle->finish;
|
|
+
|
|
+ # CXL AER correctable errors
|
|
+ $query = "select id, timestamp, memdev, host, serial, error_status from cxl_aer_ce_event$conf{opt}{since} order by id";
|
|
+ $query_handle = $dbh->prepare($query);
|
|
+ $query_handle->execute();
|
|
+ $query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $error_status));
|
|
+ $out = "";
|
|
+ while($query_handle->fetch()) {
|
|
+ $out .= "$id $timestamp error: ";
|
|
+ $out .= "memdev=$memdev, " if (defined $memdev && length $memdev);
|
|
+ $out .= "host=$host, " if (defined $host && length $host);
|
|
+ $out .= sprintf "serial=0x%llx, ", $serial if (defined $serial && length $serial);
|
|
+ if (defined $error_status && length $error_status) {
|
|
+ $out .= sprintf "error_status: %s, ", get_cxl_ce_error_status_text($error_status);
|
|
+ }
|
|
+ $out .= "\n";
|
|
+ }
|
|
+ if ($out ne "") {
|
|
+ print "CXL AER correctable events:\n$out\n";
|
|
+ } else {
|
|
+ print "No CXL AER correctable errors.\n\n";
|
|
+ }
|
|
+ $query_handle->finish;
|
|
}
|
|
|
|
# Extlog errors
|