import rasdaemon-0.6.7-5.el9_0

This commit is contained in:
CentOS Sources 2022-04-05 05:46:18 -04:00 committed by Stepan Oksanichenko
parent 1db359d99d
commit 52b5c87f51
23 changed files with 1813 additions and 30 deletions

2
.gitignore vendored
View File

@ -1 +1 @@
SOURCES/rasdaemon-0.6.4.tar.bz2
SOURCES/rasdaemon-0.6.7.tar.bz2

View File

@ -1 +1 @@
2f3c6815ec9e8aa04439ff95dca0e29e77ba44fb SOURCES/rasdaemon-0.6.4.tar.bz2
8ae34f40b676a0843be6647854b950f45161e7d4 SOURCES/rasdaemon-0.6.7.tar.bz2

View File

@ -0,0 +1,32 @@
commit 1ff5f3d2a0fcd48add9462567c30fe0e14585fb4
Author: Matt Whitlock <whitslack@users.noreply.github.com>
Date: Wed Jun 9 10:25:18 2021 -0400
configure.ac: fix SYSCONFDEFDIR default value
configure.ac was using AC_ARG_WITH incorrectly, yielding a generated configure script like:
# Check whether --with-sysconfdefdir was given.
if test "${with_sysconfdefdir+set}" = set; then :
withval=$with_sysconfdefdir; SYSCONFDEFDIR=$withval
else
"/etc/sysconfig"
fi
This commit fixes the default case so that the SYSCONFDEFDIR variable is assigned the value "/etc/sysconfig" rather than trying to execute "/etc/sysconfig" as a command.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/configure.ac b/configure.ac
index f7d1947..33b81fe 100644
--- a/configure.ac
+++ b/configure.ac
@@ -172,7 +172,7 @@ AC_SUBST([RASSTATEDIR])
AC_ARG_WITH(sysconfdefdir,
AC_HELP_STRING([--with-sysconfdefdir=DIR], [rasdaemon environment file dir]),
[SYSCONFDEFDIR=$withval],
- ["/etc/sysconfig"])
+ [SYSCONFDEFDIR=/etc/sysconfig])
AC_SUBST([SYSCONFDEFDIR])
AC_DEFINE([RAS_DB_FNAME], ["ras-mc_event.db"], [ras events database])

View File

@ -0,0 +1,28 @@
commit 28ea956acc2dab7c18b4701f9657afb9ab3ddc79
Author: Muralidhara M K <muralimk@amd.com>
Date: Mon Jul 12 05:18:43 2021 -0500
rasdaemon: set SMCA maximum number of banks to 64
Newer AMD systems with SMCA banks support up to 64 MCA banks per CPU.
This patch is based on the commit below, which was upstreamed into the kernel:
a0bc32b3cacf ("x86/mce: Increase maximum number of banks to 64")
Signed-off-by: Muralidhara M K <muralimk@amd.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/mce-amd-smca.c b/mce-amd-smca.c
index e0cf512..3c346f4 100644
--- a/mce-amd-smca.c
+++ b/mce-amd-smca.c
@@ -75,6 +75,9 @@ enum smca_bank_types {
N_SMCA_BANK_TYPES
};
+/* Maximum number of MCA banks per CPU. */
+#define MAX_NR_BANKS 64
+
/* SMCA Extended error strings */
/* Load Store */
static const char * const smca_ls_mce_desc[] = {

View File

@ -0,0 +1,63 @@
commit 2b37a26dcec389723f75d69d3da9c2f15f6c317d
Author: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed May 26 12:41:27 2021 +0200
ci.yml: Fix the job for it to run on a single arch
There were some issues in the previous content. Fix them
to allow the job to build on a single architecture.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5b3e757..747a844 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,34 +1,23 @@
name: CI
-# Should run only on branches and PR, as "on_tag.yml" will handle tags
on:
+ workflow_dispatch:
push:
- branches: master test
pull_request:
- branches: master
jobs:
-
-#
-# Linux
-#
Ubuntu:
name: Ubuntu
- runs-on: ubuntu-20.04
- strategy:
- matrix:
- arch: [x64_64, aarch64, armv7, ppc64le]
+ runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
- with:
- arch: ${{ matrix.arch }}
- - name: prepare
- run: |
- sudo apt-get update
- sudo apt-get install -y build-essential sqlite3
- - name: build
- run: |
- autoreconf -vfi
- ./configure --enable-all
- make
- sudo make install
+ - uses: actions/checkout@v2
+ - name: prepare
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y build-essential sqlite3
+ - name: build
+ run: |
+ autoreconf -vfi
+ ./configure --enable-all
+ make
+ sudo make install

View File

@ -0,0 +1,44 @@
commit 2b6a54b0d31e02e657171fd27f4e31d996756bc6
Author: DmNosachev <quartz64@gmail.com>
Date: Thu Jul 22 10:25:38 2021 +0300
labels/supermicro: added Supermicro X10DRL, X11SPM
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/labels/supermicro b/labels/supermicro
index 1e7761f..990fc9e 100644
--- a/labels/supermicro
+++ b/labels/supermicro
@@ -88,6 +88,16 @@ Vendor: Supermicro
P2-DIMMF1: 1.1.0; P2-DIMMF2: 1.1.1;
P2-DIMMG1: 1.2.0; P2-DIMMG2: 1.2.1;
P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1;
+
+ Model: X10DRL-i
+ P1-DIMMA1: 0.0.0;
+ P1-DIMMB1: 0.1.0;
+ P1-DIMMC1: 0.2.0;
+ P1-DIMMD1: 0.3.0;
+ P2-DIMME1: 1.0.0;
+ P2-DIMMF1: 1.1.0;
+ P2-DIMMG1: 1.2.0;
+ P2-DIMMH1: 1.3.0;
Model: X11DDW-NT, X11DDW-L
P1-DIMMA1: 0.0.0;
@@ -102,6 +112,14 @@ Vendor: Supermicro
P2-DIMMD1: 3.0.0;
P2-DIMME1: 3.1.0;
P2-DIMMF1: 3.2.0;
+
+ Model: X11SPM-F, X11SPM-TF, X11SPM-TPF
+ DIMMA1: 0.0.0;
+ DIMMB1: 0.1.0;
+ DIMMC1: 0.2.0;
+ DIMMD1: 1.0.0;
+ DIMME1: 1.1.0;
+ DIMMF1: 1.2.0;
Model: B1DRi
P1_DIMMA1: 0.0.0;

View File

@ -0,0 +1,43 @@
commit 50565005b10fe909c66f1c90f2feb95712427c7d
Author: DmNosachev <quartz64@gmail.com>
Date: Tue Jun 29 14:07:54 2021 +0300
labels/supermicro: added Supermicro X11DDW-NT(-L)
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/labels/supermicro b/labels/supermicro
index 86e4617..373de07 100644
--- a/labels/supermicro
+++ b/labels/supermicro
@@ -69,7 +69,7 @@ Vendor: Supermicro
P2_DIMM4B: 2.0.1;
P2_DIMM4B: 2.1.1;
- Model: X11DPH-i
+ Model: X11DPH-i, X11DPH-T, X11DPH-TQ
P1-DIMMA1: 0.0.0; P1-DIMMA2: 0.0.1;
P1-DIMMB1: 0.1.0;
P1-DIMMC1: 0.2.0;
@@ -91,4 +91,18 @@ Vendor: Supermicro
P2-DIMME1: 1.0.0; P2-DIMME2: 1.0.1;
P2-DIMMF1: 1.1.0; P2-DIMMF2: 1.1.1;
P2-DIMMG1: 1.2.0; P2-DIMMG2: 1.2.1;
- P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1;
\ No newline at end of file
+ P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1;
+
+ Model: X11DDW-NT, X11DDW-L
+ P1-DIMMA1: 0.0.0;
+ P1-DIMMB1: 0.1.0;
+ P1-DIMMC1: 0.2.0;
+ P1-DIMMD1: 1.0.0;
+ P1-DIMME1: 1.1.0;
+ P1-DIMMF1: 1.2.0;
+ P2-DIMMA1: 2.0.0;
+ P2-DIMMB1: 2.1.0;
+ P2-DIMMC1: 2.2.0;
+ P2-DIMMD1: 3.0.0;
+ P2-DIMME1: 3.1.0;
+ P2-DIMMF1: 3.2.0;
\ No newline at end of file

View File

@ -0,0 +1,37 @@
commit 6bc43db1b6b3d73805179c21d1dd5521e8dc0f74
Author: DmNosachev <quartz64@gmail.com>
Date: Fri Jul 2 13:13:46 2021 +0300
labels/supermicro: added Supermicro X11SCA(-F)
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/labels/supermicro b/labels/supermicro
index b924a32..1e7761f 100644
--- a/labels/supermicro
+++ b/labels/supermicro
@@ -10,11 +10,7 @@
#
Vendor: Supermicro
- Model: A2SDi-8C-HLN4F
- DIMMA1: 0.0.0; DIMMA2: 0.0.1;
- DIMMB1: 0.1.0; DIMMB2: 0.1.1;
-
- Model: A2SDi-8C+-HLN4F
+ Model: A2SDi-8C-HLN4F, A2SDi-8C+-HLN4F
DIMMA1: 0.0.0; DIMMA2: 0.0.1;
DIMMB1: 0.1.0; DIMMB2: 0.1.1;
@@ -115,4 +111,8 @@ Vendor: Supermicro
P2_DIMME1: 1.0.0;
P2_DIMMF1: 1.1.0;
P2_DIMMG1: 1.2.0;
- P2_DIMMH1: 1.3.0;
\ No newline at end of file
+ P2_DIMMH1: 1.3.0;
+
+ Model: X11SCA, X11SCA-F
+ DIMMA1: 0.0.0, 0.1.0; DIMMA2: 0.2.0, 0.3.0;
+ DIMMB1: 0.0.1, 0.1.1; DIMMB2: 0.2.1, 0.3.1;
\ No newline at end of file

View File

@ -0,0 +1,610 @@
commit 738bafafdcb2e8b0ced32fff31b13754d571090b
Author: Jason Tian <jason@os.amperecomputing.com>
Date: Fri May 28 11:35:43 2021 +0800
Add error handling for Ampere-specific errors.
Save the decoded Ampere-specific errors into the sqlite3
database and log the PCIe segment and bus/device/function
number into the BMC SEL.
Signed-off-by: Jason Tian <jason@os.amperecomputing.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/non-standard-ampere.c b/non-standard-ampere.c
index 8cceb26..05b5252 100644
--- a/non-standard-ampere.c
+++ b/non-standard-ampere.c
@@ -216,6 +216,13 @@ static const char * const err_bert_sub_type[] = {
"PMPRO Fatal",
};
+static char *sqlite3_table_list[] = {
+ "amp_payload0_event_tab",
+ "amp_payload1_event_tab",
+ "amp_payload2_event_tab",
+ "amp_payload3_event_tab",
+};
+
struct amp_ras_type_info {
int id;
const char *name;
@@ -352,6 +359,359 @@ static const char *oem_subtype_name(const struct amp_ras_type_info *info,
return "unknown";
}
+#ifdef HAVE_SQLITE3
+/*key pair definition for ampere specific error payload type 0*/
+static const struct db_fields amp_payload0_event_fields[] = {
+ { .name = "id", .type = "INTEGER PRIMARY KEY" },
+ { .name = "timestamp", .type = "TEXT" },
+ { .name = "type", .type = "TEXT" },
+ { .name = "subtype", .type = "TEXT" },
+ { .name = "instance", .type = "INTEGER" },
+ { .name = "socket_num", .type = "INTEGER" },
+ { .name = "status_reg", .type = "INTEGER" },
+ { .name = "addr_reg", .type = "INTEGER" },
+ { .name = "misc0", .type = "INTEGER" },
+ { .name = "misc1", .type = "INTEGER" },
+ { .name = "misc2", .type = "INTEGER" },
+ { .name = "misc3", .type = "INTEGER" },
+};
+
+static const struct db_table_descriptor amp_payload0_event_tab = {
+ .name = "amp_payload0_event",
+ .fields = amp_payload0_event_fields,
+ .num_fields = ARRAY_SIZE(amp_payload0_event_fields),
+};
+
+/*key pair definition for ampere specific error payload type 1*/
+static const struct db_fields amp_payload1_event_fields[] = {
+ { .name = "id", .type = "INTEGER PRIMARY KEY" },
+ { .name = "timestamp", .type = "TEXT" },
+ { .name = "type", .type = "TEXT" },
+ { .name = "subtype", .type = "TEXT" },
+ { .name = "instance", .type = "INTEGER" },
+ { .name = "socket_num", .type = "INTEGER" },
+ { .name = "uncore_err_status", .type = "INTEGER" },
+ { .name = "uncore_err_mask", .type = "INTEGER" },
+ { .name = "uncore_err_sev", .type = "INTEGER" },
+ { .name = "core_err_status", .type = "INTEGER" },
+ { .name = "core_err_mask", .type = "INTEGER" },
+ { .name = "root_err_cmd", .type = "INTEGER" },
+ { .name = "root_err_status", .type = "INTEGER" },
+ { .name = "src_id", .type = "INTEGER" },
+ { .name = "reserved1", .type = "INTEGER" },
+ { .name = "reserverd2", .type = "INTEGER" },
+};
+
+static const struct db_table_descriptor amp_payload1_event_tab = {
+ .name = "amp_payload1_event",
+ .fields = amp_payload1_event_fields,
+ .num_fields = ARRAY_SIZE(amp_payload1_event_fields),
+};
+
+/* Column list for Ampere payload type 2; one uniquely named column per AMP_PAYLOAD2_FIELD_* index. */
+static const struct db_fields amp_payload2_event_fields[] = {
+ { .name = "id", .type = "INTEGER PRIMARY KEY" },
+ { .name = "timestamp", .type = "TEXT" },
+ { .name = "type", .type = "TEXT" },
+ { .name = "subtype", .type = "TEXT" },
+ { .name = "instance", .type = "INTEGER" },
+ { .name = "socket_num", .type = "INTEGER" },
+ { .name = "ce_report_reg", .type = "INTEGER" },
+ { .name = "ce_location", .type = "INTEGER" },
+ { .name = "ce_addr", .type = "INTEGER" },
+ { .name = "ue_report_reg", .type = "INTEGER" },
+ { .name = "ue_location", .type = "INTEGER" },
+ { .name = "ue_addr", .type = "INTEGER" },
+ { .name = "reserved1", .type = "INTEGER" },
+ { .name = "reserved2", .type = "INTEGER" },
+ { .name = "reserved3", .type = "INTEGER" }, /* was a second "reserved2": duplicate column name */
+};
+
+static const struct db_table_descriptor amp_payload2_event_tab = {
+ .name = "amp_payload2_event",
+ .fields = amp_payload2_event_fields,
+ .num_fields = ARRAY_SIZE(amp_payload2_event_fields),
+};
+
+/*key pair definition for ampere specific error payload type 3*/
+static const struct db_fields amp_payload3_event_fields[] = {
+ { .name = "id", .type = "INTEGER PRIMARY KEY" },
+ { .name = "timestamp", .type = "TEXT" },
+ { .name = "type", .type = "TEXT" },
+ { .name = "subtype", .type = "TEXT" },
+ { .name = "instance", .type = "INTEGER" },
+ { .name = "socket_num", .type = "INTEGER" },
+ { .name = "fw_spec_data0", .type = "INTEGER" },
+ { .name = "fw_spec_data1", .type = "INTEGER" },
+ { .name = "fw_spec_data2", .type = "INTEGER" },
+ { .name = "fw_spec_data3", .type = "INTEGER" },
+ { .name = "fw_spec_data4", .type = "INTEGER" },
+ { .name = "fw_spec_data5", .type = "INTEGER" },
+};
+
+static const struct db_table_descriptor amp_payload3_event_tab = {
+ .name = "amp_payload3_event",
+ .fields = amp_payload3_event_fields,
+ .num_fields = ARRAY_SIZE(amp_payload3_event_fields),
+};
+
+/*Save data with different type into sqlite3 db*/
+static void record_amp_data(struct ras_ns_ev_decoder *ev_decoder,
+ enum amp_oem_data_type data_type,
+ int id, int64_t data, const char *text)
+{
+ switch (data_type) {
+ case AMP_OEM_DATA_TYPE_INT:
+ sqlite3_bind_int(ev_decoder->stmt_dec_record, id, data);
+ break;
+ case AMP_OEM_DATA_TYPE_INT64:
+ sqlite3_bind_int64(ev_decoder->stmt_dec_record, id, data);
+ break;
+ case AMP_OEM_DATA_TYPE_TEXT:
+ sqlite3_bind_text(ev_decoder->stmt_dec_record, id,
+ text, -1, NULL);
+ break;
+ default:
+ break;
+ }
+}
+
+static int store_amp_err_data(struct ras_ns_ev_decoder *ev_decoder,
+ const char *name)
+{
+ int rc;
+
+ rc = sqlite3_step(ev_decoder->stmt_dec_record);
+ if (rc != SQLITE_OK && rc != SQLITE_DONE)
+ log(TERM, LOG_ERR,
+ "Failed to do %s step on sqlite: error = %d\n", name, rc);
+
+ rc = sqlite3_reset(ev_decoder->stmt_dec_record);
+ if (rc != SQLITE_OK && rc != SQLITE_DONE)
+ log(TERM, LOG_ERR,
+ "Failed to reset %s on sqlite: error = %d\n", name, rc);
+
+ rc = sqlite3_clear_bindings(ev_decoder->stmt_dec_record);
+ if (rc != SQLITE_OK && rc != SQLITE_DONE)
+ log(TERM, LOG_ERR,
+ "Failed to clear bindings %s on sqlite: error = %d\n",
+ name, rc);
+
+ return rc;
+}
+
+/*save all Ampere Specific Error Payload type 0 to sqlite3 database*/
+static void record_amp_payload0_err(struct ras_ns_ev_decoder *ev_decoder,
+ const char *type_str, const char *subtype_str,
+ const struct amp_payload0_type_sec *err)
+{
+ if (ev_decoder != NULL) {
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT,
+ AMP_PAYLOAD0_FIELD_TYPE, 0, type_str);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT,
+ AMP_PAYLOAD0_FIELD_SUB_TYPE, 0, subtype_str);
+
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD0_FIELD_INS, INSTANCE(err->instance), NULL);
+
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD0_FIELD_SOCKET_NUM,
+ SOCKET_NUM(err->instance), NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD0_FIELD_STATUS_REG, err->err_status, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD0_FIELD_ADDR_REG,
+ err->err_addr, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD0_FIELD_MISC0,
+ err->err_misc_0, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD0_FIELD_MISC1,
+ err->err_misc_1, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD0_FIELD_MISC2,
+ err->err_misc_2, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD0_FIELD_MISC3,
+ err->err_misc_3, NULL);
+ store_amp_err_data(ev_decoder, "amp_payload0_event_tab");
+ }
+}
+
+/*save all Ampere Specific Error Payload type 1 to sqlite3 database*/
+static void record_amp_payload1_err(struct ras_ns_ev_decoder *ev_decoder,
+ const char *type_str, const char *subtype_str,
+ const struct amp_payload1_type_sec *err)
+{
+ if (ev_decoder != NULL) {
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT,
+ AMP_PAYLOAD1_FIELD_TYPE, 0, type_str);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT,
+ AMP_PAYLOAD1_FIELD_SUB_TYPE, 0, subtype_str);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_INS,
+ INSTANCE(err->instance), NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_SOCKET_NUM,
+ SOCKET_NUM(err->instance), NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_UNCORE_ERR_STATUS,
+ err->uncore_status, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_UNCORE_ERR_MASK,
+ err->uncore_mask, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_UNCORE_ERR_SEV,
+ err->uncore_sev, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_CORE_ERR_STATUS,
+ err->core_status, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_CORE_ERR_MASK,
+ err->core_mask, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_ROOT_ERR_CMD,
+ err->root_err_cmd, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_ROOT_ERR_STATUS,
+ err->root_status, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_SRC_ID,
+ err->src_id, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD1_FIELD_RESERVED1,
+ err->reserved1, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD1_FIELD_RESERVED2,
+ err->reserved2, NULL);
+ store_amp_err_data(ev_decoder, "amp_payload1_event_tab");
+ }
+}
+
+/*save all Ampere Specific Error Payload type 2 to sqlite3 database*/
+static void record_amp_payload2_err(struct ras_ns_ev_decoder *ev_decoder,
+ const char *type_str, const char *subtype_str,
+ const struct amp_payload2_type_sec *err)
+{
+ if (ev_decoder != NULL) {
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT,
+ AMP_PAYLOAD2_FIELD_TYPE, 0, type_str);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT,
+ AMP_PAYLOAD2_FIELD_SUB_TYPE, 0, subtype_str);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD2_FIELD_INS, INSTANCE(err->instance), NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD2_FIELD_SOCKET_NUM,
+ SOCKET_NUM(err->instance), NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD2_FIELD_CE_REPORT_REG,
+ err->ce_register, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD2_FIELD_CE_LOACATION,
+ err->ce_location, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD2_FIELD_CE_ADDR,
+ err->ce_addr, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD2_FIELD_UE_REPORT_REG,
+ err->ue_register, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD2_FIELD_UE_LOCATION,
+ err->ue_location, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD2_FIELD_UE_ADDR,
+ err->ue_addr, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD2_FIELD_RESERVED1,
+ err->reserved1, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD2_FIELD_RESERVED2,
+ err->reserved2, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD2_FIELD_RESERVED3,
+ err->reserved3, NULL);
+ store_amp_err_data(ev_decoder, "amp_payload2_event_tab");
+ }
+}
+
+/*save all Ampere Specific Error Payload type 3 to sqlite3 database*/
+static void record_amp_payload3_err(struct ras_ns_ev_decoder *ev_decoder,
+ const char *type_str, const char *subtype_str,
+ const struct amp_payload3_type_sec *err)
+{
+ if (ev_decoder != NULL) {
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT,
+ AMP_PAYLOAD3_FIELD_TYPE, 0, type_str);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT,
+ AMP_PAYLOAD3_FIELD_SUB_TYPE, 0, subtype_str);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD3_FIELD_INS, INSTANCE(err->instance), NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD3_FIELD_SOCKET_NUM,
+ SOCKET_NUM(err->instance), NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA0,
+ err->fw_speci_data0, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA1,
+ err->fw_speci_data1, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA2,
+ err->fw_speci_data2, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA3,
+ err->fw_speci_data3, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA4,
+ err->fw_speci_data4, NULL);
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA5,
+ err->fw_speci_data5, NULL);
+ store_amp_err_data(ev_decoder, "amp_payload3_event_tab");
+ }
+}
+
+#else
+static void record_amp_data(struct ras_ns_ev_decoder *ev_decoder,
+ enum amp_oem_data_type data_type,
+ int id, int64_t data, const char *text)
+{
+ /* No-op stub for builds without HAVE_SQLITE3; a void function must not return a value. */
+}
+
+static void record_amp_payload0_err(struct ras_ns_ev_decoder *ev_decoder,
+ const char *type_str, const char *subtype_str,
+ const struct amp_payload0_type_sec *err)
+{
+ /* No-op stub for builds without HAVE_SQLITE3; a void function must not return a value. */
+}
+
+static void record_amp_payload1_err(struct ras_ns_ev_decoder *ev_decoder,
+ const char *type_str, const char *subtype_str,
+ const struct amp_payload1_type_sec *err)
+{
+ /* No-op stub for builds without HAVE_SQLITE3; a void function must not return a value. */
+}
+
+static void record_amp_payload2_err(struct ras_ns_ev_decoder *ev_decoder,
+ const char *type_str, const char *subtype_str,
+ const struct amp_payload2_type_sec *err)
+{
+ /* No-op stub for builds without HAVE_SQLITE3; a void function must not return a value. */
+}
+
+static void record_amp_payload3_err(struct ras_ns_ev_decoder *ev_decoder,
+ const char *type_str, const char *subtype_str,
+ const struct amp_payload3_type_sec *err)
+{
+ /* No-op stub for builds without HAVE_SQLITE3; a void function must not return a value. */
+}
+
+static int store_amp_err_data(struct ras_ns_ev_decoder *ev_decoder, const char *name)
+{
+ return 0; /* stub for builds without HAVE_SQLITE3: nothing to store; name is const as in the sqlite3 variant */
+}
+#endif
/*decode ampere specific error payload type 0, the CPU's data is save*/
/*to sqlite by ras-arm-handler, others are saved by this function.*/
@@ -434,6 +794,7 @@ void decode_amp_payload0_err_regs(struct ras_ns_ev_decoder *ev_decoder,
*p = '\0';
}
+ record_amp_payload0_err(ev_decoder, type_str, subtype_str, err);
i = 0;
p = NULL;
end = NULL;
@@ -517,6 +878,7 @@ static void decode_amp_payload1_err_regs(struct ras_ns_ev_decoder *ev_decoder,
*p = '\0';
}
+ record_amp_payload1_err(ev_decoder, type_str, subtype_str, err);
i = 0;
p = NULL;
end = NULL;
@@ -601,6 +963,7 @@ static void decode_amp_payload2_err_regs(struct ras_ns_ev_decoder *ev_decoder,
*p = '\0';
}
+ record_amp_payload2_err(ev_decoder, type_str, subtype_str, err);
i = 0;
p = NULL;
end = NULL;
@@ -673,6 +1036,7 @@ static void decode_amp_payload3_err_regs(struct ras_ns_ev_decoder *ev_decoder,
*p = '\0';
}
+ record_amp_payload3_err(ev_decoder, type_str, subtype_str, err);
i = 0;
p = NULL;
end = NULL;
@@ -687,6 +1051,38 @@ static int decode_amp_oem_type_error(struct ras_events *ras,
{
int payload_type = PAYLOAD_TYPE(event->error[0]);
+#ifdef HAVE_SQLITE3
+ struct db_table_descriptor db_tab;
+ int id = 0;
+
+ if (payload_type == PAYLOAD_TYPE_0) {
+ db_tab = amp_payload0_event_tab;
+ id = AMP_PAYLOAD0_FIELD_TIMESTAMP;
+ } else if (payload_type == PAYLOAD_TYPE_1) {
+ db_tab = amp_payload1_event_tab;
+ id = AMP_PAYLOAD1_FIELD_TIMESTAMP;
+ } else if (payload_type == PAYLOAD_TYPE_2) {
+ db_tab = amp_payload2_event_tab;
+ id = AMP_PAYLOAD2_FIELD_TIMESTAMP;
+ } else if (payload_type == PAYLOAD_TYPE_3) {
+ db_tab = amp_payload3_event_tab;
+ id = AMP_PAYLOAD3_FIELD_TIMESTAMP;
+ } else
+ return -1;
+
+ if (!ev_decoder->stmt_dec_record) {
+ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record,
+ &db_tab) != SQLITE_OK) {
+ trace_seq_printf(s,
+ "create sql %s fail\n",
+ sqlite3_table_list[payload_type]);
+ return -1;
+ }
+ }
+ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT,
+ id, 0, event->timestamp);
+#endif
+
if (payload_type == PAYLOAD_TYPE_0) {
const struct amp_payload0_type_sec *err =
(struct amp_payload0_type_sec *)event->error;
diff --git a/non-standard-ampere.h b/non-standard-ampere.h
index aacf3a8..f463c53 100644
--- a/non-standard-ampere.h
+++ b/non-standard-ampere.h
@@ -102,6 +102,79 @@ struct amp_payload3_type_sec {
uint64_t fw_speci_data5;
};
+enum amp_oem_data_type {
+ AMP_OEM_DATA_TYPE_INT,
+ AMP_OEM_DATA_TYPE_INT64,
+ AMP_OEM_DATA_TYPE_TEXT,
+};
+
+enum {
+ AMP_PAYLOAD0_FIELD_ID,
+ AMP_PAYLOAD0_FIELD_TIMESTAMP,
+ AMP_PAYLOAD0_FIELD_TYPE,
+ AMP_PAYLOAD0_FIELD_SUB_TYPE,
+ AMP_PAYLOAD0_FIELD_INS,
+ AMP_PAYLOAD0_FIELD_SOCKET_NUM,
+ AMP_PAYLOAD0_FIELD_STATUS_REG,
+ AMP_PAYLOAD0_FIELD_ADDR_REG,
+ AMP_PAYLOAD0_FIELD_MISC0,
+ AMP_PAYLOAD0_FIELD_MISC1,
+ AMP_PAYLOAD0_FIELD_MISC2,
+ AMP_PAYLOAD0_FIELD_MISC3,
+};
+
+enum {
+ AMP_PAYLOAD1_FIELD_ID,
+ AMP_PAYLOAD1_FIELD_TIMESTAMP,
+ AMP_PAYLOAD1_FIELD_TYPE,
+ AMP_PAYLOAD1_FIELD_SUB_TYPE,
+ AMP_PAYLOAD1_FIELD_INS,
+ AMP_PAYLOAD1_FIELD_SOCKET_NUM,
+ AMP_PAYLOAD1_FIELD_UNCORE_ERR_STATUS,
+ AMP_PAYLOAD1_FIELD_UNCORE_ERR_MASK,
+ AMP_PAYLOAD1_FIELD_UNCORE_ERR_SEV,
+ AMP_PAYLOAD1_FIELD_CORE_ERR_STATUS,
+ AMP_PAYLOAD1_FIELD_CORE_ERR_MASK,
+ AMP_PAYLOAD1_FIELD_ROOT_ERR_CMD,
+ AMP_PAYLOAD1_FIELD_ROOT_ERR_STATUS,
+ AMP_PAYLOAD1_FIELD_SRC_ID,
+ AMP_PAYLOAD1_FIELD_RESERVED1,
+ AMP_PAYLOAD1_FIELD_RESERVED2,
+};
+
+enum {
+ AMP_PAYLOAD2_FIELD_ID,
+ AMP_PAYLOAD2_FIELD_TIMESTAMP,
+ AMP_PAYLOAD2_FIELD_TYPE,
+ AMP_PAYLOAD2_FIELD_SUB_TYPE,
+ AMP_PAYLOAD2_FIELD_INS,
+ AMP_PAYLOAD2_FIELD_SOCKET_NUM,
+ AMP_PAYLOAD2_FIELD_CE_REPORT_REG,
+ AMP_PAYLOAD2_FIELD_CE_LOACATION,
+ AMP_PAYLOAD2_FIELD_CE_ADDR,
+ AMP_PAYLOAD2_FIELD_UE_REPORT_REG,
+ AMP_PAYLOAD2_FIELD_UE_LOCATION,
+ AMP_PAYLOAD2_FIELD_UE_ADDR,
+ AMP_PAYLOAD2_FIELD_RESERVED1,
+ AMP_PAYLOAD2_FIELD_RESERVED2,
+ AMP_PAYLOAD2_FIELD_RESERVED3,
+};
+
+enum {
+ AMP_PAYLOAD3_FIELD_ID,
+ AMP_PAYLOAD3_FIELD_TIMESTAMP,
+ AMP_PAYLOAD3_FIELD_TYPE,
+ AMP_PAYLOAD3_FIELD_SUB_TYPE,
+ AMP_PAYLOAD3_FIELD_INS,
+ AMP_PAYLOAD3_FIELD_SOCKET_NUM,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA0,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA1,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA2,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA3,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA4,
+ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA5
+};
+
void decode_amp_payload0_err_regs(struct ras_ns_ev_decoder *ev_decoder,
struct trace_seq *s,
const struct amp_payload0_type_sec *err);
diff --git a/ras-aer-handler.c b/ras-aer-handler.c
index 8ddd439..6f4cb2b 100644
--- a/ras-aer-handler.c
+++ b/ras-aer-handler.c
@@ -67,6 +67,9 @@ int ras_aer_event_handler(struct trace_seq *s,
struct tm *tm;
struct ras_aer_event ev;
char buf[BUF_LEN];
+ char ipmi_add_sel[105];
+ uint8_t sel_data[5];
+ int seg, bus, dev, fn;
/*
* Newer kernels (3.10-rc1 or upper) provide an uptime clock.
@@ -129,15 +132,19 @@ int ras_aer_event_handler(struct trace_seq *s,
switch (severity_val) {
case HW_EVENT_AER_UNCORRECTED_NON_FATAL:
ev.error_type = "Uncorrected (Non-Fatal)";
+ sel_data[0] = 0xca;
break;
case HW_EVENT_AER_UNCORRECTED_FATAL:
ev.error_type = "Uncorrected (Fatal)";
+ sel_data[0] = 0xca;
break;
case HW_EVENT_AER_CORRECTED:
ev.error_type = "Corrected";
+ sel_data[0] = 0xbf;
break;
default:
ev.error_type = "Unknown severity";
+ sel_data[0] = 0xbf;
}
trace_seq_puts(s, ev.error_type);
@@ -151,5 +158,29 @@ int ras_aer_event_handler(struct trace_seq *s,
ras_report_aer_event(ras, &ev);
#endif
+#ifdef HAVE_AMP_NS_DECODE
+ /*
+ * Get PCIe AER error source seg/bus/dev/fn and save it into
+ * BMC OEM SEL. "ipmitool raw 0x0a 0x44" is the IPMI Add SEL
+ * Entry command; please refer to IPMI specification chapter 31.6.
+ * 0xcd3a is the manufacturer ID (Ampere), byte 12 is sensor num
+ * (CE is 0xBF, UE is 0xCA), byte 13~14 is segment number, byte 15
+ * is bus number, byte 16[7:3] is device number, byte 16[2:0] is
+ * function number
+ */
+ sscanf(ev.dev_name, "%x:%x:%x.%x", &seg, &bus, &dev, &fn);
+
+ sel_data[1] = seg & 0xff;
+ sel_data[2] = (seg & 0xff00) >> 8;
+ sel_data[3] = bus;
+ sel_data[4] = (((dev & 0x1f) << 3) | (fn & 0x7));
+
+ sprintf(ipmi_add_sel,
+ "ipmitool raw 0x0a 0x44 0x00 0x00 0xc0 0x00 0x00 0x00 0x00 0x3a 0xcd 0x00 0xc0 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x",
+ sel_data[0], sel_data[1], sel_data[2], sel_data[3], sel_data[4]);
+
+ system(ipmi_add_sel);
+#endif
+
return 0;
}

View File

@ -0,0 +1,24 @@
commit 7937f0d6c2aaaed096f3a3d306416743c0dcb7a4
Author: Muralidhara M K <muralimk@amd.com>
Date: Wed Jul 28 01:52:12 2021 -0500
rasdaemon: Support MCE for AMD CPU family 19h
Add support for family 19h x86 CPUs from AMD.
Signed-off-by: Muralidhara M K <muralimk@amd.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/ras-mce-handler.c b/ras-mce-handler.c
index 805004a..f2b53d4 100644
--- a/ras-mce-handler.c
+++ b/ras-mce-handler.c
@@ -208,7 +208,7 @@ static int detect_cpu(struct ras_events *ras)
mce->cputype = CPU_AMD_SMCA;
goto ret;
}
- if (mce->family > 23) {
+ if (mce->family > 25) {
log(ALL, LOG_INFO,
"Can't parse MCE for this AMD CPU yet %d\n",
mce->family);

View File

@ -0,0 +1,26 @@
commit 7ccf12f5ae26a055926d175d908c7930293438c4
Author: DmNosachev <quartz64@gmail.com>
Date: Fri Jul 23 17:28:33 2021 +0300
labels/supermicro: added Supermicro X11SCW
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/labels/supermicro b/labels/supermicro
index 990fc9e..aea7c3c 100644
--- a/labels/supermicro
+++ b/labels/supermicro
@@ -133,4 +133,10 @@ Vendor: Supermicro
Model: X11SCA, X11SCA-F
DIMMA1: 0.0.0, 0.1.0; DIMMA2: 0.2.0, 0.3.0;
- DIMMB1: 0.0.1, 0.1.1; DIMMB2: 0.2.1, 0.3.1;
\ No newline at end of file
+ DIMMB1: 0.0.1, 0.1.1; DIMMB2: 0.2.1, 0.3.1;
+
+ Model: X11SCW-F
+ DIMMA1: 0.1.0;
+ DIMMA2: 0.0.0;
+ DIMMB1: 0.1.1;
+ DIMMB2: 0.0.1;
\ No newline at end of file

View File

@ -0,0 +1,40 @@
commit 9a5baed97b21af31064d9995ffcfaac0e9d7983e
Author: DmNosachev <quartz64@gmail.com>
Date: Tue Jun 29 13:37:48 2021 +0300
labels/supermicro: supermicro db syntax
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/labels/supermicro b/labels/supermicro
index bfaed93..47ea05f 100644
--- a/labels/supermicro
+++ b/labels/supermicro
@@ -18,17 +18,17 @@ Vendor: Supermicro
DIMMA1: 0.0.0; DIMMA2: 0.0.1;
DIMMB1: 0.1.0; DIMMB2: 0.1.1;
- Product: X10SRA-F
- DIMMA1: 0.0.0
- DIMMA2: 0.0.1
- DIMMB1: 0.1.0
- DIMMB2: 0.1.1
- DIMMC1: 1.0.0
- DIMMC2: 1.0.1
- DIMMD1: 1.1.0
- DIMMD2: 1.1.1
+ Model: X10SRA-F
+ DIMMA1: 0.0.0;
+ DIMMA2: 0.0.1;
+ DIMMB1: 0.1.0;
+ DIMMB2: 0.1.1;
+ DIMMC1: 1.0.0;
+ DIMMC2: 1.0.1;
+ DIMMD1: 1.1.0;
+ DIMMD2: 1.1.1;
- Product: H8DGU
+ Model: H8DGU
P1_DIMM1A: 0.2.0;
P1_DIMM1A: 0.3.0;
P2_DIMM1A: 3.2.0;

View File

@ -0,0 +1,230 @@
commit 9acef39f13833f7d53ef96abc5a72e79384260f4
Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
Date: Tue Jun 1 11:01:17 2021 +0530
rasdaemon: Add new SMCA bank types with error decoding
Upcoming systems with Scalable Machine Check Architecture (SMCA) have
new MCA banks added.
This patch adds the (HWID, MCATYPE) tuple, name and error decoding for
those new SMCA banks.
While at it, optimize the string names in smca_bank_name[].
Signed-off-by: Muralidhara M K <muralimk@amd.com>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/mce-amd-smca.c b/mce-amd-smca.c
index 7c619fd..e0cf512 100644
--- a/mce-amd-smca.c
+++ b/mce-amd-smca.c
@@ -47,7 +47,7 @@
/* These may be used by multiple smca_hwid_mcatypes */
enum smca_bank_types {
SMCA_LS = 0, /* Load Store */
- SMCA_LS_V2, /* Load Store */
+ SMCA_LS_V2,
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */
@@ -56,17 +56,22 @@ enum smca_bank_types {
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
SMCA_CS, /* Coherent Slave */
- SMCA_CS_V2, /* Coherent Slave V2 */
+ SMCA_CS_V2,
SMCA_PIE, /* Power, Interrupts, etc. */
SMCA_UMC, /* Unified Memory Controller */
+ SMCA_UMC_V2,
SMCA_PB, /* Parameter Block */
SMCA_PSP, /* Platform Security Processor */
- SMCA_PSP_V2, /* Platform Security Processor V2 */
+ SMCA_PSP_V2,
SMCA_SMU, /* System Management Unit */
- SMCA_SMU_V2, /* System Management Unit V2 */
+ SMCA_SMU_V2,
SMCA_MP5, /* Microprocessor 5 Unit */
SMCA_NBIO, /* Northbridge IO Unit */
SMCA_PCIE, /* PCI Express Unit */
+ SMCA_PCIE_V2,
+ SMCA_XGMI_PCS, /* xGMI PCS Unit */
+ SMCA_XGMI_PHY, /* xGMI PHY Unit */
+ SMCA_WAFL_PHY, /* WAFL PHY Unit */
N_SMCA_BANK_TYPES
};
@@ -237,6 +242,22 @@ static const char * const smca_umc_mce_desc[] = {
"Command/address parity error",
"Write data CRC error",
};
+
+static const char * const smca_umc2_mce_desc[] = {
+ "DRAM ECC error",
+ "Data poison error",
+ "SDP parity error",
+ "Reserved",
+ "Address/Command parity error",
+ "Write data parity error",
+ "DCQ SRAM ECC error",
+ "Reserved",
+ "Read data parity error",
+ "Rdb SRAM ECC error",
+ "RdRsp SRAM ECC error",
+ "LM32 MP errors",
+};
+
/* Parameter Block */
static const char * const smca_pb_mce_desc[] = {
"Parameter Block RAM ECC error",
@@ -314,6 +335,55 @@ static const char * const smca_pcie_mce_desc[] = {
"CCIX Non-okay write response with data error",
};
+static const char * const smca_pcie2_mce_desc[] = {
+ "SDP Parity Error logging",
+};
+
+static const char * const smca_xgmipcs_mce_desc[] = {
+ "Data Loss Error",
+ "Training Error",
+ "Flow Control Acknowledge Error",
+ "Rx Fifo Underflow Error",
+ "Rx Fifo Overflow Error",
+ "CRC Error",
+ "BER Exceeded Error",
+ "Tx Vcid Data Error",
+ "Replay Buffer Parity Error",
+ "Data Parity Error",
+ "Replay Fifo Overflow Error",
+ "Replay Fifo Underflow Error",
+ "Elastic Fifo Overflow Error",
+ "Deskew Error",
+ "Flow Control CRC Error",
+ "Data Startup Limit Error",
+ "FC Init Timeout Error",
+ "Recovery Timeout Error",
+ "Ready Serial Timeout Error",
+ "Ready Serial Attempt Error",
+ "Recovery Attempt Error",
+ "Recovery Relock Attempt Error",
+ "Replay Attempt Error",
+ "Sync Header Error",
+ "Tx Replay Timeout Error",
+ "Rx Replay Timeout Error",
+ "LinkSub Tx Timeout Error",
+ "LinkSub Rx Timeout Error",
+ "Rx CMD Pocket Error",
+};
+
+static const char * const smca_xgmiphy_mce_desc[] = {
+ "RAM ECC Error",
+ "ARC instruction buffer parity error",
+ "ARC data buffer parity error",
+ "PHY APB error",
+};
+
+static const char * const smca_waflphy_mce_desc[] = {
+ "RAM ECC Error",
+ "ARC instruction buffer parity error",
+ "ARC data buffer parity error",
+ "PHY APB error",
+};
struct smca_mce_desc {
const char * const *descs;
@@ -333,6 +403,7 @@ static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
+ [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) },
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
[SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
[SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)},
@@ -341,6 +412,10 @@ static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
[SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)},
[SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)},
+ [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) },
+ [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) },
+ [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
+ [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) },
};
struct smca_hwid {
@@ -369,6 +444,8 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
/* Unified Memory Controller MCA type */
{ SMCA_UMC, 0x00000096 },
+ /* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. */
+ { SMCA_UMC_V2, 0x00010096 },
/* Parameter Block MCA type */
{ SMCA_PB, 0x00000005 },
@@ -389,6 +466,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
/* PCI Express Unit MCA type */
{ SMCA_PCIE, 0x00000046 },
+ { SMCA_PCIE_V2, 0x00010046 },
+
+ /* Ext Global Memory Interconnect PCS MCA type */
+ { SMCA_XGMI_PCS, 0x00000050 },
+
+ /* Ext Global Memory Interconnect PHY MCA type */
+ { SMCA_XGMI_PHY, 0x00000259 },
+
+ /* WAFL PHY MCA type */
+ { SMCA_WAFL_PHY, 0x00000267 },
};
struct smca_bank_name {
@@ -396,27 +483,28 @@ struct smca_bank_name {
};
static struct smca_bank_name smca_names[] = {
- [SMCA_LS] = { "Load Store Unit" },
- [SMCA_LS_V2] = { "Load Store Unit" },
- [SMCA_IF] = { "Instruction Fetch Unit" },
- [SMCA_L2_CACHE] = { "L2 Cache" },
- [SMCA_DE] = { "Decode Unit" },
- [SMCA_RESERVED] = { "Reserved" },
- [SMCA_EX] = { "Execution Unit" },
- [SMCA_FP] = { "Floating Point Unit" },
- [SMCA_L3_CACHE] = { "L3 Cache" },
- [SMCA_CS] = { "Coherent Slave" },
- [SMCA_CS_V2] = { "Coherent Slave" },
- [SMCA_PIE] = { "Power, Interrupts, etc." },
- [SMCA_UMC] = { "Unified Memory Controller" },
- [SMCA_PB] = { "Parameter Block" },
- [SMCA_PSP] = { "Platform Security Processor" },
- [SMCA_PSP_V2] = { "Platform Security Processor" },
- [SMCA_SMU] = { "System Management Unit" },
- [SMCA_SMU_V2] = { "System Management Unit" },
- [SMCA_MP5] = { "Microprocessor 5 Unit" },
- [SMCA_NBIO] = { "Northbridge IO Unit" },
- [SMCA_PCIE] = { "PCI Express Unit" },
+ [SMCA_LS ... SMCA_LS_V2] = { "Load Store Unit" },
+ [SMCA_IF] = { "Instruction Fetch Unit" },
+ [SMCA_L2_CACHE] = { "L2 Cache" },
+ [SMCA_DE] = { "Decode Unit" },
+ [SMCA_RESERVED] = { "Reserved" },
+ [SMCA_EX] = { "Execution Unit" },
+ [SMCA_FP] = { "Floating Point Unit" },
+ [SMCA_L3_CACHE] = { "L3 Cache" },
+ [SMCA_CS ... SMCA_CS_V2] = { "Coherent Slave" },
+ [SMCA_PIE] = { "Power, Interrupts, etc." },
+ [SMCA_UMC] = { "Unified Memory Controller" },
+ [SMCA_UMC_V2] = { "Unified Memory Controller V2" },
+ [SMCA_PB] = { "Parameter Block" },
+ [SMCA_PSP ... SMCA_PSP_V2] = { "Platform Security Processor" },
+ [SMCA_SMU ... SMCA_SMU_V2] = { "System Management Unit" },
+ [SMCA_MP5] = { "Microprocessor 5 Unit" },
+ [SMCA_NBIO] = { "Northbridge IO Unit" },
+ [SMCA_PCIE ... SMCA_PCIE_V2] = { "PCI Express Unit" },
+ [SMCA_XGMI_PCS] = { "Ext Global Memory Interconnect PCS Unit" },
+ [SMCA_XGMI_PHY] = { "Ext Global Memory Interconnect PHY Unit" },
+ [SMCA_WAFL_PHY] = { "WAFL PHY Unit" },
+
};
static void amd_decode_errcode(struct mce_event *e)

View File

@ -0,0 +1,107 @@
commit aecf33aa70331670c06db6b652712b476e24051c
Author: Muralidhara M K <muralimk@amd.com>
Date: Mon Jul 12 05:40:46 2021 -0500
rasdaemon: Enumerate memory on noncpu nodes
Newer heterogeneous systems from AMD have GPU nodes (with HBM2 memory
banks) connected via xGMI links to the CPUs.
The node id information is available in the InstanceHI[47:44] of
the IPID register.
The UMC PHYs on Aldebaran nodes are enumerated as csrows.
The UMC channels connected to HBMs are enumerated as ranks.
Signed-off-by: Muralidhara M K <muralimk@amd.com>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/mce-amd-smca.c b/mce-amd-smca.c
index 3c346f4..f3379fc 100644
--- a/mce-amd-smca.c
+++ b/mce-amd-smca.c
@@ -78,6 +78,12 @@ enum smca_bank_types {
/* Maximum number of MCA banks per CPU. */
#define MAX_NR_BANKS 64
+/*
+ * On newer heterogeneous systems from AMD with CPU and GPU nodes connected
+ * via xGMI links, the non-CPU nodes are enumerated starting from index 8.
+ */
+#define NONCPU_NODE_INDEX 8
+
/* SMCA Extended error strings */
/* Load Store */
static const char * const smca_ls_mce_desc[] = {
@@ -531,6 +537,26 @@ static int find_umc_channel(struct mce_event *e)
{
return EXTRACT(e->ipid, 0, 31) >> 20;
}
+
+/*
+ * The HBM memory managed by the UMCCH of the noncpu node
+ * can be calculated based on the [15:12]bits of IPID
+ */
+static int find_hbm_channel(struct mce_event *e)
+{
+ int umc, tmp;
+
+ umc = EXTRACT(e->ipid, 0, 31) >> 20;
+
+ /*
+ * The HBM channel managed by the UMC of the noncpu node
+ * can be calculated based on the [15:12]bits of IPID as follows
+ */
+ tmp = ((e->ipid >> 12) & 0xf);
+
+ return (umc % 2) ? tmp + 4 : tmp;
+}
+
/* Decode extended errors according to Scalable MCA specification */
static void decode_smca_error(struct mce_event *e)
{
@@ -539,6 +565,7 @@ static void decode_smca_error(struct mce_event *e)
unsigned short xec = (e->status >> 16) & 0x3f;
const struct smca_hwid *s_hwid;
uint32_t mcatype_hwid = EXTRACT(e->ipid, 32, 63);
+ uint8_t mcatype_instancehi = EXTRACT(e->ipid, 44, 47);
unsigned int csrow = -1, channel = -1;
unsigned int i;
@@ -548,14 +575,16 @@ static void decode_smca_error(struct mce_event *e)
bank_type = s_hwid->bank_type;
break;
}
+ if (mcatype_instancehi >= NONCPU_NODE_INDEX)
+ bank_type = SMCA_UMC_V2;
}
- if (i >= ARRAY_SIZE(smca_hwid_mcatypes)) {
+ if (i >= MAX_NR_BANKS) {
strcpy(e->mcastatus_msg, "Couldn't find bank type with IPID");
return;
}
- if (bank_type >= N_SMCA_BANK_TYPES) {
+ if (bank_type >= MAX_NR_BANKS) {
strcpy(e->mcastatus_msg, "Don't know how to decode this bank");
return;
}
@@ -580,6 +609,16 @@ static void decode_smca_error(struct mce_event *e)
mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d",
channel, csrow);
}
+
+ if (bank_type == SMCA_UMC_V2 && xec == 0) {
+ /* The UMCPHY is reported as csrow in case of noncpu nodes */
+ csrow = find_umc_channel(e) / 2;
+ /* UMCCH is managing the HBM memory */
+ channel = find_hbm_channel(e);
+ mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d",
+ channel, csrow);
+ }
+
}
int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e)

View File

@ -0,0 +1,30 @@
commit b4402d36e1b42fb7b0d8ddccc83463a6e622dbc4
Author: DmNosachev <quartz64@gmail.com>
Date: Tue Jun 29 13:48:55 2021 +0300
labels/supermicro: added Supermicro X10DRI(-T)
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/labels/supermicro b/labels/supermicro
index 47ea05f..86e4617 100644
--- a/labels/supermicro
+++ b/labels/supermicro
@@ -81,4 +81,14 @@ Vendor: Supermicro
P2-DIMMC1: 2.2.0;
P2-DIMMD1: 3.0.0; P2-DIMMD2: 3.0.1;
P2-DIMME1: 3.1.0;
- P2-DIMMF1: 3.2.0;
\ No newline at end of file
+ P2-DIMMF1: 3.2.0;
+
+ Model: X10DRI, X10DRI-T
+ P1-DIMMA1: 0.0.0; P1-DIMMA2: 0.0.1;
+ P1-DIMMB1: 0.1.0; P1-DIMMB2: 0.1.1;
+ P1-DIMMC1: 0.2.0; P1-DIMMC2: 0.2.1;
+ P1-DIMMD1: 0.3.0; P1-DIMMD2: 0.3.1;
+ P2-DIMME1: 1.0.0; P2-DIMME2: 1.0.1;
+ P2-DIMMF1: 1.1.0; P2-DIMMF2: 1.1.1;
+ P2-DIMMG1: 1.2.0; P2-DIMMG2: 1.2.1;
+ P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1;
\ No newline at end of file

View File

@ -0,0 +1,27 @@
commit dda7d95bcbbb95e0db557a7a9325ee9815ab4e9b
Author: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed May 26 12:55:54 2021 +0200
Add support for multi-arch builds
Allow building rasdaemon on several architectures:
- x86_64
- arm 64
- ppc 64 LE
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 747a844..898687c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,6 +9,9 @@ jobs:
Ubuntu:
name: Ubuntu
runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ arch: [x64_64, aarch64, ppc64le]
steps:
- uses: actions/checkout@v2
- name: prepare

View File

@ -0,0 +1,31 @@
commit ec443ec0add059fa897f844349e1a2345d81713c
Author: DmNosachev <quartz64@gmail.com>
Date: Tue Jun 29 11:33:10 2021 +0300
labels/supermicro: added x11dph-i labels
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/labels/supermicro b/labels/supermicro
index 3fd6fee..bfaed93 100644
--- a/labels/supermicro
+++ b/labels/supermicro
@@ -68,3 +68,17 @@ Vendor: Supermicro
P1_DIMM4B: 1.1.1;
P2_DIMM4B: 2.0.1;
P2_DIMM4B: 2.1.1;
+
+ Model: X11DPH-i
+ P1-DIMMA1: 0.0.0; P1-DIMMA2: 0.0.1;
+ P1-DIMMB1: 0.1.0;
+ P1-DIMMC1: 0.2.0;
+ P1-DIMMD1: 1.0.0; P1-DIMMD2: 1.0.1;
+ P1-DIMME1: 1.1.0;
+ P1-DIMMF1: 1.2.0;
+ P2-DIMMA1: 2.0.0; P2-DIMMA2: 2.0.1;
+ P2-DIMMB1: 2.1.0;
+ P2-DIMMC1: 2.2.0;
+ P2-DIMMD1: 3.0.0; P2-DIMMD2: 3.0.1;
+ P2-DIMME1: 3.1.0;
+ P2-DIMMF1: 3.2.0;
\ No newline at end of file

View File

@ -0,0 +1,48 @@
commit f7cdd720297cd17e405a7170c04df89d1d9536f8
Author: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed May 26 12:35:55 2021 +0200
Add a github workflow for CI automation
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..5b3e757
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,34 @@
+name: CI
+
+# Should run only on branches and PR, as "on_tag.yml" will handle tags
+on:
+ push:
+ branches: master test
+ pull_request:
+ branches: master
+
+jobs:
+
+#
+# Linux
+#
+ Ubuntu:
+ name: Ubuntu
+ runs-on: ubuntu-20.04
+ strategy:
+ matrix:
+ arch: [x64_64, aarch64, armv7, ppc64le]
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ arch: ${{ matrix.arch }}
+ - name: prepare
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y build-essential sqlite3
+ - name: build
+ run: |
+ autoreconf -vfi
+ ./configure --enable-all
+ make
+ sudo make install

View File

@ -0,0 +1,30 @@
commit fc1dd37d422fc907416afd028514fff59b63ae12
Author: DmNosachev <quartz64@gmail.com>
Date: Wed Jun 30 16:49:18 2021 +0300
labels/supermicro: added Supermicro B1DRi
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/labels/supermicro b/labels/supermicro
index 373de07..b924a32 100644
--- a/labels/supermicro
+++ b/labels/supermicro
@@ -105,4 +105,14 @@ Vendor: Supermicro
P2-DIMMC1: 2.2.0;
P2-DIMMD1: 3.0.0;
P2-DIMME1: 3.1.0;
- P2-DIMMF1: 3.2.0;
\ No newline at end of file
+ P2-DIMMF1: 3.2.0;
+
+ Model: B1DRi
+ P1_DIMMA1: 0.0.0;
+ P1_DIMMB1: 0.1.0;
+ P1_DIMMC1: 0.2.0;
+ P1_DIMMD1: 0.3.0;
+ P2_DIMME1: 1.0.0;
+ P2_DIMMF1: 1.1.0;
+ P2_DIMMG1: 1.2.0;
+ P2_DIMMH1: 1.3.0;
\ No newline at end of file

View File

@ -0,0 +1,28 @@
commit fcdffdcb28ece67ed78e3575a3dce45d9dd4f015
Author: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed May 26 10:37:52 2021 +0200
rasdaemon.spec.in: Fix the description on this example file
While this is used just to test if building it is OK, better
to keep the logs nice ;-)
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/misc/rasdaemon.spec.in b/misc/rasdaemon.spec.in
index 6ef223f..afa4359 100644
--- a/misc/rasdaemon.spec.in
+++ b/misc/rasdaemon.spec.in
@@ -61,10 +61,10 @@ rm INSTALL %{buildroot}/usr/include/*.h
%changelog
* Wed May 26 2021 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> 0.6.7-1
-- Bump to version 0.6.5 with several fixes and additions
+- Bump to version 0.6.7 with several fixes and additions
* Tue Jul 21 2020 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> 0.6.6-1
-- Bump to version 0.6.5 with several fixes, new hip08 events and memory prediction analysis
+- Bump to version 0.6.6 with several fixes, new hip08 events and memory prediction analysis
* Wed Nov 20 2019 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> 0.6.5-1
- Bump to version 0.6.5 with several fixes and improves PCIe events record

263
SOURCES/labels.patch Normal file
View File

@ -0,0 +1,263 @@
Add labels directory from upstream
Labels directory doesn't get exported by tarball releases.
Signed-off-by: Aristeu Rozanski <aris@redhat.com>
---
labels/asus | 20 +++++++
labels/dell | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
labels/supermicro | 70 ++++++++++++++++++++++++
3 files changed, 242 insertions(+)
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ rasdaemon-0.6.7/labels/asus 2022-02-08 15:44:53.563362010 -0500
@@ -0,0 +1,20 @@
+# RASDAEMON Motherboard DIMM labels Database file.
+#
+# Vendor-name and model-name are found from the program 'dmidecode'
+# labels are found from the silk screen on the motherboard.
+#
+#Vendor: <vendor-name>
+# Product: <product-name>
+# Model: <model-name>
+# <label>: <mc>.<top>.<mid>.<low>
+#
+#
+#Vendor: <vendor-name>
+# Model: <model-name>
+# <label>: <mc>.<row>.<channel>
+#
+
+Vendor: ASUSTeK COMPUTER INC.
+ Model: PRIME X570-PRO
+ DIMM_A1: 0.0.1, 0.1.1; DIMM_A2: 0.2.1, 0.3.1;
+ DIMM_B1: 0.0.0, 0.1.0; DIMM_B2: 0.2.0, 0.3.0;
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ rasdaemon-0.6.7/labels/dell 2022-02-08 15:44:53.564361999 -0500
@@ -0,0 +1,152 @@
+# RASDAEMON Motherboard DIMM labels Database file.
+#
+# Vendor-name and model-name are found from the program 'dmidecode'
+# labels are found from the silk screen on the motherboard.
+#
+#Vendor: <vendor-name>
+# Product: <product-name>
+# Model: <model-name>
+# <label>: <mc>.<top>.<mid>.<low>
+#
+
+Vendor: Dell Inc.
+# 1-socket
+ Product: PowerEdge R220, PowerEdge R330, PowerEdge T330, PowerEdge R230, PowerEdge T130, PowerEdge T30
+ DIMM_A1: 0.0.0; DIMM_A2: 0.0.1;
+ DIMM_A3: 0.1.0; DIMM_A4: 0.1.1;
+
+ Product: PowerEdge T110 II, PowerEdge T20
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0;
+
+ DIMM_B1: 0.0.1; DIMM_B2: 0.1.1;
+
+ Product: PowerEdge R320, PowerEdge T320
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0;
+ DIMM_A4: 0.0.1; DIMM_A5: 0.1.1; DIMM_A6: 0.2.1;
+
+# 2-socket
+ Product: PowerEdge R610
+ DIMM_A1: 0.0.0; DIMM_A2: 0.0.1; DIMM_A3: 0.0.2;
+ DIMM_A4: 0.1.0; DIMM_A5: 0.1.1; DIMM_A6: 0.1.2;
+
+ DIMM_B1: 1.0.0; DIMM_B2: 1.0.1; DIMM_B3: 1.0.2;
+ DIMM_B4: 1.1.0; DIMM_B5: 1.1.1; DIMM_B6: 1.1.2;
+
+ Product: PowerEdge T710, PowerEdge R710
+ DIMM_A3: 0.0.0; DIMM_A2: 0.1.0; DIMM_A1: 0.2.0;
+ DIMM_A6: 0.0.1; DIMM_A5: 0.1.1; DIMM_A4: 0.2.1;
+ DIMM_A9: 0.0.2; DIMM_A8: 0.1.2; DIMM_A7: 0.2.2;
+
+ DIMM_B3: 1.0.0; DIMM_B2: 1.1.0; DIMM_B1: 1.2.0;
+ DIMM_B6: 1.0.1; DIMM_B5: 1.1.1; DIMM_B4: 1.2.1;
+ DIMM_B9: 1.0.2; DIMM_B8: 1.1.2; DIMM_B7: 1.2.2;
+
+ Product: PowerEdge R620, PowerEdge T620, PowerEdge R720xd, PowerEdge R730xd, PowerEdge T630, PowerEdge R730, PowerEdge R630, PowerEdge T620, PowerEdge M620, PowerEdge FC620, PowerEdge M630, PowerEdge FC630
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0;
+ DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1;
+ DIMM_A9: 0.0.2; DIMM_A10: 0.1.2; DIMM_A11: 0.2.2; DIMM_A12: 0.3.2;
+
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0;
+ DIMM_B5: 1.0.1; DIMM_B6: 1.1.1; DIMM_B7: 1.2.1; DIMM_B8: 1.3.1;
+ DIMM_B9: 1.0.2; DIMM_B10: 1.1.2; DIMM_B11: 1.2.2; DIMM_B12: 1.3.2;
+
+ Product: PowerEdge R640, PowerEdge R740, PowerEdge R740xd, PowerEdge T640
+ A1: 0.0.0; A2: 0.1.0; A3: 0.2.0; A4: 1.0.0; A5: 1.1.0; A6: 1.2.0;
+ A7: 0.0.1; A8: 0.1.1; A9: 0.2.1; A10: 1.0.1; A11: 1.1.1; A12: 1.2.1;
+
+ B1: 2.0.0; B2: 2.1.0; B3: 2.2.0; B4: 3.0.0; B5: 3.1.0; B6: 3.2.0;
+ B7: 2.0.1; B8: 2.1.1; B9: 2.2.1; B10: 3.0.1; B11: 3.1.1; B12: 3.2.1;
+
+ Product: PowerEdge M520, PowerEdge R420, PowerEdge T420
+ DIMM_A1: 0.1.0; DIMM_A2: 0.2.0; DIMM_A3: 0.3.0;
+ DIMM_A4: 0.1.1; DIMM_A5: 0.2.1; DIMM_A6: 0.3.1;
+
+ DIMM_B1: 1.1.0; DIMM_B2: 1.2.0; DIMM_B3: 1.3.0;
+ DIMM_B4: 1.1.1; DIMM_B5: 1.2.1; DIMM_B6: 1.3.1;
+
+ Product: PowerEdge FC420, PowerEdge M420
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0;
+
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0;
+
+ Product: PowerEdge C6320, PowerEdge C4130
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0;
+ DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1;
+
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0;
+ DIMM_B5: 1.0.1; DIMM_B6: 1.1.1; DIMM_B7: 1.2.1; DIMM_B8: 1.3.1;
+
+ Product: PowerEdge C6320p
+ A1: 0.0.0; B1: 0.1.0; C1: 0.2.0;
+ D1: 1.0.0; E1: 1.1.0; F1: 1.2.0;
+
+ Product: PowerEdge C6420
+ A1: 0.0.0; A2: 0.1.0; A3: 0.2.0; A4: 1.0.0; A5: 1.1.0; A6: 1.2.0;
+ A7: 0.0.1; A8: 1.0.1;
+
+ B1: 2.0.0; B2: 2.1.0; B3: 2.2.0; B4: 3.0.0; B5: 3.1.0; B6: 3.2.0;
+ B7: 2.0.1; B8: 3.0.1;
+
+ Product: PowerEdge R430, PowerEdge T430, PowerEdge R530
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0;
+ DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1;
+
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0;
+
+ Product: PowerEdge FC430
+ DIMM_A1: 0.1.0; DIMM_A2: 0.0.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0;
+
+ DIMM_B1: 1.1.0; DIMM_B2: 1.0.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0;
+
+# 4-socket
+ Product: PowerEdge M820, PowerEdge R830, PowerEdge M830, PowerEdge R930, PowerEdge FC830
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0;
+ DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1;
+ DIMM_A9: 0.0.2; DIMM_A10: 0.1.2; DIMM_A11: 0.2.2; DIMM_A12: 0.3.2;
+
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0;
+ DIMM_B5: 1.0.1; DIMM_B6: 1.1.1; DIMM_B7: 1.2.1; DIMM_B8: 1.3.1;
+ DIMM_B9: 1.0.2; DIMM_B10: 1.1.2; DIMM_B11: 1.2.2; DIMM_B12: 1.3.2;
+
+ DIMM_C1: 2.0.0; DIMM_C2: 2.1.0; DIMM_C3: 2.2.0; DIMM_C4: 2.3.0;
+ DIMM_C5: 2.0.1; DIMM_C6: 2.1.1; DIMM_C7: 2.2.1; DIMM_C8: 2.3.1;
+ DIMM_C9: 2.0.2; DIMM_C10: 2.1.2; DIMM_C11: 2.2.2; DIMM_C12: 2.3.2;
+
+ DIMM_D1: 3.0.0; DIMM_D2: 3.1.0; DIMM_D3: 3.2.0; DIMM_D4: 3.3.0;
+ DIMM_D5: 3.0.1; DIMM_D6: 3.1.1; DIMM_D7: 3.2.1; DIMM_D8: 3.3.1;
+ DIMM_D9: 3.0.2; DIMM_D10: 3.1.2; DIMM_D11: 3.2.2; DIMM_D12: 3.3.2;
+
+ Product: PowerEdge FM120x4
+ DIMM_A_A1: 0.1.0; DIMM_A_A2: 0.2.0;
+
+ DIMM_B_A1: 1.1.0; DIMM_B_A2: 1.2.0;
+
+ DIMM_C_A1: 2.1.0; DIMM_C_A2: 2.2.0;
+
+ DIMM_D_A1: 3.1.0; DIMM_D_A2: 3.2.0;
+
+ Product: PowerEdge R940
+ A1: 0.0.0; A2: 0.1.0; A3: 0.2.0; A4: 1.0.0; A5: 1.1.0; A6: 1.2.0;
+ A7: 0.0.1; A8: 0.1.1; A9: 0.2.1; A10: 1.0.1; A11: 1.1.1; A12: 1.2.1;
+
+ B1: 2.0.0; B2: 2.1.0; B3: 2.2.0; B4: 3.0.0; B5: 3.1.0; B6: 3.2.0;
+ B7: 2.0.1; B8: 2.1.1; B9: 2.2.1; B10: 3.0.1; B11: 3.1.1; B12: 3.2.1;
+
+ C1: 4.0.0; C2: 4.1.0; C3: 4.2.0; C4: 5.0.0; C5: 5.1.0; C6: 5.2.0;
+ C7: 4.0.1; C8: 4.1.1; C9: 4.2.1; C10: 5.0.1; C11: 5.1.1; C12: 5.2.1;
+
+ D1: 6.0.0; D2: 6.1.0; D3: 6.2.0; D4: 7.0.0; D5: 7.1.0; D6: 7.2.0;
+ D7: 6.0.1; D8: 6.1.1; D9: 6.2.1; D10: 7.0.1; D11: 7.1.1; D12: 7.2.1;
+
+ Product: PowerEdge R440, PowerEdge R540
+ A1: 0.0.0; A2: 0.1.0; A3: 0.2.0; A4: 1.0.0; A5: 1.1.0; A6: 1.2.0;
+ A7: 0.0.1; A8: 0.1.1; A9: 1.0.1; A10: 1.1.1;
+
+ B1: 2.0.0; B2: 2.1.0; B3: 2.2.0; B4: 3.0.0; B5: 3.1.0; B6: 3.2.0;
+
+ Product: PowerEdge M640, PowerEdge FC640
+ A1: 0.0.0; A2: 0.1.0; A3: 0.2.0; A4: 1.0.0; A5: 1.1.0; A6: 1.2.0;
+ A7: 0.0.1; A8: 1.0.1;
+
+ B1: 2.0.0; B2: 2.1.0; B3: 2.2.0; B4: 3.0.0; B5: 3.1.0; B6: 3.2.0;
+ B7: 2.0.1; B8: 3.0.1;
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ rasdaemon-0.6.7/labels/supermicro 2022-02-08 15:44:53.564361999 -0500
@@ -0,0 +1,70 @@
+# RASDAEMON Motherboard DIMM labels Database file.
+#
+# Vendor-name and model-name are found from the program 'dmidecode'
+# labels are found from the silk screen on the motherboard.
+#
+#Vendor: <vendor-name>
+# Product: <product-name>
+# Model: <model-name>
+# <label>: <mc>.<top>.<mid>.<low>
+#
+
+Vendor: Supermicro
+ Model: A2SDi-8C-HLN4F
+ DIMMA1: 0.0.0; DIMMA2: 0.0.1;
+ DIMMB1: 0.1.0; DIMMB2: 0.1.1;
+
+ Model: A2SDi-8C+-HLN4F
+ DIMMA1: 0.0.0; DIMMA2: 0.0.1;
+ DIMMB1: 0.1.0; DIMMB2: 0.1.1;
+
+ Product: X10SRA-F
+ DIMMA1: 0.0.0
+ DIMMA2: 0.0.1
+ DIMMB1: 0.1.0
+ DIMMB2: 0.1.1
+ DIMMC1: 1.0.0
+ DIMMC2: 1.0.1
+ DIMMD1: 1.1.0
+ DIMMD2: 1.1.1
+
+ Product: H8DGU
+ P1_DIMM1A: 0.2.0;
+ P1_DIMM1A: 0.3.0;
+ P2_DIMM1A: 3.2.0;
+ P2_DIMM1A: 3.3.0;
+
+ P1_DIMM2A: 0.2.1;
+ P1_DIMM2A: 0.3.1;
+ P2_DIMM2A: 3.2.1;
+ P2_DIMM2A: 3.3.1;
+
+ P1_DIMM3A: 1.2.0;
+ P1_DIMM3A: 1.3.0;
+ P2_DIMM3A: 2.2.0;
+ P2_DIMM3A: 2.3.0;
+
+ P1_DIMM4A: 1.2.1;
+ P1_DIMM4A: 1.3.1;
+ P2_DIMM4A: 2.2.1;
+ P2_DIMM4A: 2.3.1;
+
+ P1_DIMM1B: 0.0.0;
+ P1_DIMM1B: 0.2.0;
+ P2_DIMM1B: 3.0.0;
+ P2_DIMM1B: 3.1.0;
+
+ P1_DIMM2B: 0.0.1;
+ P1_DIMM2B: 0.1.1;
+ P2_DIMM2B: 3.0.1;
+ P2_DIMM2B: 3.1.1;
+
+ P1_DIMM3B: 1.0.0;
+ P1_DIMM3B: 1.1.0;
+ P2_DIMM3B: 2.0.0;
+ P2_DIMM3B: 2.1.0;
+
+ P1_DIMM4B: 1.0.1;
+ P1_DIMM4B: 1.1.1;
+ P2_DIMM4B: 2.0.1;
+ P2_DIMM4B: 2.1.1;

View File

@ -1,24 +0,0 @@
commit fd982af0a307edc5d3e56011d2e045015b1efd4b
Author: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon Mar 30 01:22:24 2020 +0200
ras-record.h: define an external var as such
Otherwise, newer versions of gcc will produce multiple symbols,
causing link breakages.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
diff --git a/ras-record.h b/ras-record.h
index 5311c67caf44..0d2a481c23dd 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -25,7 +25,7 @@
extern long user_hz;
-struct ras_events *ras;
+extern struct ras_events *ras;
struct ras_mc_event {
char timestamp[64];

View File

@ -1,19 +1,40 @@
Name: rasdaemon
Version: 0.6.4
Release: 6%{?dist}
Version: 0.6.7
Release: 5%{?dist}
Summary: Utility to receive RAS error tracings
License: GPLv2
URL: http://git.infradead.org/users/mchehab/rasdaemon.git
Source0: http://www.infradead.org/~mchehab/rasdaemon/%{name}-%{version}.tar.bz2
Patch0: rasdaemon-avoid-multiple-definitions.patch
Patch0: labels.patch
Patch1: fcdffdcb28ece67ed78e3575a3dce45d9dd4f015.patch
Patch2: f7cdd720297cd17e405a7170c04df89d1d9536f8.patch
Patch3: 2b37a26dcec389723f75d69d3da9c2f15f6c317d.patch
Patch4: dda7d95bcbbb95e0db557a7a9325ee9815ab4e9b.patch
Patch5: 738bafafdcb2e8b0ced32fff31b13754d571090b.patch
Patch6: 1ff5f3d2a0fcd48add9462567c30fe0e14585fb4.patch
Patch7: 9acef39f13833f7d53ef96abc5a72e79384260f4.patch
Patch8: 28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch
Patch9: aecf33aa70331670c06db6b652712b476e24051c.patch
Patch10: 7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch
Patch11: ec443ec0add059fa897f844349e1a2345d81713c.patch
Patch12: 9a5baed97b21af31064d9995ffcfaac0e9d7983e.patch
Patch13: b4402d36e1b42fb7b0d8ddccc83463a6e622dbc4.patch
Patch14: 50565005b10fe909c66f1c90f2feb95712427c7d.patch
Patch15: fc1dd37d422fc907416afd028514fff59b63ae12.patch
Patch16: 6bc43db1b6b3d73805179c21d1dd5521e8dc0f74.patch
Patch17: 2b6a54b0d31e02e657171fd27f4e31d996756bc6.patch
Patch18: 7ccf12f5ae26a055926d175d908c7930293438c4.patch
ExcludeArch: s390 s390x
BuildRequires: make
BuildRequires: make
BuildRequires: gcc
BuildRequires: gettext-devel
BuildRequires: perl-generators
BuildRequires: sqlite-devel
BuildRequires: systemd
BuildRequires: autoconf
BuildRequires: automake
BuildRequires: libtool
Provides: bundled(kernel-event-lib)
Requires: hwdata
Requires: perl-DBD-SQLite
@ -38,6 +59,28 @@ an utility for reporting current error counts from the EDAC sysfs files.
%prep
%setup -q
%patch0 -p1
%patch1 -p1
%patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch9 -p1
%patch10 -p1
%patch11 -p1
%patch12 -p1
%patch13 -p1
%patch14 -p1
%patch15 -p1
%patch16 -p1
%patch17 -p1
%patch18 -p1
# The tarball's autotools files were frozen the first time aclocal was run and
# will keep requiring an older version of automake, so regenerate them here.
autoreconf -vfi
%build
%ifarch %{arm} aarch64
@ -52,6 +95,11 @@ make install DESTDIR=%{buildroot}
install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service
install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service
rm INSTALL %{buildroot}/usr/include/*.h
mkdir -p %{buildroot}/%{_sharedstatedir}/rasdaemon
install -d -p -m 0755 %{buildroot}/%{_sharedstatedir}/rasdaemon
mkdir -p %{buildroot}/%{_sysconfdir}/sysconfig
install -D -p -m 0644 misc/rasdaemon.env %{buildroot}/%{_sysconfdir}/sysconfig/rasdaemon
sed -i "s/^PAGE_CE_ACTION=.*/PAGE_CE_ACTION=account/" %{buildroot}/%{_sysconfdir}/sysconfig/rasdaemon
%files
%doc AUTHORS ChangeLog COPYING README TODO
@ -61,8 +109,26 @@ rm INSTALL %{buildroot}/usr/include/*.h
%{_unitdir}/*.service
%{_sharedstatedir}/rasdaemon
%{_sysconfdir}/ras/dimm_labels.d
%{_sysconfdir}/sysconfig/rasdaemon
%changelog
* Thu Mar 24 2022 Aristeu Rozanski <aris@redhat.com> 0.6.7-5
- Trying to guess what's going on on the testing side [2065729]
* Thu Mar 24 2022 Aristeu Rozanski <aris@redhat.com> 0.6.7-4
- Adding simple test to stop being gated [2065729]
* Thu Mar 24 2022 Aristeu Rozanski <aris@redhat.com> 0.6.7-3
- Adding gating.yaml [2065729]
* Fri Mar 18 2022 Aristeu Rozanski <aris@redhat.com> 0.6.7-2
- Adding missing rasdaemon environment configuration to /etc/sysconfig/rasdaemon [2065729]
* Tue Feb 08 2022 Aristeu Rozanski <aris@redhat.com> 0.6.7-1
- Bumped to 0.6.7
- Backported patches that sit on top of 0.6.7 without being released
Related: rhbz#2052190
* Tue Aug 10 2021 Mohan Boddu <mboddu@redhat.com> - 0.6.4-6
- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags
Related: rhbz#1991688