/* Common Functions */

/*
 * Convert a CXL event timestamp into "%Y-%m-%d %H:%M:%S %z" local-time text.
 *
 * @ts:     timestamp from the event record. CXL Specification 3.0 defines it
 *          as the number of unsigned nanoseconds that have elapsed since
 *          midnight, 01-Jan-1970 UTC. 0 is treated as "no timestamp".
 * @ts_ptr: destination buffer
 * @size:   capacity of @ts_ptr in bytes
 *
 * The epoch string "1970-01-01 00:00:00 +0000" is stored when @ts is 0, when
 * localtime() fails, or when the formatted text does not fit in @size bytes.
 * The result is always NUL-terminated (truncated if @size is too small).
 * Nothing is written when @ts_ptr is NULL or @size is 0.
 */
static void convert_timestamp(unsigned long long ts, char *ts_ptr, uint16_t size)
{
	static const char epoch[] = "1970-01-01 00:00:00 +0000";

	if (!ts_ptr || !size)
		return;

	if (ts) {
		time_t ts_secs = ts / 1000000000ULL;
		struct tm *tm = localtime(&ts_secs);

		/*
		 * strftime() returns 0 and leaves the buffer contents
		 * unspecified when the result does not fit, so only trust
		 * the buffer on a non-zero return.
		 */
		if (tm && strftime(ts_ptr, size, "%Y-%m-%d %H:%M:%S %z", tm) > 0)
			return;
	}

	/* strncpy() alone does not terminate on truncation; do it by hand */
	strncpy(ts_ptr, epoch, size - 1);
	ts_ptr[size - 1] = '\0';
}
(tep_get_field_val(s, event, "overflow_ts", record, &val, 1) < 0) + return -1; +- if (val) { +- /* CXL Specification 3.0 +- * Overflow timestamp - The number of unsigned nanoseconds +- * that have elapsed since midnight, 01-Jan-1970 UTC +- */ +- time_t ovf_ts_secs = val / 1000000000ULL; +- +- tm = localtime(&ovf_ts_secs); +- if (tm) { +- strftime(ev.overflow_ts, sizeof(ev.overflow_ts), +- "%Y-%m-%d %H:%M:%S %z", tm); +- } +- } +- if (!val || !tm) +- strncpy(ev.overflow_ts, "1970-01-01 00:00:00 +0000", +- sizeof(ev.overflow_ts)); ++ convert_timestamp(val, ev.overflow_ts, sizeof(ev.overflow_ts)); + } else + strncpy(ev.overflow_ts, "1970-01-01 00:00:00 +0000", sizeof(ev.overflow_ts)); + if (trace_seq_printf(s, "overflow timestamp:%s\n", ev.overflow_ts) <= 0) diff --git a/31c7578ddb0fc15aa7247f2b8885956540031221.patch b/31c7578ddb0fc15aa7247f2b8885956540031221.patch new file mode 100644 index 0000000..7ee1e3b --- /dev/null +++ b/31c7578ddb0fc15aa7247f2b8885956540031221.patch @@ -0,0 +1,54 @@ +commit 31c7578ddb0fc15aa7247f2b8885956540031221 +Author: Shiju Jose +Date: Tue Feb 6 12:08:00 2024 +0000 + + rasdaemon: ras-memory-failure-handler: update memory failure action page types + + Update memory failure action page types corresponding to the same in + mm/memory-failure.c in the kernel. 
+ + Signed-off-by: Shiju Jose + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/ras-memory-failure-handler.c b/ras-memory-failure-handler.c +index 97e8840..a5acc08 100644 +--- a/ras-memory-failure-handler.c ++++ b/ras-memory-failure-handler.c +@@ -26,10 +26,8 @@ enum mf_action_page_type { + MF_MSG_KERNEL_HIGH_ORDER, + MF_MSG_SLAB, + MF_MSG_DIFFERENT_COMPOUND, +- MF_MSG_POISONED_HUGE, + MF_MSG_HUGE, + MF_MSG_FREE_HUGE, +- MF_MSG_NON_PMD_HUGE, + MF_MSG_UNMAP_FAILED, + MF_MSG_DIRTY_SWAPCACHE, + MF_MSG_CLEAN_SWAPCACHE, +@@ -41,7 +39,6 @@ enum mf_action_page_type { + MF_MSG_CLEAN_LRU, + MF_MSG_TRUNCATED_LRU, + MF_MSG_BUDDY, +- MF_MSG_BUDDY_2ND, + MF_MSG_DAX, + MF_MSG_UNSPLIT_THP, + MF_MSG_UNKNOWN, +@@ -64,10 +61,8 @@ static const struct { + { MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page"}, + { MF_MSG_SLAB, "kernel slab page"}, + { MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking"}, +- { MF_MSG_POISONED_HUGE, "huge page already hardware poisoned"}, + { MF_MSG_HUGE, "huge page"}, + { MF_MSG_FREE_HUGE, "free huge page"}, +- { MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page"}, + { MF_MSG_UNMAP_FAILED, "unmapping failed page"}, + { MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page"}, + { MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page"}, +@@ -79,7 +74,6 @@ static const struct { + { MF_MSG_CLEAN_LRU, "clean LRU page"}, + { MF_MSG_TRUNCATED_LRU, "already truncated LRU page"}, + { MF_MSG_BUDDY, "free buddy page"}, +- { MF_MSG_BUDDY_2ND, "free buddy page (2nd try)"}, + { MF_MSG_DAX, "dax page"}, + { MF_MSG_UNSPLIT_THP, "unsplit thp"}, + { MF_MSG_UNKNOWN, "unknown page"}, diff --git a/53c682fb45c2909c128be4ee8f51a3e42fe2f7fd.patch b/53c682fb45c2909c128be4ee8f51a3e42fe2f7fd.patch new file mode 100644 index 0000000..cb656cc --- /dev/null +++ b/53c682fb45c2909c128be4ee8f51a3e42fe2f7fd.patch @@ -0,0 +1,551 @@ +commit 53c682fb45c2909c128be4ee8f51a3e42fe2f7fd +Author: Shiju Jose +Date: Wed Apr 5 11:54:41 2023 +0100 + + rasdaemon: Add support for the CXL general media 
events + + Add support to log and record the CXL general media events. + + Signed-off-by: Shiju Jose + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c +index 83ada56..2de96f6 100644 +--- a/ras-cxl-handler.c ++++ b/ras-cxl-handler.c +@@ -99,6 +99,14 @@ static char *uuid_be(const char *uu) + return uuid; + } + ++static const char* get_cxl_type_str(const char** type_array, uint8_t num_elems, uint8_t type) ++{ ++ if (type >= num_elems) ++ return "Unknown"; ++ ++ return type_array[type]; ++} ++ + /* Poison List: Payload out flags */ + #define CXL_POISON_FLAG_MORE BIT(0) + #define CXL_POISON_FLAG_OVERFLOW BIT(1) +@@ -709,3 +717,151 @@ int ras_cxl_generic_event_handler(struct trace_seq *s, + + return 0; + } ++ ++#define CXL_DPA_VOLATILE BIT(0) ++#define CXL_DPA_NOT_REPAIRABLE BIT(1) ++ ++static const struct cxl_event_flags cxl_dpa_flags[] = { ++ { .bit = CXL_DPA_VOLATILE, .flag = "VOLATILE" }, ++ { .bit = CXL_DPA_NOT_REPAIRABLE, .flag = "NOT_REPAIRABLE" }, ++}; ++ ++/* ++ * General Media Event Record - GMER ++ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 ++ */ ++#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0) ++#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1) ++#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW BIT(2) ++ ++static const struct cxl_event_flags cxl_gmer_event_desc_flags[] = { ++ { .bit = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, .flag = "UNCORRECTABLE EVENT" }, ++ { .bit = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, .flag = "THRESHOLD EVENT" }, ++ { .bit = CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, .flag = "POISON LIST OVERFLOW" }, ++}; ++ ++#define CXL_GMER_VALID_CHANNEL BIT(0) ++#define CXL_GMER_VALID_RANK BIT(1) ++#define CXL_GMER_VALID_DEVICE BIT(2) ++#define CXL_GMER_VALID_COMPONENT BIT(3) ++ ++static const char* cxl_gmer_mem_event_type[] = { ++ "ECC Error", ++ "Invalid Address", ++ "Data Path Error", ++}; ++ ++static const char* cxl_gmer_trans_type[] = { ++ "Unknown", ++ "Host Read", ++ "Host Write", ++ "Host Scan Media", ++ 
"Host Inject Poison", ++ "Internal Media Scrub", ++ "Internal Media Management", ++}; ++ ++int ras_cxl_general_media_event_handler(struct trace_seq *s, ++ struct tep_record *record, ++ struct tep_event *event, void *context) ++{ ++ int len, i; ++ unsigned long long val; ++ struct ras_events *ras = context; ++ struct ras_cxl_general_media_event ev; ++ ++ memset(&ev, 0, sizeof(ev)); ++ if (handle_ras_cxl_common_hdr(s, record, event, context, &ev.hdr) < 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "dpa", record, &val, 1) < 0) ++ return -1; ++ ev.dpa = val; ++ if (trace_seq_printf(s, "dpa:0x%llx ", (unsigned long long)ev.dpa) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "dpa_flags", record, &val, 1) < 0) ++ return -1; ++ ev.dpa_flags = val; ++ if (trace_seq_printf(s, "dpa_flags:") <= 0) ++ return -1; ++ if (decode_cxl_event_flags(s, ev.dpa_flags, cxl_dpa_flags, ARRAY_SIZE(cxl_dpa_flags)) < 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "descriptor", record, &val, 1) < 0) ++ return -1; ++ ev.descriptor = val; ++ if (trace_seq_printf(s, "descriptor:") <= 0) ++ return -1; ++ if (decode_cxl_event_flags(s, ev.descriptor, cxl_gmer_event_desc_flags, ++ ARRAY_SIZE(cxl_gmer_event_desc_flags)) < 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "type", record, &val, 1) < 0) ++ return -1; ++ ev.type = val; ++ if (trace_seq_printf(s, "type:%s ", get_cxl_type_str(cxl_gmer_mem_event_type, ++ ARRAY_SIZE(cxl_gmer_mem_event_type), ev.type)) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "transaction_type", record, &val, 1) < 0) ++ return -1; ++ ev.transaction_type = val; ++ if (trace_seq_printf(s, "transaction_type:%s ", ++ get_cxl_type_str(cxl_gmer_trans_type, ++ ARRAY_SIZE(cxl_gmer_trans_type), ++ ev.transaction_type)) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "validity_flags", record, &val, 1) < 0) ++ return -1; ++ ev.validity_flags = val; ++ ++ if (ev.validity_flags & CXL_GMER_VALID_CHANNEL) { ++ if 
(tep_get_field_val(s, event, "channel", record, &val, 1) < 0) ++ return -1; ++ ev.channel = val; ++ if (trace_seq_printf(s, "channel:%u ", ev.channel) <= 0) ++ return -1; ++ } ++ ++ if (ev.validity_flags & CXL_GMER_VALID_RANK) { ++ if (tep_get_field_val(s, event, "rank", record, &val, 1) < 0) ++ return -1; ++ ev.rank = val; ++ if (trace_seq_printf(s, "rank:%u ", ev.rank) <= 0) ++ return -1; ++ } ++ ++ if (ev.validity_flags & CXL_GMER_VALID_DEVICE) { ++ if (tep_get_field_val(s, event, "device", record, &val, 1) < 0) ++ return -1; ++ ev.device = val; ++ if (trace_seq_printf(s, "device:%x ", ev.device) <= 0) ++ return -1; ++ } ++ ++ if (ev.validity_flags & CXL_GMER_VALID_COMPONENT) { ++ ev.comp_id = tep_get_field_raw(s, event, "comp_id", record, &len, 1); ++ if (!ev.comp_id) ++ return -1; ++ if (trace_seq_printf(s, "comp_id:") <= 0) ++ return -1; ++ for (i = 0; i < CXL_EVENT_GEN_MED_COMP_ID_SIZE; i++) { ++ if (trace_seq_printf(s, "%02x ", ev.comp_id[i]) <= 0) ++ break; ++ } ++ } ++ ++ /* Insert data into the SGBD */ ++#ifdef HAVE_SQLITE3 ++ ras_store_cxl_general_media_event(ras, &ev); ++#endif ++ ++#ifdef HAVE_ABRT_REPORT ++ /* Report event to ABRT */ ++ ras_report_cxl_general_media_event(ras, &ev); ++#endif ++ ++ return 0; ++} +diff --git a/ras-cxl-handler.h b/ras-cxl-handler.h +index 9f77cb7..3adca4a 100644 +--- a/ras-cxl-handler.h ++++ b/ras-cxl-handler.h +@@ -35,4 +35,7 @@ int ras_cxl_overflow_event_handler(struct trace_seq *s, + int ras_cxl_generic_event_handler(struct trace_seq *s, + struct tep_record *record, + struct tep_event *event, void *context); ++int ras_cxl_general_media_event_handler(struct trace_seq *s, ++ struct tep_record *record, ++ struct tep_event *event, void *context); + #endif +diff --git a/ras-events.c b/ras-events.c +index 4036933..978dee4 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -250,6 +250,7 @@ int toggle_ras_mc_event(int enable) + rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_aer_correctable_error", enable); + rc |= 
__toggle_ras_mc_event(ras, "cxl", "cxl_overflow", enable); + rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_generic_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_general_media", enable); + #endif + + free_ras: +@@ -1063,6 +1064,14 @@ int handle_ras_events(int record_events) + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "cxl", "cxl_generic_event"); ++ ++ rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_general_media", ++ ras_cxl_general_media_event_handler, NULL, CXL_GENERAL_MEDIA_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "cxl", "cxl_general_media"); + #endif + + if (!num_events) { +diff --git a/ras-events.h b/ras-events.h +index 96c299e..9b83df3 100644 +--- a/ras-events.h ++++ b/ras-events.h +@@ -44,6 +44,7 @@ enum { + CXL_AER_CE_EVENT, + CXL_OVERFLOW_EVENT, + CXL_GENERIC_EVENT, ++ CXL_GENERAL_MEDIA_EVENT, + NR_EVENTS + }; + +diff --git a/ras-record.c b/ras-record.c +index a65d9c0..507a58e 100644 +--- a/ras-record.c ++++ b/ras-record.c +@@ -846,6 +846,75 @@ int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_e + + return rc; + } ++ ++/* ++ * Table and functions to handle cxl:cxl_general_media_event ++ */ ++static const struct db_fields cxl_general_media_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "memdev", .type = "TEXT" }, ++ { .name = "host", .type = "TEXT" }, ++ { .name = "serial", .type = "INTEGER" }, ++ { .name = "log_type", .type = "TEXT" }, ++ { .name = "hdr_uuid", .type = "TEXT" }, ++ { .name = "hdr_flags", .type = "INTEGER" }, ++ { .name = "hdr_handle", .type = "INTEGER" }, ++ { .name = "hdr_related_handle", .type = "INTEGER" }, ++ { .name = "hdr_ts", .type = "TEXT" }, ++ { .name = "hdr_length", .type = "INTEGER" }, ++ { .name = "hdr_maint_op_class", .type = "INTEGER" }, ++ { .name = "dpa", .type = "INTEGER" }, ++ { .name = "dpa_flags", .type = 
"INTEGER" }, ++ { .name = "descriptor", .type = "INTEGER" }, ++ { .name = "type", .type = "INTEGER" }, ++ { .name = "transaction_type", .type = "INTEGER" }, ++ { .name = "channel", .type = "INTEGER" }, ++ { .name = "rank", .type = "INTEGER" }, ++ { .name = "device", .type = "INTEGER" }, ++ { .name = "comp_id", .type = "BLOB" }, ++}; ++ ++static const struct db_table_descriptor cxl_general_media_event_tab = { ++ .name = "cxl_general_media_event", ++ .fields = cxl_general_media_event_fields, ++ .num_fields = ARRAY_SIZE(cxl_general_media_event_fields), ++}; ++ ++int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) ++{ ++ int rc; ++ struct sqlite3_priv *priv = ras->db_priv; ++ ++ if (!priv || !priv->stmt_cxl_general_media_event) ++ return 0; ++ log(TERM, LOG_INFO, "cxl_general_media_event store: %p\n", ++ priv->stmt_cxl_general_media_event); ++ ++ ras_store_cxl_common_hdr(priv->stmt_cxl_general_media_event, &ev->hdr); ++ sqlite3_bind_int64(priv->stmt_cxl_general_media_event, 13, ev->dpa); ++ sqlite3_bind_int(priv->stmt_cxl_general_media_event, 14, ev->dpa_flags); ++ sqlite3_bind_int(priv->stmt_cxl_general_media_event, 15, ev->descriptor); ++ sqlite3_bind_int(priv->stmt_cxl_general_media_event, 16, ev->type); ++ sqlite3_bind_int(priv->stmt_cxl_general_media_event, 17, ev->transaction_type); ++ sqlite3_bind_int(priv->stmt_cxl_general_media_event, 18, ev->channel); ++ sqlite3_bind_int(priv->stmt_cxl_general_media_event, 19, ev->rank); ++ sqlite3_bind_int(priv->stmt_cxl_general_media_event, 20, ev->device); ++ sqlite3_bind_blob(priv->stmt_cxl_general_media_event, 21, ev->comp_id, ++ CXL_EVENT_GEN_MED_COMP_ID_SIZE, NULL); ++ ++ rc = sqlite3_step(priv->stmt_cxl_general_media_event); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed to do stmt_cxl_general_media_event step on sqlite: error = %d\n", rc); ++ rc = sqlite3_reset(priv->stmt_cxl_general_media_event); ++ if (rc != SQLITE_OK && rc != 
SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed reset stmt_cxl_general_media_event on sqlite: error = %d\n", rc); ++ log(TERM, LOG_INFO, "register inserted at db\n"); ++ ++ return rc; ++} + #endif + + /* +@@ -1229,6 +1298,14 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + if (rc != SQLITE_OK) + goto error; + } ++ ++ rc = ras_mc_create_table(priv, &cxl_general_media_event_tab); ++ if (rc == SQLITE_OK) { ++ rc = ras_mc_prepare_stmt(priv, &priv->stmt_cxl_general_media_event, ++ &cxl_general_media_event_tab); ++ if (rc != SQLITE_OK) ++ goto error; ++ } + #endif + + ras->db_priv = priv; +@@ -1390,6 +1467,14 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + "cpu %u: Failed to finalize cxl_generic_event sqlite: error = %d\n", + cpu, rc); + } ++ ++ if (priv->stmt_cxl_general_media_event) { ++ rc = sqlite3_finalize(priv->stmt_cxl_general_media_event); ++ if (rc != SQLITE_OK) ++ log(TERM, LOG_ERR, ++ "cpu %u: Failed to finalize cxl_general_media_event sqlite: error = %d\n", ++ cpu, rc); ++ } + #endif + + rc = sqlite3_close_v2(db); +diff --git a/ras-record.h b/ras-record.h +index 9ecfcda..37c32de 100644 +--- a/ras-record.h ++++ b/ras-record.h +@@ -134,6 +134,7 @@ struct ras_cxl_poison_event { + #define CXL_HEADERLOG_SIZE SZ_512 + #define CXL_HEADERLOG_SIZE_U32 (SZ_512 / sizeof(uint32_t)) + #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 ++#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 + + struct ras_cxl_aer_ue_event { + char timestamp[64]; +@@ -184,6 +185,20 @@ struct ras_cxl_generic_event { + uint8_t *data; + }; + ++struct ras_cxl_general_media_event { ++ struct ras_cxl_event_common_hdr hdr; ++ uint64_t dpa; ++ uint8_t dpa_flags; ++ uint8_t descriptor; ++ uint8_t type; ++ uint8_t transaction_type; ++ uint8_t channel; ++ uint8_t rank; ++ uint32_t device; ++ uint8_t *comp_id; ++ uint16_t validity_flags; ++}; ++ + struct ras_mc_event; + struct ras_aer_event; + struct ras_extlog_event; +@@ -198,6 +213,7 @@ struct ras_cxl_aer_ue_event; + struct 
ras_cxl_aer_ce_event; + struct ras_cxl_overflow_event; + struct ras_cxl_generic_event; ++struct ras_cxl_general_media_event; + + #ifdef HAVE_SQLITE3 + +@@ -236,6 +252,7 @@ struct sqlite3_priv { + sqlite3_stmt *stmt_cxl_aer_ce_event; + sqlite3_stmt *stmt_cxl_overflow_event; + sqlite3_stmt *stmt_cxl_generic_event; ++ sqlite3_stmt *stmt_cxl_general_media_event; + #endif + }; + +@@ -269,6 +286,7 @@ int ras_store_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_eve + int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev); + int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev); + int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev); ++int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev); + + #else + static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; +@@ -287,6 +305,7 @@ static inline int ras_store_cxl_aer_ue_event(struct ras_events *ras, struct ras_ + static inline int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev) { return 0; }; + static inline int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; }; + static inline int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; }; ++static inline int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) { return 0; }; + + #endif + +diff --git a/ras-report.c b/ras-report.c +index 8d7b76a..725dc9b 100644 +--- a/ras-report.c ++++ b/ras-report.c +@@ -489,6 +489,60 @@ static int set_cxl_generic_event_backtrace(char *buf, struct ras_cxl_generic_eve + return 0; + } + ++static int set_cxl_general_media_event_backtrace(char *buf, struct ras_cxl_general_media_event *ev) ++{ ++ char bt_buf[MAX_BACKTRACE_SIZE]; ++ ++ if (!buf || !ev) ++ return -1; ++ ++ 
sprintf(bt_buf, "BACKTRACE=" \ ++ "timestamp=%s\n" \ ++ "memdev=%s\n" \ ++ "host=%s\n" \ ++ "serial=0x%lx\n" \ ++ "log_type=%s\n" \ ++ "hdr_uuid=%s\n" \ ++ "hdr_flags=0x%x\n" \ ++ "hdr_handle=0x%x\n" \ ++ "hdr_related_handle=0x%x\n" \ ++ "hdr_timestamp=%s\n" \ ++ "hdr_length=%u\n" \ ++ "hdr_maint_op_class=%u\n" \ ++ "dpa=0x%lx\n" \ ++ "dpa_flags=%u\n" \ ++ "descriptor=%u\n" \ ++ "type=%u\n" \ ++ "transaction_type=%u\n" \ ++ "channel=%u\n" \ ++ "rank=%u\n" \ ++ "device=0x%x\n", \ ++ ev->hdr.timestamp, \ ++ ev->hdr.memdev, \ ++ ev->hdr.host, \ ++ ev->hdr.serial, \ ++ ev->hdr.log_type, \ ++ ev->hdr.hdr_uuid, \ ++ ev->hdr.hdr_flags, \ ++ ev->hdr.hdr_handle, \ ++ ev->hdr.hdr_related_handle, \ ++ ev->hdr.hdr_timestamp, \ ++ ev->hdr.hdr_length, \ ++ ev->hdr.hdr_maint_op_class, \ ++ ev->dpa, \ ++ ev->dpa_flags, \ ++ ev->descriptor, \ ++ ev->type, \ ++ ev->transaction_type, \ ++ ev->channel, \ ++ ev->rank, \ ++ ev->device); ++ ++ strcat(buf, bt_buf); ++ ++ return 0; ++} ++ + static int commit_report_backtrace(int sockfd, int type, void *ev){ + char buf[MAX_BACKTRACE_SIZE]; + char *pbuf = buf; +@@ -541,6 +595,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ + case CXL_GENERIC_EVENT: + rc = set_cxl_generic_event_backtrace(buf, (struct ras_cxl_generic_event *)ev); + break; ++ case CXL_GENERAL_MEDIA_EVENT: ++ rc = set_cxl_general_media_event_backtrace(buf, (struct ras_cxl_general_media_event *)ev); ++ break; + default: + return -1; + } +@@ -1170,3 +1227,47 @@ cxl_generic_fail: + return -1; + + } ++ ++int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) ++{ ++ char buf[MAX_MESSAGE_SIZE]; ++ int sockfd = 0; ++ int done = 0; ++ int rc = -1; ++ ++ memset(buf, 0, sizeof(buf)); ++ ++ sockfd = setup_report_socket(); ++ if (sockfd < 0) ++ return -1; ++ ++ rc = commit_report_basic(sockfd); ++ if (rc < 0) ++ goto cxl_general_media_fail; ++ ++ rc = commit_report_backtrace(sockfd, CXL_GENERAL_MEDIA_EVENT, ev); ++ if 
(rc < 0) ++ goto cxl_general_media_fail; ++ ++ sprintf(buf, "ANALYZER=%s", "rasdaemon-cxl_general_media_event"); ++ rc = write(sockfd, buf, strlen(buf) + 1); ++ if (rc < strlen(buf) + 1) ++ goto cxl_general_media_fail; ++ ++ sprintf(buf, "REASON=%s", "CXL General Media Event"); ++ rc = write(sockfd, buf, strlen(buf) + 1); ++ if (rc < strlen(buf) + 1) ++ goto cxl_general_media_fail; ++ ++ done = 1; ++ ++cxl_general_media_fail: ++ ++ if (sockfd >= 0) ++ close(sockfd); ++ ++ if (done) ++ return 0; ++ else ++ return -1; ++} +diff --git a/ras-report.h b/ras-report.h +index bf591a6..d9ec7df 100644 +--- a/ras-report.h ++++ b/ras-report.h +@@ -44,6 +44,7 @@ int ras_report_cxl_aer_ue_event(struct ras_events *ras, struct ras_cxl_aer_ue_ev + int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev); + int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev); + int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev); ++int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev); + + #else + +@@ -60,6 +61,7 @@ static inline int ras_report_cxl_aer_ue_event(struct ras_events *ras, struct ras + static inline int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_event *ev) { return 0; }; + static inline int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; }; + static inline int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; }; ++static inline int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) { return 0; }; + + #endif + diff --git a/572de9d57691be9e630abee9ffa56a2fb155d558.patch b/572de9d57691be9e630abee9ffa56a2fb155d558.patch new file mode 100644 index 0000000..4a89c04 --- /dev/null +++ b/572de9d57691be9e630abee9ffa56a2fb155d558.patch @@ -0,0 +1,182 @@ +commit 
# Map the transaction_type column of a CXL general media event to its
# printable name. Values outside the table yield "unknown-type".
sub get_cxl_transaction_type
{
    my $idx = $_[0];
    my @names = ("Unknown",
                 "Host Read",
                 "Host Write",
                 "Host Scan Media",
                 "Host Inject Poison",
                 "Internal Media Scrub",
                 "Internal Media Management");

    # $#names is the last valid index (6), so the bound follows the table
    return "unknown-type" if ($idx < 0 || $idx > $#names);

    return $names[$idx];
}
$hdr_length, $hdr_maint_op_class, $dpa, $dpa_flags, $descriptor, $mem_event_type, $transaction_type, $channel, $rank, $device, $comp_id)); ++ $out = ""; ++ while($query_handle->fetch()) { ++ $out .= "$id $timestamp error: "; ++ $out .= "memdev=$memdev, " if (defined $memdev && length $memdev); ++ $out .= "host=$host, " if (defined $host && length $host); ++ $out .= sprintf "serial=0x%llx, ", $serial if (defined $serial && length $serial); ++ $out .= "log=$log_type, " if (defined $log_type && length $log_type); ++ $out .= "hdr_uuid=$hdr_uuid, " if (defined $hdr_uuid && length $hdr_uuid); ++ $out .= sprintf "hdr_flags=0x%llx %s, ", $hdr_flags, get_cxl_hdr_flags_text($hdr_flags) if (defined $hdr_flags && length $hdr_flags); ++ $out .= sprintf "hdr_handle=0x%x, ", $hdr_handle if (defined $hdr_handle && length $hdr_handle); ++ $out .= sprintf "hdr_related_handle=0x%x, ", $hdr_related_handle if (defined $hdr_related_handle && length $hdr_related_handle); ++ $out .= "hdr_timestamp=$hdr_ts, " if (defined $hdr_ts && length $hdr_ts); ++ $out .= sprintf "hdr_length=%u, ", $hdr_length if (defined $hdr_length && length $hdr_length); ++ $out .= sprintf "hdr_maint_op_class=%u, ", $hdr_maint_op_class if (defined $hdr_maint_op_class && length $hdr_maint_op_class); ++ $out .= sprintf "dpa=0x%llx, ", $dpa if (defined $dpa && length $dpa); ++ $out .= sprintf "dpa_flags: %s, ", get_cxl_dpa_flags_text($dpa_flags) if (defined $dpa_flags && length $dpa_flags); ++ $out .= sprintf "descriptor_flags: %s, ", get_cxl_descriptor_flags_text($descriptor) if (defined $descriptor && length $descriptor); ++ $out .= sprintf "memory event type: %s, ", get_cxl_mem_event_type($mem_event_type) if (defined $mem_event_type && length $mem_event_type); ++ $out .= sprintf "transaction_type: %s, ", get_cxl_transaction_type($transaction_type) if (defined $transaction_type && length $transaction_type); ++ $out .= sprintf "channel=%u, ", $channel if (defined $channel && length $channel); ++ $out .= sprintf 
"rank=%u, ", $rank if (defined $rank && length $rank); ++ $out .= sprintf "device=0x%x, ", $device if (defined $device && length $device); ++ if (defined $comp_id && length $comp_id) { ++ $out .= sprintf "component_id:"; ++ my @bytes = unpack "C*", $comp_id; ++ for (my $i = 0; $i < CXL_EVENT_GEN_MED_COMP_ID_SIZE; $i++) { ++ $out .= sprintf "%02x ", $bytes[$i]; ++ } ++ } ++ $out .= "\n"; ++ } ++ if ($out ne "") { ++ print "CXL general media events:\n$out\n"; ++ } else { ++ print "No CXL general media errors.\n\n"; ++ } + } + + # Extlog errors diff --git a/75c8fec559641f843345ef8fbc36d124b60b914d.patch b/75c8fec559641f843345ef8fbc36d124b60b914d.patch new file mode 100644 index 0000000..cd0aca4 --- /dev/null +++ b/75c8fec559641f843345ef8fbc36d124b60b914d.patch @@ -0,0 +1,663 @@ +commit 75c8fec559641f843345ef8fbc36d124b60b914d +Author: Shiju Jose +Date: Fri Mar 31 13:35:13 2023 +0100 + + rasdaemon: Add support for the CXL poison events + + Add support to log and record the CXL poison events. + + The corresponding Kernel patches here: + https://lore.kernel.org/linux-cxl/64457d30bae07_2028294ac@dwillia2-xfh.jf.intel.com.notmuch/ + + Presently for logging only, could be extended for the policy + based recovery action for the frequent poison events depending on the above + kernel patches. 
dnl CXL event support: enabled by --enable-cxl, or implied by --enable-all.
AC_ARG_ENABLE([cxl],
    AS_HELP_STRING([--enable-cxl], [enable CXL events (currently experimental)]))

dnl Compare with a single equals sign: POSIX test only defines that form,
dnl and the doubled form is a bash extension that fails when configure is
dnl run by a strict POSIX shell such as dash.
AS_IF([test "x$enable_cxl" = "xyes" || test "x$enable_all" = "xyes"], [
  AC_DEFINE(HAVE_CXL,1,"have CXL events collect")
  AC_SUBST([WITH_CXL])
])
AM_CONDITIONAL([WITH_CXL], [test x$enable_cxl = xyes || test x$enable_all = xyes])
AM_COND_IF([WITH_CXL], [USE_CXL="yes"], [USE_CXL="no"])
experimental)])) + +@@ -215,6 +225,7 @@ compile time options summary + DEVLINK : $USE_DEVLINK + Disk I/O errors : $USE_DISKERROR + Memory Failure : $USE_MEMORY_FAILURE ++ CXL events : $USE_CXL + Memory CE PFA : $USE_MEMORY_CE_PFA + AMP RAS errors : $USE_AMP_NS_DECODE + CPU fault isolation : $USE_CPU_FAULT_ISOLATION +diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c +new file mode 100644 +index 0000000..cb23ba2 +--- /dev/null ++++ b/ras-cxl-handler.c +@@ -0,0 +1,202 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "ras-cxl-handler.h" ++#include "ras-record.h" ++#include "ras-logger.h" ++#include "ras-report.h" ++ ++/* Poison List: Payload out flags */ ++#define CXL_POISON_FLAG_MORE BIT(0) ++#define CXL_POISON_FLAG_OVERFLOW BIT(1) ++#define CXL_POISON_FLAG_SCANNING BIT(2) ++ ++/* CXL poison - source types */ ++enum cxl_poison_source { ++ CXL_POISON_SOURCE_UNKNOWN = 0, ++ CXL_POISON_SOURCE_EXTERNAL = 1, ++ CXL_POISON_SOURCE_INTERNAL = 2, ++ CXL_POISON_SOURCE_INJECTED = 3, ++ CXL_POISON_SOURCE_VENDOR = 7, ++}; ++ ++/* CXL poison - trace types */ ++enum cxl_poison_trace_type { ++ CXL_POISON_TRACE_LIST, ++ CXL_POISON_TRACE_INJECT, ++ CXL_POISON_TRACE_CLEAR, ++}; ++ ++int ras_cxl_poison_event_handler(struct trace_seq *s, ++ struct tep_record *record, ++ struct tep_event *event, void *context) ++{ ++ int len; ++ unsigned long long val; ++ struct ras_events *ras = context; ++ time_t now; ++ struct tm *tm; ++ struct ras_cxl_poison_event ev; ++ ++ now = record->ts / user_hz + ras->uptime_diff; ++ tm = localtime(&now); ++ if (tm) ++ strftime(ev.timestamp, sizeof(ev.timestamp), ++ "%Y-%m-%d %H:%M:%S %z", tm); ++ else ++ strncpy(ev.timestamp, "1970-01-01 00:00:00 +0000", sizeof(ev.timestamp)); ++ if (trace_seq_printf(s, "%s ", ev.timestamp) <= 0) ++ return -1; ++ ++ ev.memdev = tep_get_field_raw(s, event, "memdev", ++ record, &len, 1); ++ if (!ev.memdev) ++ return -1; ++ if (trace_seq_printf(s, "memdev:%s ", ev.memdev) <= 0) ++ return -1; ++ ++ ev.host = tep_get_field_raw(s, event, "host", ++ record, &len, 1); ++ if (!ev.host) ++ return -1; ++ if (trace_seq_printf(s, "host:%s ", ev.host) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "serial", record, &val, 1) < 0) ++ return -1; ++ ev.serial = val; ++ if (trace_seq_printf(s, "serial:0x%llx ", (unsigned long long)ev.serial) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "trace_type", record, &val, 1) < 0) ++ return -1; ++ switch (val) { ++ 
case CXL_POISON_TRACE_LIST: ++ ev.trace_type = "List"; ++ break; ++ case CXL_POISON_TRACE_INJECT: ++ ev.trace_type = "Inject"; ++ break; ++ case CXL_POISON_TRACE_CLEAR: ++ ev.trace_type = "Clear"; ++ break; ++ default: ++ ev.trace_type = "Invalid"; ++ } ++ if (trace_seq_printf(s, "trace_type:%s ", ev.trace_type) <= 0) ++ return -1; ++ ++ ev.region = tep_get_field_raw(s, event, "region", ++ record, &len, 1); ++ if (!ev.region) ++ return -1; ++ if (trace_seq_printf(s, "region:%s ", ev.region) <= 0) ++ return -1; ++ ++ ev.uuid = tep_get_field_raw(s, event, "uuid", ++ record, &len, 1); ++ if (!ev.uuid) ++ return -1; ++ if (trace_seq_printf(s, "region_uuid:%s ", ev.uuid) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "hpa", record, &val, 1) < 0) ++ return -1; ++ ev.hpa = val; ++ if (trace_seq_printf(s, "poison list: hpa:0x%llx ", (unsigned long long)ev.hpa) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "dpa", record, &val, 1) < 0) ++ return -1; ++ ev.dpa = val; ++ if (trace_seq_printf(s, "dpa:0x%llx ", (unsigned long long)ev.dpa) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "dpa_length", record, &val, 1) < 0) ++ return -1; ++ ev.dpa_length = val; ++ if (trace_seq_printf(s, "dpa_length:0x%x ", ev.dpa_length) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "source", record, &val, 1) < 0) ++ return -1; ++ switch (val) { ++ case CXL_POISON_SOURCE_UNKNOWN: ++ ev.source = "Unknown"; ++ break; ++ case CXL_POISON_SOURCE_EXTERNAL: ++ ev.source = "External"; ++ break; ++ case CXL_POISON_SOURCE_INTERNAL: ++ ev.source = "Internal"; ++ break; ++ case CXL_POISON_SOURCE_INJECTED: ++ ev.source = "Injected"; ++ break; ++ case CXL_POISON_SOURCE_VENDOR: ++ ev.source = "Vendor"; ++ break; ++ default: ++ ev.source = "Invalid"; ++ } ++ if (trace_seq_printf(s, "source:%s ", ev.source) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "flags", record, &val, 1) < 0) ++ return -1; ++ ev.flags = val; ++ if (trace_seq_printf(s, "flags:%d ", 
ev.flags) <= 0) ++ return -1; ++ ++ if (ev.flags & CXL_POISON_FLAG_OVERFLOW) { ++ if (tep_get_field_val(s, event, "overflow_ts", record, &val, 1) < 0) ++ return -1; ++ if (val) { ++ /* CXL Specification 3.0 ++ * Overflow timestamp - The number of unsigned nanoseconds ++ * that have elapsed since midnight, 01-Jan-1970 UTC ++ */ ++ time_t ovf_ts_secs = val / 1000000000ULL; ++ ++ tm = localtime(&ovf_ts_secs); ++ if (tm) { ++ strftime(ev.overflow_ts, sizeof(ev.overflow_ts), ++ "%Y-%m-%d %H:%M:%S %z", tm); ++ } ++ } ++ if (!val || !tm) ++ strncpy(ev.overflow_ts, "1970-01-01 00:00:00 +0000", ++ sizeof(ev.overflow_ts)); ++ } else ++ strncpy(ev.overflow_ts, "1970-01-01 00:00:00 +0000", sizeof(ev.overflow_ts)); ++ if (trace_seq_printf(s, "overflow timestamp:%s\n", ev.overflow_ts) <= 0) ++ return -1; ++ ++ /* Insert data into the SGBD */ ++#ifdef HAVE_SQLITE3 ++ ras_store_cxl_poison_event(ras, &ev); ++#endif ++ ++#ifdef HAVE_ABRT_REPORT ++ /* Report event to ABRT */ ++ ras_report_cxl_poison_event(ras, &ev); ++#endif ++ ++ return 0; ++} +diff --git a/ras-cxl-handler.h b/ras-cxl-handler.h +new file mode 100644 +index 0000000..84d5cc6 +--- /dev/null ++++ b/ras-cxl-handler.h +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ ++ ++#ifndef __RAS_CXL_HANDLER_H ++#define __RAS_CXL_HANDLER_H ++ ++#include "ras-events.h" ++#include ++ ++int ras_cxl_poison_event_handler(struct trace_seq *s, ++ struct tep_record *record, ++ struct tep_event *event, void *context); ++#endif +diff --git a/ras-events.c b/ras-events.c +index 5fe8e19..f95844a 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -40,6 +40,7 @@ + #include "ras-devlink-handler.h" + #include "ras-diskerror-handler.h" + #include "ras-memory-failure-handler.h" ++#include "ras-cxl-handler.h" + #include "ras-record.h" + #include "ras-logger.h" + #include "ras-page-isolation.h" +@@ -243,6 +244,10 @@ int toggle_ras_mc_event(int enable) + rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable); + #endif + ++#ifdef HAVE_CXL ++ rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_poison", enable); ++#endif ++ + free_ras: + free(ras); + return rc; +@@ -979,6 +984,16 @@ int handle_ras_events(int record_events) + "ras", "memory_failure_event"); + #endif + ++#ifdef HAVE_CXL ++ rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_poison", ++ ras_cxl_poison_event_handler, NULL, CXL_POISON_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "cxl", "cxl_poison"); ++#endif ++ + if (!num_events) { + log(ALL, LOG_INFO, + "Failed to trace all supported RAS events. 
Aborting.\n"); +diff --git a/ras-events.h b/ras-events.h +index 649b0c0..1ef3ecd 100644 +--- a/ras-events.h ++++ b/ras-events.h +@@ -39,6 +39,7 @@ enum { + DEVLINK_EVENT, + DISKERROR_EVENT, + MF_EVENT, ++ CXL_POISON_EVENT, + NR_EVENTS + }; + +diff --git a/ras-record.c b/ras-record.c +index adc97a4..c31baa0 100644 +--- a/ras-record.c ++++ b/ras-record.c +@@ -559,6 +559,71 @@ int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) + } + #endif + ++#ifdef HAVE_CXL ++/* ++ * Table and functions to handle cxl:cxl_poison ++ */ ++static const struct db_fields cxl_poison_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "memdev", .type = "TEXT" }, ++ { .name = "host", .type = "TEXT" }, ++ { .name = "serial", .type = "INTEGER" }, ++ { .name = "trace_type", .type = "TEXT" }, ++ { .name = "region", .type = "TEXT" }, ++ { .name = "region_uuid", .type = "TEXT" }, ++ { .name = "hpa", .type = "INTEGER" }, ++ { .name = "dpa", .type = "INTEGER" }, ++ { .name = "dpa_length", .type = "INTEGER" }, ++ { .name = "source", .type = "TEXT" }, ++ { .name = "flags", .type = "INTEGER" }, ++ { .name = "overflow_ts", .type = "TEXT" }, ++}; ++ ++static const struct db_table_descriptor cxl_poison_event_tab = { ++ .name = "cxl_poison_event", ++ .fields = cxl_poison_event_fields, ++ .num_fields = ARRAY_SIZE(cxl_poison_event_fields), ++}; ++ ++int ras_store_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_event *ev) ++{ ++ int rc; ++ struct sqlite3_priv *priv = ras->db_priv; ++ ++ if (!priv || !priv->stmt_cxl_poison_event) ++ return 0; ++ log(TERM, LOG_INFO, "cxl_poison_event store: %p\n", priv->stmt_cxl_poison_event); ++ ++ sqlite3_bind_text(priv->stmt_cxl_poison_event, 1, ev->timestamp, -1, NULL); ++ sqlite3_bind_text(priv->stmt_cxl_poison_event, 2, ev->memdev, -1, NULL); ++ sqlite3_bind_text(priv->stmt_cxl_poison_event, 3, ev->host, -1, NULL); ++ 
sqlite3_bind_int64(priv->stmt_cxl_poison_event, 4, ev->serial); ++ sqlite3_bind_text(priv->stmt_cxl_poison_event, 5, ev->trace_type, -1, NULL); ++ sqlite3_bind_text(priv->stmt_cxl_poison_event, 6, ev->region, -1, NULL); ++ sqlite3_bind_text(priv->stmt_cxl_poison_event, 7, ev->uuid, -1, NULL); ++ sqlite3_bind_int64(priv->stmt_cxl_poison_event, 8, ev->hpa); ++ sqlite3_bind_int64(priv->stmt_cxl_poison_event, 9, ev->dpa); ++ sqlite3_bind_int(priv->stmt_cxl_poison_event, 10, ev->dpa_length); ++ sqlite3_bind_text(priv->stmt_cxl_poison_event, 11, ev->source, -1, NULL); ++ sqlite3_bind_int(priv->stmt_cxl_poison_event, 12, ev->flags); ++ sqlite3_bind_text(priv->stmt_cxl_poison_event, 13, ev->overflow_ts, -1, NULL); ++ ++ rc = sqlite3_step(priv->stmt_cxl_poison_event); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed to do cxl_poison_event step on sqlite: error = %d\n", rc); ++ rc = sqlite3_reset(priv->stmt_cxl_poison_event); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed reset cxl_poison_event on sqlite: error = %d\n", ++ rc); ++ log(TERM, LOG_INFO, "register inserted at db\n"); ++ ++ return rc; ++} ++#endif ++ + /* + * Generic code + */ +@@ -900,6 +965,16 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + } + #endif + ++#ifdef HAVE_CXL ++ rc = ras_mc_create_table(priv, &cxl_poison_event_tab); ++ if (rc == SQLITE_OK) { ++ rc = ras_mc_prepare_stmt(priv, &priv->stmt_cxl_poison_event, ++ &cxl_poison_event_tab); ++ if (rc != SQLITE_OK) ++ goto error; ++ } ++#endif ++ + ras->db_priv = priv; + return 0; + +@@ -1019,6 +1094,16 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + } + #endif + ++#ifdef HAVE_CXL ++ if (priv->stmt_cxl_poison_event) { ++ rc = sqlite3_finalize(priv->stmt_cxl_poison_event); ++ if (rc != SQLITE_OK) ++ log(TERM, LOG_ERR, ++ "cpu %u: Failed to finalize cxl_poison_event sqlite: error = %d\n", ++ cpu, rc); ++ } ++#endif ++ + rc = sqlite3_close_v2(db); + if (rc != 
SQLITE_OK) + log(TERM, LOG_ERR, +diff --git a/ras-record.h b/ras-record.h +index 219f10b..fd15215 100644 +--- a/ras-record.h ++++ b/ras-record.h +@@ -114,6 +114,22 @@ struct ras_mf_event { + const char *action_result; + }; + ++struct ras_cxl_poison_event { ++ char timestamp[64]; ++ const char *memdev; ++ const char *host; ++ uint64_t serial; ++ const char *trace_type; ++ const char *region; ++ const char *uuid; ++ uint64_t hpa; ++ uint64_t dpa; ++ uint32_t dpa_length; ++ const char *source; ++ uint8_t flags; ++ char overflow_ts[64]; ++}; ++ + struct ras_mc_event; + struct ras_aer_event; + struct ras_extlog_event; +@@ -123,6 +139,7 @@ struct mce_event; + struct devlink_event; + struct diskerror_event; + struct ras_mf_event; ++struct ras_cxl_poison_event; + + #ifdef HAVE_SQLITE3 + +@@ -155,6 +172,9 @@ struct sqlite3_priv { + #ifdef HAVE_MEMORY_FAILURE + sqlite3_stmt *stmt_mf_event; + #endif ++#ifdef HAVE_CXL ++ sqlite3_stmt *stmt_cxl_poison_event; ++#endif + }; + + struct db_fields { +@@ -182,6 +202,7 @@ int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev); + int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev); + int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev); + int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev); ++int ras_store_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_event *ev); + + #else + static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; +@@ -195,6 +216,7 @@ static inline int ras_store_arm_record(struct ras_events *ras, struct ras_arm_ev + static inline int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; }; + static inline int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev) { return 0; }; + static inline int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; }; ++static inline int 
ras_store_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_event *ev) { return 0; }; + + #endif + +diff --git a/ras-report.c b/ras-report.c +index 62d5eb7..3daecc0 100644 +--- a/ras-report.c ++++ b/ras-report.c +@@ -331,6 +331,46 @@ static int set_mf_event_backtrace(char *buf, struct ras_mf_event *ev) + return 0; + } + ++static int set_cxl_poison_event_backtrace(char *buf, struct ras_cxl_poison_event *ev) ++{ ++ char bt_buf[MAX_BACKTRACE_SIZE]; ++ ++ if (!buf || !ev) ++ return -1; ++ ++ sprintf(bt_buf, "BACKTRACE=" \ ++ "timestamp=%s\n" \ ++ "memdev=%s\n" \ ++ "host=%s\n" \ ++ "serial=0x%lx\n" \ ++ "trace_type=%s\n" \ ++ "region=%s\n" \ ++ "region_uuid=%s\n" \ ++ "hpa=0x%lx\n" \ ++ "dpa=0x%lx\n" \ ++ "dpa_length=0x%x\n" \ ++ "source=%s\n" \ ++ "flags=%u\n" \ ++ "overflow_timestamp=%s\n", \ ++ ev->timestamp, \ ++ ev->memdev, \ ++ ev->host, \ ++ ev->serial, \ ++ ev->trace_type, \ ++ ev->region, \ ++ ev->uuid, \ ++ ev->hpa, \ ++ ev->dpa, \ ++ ev->dpa_length, \ ++ ev->source, \ ++ ev->flags, \ ++ ev->overflow_ts); ++ ++ strcat(buf, bt_buf); ++ ++ return 0; ++} ++ + static int commit_report_backtrace(int sockfd, int type, void *ev){ + char buf[MAX_BACKTRACE_SIZE]; + char *pbuf = buf; +@@ -368,6 +408,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ + case MF_EVENT: + rc = set_mf_event_backtrace(buf, (struct ras_mf_event *)ev); + break; ++ case CXL_POISON_EVENT: ++ rc = set_cxl_poison_event_backtrace(buf, (struct ras_cxl_poison_event *)ev); ++ break; + default: + return -1; + } +@@ -776,3 +819,47 @@ mf_fail: + else + return -1; + } ++ ++int ras_report_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_event *ev) ++{ ++ char buf[MAX_MESSAGE_SIZE]; ++ int sockfd = 0; ++ int done = 0; ++ int rc = -1; ++ ++ memset(buf, 0, sizeof(buf)); ++ ++ sockfd = setup_report_socket(); ++ if (sockfd < 0) ++ return -1; ++ ++ rc = commit_report_basic(sockfd); ++ if (rc < 0) ++ goto cxl_poison_fail; ++ ++ rc = commit_report_backtrace(sockfd, 
CXL_POISON_EVENT, ev); ++ if (rc < 0) ++ goto cxl_poison_fail; ++ ++ sprintf(buf, "ANALYZER=%s", "rasdaemon-cxl-poison"); ++ rc = write(sockfd, buf, strlen(buf) + 1); ++ if (rc < strlen(buf) + 1) ++ goto cxl_poison_fail; ++ ++ sprintf(buf, "REASON=%s", "CXL poison"); ++ rc = write(sockfd, buf, strlen(buf) + 1); ++ if (rc < strlen(buf) + 1) ++ goto cxl_poison_fail; ++ ++ done = 1; ++ ++cxl_poison_fail: ++ ++ if (sockfd >= 0) ++ close(sockfd); ++ ++ if (done) ++ return 0; ++ else ++ return -1; ++} +diff --git a/ras-report.h b/ras-report.h +index e605eb1..d1591ce 100644 +--- a/ras-report.h ++++ b/ras-report.h +@@ -39,6 +39,7 @@ int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev); + int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev); + int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev); + int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev); ++int ras_report_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_event *ev); + + #else + +@@ -50,6 +51,7 @@ static inline int ras_report_arm_event(struct ras_events *ras, struct ras_arm_ev + static inline int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; }; + static inline int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev) { return 0; }; + static inline int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; }; ++static inline int ras_report_cxl_poison_event(struct ras_events *ras, struct ras_cxl_poison_event *ev) { return 0; }; + + #endif + diff --git a/7be2edbf863b7acf7e1cab10c2b9f9bf51b3d513.patch b/7be2edbf863b7acf7e1cab10c2b9f9bf51b3d513.patch new file mode 100644 index 0000000..b6092db --- /dev/null +++ b/7be2edbf863b7acf7e1cab10c2b9f9bf51b3d513.patch @@ -0,0 +1,97 @@ +commit 7be2edbf863b7acf7e1cab10c2b9f9bf51b3d513 +Author: Shiju Jose +Date: Tue Apr 4 16:07:21 2023 +0100 + + rasdaemon: Add common function to 
get timestamp for the event + + Add common function to get the timestamp for the event + reported. + + Signed-off-by: Shiju Jose + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c +index 59534a4..d540ebb 100644 +--- a/ras-cxl-handler.c ++++ b/ras-cxl-handler.c +@@ -42,6 +42,20 @@ static void convert_timestamp(unsigned long long ts, char *ts_ptr, uint16_t size + size); + } + ++static void get_timestamp(struct trace_seq *s, struct tep_record *record, ++ struct ras_events *ras, char *ts_ptr, uint16_t size) ++{ ++ time_t now; ++ struct tm *tm; ++ ++ now = record->ts / user_hz + ras->uptime_diff; ++ tm = localtime(&now); ++ if (tm) ++ strftime(ts_ptr, size, "%Y-%m-%d %H:%M:%S %z", tm); ++ else ++ strncpy(ts_ptr, "1970-01-01 00:00:00 +0000", size); ++} ++ + /* Poison List: Payload out flags */ + #define CXL_POISON_FLAG_MORE BIT(0) + #define CXL_POISON_FLAG_OVERFLOW BIT(1) +@@ -70,17 +84,9 @@ int ras_cxl_poison_event_handler(struct trace_seq *s, + int len; + unsigned long long val; + struct ras_events *ras = context; +- time_t now; +- struct tm *tm; + struct ras_cxl_poison_event ev; + +- now = record->ts / user_hz + ras->uptime_diff; +- tm = localtime(&now); +- if (tm) +- strftime(ev.timestamp, sizeof(ev.timestamp), +- "%Y-%m-%d %H:%M:%S %z", tm); +- else +- strncpy(ev.timestamp, "1970-01-01 00:00:00 +0000", sizeof(ev.timestamp)); ++ get_timestamp(s, record, ras, (char *)&ev.timestamp, sizeof(ev.timestamp)); + if (trace_seq_printf(s, "%s ", ev.timestamp) <= 0) + return -1; + +@@ -285,19 +291,11 @@ int ras_cxl_aer_ue_event_handler(struct trace_seq *s, + { + int len, i; + unsigned long long val; +- time_t now; +- struct tm *tm; + struct ras_events *ras = context; + struct ras_cxl_aer_ue_event ev; + + memset(&ev, 0, sizeof(ev)); +- now = record->ts / user_hz + ras->uptime_diff; +- tm = localtime(&now); +- if (tm) +- strftime(ev.timestamp, sizeof(ev.timestamp), +- "%Y-%m-%d %H:%M:%S %z", tm); +- else +- strncpy(ev.timestamp, 
"1970-01-01 00:00:00 +0000", sizeof(ev.timestamp)); ++ get_timestamp(s, record, ras, (char *)&ev.timestamp, sizeof(ev.timestamp)); + if (trace_seq_printf(s, "%s ", ev.timestamp) <= 0) + return -1; + +@@ -380,18 +378,10 @@ int ras_cxl_aer_ce_event_handler(struct trace_seq *s, + { + int len; + unsigned long long val; +- time_t now; +- struct tm *tm; + struct ras_events *ras = context; + struct ras_cxl_aer_ce_event ev; + +- now = record->ts / user_hz + ras->uptime_diff; +- tm = localtime(&now); +- if (tm) +- strftime(ev.timestamp, sizeof(ev.timestamp), +- "%Y-%m-%d %H:%M:%S %z", tm); +- else +- strncpy(ev.timestamp, "1970-01-01 00:00:00 +0000", sizeof(ev.timestamp)); ++ get_timestamp(s, record, ras, (char *)&ev.timestamp, sizeof(ev.timestamp)); + if (trace_seq_printf(s, "%s ", ev.timestamp) <= 0) + return -1; + diff --git a/8f79833e3d78424f4a594985fbeb91890f4af81c.patch b/8f79833e3d78424f4a594985fbeb91890f4af81c.patch new file mode 100644 index 0000000..b509270 --- /dev/null +++ b/8f79833e3d78424f4a594985fbeb91890f4af81c.patch @@ -0,0 +1,78 @@ +commit 8f79833e3d78424f4a594985fbeb91890f4af81c +Author: Shiju Jose +Date: Mon Mar 4 11:49:50 2024 +0000 + + rasdaemon: Fix build warnings unused variable if AMP RAS errors is not enabled + + This patch fixes following build warnings unused variable if AMP RAS errors + is not enabled(--enable-amp-ns-decode). 
+ + ================================================== + ras-aer-handler.c: In function ‘ras_aer_event_handler’: + ras-aer-handler.c:72:21: warning: unused variable ‘fn’ [-Wunused-variable] + int seg, bus, dev, fn; + ^~ + ras-aer-handler.c:72:16: warning: unused variable ‘dev’ [-Wunused-variable] + int seg, bus, dev, fn; + ^~~ + ras-aer-handler.c:72:11: warning: unused variable ‘bus’ [-Wunused-variable] + int seg, bus, dev, fn; + ^~~ + ras-aer-handler.c:72:6: warning: unused variable ‘seg’ [-Wunused-variable] + int seg, bus, dev, fn; + ^~~ + ras-aer-handler.c:71:10: warning: variable ‘sel_data’ set but not used [-Wunused-but-set-variable] + uint8_t sel_data[5]; + ^~~~~~~~ + ras-aer-handler.c:70:7: warning: unused variable ‘ipmi_add_sel’ [-Wunused-variable] + char ipmi_add_sel[105]; + ^~~~~~~~~~~~ + ================================================== + + Signed-off-by: Shiju Jose + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/ras-aer-handler.c b/ras-aer-handler.c +index bb1a6f6..29f6551 100644 +--- a/ras-aer-handler.c ++++ b/ras-aer-handler.c +@@ -67,9 +67,11 @@ int ras_aer_event_handler(struct trace_seq *s, + struct tm *tm; + struct ras_aer_event ev; + char buf[BUF_LEN]; ++#ifdef HAVE_AMP_NS_DECODE + char ipmi_add_sel[105]; + uint8_t sel_data[5]; + int seg, bus, dev, fn; ++#endif + + /* + * Newer kernels (3.10-rc1 or upper) provide an uptime clock. 
+@@ -132,19 +134,27 @@ int ras_aer_event_handler(struct trace_seq *s, + switch (severity_val) { + case HW_EVENT_AER_UNCORRECTED_NON_FATAL: + ev.error_type = "Uncorrected (Non-Fatal)"; ++#ifdef HAVE_AMP_NS_DECODE + sel_data[0] = 0xca; ++#endif + break; + case HW_EVENT_AER_UNCORRECTED_FATAL: + ev.error_type = "Uncorrected (Fatal)"; ++#ifdef HAVE_AMP_NS_DECODE + sel_data[0] = 0xca; ++#endif + break; + case HW_EVENT_AER_CORRECTED: + ev.error_type = "Corrected"; ++#ifdef HAVE_AMP_NS_DECODE + sel_data[0] = 0xbf; ++#endif + break; + default: + ev.error_type = "Unknown severity"; ++#ifdef HAVE_AMP_NS_DECODE + sel_data[0] = 0xbf; ++#endif + } + trace_seq_puts(s, ev.error_type); + diff --git a/93ca96b66c917af37b2ae9295dc5df46a7d64dd2.patch b/93ca96b66c917af37b2ae9295dc5df46a7d64dd2.patch new file mode 100644 index 0000000..4952349 --- /dev/null +++ b/93ca96b66c917af37b2ae9295dc5df46a7d64dd2.patch @@ -0,0 +1,82 @@ +commit b6506f22fb2d7f44d9d633d44656dff2a94f257e +Author: Shiju Jose +Date: Mon Feb 12 10:49:10 2024 +0000 + + rasdaemon: ras-mc-ctl: Add support for CXL poison trace events + + Add support for CXL poison events to the ras-mc-ctl tool. 
+ + Signed-off-by: Shiju Jose + Signed-off-by: Mauro Carvalho Chehab + (cherry picked from commit 93ca96b66c917af37b2ae9295dc5df46a7d64dd2) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 6a319a7..16b0589 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1382,6 +1382,22 @@ sub summary + print "No CXL overflow errors.\n\n"; + } + $query_handle->finish; ++ ++ # CXL poison errors ++ $query = "select memdev, count(*) from cxl_poison_event$conf{opt}{since} group by memdev"; ++ $query_handle = $dbh->prepare($query); ++ $query_handle->execute(); ++ $query_handle->bind_columns(\($memdev, $count)); ++ $out = ""; ++ while($query_handle->fetch()) { ++ $out .= "\t$memdev errors: $count\n"; ++ } ++ if ($out ne "") { ++ print "CXL poison events summary:\n$out\n"; ++ } else { ++ print "No CXL poison errors.\n\n"; ++ } ++ $query_handle->finish; + } + + # extlog errors +@@ -1491,6 +1507,7 @@ sub errors + my ($pfn, $page_type, $action_result); + my ($memdev, $host, $serial, $error_status, $first_error, $header_log); + my ($log_type, $first_ts, $last_ts); ++ my ($trace_type, $region, $region_uuid, $hpa, $dpa, $dpa_length, $source, $flags, $overflow_ts); + + my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); + +@@ -1636,6 +1653,34 @@ sub errors + } else { + print "No CXL overflow errors.\n\n"; + } ++ ++ # CXL poison errors ++ $query = "select id, timestamp, memdev, host, serial, trace_type, region, region_uuid, hpa, dpa, dpa_length, source, flags, overflow_ts from cxl_poison_event$conf{opt}{since} order by id"; ++ $query_handle = $dbh->prepare($query); ++ $query_handle->execute(); ++ $query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $trace_type, $region, $region_uuid, $hpa, $dpa, $dpa_length, $source, $flags, $overflow_ts)); ++ $out = ""; ++ while($query_handle->fetch()) { ++ $out .= "$id $timestamp error: "; ++ $out .= "memdev=$memdev, " if (defined $memdev && length $memdev); ++ $out .= "host=$host, " if (defined $host 
&& length $host); ++ $out .= sprintf "serial=0x%llx, ", $serial if (defined $serial && length $serial); ++ $out .= "trace_type=$trace_type, " if (defined $trace_type && length $trace_type); ++ $out .= "region=$region, " if (defined $region && length $region); ++ $out .= "region_uuid=$region_uuid, " if (defined $region_uuid && length $region_uuid); ++ $out .= sprintf "hpa=0x%llx, ", $hpa if (defined $hpa && length $hpa); ++ $out .= sprintf "dpa=0x%llx, ", $dpa if (defined $dpa && length $dpa); ++ $out .= sprintf "dpa_length=0x%x, ", $dpa_length if (defined $dpa_length && length $dpa_length); ++ $out .= "source=$source, " if (defined $source && length $source); ++ $out .= sprintf "flags=%d, ", $flags if (defined $flags && length $flags); ++ $out .= "overflow timestamp=$overflow_ts " if (defined $overflow_ts && length $overflow_ts); ++ $out .= "\n"; ++ } ++ if ($out ne "") { ++ print "CXL poison events:\n$out\n"; ++ } else { ++ print "No CXL poison errors.\n\n"; ++ } + } + + # Extlog errors diff --git a/9a2f6186db2622788f8868d8ec082684d6a06d4d.patch b/9a2f6186db2622788f8868d8ec082684d6a06d4d.patch new file mode 100644 index 0000000..c85f54e --- /dev/null +++ b/9a2f6186db2622788f8868d8ec082684d6a06d4d.patch @@ -0,0 +1,559 @@ +commit 9a2f6186db2622788f8868d8ec082684d6a06d4d +Author: Shiju Jose +Date: Wed Apr 5 13:28:20 2023 +0100 + + rasdaemon: Add support for the CXL dram events + + Add support to log and record the CXL dram events. 
+ + Signed-off-by: Shiju Jose + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c +index 2de96f6..64b0b50 100644 +--- a/ras-cxl-handler.c ++++ b/ras-cxl-handler.c +@@ -865,3 +865,154 @@ int ras_cxl_general_media_event_handler(struct trace_seq *s, + + return 0; + } ++ ++/* ++ * DRAM Event Record - DER ++ * ++ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 ++ */ ++#define CXL_DER_VALID_CHANNEL BIT(0) ++#define CXL_DER_VALID_RANK BIT(1) ++#define CXL_DER_VALID_NIBBLE BIT(2) ++#define CXL_DER_VALID_BANK_GROUP BIT(3) ++#define CXL_DER_VALID_BANK BIT(4) ++#define CXL_DER_VALID_ROW BIT(5) ++#define CXL_DER_VALID_COLUMN BIT(6) ++#define CXL_DER_VALID_CORRECTION_MASK BIT(7) ++ ++int ras_cxl_dram_event_handler(struct trace_seq *s, ++ struct tep_record *record, ++ struct tep_event *event, void *context) ++{ ++ int len, i; ++ unsigned long long val; ++ struct ras_events *ras = context; ++ struct ras_cxl_dram_event ev; ++ ++ memset(&ev, 0, sizeof(ev)); ++ if (handle_ras_cxl_common_hdr(s, record, event, context, &ev.hdr) < 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "dpa", record, &val, 1) < 0) ++ return -1; ++ ev.dpa = val; ++ if (trace_seq_printf(s, "dpa:0x%llx ", (unsigned long long)ev.dpa) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "dpa_flags", record, &val, 1) < 0) ++ return -1; ++ ev.dpa_flags = val; ++ if (trace_seq_printf(s, "dpa_flags:") <= 0) ++ return -1; ++ if (decode_cxl_event_flags(s, ev.dpa_flags, cxl_dpa_flags, ARRAY_SIZE(cxl_dpa_flags)) < 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "descriptor", record, &val, 1) < 0) ++ return -1; ++ ev.descriptor = val; ++ if (trace_seq_printf(s, "descriptor:") <= 0) ++ return -1; ++ if (decode_cxl_event_flags(s, ev.descriptor, cxl_gmer_event_desc_flags, ++ ARRAY_SIZE(cxl_gmer_event_desc_flags)) < 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "type", record, &val, 1) < 0) ++ return -1; ++ ev.type = val; ++ if (trace_seq_printf(s, "type:%s ", 
get_cxl_type_str(cxl_gmer_mem_event_type, ++ ARRAY_SIZE(cxl_gmer_mem_event_type), ev.type)) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "transaction_type", record, &val, 1) < 0) ++ return -1; ++ ev.transaction_type = val; ++ if (trace_seq_printf(s, "transaction_type:%s ", ++ get_cxl_type_str(cxl_gmer_trans_type, ++ ARRAY_SIZE(cxl_gmer_trans_type), ++ ev.transaction_type)) <= 0) ++ return -1; ++ ++ if (tep_get_field_val(s, event, "validity_flags", record, &val, 1) < 0) ++ return -1; ++ ev.validity_flags = val; ++ ++ if (ev.validity_flags & CXL_DER_VALID_CHANNEL) { ++ if (tep_get_field_val(s, event, "channel", record, &val, 1) < 0) ++ return -1; ++ ev.channel = val; ++ if (trace_seq_printf(s, "channel:%u ", ev.channel) <= 0) ++ return -1; ++ } ++ ++ if (ev.validity_flags & CXL_DER_VALID_RANK) { ++ if (tep_get_field_val(s, event, "rank", record, &val, 1) < 0) ++ return -1; ++ ev.rank = val; ++ if (trace_seq_printf(s, "rank:%u ", ev.rank) <= 0) ++ return -1; ++ } ++ ++ if (ev.validity_flags & CXL_DER_VALID_NIBBLE) { ++ if (tep_get_field_val(s, event, "nibble_mask", record, &val, 1) < 0) ++ return -1; ++ ev.nibble_mask = val; ++ if (trace_seq_printf(s, "nibble_mask:%u ", ev.nibble_mask) <= 0) ++ return -1; ++ } ++ ++ if (ev.validity_flags & CXL_DER_VALID_BANK_GROUP) { ++ if (tep_get_field_val(s, event, "bank_group", record, &val, 1) < 0) ++ return -1; ++ ev.bank_group = val; ++ if (trace_seq_printf(s, "bank_group:%u ", ev.bank_group) <= 0) ++ return -1; ++ } ++ ++ if (ev.validity_flags & CXL_DER_VALID_BANK) { ++ if (tep_get_field_val(s, event, "bank", record, &val, 1) < 0) ++ return -1; ++ ev.bank = val; ++ if (trace_seq_printf(s, "bank:%u ", ev.bank) <= 0) ++ return -1; ++ } ++ ++ if (ev.validity_flags & CXL_DER_VALID_ROW) { ++ if (tep_get_field_val(s, event, "row", record, &val, 1) < 0) ++ return -1; ++ ev.row = val; ++ if (trace_seq_printf(s, "row:%u ", ev.row) <= 0) ++ return -1; ++ } ++ ++ if (ev.validity_flags & CXL_DER_VALID_COLUMN) { ++ if 
(tep_get_field_val(s, event, "column", record, &val, 1) < 0) ++ return -1; ++ ev.column = val; ++ if (trace_seq_printf(s, "column:%u ", ev.column) <= 0) ++ return -1; ++ } ++ ++ if (ev.validity_flags & CXL_DER_VALID_CORRECTION_MASK) { ++ ev.cor_mask = tep_get_field_raw(s, event, "cor_mask", record, &len, 1); ++ if (!ev.cor_mask) ++ return -1; ++ if (trace_seq_printf(s, "correction_mask:") <= 0) ++ return -1; ++ for (i = 0; i < CXL_EVENT_DER_CORRECTION_MASK_SIZE; i++) { ++ if (trace_seq_printf(s, "%02x ", ev.cor_mask[i]) <= 0) ++ break; ++ } ++ } ++ ++ /* Insert data into the SGBD */ ++#ifdef HAVE_SQLITE3 ++ ras_store_cxl_dram_event(ras, &ev); ++#endif ++ ++#ifdef HAVE_ABRT_REPORT ++ /* Report event to ABRT */ ++ ras_report_cxl_dram_event(ras, &ev); ++#endif ++ ++ return 0; ++} +diff --git a/ras-cxl-handler.h b/ras-cxl-handler.h +index 3adca4a..35455af 100644 +--- a/ras-cxl-handler.h ++++ b/ras-cxl-handler.h +@@ -38,4 +38,7 @@ int ras_cxl_generic_event_handler(struct trace_seq *s, + int ras_cxl_general_media_event_handler(struct trace_seq *s, + struct tep_record *record, + struct tep_event *event, void *context); ++int ras_cxl_dram_event_handler(struct trace_seq *s, ++ struct tep_record *record, ++ struct tep_event *event, void *context); + #endif +diff --git a/ras-events.c b/ras-events.c +index 978dee4..d27e0c4 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -251,6 +251,7 @@ int toggle_ras_mc_event(int enable) + rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_overflow", enable); + rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_generic_event", enable); + rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_general_media", enable); ++ rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_dram", enable); + #endif + + free_ras: +@@ -1072,6 +1073,14 @@ int handle_ras_events(int record_events) + else + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "cxl", "cxl_general_media"); ++ ++ rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_dram", ++ ras_cxl_dram_event_handler, NULL, 
CXL_DRAM_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "cxl", "cxl_dram"); + #endif + + if (!num_events) { +diff --git a/ras-events.h b/ras-events.h +index 9b83df3..d192a6b 100644 +--- a/ras-events.h ++++ b/ras-events.h +@@ -45,6 +45,7 @@ enum { + CXL_OVERFLOW_EVENT, + CXL_GENERIC_EVENT, + CXL_GENERAL_MEDIA_EVENT, ++ CXL_DRAM_EVENT, + NR_EVENTS + }; + +diff --git a/ras-record.c b/ras-record.c +index 507a58e..fffa81c 100644 +--- a/ras-record.c ++++ b/ras-record.c +@@ -915,6 +915,83 @@ int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_gen + + return rc; + } ++ ++/* ++ * Table and functions to handle cxl:cxl_dram_event ++ */ ++static const struct db_fields cxl_dram_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "memdev", .type = "TEXT" }, ++ { .name = "host", .type = "TEXT" }, ++ { .name = "serial", .type = "INTEGER" }, ++ { .name = "log_type", .type = "TEXT" }, ++ { .name = "hdr_uuid", .type = "TEXT" }, ++ { .name = "hdr_flags", .type = "INTEGER" }, ++ { .name = "hdr_handle", .type = "INTEGER" }, ++ { .name = "hdr_related_handle", .type = "INTEGER" }, ++ { .name = "hdr_ts", .type = "TEXT" }, ++ { .name = "hdr_length", .type = "INTEGER" }, ++ { .name = "hdr_maint_op_class", .type = "INTEGER" }, ++ { .name = "dpa", .type = "INTEGER" }, ++ { .name = "dpa_flags", .type = "INTEGER" }, ++ { .name = "descriptor", .type = "INTEGER" }, ++ { .name = "type", .type = "INTEGER" }, ++ { .name = "transaction_type", .type = "INTEGER" }, ++ { .name = "channel", .type = "INTEGER" }, ++ { .name = "rank", .type = "INTEGER" }, ++ { .name = "nibble_mask", .type = "INTEGER" }, ++ { .name = "bank_group", .type = "INTEGER" }, ++ { .name = "bank", .type = "INTEGER" }, ++ { .name = "row", .type = "INTEGER" }, ++ { .name = "column", .type = "INTEGER" }, ++ { .name = "cor_mask", .type = "BLOB" }, ++}; ++ ++static const struct 
db_table_descriptor cxl_dram_event_tab = { ++ .name = "cxl_dram_event", ++ .fields = cxl_dram_event_fields, ++ .num_fields = ARRAY_SIZE(cxl_dram_event_fields), ++}; ++ ++int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev) ++{ ++ int rc; ++ struct sqlite3_priv *priv = ras->db_priv; ++ ++ if (!priv || !priv->stmt_cxl_dram_event) ++ return 0; ++ log(TERM, LOG_INFO, "cxl_dram_event store: %p\n", ++ priv->stmt_cxl_dram_event); ++ ++ ras_store_cxl_common_hdr(priv->stmt_cxl_dram_event, &ev->hdr); ++ sqlite3_bind_int64(priv->stmt_cxl_dram_event, 13, ev->dpa); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 14, ev->dpa_flags); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 15, ev->descriptor); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 16, ev->type); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 17, ev->transaction_type); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 18, ev->channel); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 19, ev->rank); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 20, ev->nibble_mask); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 21, ev->bank_group); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 22, ev->bank); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 23, ev->row); ++ sqlite3_bind_int(priv->stmt_cxl_dram_event, 24, ev->column); ++ sqlite3_bind_blob(priv->stmt_cxl_dram_event, 25, ev->cor_mask, ++ CXL_EVENT_DER_CORRECTION_MASK_SIZE, NULL); ++ ++ rc = sqlite3_step(priv->stmt_cxl_dram_event); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed to do stmt_cxl_dram_event step on sqlite: error = %d\n", rc); ++ rc = sqlite3_reset(priv->stmt_cxl_dram_event); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed reset stmt_cxl_dram_event on sqlite: error = %d\n", rc); ++ log(TERM, LOG_INFO, "register inserted at db\n"); ++ ++ return rc; ++} + #endif + + /* +@@ -1306,6 +1383,14 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + if (rc != SQLITE_OK) 
+ goto error; + } ++ ++ rc = ras_mc_create_table(priv, &cxl_dram_event_tab); ++ if (rc == SQLITE_OK) { ++ rc = ras_mc_prepare_stmt(priv, &priv->stmt_cxl_dram_event, ++ &cxl_dram_event_tab); ++ if (rc != SQLITE_OK) ++ goto error; ++ } + #endif + + ras->db_priv = priv; +@@ -1475,6 +1560,14 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + "cpu %u: Failed to finalize cxl_general_media_event sqlite: error = %d\n", + cpu, rc); + } ++ ++ if (priv->stmt_cxl_dram_event) { ++ rc = sqlite3_finalize(priv->stmt_cxl_dram_event); ++ if (rc != SQLITE_OK) ++ log(TERM, LOG_ERR, ++ "cpu %u: Failed to finalize cxl_dram_event sqlite: error = %d\n", ++ cpu, rc); ++ } + #endif + + rc = sqlite3_close_v2(db); +diff --git a/ras-record.h b/ras-record.h +index 37c32de..480ff92 100644 +--- a/ras-record.h ++++ b/ras-record.h +@@ -135,6 +135,7 @@ struct ras_cxl_poison_event { + #define CXL_HEADERLOG_SIZE_U32 (SZ_512 / sizeof(uint32_t)) + #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 + #define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 ++#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 + + struct ras_cxl_aer_ue_event { + char timestamp[64]; +@@ -199,6 +200,24 @@ struct ras_cxl_general_media_event { + uint16_t validity_flags; + }; + ++struct ras_cxl_dram_event { ++ struct ras_cxl_event_common_hdr hdr; ++ uint64_t dpa; ++ uint8_t dpa_flags; ++ uint8_t descriptor; ++ uint8_t type; ++ uint8_t transaction_type; ++ uint8_t channel; ++ uint8_t rank; ++ uint32_t nibble_mask; ++ uint8_t bank_group; ++ uint8_t bank; ++ uint32_t row; ++ uint16_t column; ++ uint8_t *cor_mask; ++ uint16_t validity_flags; ++}; ++ + struct ras_mc_event; + struct ras_aer_event; + struct ras_extlog_event; +@@ -214,6 +233,7 @@ struct ras_cxl_aer_ce_event; + struct ras_cxl_overflow_event; + struct ras_cxl_generic_event; + struct ras_cxl_general_media_event; ++struct ras_cxl_dram_event; + + #ifdef HAVE_SQLITE3 + +@@ -253,6 +273,7 @@ struct sqlite3_priv { + sqlite3_stmt *stmt_cxl_overflow_event; + sqlite3_stmt 
*stmt_cxl_generic_event; + sqlite3_stmt *stmt_cxl_general_media_event; ++ sqlite3_stmt *stmt_cxl_dram_event; + #endif + }; + +@@ -287,6 +308,7 @@ int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_eve + int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev); + int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev); + int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev); ++int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev); + + #else + static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; +@@ -306,6 +328,7 @@ static inline int ras_store_cxl_aer_ce_event(struct ras_events *ras, struct ras_ + static inline int ras_store_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; }; + static inline int ras_store_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; }; + static inline int ras_store_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) { return 0; }; ++static inline int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev) { return 0; }; + + #endif + +diff --git a/ras-report.c b/ras-report.c +index 725dc9b..21180b1 100644 +--- a/ras-report.c ++++ b/ras-report.c +@@ -543,6 +543,68 @@ static int set_cxl_general_media_event_backtrace(char *buf, struct ras_cxl_gener + return 0; + } + ++static int set_cxl_dram_event_backtrace(char *buf, struct ras_cxl_dram_event *ev) ++{ ++ char bt_buf[MAX_BACKTRACE_SIZE]; ++ ++ if (!buf || !ev) ++ return -1; ++ ++ sprintf(bt_buf, "BACKTRACE=" \ ++ "timestamp=%s\n" \ ++ "memdev=%s\n" \ ++ "host=%s\n" \ ++ "serial=0x%lx\n" \ ++ "log_type=%s\n" \ ++ "hdr_uuid=%s\n" \ ++ "hdr_flags=0x%x\n" \ ++ "hdr_handle=0x%x\n" \ ++ "hdr_related_handle=0x%x\n" \ ++ "hdr_timestamp=%s\n" \ ++ "hdr_length=%u\n" 
\ ++ "hdr_maint_op_class=%u\n" \ ++ "dpa=0x%lx\n" \ ++ "dpa_flags=%u\n" \ ++ "descriptor=%u\n" \ ++ "type=%u\n" \ ++ "transaction_type=%u\n" \ ++ "channel=%u\n" \ ++ "rank=%u\n" \ ++ "nibble_mask=%u\n" \ ++ "bank_group=%u\n" \ ++ "bank=%u\n" \ ++ "row=%u\n" \ ++ "column=%u\n", \ ++ ev->hdr.timestamp, \ ++ ev->hdr.memdev, \ ++ ev->hdr.host, \ ++ ev->hdr.serial, \ ++ ev->hdr.log_type, \ ++ ev->hdr.hdr_uuid, \ ++ ev->hdr.hdr_flags, \ ++ ev->hdr.hdr_handle, \ ++ ev->hdr.hdr_related_handle, \ ++ ev->hdr.hdr_timestamp, \ ++ ev->hdr.hdr_length, \ ++ ev->hdr.hdr_maint_op_class, \ ++ ev->dpa, \ ++ ev->dpa_flags, \ ++ ev->descriptor, \ ++ ev->type, \ ++ ev->transaction_type, \ ++ ev->channel, \ ++ ev->rank, \ ++ ev->nibble_mask, \ ++ ev->bank_group, \ ++ ev->bank, \ ++ ev->row, \ ++ ev->column); ++ ++ strcat(buf, bt_buf); ++ ++ return 0; ++} ++ + static int commit_report_backtrace(int sockfd, int type, void *ev){ + char buf[MAX_BACKTRACE_SIZE]; + char *pbuf = buf; +@@ -598,6 +660,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ + case CXL_GENERAL_MEDIA_EVENT: + rc = set_cxl_general_media_event_backtrace(buf, (struct ras_cxl_general_media_event *)ev); + break; ++ case CXL_DRAM_EVENT: ++ rc = set_cxl_dram_event_backtrace(buf, (struct ras_cxl_dram_event *)ev); ++ break; + default: + return -1; + } +@@ -1271,3 +1336,47 @@ cxl_general_media_fail: + else + return -1; + } ++ ++int ras_report_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev) ++{ ++ char buf[MAX_MESSAGE_SIZE]; ++ int sockfd = 0; ++ int done = 0; ++ int rc = -1; ++ ++ memset(buf, 0, sizeof(buf)); ++ ++ sockfd = setup_report_socket(); ++ if (sockfd < 0) ++ return -1; ++ ++ rc = commit_report_basic(sockfd); ++ if (rc < 0) ++ goto cxl_dram_fail; ++ ++ rc = commit_report_backtrace(sockfd, CXL_DRAM_EVENT, ev); ++ if (rc < 0) ++ goto cxl_dram_fail; ++ ++ sprintf(buf, "ANALYZER=%s", "rasdaemon-cxl_dram_event"); ++ rc = write(sockfd, buf, strlen(buf) + 1); ++ if (rc < strlen(buf) + 
1) ++ goto cxl_dram_fail; ++ ++ sprintf(buf, "REASON=%s", "CXL DRAM Event"); ++ rc = write(sockfd, buf, strlen(buf) + 1); ++ if (rc < strlen(buf) + 1) ++ goto cxl_dram_fail; ++ ++ done = 1; ++ ++cxl_dram_fail: ++ ++ if (sockfd >= 0) ++ close(sockfd); ++ ++ if (done) ++ return 0; ++ else ++ return -1; ++} +diff --git a/ras-report.h b/ras-report.h +index d9ec7df..1ad00e0 100644 +--- a/ras-report.h ++++ b/ras-report.h +@@ -45,6 +45,7 @@ int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras_cxl_aer_ce_ev + int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev); + int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev); + int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev); ++int ras_report_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev); + + #else + +@@ -62,6 +63,7 @@ static inline int ras_report_cxl_aer_ce_event(struct ras_events *ras, struct ras + static inline int ras_report_cxl_overflow_event(struct ras_events *ras, struct ras_cxl_overflow_event *ev) { return 0; }; + static inline int ras_report_cxl_generic_event(struct ras_events *ras, struct ras_cxl_generic_event *ev) { return 0; }; + static inline int ras_report_cxl_general_media_event(struct ras_events *ras, struct ras_cxl_general_media_event *ev) { return 0; }; ++static inline int ras_report_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *ev) { return 0; }; + + #endif + diff --git a/SOURCES/0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch b/SOURCES/0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch deleted file mode 100644 index 852eb4f..0000000 --- a/SOURCES/0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch +++ /dev/null @@ -1,85 +0,0 @@ -commit 0862a096c3a1d0f993703ab3299f1ddfadf53d7f -Author: Shiju Jose -Date: Tue Aug 11 13:31:46 2020 +0100 - - rasdaemon: ras-mc-ctl: Add ARM processor error information - - Add supporting ARM processor error 
in the ras-mc-ctl tool. - - Signed-off-by: Shiju Jose - Signed-off-by: Mauro Carvalho Chehab - ---- - util/ras-mc-ctl.in | 40 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - ---- rasdaemon-0.6.1.orig/util/ras-mc-ctl.in 2021-10-06 14:14:25.000440090 -0400 -+++ rasdaemon-0.6.1/util/ras-mc-ctl.in 2021-10-06 14:15:59.995598590 -0400 -@@ -1124,6 +1124,7 @@ sub summary - my ($query, $query_handle, $out); - my ($err_type, $label, $mc, $top, $mid, $low, $count, $msg); - my ($etype, $severity, $etype_string, $severity_string); -+ my ($affinity, $mpidr); - - my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); - -@@ -1159,6 +1160,22 @@ sub summary - } - $query_handle->finish; - -+ # ARM processor arm_event errors -+ $query = "select affinity, mpidr, count(*) from arm_event group by affinity, mpidr"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($affinity, $mpidr, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count errors\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events summary:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; -+ - # extlog errors - $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; - $query_handle = $dbh->prepare($query); -@@ -1202,6 +1219,7 @@ sub errors - my ($query, $query_handle, $id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); - my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location); - my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); -+ my ($error_count, $affinity, $mpidr, $r_state, $psci_state); - - my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); - -@@ -1241,6 +1259,28 @@ sub 
errors - } - $query_handle->finish; - -+ # ARM processor arm_event errors -+ $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $timestamp error: "; -+ $out .= "error_count=$error_count, " if ($error_count); -+ $out .= "affinity_level=$affinity, "; -+ $out .= sprintf "mpidr=0x%x, ", $mpidr; -+ $out .= sprintf "running_state=0x%x, ", $r_state; -+ $out .= sprintf "psci_state=0x%x", $psci_state; -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; -+ - # Extlog errors - $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; - $query_handle = $dbh->prepare($query); diff --git a/SOURCES/16d929b024c31d54a7f8a72eab094376c7be27f5.patch b/SOURCES/16d929b024c31d54a7f8a72eab094376c7be27f5.patch deleted file mode 100644 index ab66f52..0000000 --- a/SOURCES/16d929b024c31d54a7f8a72eab094376c7be27f5.patch +++ /dev/null @@ -1,32 +0,0 @@ -commit 16d929b024c31d54a7f8a72eab094376c7be27f5 -Author: Mauro Carvalho Chehab -Date: Wed May 26 10:20:39 2021 +0200 - - Makefile.am: fix build header rules - - non-standard-hisilicon.h was added twice; - ras-memory-failure-handler.h is missing. - - Due to that, the tarball becomes incomplete, causing build - errors. - - While here, also adjust .travis.yml to use --enable-all. 
- - Signed-off-by: Mauro Carvalho Chehab - ---- - Makefile.am | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/Makefile.am 2021-10-13 13:27:53.402685179 -0400 -+++ b/Makefile.am 2021-10-13 13:28:11.664525173 -0400 -@@ -54,7 +54,8 @@ rasdaemon_LDADD = -lpthread $(SQLITE3_LI - - include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ - ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ -- ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h -+ ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ -+ ras-memory-failure-handler.h - - # This rule can't be called with more than one Makefile job (like make -j8) - # I can't figure out a way to fix that diff --git a/SOURCES/2290d65b97311dd5736838f1e285355f7f357046.patch b/SOURCES/2290d65b97311dd5736838f1e285355f7f357046.patch deleted file mode 100644 index 0710974..0000000 --- a/SOURCES/2290d65b97311dd5736838f1e285355f7f357046.patch +++ /dev/null @@ -1,538 +0,0 @@ -commit 2290d65b97311dd5736838f1e285355f7f357046 -Author: Shiju Jose -Date: Mon Mar 8 16:57:26 2021 +0000 - - rasdaemon: add support for memory_failure events - - Add support to log the memory_failure kernel trace - events. 
- - Example rasdaemon log and SQLite DB output for the - memory_failure event, - ================================================= - rasdaemon: memory_failure_event store: 0x126ce8f8 - rasdaemon: register inserted at db - <...>-785 [000] 0.000024: memory_failure_event: 2020-10-02 13:27:13 -0400 pfn=0x204000000 page_type=free buddy page action_result=Delayed - - CREATE TABLE memory_failure_event (id INTEGER PRIMARY KEY, timestamp TEXT, pfn TEXT, page_type TEXT, action_result TEXT); - INSERT INTO memory_failure_event VALUES(1,'2020-10-02 13:27:13 -0400','0x204000000','free buddy page','Delayed'); - ================================================== - - Signed-off-by: Shiju Jose - Signed-off-by: Mauro Carvalho Chehab - ---- - Makefile.am | 4 - ras-events.c | 15 +++ - ras-memory-failure-handler.c | 179 +++++++++++++++++++++++++++++++++++++++++++ - ras-memory-failure-handler.h | 25 ++++++ - ras-record.c | 56 +++++++++++++ - ras-record.h | 13 +++ - ras-report.c | 68 ++++++++++++++++ - ras-report.h | 5 - - 8 files changed, 364 insertions(+), 1 deletion(-) - ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ b/ras-memory-failure-handler.c 2021-10-14 16:31:36.840657728 -0400 -@@ -0,0 +1,179 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include -+#include -+#include -+#include "libtrace/kbuffer.h" -+#include "ras-memory-failure-handler.h" -+#include "ras-record.h" -+#include "ras-logger.h" -+#include "ras-report.h" -+ -+/* Memory failure - various types of pages */ -+enum mf_action_page_type { -+ MF_MSG_KERNEL, -+ MF_MSG_KERNEL_HIGH_ORDER, -+ MF_MSG_SLAB, -+ MF_MSG_DIFFERENT_COMPOUND, -+ MF_MSG_POISONED_HUGE, -+ MF_MSG_HUGE, -+ MF_MSG_FREE_HUGE, -+ MF_MSG_NON_PMD_HUGE, -+ MF_MSG_UNMAP_FAILED, -+ MF_MSG_DIRTY_SWAPCACHE, -+ MF_MSG_CLEAN_SWAPCACHE, -+ MF_MSG_DIRTY_MLOCKED_LRU, -+ MF_MSG_CLEAN_MLOCKED_LRU, -+ MF_MSG_DIRTY_UNEVICTABLE_LRU, -+ MF_MSG_CLEAN_UNEVICTABLE_LRU, -+ MF_MSG_DIRTY_LRU, -+ MF_MSG_CLEAN_LRU, -+ MF_MSG_TRUNCATED_LRU, -+ MF_MSG_BUDDY, -+ MF_MSG_BUDDY_2ND, -+ MF_MSG_DAX, -+ MF_MSG_UNSPLIT_THP, -+ MF_MSG_UNKNOWN, -+}; -+ -+/* Action results for various types of pages */ -+enum mf_action_result { -+ MF_IGNORED, /* Error: cannot be handled */ -+ MF_FAILED, /* Error: handling failed */ -+ MF_DELAYED, /* Will be handled later */ -+ MF_RECOVERED, /* Successfully recovered */ -+}; -+ -+/* memory failure page types */ -+static const struct { -+ int type; -+ const char *page_type; -+} mf_page_type[] = { -+ { MF_MSG_KERNEL, "reserved kernel page" }, -+ { MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page"}, -+ { MF_MSG_SLAB, "kernel slab page"}, -+ { MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking"}, -+ { MF_MSG_POISONED_HUGE, "huge page already hardware poisoned"}, -+ { MF_MSG_HUGE, "huge page"}, -+ { MF_MSG_FREE_HUGE, "free huge page"}, -+ { MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page"}, -+ { MF_MSG_UNMAP_FAILED, "unmapping failed page"}, -+ { MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page"}, -+ { MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page"}, -+ { MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page"}, -+ { MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page"}, -+ { MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page"}, -+ { MF_MSG_CLEAN_UNEVICTABLE_LRU, 
"clean unevictable LRU page"}, -+ { MF_MSG_DIRTY_LRU, "dirty LRU page"}, -+ { MF_MSG_CLEAN_LRU, "clean LRU page"}, -+ { MF_MSG_TRUNCATED_LRU, "already truncated LRU page"}, -+ { MF_MSG_BUDDY, "free buddy page"}, -+ { MF_MSG_BUDDY_2ND, "free buddy page (2nd try)"}, -+ { MF_MSG_DAX, "dax page"}, -+ { MF_MSG_UNSPLIT_THP, "unsplit thp"}, -+ { MF_MSG_UNKNOWN, "unknown page"}, -+}; -+ -+/* memory failure action results */ -+static const struct { -+ int result; -+ const char *action_result; -+} mf_action_result[] = { -+ { MF_IGNORED, "Ignored" }, -+ { MF_FAILED, "Failed" }, -+ { MF_DELAYED, "Delayed" }, -+ { MF_RECOVERED, "Recovered" }, -+}; -+ -+static const char *get_page_type(int page_type) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(mf_page_type); i++) -+ if (mf_page_type[i].type == page_type) -+ return mf_page_type[i].page_type; -+ -+ return "unknown page"; -+} -+ -+static const char *get_action_result(int result) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(mf_action_result); i++) -+ if (mf_action_result[i].result == result) -+ return mf_action_result[i].action_result; -+ -+ return "unknown"; -+} -+ -+ -+int ras_memory_failure_event_handler(struct trace_seq *s, -+ struct pevent_record *record, -+ struct event_format *event, void *context) -+{ -+ unsigned long long val; -+ struct ras_events *ras = context; -+ time_t now; -+ struct tm *tm; -+ struct ras_mf_event ev; -+ -+ /* -+ * Newer kernels (3.10-rc1 or upper) provide an uptime clock. -+ * On previous kernels, the way to properly generate an event would -+ * be to inject a fake one, measure its timestamp and diff it against -+ * gettimeofday. We won't do it here. Instead, let's use uptime, -+ * falling-back to the event report's time, if "uptime" clock is -+ * not available (legacy kernels). 
-+ */ -+ -+ if (ras->use_uptime) -+ now = record->ts/user_hz + ras->uptime_diff; -+ else -+ now = time(NULL); -+ -+ tm = localtime(&now); -+ if (tm) -+ strftime(ev.timestamp, sizeof(ev.timestamp), -+ "%Y-%m-%d %H:%M:%S %z", tm); -+ trace_seq_printf(s, "%s ", ev.timestamp); -+ -+ if (pevent_get_field_val(s, event, "pfn", record, &val, 1) < 0) -+ return -1; -+ sprintf(ev.pfn, "0x%llx", val); -+ trace_seq_printf(s, "pfn=0x%llx ", val); -+ -+ if (pevent_get_field_val(s, event, "type", record, &val, 1) < 0) -+ return -1; -+ ev.page_type = get_page_type(val); -+ trace_seq_printf(s, "page_type=%s ", ev.page_type); -+ -+ if (pevent_get_field_val(s, event, "result", record, &val, 1) < 0) -+ return -1; -+ ev.action_result = get_action_result(val); -+ trace_seq_printf(s, "action_result=%s ", ev.action_result); -+ -+ /* Store data into the SQLite DB */ -+#ifdef HAVE_SQLITE3 -+ ras_store_mf_event(ras, &ev); -+#endif -+ -+#ifdef HAVE_ABRT_REPORT -+ /* Report event to ABRT */ -+ ras_report_mf_event(ras, &ev); -+#endif -+ -+ return 0; -+} ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ b/ras-memory-failure-handler.h 2021-10-14 16:31:36.840657728 -0400 -@@ -0,0 +1,25 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+*/ -+ -+#ifndef __RAS_MEMORY_FAILURE_HANDLER_H -+#define __RAS_MEMORY_FAILURE_HANDLER_H -+ -+#include "ras-events.h" -+#include "libtrace/event-parse.h" -+ -+int ras_memory_failure_event_handler(struct trace_seq *s, -+ struct pevent_record *record, -+ struct event_format *event, void *context); -+ -+#endif ---- a/ras-record.c 2018-04-25 06:19:03.000000000 -0400 -+++ b/ras-record.c 2021-10-14 16:31:36.840657728 -0400 -@@ -404,6 +404,55 @@ sqlite3_bind_text(priv->stmt_mce_record, - } - #endif - -+/* -+ * Table and functions to handle ras:memory_failure -+ */ -+ -+#ifdef HAVE_MEMORY_FAILURE -+static const struct db_fields mf_event_fields[] = { -+ { .name="id", .type="INTEGER PRIMARY KEY" }, -+ { .name="timestamp", .type="TEXT" }, -+ { .name="pfn", .type="TEXT" }, -+ { .name="page_type", .type="TEXT" }, -+ { .name="action_result", .type="TEXT" }, -+}; -+ -+static const struct db_table_descriptor mf_event_tab = { -+ .name = "memory_failure_event", -+ .fields = mf_event_fields, -+ .num_fields = ARRAY_SIZE(mf_event_fields), -+}; -+ -+int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) -+{ -+ int rc; -+ struct sqlite3_priv *priv = ras->db_priv; -+ -+ if (!priv || !priv->stmt_mf_event) -+ return 0; -+ log(TERM, LOG_INFO, "memory_failure_event store: %p\n", priv->stmt_mf_event); -+ -+ sqlite3_bind_text(priv->stmt_mf_event, 1, ev->timestamp, -1, NULL); -+ sqlite3_bind_text(priv->stmt_mf_event, 2, ev->pfn, -1, NULL); -+ sqlite3_bind_text(priv->stmt_mf_event, 3, ev->page_type, -1, NULL); -+ sqlite3_bind_text(priv->stmt_mf_event, 4, ev->action_result, -1, NULL); -+ -+ rc = sqlite3_step(priv->stmt_mf_event); -+ if (rc != SQLITE_OK && rc != SQLITE_DONE) -+ log(TERM, LOG_ERR, -+ "Failed to do memory_failure_event step on sqlite: error = %d\n", rc); -+ -+ rc = sqlite3_reset(priv->stmt_mf_event); -+ if (rc != SQLITE_OK && rc != SQLITE_DONE) -+ log(TERM, LOG_ERR, -+ "Failed reset memory_failure_event on sqlite: error = %d\n", -+ rc); -+ -+ log(TERM, LOG_INFO, 
"register inserted at db\n"); -+ -+ return rc; -+} -+#endif - - /* - * Generic code -@@ -567,6 +616,13 @@ usleep(10000); - rc = ras_mc_prepare_stmt(priv, &priv->stmt_arm_record, - &arm_event_tab); - #endif -+#ifdef HAVE_MEMORY_FAILURE -+ rc = ras_mc_create_table(priv, &mf_event_tab); -+ if (rc == SQLITE_OK) { -+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_mf_event, -+ &mf_event_tab); -+ } -+#endif - - ras->db_priv = priv; - return 0; ---- a/ras-record.h 2018-04-25 06:19:03.000000000 -0400 -+++ b/ras-record.h 2021-10-14 16:31:36.840657728 -0400 -@@ -75,12 +75,20 @@ struct ras_arm_event { - int32_t psci_state; - }; - -+struct ras_mf_event { -+ char timestamp[64]; -+ char pfn[30]; -+ const char *page_type; -+ const char *action_result; -+}; -+ - struct ras_mc_event; - struct ras_aer_event; - struct ras_extlog_event; - struct ras_non_standard_event; - struct ras_arm_event; - struct mce_event; -+struct ras_mf_event; - - #ifdef HAVE_SQLITE3 - -@@ -104,6 +112,9 @@ struct sqlite3_priv { - #ifdef HAVE_ARM - sqlite3_stmt *stmt_arm_record; - #endif -+#ifdef HAVE_MEMORY_FAILURE -+ sqlite3_stmt *stmt_mf_event; -+#endif - }; - - int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras); -@@ -113,6 +124,7 @@ int ras_store_mce_record(struct ras_even - int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev); - int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev); - int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev); -+int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev); - - #else - static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; -@@ -122,6 +134,7 @@ static inline int ras_store_mce_record(s - static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; }; - static inline int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; }; 
- static inline int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; -+static inline int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; }; - - #endif - ---- a/ras-report.c 2017-10-14 05:11:34.000000000 -0400 -+++ b/ras-report.c 2021-10-14 16:31:36.840657728 -0400 -@@ -255,6 +255,28 @@ "midr=0x%lx\n" \ - return 0; - } - -+static int set_mf_event_backtrace(char *buf, struct ras_mf_event *ev) -+{ -+ char bt_buf[MAX_BACKTRACE_SIZE]; -+ -+ if (!buf || !ev) -+ return -1; -+ -+ sprintf(bt_buf, "BACKTRACE=" \ -+ "timestamp=%s\n" \ -+ "pfn=%s\n" \ -+ "page_type=%s\n" \ -+ "action_result=%s\n", \ -+ ev->timestamp, \ -+ ev->pfn, \ -+ ev->page_type, \ -+ ev->action_result); -+ -+ strcat(buf, bt_buf); -+ -+ return 0; -+} -+ - static int commit_report_backtrace(int sockfd, int type, void *ev){ - char buf[MAX_BACKTRACE_SIZE]; - char *pbuf = buf; -@@ -283,6 +305,9 @@ memset(buf, 0, MAX_BACKTRACE_SIZE); - case ARM_EVENT: - rc = set_arm_event_backtrace(buf, (struct ras_arm_event *)ev); - break; -+ case MF_EVENT: -+ rc = set_mf_event_backtrace(buf, (struct ras_mf_event *)ev); -+ break; - default: - return -1; - } -@@ -549,3 +574,46 @@ return 0; - return -1; - } - } -+ -+int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) -+{ -+ char buf[MAX_MESSAGE_SIZE]; -+ int sockfd = 0; -+ int done = 0; -+ int rc = -1; -+ -+ memset(buf, 0, sizeof(buf)); -+ -+ sockfd = setup_report_socket(); -+ if (sockfd < 0) -+ return -1; -+ -+ rc = commit_report_basic(sockfd); -+ if (rc < 0) -+ goto mf_fail; -+ -+ rc = commit_report_backtrace(sockfd, MF_EVENT, ev); -+ if (rc < 0) -+ goto mf_fail; -+ -+ sprintf(buf, "ANALYZER=%s", "rasdaemon-memory_failure"); -+ rc = write(sockfd, buf, strlen(buf) + 1); -+ if (rc < strlen(buf) + 1) -+ goto mf_fail; -+ -+ sprintf(buf, "REASON=%s", "memory failure problem"); -+ rc = write(sockfd, buf, strlen(buf) + 1); -+ if (rc < strlen(buf) + 1) -+ goto mf_fail; -+ -+ done = 1; -+ 
-+mf_fail: -+ if (sockfd > 0) -+ close(sockfd); -+ -+ if (done) -+ return 0; -+ else -+ return -1; -+} ---- a/ras-report.h 2017-10-14 05:11:34.000000000 -0400 -+++ b/ras-report.h 2021-10-14 16:31:36.840657728 -0400 -@@ -34,7 +34,8 @@ enum { - MCE_EVENT, - AER_EVENT, - NON_STANDARD_EVENT, -- ARM_EVENT -+ ARM_EVENT, -+ MF_EVENT, - }; - - #ifdef HAVE_ABRT_REPORT -@@ -44,6 +45,7 @@ int ras_report_aer_event(struct ras_even - int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev); - int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev); - int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev); -+int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev); - - #else - -@@ -52,6 +54,7 @@ static inline int ras_report_aer_event(s - static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) { return 0; }; - static inline int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; }; - static inline int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; -+static inline int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; }; - - #endif - ---- a/Makefile.am 2018-04-25 06:21:56.000000000 -0400 -+++ b/Makefile.am 2021-10-14 16:37:42.423639762 -0400 -@@ -41,12 +41,16 @@ endif - if WITH_EXTLOG - rasdaemon_SOURCES += ras-extlog-handler.c - endif -+if WITH_MEMORY_FAILURE -+ rasdaemon_SOURCES += ras-memory-failure-handler.c -+endif - if WITH_ABRT_REPORT - rasdaemon_SOURCES += ras-report.c - endif - if WITH_HISI_NS_DECODE - rasdaemon_SOURCES += non-standard-hisi_hip07.c - endif -+ - rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a - - include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ ---- a/ras-events.c 2021-10-14 16:31:36.730658636 -0400 -+++ b/ras-events.c 2021-10-14 16:37:11.043898809 -0400 -@@ -33,6 +33,7 @@ * Foundation, 
Inc., 51 Franklin Street, - #include "ras-arm-handler.h" - #include "ras-mce-handler.h" - #include "ras-extlog-handler.h" -+#include "ras-memory-failure-handler.h" - #include "ras-record.h" - #include "ras-logger.h" - -@@ -218,6 +219,10 @@ if (rc < 0) { - rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable); - #endif - -+#ifdef HAVE_MEMORY_FAILURE -+ rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable); -+#endif -+ - free_ras: - free(ras); - return rc; -@@ -736,6 +741,16 @@ (void)open("/sys/kernel/debug/ras/daemon - "ras", "aer_event"); - #endif - -+#ifdef HAVE_MEMORY_FAILURE -+ rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event", -+ ras_memory_failure_event_handler); -+ if (!rc) -+ num_events++; -+ else -+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", -+ "ras", "memory_failure_event"); -+#endif -+ - if (!num_events) { - log(ALL, LOG_INFO, - "Failed to trace all supported RAS events. Aborting.\n"); diff --git a/SOURCES/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch b/SOURCES/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch deleted file mode 100644 index fdc509b..0000000 --- a/SOURCES/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch +++ /dev/null @@ -1,28 +0,0 @@ -commit 28ea956acc2dab7c18b4701f9657afb9ab3ddc79 -Author: Muralidhara M K -Date: Mon Jul 12 05:18:43 2021 -0500 - - rasdaemon: set SMCA maximum number of banks to 64 - - Newer AMD systems with SMCA banks support up to 64 MCA banks per CPU. - - This patch is based on the commit below upstremed into the kernel: - a0bc32b3cacf ("x86/mce: Increase maximum number of banks to 64") - - Signed-off-by: Muralidhara M K - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/mce-amd-smca.c b/mce-amd-smca.c -index e0cf512..3c346f4 100644 ---- a/mce-amd-smca.c -+++ b/mce-amd-smca.c -@@ -75,6 +75,9 @@ enum smca_bank_types { - N_SMCA_BANK_TYPES - }; - -+/* Maximum number of MCA banks per CPU. 
*/ -+#define MAX_NR_BANKS 64 -+ - /* SMCA Extended error strings */ - /* Load Store */ - static const char * const smca_ls_mce_desc[] = { diff --git a/SOURCES/2a1d217660351c08eb2f8bccebf939abba2f7e69.patch b/SOURCES/2a1d217660351c08eb2f8bccebf939abba2f7e69.patch deleted file mode 100644 index 1b5844d..0000000 --- a/SOURCES/2a1d217660351c08eb2f8bccebf939abba2f7e69.patch +++ /dev/null @@ -1,66 +0,0 @@ -commit 2a1d217660351c08eb2f8bccebf939abba2f7e69 -Author: Brian WoodsGhannam, Yazen -Date: Fri Nov 1 15:48:13 2019 +0100 - - rasdaemon: rename CPU_NAPLES cputype - - Change CPU_NAPLES to CPU_AMD_SMCA to reflect that it isn't just NAPLES - that is supported, but AMD's Scalable Machine Check Architecture (SMCA). - - [ Yazen: change family check to feature check, and change CPU name. ] - - CC: "mchehab+samsung@kernel.org" , "Namburu, Chandu-babu" # Thread-Topic: [PATCH 1/2] rasdaemon: rename CPU_NAPLES cputype - Signed-off-by: Brian Woods - Signed-off-by: Yazen Ghannam - Cc: Chandu-babu Namburu - Signed-off-by: Mauro Carvalho Chehab - ---- - ras-mce-handler.c | 10 ++++++---- - ras-mce-handler.h | 2 +- - 2 files changed, 7 insertions(+), 5 deletions(-) - ---- rasdaemon-0.6.1.orig/ras-mce-handler.c 2021-05-26 15:16:24.699096556 -0400 -+++ rasdaemon-0.6.1/ras-mce-handler.c 2021-05-26 15:18:06.543162745 -0400 -@@ -55,7 +55,7 @@ [CPU_XEON75XX] = "Intel Xeon 7500 series - [CPU_KNIGHTS_LANDING] = "Knights Landing", - [CPU_KNIGHTS_MILL] = "Knights Mill", - [CPU_SKYLAKE_XEON] = "Skylake server", -- [CPU_NAPLES] = "AMD Family 17h Zen1" -+ [CPU_AMD_SMCA] = "AMD Scalable MCA", - }; - - static enum cputype select_intel_cputype(struct ras_events *ras) -@@ -191,8 +191,10 @@ ret = 0; - if (!strcmp(mce->vendor, "AuthenticAMD")) { - if (mce->family == 15) - mce->cputype = CPU_K8; -- if (mce->family == 23) -- mce->cputype = CPU_NAPLES; -+ if (strstr(mce->processor_flags, "smca")) { -+ mce->cputype = CPU_AMD_SMCA; -+ goto ret; -+ } - if (mce->family > 23) { - log(ALL, LOG_INFO, - "Can't parse 
MCE for this AMD CPU yet %d\n", -@@ -435,7 +437,7 @@ if (pevent_get_field_val(s, event, "ipid - case CPU_K8: - rc = parse_amd_k8_event(ras, &e); - break; -- case CPU_NAPLES: -+ case CPU_AMD_SMCA: - rc = parse_amd_smca_event(ras, &e); - break; - default: /* All other CPU types are Intel */ ---- rasdaemon-0.6.1.orig/ras-mce-handler.h 2021-05-26 15:17:15.409631590 -0400 -+++ rasdaemon-0.6.1/ras-mce-handler.h 2021-05-26 15:18:20.102038424 -0400 -@@ -50,7 +50,7 @@ enum cputype { - CPU_KNIGHTS_LANDING, - CPU_KNIGHTS_MILL, - CPU_SKYLAKE_XEON, -- CPU_NAPLES, -+ CPU_AMD_SMCA, - }; - - struct mce_event { diff --git a/SOURCES/546cf713f667437fb6e283cc3dc090679eb47d08.patch b/SOURCES/546cf713f667437fb6e283cc3dc090679eb47d08.patch deleted file mode 100644 index 448b1f6..0000000 --- a/SOURCES/546cf713f667437fb6e283cc3dc090679eb47d08.patch +++ /dev/null @@ -1,372 +0,0 @@ -commit 546cf713f667437fb6e283cc3dc090679eb47d08 -Author: Subhendu Saha -Date: Tue Jan 12 03:29:55 2021 -0500 - - Fix ras-mc-ctl script. - - When rasdaemon is compiled without enabling aer, mce, devlink, - etc., those tables are not created in the database file. Then - ras-mc-ctl script breaks trying to query data from non-existent - tables. 
- - Signed-off-by: Subhendu Saha subhends@akamai.com - Signed-off-by: Mauro Carvalho Chehab - ---- - util/ras-mc-ctl.in | 310 ++++++++++++++++++++++++++++------------------------- - 1 file changed, 168 insertions(+), 142 deletions(-) - ---- a/util/ras-mc-ctl.in 2021-10-12 13:45:43.260646935 -0400 -+++ b/util/ras-mc-ctl.in 2021-10-12 13:46:38.610158949 -0400 -@@ -41,6 +41,16 @@ my $sysconfdir = "@sysconfdir@"; - my $dmidecode = find_prog ("dmidecode"); - my $modprobe = find_prog ("modprobe") or exit (1); - -+my $has_aer = 0; -+my $has_arm = 0; -+my $has_extlog = 0; -+my $has_mce = 0; -+ -+@WITH_AER_TRUE@$has_aer = 1; -+@WITH_ARM_TRUE@$has_arm = 1; -+@WITH_EXTLOG_TRUE@$has_extlog = 1; -+@WITH_MCE_TRUE@$has_mce = 1; -+ - my %conf = (); - my %bus = (); - my %dimm_size = (); -@@ -1145,70 +1155,78 @@ sub summary - $query_handle->finish; - - # PCIe AER aer_event errors -- $query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($err_type, $msg, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$count $err_type errors: $msg\n"; -- } -- if ($out ne "") { -- print "PCIe AER events summary:\n$out\n"; -- } else { -- print "No PCIe AER errors.\n\n"; -+ if ($has_aer == 1) { -+ $query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($err_type, $msg, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count $err_type errors: $msg\n"; -+ } -+ if ($out ne "") { -+ print "PCIe AER events summary:\n$out\n"; -+ } else { -+ print "No PCIe AER errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # ARM processor arm_event errors -- $query = "select affinity, mpidr, count(*) from arm_event group by affinity, mpidr"; -- $query_handle = 
$dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($affinity, $mpidr, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$count errors\n"; -- } -- if ($out ne "") { -- print "ARM processor events summary:\n$out\n"; -- } else { -- print "No ARM processor errors.\n\n"; -+ if ($has_arm == 1) { -+ $query = "select affinity, mpidr, count(*) from arm_event group by affinity, mpidr"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($affinity, $mpidr, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count errors\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events summary:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # extlog errors -- $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($etype, $severity, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $etype_string = get_extlog_type($etype); -- $severity_string = get_extlog_severity($severity); -- $out .= "\t$count $etype_string $severity_string errors\n"; -- } -- if ($out ne "") { -- print "Extlog records summary:\n$out"; -- } else { -- print "No Extlog errors.\n"; -+ if ($has_extlog == 1) { -+ $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($etype, $severity, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $etype_string = get_extlog_type($etype); -+ $severity_string = get_extlog_severity($severity); -+ $out .= "\t$count $etype_string $severity_string errors\n"; -+ } -+ if ($out ne "") { -+ print "Extlog records summary:\n$out"; -+ } else { -+ print "No Extlog errors.\n"; -+ } -+ 
$query_handle->finish; - } -- $query_handle->finish; - - # MCE mce_record errors -- $query = "select error_msg, count(*) from mce_record group by error_msg"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($msg, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$count $msg errors\n"; -- } -- if ($out ne "") { -- print "MCE records summary:\n$out"; -- } else { -- print "No MCE errors.\n"; -+ if ($has_mce == 1) { -+ $query = "select error_msg, count(*) from mce_record group by error_msg"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($msg, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count $msg errors\n"; -+ } -+ if ($out ne "") { -+ print "MCE records summary:\n$out"; -+ } else { -+ print "No MCE errors.\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - undef($dbh); - } -@@ -1244,105 +1262,113 @@ sub errors - $query_handle->finish; - - # PCIe AER aer_event errors -- $query = "select id, timestamp, err_type, err_msg from aer_event order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $time, $type, $msg)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $time $type error: $msg\n"; -- } -- if ($out ne "") { -- print "PCIe AER events:\n$out\n"; -- } else { -- print "No PCIe AER errors.\n\n"; -+ if ($has_aer == 1) { -+ $query = "select id, timestamp, err_type, err_msg from aer_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $time, $type, $msg)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $time $type error: $msg\n"; -+ } -+ if ($out ne "") { -+ print "PCIe AER events:\n$out\n"; -+ } else { -+ print "No PCIe AER errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # ARM processor 
arm_event errors -- $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $timestamp error: "; -- $out .= "error_count=$error_count, " if ($error_count); -- $out .= "affinity_level=$affinity, "; -- $out .= sprintf "mpidr=0x%x, ", $mpidr; -- $out .= sprintf "running_state=0x%x, ", $r_state; -- $out .= sprintf "psci_state=0x%x", $psci_state; -- $out .= "\n"; -- } -- if ($out ne "") { -- print "ARM processor events:\n$out\n"; -- } else { -- print "No ARM processor errors.\n\n"; -+ if ($has_arm == 1) { -+ $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $timestamp error: "; -+ $out .= "error_count=$error_count, " if ($error_count); -+ $out .= "affinity_level=$affinity, "; -+ $out .= sprintf "mpidr=0x%x, ", $mpidr; -+ $out .= sprintf "running_state=0x%x, ", $r_state; -+ $out .= sprintf "psci_state=0x%x", $psci_state; -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # Extlog errors -- $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data)); -- $out = ""; -- 
while($query_handle->fetch()) { -- $etype_string = get_extlog_type($etype); -- $severity_string = get_extlog_severity($severity); -- $out .= "$id $timestamp error: "; -- $out .= "type=$etype_string, "; -- $out .= "severity=$severity_string, "; -- $out .= sprintf "address=0x%08x, ", $addr; -- $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id); -- $out .= "fru_text='$fru_text', "; -- $out .= get_cper_data_text($cper_data) if ($cper_data); -- $out .= "\n"; -- } -- if ($out ne "") { -- print "Extlog events:\n$out\n"; -- } else { -- print "No Extlog errors.\n\n"; -+ if ($has_extlog) { -+ $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $etype_string = get_extlog_type($etype); -+ $severity_string = get_extlog_severity($severity); -+ $out .= "$id $timestamp error: "; -+ $out .= "type=$etype_string, "; -+ $out .= "severity=$severity_string, "; -+ $out .= sprintf "address=0x%08x, ", $addr; -+ $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id); -+ $out .= "fru_text='$fru_text', "; -+ $out .= get_cper_data_text($cper_data) if ($cper_data); -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "Extlog events:\n$out\n"; -+ } else { -+ print "No Extlog errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # MCE mce_record errors -- $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, 
$cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $time error: $msg"; -- $out .= ", CPU $cpuvendor" if ($cpuvendor); -- $out .= ", bank $bank_name" if ($bank_name); -- $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg); -- $out .= ", mci $mcistatus_msg" if ($mcistatus_msg); -- $out .= ", $mc_location" if ($mc_location); -- $out .= ", $user_action" if ($user_action); -- $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap); -- $out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus); -- $out .= sprintf ", status=0x%08x", $status if ($status); -- $out .= sprintf ", addr=0x%08x", $addr if ($addr); -- $out .= sprintf ", misc=0x%08x", $misc if ($misc); -- $out .= sprintf ", ip=0x%08x", $ip if ($ip); -- $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc); -- $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime); -- $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu); -- $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid); -- $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid); -- $out .= sprintf ", socketid=0x%08x", $socketid if ($socketid); -- $out .= sprintf ", cs=0x%08x", $cs if ($cs); -- $out .= sprintf ", bank=0x%08x", $bank if ($bank); -+ if ($has_mce == 1) { -+ $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $time error: $msg"; -+ 
$out .= ", CPU $cpuvendor" if ($cpuvendor); -+ $out .= ", bank $bank_name" if ($bank_name); -+ $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg); -+ $out .= ", mci $mcistatus_msg" if ($mcistatus_msg); -+ $out .= ", $mc_location" if ($mc_location); -+ $out .= ", $user_action" if ($user_action); -+ $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap); -+ $out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus); -+ $out .= sprintf ", status=0x%08x", $status if ($status); -+ $out .= sprintf ", addr=0x%08x", $addr if ($addr); -+ $out .= sprintf ", misc=0x%08x", $misc if ($misc); -+ $out .= sprintf ", ip=0x%08x", $ip if ($ip); -+ $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc); -+ $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime); -+ $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu); -+ $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid); -+ $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid); -+ $out .= sprintf ", socketid=0x%08x", $socketid if ($socketid); -+ $out .= sprintf ", cs=0x%08x", $cs if ($cs); -+ $out .= sprintf ", bank=0x%08x", $bank if ($bank); - -- $out .= "\n"; -- } -- if ($out ne "") { -- print "MCE events:\n$out\n"; -- } else { -- print "No MCE errors.\n\n"; -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "MCE events:\n$out\n"; -+ } else { -+ print "No MCE errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - undef($dbh); - } diff --git a/SOURCES/60a91e4da4f2daf2b10143fc148a8043312b61e5.patch b/SOURCES/60a91e4da4f2daf2b10143fc148a8043312b61e5.patch deleted file mode 100644 index 57a4e46..0000000 --- a/SOURCES/60a91e4da4f2daf2b10143fc148a8043312b61e5.patch +++ /dev/null @@ -1,149 +0,0 @@ -commit 60a91e4da4f2daf2b10143fc148a8043312b61e5 -Author: Aristeu Rozanski -Date: Wed Aug 1 16:29:58 2018 -0400 - - rasdaemon: ras-mc-ctl: add option to show error counts - - In some scenarios it might not be desirable to have a daemon running - to parse and store the errors provided by EDAC and only having the - 
number of CEs and UEs is enough. This patch implements this feature - as an ras-mc-ctl option. - - Signed-off-by: Aristeu Rozanski - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in -index 38b7824..aee431a 100755 ---- a/util/ras-mc-ctl.in -+++ b/util/ras-mc-ctl.in -@@ -50,6 +50,8 @@ my %dimm_location = (); - my %csrow_size = (); - my %rank_size = (); - my %csrow_ranks = (); -+my %dimm_ce_count = (); -+my %dimm_ue_count = (); - - my @layers; - my @max_pos; -@@ -76,6 +78,7 @@ Usage: $prog [OPTIONS...] - --layout Display the memory layout. - --summary Presents a summary of the logged errors. - --errors Shows the errors stored at the error database. -+ --error-count Shows the corrected and uncorrected error counts using sysfs. - --help This help message. - EOF - -@@ -83,7 +86,7 @@ parse_cmdline(); - - if ( $conf{opt}{mainboard} || $conf{opt}{print_labels} - || $conf{opt}{register_labels} || $conf{opt}{display_memory_layout} -- || $conf{opt}{guess_dimm_label}) { -+ || $conf{opt}{guess_dimm_label} || $conf{opt}{error_count}) { - - get_mainboard_info(); - -@@ -105,6 +108,9 @@ if ( $conf{opt}{mainboard} || $conf{opt}{print_labels} - if ($conf{opt}{guess_dimm_label}) { - guess_dimm_label (); - } -+ if ($conf{opt}{error_count}) { -+ display_error_count (); -+ } - } - - if ($conf{opt}{status}) { -@@ -134,6 +140,7 @@ sub parse_cmdline - $conf{opt}{guess_dimm_label} = 0; - $conf{opt}{summary} = 0; - $conf{opt}{errors} = 0; -+ $conf{opt}{error_count} = 0; - - my $rref = \$conf{opt}{report}; - my $mref = \$conf{opt}{mainboard}; -@@ -150,7 +157,8 @@ sub parse_cmdline - "status" => \$conf{opt}{status}, - "layout" => \$conf{opt}{display_memory_layout}, - "summary" => \$conf{opt}{summary}, -- "errors" => \$conf{opt}{errors} -+ "errors" => \$conf{opt}{errors}, -+ "error-count" => \$conf{opt}{error_count} - ); - - usage(1) if !$rc; -@@ -284,6 +292,30 @@ sub parse_dimm_nodes - $dimm_label_file{$str_loc} = $file; - $dimm_location{$str_loc} = 
$location; - -+ my $count; -+ -+ $file =~s/dimm_label/dimm_ce_count/; -+ if (-e $file) { -+ open IN, $file; -+ chomp($count = ); -+ close IN; -+ } else { -+ log_error ("dimm_ce_count not found in sysfs. Old kernel?\n"); -+ exit -1; -+ } -+ $dimm_ce_count{$str_loc} = $count; -+ -+ $file =~s/dimm_ce_count/dimm_ue_count/; -+ if (-e $file) { -+ open IN, $file; -+ chomp($count = ); -+ close IN; -+ } else { -+ log_error ("dimm_ue_count not found in sysfs. Old kernel?\n"); -+ exit -1; -+ } -+ $dimm_ue_count{$str_loc} = $count; -+ - return; - } - } -@@ -906,6 +938,45 @@ sub display_memory_layout - dimm_display_mem(); - } - -+sub display_error_count -+{ -+ my $sysfs_dir = "/sys/devices/system/edac/mc"; -+ my $key; -+ my $max_width = 0; -+ my %dimm_labels = (); -+ -+ find ({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir); -+ -+ if (!scalar(keys %dimm_node)) { -+ log_error ("No DIMMs found in /sys or new sysfs EDAC interface not found.\n"); -+ exit -1; -+ } -+ -+ foreach $key (keys %dimm_node) { -+ my $label_width; -+ -+ open IN, $dimm_label_file{$key}; -+ chomp(my $label = ); -+ close IN; -+ $label_width = length $label; -+ -+ if ($label_width > $max_width) { -+ $max_width = $label_width; -+ } -+ $dimm_labels{$key} = $label; -+ } -+ my $string = "Label"; -+ $string .= " " x ($max_width - length $string); -+ print($string . 
"\tCE\tUE\n"); -+ -+ foreach $key (keys %dimm_node) { -+ my $ce_count = $dimm_ce_count{$key}; -+ my $ue_count = $dimm_ue_count{$key}; -+ -+ print("$dimm_labels{$key}\t$ce_count\t$ue_count\n"); -+ } -+} -+ - sub find_prog - { - my ($file) = @_; diff --git a/SOURCES/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch b/SOURCES/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch deleted file mode 100644 index 76afc8e..0000000 --- a/SOURCES/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch +++ /dev/null @@ -1,24 +0,0 @@ -commit 7937f0d6c2aaaed096f3a3d306416743c0dcb7a4 -Author: Muralidhara M K -Date: Wed Jul 28 01:52:12 2021 -0500 - - rasdaemon: Support MCE for AMD CPU family 19h - - Add support for family 19h x86 CPUs from AMD. - - Signed-off-by: Muralidhara M K - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/ras-mce-handler.c b/ras-mce-handler.c -index 805004a..f2b53d4 100644 ---- a/ras-mce-handler.c -+++ b/ras-mce-handler.c -@@ -208,7 +208,7 @@ static int detect_cpu(struct ras_events *ras) - mce->cputype = CPU_AMD_SMCA; - goto ret; - } -- if (mce->family > 23) { -+ if (mce->family > 25) { - log(ALL, LOG_INFO, - "Can't parse MCE for this AMD CPU yet %d\n", - mce->family); diff --git a/SOURCES/854364ba44aee9bc5646f6537fc744b0b54aff37.patch b/SOURCES/854364ba44aee9bc5646f6537fc744b0b54aff37.patch deleted file mode 100644 index 91bad1b..0000000 --- a/SOURCES/854364ba44aee9bc5646f6537fc744b0b54aff37.patch +++ /dev/null @@ -1,38 +0,0 @@ -commit 854364ba44aee9bc5646f6537fc744b0b54aff37 -Author: Muralidhara M K -Date: Thu Aug 20 21:00:57 2020 +0530 - - rasdaemon: Add 8 channel decoding for SMCA systems - - Current Scalable Machine Check Architecture (SMCA) systems support up - to 8 UMC channels. - - To find the UMC channel represented by a bank, look at the 6th nibble - in the MCA_IPID[InstanceId] field. - - Signed-off-by: Muralidhara M K - [ Adjust commit message. 
] - Signed-off-by: Yazen Ghannam - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/mce-amd-smca.c b/mce-amd-smca.c -index d0b6cb6..7c619fd 100644 ---- a/mce-amd-smca.c -+++ b/mce-amd-smca.c -@@ -438,15 +438,7 @@ static void amd_decode_errcode(struct mce_event *e) - */ - static int find_umc_channel(struct mce_event *e) - { -- uint32_t umc_instance_id[] = {0x50f00, 0x150f00}; -- uint32_t instance_id = EXTRACT(e->ipid, 0, 31); -- int i, channel = -1; -- -- for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++) -- if (umc_instance_id[i] == instance_id) -- channel = i; -- -- return channel; -+ return EXTRACT(e->ipid, 0, 31) >> 20; - } - /* Decode extended errors according to Scalable MCA specification */ - static void decode_smca_error(struct mce_event *e) diff --git a/SOURCES/8704a85d8dc3483423ec2934fee8132f85f8fdb6.patch b/SOURCES/8704a85d8dc3483423ec2934fee8132f85f8fdb6.patch deleted file mode 100644 index e3617fc..0000000 --- a/SOURCES/8704a85d8dc3483423ec2934fee8132f85f8fdb6.patch +++ /dev/null @@ -1,207 +0,0 @@ -commit 8704a85d8dc3483423ec2934fee8132f85f8fdb6 -Author: Brian WoodsGhannam, Yazen -Date: Fri Nov 1 15:48:14 2019 +0100 - - rasdaemon: add support for new AMD SMCA bank types - - Going forward, the Scalable Machine Check Architecture (SMCA) has some - updated and additional bank types which show up in Zen2. The differing - bank types include: CS_V2, PSP_V2, SMU_V2, MP5, NBIO, and PCIE. The V2 - bank types replace the original bank types but have unique HWID/MCAtype - IDs from the originals so there's no conflicts between different - versions or other bank types. All of the differing bank types have new - MCE descriptions which have been added as well. 
- - CC: "mchehab+samsung@kernel.org" , "Namburu, Chandu-babu" # Thread-Topic: [PATCH 2/2] rasdaemon: add support for new AMD SMCA bank types - Signed-off-by: Brian Woods - Signed-off-by: Yazen Ghannam - Cc: Chandu-babu Namburu - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/mce-amd-smca.c b/mce-amd-smca.c -index 6c3e8a5..114e786 100644 ---- a/mce-amd-smca.c -+++ b/mce-amd-smca.c -@@ -49,11 +49,17 @@ enum smca_bank_types { - SMCA_FP, /* Floating Point */ - SMCA_L3_CACHE, /* L3 Cache */ - SMCA_CS, /* Coherent Slave */ -+ SMCA_CS_V2, /* Coherent Slave V2 */ - SMCA_PIE, /* Power, Interrupts, etc. */ - SMCA_UMC, /* Unified Memory Controller */ - SMCA_PB, /* Parameter Block */ - SMCA_PSP, /* Platform Security Processor */ -+ SMCA_PSP_V2, /* Platform Security Processor V2 */ - SMCA_SMU, /* System Management Unit */ -+ SMCA_SMU_V2, /* System Management Unit V2 */ -+ SMCA_MP5, /* Microprocessor 5 Unit */ -+ SMCA_NBIO, /* Northbridge IO Unit */ -+ SMCA_PCIE, /* PCI Express Unit */ - N_SMCA_BANK_TYPES - }; - -@@ -165,6 +171,23 @@ static const char * const smca_cs_mce_desc[] = { - "Atomic request parity", - "ECC error on probe filter access", - }; -+/* Coherent Slave Unit V2 */ -+static const char * const smca_cs2_mce_desc[] = { -+ "Illegal Request", -+ "Address Violation", -+ "Security Violation", -+ "Illegal Response", -+ "Unexpected Response", -+ "Request or Probe Parity Error", -+ "Read Response Parity Error", -+ "Atomic Request Parity Error", -+ "SDP read response had no match in the CS queue", -+ "Probe Filter Protocol Error", -+ "Probe Filter ECC Error", -+ "SDP read response had an unexpected RETRY error", -+ "Counter overflow error", -+ "Counter underflow error", -+}; - /* Power, Interrupt, etc.. 
*/ - static const char * const smca_pie_mce_desc[] = { - "HW assert", -@@ -189,10 +212,75 @@ static const char * const smca_pb_mce_desc[] = { - static const char * const smca_psp_mce_desc[] = { - "PSP RAM ECC or parity error", - }; -+/* Platform Security Processor V2 */ -+static const char * const smca_psp2_mce_desc[] = { -+ "High SRAM ECC or parity error", -+ "Low SRAM ECC or parity error", -+ "Instruction Cache Bank 0 ECC or parity error", -+ "Instruction Cache Bank 1 ECC or parity error", -+ "Instruction Tag Ram 0 parity error", -+ "Instruction Tag Ram 1 parity error", -+ "Data Cache Bank 0 ECC or parity error", -+ "Data Cache Bank 1 ECC or parity error", -+ "Data Cache Bank 2 ECC or parity error", -+ "Data Cache Bank 3 ECC or parity error", -+ "Data Tag Bank 0 parity error", -+ "Data Tag Bank 1 parity error", -+ "Data Tag Bank 2 parity error", -+ "Data Tag Bank 3 parity error", -+ "Dirty Data Ram parity error", -+ "TLB Bank 0 parity error", -+ "TLB Bank 1 parity error", -+ "System Hub Read Buffer ECC or parity error", -+}; - /* System Management Unit */ - static const char * const smca_smu_mce_desc[] = { - "SMU RAM ECC or parity error", - }; -+/* System Management Unit V2 */ -+static const char * const smca_smu2_mce_desc[] = { -+ "High SRAM ECC or parity error", -+ "Low SRAM ECC or parity error", -+ "Data Cache Bank A ECC or parity error", -+ "Data Cache Bank B ECC or parity error", -+ "Data Tag Cache Bank A ECC or parity error", -+ "Data Tag Cache Bank B ECC or parity error", -+ "Instruction Cache Bank A ECC or parity error", -+ "Instruction Cache Bank B ECC or parity error", -+ "Instruction Tag Cache Bank A ECC or parity error", -+ "Instruction Tag Cache Bank B ECC or parity error", -+ "System Hub Read Buffer ECC or parity error", -+}; -+/* Microprocessor 5 Unit */ -+static const char * const smca_mp5_mce_desc[] = { -+ "High SRAM ECC or parity error", -+ "Low SRAM ECC or parity error", -+ "Data Cache Bank A ECC or parity error", -+ "Data Cache Bank B ECC or 
parity error", -+ "Data Tag Cache Bank A ECC or parity error", -+ "Data Tag Cache Bank B ECC or parity error", -+ "Instruction Cache Bank A ECC or parity error", -+ "Instruction Cache Bank B ECC or parity error", -+ "Instruction Tag Cache Bank A ECC or parity error", -+ "Instruction Tag Cache Bank B ECC or parity error", -+}; -+/* Northbridge IO Unit */ -+static const char * const smca_nbio_mce_desc[] = { -+ "ECC or Parity error", -+ "PCIE error", -+ "SDP ErrEvent error", -+ "SDP Egress Poison Error", -+ "IOHC Internal Poison Error", -+}; -+/* PCI Express Unit */ -+static const char * const smca_pcie_mce_desc[] = { -+ "CCIX PER Message logging", -+ "CCIX Read Response with Status: Non-Data Error", -+ "CCIX Write Response with Status: Non-Data Error", -+ "CCIX Read Response with Status: Data Error", -+ "CCIX Non-okay write response with data error", -+}; -+ - - struct smca_mce_desc { - const char * const *descs; -@@ -208,11 +296,17 @@ static struct smca_mce_desc smca_mce_descs[] = { - [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) }, - [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, - [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, -+ [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, - [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, - [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, - [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, - [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, -+ [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)}, - [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, -+ [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc)}, -+ [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, -+ [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)}, -+ [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)}, - }; - - struct 
smca_hwid { -@@ -235,6 +329,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { - - /* Data Fabric MCA types */ - { SMCA_CS, 0x0000002E }, -+ { SMCA_CS_V2, 0x0002002E }, - { SMCA_PIE, 0x0001002E }, - - /* Unified Memory Controller MCA type */ -@@ -245,9 +340,20 @@ static struct smca_hwid smca_hwid_mcatypes[] = { - - /* Platform Security Processor MCA type */ - { SMCA_PSP, 0x000000FF }, -+ { SMCA_PSP_V2, 0x000100FF }, - - /* System Management Unit MCA type */ - { SMCA_SMU, 0x00000001 }, -+ { SMCA_SMU_V2, 0x00010001 }, -+ -+ /* Microprocessor 5 Unit MCA type */ -+ { SMCA_MP5, 0x00020001 }, -+ -+ /* Northbridge IO Unit MCA type */ -+ { SMCA_NBIO, 0x00000018 }, -+ -+ /* PCI Express Unit MCA type */ -+ { SMCA_PCIE, 0x00000046 }, - }; - - struct smca_bank_name { -@@ -264,11 +370,17 @@ static struct smca_bank_name smca_names[] = { - [SMCA_FP] = { "Floating Point Unit" }, - [SMCA_L3_CACHE] = { "L3 Cache" }, - [SMCA_CS] = { "Coherent Slave" }, -+ [SMCA_CS_V2] = { "Coherent Slave" }, - [SMCA_PIE] = { "Power, Interrupts, etc." }, - [SMCA_UMC] = { "Unified Memory Controller" }, - [SMCA_PB] = { "Parameter Block" }, - [SMCA_PSP] = { "Platform Security Processor" }, -+ [SMCA_PSP_V2] = { "Platform Security Processor" }, - [SMCA_SMU] = { "System Management Unit" }, -+ [SMCA_SMU_V2] = { "System Management Unit" }, -+ [SMCA_MP5] = { "Microprocessor 5 Unit" }, -+ [SMCA_NBIO] = { "Northbridge IO Unit" }, -+ [SMCA_PCIE] = { "PCI Express Unit" }, - }; - - static void amd_decode_errcode(struct mce_event *e) diff --git a/SOURCES/899fcc2cf21c86b5462c8f4441cd9c92b3d75f7d.patch b/SOURCES/899fcc2cf21c86b5462c8f4441cd9c92b3d75f7d.patch deleted file mode 100644 index 8f26b51..0000000 --- a/SOURCES/899fcc2cf21c86b5462c8f4441cd9c92b3d75f7d.patch +++ /dev/null @@ -1,71 +0,0 @@ -commit 899fcc2cf21c86b5462c8f4441cd9c92b3d75f7d -Author: Aristeu Rozanski -Date: Thu Jan 19 08:45:57 2023 -0500 - - rasdaemon: ras-report: fix possible but unlikely file descriptor leak - - Found with covscan. 
- - Signed-off-by: Aristeu Rozanski - Signed-off-by: Mauro Carvalho Chehab - ---- - ras-report.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - ---- rasdaemon-0.6.1.orig/ras-report.c 2023-01-23 11:36:20.972368760 -0500 -+++ rasdaemon-0.6.1/ras-report.c 2023-01-23 11:36:23.236343267 -0500 -@@ -374,7 +374,7 @@ if(rc < 0){ - - mc_fail: - -- if(sockfd > 0){ -+ if(sockfd >= 0){ - close(sockfd); - } - -@@ -424,7 +424,7 @@ if(rc < 0){ - - aer_fail: - -- if(sockfd > 0){ -+ if(sockfd >= 0){ - close(sockfd); - } - -@@ -473,7 +473,7 @@ rc = 0; - - non_standard_fail: - -- if(sockfd > 0){ -+ if(sockfd >= 0){ - close(sockfd); - } - -@@ -518,7 +518,7 @@ rc = 0; - - arm_fail: - -- if(sockfd > 0){ -+ if(sockfd >= 0){ - close(sockfd); - } - -@@ -564,7 +564,7 @@ if(rc < 0){ - - mce_fail: - -- if(sockfd > 0){ -+ if(sockfd >= 0){ - close(sockfd); - } - -@@ -609,7 +609,7 @@ if (rc < 0) - done = 1; - - mf_fail: -- if (sockfd > 0) -+ if (sockfd >= 0) - close(sockfd); - - if (done) diff --git a/SOURCES/9acef39f13833f7d53ef96abc5a72e79384260f4.patch b/SOURCES/9acef39f13833f7d53ef96abc5a72e79384260f4.patch deleted file mode 100644 index c4c8af1..0000000 --- a/SOURCES/9acef39f13833f7d53ef96abc5a72e79384260f4.patch +++ /dev/null @@ -1,230 +0,0 @@ -commit 9acef39f13833f7d53ef96abc5a72e79384260f4 -Author: Naveen Krishna Chatradhi -Date: Tue Jun 1 11:01:17 2021 +0530 - - rasdaemon: Add new SMCA bank types with error decoding - - Upcoming systems with Scalable Machine Check Architecture (SMCA) have - new MCA banks added. - - This patch adds the (HWID, MCATYPE) tuple, name and error decoding for - those new SMCA banks. - While at it, optimize the string names in smca_bank_name[]. 
- - Signed-off-by: Muralidhara M K - Signed-off-by: Naveen Krishna Chatradhi - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/mce-amd-smca.c b/mce-amd-smca.c -index 7c619fd..e0cf512 100644 ---- a/mce-amd-smca.c -+++ b/mce-amd-smca.c -@@ -47,7 +47,7 @@ - /* These may be used by multiple smca_hwid_mcatypes */ - enum smca_bank_types { - SMCA_LS = 0, /* Load Store */ -- SMCA_LS_V2, /* Load Store */ -+ SMCA_LS_V2, - SMCA_IF, /* Instruction Fetch */ - SMCA_L2_CACHE, /* L2 Cache */ - SMCA_DE, /* Decoder Unit */ -@@ -56,17 +56,22 @@ enum smca_bank_types { - SMCA_FP, /* Floating Point */ - SMCA_L3_CACHE, /* L3 Cache */ - SMCA_CS, /* Coherent Slave */ -- SMCA_CS_V2, /* Coherent Slave V2 */ -+ SMCA_CS_V2, - SMCA_PIE, /* Power, Interrupts, etc. */ - SMCA_UMC, /* Unified Memory Controller */ -+ SMCA_UMC_V2, - SMCA_PB, /* Parameter Block */ - SMCA_PSP, /* Platform Security Processor */ -- SMCA_PSP_V2, /* Platform Security Processor V2 */ -+ SMCA_PSP_V2, - SMCA_SMU, /* System Management Unit */ -- SMCA_SMU_V2, /* System Management Unit V2 */ -+ SMCA_SMU_V2, - SMCA_MP5, /* Microprocessor 5 Unit */ - SMCA_NBIO, /* Northbridge IO Unit */ - SMCA_PCIE, /* PCI Express Unit */ -+ SMCA_PCIE_V2, -+ SMCA_XGMI_PCS, /* xGMI PCS Unit */ -+ SMCA_XGMI_PHY, /* xGMI PHY Unit */ -+ SMCA_WAFL_PHY, /* WAFL PHY Unit */ - N_SMCA_BANK_TYPES - }; - -@@ -237,6 +242,22 @@ static const char * const smca_umc_mce_desc[] = { - "Command/address parity error", - "Write data CRC error", - }; -+ -+static const char * const smca_umc2_mce_desc[] = { -+ "DRAM ECC error", -+ "Data poison error", -+ "SDP parity error", -+ "Reserved", -+ "Address/Command parity error", -+ "Write data parity error", -+ "DCQ SRAM ECC error", -+ "Reserved", -+ "Read data parity error", -+ "Rdb SRAM ECC error", -+ "RdRsp SRAM ECC error", -+ "LM32 MP errors", -+}; -+ - /* Parameter Block */ - static const char * const smca_pb_mce_desc[] = { - "Parameter Block RAM ECC error", -@@ -314,6 +335,55 @@ static const char * const 
smca_pcie_mce_desc[] = { - "CCIX Non-okay write response with data error", - }; - -+static const char * const smca_pcie2_mce_desc[] = { -+ "SDP Parity Error logging", -+}; -+ -+static const char * const smca_xgmipcs_mce_desc[] = { -+ "Data Loss Error", -+ "Training Error", -+ "Flow Control Acknowledge Error", -+ "Rx Fifo Underflow Error", -+ "Rx Fifo Overflow Error", -+ "CRC Error", -+ "BER Exceeded Error", -+ "Tx Vcid Data Error", -+ "Replay Buffer Parity Error", -+ "Data Parity Error", -+ "Replay Fifo Overflow Error", -+ "Replay Fifo Underflow Error", -+ "Elastic Fifo Overflow Error", -+ "Deskew Error", -+ "Flow Control CRC Error", -+ "Data Startup Limit Error", -+ "FC Init Timeout Error", -+ "Recovery Timeout Error", -+ "Ready Serial Timeout Error", -+ "Ready Serial Attempt Error", -+ "Recovery Attempt Error", -+ "Recovery Relock Attempt Error", -+ "Replay Attempt Error", -+ "Sync Header Error", -+ "Tx Replay Timeout Error", -+ "Rx Replay Timeout Error", -+ "LinkSub Tx Timeout Error", -+ "LinkSub Rx Timeout Error", -+ "Rx CMD Pocket Error", -+}; -+ -+static const char * const smca_xgmiphy_mce_desc[] = { -+ "RAM ECC Error", -+ "ARC instruction buffer parity error", -+ "ARC data buffer parity error", -+ "PHY APB error", -+}; -+ -+static const char * const smca_waflphy_mce_desc[] = { -+ "RAM ECC Error", -+ "ARC instruction buffer parity error", -+ "ARC data buffer parity error", -+ "PHY APB error", -+}; - - struct smca_mce_desc { - const char * const *descs; -@@ -333,6 +403,7 @@ static struct smca_mce_desc smca_mce_descs[] = { - [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, - [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, - [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, -+ [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, - [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, - [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, - [SMCA_PSP_V2] = { 
smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)}, -@@ -341,6 +412,10 @@ static struct smca_mce_desc smca_mce_descs[] = { - [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, - [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)}, - [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)}, -+ [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) }, -+ [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, -+ [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, -+ [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) }, - }; - - struct smca_hwid { -@@ -369,6 +444,8 @@ static struct smca_hwid smca_hwid_mcatypes[] = { - - /* Unified Memory Controller MCA type */ - { SMCA_UMC, 0x00000096 }, -+ /* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. */ -+ { SMCA_UMC_V2, 0x00010096 }, - - /* Parameter Block MCA type */ - { SMCA_PB, 0x00000005 }, -@@ -389,6 +466,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = { - - /* PCI Express Unit MCA type */ - { SMCA_PCIE, 0x00000046 }, -+ { SMCA_PCIE_V2, 0x00010046 }, -+ -+ /* Ext Global Memory Interconnect PCS MCA type */ -+ { SMCA_XGMI_PCS, 0x00000050 }, -+ -+ /* Ext Global Memory Interconnect PHY MCA type */ -+ { SMCA_XGMI_PHY, 0x00000259 }, -+ -+ /* WAFL PHY MCA type */ -+ { SMCA_WAFL_PHY, 0x00000267 }, - }; - - struct smca_bank_name { -@@ -396,27 +483,28 @@ struct smca_bank_name { - }; - - static struct smca_bank_name smca_names[] = { -- [SMCA_LS] = { "Load Store Unit" }, -- [SMCA_LS_V2] = { "Load Store Unit" }, -- [SMCA_IF] = { "Instruction Fetch Unit" }, -- [SMCA_L2_CACHE] = { "L2 Cache" }, -- [SMCA_DE] = { "Decode Unit" }, -- [SMCA_RESERVED] = { "Reserved" }, -- [SMCA_EX] = { "Execution Unit" }, -- [SMCA_FP] = { "Floating Point Unit" }, -- [SMCA_L3_CACHE] = { "L3 Cache" }, -- [SMCA_CS] = { "Coherent Slave" }, -- [SMCA_CS_V2] = { "Coherent Slave" }, -- [SMCA_PIE] = 
{ "Power, Interrupts, etc." }, -- [SMCA_UMC] = { "Unified Memory Controller" }, -- [SMCA_PB] = { "Parameter Block" }, -- [SMCA_PSP] = { "Platform Security Processor" }, -- [SMCA_PSP_V2] = { "Platform Security Processor" }, -- [SMCA_SMU] = { "System Management Unit" }, -- [SMCA_SMU_V2] = { "System Management Unit" }, -- [SMCA_MP5] = { "Microprocessor 5 Unit" }, -- [SMCA_NBIO] = { "Northbridge IO Unit" }, -- [SMCA_PCIE] = { "PCI Express Unit" }, -+ [SMCA_LS ... SMCA_LS_V2] = { "Load Store Unit" }, -+ [SMCA_IF] = { "Instruction Fetch Unit" }, -+ [SMCA_L2_CACHE] = { "L2 Cache" }, -+ [SMCA_DE] = { "Decode Unit" }, -+ [SMCA_RESERVED] = { "Reserved" }, -+ [SMCA_EX] = { "Execution Unit" }, -+ [SMCA_FP] = { "Floating Point Unit" }, -+ [SMCA_L3_CACHE] = { "L3 Cache" }, -+ [SMCA_CS ... SMCA_CS_V2] = { "Coherent Slave" }, -+ [SMCA_PIE] = { "Power, Interrupts, etc." }, -+ [SMCA_UMC] = { "Unified Memory Controller" }, -+ [SMCA_UMC_V2] = { "Unified Memory Controller V2" }, -+ [SMCA_PB] = { "Parameter Block" }, -+ [SMCA_PSP ... SMCA_PSP_V2] = { "Platform Security Processor" }, -+ [SMCA_SMU ... SMCA_SMU_V2] = { "System Management Unit" }, -+ [SMCA_MP5] = { "Microprocessor 5 Unit" }, -+ [SMCA_NBIO] = { "Northbridge IO Unit" }, -+ [SMCA_PCIE ... 
SMCA_PCIE_V2] = { "PCI Express Unit" }, -+ [SMCA_XGMI_PCS] = { "Ext Global Memory Interconnect PCS Unit" }, -+ [SMCA_XGMI_PHY] = { "Ext Global Memory Interconnect PHY Unit" }, -+ [SMCA_WAFL_PHY] = { "WAFL PHY Unit" }, -+ - }; - - static void amd_decode_errcode(struct mce_event *e) diff --git a/SOURCES/a16ca0711001957ee98f2c124abce0fa1f801529.patch b/SOURCES/a16ca0711001957ee98f2c124abce0fa1f801529.patch deleted file mode 100644 index 3a96263..0000000 --- a/SOURCES/a16ca0711001957ee98f2c124abce0fa1f801529.patch +++ /dev/null @@ -1,670 +0,0 @@ -commit a16ca0711001957ee98f2c124abce0fa1f801529 -Author: Chandu-babu Namburu -Date: Wed Jan 30 20:36:45 2019 +0530 - - rasdaemon: add support for AMD Scalable MCA - - Add logic here to decode errors from all known IP blocks for - AMD Scalable MCA supported processors - - Reviewed-by: Yazen Ghannam - Signed-off-by: Chandu-babu Namburu - ---- - mce-amd-smca.c | 371 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ - mce-amd.c | 122 +++++++++++++++++ - ras-mce-handler.c | 24 +++ - ras-mce-handler.h | 15 ++ - 4 files changed, 530 insertions(+), 2 deletions(-) - ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ rasdaemon-0.6.1/mce-amd-smca.c 2019-07-12 11:35:04.836470461 -0400 -@@ -0,0 +1,371 @@ -+/* -+ * Copyright (c) 2018, AMD, Inc. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 and -+ * only version 2 as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include -+#include -+ -+#include "ras-mce-handler.h" -+#include "bitfield.h" -+ -+/* MCA_STATUS REGISTER FOR FAMILY 17H -+ *********************** Higher 32-bits ***************************** -+ * 63: VALIDERROR, 62: OVERFLOW, 61: UC, 60: Err ENABLE, -+ * 59: Misc Valid, 58: Addr Valid, 57: PCC, 56: ErrCoreID Valid, -+ * 55: TCC, 54: RES, 53: Syndrom Valid, 52: Transparanet, -+ * 51: RES, 50: RES, 49: RES, 48: RES, -+ * 47: RES, 46: CECC, 45: UECC, 44: Deferred, -+ * 43: Poison, 42: RES, 41: RES, 40: RES, -+ * 39: RES, 38: RES, 37: ErrCoreID[5], 36: ErrCoreID[4], -+ * 35: ErrCoreID[3], 34: ErrCoreID[2] 33: ErrCoreID[1] 32: ErrCoreID[0] -+ *********************** Lower 32-bits ****************************** -+ * 31: RES, 30: RES, 29: RES, 28: RES, -+ * 27: RES, 26: RES, 25: RES, 24: RES -+ * 23: RES, 22: RES, 21: XEC[5], 20: XEC[4], -+ * 19: XEC[3], 18: XEC[2], 17: XEC[1], 16: XEC[0] -+ * 15: EC[15], 14: EC[14], 13: EC[13], 12: EC[12], -+ * 11: EC[11], 10: EC[10], 09: EC[9], 08: EC[8], -+ * 07: EC[7], 06: EC[6], 05: EC[5], 04: EC[4], -+ * 03: EC[3], 02: EC[2], 01: EC[1], 00: EC[0] -+ */ -+ -+/* These may be used by multiple smca_hwid_mcatypes */ -+enum smca_bank_types { -+ SMCA_LS = 0, /* Load Store */ -+ SMCA_IF, /* Instruction Fetch */ -+ SMCA_L2_CACHE, /* L2 Cache */ -+ SMCA_DE, /* Decoder Unit */ -+ SMCA_RESERVED, /* Reserved */ -+ SMCA_EX, /* Execution Unit */ -+ SMCA_FP, /* Floating Point */ -+ SMCA_L3_CACHE, /* L3 Cache */ -+ SMCA_CS, /* Coherent Slave */ -+ SMCA_PIE, /* Power, Interrupts, etc. 
*/ -+ SMCA_UMC, /* Unified Memory Controller */ -+ SMCA_PB, /* Parameter Block */ -+ SMCA_PSP, /* Platform Security Processor */ -+ SMCA_SMU, /* System Management Unit */ -+ N_SMCA_BANK_TYPES -+}; -+ -+/* SMCA Extended error strings */ -+/* Load Store */ -+static const char * const smca_ls_mce_desc[] = { -+ "Load queue parity", -+ "Store queue parity", -+ "Miss address buffer payload parity", -+ "L1 TLB parity", -+ "Reserved", -+ "DC tag error type 6", -+ "DC tag error type 1", -+ "Internal error type 1", -+ "Internal error type 2", -+ "Sys Read data error thread 0", -+ "Sys read data error thread 1", -+ "DC tag error type 2", -+ "DC data error type 1 (poison consumption)", -+ "DC data error type 2", -+ "DC data error type 3", -+ "DC tag error type 4", -+ "L2 TLB parity", -+ "PDC parity error", -+ "DC tag error type 3", -+ "DC tag error type 5", -+ "L2 fill data error", -+}; -+/* Instruction Fetch */ -+static const char * const smca_if_mce_desc[] = { -+ "microtag probe port parity error", -+ "IC microtag or full tag multi-hit error", -+ "IC full tag parity", -+ "IC data array parity", -+ "Decoupling queue phys addr parity error", -+ "L0 ITLB parity error", -+ "L1 ITLB parity error", -+ "L2 ITLB parity error", -+ "BPQ snoop parity on Thread 0", -+ "BPQ snoop parity on Thread 1", -+ "L1 BTB multi-match error", -+ "L2 BTB multi-match error", -+ "L2 Cache Response Poison error", -+ "System Read Data error", -+}; -+/* L2 Cache */ -+static const char * const smca_l2_mce_desc[] = { -+ "L2M tag multi-way-hit error", -+ "L2M tag ECC error", -+ "L2M data ECC error", -+ "HW assert", -+}; -+/* Decoder Unit */ -+static const char * const smca_de_mce_desc[] = { -+ "uop cache tag parity error", -+ "uop cache data parity error", -+ "Insn buffer parity error", -+ "uop queue parity error", -+ "Insn dispatch queue parity error", -+ "Fetch address FIFO parity", -+ "Patch RAM data parity", -+ "Patch RAM sequencer parity", -+ "uop buffer parity" -+}; -+/* Execution Unit */ -+static 
const char * const smca_ex_mce_desc[] = { -+ "Watchdog timeout error", -+ "Phy register file parity", -+ "Flag register file parity", -+ "Immediate displacement register file parity", -+ "Address generator payload parity", -+ "EX payload parity", -+ "Checkpoint queue parity", -+ "Retire dispatch queue parity", -+ "Retire status queue parity error", -+ "Scheduling queue parity error", -+ "Branch buffer queue parity error", -+}; -+/* Floating Point Unit */ -+static const char * const smca_fp_mce_desc[] = { -+ "Physical register file parity", -+ "Freelist parity error", -+ "Schedule queue parity", -+ "NSQ parity error", -+ "Retire queue parity", -+ "Status register file parity", -+ "Hardware assertion", -+}; -+/* L3 Cache */ -+static const char * const smca_l3_mce_desc[] = { -+ "Shadow tag macro ECC error", -+ "Shadow tag macro multi-way-hit error", -+ "L3M tag ECC error", -+ "L3M tag multi-way-hit error", -+ "L3M data ECC error", -+ "XI parity, L3 fill done channel error", -+ "L3 victim queue parity", -+ "L3 HW assert", -+}; -+/* Coherent Slave Unit */ -+static const char * const smca_cs_mce_desc[] = { -+ "Illegal request from transport layer", -+ "Address violation", -+ "Security violation", -+ "Illegal response from transport layer", -+ "Unexpected response", -+ "Parity error on incoming request or probe response data", -+ "Parity error on incoming read response data", -+ "Atomic request parity", -+ "ECC error on probe filter access", -+}; -+/* Power, Interrupt, etc.. 
*/ -+static const char * const smca_pie_mce_desc[] = { -+ "HW assert", -+ "Internal PIE register security violation", -+ "Error on GMI link", -+ "Poison data written to internal PIE register", -+}; -+/* Unified Memory Controller */ -+static const char * const smca_umc_mce_desc[] = { -+ "DRAM ECC error", -+ "Data poison error on DRAM", -+ "SDP parity error", -+ "Advanced peripheral bus error", -+ "Command/address parity error", -+ "Write data CRC error", -+}; -+/* Parameter Block */ -+static const char * const smca_pb_mce_desc[] = { -+ "Parameter Block RAM ECC error", -+}; -+/* Platform Security Processor */ -+static const char * const smca_psp_mce_desc[] = { -+ "PSP RAM ECC or parity error", -+}; -+/* System Management Unit */ -+static const char * const smca_smu_mce_desc[] = { -+ "SMU RAM ECC or parity error", -+}; -+ -+struct smca_mce_desc { -+ const char * const *descs; -+ unsigned int num_descs; -+}; -+ -+static struct smca_mce_desc smca_mce_descs[] = { -+ [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) }, -+ [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) }, -+ [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) }, -+ [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) }, -+ [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) }, -+ [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) }, -+ [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, -+ [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, -+ [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, -+ [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, -+ [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, -+ [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, -+ [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, -+}; -+ -+struct smca_hwid { -+ unsigned int bank_type; /* Use with smca_bank_types for easy indexing.*/ -+ uint32_t mcatype_hwid; 
/* mcatype,hwid bit 63-32 in MCx_IPID Register*/ -+}; -+ -+static struct smca_hwid smca_hwid_mcatypes[] = { -+ /* { bank_type, mcatype_hwid } */ -+ -+ /* ZN Core (HWID=0xB0) MCA types */ -+ { SMCA_LS, 0x000000B0 }, -+ { SMCA_IF, 0x000100B0 }, -+ { SMCA_L2_CACHE, 0x000200B0 }, -+ { SMCA_DE, 0x000300B0 }, -+ /* HWID 0xB0 MCATYPE 0x4 is Reserved */ -+ { SMCA_EX, 0x000500B0 }, -+ { SMCA_FP, 0x000600B0 }, -+ { SMCA_L3_CACHE, 0x000700B0 }, -+ -+ /* Data Fabric MCA types */ -+ { SMCA_CS, 0x0000002E }, -+ { SMCA_PIE, 0x0001002E }, -+ -+ /* Unified Memory Controller MCA type */ -+ { SMCA_UMC, 0x00000096 }, -+ -+ /* Parameter Block MCA type */ -+ { SMCA_PB, 0x00000005 }, -+ -+ /* Platform Security Processor MCA type */ -+ { SMCA_PSP, 0x000000FF }, -+ -+ /* System Management Unit MCA type */ -+ { SMCA_SMU, 0x00000001 }, -+}; -+ -+struct smca_bank_name { -+ const char *name; -+}; -+ -+static struct smca_bank_name smca_names[] = { -+ [SMCA_LS] = { "Load Store Unit" }, -+ [SMCA_IF] = { "Instruction Fetch Unit" }, -+ [SMCA_L2_CACHE] = { "L2 Cache" }, -+ [SMCA_DE] = { "Decode Unit" }, -+ [SMCA_RESERVED] = { "Reserved" }, -+ [SMCA_EX] = { "Execution Unit" }, -+ [SMCA_FP] = { "Floating Point Unit" }, -+ [SMCA_L3_CACHE] = { "L3 Cache" }, -+ [SMCA_CS] = { "Coherent Slave" }, -+ [SMCA_PIE] = { "Power, Interrupts, etc." }, -+ [SMCA_UMC] = { "Unified Memory Controller" }, -+ [SMCA_PB] = { "Parameter Block" }, -+ [SMCA_PSP] = { "Platform Security Processor" }, -+ [SMCA_SMU] = { "System Management Unit" }, -+}; -+ -+static void amd_decode_errcode(struct mce_event *e) -+{ -+ -+ decode_amd_errcode(e); -+ -+ if (e->status & MCI_STATUS_POISON) -+ mce_snprintf(e->mcistatus_msg, "Poison consumed"); -+ -+ if (e->status & MCI_STATUS_TCC) -+ mce_snprintf(e->mcistatus_msg, "Task_context_corrupt"); -+ -+} -+/* -+ * To find the UMC channel represented by this bank we need to match on its -+ * instance_id. The instance_id of a bank is held in the lower 32 bits of its -+ * IPID. 
-+ */ -+static int find_umc_channel(struct mce_event *e) -+{ -+ uint32_t umc_instance_id[] = {0x50f00, 0x150f00}; -+ uint32_t instance_id = EXTRACT(e->ipid, 0, 31); -+ int i, channel = -1; -+ -+ for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++) -+ if (umc_instance_id[i] == instance_id) -+ channel = i; -+ -+ return channel; -+} -+/* Decode extended errors according to Scalable MCA specification */ -+static void decode_smca_error(struct mce_event *e) -+{ -+ enum smca_bank_types bank_type; -+ const char *ip_name; -+ unsigned short xec = (e->status >> 16) & 0x3f; -+ const struct smca_hwid *s_hwid; -+ uint32_t mcatype_hwid = EXTRACT(e->ipid, 32, 63); -+ unsigned int csrow = -1, channel = -1; -+ unsigned int i; -+ -+ for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { -+ s_hwid = &smca_hwid_mcatypes[i]; -+ if (mcatype_hwid == s_hwid->mcatype_hwid) { -+ bank_type = s_hwid->bank_type; -+ break; -+ } -+ } -+ -+ if (i >= ARRAY_SIZE(smca_hwid_mcatypes)) { -+ strcpy(e->mcastatus_msg, "Couldn't find bank type with IPID"); -+ return; -+ } -+ -+ if (bank_type >= N_SMCA_BANK_TYPES) { -+ strcpy(e->mcastatus_msg, "Don't know how to decode this bank"); -+ return; -+ } -+ -+ if (bank_type == SMCA_RESERVED) { -+ strcpy(e->mcastatus_msg, "Bank 4 is reserved.\n"); -+ return; -+ } -+ -+ ip_name = smca_names[bank_type].name; -+ -+ mce_snprintf(e->bank_name, "%s (bank=%d)", ip_name, e->bank); -+ -+ /* Only print the descriptor of valid extended error code */ -+ if (xec < smca_mce_descs[bank_type].num_descs) -+ mce_snprintf(e->mcastatus_msg, -+ " %s.\n", smca_mce_descs[bank_type].descs[xec]); -+ -+ if (bank_type == SMCA_UMC && xec == 0) { -+ channel = find_umc_channel(e); -+ csrow = e->synd & 0x7; /* Bit 0, 1 ,2 */ -+ mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d", -+ channel, csrow); -+ } -+} -+ -+int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e) -+{ -+ uint64_t mcgstatus = e->mcgstatus; -+ -+ mce_snprintf(e->mcgstatus_msg, "mcgstatus=%lld", -+ (long 
long)e->mcgstatus); -+ -+ if (mcgstatus & MCG_STATUS_RIPV) -+ mce_snprintf(e->mcgstatus_msg, "RIPV"); -+ if (mcgstatus & MCG_STATUS_EIPV) -+ mce_snprintf(e->mcgstatus_msg, "EIPV"); -+ if (mcgstatus & MCG_STATUS_MCIP) -+ mce_snprintf(e->mcgstatus_msg, "MCIP"); -+ -+ decode_smca_error(e); -+ amd_decode_errcode(e); -+ return 0; -+} ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ rasdaemon-0.6.1/mce-amd.c 2019-07-12 11:35:04.836470461 -0400 -@@ -0,0 +1,122 @@ -+/* -+ * Copyright (c) 2018, The AMD, Inc. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 and -+ * only version 2 as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include -+#include -+ -+#include "ras-mce-handler.h" -+ -+/* Error Code Types */ -+#define TLB_ERROR(x) (((x) & 0xFFF0) == 0x0010) -+#define MEM_ERROR(x) (((x) & 0xFF00) == 0x0100) -+#define BUS_ERROR(x) (((x) & 0xF800) == 0x0800) -+#define INT_ERROR(x) (((x) & 0xF4FF) == 0x0400) -+ -+/* Error code: transaction type (TT) */ -+static char *transaction[] = { -+ "instruction", "data", "generic", "reserved" -+}; -+/* Error codes: cache level (LL) */ -+static char *cachelevel[] = { -+ "reserved", "L1", "L2", "L3/generic" -+}; -+/* Error codes: memory transaction type (RRRR) */ -+static char *memtrans[] = { -+ "generic", "generic read", "generic write", "data read", -+ "data write", "instruction fetch", "prefetch", "evict", "snoop", -+ "?", "?", "?", "?", "?", "?", "?" 
-+}; -+/* Participation Processor */ -+static char *partproc[] = { -+ "local node origin", "local node response", -+ "local node observed", "generic participation" -+}; -+/* Timeout */ -+static char *timeout[] = { -+ "request didn't time out", -+ "request timed out" -+}; -+/* internal unclassified error code */ -+static char *internal[] = { "reserved", -+ "reserved", -+ "hardware assert", -+ "reserved" }; -+ -+#define TT(x) (((x) >> 2) & 0x3) /*bit 2, bit 3*/ -+#define TT_MSG(x) transaction[TT(x)] -+#define LL(x) ((x) & 0x3) /*bit 0, bit 1*/ -+#define LL_MSG(x) cachelevel[LL(x)] -+ -+#define R4(x) (((x) >> 4) & 0xF) /*bit 4, bit 5, bit 6, bit 7 */ -+#define R4_MSG(x) ((R4(x) < 9) ? memtrans[R4(x)] : "Wrong R4!") -+ -+#define TO(x) (((x) >> 8) & 0x1) /*bit 8*/ -+#define TO_MSG(x) timeout[TO(x)] -+#define PP(x) (((x) >> 9) & 0x3) /*bit 9, bit 10*/ -+#define PP_MSG(x) partproc[PP(x)] -+ -+#define UU(x) (((x) >> 8) & 0x3) /*bit 8, bit 9*/ -+#define UU_MSG(x) internal[UU(x)] -+ -+void decode_amd_errcode(struct mce_event *e) -+{ -+ uint16_t ec = e->status & 0xffff; -+ uint16_t ecc = (e->status >> 45) & 0x3; -+ -+ if (e->status & MCI_STATUS_UC) { -+ if (e->status & MCI_STATUS_PCC) -+ strcpy(e->error_msg, "System Fatal error."); -+ if (e->mcgstatus & MCG_STATUS_RIPV) -+ strcpy(e->error_msg, -+ "Uncorrected, software restartable error."); -+ strcpy(e->error_msg, -+ "Uncorrected, software containable error."); -+ } else if (e->status & MCI_STATUS_DEFERRED) -+ strcpy(e->error_msg, "Deferred error, no action required."); -+ else -+ strcpy(e->error_msg, "Corrected error, no action required."); -+ -+ if (!(e->status & MCI_STATUS_VAL)) -+ mce_snprintf(e->mcistatus_msg, "MCE_INVALID"); -+ -+ if (e->status & MCI_STATUS_OVER) -+ mce_snprintf(e->mcistatus_msg, "Error_overflow"); -+ -+ if (e->status & MCI_STATUS_PCC) -+ mce_snprintf(e->mcistatus_msg, "Processor_context_corrupt"); -+ -+ if (ecc) -+ mce_snprintf(e->mcistatus_msg, -+ "%sECC", ((ecc == 2) ? 
"C" : "U")); -+ -+ if (INT_ERROR(ec)) { -+ mce_snprintf(e->mcastatus_msg, "Internal '%s'", UU_MSG(ec)); -+ return; -+ } -+ -+ if (TLB_ERROR(ec)) -+ mce_snprintf(e->mcastatus_msg, -+ "TLB Error 'tx: %s, level: %s'", -+ TT_MSG(ec), LL_MSG(ec)); -+ else if (MEM_ERROR(ec)) -+ mce_snprintf(e->mcastatus_msg, -+ "Memory Error 'mem-tx: %s, tx: %s, level: %s'", -+ R4_MSG(ec), TT_MSG(ec), LL_MSG(ec)); -+ else if (BUS_ERROR(ec)) -+ mce_snprintf(e->mcastatus_msg, -+ "Bus Error '%s, %s, mem-tx: %s, level: %s'", -+ PP_MSG(ec), TO_MSG(ec), -+ R4_MSG(ec), LL_MSG(ec)); -+ return; -+ -+} ---- rasdaemon-0.6.1.orig/ras-mce-handler.c 2019-07-12 11:35:01.585502811 -0400 -+++ rasdaemon-0.6.1/ras-mce-handler.c 2019-07-12 11:35:04.836470461 -0400 -@@ -55,6 +55,7 @@ [CPU_XEON75XX] = "Intel Xeon 7500 series - [CPU_KNIGHTS_LANDING] = "Knights Landing", - [CPU_KNIGHTS_MILL] = "Knights Mill", - [CPU_SKYLAKE_XEON] = "Skylake server", -+ [CPU_NAPLES] = "AMD Family 17h Zen1" - }; - - static enum cputype select_intel_cputype(struct ras_events *ras) -@@ -190,9 +191,12 @@ ret = 0; - if (!strcmp(mce->vendor, "AuthenticAMD")) { - if (mce->family == 15) - mce->cputype = CPU_K8; -- if (mce->family > 15) { -+ if (mce->family == 23) -+ mce->cputype = CPU_NAPLES; -+ if (mce->family > 23) { - log(ALL, LOG_INFO, -- "Can't parse MCE for this AMD CPU yet\n"); -+ "Can't parse MCE for this AMD CPU yet %d\n", -+ mce->family); - ret = EINVAL; - } - goto ret; -@@ -331,6 +335,12 @@ #if 0 - if (e->status & MCI_STATUS_ADDRV) - trace_seq_printf(s, ", addr= %llx", (long long)e->addr); - -+ if (e->status & MCI_STATUS_SYNDV) -+ trace_seq_printf(s, ", synd= %llx", (long long)e->synd); -+ -+ if (e->ipid) -+ trace_seq_printf(s, ", ipid= %llx", (long long)e->ipid); -+ - if (e->mcgstatus_msg) - trace_seq_printf(s, ", %s", e->mcgstatus_msg); - else -@@ -411,6 +421,13 @@ if (pevent_get_field_val(s, event, "bank - if (pevent_get_field_val(s, event, "cpuvendor", record, &val, 1) < 0) - return -1; - e.cpuvendor = val; -+ /* Get New 
entries */ -+ if (pevent_get_field_val(s, event, "synd", record, &val, 1) < 0) -+ return -1; -+ e.synd = val; -+ if (pevent_get_field_val(s, event, "ipid", record, &val, 1) < 0) -+ return -1; -+ e.ipid = val; - - switch (mce->cputype) { - case CPU_GENERIC: -@@ -418,6 +435,9 @@ if (pevent_get_field_val(s, event, "cpuv - case CPU_K8: - rc = parse_amd_k8_event(ras, &e); - break; -+ case CPU_NAPLES: -+ rc = parse_amd_smca_event(ras, &e); -+ break; - default: /* All other CPU types are Intel */ - rc = parse_intel_event(ras, &e); - } ---- rasdaemon-0.6.1.orig/ras-mce-handler.h 2019-07-12 11:35:01.585502811 -0400 -+++ rasdaemon-0.6.1/ras-mce-handler.h 2019-07-12 11:35:04.836470461 -0400 -@@ -50,6 +50,7 @@ enum cputype { - CPU_KNIGHTS_LANDING, - CPU_KNIGHTS_MILL, - CPU_SKYLAKE_XEON, -+ CPU_NAPLES, - }; - - struct mce_event { -@@ -69,6 +70,8 @@ struct mce_event { - uint8_t cs; - uint8_t bank; - uint8_t cpuvendor; -+ uint64_t synd; /* MCA_SYND MSR: only valid on SMCA systems */ -+ uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */ - - /* Parsed data */ - char timestamp[64]; -@@ -129,6 +132,9 @@ void broadwell_de_decode_model(struct ra - void broadwell_epex_decode_model(struct ras_events *ras, struct mce_event *e); - void skylake_s_decode_model(struct ras_events *ras, struct mce_event *e); - -+/* AMD error code decode function */ -+void decode_amd_errcode(struct mce_event *e); -+ - /* Software defined banks */ - #define MCE_EXTENDED_BANK 128 - -@@ -144,6 +150,13 @@ #define MCI_STATUS_EN (1ULL<<60) /* - #define MCI_STATUS_S (1ULL<<56) /* signalled */ - #define MCI_STATUS_AR (1ULL<<55) /* action-required */ - -+/* AMD-specific bits */ -+#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ -+#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. 
valid */ -+/* uncorrected error,deferred exception */ -+#define MCI_STATUS_DEFERRED (1ULL<<44) -+#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ -+ - #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ - #define MCG_STATUS_EIPV (1ULL<<1) /* eip points to correct instruction */ - #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ -@@ -154,4 +167,6 @@ int parse_intel_event(struct ras_events - - int parse_amd_k8_event(struct ras_events *ras, struct mce_event *e); - -+int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e); -+ - #endif ---- rasdaemon-0.6.1.orig/Makefile.in 2018-04-25 06:29:05.000000000 -0400 -+++ rasdaemon-0.6.1/Makefile.in 2019-07-15 14:41:22.308278851 -0400 -@@ -100,7 +100,7 @@ sbin_PROGRAMS = rasdaemon$(EXEEXT) - @WITH_MCE_TRUE@ mce-intel-dunnington.c mce-intel-tulsa.c \ - @WITH_MCE_TRUE@ mce-intel-sb.c mce-intel-ivb.c mce-intel-haswell.c \ - @WITH_MCE_TRUE@ mce-intel-knl.c mce-intel-broadwell-de.c \ --@WITH_MCE_TRUE@ mce-intel-broadwell-epex.c mce-intel-skylake-xeon.c -+@WITH_MCE_TRUE@ mce-intel-broadwell-epex.c mce-intel-skylake-xeon.c mce-amd.c mce-amd-smca.c - - @WITH_EXTLOG_TRUE@am__append_6 = ras-extlog-handler.c - @WITH_ABRT_REPORT_TRUE@am__append_7 = ras-report.c -@@ -132,7 +132,7 @@ am__rasdaemon_SOURCES_DIST = rasdaemon.c - mce-intel-ivb.c mce-intel-haswell.c mce-intel-knl.c \ - mce-intel-broadwell-de.c mce-intel-broadwell-epex.c \ - mce-intel-skylake-xeon.c ras-extlog-handler.c ras-report.c \ -- non-standard-hisi_hip07.c -+ non-standard-hisi_hip07.c mce-amd-smca.c mce-amd.c - @WITH_SQLITE3_TRUE@am__objects_1 = ras-record.$(OBJEXT) - @WITH_AER_TRUE@am__objects_2 = ras-aer-handler.$(OBJEXT) - @WITH_NON_STANDARD_TRUE@am__objects_3 = \ -@@ -149,7 +149,9 @@ non-standard-hisi_hip07.c - @WITH_MCE_TRUE@ mce-intel-knl.$(OBJEXT) \ - @WITH_MCE_TRUE@ mce-intel-broadwell-de.$(OBJEXT) \ - @WITH_MCE_TRUE@ mce-intel-broadwell-epex.$(OBJEXT) \ --@WITH_MCE_TRUE@ mce-intel-skylake-xeon.$(OBJEXT) 
-+@WITH_MCE_TRUE@ mce-intel-skylake-xeon.$(OBJEXT) \ -+@WITH_MCE_TRUE@ mce-amd-smca.$(OBJEXT) \ -+@WITH_MCE_TRUE@ mce-amd.$(OBJEXT) - @WITH_EXTLOG_TRUE@am__objects_6 = ras-extlog-handler.$(OBJEXT) - @WITH_ABRT_REPORT_TRUE@am__objects_7 = ras-report.$(OBJEXT) - @WITH_HISI_NS_DECODE_TRUE@am__objects_8 = \ -@@ -595,6 +597,8 @@ distclean-compile: - - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bitfield.Po@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-amd-k8.Po@am__quote@ -+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-amd.Po@am__quote@ -+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-amd-scma.Po@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-intel-broadwell-de.Po@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-intel-broadwell-epex.Po@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-intel-dunnington.Po@am__quote@ diff --git a/SOURCES/a8c776ed94f68ae31d7b5f74e19545698898c13c.patch b/SOURCES/a8c776ed94f68ae31d7b5f74e19545698898c13c.patch deleted file mode 100644 index 38657d4..0000000 --- a/SOURCES/a8c776ed94f68ae31d7b5f74e19545698898c13c.patch +++ /dev/null @@ -1,138 +0,0 @@ -commit a8c776ed94f68ae31d7b5f74e19545698898c13c -Author: Mauro Carvalho Chehab -Date: Tue Aug 14 13:06:27 2018 -0300 - - mce-intel-*: fix a warning when using FIELD(, NULL) - - Internally, FIELD() macro checks the size of an array, by - using ARRAY_SIZE. 
Well, this macro causes a division by zero - if NULL is used, as its type is void, as warned: - - mce-intel-dunnington.c:30:2: note: in expansion of macro ‘FIELD’ - FIELD(17, NULL), - ^~~~~ - ras-mce-handler.h:28:33: warning: division ‘sizeof (void *) / sizeof (void)’ does not compute the number of array elements [-Wsizeof-pointer-div] - #define ARRAY_SIZE(x) (sizeof(x)/sizeof(*(x))) - ^ - bitfield.h:37:51: note: in expansion of macro ‘ARRAY_SIZE’ - #define FIELD(start_bit, name) { start_bit, name, ARRAY_SIZE(name) } - ^~~~~~~~~~ - - While this warning is harmless, it may prevent seeing more serios - warnings. So, add a FIELD_NULL() macro to avoid that. - - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/bitfield.h b/bitfield.h -index c7dfeb1..fccbb36 100644 ---- a/bitfield.h -+++ b/bitfield.h -@@ -35,6 +35,7 @@ struct numfield { - }; - - #define FIELD(start_bit, name) { start_bit, name, ARRAY_SIZE(name) } -+#define FIELD_NULL(start_bit) { start_bit, NULL, 0 } - #define SBITFIELD(start_bit, string) { start_bit, ((char * [2]) { NULL, string }), 2 } - - #define NUMBER(start, end, name) { start, end, name, "%Lu", 0 } -diff --git a/mce-intel-dunnington.c b/mce-intel-dunnington.c -index 4b1c7e3..c695c62 100644 ---- a/mce-intel-dunnington.c -+++ b/mce-intel-dunnington.c -@@ -27,14 +27,14 @@ - - static struct field dunnington_bus_status[] = { - SBITFIELD(16, "Parity error detected during FSB request phase"), -- FIELD(17, NULL), -+ FIELD_NULL(17), - SBITFIELD(20, "Hard Failure response received for a local transaction"), - SBITFIELD(21, "Parity error on FSB response field detected"), - SBITFIELD(22, "Parity data error on inbound data detected"), -- FIELD(23, NULL), -- FIELD(25, NULL), -- FIELD(28, NULL), -- FIELD(31, NULL), -+ FIELD_NULL(23), -+ FIELD_NULL(25), -+ FIELD_NULL(28), -+ FIELD_NULL(31), - {} - }; - -diff --git a/mce-intel-p4-p6.c b/mce-intel-p4-p6.c -index 4615e1a..5c6c3ff 100644 ---- a/mce-intel-p4-p6.c -+++ b/mce-intel-p4-p6.c -@@ -60,7 +60,7 @@ static 
char *bus_queue_error_type[] = { - }; - - static struct field p6_shared_status[] = { -- FIELD(16, NULL), -+ FIELD_NULL(16), - FIELD(19, bus_queue_req_type), - FIELD(25, bus_queue_error_type), - FIELD(25, bus_queue_error_type), -@@ -68,7 +68,7 @@ static struct field p6_shared_status[] = { - SBITFIELD(36, "received parity error on response transaction"), - SBITFIELD(38, "timeout BINIT (ROB timeout)." - " No micro-instruction retired for some time"), -- FIELD(39, NULL), -+ FIELD_NULL(39), - SBITFIELD(42, "bus transaction received hard error response"), - SBITFIELD(43, "failure that caused IERR"), - /* The following are reserved for Core in the SDM. Let's keep them here anyways*/ -@@ -76,15 +76,15 @@ static struct field p6_shared_status[] = { - SBITFIELD(45, "uncorrectable ECC error"), - SBITFIELD(46, "correctable ECC error"), - /* [47..54]: ECC syndrome */ -- FIELD(55, NULL), -+ FIELD_NULL(55), - {}, - }; - - static struct field p6old_status[] = { - SBITFIELD(28, "FRC error"), - SBITFIELD(29, "BERR on this CPU"), -- FIELD(31, NULL), -- FIELD(32, NULL), -+ FIELD_NULL(31), -+ FIELD_NULL(32), - SBITFIELD(35, "BINIT received from external bus"), - SBITFIELD(37, "Received hard error reponse on split transaction (Bus BINIT)"), - {} -@@ -94,9 +94,9 @@ static struct field core2_status[] = { - SBITFIELD(28, "MCE driven"), - SBITFIELD(29, "MCE is observed"), - SBITFIELD(31, "BINIT observed"), -- FIELD(32, NULL), -+ FIELD_NULL(32), - SBITFIELD(34, "PIC or FSB data parity error"), -- FIELD(35, NULL), -+ FIELD_NULL(35), - SBITFIELD(37, "FSB address parity error detected"), - {} - }; -diff --git a/mce-intel-tulsa.c b/mce-intel-tulsa.c -index 6cea421..e59bf06 100644 ---- a/mce-intel-tulsa.c -+++ b/mce-intel-tulsa.c -@@ -39,7 +39,7 @@ static struct field tls_bus_status[] = { - SBITFIELD(16, "Parity error detected during FSB request phase"), - SBITFIELD(17, "Partity error detected on Core 0 request's address field"), - SBITFIELD(18, "Partity error detected on Core 1 request's address 
field"), -- FIELD(19, NULL), -+ FIELD_NULL(19), - SBITFIELD(20, "Parity error on FSB response field detected"), - SBITFIELD(21, "FSB data parity error on inbound date detected"), - SBITFIELD(22, "Data parity error on data received from Core 0 detected"), -@@ -48,8 +48,8 @@ static struct field tls_bus_status[] = { - SBITFIELD(25, "Data ECC event to error on inbound data correctable or uncorrectable"), - SBITFIELD(26, "Pad logic detected a data strobe glitch or sequencing error"), - SBITFIELD(27, "Pad logic detected a request strobe glitch or sequencing error"), -- FIELD(28, NULL), -- FIELD(31, NULL), -+ FIELD_NULL(28), -+ FIELD_NULL(31), - {} - }; - diff --git a/SOURCES/add_upstream_labels.patch b/SOURCES/add_upstream_labels.patch deleted file mode 100644 index 70a04df..0000000 --- a/SOURCES/add_upstream_labels.patch +++ /dev/null @@ -1,159 +0,0 @@ ---- - labels/dell | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 152 insertions(+) - ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ rasdaemon-0.6.1/labels/dell 2020-02-20 11:53:39.574579258 -0500 -@@ -0,0 +1,152 @@ -+# RASDAEMON Motherboard DIMM labels Database file. -+# -+# Vendor-name and model-name are found from the program 'dmidecode' -+# labels are found from the silk screen on the motherboard. -+# -+#Vendor: -+# Product: -+# Model: -+#