From bc0dc46c4297b8d8ab7a881207f958c541b62b58 Mon Sep 17 00:00:00 2001 From: eabdullin Date: Wed, 10 Apr 2024 17:55:52 +0000 Subject: [PATCH] import CS ppc64-diag-2.7.9-3.el8 --- ...4-diag-2.7.9-call_home-fail-on-nvmf-device | 71 +++++++++ ...ght-path-diagnostics-for-RTAS-events.patch | 63 ++++++++ ...2.7.9-handle_multiple_platform_dumps.patch | 100 +++++++++++++ ...7.9-moving-trim_trail_space-function.patch | 136 ++++++++++++++++++ ...rim_trail_space-in-event_fru_callout.patch | 37 +++++ SPECS/ppc64-diag.spec | 21 ++- 6 files changed, 427 insertions(+), 1 deletion(-) create mode 100644 SOURCES/ppc64-diag-2.7.9-call_home-fail-on-nvmf-device create mode 100644 SOURCES/ppc64-diag-2.7.9-enable-light-path-diagnostics-for-RTAS-events.patch create mode 100644 SOURCES/ppc64-diag-2.7.9-handle_multiple_platform_dumps.patch create mode 100644 SOURCES/ppc64-diag-2.7.9-moving-trim_trail_space-function.patch create mode 100644 SOURCES/ppc64-diag-2.7.9-utilize-trim_trail_space-in-event_fru_callout.patch diff --git a/SOURCES/ppc64-diag-2.7.9-call_home-fail-on-nvmf-device b/SOURCES/ppc64-diag-2.7.9-call_home-fail-on-nvmf-device new file mode 100644 index 0000000..d3e3006 --- /dev/null +++ b/SOURCES/ppc64-diag-2.7.9-call_home-fail-on-nvmf-device @@ -0,0 +1,71 @@ +commit db0c6d7974d7f8909878384d77ec02457759d6df +Author: Nilay Shroff +Date: Tue Jan 16 13:55:03 2024 +0530 + + diags/diag_nvme: call_home command fails on nvmf drive + + The diag_nvme command needs to retrieve the VPD log page from NVMe for + filling in the product data while generating the call-home event. + However, call-home feature is supported for directly attached NVMe + module. In the current diag_nvme implementation, if user doesn't + provide NVMe device name for diagnostics then it(diag_nvme) loops + through each NVMe module (directly connected to the system/LPAR as + well as discovered over fabrics) and attempt retrieving the SMART log + page as well as VPD page. 
Unfortunately, diag_nvme fails to retrieve + the VPD page for NVMe connected over fabrics and that causes the + diag_nvme to print "not-so-nice" failure messages on console. + + Hence fixed the diag_nvme code so that for call-home event reporting, + it skips the NVMe which is connected over fabrics and prints a + "nice-message" informing the user that it's skipping diagnostics for + NVMe module connected over fabrics. In a nutshell, with this fix now + diag_nvme would only diagnose the NVMe module which is directly + attached (over PCIe) to the system. + + Signed-off-by: Nilay Shroff + +diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c +index c1c0a20..e86786c 100644 +--- a/diags/diag_nvme.c ++++ b/diags/diag_nvme.c +@@ -375,9 +375,40 @@ static int diagnose_nvme(char *device_name, struct notify *notify, char *file_pa + char endurance_s[sizeof(vpd.endurance) + 1], capacity_s[sizeof(vpd.capacity)+1]; + uint64_t event_id; + uint8_t severity; ++ FILE *fp; ++ char tr_file_path[PATH_MAX]; + uint32_t raw_data_len = 0; + unsigned char *raw_data = NULL; + ++ /* ++ * Skip diag test if NVMe is connected over fabric ++ */ ++ snprintf(tr_file_path, sizeof(tr_file_path), ++ NVME_SYS_PATH"/%s/%s", device_name, "transport"); ++ fp = fopen(tr_file_path, "r"); ++ if (fp) { ++ char buf[12]; ++ int n = fread(buf, 1, sizeof(buf), fp); ++ ++ if (n) { ++ /* ++ * If NVMe transport is anything but pcie then skip the diag test ++ */ ++ if (strncmp(buf, "pcie", 4) != 0) { ++ fprintf(stdout, "Skipping diagnostics for nvmf : %s\n", ++ device_name); ++ fclose(fp); ++ return 0; ++ } ++ } ++ fclose(fp); ++ } else { ++ fprintf(stderr, "Skipping diagnostics for %s:\n" ++ "Unable to find the nvme transport type\n", ++ device_name); ++ return -1; ++ } ++ + tmp_rc = regex_controller(controller_name, device_name); + if (tmp_rc != 0) + return -1; diff --git a/SOURCES/ppc64-diag-2.7.9-enable-light-path-diagnostics-for-RTAS-events.patch 
b/SOURCES/ppc64-diag-2.7.9-enable-light-path-diagnostics-for-RTAS-events.patch new file mode 100644 index 0000000..67d843f --- /dev/null +++ b/SOURCES/ppc64-diag-2.7.9-enable-light-path-diagnostics-for-RTAS-events.patch @@ -0,0 +1,63 @@ +commit 0fa486dbe800bea05c81fc33eee197873573fefb +Author: Sathvika Vasireddy +Date: Fri Sep 8 12:35:14 2023 +0530 + + ppc64-diag/lp_diag: Enable light path diagnostics for RTAS events + + Currently, Light Path Diagnostics support is enabled only for OS and + Enclosure type events. Enable light path diagnostics support for RTAS + type events by turning on only the high priority FRU callouts. + + Signed-off-by: Sathvika Vasireddy + Signed-off-by: Mahesh Salgaonkar + +diff --git a/lpd/lp_diag.c b/lpd/lp_diag.c +index e6f5d3c..e67db02 100644 +--- a/lpd/lp_diag.c ++++ b/lpd/lp_diag.c +@@ -37,6 +37,8 @@ + #include "lp_util.h" + #include "utils.h" + ++static int rtas_event; ++ + /* FRU callout priority as defined in PAPR+ + * + * Note: Order of the priority is important! 
+@@ -173,8 +175,10 @@ service_event_supported(struct sl_event *event) + return 0; + } + break; +- case SL_TYPE_BMC: + case SL_TYPE_RTAS: ++ rtas_event = 1; ++ break; ++ case SL_TYPE_BMC: + case SL_TYPE_BASIC: + default: + return 0; +@@ -446,14 +450,20 @@ parse_service_event(int event_id) + attn_loc = &list[0]; + + if (operating_mode == LED_MODE_LIGHT_PATH) { +- if (event->callouts) ++ if (event->callouts) { + /* Run over FRU callout priority in order and + * enable fault indicator + */ +- for (i = 0; FRU_CALLOUT_PRIORITY[i]; i++) ++ if (!rtas_event) { ++ for (i = 0; FRU_CALLOUT_PRIORITY[i]; i++) ++ rc = event_fru_callout(event->callouts, list, ++ FRU_CALLOUT_PRIORITY[i], ++ &attn_on); ++ } else { + rc = event_fru_callout(event->callouts, list, +- FRU_CALLOUT_PRIORITY[i], +- &attn_on); ++ 'H', &attn_on); ++ } ++ } + else { + /* No callout list, enable check log indicator */ + indicator_log_write("Empty callout list"); diff --git a/SOURCES/ppc64-diag-2.7.9-handle_multiple_platform_dumps.patch b/SOURCES/ppc64-diag-2.7.9-handle_multiple_platform_dumps.patch new file mode 100644 index 0000000..0846300 --- /dev/null +++ b/SOURCES/ppc64-diag-2.7.9-handle_multiple_platform_dumps.patch @@ -0,0 +1,100 @@ +commit d05654e5ec6f37cf6caa491fc7d95b336f9603e2 +Author: Sathvika Vasireddy +Date: Mon Jul 10 13:43:21 2023 +0530 + + rtas_errd: Handle multiple platform dumps + + Currently, whenever a new dump arrives, old dump file of that specific dump + type is removed before writing the new dump out. Any dump file with the + same prefix (dump type) gets deleted. This means only one set of dump files + is saved, since only one dump file per dump type is saved. + + Handle multiple dumps on Linux by allowing as many dumps to be offloaded + until disk space is available. To do this, remove the function that checks + for prefix size and removes old dump files. In the event of not enough + disk space available, log an error to the user along with the dump tag. 
+ User will free up space and run extract_platdump tool using the dump tag + provided in the error message to offload the dump. Error log can be viewed + by the user by issuing 'journalctl -p err -t rtas_errd' command. + + Signed-off-by: Sathvika Vasireddy + Signed-off-by: Mahesh Salgaonkar + +diff --git a/rtas_errd/dump.c b/rtas_errd/dump.c +index cc50d91..494c322 100644 +--- a/rtas_errd/dump.c ++++ b/rtas_errd/dump.c +@@ -30,8 +30,10 @@ + #include + #include + #include ++#include + #include + #include ++#include + #include "utils.h" + #include "rtas_errd.h" + +@@ -284,7 +286,9 @@ void + check_platform_dump(struct event *event) + { + struct rtas_dump_scn *dump_scn; ++ struct statvfs vfs; + uint64_t dump_tag; ++ uint64_t dump_size; + char filename[DUMP_MAX_FNAME_LEN + 20], *pos; + char *pathname = NULL; + FILE *f; +@@ -306,11 +310,34 @@ check_platform_dump(struct event *event) + return; + } + +- /* Retrieve the dump */ ++ /* Retrieve the dump tag */ + dump_tag = dump_scn->id; + dump_tag |= ((uint64_t)dump_scn->v6hdr.subtype << 32); + dbg("Dump ID: 0x%016LX", dump_tag); + ++ if (statvfs(d_cfg.platform_dump_path, &vfs) == -1) { ++ log_msg(event, "statvfs() failed on %s: %s", ++ d_cfg.platform_dump_path, strerror(errno)); ++ return; ++ } ++ ++ /* Retrieve the size of the platform dump */ ++ dump_size = dump_scn->size_hi; ++ dump_size <<= 32; ++ dump_size |= dump_scn->size_lo; ++ ++ /* Check if there is sufficient space in the file system to store the dump */ ++ if (vfs.f_bavail * vfs.f_frsize < dump_size) { ++ syslog(LOG_ERR, "Insufficient space in %s to store platform dump for dump ID: " ++ "0x%016lX (required: %lu bytes, available: %lu bytes)", ++ d_cfg.platform_dump_path, dump_tag, dump_size, ++ (vfs.f_bavail * vfs.f_frsize)); ++ syslog(LOG_ERR, "After clearing space, run 'extract_platdump " ++ "0x%016lX'.\n", dump_tag); ++ return; ++ } ++ ++ /* Retrieve the dump */ + snprintf(tmp_sys_arg, 60, "0x%016LX", (long long unsigned int)dump_tag); + system_args[0] = 
EXTRACT_PLATDUMP_CMD; + system_args[1] = tmp_sys_arg; +diff --git a/rtas_errd/extract_platdump.c b/rtas_errd/extract_platdump.c +index fbe65b2..831e57e 100644 +--- a/rtas_errd/extract_platdump.c ++++ b/rtas_errd/extract_platdump.c +@@ -290,12 +290,6 @@ extract_platform_dump(uint64_t dump_tag) + } + } + +- /* +- * Before writing the new dump out, we need to see if any older +- * dumps need to be removed first +- */ +- remove_old_dumpfiles(filename, prefix_size); +- + /* Copy the dump off to the filesystem */ + pathname[0] = '\0'; + strcpy(pathname, d_cfg.platform_dump_path); diff --git a/SOURCES/ppc64-diag-2.7.9-moving-trim_trail_space-function.patch b/SOURCES/ppc64-diag-2.7.9-moving-trim_trail_space-function.patch new file mode 100644 index 0000000..53b3f40 --- /dev/null +++ b/SOURCES/ppc64-diag-2.7.9-moving-trim_trail_space-function.patch @@ -0,0 +1,136 @@ +commit c507319d1b5f0286d67e08a3598949ca4144f475 +Author: Sathvika Vasireddy +Date: Fri Sep 8 12:35:12 2023 +0530 + + ppc64-diag: Move trim_trail_space() function to common/utils.c + + Currently, trim_trail_space() function is used in diags/diag_nvme.c file + to be able to trim trailing white spaces from a given location code. Allow + code reusability by moving the trim_trail_space() function from + diags/diag_nvme.c to common/utils.c. + + Signed-off-by: Sathvika Vasireddy + Signed-off-by: Mahesh Salgaonkar + +diff --git a/common/utils.c b/common/utils.c +index 0312943..2349878 100644 +--- a/common/utils.c ++++ b/common/utils.c +@@ -24,9 +24,34 @@ + #include + #include + #include ++#include + + #include "utils.h" + ++/* trim_trail_space - Trim trailing white spaces from string ++ * @string - Null terminated string to remove white spaces from ++ * ++ * This function will alter the passed string by removing any trailing white spaces and null ++ * terminating it at that point. 
++ */ ++void trim_trail_space(char *string) ++{ ++ char *end; ++ size_t length; ++ ++ if (string == NULL) ++ return; ++ ++ length = strlen(string); ++ if (length == 0) ++ return; ++ ++ end = string + length - 1; ++ while (end >= string && isspace(*end)) ++ end--; ++ *(end + 1) = '\0'; ++} ++ + static int process_child(char *argv[], int pipefd[]) + { + int nullfd; +diff --git a/common/utils.h b/common/utils.h +index ec2072d..2459b5b 100644 +--- a/common/utils.h ++++ b/common/utils.h +@@ -18,6 +18,7 @@ + #ifndef UTILS_H + #define UTILS_H + ++void trim_trail_space(char *string); + FILE *spopen(char **, pid_t *); + int spclose(FILE *, pid_t); + +diff --git a/diags/Makefile.am b/diags/Makefile.am +index 4ac81b8..dea0a79 100644 +--- a/diags/Makefile.am ++++ b/diags/Makefile.am +@@ -13,7 +13,8 @@ encl_led_h_files = diags/encl_led.h \ + $(diag_common_h_files) + + diag_nvme_h_files = diags/diag_nvme.h \ +- common/platform.h ++ common/platform.h \ ++ common/utils.h + + sbin_PROGRAMS += diags/diag_encl diags/encl_led diags/diag_nvme + +@@ -41,6 +42,7 @@ diags_encl_led_SOURCES = diags/encl_led.c \ + + diags_diag_nvme_SOURCES = diags/diag_nvme.c \ + common/platform.c \ ++ common/utils.c \ + $(diag_nvme_h_files) + diags_diag_nvme_LDADD = -lservicelog -lm + diags_diag_nvme_CFLAGS = $(AM_CFLAGS) -Wno-stringop-truncation +diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c +index 2a78034..2606f2c 100644 +--- a/diags/diag_nvme.c ++++ b/diags/diag_nvme.c +@@ -27,6 +27,7 @@ + #include + #include "diag_nvme.h" + #include "platform.h" ++#include "utils.h" + + #define ITEM_DATA_LENGTH 255 + #define MIN_HOURS_ON 720 +@@ -71,7 +72,6 @@ static int raw_data_smart(unsigned char **raw_data, uint32_t *raw_data_len, stru + static int raw_data_vpd(unsigned char **raw_data, uint32_t *raw_data_len, struct nvme_ibm_vpd *vpd); + static int regex_controller(char *controller_name, char *device_name); + static void set_notify(struct notify *notify, struct dictionary *dict, int num_elements); +-static 
void trim_trail_space(char *string); + static long double uint128_to_long_double(uint8_t *data); + + int main(int argc, char *argv[]) { +@@ -1426,28 +1426,6 @@ extern void set_vpd_pcie_field(const char *keyword, const char *vpd_data, struct + strncpy(vpd->firmware_level, vpd_data, sizeof(vpd->firmware_level)); + } + +-/* trim_trail_space - Trim trailing white spaces from string +- * @string - Null terminated string to remove white spaces from +- * +- * This function will alter the passed string by removing any trailing white spaces and null +- * terminating it at that point. +- */ +-static void trim_trail_space(char *string) { +- char *end; +- size_t length; +- +- if (string == NULL) +- return; +- +- if ((length = strlen(string)) == 0) +- return; +- +- end = string + length - 1; +- while (end >= string && isspace(*end)) +- end--; +- *(end + 1) = '\0'; +-} +- + static long double uint128_to_long_double(uint8_t *data) { + int i; + long double value = 0; diff --git a/SOURCES/ppc64-diag-2.7.9-utilize-trim_trail_space-in-event_fru_callout.patch b/SOURCES/ppc64-diag-2.7.9-utilize-trim_trail_space-in-event_fru_callout.patch new file mode 100644 index 0000000..2b0692b --- /dev/null +++ b/SOURCES/ppc64-diag-2.7.9-utilize-trim_trail_space-in-event_fru_callout.patch @@ -0,0 +1,37 @@ +commit 476b0af7516b86c4d98cfa229fb0c6b856eea31d +Author: Sathvika Vasireddy +Date: Fri Sep 8 12:35:13 2023 +0530 + + ppc64-diag/lp_diag: Utilize trim_trail_space() function in event_fru_callout() + + Update the event_fru_callout() function to use the trim_trail_space() + function to be able to remove any trailing spaces from the location code. + This change aims to address an issue where the presence of trailing spaces + in the location code results in failure to find an indicator for the given + location code. Use trim_trail_space() on the location to ensure that the + device location code is properly compared with the indicator list. 
+ + Signed-off-by: Sathvika Vasireddy + Signed-off-by: Mahesh Salgaonkar + +diff --git a/lpd/lp_diag.c b/lpd/lp_diag.c +index 988a021..e6f5d3c 100644 +--- a/lpd/lp_diag.c ++++ b/lpd/lp_diag.c +@@ -35,6 +35,7 @@ + #include "servicelog.h" + #include "indicator.h" + #include "lp_util.h" ++#include "utils.h" + + /* FRU callout priority as defined in PAPR+ + * +@@ -344,6 +345,8 @@ event_fru_callout(struct sl_callout *callouts, struct loc_code *list, + /* get FRUs nearest fault indicator */ + strncpy(location, callout->location, LOCATION_LENGTH); + location[LOCATION_LENGTH - 1] = '\0'; ++ trim_trail_space(location); ++ + loc_led = get_fru_indicator(list, location, &truncated); + if (!loc_led) { /* No indicator found for the given loc code */ + *attn_on = 1; diff --git a/SPECS/ppc64-diag.spec b/SPECS/ppc64-diag.spec index 75e70a3..4cde2ee 100644 --- a/SPECS/ppc64-diag.spec +++ b/SPECS/ppc64-diag.spec @@ -3,7 +3,7 @@ Name: ppc64-diag Version: 2.7.9 -Release: 1%{?dist} +Release: 3%{?dist} Summary: PowerLinux Platform Diagnostics URL: https://github.com/power-ras/ppc64-diag Group: System Environment/Base @@ -39,6 +39,16 @@ Source5: rtas_errd.8 # fix paths and permissions Patch0: ppc64-diag-2.7.9-fedora.patch +# upstream fixes +# rtas_errd: Handle multiple platform dumps +Patch10: ppc64-diag-2.7.9-handle_multiple_platform_dumps.patch +# ppc64-diag/lp_diag: Enable light path diagnostics for RTAS events +Patch11: ppc64-diag-2.7.9-moving-trim_trail_space-function.patch +Patch12: ppc64-diag-2.7.9-utilize-trim_trail_space-in-event_fru_callout.patch +Patch13: ppc64-diag-2.7.9-enable-light-path-diagnostics-for-RTAS-events.patch +# call_home command "diag_nvme" fails on nvmf drive(nvmf/lpfc/Power10) +Patch14: ppc64-diag-2.7.9-call_home-fail-on-nvmf-device + %description This package contains various diagnostic tools for PowerLinux. 
These tools captures the diagnostic events from Power Systems @@ -173,6 +183,15 @@ if [ "$1" = "0" ]; then # last uninstall fi %changelog +* Wed Jan 31 2024 Than Ngo - 2.7.9-3 +- call_home command "diag_nvme" fails on nvmf drive + Resolves: RHEL-23437 + +* Sun Dec 10 2023 Than Ngo - 2.7.9-2 +- Enable light path diagnostics for RTAS events +- Handle multiple platform dumps + Resolves: RHEL-11454 + * Wed Oct 19 2022 Than Ngo - 2.7.9-1 - Resolves: #2114591, rebase to 2.7.9