import CS ppc64-diag-2.7.9-3.el8

This commit is contained in:
eabdullin 2024-04-10 17:55:52 +00:00
parent 7ca424e2ff
commit bc0dc46c42
6 changed files with 427 additions and 1 deletions

View File

@ -0,0 +1,71 @@
commit db0c6d7974d7f8909878384d77ec02457759d6df
Author: Nilay Shroff <nilay@linux.ibm.com>
Date: Tue Jan 16 13:55:03 2024 +0530
diags/diag_nvme: call_home command fails on nvmf drive
The diag_nvme command needs to retrieve the VPD log page from NVMe for
filling in the product data while generating the call-home event.
However, call-home feature is supported for directly attached NVMe
module. In the current diag_nvme implementation, if user doesn't
provide NVMe device name for diagnostics then it(diag_nvme) loops
through each NVMe moudle (directly connected to the system/LPAR as
well as discovered over fabrics) and attempt retrieving the SMART log
page as well as VPD page. Unfortunately, diag_nvme fails to retrieve
the VPD page for NVMe connected over fabrics and that causes the
diag_nvme to print "not-so-nice" failure messages on console.
Henec fixed the diag_nvme code so that for call-home event reporting,
it skips the NVMe which is connected over fabrics and prints a
"nice-message" informing the user that it's skipping diagnosting for
NVMe module connected over fabrics. In a nutshell, with this fix now
diag_nvme would only diagnose the NVMe module which is directtly
attached (over PCIe) to the system.
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index c1c0a20..e86786c 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -375,9 +375,40 @@ static int diagnose_nvme(char *device_name, struct notify *notify, char *file_pa
char endurance_s[sizeof(vpd.endurance) + 1], capacity_s[sizeof(vpd.capacity)+1];
uint64_t event_id;
uint8_t severity;
+ FILE *fp;
+ char tr_file_path[PATH_MAX];
uint32_t raw_data_len = 0;
unsigned char *raw_data = NULL;
+ /*
+ * Skip diag test if NVMe is connected over fabric
+ */
+ snprintf(tr_file_path, sizeof(tr_file_path),
+ NVME_SYS_PATH"/%s/%s", device_name, "transport");
+ fp = fopen(tr_file_path, "r");
+ if (fp) {
+ char buf[12];
+ int n = fread(buf, 1, sizeof(buf), fp);
+
+ if (n) {
+ /*
+ * If NVMe transport is anything but pcie then skip the diag test
+ */
+ if (strncmp(buf, "pcie", 4) != 0) {
+ fprintf(stdout, "Skipping diagnostics for nvmf : %s\n",
+ device_name);
+ fclose(fp);
+ return 0;
+ }
+ }
+ fclose(fp);
+ } else {
+ fprintf(stderr, "Skipping diagnostics for %s:\n"
+ "Unable to find the nvme transport type\n",
+ device_name);
+ return -1;
+ }
+
tmp_rc = regex_controller(controller_name, device_name);
if (tmp_rc != 0)
return -1;

View File

@ -0,0 +1,63 @@
commit 0fa486dbe800bea05c81fc33eee197873573fefb
Author: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Fri Sep 8 12:35:14 2023 +0530
ppc64-diag/lp_diag: Enable light path diagnostics for RTAS events
Currently, Light Path Diagnostics support is enabled only for OS and
Enclosure type events. Enable light path diagnostics support for RTAS
type events by turning on only the high priority FRU callouts.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
diff --git a/lpd/lp_diag.c b/lpd/lp_diag.c
index e6f5d3c..e67db02 100644
--- a/lpd/lp_diag.c
+++ b/lpd/lp_diag.c
@@ -37,6 +37,8 @@
#include "lp_util.h"
#include "utils.h"
+static int rtas_event;
+
/* FRU callout priority as defined in PAPR+
*
* Note: Order of the priority is important!
@@ -173,8 +175,10 @@ service_event_supported(struct sl_event *event)
return 0;
}
break;
- case SL_TYPE_BMC:
case SL_TYPE_RTAS:
+ rtas_event = 1;
+ break;
+ case SL_TYPE_BMC:
case SL_TYPE_BASIC:
default:
return 0;
@@ -446,14 +450,20 @@ parse_service_event(int event_id)
attn_loc = &list[0];
if (operating_mode == LED_MODE_LIGHT_PATH) {
- if (event->callouts)
+ if (event->callouts) {
/* Run over FRU callout priority in order and
* enable fault indicator
*/
- for (i = 0; FRU_CALLOUT_PRIORITY[i]; i++)
+ if (!rtas_event) {
+ for (i = 0; FRU_CALLOUT_PRIORITY[i]; i++)
+ rc = event_fru_callout(event->callouts, list,
+ FRU_CALLOUT_PRIORITY[i],
+ &attn_on);
+ } else {
rc = event_fru_callout(event->callouts, list,
- FRU_CALLOUT_PRIORITY[i],
- &attn_on);
+ 'H', &attn_on);
+ }
+ }
else {
/* No callout list, enable check log indicator */
indicator_log_write("Empty callout list");

View File

@ -0,0 +1,100 @@
commit d05654e5ec6f37cf6caa491fc7d95b336f9603e2
Author: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon Jul 10 13:43:21 2023 +0530
rtas_errd: Handle multiple platform dumps
Currently, whenever a new dump arrives, old dump file of that specific dump
type is removed before writing the new dump out. Any dump file with the
same prefix (dump type) gets deleted. This means only one set of dump files
is saved, since only one dump file per dump type is saved.
Handle multiple dumps on Linux by allowing as many dumps to be offloaded
until disk space is available. To do this, remove the function that checks
for prefix size and removes old dump files. In the event of not enough
disk space available, log an error to the user along with the dump tag.
User will free up space and run extract_platdump tool using the dump tag
provided in the error message to offload the dump. Error log can be viewed
by the user by issuing 'journalctl -p err -t rtas_errd' command.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
diff --git a/rtas_errd/dump.c b/rtas_errd/dump.c
index cc50d91..494c322 100644
--- a/rtas_errd/dump.c
+++ b/rtas_errd/dump.c
@@ -30,8 +30,10 @@
#include <fcntl.h>
#include <librtas.h>
#include <librtasevent.h>
+#include <syslog.h>
#include <sys/stat.h>
#include <sys/wait.h>
+#include <sys/statvfs.h>
#include "utils.h"
#include "rtas_errd.h"
@@ -284,7 +286,9 @@ void
check_platform_dump(struct event *event)
{
struct rtas_dump_scn *dump_scn;
+ struct statvfs vfs;
uint64_t dump_tag;
+ uint64_t dump_size;
char filename[DUMP_MAX_FNAME_LEN + 20], *pos;
char *pathname = NULL;
FILE *f;
@@ -306,11 +310,34 @@ check_platform_dump(struct event *event)
return;
}
- /* Retrieve the dump */
+ /* Retrieve the dump tag */
dump_tag = dump_scn->id;
dump_tag |= ((uint64_t)dump_scn->v6hdr.subtype << 32);
dbg("Dump ID: 0x%016LX", dump_tag);
+ if (statvfs(d_cfg.platform_dump_path, &vfs) == -1) {
+ log_msg(event, "statvfs() failed on %s: %s",
+ d_cfg.platform_dump_path, strerror(errno));
+ return;
+ }
+
+ /* Retrieve the size of the platform dump */
+ dump_size = dump_scn->size_hi;
+ dump_size <<= 32;
+ dump_size |= dump_scn->size_lo;
+
+ /* Check if there is sufficient space in the file system to store the dump */
+ if (vfs.f_bavail * vfs.f_frsize < dump_size) {
+ syslog(LOG_ERR, "Insufficient space in %s to store platform dump for dump ID: "
+ "0x%016lX (required: %lu bytes, available: %lu bytes)",
+ d_cfg.platform_dump_path, dump_tag, dump_size,
+ (vfs.f_bavail * vfs.f_frsize));
+ syslog(LOG_ERR, "After clearing space, run 'extract_platdump "
+ "0x%016lX'.\n", dump_tag);
+ return;
+ }
+
+ /* Retrieve the dump */
snprintf(tmp_sys_arg, 60, "0x%016LX", (long long unsigned int)dump_tag);
system_args[0] = EXTRACT_PLATDUMP_CMD;
system_args[1] = tmp_sys_arg;
diff --git a/rtas_errd/extract_platdump.c b/rtas_errd/extract_platdump.c
index fbe65b2..831e57e 100644
--- a/rtas_errd/extract_platdump.c
+++ b/rtas_errd/extract_platdump.c
@@ -290,12 +290,6 @@ extract_platform_dump(uint64_t dump_tag)
}
}
- /*
- * Before writing the new dump out, we need to see if any older
- * dumps need to be removed first
- */
- remove_old_dumpfiles(filename, prefix_size);
-
/* Copy the dump off to the filesystem */
pathname[0] = '\0';
strcpy(pathname, d_cfg.platform_dump_path);

View File

@ -0,0 +1,136 @@
commit c507319d1b5f0286d67e08a3598949ca4144f475
Author: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Fri Sep 8 12:35:12 2023 +0530
ppc64-diag: Move trim_trail_space() function to common/utils.c
Currently, trim_trail_space() function is used in diags/diag_nvme.c file
to be able to trim trailing white spaces from a given location code. Allow
code reusability by moving the trim_trail_space() function from
diags/diag_nvme.c to common/utils.c.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
diff --git a/common/utils.c b/common/utils.c
index 0312943..2349878 100644
--- a/common/utils.c
+++ b/common/utils.c
@@ -24,9 +24,34 @@
#include <fcntl.h>
#include <string.h>
#include <assert.h>
+#include <ctype.h>
#include "utils.h"
+/* trim_trail_space - Trim trailing white spaces from string
+ * @string - Null terminated string to remove white spaces from
+ *
+ * This function will alter the passed string by removing any trailing white spaces and null
+ * terminating it at that point.
+ */
+void trim_trail_space(char *string)
+{
+ char *end;
+ size_t length;
+
+ if (string == NULL)
+ return;
+
+ length = strlen(string);
+ if (length == 0)
+ return;
+
+ end = string + length - 1;
+ while (end >= string && isspace(*end))
+ end--;
+ *(end + 1) = '\0';
+}
+
static int process_child(char *argv[], int pipefd[])
{
int nullfd;
diff --git a/common/utils.h b/common/utils.h
index ec2072d..2459b5b 100644
--- a/common/utils.h
+++ b/common/utils.h
@@ -18,6 +18,7 @@
#ifndef UTILS_H
#define UTILS_H
+void trim_trail_space(char *string);
FILE *spopen(char **, pid_t *);
int spclose(FILE *, pid_t);
diff --git a/diags/Makefile.am b/diags/Makefile.am
index 4ac81b8..dea0a79 100644
--- a/diags/Makefile.am
+++ b/diags/Makefile.am
@@ -13,7 +13,8 @@ encl_led_h_files = diags/encl_led.h \
$(diag_common_h_files)
diag_nvme_h_files = diags/diag_nvme.h \
- common/platform.h
+ common/platform.h \
+ common/utils.h
sbin_PROGRAMS += diags/diag_encl diags/encl_led diags/diag_nvme
@@ -41,6 +42,7 @@ diags_encl_led_SOURCES = diags/encl_led.c \
diags_diag_nvme_SOURCES = diags/diag_nvme.c \
common/platform.c \
+ common/utils.c \
$(diag_nvme_h_files)
diags_diag_nvme_LDADD = -lservicelog -lm
diags_diag_nvme_CFLAGS = $(AM_CFLAGS) -Wno-stringop-truncation
diff --git a/diags/diag_nvme.c b/diags/diag_nvme.c
index 2a78034..2606f2c 100644
--- a/diags/diag_nvme.c
+++ b/diags/diag_nvme.c
@@ -27,6 +27,7 @@
#include <sys/utsname.h>
#include "diag_nvme.h"
#include "platform.h"
+#include "utils.h"
#define ITEM_DATA_LENGTH 255
#define MIN_HOURS_ON 720
@@ -71,7 +72,6 @@ static int raw_data_smart(unsigned char **raw_data, uint32_t *raw_data_len, stru
static int raw_data_vpd(unsigned char **raw_data, uint32_t *raw_data_len, struct nvme_ibm_vpd *vpd);
static int regex_controller(char *controller_name, char *device_name);
static void set_notify(struct notify *notify, struct dictionary *dict, int num_elements);
-static void trim_trail_space(char *string);
static long double uint128_to_long_double(uint8_t *data);
int main(int argc, char *argv[]) {
@@ -1426,28 +1426,6 @@ extern void set_vpd_pcie_field(const char *keyword, const char *vpd_data, struct
strncpy(vpd->firmware_level, vpd_data, sizeof(vpd->firmware_level));
}
-/* trim_trail_space - Trim trailing white spaces from string
- * @string - Null terminated string to remove white spaces from
- *
- * This function will alter the passed string by removing any trailing white spaces and null
- * terminating it at that point.
- */
-static void trim_trail_space(char *string) {
- char *end;
- size_t length;
-
- if (string == NULL)
- return;
-
- if ((length = strlen(string)) == 0)
- return;
-
- end = string + length - 1;
- while (end >= string && isspace(*end))
- end--;
- *(end + 1) = '\0';
-}
-
static long double uint128_to_long_double(uint8_t *data) {
int i;
long double value = 0;

View File

@ -0,0 +1,37 @@
commit 476b0af7516b86c4d98cfa229fb0c6b856eea31d
Author: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Fri Sep 8 12:35:13 2023 +0530
ppc64-diag/lp_diag: Utilize trim_trail_space() function in event_fru_callout()
Update the event_fru_callout() function to use the trim_trail_space()
function to be able to remove any trailing spaces from the location code.
This change aims to address an issue where the presence of trailing spaces
in the location code results in failure to find an indicator for the given
location code. Use trim_trail_space() on the location to ensure that the
device location code is properly compared with the indicator list.
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
diff --git a/lpd/lp_diag.c b/lpd/lp_diag.c
index 988a021..e6f5d3c 100644
--- a/lpd/lp_diag.c
+++ b/lpd/lp_diag.c
@@ -35,6 +35,7 @@
#include "servicelog.h"
#include "indicator.h"
#include "lp_util.h"
+#include "utils.h"
/* FRU callout priority as defined in PAPR+
*
@@ -344,6 +345,8 @@ event_fru_callout(struct sl_callout *callouts, struct loc_code *list,
/* get FRUs nearest fault indicator */
strncpy(location, callout->location, LOCATION_LENGTH);
location[LOCATION_LENGTH - 1] = '\0';
+ trim_trail_space(location);
+
loc_led = get_fru_indicator(list, location, &truncated);
if (!loc_led) { /* No indicator found for the given loc code */
*attn_on = 1;

View File

@ -3,7 +3,7 @@
Name: ppc64-diag
Version: 2.7.9
Release: 1%{?dist}
Release: 3%{?dist}
Summary: PowerLinux Platform Diagnostics
URL: https://github.com/power-ras/ppc64-diag
Group: System Environment/Base
@ -39,6 +39,16 @@ Source5: rtas_errd.8
# fix paths and permissions
Patch0: ppc64-diag-2.7.9-fedora.patch
# upstream fixes
# rtas_errd: Handle multiple platform dumps
Patch10: ppc64-diag-2.7.9-handle_multiple_platform_dumps.patch
# ppc64-diag/lp_diag: Enable light path diagnostics for RTAS events
Patch11: ppc64-diag-2.7.9-moving-trim_trail_space-function.patch
Patch12: ppc64-diag-2.7.9-utilize-trim_trail_space-in-event_fru_callout.patch
Patch13: ppc64-diag-2.7.9-enable-light-path-diagnostics-for-RTAS-events.patch
# call_home command "diag_nvme" fails on nvmf drive(nvmf/lpfc/Power10)
Patch14: ppc64-diag-2.7.9-call_home-fail-on-nvmf-device
%description
This package contains various diagnostic tools for PowerLinux.
These tools captures the diagnostic events from Power Systems
@ -173,6 +183,15 @@ if [ "$1" = "0" ]; then # last uninstall
fi
%changelog
* Wed Jan 31 2024 Than Ngo <than@redhat.com> - 2.7.9-3
- call_home command "diag_nvme" fails on nvmf drive
Resolves: RHEL-23437
* Sun Dec 10 2023 Than Ngo <than@redhat.com> - 2.7.9-2
- Enable light path diagnostics for RTAS events
- Handle multiple platform dumps
Resolves: RHEL-11454
* Wed Oct 19 2022 Than Ngo <than@redhat.com> - 2.7.9-1
- Resolves: #2114591, rebase to 2.7.9