Compare commits

...

No commits in common. "c8" and "c8-beta" have entirely different histories.
c8 ... c8-beta

9 changed files with 2233 additions and 428 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,85 @@
From da45fc39390208c30b3ba656ccfb478e217b7401 Mon Sep 17 00:00:00 2001
From: "Milan P. Gandhi" <mgandhi@redhat.com>
Date: Mon, 17 Oct 2022 14:23:54 +0530
Subject: [PATCH 1/3] scsiprint.cpp: Attempted fix to tickets 1272, 1331 and
1346: Log sub-page handling
---
smartmontools-7.1/scsiprint.cpp | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)
diff --git a/smartmontools-7.1/scsiprint.cpp b/smartmontools-7.1/scsiprint.cpp
index 4c52268..1edb7c2 100644
--- a/smartmontools-7.1/scsiprint.cpp
+++ b/smartmontools-7.1/scsiprint.cpp
@@ -118,8 +118,10 @@ static void
scsiGetSupportedLogPages(scsi_device * device)
{
bool got_subpages = false;
- int k, bump, err, payload_len, num_unreported, num_unreported_spg;
- int payload_len_pg0_0 = 0;
+ int k, bump, err, resp_len, num_unreported, num_unreported_spg;
+ int resp_len_pg0_0 = 0;
+ int resp_len_pg0_ff = 0; /* in SPC-4, response length of supported
+ * log pages _and_ log subpages */
const uint8_t * up;
uint8_t sup_lpgs[LOG_RESP_LEN];
@@ -143,7 +145,7 @@ scsiGetSupportedLogPages(scsi_device * device)
(scsi_version <= SCSI_VERSION_HIGHEST)) {
/* unclear what code T10 will choose for SPC-6 */
memcpy(sup_lpgs, gBuf, LOG_RESP_LEN);
- payload_len_pg0_0 = sup_lpgs[3];
+ resp_len_pg0_0 = sup_lpgs[3];
if ((err = scsiLogSense(device, SUPPORTED_LPAGES, SUPP_SPAGE_L_SPAGE,
gBuf, LOG_RESP_LONG_LEN,
-1 /* just single not double fetch */))) {
@@ -160,33 +162,38 @@ scsiGetSupportedLogPages(scsi_device * device)
if (scsi_debugmode > 0)
pout("%s supported subpages is bad SPF=%u SUBPG=%u\n",
logSenRspStr, !! (0x40 & gBuf[0]), gBuf[2]);
- } else
+ } else {
+ resp_len_pg0_ff = sg_get_unaligned_be16(gBuf + 2);
got_subpages = true;
+ }
}
- } else
+ } else {
memcpy(sup_lpgs, gBuf, LOG_RESP_LEN);
+ resp_len_pg0_0 = sup_lpgs[3];
+ }
if (got_subpages) {
- payload_len = sg_get_unaligned_be16(gBuf + 2);
- if (payload_len <= payload_len_pg0_0) {
+ resp_len = sg_get_unaligned_be16(gBuf + 2);
+ if (resp_len_pg0_ff <= resp_len_pg0_0) {
/* something is rotten ....., ignore SUPP_SPAGE_L_SPAGE */
- payload_len = payload_len_pg0_0;
+ resp_len = resp_len_pg0_0;
bump = 1;
up = sup_lpgs + LOGPAGEHDRSIZE;
got_subpages = false;
(void)got_subpages; // not yet used below, suppress warning
} else {
+ resp_len = resp_len_pg0_ff;
bump = 2;
up = gBuf + LOGPAGEHDRSIZE;
}
} else {
- payload_len = payload_len_pg0_0;
+ resp_len = resp_len_pg0_0;
bump = 1;
up = sup_lpgs + LOGPAGEHDRSIZE;
}
num_unreported_spg = 0;
- for (num_unreported = 0, k = 0; k < payload_len; k += bump, up += bump) {
+ for (num_unreported = 0, k = 0; k < resp_len; k += bump, up += bump) {
uint8_t pg_num = 0x3f & up[0];
uint8_t sub_pg_num = (0x40 & up[0]) ? up[1] : 0;
--
2.35.1

View File

@ -0,0 +1,185 @@
From b6064d5ba30ee355e71e7543fdb66ea99fcebae4 Mon Sep 17 00:00:00 2001
From: "Milan P. Gandhi" <mgandhi@redhat.com>
Date: Mon, 17 Oct 2022 14:24:42 +0530
Subject: [PATCH 2/3] scsiprint.cpp: Add 'Accumulated power on time' field to
'smartctl -a'
---
smartmontools-7.1/scsiprint.cpp | 82 ++++++++++++++++++++++-----------
1 file changed, 56 insertions(+), 26 deletions(-)
diff --git a/smartmontools-7.1/scsiprint.cpp b/smartmontools-7.1/scsiprint.cpp
index 1edb7c2..81bed88 100644
--- a/smartmontools-7.1/scsiprint.cpp
+++ b/smartmontools-7.1/scsiprint.cpp
@@ -120,7 +120,7 @@ scsiGetSupportedLogPages(scsi_device * device)
bool got_subpages = false;
int k, bump, err, resp_len, num_unreported, num_unreported_spg;
int resp_len_pg0_0 = 0;
- int resp_len_pg0_ff = 0; /* in SPC-4, response length of supported
+ int resp_len_pg0_ff = 0; /* in SPC-4, response length of supported
* log pages _and_ log subpages */
const uint8_t * up;
uint8_t sup_lpgs[LOG_RESP_LEN];
@@ -163,13 +163,13 @@ scsiGetSupportedLogPages(scsi_device * device)
pout("%s supported subpages is bad SPF=%u SUBPG=%u\n",
logSenRspStr, !! (0x40 & gBuf[0]), gBuf[2]);
} else {
- resp_len_pg0_ff = sg_get_unaligned_be16(gBuf + 2);
+ resp_len_pg0_ff = sg_get_unaligned_be16(gBuf + 2);
got_subpages = true;
- }
+ }
}
} else {
memcpy(sup_lpgs, gBuf, LOG_RESP_LEN);
- resp_len_pg0_0 = sup_lpgs[3];
+ resp_len_pg0_0 = sup_lpgs[3];
}
if (got_subpages) {
@@ -182,7 +182,7 @@ scsiGetSupportedLogPages(scsi_device * device)
got_subpages = false;
(void)got_subpages; // not yet used below, suppress warning
} else {
- resp_len = resp_len_pg0_ff;
+ resp_len = resp_len_pg0_ff;
bump = 2;
up = gBuf + LOGPAGEHDRSIZE;
}
@@ -1162,12 +1162,14 @@ static const char * reassign_status[] = {
// Returns 0 if ok else FAIL* bitmask. Note can have a status entry
// and up to 2048 events (although would hope to have less). May set
// FAILLOG if serious errors detected (in the future).
+// When only_pow_time is true only print "Accumulated power on time"
+// data, if available.
static int
-scsiPrintBackgroundResults(scsi_device * device)
+scsiPrintBackgroundResults(scsi_device * device, bool only_pow_time)
{
+ bool noheader = true;
+ bool firstresult = true;
int num, j, m, err, truncated;
- int noheader = 1;
- int firstresult = 1;
int retval = 0;
uint8_t * ucp;
static const char * hname = "Background scan results";
@@ -1188,9 +1190,12 @@ scsiPrintBackgroundResults(scsi_device * device)
// compute page length
num = sg_get_unaligned_be16(gBuf + 2) + 4;
if (num < 20) {
- print_on();
- pout("%s %s length is %d, no scan status\n", hname, logSenStr, num);
- print_off();
+ if (! only_pow_time) {
+ print_on();
+ pout("%s %s length is %d, no scan status\n", hname, logSenStr,
+ num);
+ print_off();
+ }
return FAILSMART;
}
truncated = (num > LOG_RESP_LONG_LEN) ? num : 0;
@@ -1205,22 +1210,32 @@ scsiPrintBackgroundResults(scsi_device * device)
switch (pc) {
case 0:
if (noheader) {
- noheader = 0;
- pout("%s log\n", hname);
+ noheader = false;
+ if (! only_pow_time)
+ pout("%s log\n", hname);
}
- pout(" Status: ");
+ if (! only_pow_time)
+ pout(" Status: ");
if ((pl < 16) || (num < 16)) {
- pout("\n");
+ if (! only_pow_time)
+ pout("\n");
break;
}
j = ucp[9];
- if (j < (int)(sizeof(bms_status) / sizeof(bms_status[0])))
- pout("%s\n", bms_status[j]);
- else
- pout("unknown [0x%x] background scan status value\n", j);
+ if (! only_pow_time) {
+ if (j < (int)(sizeof(bms_status) / sizeof(bms_status[0])))
+ pout("%s\n", bms_status[j]);
+ else
+ pout("unknown [0x%x] background scan status value\n", j);
+ }
j = sg_get_unaligned_be32(ucp + 4);
- pout(" Accumulated power on time, hours:minutes %d:%02d "
- "[%d minutes]\n", (j / 60), (j % 60), j);
+ pout("%sAccumulated power on time, hours:minutes %d:%02d",
+ (only_pow_time ? "" : " "), (j / 60), (j % 60));
+ if (only_pow_time) {
+ pout("\n");
+ break;
+ } else
+ pout(" [%d minutes]\n", j);
jglb["power_on_time"]["hours"] = j / 60;
jglb["power_on_time"]["minutes"] = j % 60;
pout(" Number of background scans performed: %d, ",
@@ -1232,9 +1247,12 @@ scsiPrintBackgroundResults(scsi_device * device)
break;
default:
if (noheader) {
- noheader = 0;
- pout("\n%s log\n", hname);
+ noheader = false;
+ if (! only_pow_time)
+ pout("\n%s log\n", hname);
}
+ if (only_pow_time)
+ break;
if (firstresult) {
firstresult = 0;
pout("\n # when lba(hex) [sk,asc,ascq] "
@@ -1262,10 +1280,11 @@ scsiPrintBackgroundResults(scsi_device * device)
num -= pl;
ucp += pl;
}
- if (truncated)
+ if (truncated && (! only_pow_time))
pout(" >>>> log truncated, fetched %d of %d available "
"bytes\n", LOG_RESP_LONG_LEN, truncated);
- pout("\n");
+ if (! only_pow_time)
+ pout("\n");
return retval;
}
@@ -2447,6 +2466,17 @@ scsiPrintMain(scsi_device * device, const scsi_print_options & options)
scsiGetSupportedLogPages(device);
if (gTempLPage)
scsiPrintTemp(device);
+ }
+ // in the 'smartctl -a" case only want: "Accumulated power on time"
+ if ((! options.smart_background_log) && is_disk) {
+ if (! checkedSupportedLogPages)
+ scsiGetSupportedLogPages(device);
+ res = 0;
+ if (gBackgroundResultsLPage)
+ res = scsiPrintBackgroundResults(device, true);
+ any_output = true;
+ }
+ if (options.smart_vendor_attrib) {
if (gStartStopLPage)
scsiGetStartStopData(device);
if (is_disk) {
@@ -2488,7 +2518,7 @@ scsiPrintMain(scsi_device * device, const scsi_print_options & options)
scsiGetSupportedLogPages(device);
res = 0;
if (gBackgroundResultsLPage)
- res = scsiPrintBackgroundResults(device);
+ res = scsiPrintBackgroundResults(device, false);
else {
pout("Device does not support Background scan results logging\n");
failuretest(OPTIONAL_CMD, returnval|=FAILSMART);
--
2.35.1

View File

@ -0,0 +1,231 @@
From 7c207dd5d06efccdee7258f832d4216fe5d1d998 Mon Sep 17 00:00:00 2001
From: "Milan P. Gandhi" <mgandhi@redhat.com>
Date: Mon, 17 Oct 2022 14:25:34 +0530
Subject: [PATCH 3/3] scsiprint.cpp: applied patch proposed by Yannick Hemery
to merge both 'supported' log pages
---
smartmontools-7.1/scsicmds.h | 5 ++
smartmontools-7.1/scsiprint.cpp | 103 +++++++++++++++++---------------
2 files changed, 59 insertions(+), 49 deletions(-)
diff --git a/smartmontools-7.1/scsicmds.h b/smartmontools-7.1/scsicmds.h
index 516f773..9bd8b21 100644
--- a/smartmontools-7.1/scsicmds.h
+++ b/smartmontools-7.1/scsicmds.h
@@ -167,6 +167,11 @@ struct scsi_readcap_resp {
uint16_t l_a_lba; /* Lowest Aligned Logical Block Address */
};
+struct scsi_supp_log_pages {
+ uint8_t page_code;
+ uint8_t subpage_code;
+};
+
/* SCSI Peripheral types (of interest) */
#define SCSI_PT_DIRECT_ACCESS 0x0
#define SCSI_PT_SEQUENTIAL_ACCESS 0x1
diff --git a/smartmontools-7.1/scsiprint.cpp b/smartmontools-7.1/scsiprint.cpp
index 81bed88..21a4929 100644
--- a/smartmontools-7.1/scsiprint.cpp
+++ b/smartmontools-7.1/scsiprint.cpp
@@ -39,6 +39,9 @@ uint8_t gBuf[GBUF_SIZE];
#define LOG_RESP_LONG_LEN ((62 * 256) + 252)
#define LOG_RESP_TAPE_ALERT_LEN 0x144
+/* Supported log pages + Supported log pages and subpages maximum count */
+#define SCSI_SUPP_LOG_PAGES_MAX_COUNT (252 + (62 * 128) + 126)
+
/* Log pages supported */
static bool gSmartLPage = false; /* Informational Exceptions log page */
static bool gTempLPage = false;
@@ -118,14 +121,17 @@ static void
scsiGetSupportedLogPages(scsi_device * device)
{
bool got_subpages = false;
- int k, bump, err, resp_len, num_unreported, num_unreported_spg;
- int resp_len_pg0_0 = 0;
- int resp_len_pg0_ff = 0; /* in SPC-4, response length of supported
- * log pages _and_ log subpages */
+ int k, err, resp_len, num_unreported, num_unreported_spg;
+ int supp_lpg_and_spg_count = 0;
+
const uint8_t * up;
uint8_t sup_lpgs[LOG_RESP_LEN];
+ struct scsi_supp_log_pages supp_lpg_and_spg[SCSI_SUPP_LOG_PAGES_MAX_COUNT];
memset(gBuf, 0, LOG_RESP_LEN);
+ memset(supp_lpg_and_spg, 0, sizeof(supp_lpg_and_spg));
+
+ /* Get supported log pages */
if ((err = scsiLogSense(device, SUPPORTED_LPAGES, 0, gBuf,
LOG_RESP_LEN, 0 /* do double fetch */))) {
if (scsi_debugmode > 0)
@@ -140,12 +146,23 @@ scsiGetSupportedLogPages(scsi_device * device)
logSenStr, scsiErrString(err));
if (err)
return;
- memcpy(sup_lpgs, gBuf, LOG_RESP_LEN);
- } else if ((scsi_version >= SCSI_VERSION_SPC_4) &&
- (scsi_version <= SCSI_VERSION_HIGHEST)) {
+ }
+
+ memcpy(sup_lpgs, gBuf, LOG_RESP_LEN);
+ resp_len = gBuf[3];
+ up = gBuf + LOGPAGEHDRSIZE;
+
+ for (k = 0; k < resp_len; k += 1) {
+ uint8_t page_code = 0x3f & up[k];
+ supp_lpg_and_spg[supp_lpg_and_spg_count++] = {page_code, 0};
+ }
+
+ /* Get supported log pages and subpages. Most drives seems to include the
+ supported log pages here as well, but some drives such as the Samsung
+ PM1643a will only report the additional log pages with subpages here */
+ if ((scsi_version >= SCSI_VERSION_SPC_4) &&
+ (scsi_version <= SCSI_VERSION_HIGHEST)) {
/* unclear what code T10 will choose for SPC-6 */
- memcpy(sup_lpgs, gBuf, LOG_RESP_LEN);
- resp_len_pg0_0 = sup_lpgs[3];
if ((err = scsiLogSense(device, SUPPORTED_LPAGES, SUPP_SPAGE_L_SPAGE,
gBuf, LOG_RESP_LONG_LEN,
-1 /* just single not double fetch */))) {
@@ -153,6 +170,7 @@ scsiGetSupportedLogPages(scsi_device * device)
pout("%s for supported pages and subpages failed [%s]\n",
logSenStr, scsiErrString(err));
} else {
+ /* Ensure we didn't get the same answer than without the subpages */
if (0 == memcmp(gBuf, sup_lpgs, LOG_RESP_LEN)) {
if (scsi_debugmode > 0)
pout("%s: %s ignored subpage field, bad\n",
@@ -163,48 +181,34 @@ scsiGetSupportedLogPages(scsi_device * device)
pout("%s supported subpages is bad SPF=%u SUBPG=%u\n",
logSenRspStr, !! (0x40 & gBuf[0]), gBuf[2]);
} else {
- resp_len_pg0_ff = sg_get_unaligned_be16(gBuf + 2);
got_subpages = true;
}
}
- } else {
- memcpy(sup_lpgs, gBuf, LOG_RESP_LEN);
- resp_len_pg0_0 = sup_lpgs[3];
}
if (got_subpages) {
resp_len = sg_get_unaligned_be16(gBuf + 2);
- if (resp_len_pg0_ff <= resp_len_pg0_0) {
- /* something is rotten ....., ignore SUPP_SPAGE_L_SPAGE */
- resp_len = resp_len_pg0_0;
- bump = 1;
- up = sup_lpgs + LOGPAGEHDRSIZE;
- got_subpages = false;
- (void)got_subpages; // not yet used below, suppress warning
- } else {
- resp_len = resp_len_pg0_ff;
- bump = 2;
- up = gBuf + LOGPAGEHDRSIZE;
+ up = gBuf + LOGPAGEHDRSIZE; /* resp_len is device supplied: only take complete pairs that fit the table */
+ for (k = 0; (k + 1 < resp_len) && (supp_lpg_and_spg_count < SCSI_SUPP_LOG_PAGES_MAX_COUNT); k += 2) {
+ uint8_t page_code = 0x3f & up[k];
+ uint8_t subpage_code = up[k+1];
+ supp_lpg_and_spg[supp_lpg_and_spg_count++] = {page_code, subpage_code};
}
- } else {
- resp_len = resp_len_pg0_0;
- bump = 1;
- up = sup_lpgs + LOGPAGEHDRSIZE;
}
+ num_unreported = 0;
num_unreported_spg = 0;
- for (num_unreported = 0, k = 0; k < resp_len; k += bump, up += bump) {
- uint8_t pg_num = 0x3f & up[0];
- uint8_t sub_pg_num = (0x40 & up[0]) ? up[1] : 0;
+ for (k = 0; k < supp_lpg_and_spg_count; k += 1) {
+ struct scsi_supp_log_pages supp_lpg = supp_lpg_and_spg[k];
- switch (pg_num)
+ switch (supp_lpg.page_code)
{
case SUPPORTED_LPAGES:
- if (! ((NO_SUBPAGE_L_SPAGE == sub_pg_num) ||
- (SUPP_SPAGE_L_SPAGE == sub_pg_num))) {
+ if (! ((NO_SUBPAGE_L_SPAGE == supp_lpg.subpage_code) ||
+ (SUPP_SPAGE_L_SPAGE == supp_lpg.subpage_code))) {
if (scsi_debugmode > 1)
pout("%s: Strange Log page number: 0x0,0x%x\n",
- __func__, sub_pg_num);
+ __func__, supp_lpg.subpage_code);
}
break;
case READ_ERROR_COUNTER_LPAGE:
@@ -223,13 +227,13 @@ scsiGetSupportedLogPages(scsi_device * device)
gNonMediumELPage = true;
break;
case TEMPERATURE_LPAGE:
- if (NO_SUBPAGE_L_SPAGE == sub_pg_num)
+ if (NO_SUBPAGE_L_SPAGE == supp_lpg.subpage_code)
gTempLPage = true;
- else if (ENVIRO_REP_L_SPAGE == sub_pg_num)
+ else if (ENVIRO_REP_L_SPAGE == supp_lpg.subpage_code)
gEnviroReportingLPage = true;
- else if (ENVIRO_LIMITS_L_SPAGE == sub_pg_num)
+ else if (ENVIRO_LIMITS_L_SPAGE == supp_lpg.subpage_code)
gEnviroLimitsLPage = true;
- else if (SUPP_SPAGE_L_SPAGE != sub_pg_num) {
+ else if (SUPP_SPAGE_L_SPAGE != supp_lpg.subpage_code) {
++num_unreported;
++num_unreported_spg;
}
@@ -238,11 +242,11 @@ scsiGetSupportedLogPages(scsi_device * device)
reporting of <lpage>,0xff so it is not an error. */
break;
case STARTSTOP_CYCLE_COUNTER_LPAGE:
- if (NO_SUBPAGE_L_SPAGE == sub_pg_num)
+ if (NO_SUBPAGE_L_SPAGE == supp_lpg.subpage_code)
gStartStopLPage = true;
- else if (UTILIZATION_L_SPAGE == sub_pg_num)
+ else if (UTILIZATION_L_SPAGE == supp_lpg.subpage_code)
gUtilizationLPage = true;
- else if (SUPP_SPAGE_L_SPAGE != sub_pg_num) {
+ else if (SUPP_SPAGE_L_SPAGE != supp_lpg.subpage_code) {
++num_unreported;
++num_unreported_spg;
}
@@ -254,15 +258,15 @@ scsiGetSupportedLogPages(scsi_device * device)
gSmartLPage = true;
break;
case BACKGROUND_RESULTS_LPAGE:
- if (NO_SUBPAGE_L_SPAGE == sub_pg_num)
+ if (NO_SUBPAGE_L_SPAGE == supp_lpg.subpage_code)
gBackgroundResultsLPage = true;
- else if (PEND_DEFECTS_L_SPAGE == sub_pg_num)
+ else if (PEND_DEFECTS_L_SPAGE == supp_lpg.subpage_code)
gPendDefectsLPage = true;
- else if (BACKGROUND_OP_L_SPAGE == sub_pg_num)
+ else if (BACKGROUND_OP_L_SPAGE == supp_lpg.subpage_code)
gBackgroundOpLPage = true;
- else if (LPS_MISALIGN_L_SPAGE == sub_pg_num)
+ else if (LPS_MISALIGN_L_SPAGE == supp_lpg.subpage_code)
gLPSMisalignLPage = true;
- else if (SUPP_SPAGE_L_SPAGE != sub_pg_num) {
+ else if (SUPP_SPAGE_L_SPAGE != supp_lpg.subpage_code) {
++num_unreported;
++num_unreported_spg;
}
@@ -296,9 +300,10 @@ scsiGetSupportedLogPages(scsi_device * device)
gSeagateFactoryLPage = true;
break;
default:
- if (pg_num < 0x30) { /* don't count VS pages */
+ if (supp_lpg.page_code < 0x30) { /* don't count VS pages */
++num_unreported;
- if ((sub_pg_num > 0) && (SUPP_SPAGE_L_SPAGE != sub_pg_num))
+ if ((supp_lpg.subpage_code > 0) &&
+ (SUPP_SPAGE_L_SPAGE != supp_lpg.subpage_code))
++num_unreported_spg;
}
break;
--
2.35.1

View File

@ -0,0 +1,31 @@
Index: trunk/smartmontools/scsiprint.cpp
===================================================================
--- smartmontools/scsiprint.cpp (revision 5076)
+++ smartmontools/scsiprint.cpp (revision 5090)
@@ -2340,6 +2340,6 @@
!wce ? "Disabled" : "Enabled");
}
- } else
any_output = true;
+ }
if (options.drive_info)
@@ -2463,12 +2463,10 @@
if (gTempLPage)
scsiPrintTemp(device);
- }
- // in the 'smartctl -a" case only want: "Accumulated power on time"
- if ((! options.smart_background_log) && is_disk) {
- if (! checkedSupportedLogPages)
- scsiGetSupportedLogPages(device);
- res = 0;
- if (gBackgroundResultsLPage)
- res = scsiPrintBackgroundResults(device, true);
+ // in the 'smartctl -A' case only want: "Accumulated power on time"
+ if ((! options.smart_background_log) && is_disk) {
+ res = 0;
+ if (gBackgroundResultsLPage)
+ res = scsiPrintBackgroundResults(device, true);
+ }
any_output = true;
}

View File

@ -0,0 +1,200 @@
diff -up smartmontools-7.1/nvmecmds.cpp.r5121 smartmontools-7.1/nvmecmds.cpp
--- smartmontools-7.1/nvmecmds.cpp.r5121 2019-07-01 22:54:14.000000000 +0200
+++ smartmontools-7.1/nvmecmds.cpp 2023-11-22 12:56:02.927324622 +0100
@@ -3,7 +3,7 @@
*
* Home page of code is: https://www.smartmontools.org
*
- * Copyright (C) 2016-19 Christian Franke
+ * Copyright (C) 2016-20 Christian Franke
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
@@ -132,6 +132,7 @@ bool nvme_read_id_ctrl(nvme_device * dev
swapx(&id_ctrl.vid);
swapx(&id_ctrl.ssvid);
swapx(&id_ctrl.cntlid);
+ swapx(&id_ctrl.ver);
swapx(&id_ctrl.oacs);
swapx(&id_ctrl.wctemp);
swapx(&id_ctrl.cctemp);
@@ -181,30 +182,54 @@ bool nvme_read_id_ns(nvme_device * devic
return true;
}
-// Read NVMe log page with identifier LID.
-bool nvme_read_log_page(nvme_device * device, unsigned char lid, void * data,
- unsigned size, bool broadcast_nsid)
+static bool nvme_read_log_page_1(nvme_device * device, unsigned nsid,
+ unsigned char lid, void * data, unsigned size, unsigned offset = 0)
{
- if (!(4 <= size && size <= 0x4000 && (size % 4) == 0))
- throw std::logic_error("nvme_read_log_page(): invalid size");
+ if (!(4 <= size && size <= 0x1000 && !(size % 4) && !(offset % 4)))
+ return device->set_err(EINVAL, "Invalid NVMe log size %u or offset %u", size, offset);
memset(data, 0, size);
nvme_cmd_in in;
in.set_data_in(nvme_admin_get_log_page, data, size);
- in.nsid = broadcast_nsid ? 0xffffffff : device->get_nsid();
+ in.nsid = nsid;
in.cdw10 = lid | (((size / 4) - 1) << 16);
+ in.cdw12 = offset; // LPOL, NVMe 1.2.1
return nvme_pass_through(device, in);
}
+// Read NVMe log page with identifier LID.
+unsigned nvme_read_log_page(nvme_device * device, unsigned nsid, unsigned char lid,
+ void * data, unsigned size, bool nvme_121, unsigned offset /* = 0 */)
+{
+ unsigned n, bs;
+ for (n = 0; n < size; n += bs) {
+ if (!nvme_121 && offset + n > 0) {
+ device->set_err(ENOSYS, "Log Page Offset requires NVMe >= 1.2.1");
+ break;
+ }
+
+ // Limit transfer size to one page to avoid problems with
+ // limits of NVMe pass-through layer or too low MDTS values.
+ bs = size - n;
+ if (bs > 0x1000)
+ bs = 0x1000;
+ if (!nvme_read_log_page_1(device, nsid, lid, (char *)data + n, bs, offset + n))
+ break;
+ }
+
+ return n;
+}
+
// Read NVMe Error Information Log.
-bool nvme_read_error_log(nvme_device * device, nvme_error_log_page * error_log, unsigned num_entries)
+unsigned nvme_read_error_log(nvme_device * device, nvme_error_log_page * error_log,
+ unsigned num_entries, bool nvme_121)
{
- if (!nvme_read_log_page(device, 0x01, error_log, num_entries * sizeof(*error_log), true))
- return false;
+ unsigned n = nvme_read_log_page(device, 0xffffffff, 0x01, error_log,
+ num_entries * sizeof(*error_log), nvme_121);
if (isbigendian()) {
- for (unsigned i = 0; i < num_entries; i++) {
+ for (unsigned i = 0; i < n / sizeof(*error_log); i++) { // n counts bytes read, not entries
swapx(&error_log[i].error_count);
swapx(&error_log[i].sqid);
swapx(&error_log[i].cmdid);
@@ -215,13 +240,13 @@ bool nvme_read_error_log(nvme_device * d
}
}
- return true;
+ return n / sizeof(*error_log);
}
// Read NVMe SMART/Health Information log.
bool nvme_read_smart_log(nvme_device * device, nvme_smart_log & smart_log)
{
- if (!nvme_read_log_page(device, 0x02, &smart_log, sizeof(smart_log), true))
+ if (!nvme_read_log_page_1(device, 0xffffffff, 0x02, &smart_log, sizeof(smart_log)))
return false;
if (isbigendian()) {
diff -up smartmontools-7.1/nvmecmds.h.r5121 smartmontools-7.1/nvmecmds.h
--- smartmontools-7.1/nvmecmds.h.r5121 2019-07-01 22:54:14.000000000 +0200
+++ smartmontools-7.1/nvmecmds.h 2023-11-22 12:56:02.927324622 +0100
@@ -3,7 +3,7 @@
*
* Home page of code is: https://www.smartmontools.org
*
- * Copyright (C) 2016-19 Christian Franke
+ * Copyright (C) 2016-20 Christian Franke
*
* Original code from <linux/nvme.h>:
* Copyright (C) 2011-2014 Intel Corporation
@@ -236,12 +236,12 @@ bool nvme_read_id_ctrl(nvme_device * dev
bool nvme_read_id_ns(nvme_device * device, unsigned nsid, smartmontools::nvme_id_ns & id_ns);
// Read NVMe log page with identifier LID.
-bool nvme_read_log_page(nvme_device * device, unsigned char lid, void * data,
- unsigned size, bool broadcast_nsid);
+unsigned nvme_read_log_page(nvme_device * device, unsigned nsid, unsigned char lid,
+ void * data, unsigned size, bool nvme_121, unsigned offset = 0);
// Read NVMe Error Information Log.
-bool nvme_read_error_log(nvme_device * device, smartmontools::nvme_error_log_page * error_log,
- unsigned num_entries);
+unsigned nvme_read_error_log(nvme_device * device, smartmontools::nvme_error_log_page * error_log,
+ unsigned num_entries, bool nvme_121);
// Read NVMe SMART/Health Information log.
bool nvme_read_smart_log(nvme_device * device, smartmontools::nvme_smart_log & smart_log);
diff -up smartmontools-7.1/nvmeprint.cpp.r5121 smartmontools-7.1/nvmeprint.cpp
--- smartmontools-7.1/nvmeprint.cpp.r5121 2023-11-22 12:56:02.927324622 +0100
+++ smartmontools-7.1/nvmeprint.cpp 2023-11-22 13:00:34.472659814 +0100
@@ -524,6 +524,9 @@ int nvmePrintMain(nvme_device * device,
}
}
+ // Log Page Offset requires NVMe >= 1.2.1
+ bool nvme_121 = (id_ctrl.ver >= 0x10201);
+
// Print Error Information Log
if (options.error_log_entries) {
unsigned num_entries = id_ctrl.elpe + 1; // 0-based value
@@ -531,39 +534,47 @@ int nvmePrintMain(nvme_device * device,
nvme_error_log_page * error_log =
reinterpret_cast<nvme_error_log_page *>(error_log_buf.data());
- if (!nvme_read_error_log(device, error_log, num_entries)) {
+ unsigned read_entries = nvme_read_error_log(device, error_log, num_entries, nvme_121);
+ if (!read_entries) {
jerr("Read Error Information Log failed: %s\n\n", device->get_errmsg());
return retval | FAILSMART;
}
+ if (read_entries < num_entries)
+ jerr("Read Error Information Log failed, %u entries missing: %s\n",
+ num_entries - read_entries, device->get_errmsg());
- print_error_log(error_log, num_entries, options.error_log_entries);
+ print_error_log(error_log, read_entries, options.error_log_entries);
}
// Dump log page
if (options.log_page_size) {
// Align size to dword boundary
unsigned size = ((options.log_page_size + 4-1) / 4) * 4;
- bool broadcast_nsid;
raw_buffer log_buf(size);
+ unsigned nsid;
switch (options.log_page) {
case 1:
case 2:
case 3:
- broadcast_nsid = true;
+ nsid = 0xffffffff;
break;
default:
- broadcast_nsid = false;
+ nsid = device->get_nsid();
break;
}
- if (!nvme_read_log_page(device, options.log_page, log_buf.data(),
- size, broadcast_nsid)) {
+ unsigned read_bytes = nvme_read_log_page(device, nsid, options.log_page, log_buf.data(),
+ size, nvme_121);
+ if (!read_bytes) {
jerr("Read NVMe Log 0x%02x failed: %s\n\n", options.log_page, device->get_errmsg());
return retval | FAILSMART;
}
+ if (read_bytes < size)
+ jerr("Read NVMe Log 0x%02x failed, 0x%x bytes missing: %s\n",
+ options.log_page, size - read_bytes, device->get_errmsg());
- pout("NVMe Log 0x%02x (0x%04x bytes)\n", options.log_page, size);
- dStrHex(log_buf.data(), size, 0);
+ pout("NVMe Log 0x%02x (0x%04x bytes)\n", options.log_page, read_bytes);
+ dStrHex(log_buf.data(), read_bytes, 0);
pout("\n");
}

View File

@ -0,0 +1,312 @@
diff -up smartmontools-7.1/dev_interface.cpp.r5471 smartmontools-7.1/dev_interface.cpp
--- smartmontools-7.1/dev_interface.cpp.r5471 2019-11-24 19:19:24.000000000 +0100
+++ smartmontools-7.1/dev_interface.cpp 2023-11-22 14:07:37.647756091 +0100
@@ -15,6 +15,7 @@
#include "dev_tunnelled.h"
#include "atacmds.h" // ATA_SMART_CMD/STATUS
#include "scsicmds.h" // scsi_cmnd_io
+#include "nvmecmds.h" // nvme_status_*()
#include "utility.h"
#include <errno.h>
@@ -235,12 +236,11 @@ bool scsi_device::scsi_pass_through_and_
bool nvme_device::set_nvme_err(nvme_cmd_out & out, unsigned status, const char * msg /* = 0 */)
{
- if (!status)
- throw std::logic_error("nvme_device: set_nvme_err() called with status=0");
-
out.status = status;
out.status_valid = true;
- return set_err(EIO, "%sNVMe Status 0x%02x", (msg ? msg : ""), status);
+ char buf[64];
+ return set_err(nvme_status_to_errno(status), "%s%s (0x%03x)", (msg ? msg : ""),
+ nvme_status_to_info_str(buf, status), status);
}
diff -up smartmontools-7.1/nvmecmds.cpp.r5471 smartmontools-7.1/nvmecmds.cpp
--- smartmontools-7.1/nvmecmds.cpp.r5471 2023-11-22 14:07:37.646756079 +0100
+++ smartmontools-7.1/nvmecmds.cpp 2023-11-22 14:07:37.648756102 +0100
@@ -258,3 +258,221 @@ bool nvme_read_smart_log(nvme_device * d
return true;
}
+
+// Return flagged error message for NVMe status SCT/SC fields or nullptr if unknown.
+// If message starts with '-', the status indicates an invalid command (EINVAL).
+static const char * nvme_status_to_flagged_str(uint16_t status)
+{
+ // Section 3.3.3.2.1 of NVM Express Base Specification Revision 2.0c, October 4, 2022
+ uint8_t sc = (uint8_t)status;
+ switch ((status >> 8) & 0x7) {
+ case 0x0: // Generic Command Status
+ if (sc < 0x80) switch (sc) {
+ case 0x00: return "Successful Completion";
+ case 0x01: return "-Invalid Command Opcode";
+ case 0x02: return "-Invalid Field in Command";
+ case 0x03: return "Command ID Conflict";
+ case 0x04: return "Data Transfer Error";
+ case 0x05: return "Commands Aborted due to Power Loss Notification";
+ case 0x06: return "Internal Error";
+ case 0x07: return "Command Abort Requested";
+ case 0x08: return "Command Aborted due to SQ Deletion";
+ case 0x09: return "Command Aborted due to Failed Fused Command";
+ case 0x0a: return "Command Aborted due to Missing Fused Command";
+ case 0x0b: return "-Invalid Namespace or Format";
+ case 0x0c: return "Command Sequence Error";
+ case 0x0d: return "-Invalid SGL Segment Descriptor";
+ case 0x0e: return "-Invalid Number of SGL Descriptors";
+ case 0x0f: return "-Data SGL Length Invalid";
+ case 0x10: return "-Metadata SGL Length Invalid";
+ case 0x11: return "-SGL Descriptor Type Invalid";
+ case 0x12: return "-Invalid Use of Controller Memory Buffer";
+ case 0x13: return "-PRP Offset Invalid";
+ case 0x14: return "Atomic Write Unit Exceeded";
+ case 0x15: return "Operation Denied";
+ case 0x16: return "-SGL Offset Invalid";
+ case 0x18: return "Host Identifier Inconsistent Format";
+ case 0x19: return "Keep Alive Timer Expired";
+ case 0x1a: return "-Keep Alive Timeout Invalid";
+ case 0x1b: return "Command Aborted due to Preempt and Abort";
+ case 0x1c: return "Sanitize Failed";
+ case 0x1d: return "Sanitize In Progress";
+ case 0x1e: return "SGL Data Block Granularity Invalid";
+ case 0x1f: return "Command Not Supported for Queue in CMB";
+ case 0x20: return "Namespace is Write Protected";
+ case 0x21: return "Command Interrupted";
+ case 0x22: return "Transient Transport Error";
+ case 0x23: return "Command Prohibited by Command and Feature Lockdown";
+ case 0x24: return "Admin Command Media Not Ready";
+ // 0x25-0x7f: Reserved
+ }
+ else switch (sc) {
+ // 0x80-0xbf: I/O Command Set Specific
+ case 0x80: return "LBA Out of Range";
+ case 0x81: return "Capacity Exceeded";
+ case 0x82: return "Namespace Not Ready";
+ case 0x83: return "Reservation Conflict";
+ case 0x84: return "Format In Progress";
+ case 0x85: return "-Invalid Value Size";
+ case 0x86: return "-Invalid Key Size";
+ case 0x87: return "KV Key Does Not Exist";
+ case 0x88: return "Unrecovered Error";
+ case 0x89: return "Key Exists";
+ // 0x90-0xbf: Reserved
+ // 0xc0-0xff: Vendor Specific
+ }
+ break;
+
+ case 0x1: // Command Specific Status
+ if (sc < 0x80) switch (sc) {
+ case 0x00: return "-Completion Queue Invalid";
+ case 0x01: return "-Invalid Queue Identifier";
+ case 0x02: return "-Invalid Queue Size";
+ case 0x03: return "Abort Command Limit Exceeded";
+ case 0x04: return "Abort Command Is Missing";
+ case 0x05: return "Asynchronous Event Request Limit Exceeded";
+ case 0x06: return "-Invalid Firmware Slot";
+ case 0x07: return "-Invalid Firmware Image";
+ case 0x08: return "-Invalid Interrupt Vector";
+ case 0x09: return "-Invalid Log Page";
+ case 0x0a: return "-Invalid Format";
+ case 0x0b: return "Firmware Activation Requires Conventional Reset";
+ case 0x0c: return "-Invalid Queue Deletion";
+ case 0x0d: return "Feature Identifier Not Saveable";
+ case 0x0e: return "Feature Not Changeable";
+ case 0x0f: return "Feature Not Namespace Specific";
+ case 0x10: return "Firmware Activation Requires NVM Subsystem Reset";
+ case 0x11: return "Firmware Activation Requires Controller Level Reset";
+ case 0x12: return "Firmware Activation Requires Maximum Time Violation";
+ case 0x13: return "Firmware Activation Prohibited";
+ case 0x14: return "Overlapping Range";
+ case 0x15: return "Namespace Insufficient Capacity";
+ case 0x16: return "-Namespace Identifier Unavailable";
+ case 0x18: return "Namespace Already Attached";
+ case 0x19: return "Namespace Is Private";
+ case 0x1a: return "Namespace Not Attached";
+ case 0x1b: return "Thin Provisioning Not Supported";
+ case 0x1c: return "-Controller List Invalid";
+ case 0x1d: return "Device Self-test In Progress";
+ case 0x1e: return "Boot Partition Write Prohibited";
+ case 0x1f: return "Invalid Controller Identifier";
+ case 0x20: return "-Invalid Secondary Controller State";
+ case 0x21: return "-Invalid Number of Controller Resources";
+ case 0x22: return "-Invalid Resource Identifier";
+ case 0x23: return "Sanitize Prohibited While Persistent Memory Region is Enabled";
+ case 0x24: return "-ANA Group Identifier Invalid";
+ case 0x25: return "ANA Attach Failed";
+ case 0x26: return "Insufficient Capacity";
+ case 0x27: return "Namespace Attachment Limit Exceeded";
+ case 0x28: return "Prohibition of Command Execution Not Supported";
+ case 0x29: return "I/O Command Set Not Supported";
+ case 0x2a: return "I/O Command Set Not Enabled";
+ case 0x2b: return "I/O Command Set Combination Rejected";
+ case 0x2c: return "-Invalid I/O Command Set";
+ case 0x2d: return "-Identifier Unavailable";
+ // 0x2e-0x6f: Reserved
+ // 0x70-0x7f: Directive Specific
+ }
+ else if (sc < 0xb8) switch (sc) {
+ // 0x80-0xbf: I/O Command Set Specific (overlap with Fabrics Command Set)
+ case 0x80: return "-Conflicting Attributes";
+ case 0x81: return "-Invalid Protection Information";
+ case 0x82: return "Attempted Write to Read Only Range";
+ case 0x83: return "Command Size Limit Exceeded";
+ // 0x84-0xb7: Reserved
+ }
+ else switch (sc) {
+ case 0xb8: return "Zoned Boundary Error";
+ case 0xb9: return "Zone Is Full";
+ case 0xba: return "Zone Is Read Only";
+ case 0xbb: return "Zone Is Offline";
+ case 0xbc: return "Zone Invalid Write";
+ case 0xbd: return "Too Many Active Zones";
+ case 0xbe: return "Too Many Open Zones";
+ case 0xbf: return "Invalid Zone State Transition";
+ // 0xc0-0xff: Vendor Specific
+ }
+ break;
+
+ case 0x2: // Media and Data Integrity Errors
+ switch (sc) {
+ // 0x00-0x7f: Reserved
+ case 0x80: return "Write Fault";
+ case 0x81: return "Unrecovered Read Error";
+ case 0x82: return "End-to-end Guard Check Error";
+ case 0x83: return "End-to-end Application Tag Check Error";
+ case 0x84: return "End-to-end Reference Tag Check Error";
+ case 0x85: return "Compare Failure";
+ case 0x86: return "Access Denied";
+ case 0x87: return "Deallocated or Unwritten Logical Block";
+ case 0x88: return "End-to-End Storage Tag Check Error";
+ // 0x89-0xbf: Reserved
+ // 0xc0-0xff: Vendor Specific
+ }
+ break;
+
+ case 0x3: // Path Related Status
+ switch (sc) {
+ case 0x00: return "Internal Path Error";
+ case 0x01: return "Asymmetric Access Persistent Loss";
+ case 0x02: return "Asymmetric Access Inaccessible";
+ case 0x03: return "Asymmetric Access Transition";
+ // 0x04-0x5f: Reserved
+ // 0x60-0x6f: Controller Detected Pathing Errors
+ case 0x60: return "Controller Pathing Error";
+ // 0x61-0x6f: Reserved
+ // 0x70-0x7f: Host Detected Pathing Errors
+ case 0x70: return "Host Pathing Error";
+ case 0x71: return "Command Aborted By Host";
+ // 0x72-0x7f: Reserved
+ // 0x80-0xbf: I/O Command Set Specific
+ // 0xc0-0xff: Vendor Specific
+ }
+ break;
+
+ // 0x4-0x6: Reserved
+ // 0x7: Vendor Specific
+ }
+ return nullptr;
+}
+
+// Return errno for NVMe status SCT/SC fields: 0, EINVAL or EIO.
+int nvme_status_to_errno(uint16_t status)
+{
+ if (!nvme_status_is_error(status))
+ return 0;
+ const char * s = nvme_status_to_flagged_str(status);
+ if (s && *s == '-')
+ return EINVAL;
+ return EIO;
+}
+
+// Return error message for NVMe status SCT/SC fields or nullptr if unknown.
+const char * nvme_status_to_str(uint16_t status)
+{
+ const char * s = nvme_status_to_flagged_str(status);
+ return (s && *s == '-' ? s + 1 : s);
+}
+
+// Return error message for NVMe status SCT/SC fields or explanatory message if unknown.
+const char * nvme_status_to_info_str(char * buf, size_t bufsize, uint16_t status)
+{
+ const char * s = nvme_status_to_str(status);
+ if (s)
+ return s;
+
+ uint8_t sct = (status >> 8) & 0x7, sc = (uint8_t)status;
+ const char * pfx = (sc >= 0xc0 ? "Vendor Specific " : "Unknown ");
+ switch (sct) {
+ case 0x0: s = "Generic Command Status"; break;
+ case 0x1: s = "Command Specific Status"; break;
+ case 0x2: s = "Media and Data Integrity Error"; break;
+ case 0x3: s = "Path Related Status"; break;
+ case 0x7: s = "Vendor Specific Status"; pfx = ""; break;
+ }
+ if (s)
+ snprintf(buf, bufsize, "%s%s 0x%02x", pfx, s, sc);
+ else
+ snprintf(buf, bufsize, "Unknown Status 0x%x/0x%02x", sct, sc);
+ return buf;
+}
diff -up smartmontools-7.1/nvmecmds.h.r5471 smartmontools-7.1/nvmecmds.h
--- smartmontools-7.1/nvmecmds.h.r5471 2023-11-22 14:07:37.646756079 +0100
+++ smartmontools-7.1/nvmecmds.h 2023-11-22 14:09:29.911084240 +0100
@@ -18,6 +18,8 @@
#include "static_assert.h"
+#include <errno.h>
+#include <stddef.h>
#include <stdint.h>
// The code below was originally imported from <linux/nvme.h> include file from
@@ -246,4 +248,22 @@ unsigned nvme_read_error_log(nvme_device
// Read NVMe SMART/Health Information log.
bool nvme_read_smart_log(nvme_device * device, smartmontools::nvme_smart_log & smart_log);
+// Return true if NVMe status indicates an error.
+constexpr bool nvme_status_is_error(uint16_t status)
+ { return !!(status & 0x07ff); }
+
+// Return errno for NVMe status SCT/SC fields: 0, EINVAL or EIO.
+int nvme_status_to_errno(uint16_t status);
+
+// Return error message for NVMe status SCT/SC fields or nullptr if unknown.
+const char * nvme_status_to_str(uint16_t status);
+
+// Return error message for NVMe status SCT/SC fields or explanatory message if unknown.
+const char * nvme_status_to_info_str(char * buf, size_t bufsize, uint16_t status);
+
+// Version of above for fixed size buffers.
+template <size_t SIZE>
+inline const char * nvme_status_to_info_str(char (& buf)[SIZE], unsigned status)
+ { return nvme_status_to_info_str(buf, SIZE, status); }
+
#endif // NVMECMDS_H
diff -up smartmontools-7.1/nvmeprint.cpp.r5471 smartmontools-7.1/nvmeprint.cpp
--- smartmontools-7.1/nvmeprint.cpp.r5471 2023-11-22 14:07:37.648756102 +0100
+++ smartmontools-7.1/nvmeprint.cpp 2023-11-22 14:11:35.899574762 +0100
@@ -420,7 +420,7 @@ static void print_error_log(const nvme_e
continue;
if (cnt == 1)
- pout("Num ErrCount SQId CmdId Status PELoc LBA NSID VS\n");
+ pout("Num ErrCount SQId CmdId Status PELoc LBA NSID VS Message\n");
char sq[16] = "-", cm[16] = "-", st[16] = "-", pe[16] = "-";
char lb[32] = "-", ns[16] = "-", vs[8] = "-";
@@ -439,8 +439,10 @@ static void print_error_log(const nvme_e
if (e.vs != 0x00)
snprintf(vs, sizeof(vs), "0x%02x", e.vs);
- pout("%3u %10" PRIu64 " %5s %7s %7s %6s %12s %5s %5s\n",
- i, e.error_count, sq, cm, st, pe, lb, ns, vs);
+ char buf[64];
+ pout("%3u %10" PRIu64 " %5s %7s %7s %6s %12s %5s %5s %s\n",
+ i, e.error_count, sq, cm, st, pe, lb, ns, vs,
+ nvme_status_to_info_str(buf, e.status_field >> 1));
}
if (!cnt)

View File

@ -0,0 +1,159 @@
diff -U0 smartmontools-7.1/ChangeLog.r5472 smartmontools-7.1/ChangeLog
diff -up smartmontools-7.1/smartd.conf.5.in.r5472 smartmontools-7.1/smartd.conf.5.in
--- smartmontools-7.1/smartd.conf.5.in.r5472 2019-12-13 21:20:45.000000000 +0100
+++ smartmontools-7.1/smartd.conf.5.in 2023-11-22 12:32:37.341051288 +0100
@@ -696,6 +696,20 @@ error log has increased since the last c
.I error
\- [NVMe] report if the "Number of Error Information Log Entries" from the
SMART/Health Information log has increased since the last check.
+.br
+[NEW EXPERIMENTAL SMARTD FEATURE]
+This will only be logged as LOG_CRIT if at least one of the new errors is
+still present in the Error Information log and its status indicates a
+device related error.
+Up to eight of the most recent of these errors are logged as LOG_INFO then.
+This is useful because the NVMe Error Information log is not persistent
+across power cycles or device resets.
+.br
+If all new errors are either no longer present in the log or are not device
+related (e.g. invalid command, invalid field in command, ...), a LOG_INFO
+message is generated instead.
+This avoids misleading warnings if the operating system issues unsupported
+commands and the device firmware also logs these kinds of errors.
.Sp
.\" %ENDIF OS Darwin FreeBSD Linux NetBSD Windows Cygwin
.I xerror
diff -up smartmontools-7.1/smartd.cpp.r5472 smartmontools-7.1/smartd.cpp
--- smartmontools-7.1/smartd.cpp.r5472 2019-12-29 14:10:18.000000000 +0100
+++ smartmontools-7.1/smartd.cpp 2023-11-22 12:35:19.254046678 +0100
@@ -2,7 +2,7 @@
* Home page of code is: https://www.smartmontools.org
*
* Copyright (C) 2002-11 Bruce Allen
- * Copyright (C) 2008-19 Christian Franke
+ * Copyright (C) 2008-23 Christian Franke
* Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
* Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
*
@@ -410,6 +410,9 @@ struct dev_config
ata_vendor_attr_defs attribute_defs; // -v options
+ // NVMe only
+ unsigned nvme_err_log_max_entries{}; // size of error log
+
dev_config();
};
@@ -2628,6 +2631,74 @@ static int nvme_get_max_temp_kelvin(cons
return k;
}
+// Check the NVMe Error Information log for device related errors.
+static bool check_nvme_error_log(const dev_config & cfg, dev_state & state, nvme_device * nvmedev,
+ uint64_t newcnt = 0)
+{
+ // Limit transfer size to one page (64 entries) to avoid problems with
+ // limits of NVMe pass-through layer or too low MDTS values.
+ unsigned want_entries = 64;
+ if (want_entries > cfg.nvme_err_log_max_entries)
+ want_entries = cfg.nvme_err_log_max_entries;
+ raw_buffer error_log_buf(want_entries * sizeof(nvme_error_log_page));
+ nvme_error_log_page * error_log =
+ reinterpret_cast<nvme_error_log_page *>(error_log_buf.data());
+ unsigned read_entries = nvme_read_error_log(nvmedev, error_log, want_entries, false /*!lpo_sup*/);
+ if (!read_entries) {
+ PrintOut(LOG_INFO, "Device: %s, Read %u entries from Error Information Log failed\n",
+ cfg.name.c_str(), want_entries);
+ return false;
+ }
+
+ if (!newcnt)
+ return true; // Support check only
+
+ // Scan log, find device related errors
+ uint64_t oldcnt = state.nvme_err_log_entries, mincnt = newcnt;
+ int err = 0, ign = 0;
+ for (unsigned i = 0; i < read_entries; i++) {
+ const nvme_error_log_page & e = error_log[i];
+ if (!e.error_count)
+ continue; // unused
+ if (e.error_count <= oldcnt)
+ break; // stop on first old entry
+ if (e.error_count < mincnt)
+ mincnt = e.error_count; // min known error
+ if (e.error_count > newcnt)
+ newcnt = e.error_count; // adjust maximum
+ uint16_t status = e.status_field >> 1;
+ if (!nvme_status_is_error(status) || nvme_status_to_errno(status) == EINVAL) {
+ ign++; // Not a device related error
+ continue;
+ }
+
+ // Log the most recent 8 errors
+ if (++err > 8)
+ continue;
+ char buf[64];
+ PrintOut(LOG_INFO, "Device: %s, NVMe error [%u], count %" PRIu64 ", status 0x%04x: %s\n",
+ cfg.name.c_str(), i, e.error_count, e.status_field,
+ nvme_status_to_info_str(buf, e.status_field >> 1));
+ }
+
+ std::string msg = strprintf("Device: %s, NVMe error count increased from %" PRIu64 " to %" PRIu64
+ " (%d new, %d ignored, %" PRIu64 " unknown)",
+ cfg.name.c_str(), oldcnt, newcnt, err, ign,
+ (mincnt > oldcnt + 1 ? mincnt - oldcnt - 1 : 0));
+ // LOG_CRIT only if device related errors are found
+ if (!err) {
+ PrintOut(LOG_INFO, "%s\n", msg.c_str());
+ }
+ else {
+ PrintOut(LOG_CRIT, "%s\n", msg.c_str());
+ MailWarning(cfg, state, 4, "%s", msg.c_str());
+ }
+
+ state.nvme_err_log_entries = newcnt;
+ state.must_write = true;
+ return true;
+}
+
static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
const dev_config_vector * prev_cfgs)
{
@@ -2687,8 +2758,14 @@ static int NVMeDeviceScan(dev_config & c
}
// Init total error count
+ cfg.nvme_err_log_max_entries = id_ctrl.elpe + 1; // 0's based value
if (cfg.errorlog || cfg.xerrorlog) {
- state.nvme_err_log_entries = le128_to_uint64(smart_log.num_err_log_entries);
+ if (!check_nvme_error_log(cfg, state, nvmedev)) {
+ PrintOut(LOG_INFO, "Device: %s, Error Information unavailable, ignoring -l [x]error\n", name);
+ cfg.errorlog = cfg.xerrorlog = false;
+ }
+ else
+ state.nvme_err_log_entries = le128_to_uint64(smart_log.num_err_log_entries);
}
// If no supported tests selected, return
@@ -3760,16 +3837,12 @@ static int NVMeCheckDevice(const dev_con
// Check if number of errors has increased
if (cfg.errorlog || cfg.xerrorlog) {
- uint64_t oldcnt = state.nvme_err_log_entries;
uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
- if (newcnt > oldcnt) {
- PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
- name, oldcnt, newcnt);
- MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
- name, oldcnt, newcnt);
- state.must_write = true;
+ if (newcnt > state.nvme_err_log_entries) {
+ // Warn only if device related errors are found
+ check_nvme_error_log(cfg, state, nvmedev, newcnt);
}
- state.nvme_err_log_entries = newcnt;
+ // else // TODO: Handle decrease of count?
}
CloseDevice(nvmedev, name);

View File

@ -1,7 +1,7 @@
Summary: Tools for monitoring SMART capable hard disks
Name: smartmontools
Version: 7.1
Release: 1%{?dist}
Release: 3%{?dist}
Epoch: 1
Group: System Environment/Base
License: GPLv2+
@ -15,6 +15,15 @@ Source5: drivedb.h
#fedora/rhel specific
Patch1: smartmontools-5.38-defaultconf.patch
Patch2: smartmontools-7.2-logsuppagefix1.patch
Patch3: smartmontools-7.2-logsuppagefix2.patch
Patch4: smartmontools-7.2-logsuppagefix3.patch
Patch5: smartmontools-7.2-logsuppagefix4.patch
# 3x from upstream, for smartmontools <= 7.4, #RHEL-6982
Patch6: smartmontools-7.4-r5121.patch
Patch7: smartmontools-7.4-r5471.patch
Patch8: smartmontools-7.4-r5472.patch
BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
#new rpm does not handle this (yet?)
@ -36,7 +45,14 @@ failure.
%prep
%setup -q
%patch1 -p1 -b .defaultconf
%patch -P 1 -p1 -b .defaultconf
%patch -P 2 -p2 -b .logsuppagefix1
%patch -P 3 -p2 -b .logsuppagefix2
%patch -P 4 -p2 -b .logsuppagefix3
%patch -P 5 -p1 -b .logsuppagefix4
%patch -P 6 -p1 -b .r5121
%patch -P 7 -p1 -b .r5471
%patch -P 8 -p1 -b .r5472
# update SOURCE5 on maintainer's machine prior to committing, there's no internet connection on builders
curl %{UrlSource5} -o %{SOURCE5} ||:
@ -108,6 +124,12 @@ fi
%{_sharedstatedir}/%{name}
%changelog
* Wed Nov 22 2023 Michal Hlavinka <mhlavink@redhat.com> - 1:7.1-3
- don't report new non-device related errors as critical (#RHEL-6982)
* Mon May 29 2023 Michal Hlavinka <mhlavink@redhat.com> - 1:7.1-2
- support reporting of Error Counter logging details (#2136439)
* Wed Apr 22 2020 Michal Hlavinka <mhlavink@redhat.com> - 1:7.1-1
- smartmontools updated to 7.1 (#1671154)