device-mapper-multipath/0094-libmultipath-Add-max_r...

183 lines
5.9 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Thu, 9 Nov 2023 18:46:11 -0500
Subject: [PATCH] libmultipath: Add max_retries config option
This option lets multipath set a scsi disk's max_retries sysfs value.
Setting this can be helpful for cases where the path checker succeeds,
but IO commands hang and timeout. By default, the SCSI layer will retry
IOs 5 times. Reducing this value will allow multipath to retry the IO
down another path sooner.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
---
libmultipath/config.h | 1 +
libmultipath/dict.c | 25 ++++++++++++++++++++++++
libmultipath/discovery.c | 40 +++++++++++++++++++++++++++++++++++++-
libmultipath/structs.h | 6 ++++++
multipath/multipath.conf.5 | 14 +++++++++++++
5 files changed, 85 insertions(+), 1 deletion(-)
diff --git a/libmultipath/config.h b/libmultipath/config.h
index c1e18363..b0ee8241 100644
--- a/libmultipath/config.h
+++ b/libmultipath/config.h
@@ -162,6 +162,7 @@ struct config {
int fast_io_fail;
unsigned int dev_loss;
int eh_deadline;
+ int max_retries;
int log_checker_err;
int allow_queueing;
int allow_usb_devices;
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
index eb2f33a2..0c66c1e1 100644
--- a/libmultipath/dict.c
+++ b/libmultipath/dict.c
@@ -1206,6 +1206,30 @@ declare_hw_snprint(eh_deadline, print_undef_off_zero)
declare_pc_handler(eh_deadline, set_undef_off_zero)
declare_pc_snprint(eh_deadline, print_undef_off_zero)
+static int
+def_max_retries_handler(struct config *conf, vector strvec, const char *file,
+ int line_nr)
+{
+ char * buff;
+
+ buff = set_value(strvec);
+ if (!buff)
+ return 1;
+
+ if (strcmp(buff, "off") == 0)
+ conf->max_retries = MAX_RETRIES_OFF;
+ else if (strcmp(buff, "0") == 0)
+ conf->max_retries = MAX_RETRIES_ZERO;
+ else
+ do_set_int(strvec, &conf->max_retries, 1, 5, file, line_nr,
+ buff);
+
+ free(buff);
+ return 0;
+}
+
+declare_def_snprint(max_retries, print_undef_off_zero)
+
static int
set_pgpolicy(vector strvec, void *ptr, const char *file, int line_nr)
{
@@ -2143,6 +2167,7 @@ init_keywords(vector keywords)
install_keyword("fast_io_fail_tmo", &def_fast_io_fail_handler, &snprint_def_fast_io_fail);
install_keyword("dev_loss_tmo", &def_dev_loss_handler, &snprint_def_dev_loss);
install_keyword("eh_deadline", &def_eh_deadline_handler, &snprint_def_eh_deadline);
+ install_keyword("max_retries", &def_max_retries_handler, &snprint_def_max_retries);
install_keyword("bindings_file", &def_bindings_file_handler, &snprint_def_bindings_file);
install_keyword("wwids_file", &def_wwids_file_handler, &snprint_def_wwids_file);
install_keyword("prkeys_file", &def_prkeys_file_handler, &snprint_def_prkeys_file);
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
index a592a54e..adf8bbaa 100644
--- a/libmultipath/discovery.c
+++ b/libmultipath/discovery.c
@@ -632,6 +632,42 @@ sysfs_set_eh_deadline(struct path *pp)
return (ret <= 0);
}
+static int
+sysfs_set_max_retries(struct config *conf, struct path *pp)
+{
+ struct udev_device *parent;
+ char value[16];
+ STRBUF_ON_STACK(buf);
+ int ret, len;
+
+ if (conf->max_retries == MAX_RETRIES_UNSET)
+ return 0;
+
+ if (!pp->udev || pp->sg_id.host_no < 0)
+ return 1;
+
+ len = sprintf(value, "%d", (conf->max_retries == MAX_RETRIES_OFF)? -1 :
+ (conf->max_retries == MAX_RETRIES_ZERO)? 0 :
+ conf->max_retries);
+
+ parent = udev_device_get_parent_with_subsystem_devtype(pp->udev,
+ "scsi", "scsi_device");
+ if (!parent)
+ return 1;
+
+ if (print_strbuf(&buf, "scsi_disk/%i:%i:%i:%" PRIu64 "/max_retries",
+ pp->sg_id.host_no, pp->sg_id.channel,
+ pp->sg_id.scsi_id, pp->sg_id.lun) < 0)
+ return 1;
+
+ ret = sysfs_attr_set_value(parent, get_strbuf_str(&buf), value, len);
+ if (len != ret)
+ condlog(3, "%s/%s: failed to set value to %s: %s",
+ udev_device_get_sysname(parent), get_strbuf_str(&buf),
+ value, (ret < 0)? strerror(-ret) : "write underflow");
+ return (len != ret);
+}
+
static void
sysfs_set_rport_tmo(struct multipath *mpp, struct path *pp)
{
@@ -862,13 +898,15 @@ sysfs_set_scsi_tmo (struct config *conf, struct multipath *mpp)
if (pp->dev_loss == DEV_LOSS_TMO_UNSET &&
pp->fast_io_fail == MP_FAST_IO_FAIL_UNSET &&
- pp->eh_deadline == EH_DEADLINE_UNSET)
+ pp->eh_deadline == EH_DEADLINE_UNSET &&
+ conf->max_retries == MAX_RETRIES_UNSET)
continue;
if (pp->bus != SYSFS_BUS_SCSI)
continue;
sysfs_set_eh_deadline(pp);
+ sysfs_set_max_retries(conf, pp);
if (pp->dev_loss == DEV_LOSS_TMO_UNSET &&
pp->fast_io_fail == MP_FAST_IO_FAIL_UNSET)
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index c1e93e6e..b4252ab5 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -276,6 +276,12 @@ enum eh_deadline_states {
EH_DEADLINE_ZERO = UOZ_ZERO,
};
+enum max_retries_states {
+ MAX_RETRIES_UNSET = UOZ_UNDEF,
+ MAX_RETRIES_OFF = UOZ_OFF,
+ MAX_RETRIES_ZERO = UOZ_ZERO,
+};
+
enum recheck_wwid_states {
RECHECK_WWID_UNDEF = YNU_UNDEF,
RECHECK_WWID_OFF = YNU_NO,
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
index 5e447e67..789f0bfc 100644
--- a/multipath/multipath.conf.5
+++ b/multipath/multipath.conf.5
@@ -743,6 +743,20 @@ The default is: \fB<unset>\fR
.
.
.TP
+.B max_retries
+Specify the maximum number of times the SCSI layer will retry IO commands for
+some types of SCSI errors before returning failure. Setting this can be helpful
+for cases where IO commands hang and timeout. By default, the SCSI layer will
+retry IOs 5 times. Reducing this value will allow multipath to retry the IO
+down another path sooner. Valid values are
+\fB0\fR through \fB5\fR.
+.RS
+.TP
+The default is: \fB<unset>\fR
+.RE
+.
+.
+.TP
.B bindings_file
This option is deprecated, and will be removed in a future release.
The full pathname of the binding file to be used when the user_friendly_names