296 lines
11 KiB
Diff
296 lines
11 KiB
Diff
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||
|
From: Benjamin Marzinski <bmarzins@redhat.com>
|
||
|
Date: Wed, 14 Oct 2020 18:38:20 -0500
|
||
|
Subject: [PATCH] libmultipath: add eh_deadline multipath.conf parameter
|
||
|
|
||
|
There are times a fc rport is never lost, meaning that fast_io_fail_tmo
|
||
|
and dev_loss_tmo never trigger, but scsi commands still hang. This can
|
||
|
cause problems in cases where users have strict timing requirements, and
|
||
|
the easiest way to solve these issues is to set eh_deadline. Since it's
|
||
|
already possible to set fast_io_fail_tmo and dev_loss_tmo from
|
||
|
multipath.conf, and have multipath take care of setting it correctly for
|
||
|
the scsi devices in sysfs, it makes sense to allow users to set
|
||
|
eh_deadline here as well.
|
||
|
|
||
|
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
|
||
|
---
|
||
|
libmultipath/config.c | 2 ++
|
||
|
libmultipath/config.h | 2 ++
|
||
|
libmultipath/configure.c | 1 +
|
||
|
libmultipath/dict.c | 10 +++++++
|
||
|
libmultipath/discovery.c | 58 +++++++++++++++++++++++++++++++++-----
|
||
|
libmultipath/propsel.c | 17 +++++++++++
|
||
|
libmultipath/propsel.h | 1 +
|
||
|
libmultipath/structs.h | 7 +++++
|
||
|
multipath/multipath.conf.5 | 16 +++++++++++
|
||
|
9 files changed, 107 insertions(+), 7 deletions(-)
|
||
|
|
||
|
diff --git a/libmultipath/config.c b/libmultipath/config.c
|
||
|
index 26f8e050..a71db2d0 100644
|
||
|
--- a/libmultipath/config.c
|
||
|
+++ b/libmultipath/config.c
|
||
|
@@ -359,6 +359,7 @@ merge_hwe (struct hwentry * dst, struct hwentry * src)
|
||
|
merge_num(flush_on_last_del);
|
||
|
merge_num(fast_io_fail);
|
||
|
merge_num(dev_loss);
|
||
|
+ merge_num(eh_deadline);
|
||
|
merge_num(user_friendly_names);
|
||
|
merge_num(retain_hwhandler);
|
||
|
merge_num(detect_prio);
|
||
|
@@ -514,6 +515,7 @@ store_hwe (vector hwtable, struct hwentry * dhwe)
|
||
|
hwe->flush_on_last_del = dhwe->flush_on_last_del;
|
||
|
hwe->fast_io_fail = dhwe->fast_io_fail;
|
||
|
hwe->dev_loss = dhwe->dev_loss;
|
||
|
+ hwe->eh_deadline = dhwe->eh_deadline;
|
||
|
hwe->user_friendly_names = dhwe->user_friendly_names;
|
||
|
hwe->retain_hwhandler = dhwe->retain_hwhandler;
|
||
|
hwe->detect_prio = dhwe->detect_prio;
|
||
|
diff --git a/libmultipath/config.h b/libmultipath/config.h
|
||
|
index f38c7639..a22c1b4e 100644
|
||
|
--- a/libmultipath/config.h
|
||
|
+++ b/libmultipath/config.h
|
||
|
@@ -64,6 +64,7 @@ struct hwentry {
|
||
|
int flush_on_last_del;
|
||
|
int fast_io_fail;
|
||
|
unsigned int dev_loss;
|
||
|
+ int eh_deadline;
|
||
|
int user_friendly_names;
|
||
|
int retain_hwhandler;
|
||
|
int detect_prio;
|
||
|
@@ -149,6 +150,7 @@ struct config {
|
||
|
int attribute_flags;
|
||
|
int fast_io_fail;
|
||
|
unsigned int dev_loss;
|
||
|
+ int eh_deadline;
|
||
|
int log_checker_err;
|
||
|
int allow_queueing;
|
||
|
int find_multipaths;
|
||
|
diff --git a/libmultipath/configure.c b/libmultipath/configure.c
|
||
|
index 96c79610..b7113291 100644
|
||
|
--- a/libmultipath/configure.c
|
||
|
+++ b/libmultipath/configure.c
|
||
|
@@ -340,6 +340,7 @@ int setup_map(struct multipath *mpp, char *params, int params_size,
|
||
|
select_gid(conf, mpp);
|
||
|
select_fast_io_fail(conf, mpp);
|
||
|
select_dev_loss(conf, mpp);
|
||
|
+ select_eh_deadline(conf, mpp);
|
||
|
select_reservation_key(conf, mpp);
|
||
|
select_deferred_remove(conf, mpp);
|
||
|
select_marginal_path_err_sample_time(conf, mpp);
|
||
|
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
|
||
|
index ce8e1cda..8fd91d8c 100644
|
||
|
--- a/libmultipath/dict.c
|
||
|
+++ b/libmultipath/dict.c
|
||
|
@@ -911,6 +911,13 @@ declare_ovr_snprint(dev_loss, print_dev_loss)
|
||
|
declare_hw_handler(dev_loss, set_dev_loss)
|
||
|
declare_hw_snprint(dev_loss, print_dev_loss)
|
||
|
|
||
|
+declare_def_handler(eh_deadline, set_undef_off_zero)
|
||
|
+declare_def_snprint(eh_deadline, print_undef_off_zero)
|
||
|
+declare_ovr_handler(eh_deadline, set_undef_off_zero)
|
||
|
+declare_ovr_snprint(eh_deadline, print_undef_off_zero)
|
||
|
+declare_hw_handler(eh_deadline, set_undef_off_zero)
|
||
|
+declare_hw_snprint(eh_deadline, print_undef_off_zero)
|
||
|
+
|
||
|
static int
|
||
|
set_pgpolicy(vector strvec, void *ptr)
|
||
|
{
|
||
|
@@ -1776,6 +1783,7 @@ init_keywords(vector keywords)
|
||
|
install_keyword("gid", &def_gid_handler, &snprint_def_gid);
|
||
|
install_keyword("fast_io_fail_tmo", &def_fast_io_fail_handler, &snprint_def_fast_io_fail);
|
||
|
install_keyword("dev_loss_tmo", &def_dev_loss_handler, &snprint_def_dev_loss);
|
||
|
+ install_keyword("eh_deadline", &def_eh_deadline_handler, &snprint_def_eh_deadline);
|
||
|
install_keyword("bindings_file", &def_bindings_file_handler, &snprint_def_bindings_file);
|
||
|
install_keyword("wwids_file", &def_wwids_file_handler, &snprint_def_wwids_file);
|
||
|
install_keyword("prkeys_file", &def_prkeys_file_handler, &snprint_def_prkeys_file);
|
||
|
@@ -1885,6 +1893,7 @@ init_keywords(vector keywords)
|
||
|
install_keyword("flush_on_last_del", &hw_flush_on_last_del_handler, &snprint_hw_flush_on_last_del);
|
||
|
install_keyword("fast_io_fail_tmo", &hw_fast_io_fail_handler, &snprint_hw_fast_io_fail);
|
||
|
install_keyword("dev_loss_tmo", &hw_dev_loss_handler, &snprint_hw_dev_loss);
|
||
|
+ install_keyword("eh_deadline", &hw_eh_deadline_handler, &snprint_hw_eh_deadline);
|
||
|
install_keyword("user_friendly_names", &hw_user_friendly_names_handler, &snprint_hw_user_friendly_names);
|
||
|
install_keyword("retain_attached_hw_handler", &hw_retain_hwhandler_handler, &snprint_hw_retain_hwhandler);
|
||
|
install_keyword("detect_prio", &hw_detect_prio_handler, &snprint_hw_detect_prio);
|
||
|
@@ -1925,6 +1934,7 @@ init_keywords(vector keywords)
|
||
|
install_keyword("flush_on_last_del", &ovr_flush_on_last_del_handler, &snprint_ovr_flush_on_last_del);
|
||
|
install_keyword("fast_io_fail_tmo", &ovr_fast_io_fail_handler, &snprint_ovr_fast_io_fail);
|
||
|
install_keyword("dev_loss_tmo", &ovr_dev_loss_handler, &snprint_ovr_dev_loss);
|
||
|
+ install_keyword("eh_deadline", &ovr_eh_deadline_handler, &snprint_ovr_eh_deadline);
|
||
|
install_keyword("user_friendly_names", &ovr_user_friendly_names_handler, &snprint_ovr_user_friendly_names);
|
||
|
install_keyword("retain_attached_hw_handler", &ovr_retain_hwhandler_handler, &snprint_ovr_retain_hwhandler);
|
||
|
install_keyword("detect_prio", &ovr_detect_prio_handler, &snprint_ovr_detect_prio);
|
||
|
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
|
||
|
index 01aadba9..a328aafa 100644
|
||
|
--- a/libmultipath/discovery.c
|
||
|
+++ b/libmultipath/discovery.c
|
||
|
@@ -577,6 +577,42 @@ sysfs_get_asymmetric_access_state(struct path *pp, char *buff, int buflen)
|
||
|
return !!preferred;
|
||
|
}
|
||
|
|
||
|
+static int
|
||
|
+sysfs_set_eh_deadline(struct multipath *mpp, struct path *pp)
|
||
|
+{
|
||
|
+ struct udev_device *hostdev;
|
||
|
+ char host_name[HOST_NAME_LEN], value[16];
|
||
|
+ int ret;
|
||
|
+
|
||
|
+ if (mpp->eh_deadline == EH_DEADLINE_UNSET)
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ sprintf(host_name, "host%d", pp->sg_id.host_no);
|
||
|
+ hostdev = udev_device_new_from_subsystem_sysname(udev,
|
||
|
+ "scsi_host", host_name);
|
||
|
+ if (!hostdev)
|
||
|
+ return 1;
|
||
|
+
|
||
|
+ if (mpp->eh_deadline == EH_DEADLINE_OFF)
|
||
|
+ sprintf(value, "off");
|
||
|
+ else if (mpp->eh_deadline == EH_DEADLINE_ZERO)
|
||
|
+ sprintf(value, "0");
|
||
|
+ else
|
||
|
+ snprintf(value, 16, "%u", mpp->eh_deadline);
|
||
|
+
|
||
|
+ ret = sysfs_attr_set_value(hostdev, "eh_deadline",
|
||
|
+ value, strlen(value));
|
||
|
+ /*
|
||
|
+ * not all scsi drivers support setting eh_deadline, so failing
|
||
|
+ * is totally reasonable
|
||
|
+ */
|
||
|
+ if (ret <= 0)
|
||
|
+ condlog(4, "%s: failed to set eh_deadline to %s, error %d", udev_device_get_sysname(hostdev), value, -ret);
|
||
|
+
|
||
|
+ udev_device_unref(hostdev);
|
||
|
+ return (ret <= 0);
|
||
|
+}
|
||
|
+
|
||
|
static void
|
||
|
sysfs_set_rport_tmo(struct multipath *mpp, struct path *pp)
|
||
|
{
|
||
|
@@ -787,16 +823,24 @@ sysfs_set_scsi_tmo (struct multipath *mpp, unsigned int checkint)
|
||
|
mpp->alias, mpp->fast_io_fail);
|
||
|
mpp->fast_io_fail = MP_FAST_IO_FAIL_OFF;
|
||
|
}
|
||
|
- if (!mpp->dev_loss && mpp->fast_io_fail == MP_FAST_IO_FAIL_UNSET)
|
||
|
+ if (!mpp->dev_loss && mpp->fast_io_fail == MP_FAST_IO_FAIL_UNSET &&
|
||
|
+ mpp->eh_deadline == EH_DEADLINE_UNSET)
|
||
|
return 0;
|
||
|
|
||
|
vector_foreach_slot(mpp->paths, pp, i) {
|
||
|
- if (pp->sg_id.proto_id == SCSI_PROTOCOL_FCP)
|
||
|
- sysfs_set_rport_tmo(mpp, pp);
|
||
|
- if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI)
|
||
|
- sysfs_set_session_tmo(mpp, pp);
|
||
|
- if (pp->sg_id.proto_id == SCSI_PROTOCOL_SAS)
|
||
|
- sysfs_set_nexus_loss_tmo(mpp, pp);
|
||
|
+ if (pp->bus != SYSFS_BUS_SCSI)
|
||
|
+ continue;
|
||
|
+
|
||
|
+ if (mpp->dev_loss ||
|
||
|
+ mpp->fast_io_fail != MP_FAST_IO_FAIL_UNSET) {
|
||
|
+ if (pp->sg_id.proto_id == SCSI_PROTOCOL_FCP)
|
||
|
+ sysfs_set_rport_tmo(mpp, pp);
|
||
|
+ else if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI)
|
||
|
+ sysfs_set_session_tmo(mpp, pp);
|
||
|
+ else if (pp->sg_id.proto_id == SCSI_PROTOCOL_SAS)
|
||
|
+ sysfs_set_nexus_loss_tmo(mpp, pp);
|
||
|
+ }
|
||
|
+ sysfs_set_eh_deadline(mpp, pp);
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
|
||
|
index 725db2b1..1150cfe8 100644
|
||
|
--- a/libmultipath/propsel.c
|
||
|
+++ b/libmultipath/propsel.c
|
||
|
@@ -776,6 +776,23 @@ out:
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+int select_eh_deadline(struct config *conf, struct multipath *mp)
|
||
|
+{
|
||
|
+ const char *origin;
|
||
|
+ char buff[12];
|
||
|
+
|
||
|
+ mp_set_ovr(eh_deadline);
|
||
|
+ mp_set_hwe(eh_deadline);
|
||
|
+ mp_set_conf(eh_deadline);
|
||
|
+ mp->eh_deadline = EH_DEADLINE_UNSET;
|
||
|
+ /* not changing sysfs in default case, so don't print anything */
|
||
|
+ return 0;
|
||
|
+out:
|
||
|
+ print_undef_off_zero(buff, 12, mp->eh_deadline);
|
||
|
+ condlog(3, "%s: eh_deadline = %s %s", mp->alias, buff, origin);
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
int select_flush_on_last_del(struct config *conf, struct multipath *mp)
|
||
|
{
|
||
|
const char *origin;
|
||
|
diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h
|
||
|
index 3d6edd8a..a68bacf0 100644
|
||
|
--- a/libmultipath/propsel.h
|
||
|
+++ b/libmultipath/propsel.h
|
||
|
@@ -17,6 +17,7 @@ int select_uid(struct config *conf, struct multipath *mp);
|
||
|
int select_gid(struct config *conf, struct multipath *mp);
|
||
|
int select_fast_io_fail(struct config *conf, struct multipath *mp);
|
||
|
int select_dev_loss(struct config *conf, struct multipath *mp);
|
||
|
+int select_eh_deadline(struct config *conf, struct multipath *mp);
|
||
|
int select_reservation_key(struct config *conf, struct multipath *mp);
|
||
|
int select_retain_hwhandler (struct config *conf, struct multipath * mp);
|
||
|
int select_detect_prio(struct config *conf, struct path * pp);
|
||
|
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
|
||
|
index 29209984..65542dea 100644
|
||
|
--- a/libmultipath/structs.h
|
||
|
+++ b/libmultipath/structs.h
|
||
|
@@ -246,6 +246,12 @@ enum fast_io_fail_states {
|
||
|
MP_FAST_IO_FAIL_ZERO = UOZ_ZERO,
|
||
|
};
|
||
|
|
||
|
+enum eh_deadline_states {
|
||
|
+ EH_DEADLINE_UNSET = UOZ_UNDEF,
|
||
|
+ EH_DEADLINE_OFF = UOZ_OFF,
|
||
|
+ EH_DEADLINE_ZERO = UOZ_ZERO,
|
||
|
+};
|
||
|
+
|
||
|
struct vpd_vendor_page {
|
||
|
int pg;
|
||
|
const char *name;
|
||
|
@@ -366,6 +372,7 @@ struct multipath {
|
||
|
int ghost_delay;
|
||
|
int ghost_delay_tick;
|
||
|
unsigned int dev_loss;
|
||
|
+ int eh_deadline;
|
||
|
uid_t uid;
|
||
|
gid_t gid;
|
||
|
mode_t mode;
|
||
|
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
|
||
|
index 6dc26f10..60954574 100644
|
||
|
--- a/multipath/multipath.conf.5
|
||
|
+++ b/multipath/multipath.conf.5
|
||
|
@@ -700,6 +700,22 @@ The default is: \fB600\fR
|
||
|
.
|
||
|
.
|
||
|
.TP
|
||
|
+.B eh_deadline
|
||
|
+Specify the maximum number of seconds the SCSI layer will spend doing error
|
||
|
+handling when scsi devices fail. After this timeout the scsi layer will perform
|
||
|
+a full HBA reset. Setting this may be necessary in cases where the rport is
|
||
|
+never lost, so \fIfast_io_fail_tmo\fR and \fIdev_loss_tmo\fR will never
|
||
|
+trigger, but (frequently due to load) scsi commands still hang. \fBNote:\fR when
|
||
|
+the scsi error handler performs the HBA reset, all target paths on that HBA
|
||
|
+will be affected. eh_deadline should only be set in cases where all targets on
|
||
|
+the affected HBAs are multipathed.
|
||
|
+.RS
|
||
|
+.TP
|
||
|
+The default is: \fB<unset>\fR
|
||
|
+.RE
|
||
|
+.
|
||
|
+.
|
||
|
+.TP
|
||
|
.B bindings_file
|
||
|
The full pathname of the binding file to be used when the user_friendly_names
|
||
|
option is set.
|
||
|
--
|
||
|
2.17.2
|
||
|
|