device-mapper-multipath/SOURCES/0066-multipathd-add-recheck...

430 lines
15 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Wed, 24 Feb 2021 00:33:22 -0600
Subject: [PATCH] multipathd: add recheck_wwid option to verify the path wwid
There are cases where the wwid of a path changes due to LUN remapping
without triggering uevent for the changed path. Multipathd has no method
for trying to catch these cases, and corruption has resulted because of
it.
In order to have a better chance at catching these cases, multipath now
has a recheck_wwid option. If this is set to "yes", when a failed path
has become active again, multipathd will recheck its wwid. If multipathd
notices that a path's wwid has changed, it will remove and re-add the
path, just like the existing wwid checking code for change events does.
In cases where the no uevent occurs, both the udev database entry and
sysfs will have the old wwid, so the only way to get a current wwid is
to ask the device directly. Currently multipath only has code to
directly get the wwid for scsi devices, so this option only effects scsi
devices, and they must be configured to be able to use the uid_fallback
methods. To make sure both the sysfs and udev database values are
updated, multipathd triggers a both a rescan of the device and a udev
add event.
Co-developed-by: Chongyun Wu <wucy11@chinatelecom.cn>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>Reviewed-by: Martin Wilck <mwilck@suse.com>
---
libmultipath/config.c | 2 +
libmultipath/config.h | 2 +
libmultipath/configure.c | 4 +-
libmultipath/configure.h | 2 +
libmultipath/defaults.h | 1 +
libmultipath/dict.c | 11 ++++++
libmultipath/discovery.c | 3 +-
libmultipath/discovery.h | 1 +
libmultipath/propsel.c | 21 ++++++++++
libmultipath/propsel.h | 1 +
libmultipath/structs.h | 7 ++++
multipath/multipath.conf.5 | 14 +++++++
multipathd/cli_handlers.c | 9 +++++
multipathd/main.c | 78 ++++++++++++++++++++++++++++++++++++++
multipathd/main.h | 2 +
15 files changed, 155 insertions(+), 3 deletions(-)
diff --git a/libmultipath/config.c b/libmultipath/config.c
index dd645f17..abbddaf1 100644
--- a/libmultipath/config.c
+++ b/libmultipath/config.c
@@ -371,6 +371,7 @@ merge_hwe (struct hwentry * dst, struct hwentry * src)
merge_num(max_sectors_kb);
merge_num(ghost_delay);
merge_num(all_tg_pt);
+ merge_num(recheck_wwid);
merge_num(vpd_vendor_id);
merge_num(san_path_err_threshold);
merge_num(san_path_err_forget_rate);
@@ -762,6 +763,7 @@ load_config (char * file)
conf->remove_retries = 0;
conf->ghost_delay = DEFAULT_GHOST_DELAY;
conf->all_tg_pt = DEFAULT_ALL_TG_PT;
+ conf->recheck_wwid = DEFAULT_RECHECK_WWID;
/*
* preload default hwtable
*/
diff --git a/libmultipath/config.h b/libmultipath/config.h
index a22c1b4e..e2e3f143 100644
--- a/libmultipath/config.h
+++ b/libmultipath/config.h
@@ -84,6 +84,7 @@ struct hwentry {
int ghost_delay;
int all_tg_pt;
int vpd_vendor_id;
+ int recheck_wwid;
char * bl_product;
};
@@ -188,6 +189,7 @@ struct config {
int skip_delegate;
unsigned int version[3];
unsigned int sequence_nr;
+ int recheck_wwid;
char * multipath_dir;
char * selector;
diff --git a/libmultipath/configure.c b/libmultipath/configure.c
index a6893d8d..f24d9283 100644
--- a/libmultipath/configure.c
+++ b/libmultipath/configure.c
@@ -506,8 +506,8 @@ trigger_udev_change(const struct multipath *mpp)
udev_device_unref(udd);
}
-static void trigger_partitions_udev_change(struct udev_device *dev,
- const char *action, int len)
+void trigger_partitions_udev_change(struct udev_device *dev,
+ const char *action, int len)
{
struct udev_enumerate *part_enum;
struct udev_list_entry *item;
diff --git a/libmultipath/configure.h b/libmultipath/configure.h
index 0e33bf40..81090dd4 100644
--- a/libmultipath/configure.h
+++ b/libmultipath/configure.h
@@ -57,3 +57,5 @@ int get_refwwid (enum mpath_cmds cmd, char * dev, enum devtypes dev_type,
int reload_map(struct vectors *vecs, struct multipath *mpp, int refresh, int is_daemon);
struct udev_device *get_udev_device(const char *dev, enum devtypes dev_type);
void trigger_paths_udev_change(struct multipath *mpp, bool is_mpath);
+void trigger_partitions_udev_change(struct udev_device *dev, const char *action,
+ int len);
diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h
index 52fe05b9..f1cb000d 100644
--- a/libmultipath/defaults.h
+++ b/libmultipath/defaults.h
@@ -50,6 +50,7 @@
#define DEFAULT_FIND_MULTIPATHS_TIMEOUT -10
#define DEFAULT_UNKNOWN_FIND_MULTIPATHS_TIMEOUT 1
#define DEFAULT_ALL_TG_PT ALL_TG_PT_OFF
+#define DEFAULT_RECHECK_WWID RECHECK_WWID_OFF
/* Enable all foreign libraries by default */
#define DEFAULT_ENABLE_FOREIGN ""
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
index 8fd91d8c..13698b76 100644
--- a/libmultipath/dict.c
+++ b/libmultipath/dict.c
@@ -1413,6 +1413,14 @@ declare_hw_snprint(all_tg_pt, print_yes_no_undef)
declare_def_handler(marginal_pathgroups, set_yes_no)
declare_def_snprint(marginal_pathgroups, print_yes_no)
+declare_def_handler(recheck_wwid, set_yes_no_undef)
+declare_def_snprint_defint(recheck_wwid, print_yes_no_undef, DEFAULT_RECHECK_WWID)
+declare_ovr_handler(recheck_wwid, set_yes_no_undef)
+declare_ovr_snprint(recheck_wwid, print_yes_no_undef)
+declare_hw_handler(recheck_wwid, set_yes_no_undef)
+declare_hw_snprint(recheck_wwid, print_yes_no_undef)
+
+
static int
def_uxsock_timeout_handler(struct config *conf, vector strvec)
{
@@ -1824,6 +1832,7 @@ init_keywords(vector keywords)
install_keyword("enable_foreign", &def_enable_foreign_handler,
&snprint_def_enable_foreign);
install_keyword("marginal_pathgroups", &def_marginal_pathgroups_handler, &snprint_def_marginal_pathgroups);
+ install_keyword("recheck_wwid", &def_recheck_wwid_handler, &snprint_def_recheck_wwid);
__deprecated install_keyword("default_selector", &def_selector_handler, NULL);
__deprecated install_keyword("default_path_grouping_policy", &def_pgpolicy_handler, NULL);
__deprecated install_keyword("default_uid_attribute", &def_uid_attribute_handler, NULL);
@@ -1913,6 +1922,7 @@ init_keywords(vector keywords)
install_keyword("ghost_delay", &hw_ghost_delay_handler, &snprint_hw_ghost_delay);
install_keyword("all_tg_pt", &hw_all_tg_pt_handler, &snprint_hw_all_tg_pt);
install_keyword("vpd_vendor", &hw_vpd_vendor_handler, &snprint_hw_vpd_vendor);
+ install_keyword("recheck_wwid", &hw_recheck_wwid_handler, &snprint_hw_recheck_wwid);
install_sublevel_end();
install_keyword_root("overrides", &overrides_handler);
@@ -1954,6 +1964,7 @@ init_keywords(vector keywords)
install_keyword("max_sectors_kb", &ovr_max_sectors_kb_handler, &snprint_ovr_max_sectors_kb);
install_keyword("ghost_delay", &ovr_ghost_delay_handler, &snprint_ovr_ghost_delay);
install_keyword("all_tg_pt", &ovr_all_tg_pt_handler, &snprint_ovr_all_tg_pt);
+ install_keyword("recheck_wwid", &ovr_recheck_wwid_handler, &snprint_ovr_recheck_wwid);
install_keyword_root("multipaths", &multipaths_handler);
install_keyword_multi("multipath", &multipath_handler, NULL);
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
index 126a70b3..bc267609 100644
--- a/libmultipath/discovery.c
+++ b/libmultipath/discovery.c
@@ -2040,7 +2040,7 @@ static ssize_t uid_fallback(struct path *pp, int path_state,
return len;
}
-static bool has_uid_fallback(struct path *pp)
+bool has_uid_fallback(struct path *pp)
{
/*
* Falling back to direct WWID determination is dangerous
@@ -2072,6 +2072,7 @@ get_uid (struct path * pp, int path_state, struct udev_device *udev,
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
select_getuid(conf, pp);
+ select_recheck_wwid(conf, pp);
pthread_cleanup_pop(1);
}
diff --git a/libmultipath/discovery.h b/libmultipath/discovery.h
index d3193daf..a5446b4d 100644
--- a/libmultipath/discovery.h
+++ b/libmultipath/discovery.h
@@ -54,6 +54,7 @@ ssize_t sysfs_get_inquiry(struct udev_device *udev,
unsigned char *buff, size_t len);
int sysfs_get_asymmetric_access_state(struct path *pp,
char *buff, int buflen);
+bool has_uid_fallback(struct path *pp);
int get_uid(struct path * pp, int path_state, struct udev_device *udev,
int allow_fallback);
bool is_vpd_page_supported(int fd, int pg);
diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
index 1150cfe8..127b3370 100644
--- a/libmultipath/propsel.c
+++ b/libmultipath/propsel.c
@@ -581,6 +581,27 @@ out:
return 0;
}
+/* must be called after select_getuid */
+int select_recheck_wwid(struct config *conf, struct path * pp)
+{
+ const char *origin;
+
+ pp_set_ovr(recheck_wwid);
+ pp_set_hwe(recheck_wwid);
+ pp_set_conf(recheck_wwid);
+ pp_set_default(recheck_wwid, DEFAULT_RECHECK_WWID);
+out:
+ if (pp->recheck_wwid == RECHECK_WWID_ON &&
+ (pp->bus != SYSFS_BUS_SCSI || pp->getuid != NULL ||
+ !has_uid_fallback(pp))) {
+ pp->recheck_wwid = RECHECK_WWID_OFF;
+ origin = "(setting: unsupported by device type/config)";
+ }
+ condlog(3, "%s: recheck_wwid = %i %s", pp->dev, pp->recheck_wwid,
+ origin);
+ return 0;
+}
+
void
detect_prio(struct config *conf, struct path * pp)
{
diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h
index a68bacf0..72a7e33c 100644
--- a/libmultipath/propsel.h
+++ b/libmultipath/propsel.h
@@ -7,6 +7,7 @@ int select_features (struct config *conf, struct multipath * mp);
int select_hwhandler (struct config *conf, struct multipath * mp);
int select_checker(struct config *conf, struct path *pp);
int select_getuid (struct config *conf, struct path * pp);
+int select_recheck_wwid(struct config *conf, struct path * pp);
int select_prio (struct config *conf, struct path * pp);
int select_find_multipaths_timeout(struct config *conf, struct path *pp);
int select_no_path_retry(struct config *conf, struct multipath *mp);
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 65542dea..a5dbad5b 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -252,6 +252,12 @@ enum eh_deadline_states {
EH_DEADLINE_ZERO = UOZ_ZERO,
};
+enum recheck_wwid_states {
+ RECHECK_WWID_UNDEF = YNU_UNDEF,
+ RECHECK_WWID_OFF = YNU_NO,
+ RECHECK_WWID_ON = YNU_YES,
+};
+
struct vpd_vendor_page {
int pg;
const char *name;
@@ -326,6 +332,7 @@ struct path {
int find_multipaths_timeout;
int marginal;
int vpd_vendor_id;
+ int recheck_wwid;
/* configlet pointers */
vector hwe;
struct gen_path generic_path;
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
index a5686090..6da15aad 100644
--- a/multipath/multipath.conf.5
+++ b/multipath/multipath.conf.5
@@ -1256,6 +1256,20 @@ The default is: \fB\(dq\(dq\fR (the empty regular expression)
.RE
.
.
+.TP
+.B recheck_wwid
+If set to \fIyes\fR, when a failed path is restored, its wwid is rechecked. If
+the wwid has changed, the path is removed from the current multipath device,
+and re-added as a new path. Multipathd will also recheck a path's wwid if it is
+manually re-added. This option only works for SCSI devices that are configured
+to use the default uid_attribute, \fIID_SERIAL\fR, or sysfs for getting their
+wwid.
+.RS
+.TP
+The default is \fBno\fR
+.RE
+.
+.
.
.\" ----------------------------------------------------------------------------
diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c
index 782bb003..8b4bd187 100644
--- a/multipathd/cli_handlers.c
+++ b/multipathd/cli_handlers.c
@@ -715,6 +715,15 @@ cli_add_path (void * v, char ** reply, int * len, void * data)
pp = find_path_by_dev(vecs->pathvec, param);
if (pp) {
condlog(2, "%s: path already in pathvec", param);
+
+ if (pp->recheck_wwid == RECHECK_WWID_ON &&
+ check_path_wwid_change(pp)) {
+ condlog(0, "%s: wwid changed. Removing device",
+ pp->dev);
+ handle_path_wwid_change(pp, vecs);
+ return 1;
+ }
+
if (pp->mpp)
return 0;
} else {
diff --git a/multipathd/main.c b/multipathd/main.c
index cc1aeea2..1fbc31eb 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -822,6 +822,73 @@ ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
return flush_map(mpp, vecs, 0);
}
+static void
+rescan_path(struct udev_device *parent)
+{
+ while(parent) {
+ const char *subsys = udev_device_get_subsystem(parent);
+ if (subsys && !strncmp(subsys, "scsi", 4))
+ break;
+ parent = udev_device_get_parent(parent);
+ }
+ if (parent)
+ sysfs_attr_set_value(parent, "rescan", "1", strlen("1"));
+}
+
+void
+handle_path_wwid_change(struct path *pp, struct vectors *vecs)
+{
+ struct udev_device *udd;
+
+ if (!pp || !pp->udev)
+ return;
+
+ udd = udev_device_ref(pp->udev);
+ if (ev_remove_path(pp, vecs, 1) != 0 && pp->mpp) {
+ pp->dmstate = PSTATE_FAILED;
+ dm_fail_path(pp->mpp->alias, pp->dev_t);
+ }
+ rescan_path(udd);
+ sysfs_attr_set_value(udd, "uevent", "add", strlen("add"));
+ trigger_partitions_udev_change(udd, "add", strlen("add"));
+ udev_device_unref(udd);
+}
+
+bool
+check_path_wwid_change(struct path *pp)
+{
+ char wwid[WWID_SIZE];
+ int len = 0;
+ size_t i;
+
+ if (!strlen(pp->wwid))
+ return false;
+
+ /* Get the real fresh device wwid by sgio. sysfs still has old
+ * data, so only get_vpd_sgio will work to get the new wwid */
+ len = get_vpd_sgio(pp->fd, 0x83, 0, wwid, WWID_SIZE);
+
+ if (len <= 0) {
+ condlog(2, "%s: failed to check wwid by sgio: len = %d",
+ pp->dev, len);
+ return false;
+ }
+
+ /*Strip any trailing blanks */
+ for (i = strlen(pp->wwid); i > 0 && pp->wwid[i-1] == ' '; i--);
+ /* no-op */
+ pp->wwid[i] = '\0';
+ condlog(4, "%s: Got wwid %s by sgio", pp->dev, wwid);
+
+ if (strncmp(wwid, pp->wwid, WWID_SIZE)) {
+ condlog(0, "%s: wwid '%s' doesn't match wwid '%s' from device",
+ pp->dev, pp->wwid, wwid);
+ return true;
+ }
+
+ return false;
+}
+
static int
uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
{
@@ -1241,6 +1308,7 @@ uev_update_path (struct uevent *uev, struct vectors * vecs)
condlog(0, "%s: path wwid changed from '%s' to '%s'",
uev->kernel, wwid, pp->wwid);
ev_remove_path(pp, vecs, 1);
+ rescan_path(uev->udev);
needs_reinit = 1;
goto out;
} else {
@@ -2101,6 +2169,16 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
return 0;
set_no_path_retry(pp->mpp);
+ if (pp->recheck_wwid == RECHECK_WWID_ON &&
+ (newstate == PATH_UP || newstate == PATH_GHOST) &&
+ ((pp->state != PATH_UP && pp->state != PATH_GHOST) ||
+ pp->dmstate == PSTATE_FAILED) &&
+ check_path_wwid_change(pp)) {
+ condlog(0, "%s: path wwid change detected. Removing", pp->dev);
+ handle_path_wwid_change(pp, vecs);
+ return 0;
+ }
+
if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
(san_path_check_enabled(pp->mpp) ||
marginal_path_check_enabled(pp->mpp))) {
diff --git a/multipathd/main.h b/multipathd/main.h
index 5dff17e5..8f0028a9 100644
--- a/multipathd/main.h
+++ b/multipathd/main.h
@@ -49,4 +49,6 @@ int update_multipath (struct vectors *vecs, char *mapname, int reset);
int update_path_groups(struct multipath *mpp, struct vectors *vecs,
int refresh);
+void handle_path_wwid_change(struct path *pp, struct vectors *vecs);
+bool check_path_wwid_change(struct path *pp);
#endif /* MAIN_H */
--
2.17.2