diff --git a/0100-libmultipath-avoid-temporarily-enabling-queueing-on-.patch b/0100-libmultipath-avoid-temporarily-enabling-queueing-on-.patch new file mode 100644 index 0000000..2784399 --- /dev/null +++ b/0100-libmultipath-avoid-temporarily-enabling-queueing-on-.patch @@ -0,0 +1,42 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Martin Wilck +Date: Mon, 18 Dec 2023 16:30:42 -0500 +Subject: [PATCH] libmultipath: avoid temporarily enabling queueing on reload + +Instead of always enabling queueing when a map is reloaded with +no_path_retry set to a positive number, check if the map has timed out +in recovery mode, and only enable queueing if it has not. This saves +multipathd from having to disable queueing on the map immediately after +the reload. + +Signed-off-by: Benjamin Marzinski +--- + libmultipath/dmparser.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/libmultipath/dmparser.c b/libmultipath/dmparser.c +index 16377c54..1ea2d619 100644 +--- a/libmultipath/dmparser.c ++++ b/libmultipath/dmparser.c +@@ -61,9 +61,19 @@ int assemble_map(struct multipath *mp, char **params) + nr_priority_groups = VECTOR_SIZE(mp->pg); + initial_pg_nr = (nr_priority_groups ? 
mp->bestpg : 0); + +- if (mp->no_path_retry != NO_PATH_RETRY_UNDEF && +- mp->no_path_retry != NO_PATH_RETRY_FAIL) { ++ switch (mp->no_path_retry) { ++ case NO_PATH_RETRY_UNDEF: ++ case NO_PATH_RETRY_FAIL: ++ break; ++ default: ++ /* don't enable queueing if no_path_retry has timed out */ ++ if (mp->in_recovery && mp->retry_tick == 0 && ++ count_active_paths(mp) == 0) ++ break; ++ /* fallthrough */ ++ case NO_PATH_RETRY_QUEUE: + add_feature(&mp->features, no_path_retry); ++ break; + } + if (mp->retain_hwhandler == RETAIN_HWHANDLER_ON && + get_linux_version_code() < KERNEL_VERSION(4, 3, 0)) diff --git a/0101-multipathd-Make-sure-to-disable-queueing-if-recovery.patch b/0101-multipathd-Make-sure-to-disable-queueing-if-recovery.patch new file mode 100644 index 0000000..62c3ed0 --- /dev/null +++ b/0101-multipathd-Make-sure-to-disable-queueing-if-recovery.patch @@ -0,0 +1,76 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Wed, 22 Nov 2023 16:41:22 -0500 +Subject: [PATCH] multipathd: Make sure to disable queueing if recovery has + failed. + +If a multipath device has no_path_retry set to a number and has lost all +paths, gone into recovery mode, and timed out, it will disable +queue_if_no_path. After that, if the device is reloaded by multipath +outside of multipathd, it will re-enable queueing on the device. When +multipathd later calls set_no_path_retry() to update the queueing state, +it will not disable queue_if_no_path, since the device is still in the +recovery state, so it believes no work needs to be done. The device will +remain in the recovery state, with retry_ticks at 0, and queueing +enabled, even though there are no usable paths. + +To fix this, in set_no_path_retry(), if no_path_retry is set to a number +and the device is queueing but it is in recovery mode and out of +retries with no usable paths, manually disable queue_if_no_path. 
+ +Signed-off-by: Benjamin Marzinski +--- + libmultipath/structs_vec.c | 26 ++++++++++++++++++++++++-- + 1 file changed, 24 insertions(+), 2 deletions(-) + +diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c +index 4a32b405..86ad89ca 100644 +--- a/libmultipath/structs_vec.c ++++ b/libmultipath/structs_vec.c +@@ -614,8 +614,19 @@ void __set_no_path_retry(struct multipath *mpp, bool check_features) + !mpp->in_recovery) + dm_queue_if_no_path(mpp->alias, 1); + leave_recovery_mode(mpp); +- } else +- enter_recovery_mode(mpp); ++ } else { ++ /* ++ * If in_recovery is set, enter_recovery_mode does ++ * nothing. If the device is already in recovery ++ * mode and has already timed out, manually call ++ * dm_queue_if_no_path to stop it from queueing. ++ */ ++ if ((!check_features || is_queueing) && ++ mpp->in_recovery && mpp->retry_tick == 0) ++ dm_queue_if_no_path(mpp->alias, 0); ++ if (pathcount(mpp, PATH_PENDING) == 0) ++ enter_recovery_mode(mpp); ++ } + break; + } + } +@@ -761,6 +772,11 @@ int verify_paths(struct multipath *mpp) + * -1 (FAIL) : fail_if_no_path + * 0 (UNDEF) : nothing + * >0 : queue_if_no_path enabled, turned off after polling n times ++ * ++ * Since this will only be called when fail_path(), update_multipath(), or ++ * io_err_stat_handle_pathfail() are failing a previously active path, the ++ * device cannot already be in recovery mode, so there will never be a need ++ * to disable queueing here. + */ + void update_queue_mode_del_path(struct multipath *mpp) + { +@@ -774,6 +790,12 @@ void update_queue_mode_del_path(struct multipath *mpp) + condlog(2, "%s: remaining active paths: %d", mpp->alias, active); + } + ++/* ++ * Since this will only be called from check_path() -> reinstate_path() after ++ * the queueing state has been updated in set_no_path_retry, this does not ++ * need to worry about modifying the queueing state except when actually ++ * leaving recovery mode. 
++ */ + void update_queue_mode_add_path(struct multipath *mpp) + { + int active = count_active_paths(mpp); diff --git a/0102-multipathd-remove-nopath-flushing-code-from-flush_ma.patch b/0102-multipathd-remove-nopath-flushing-code-from-flush_ma.patch new file mode 100644 index 0000000..ef04153 --- /dev/null +++ b/0102-multipathd-remove-nopath-flushing-code-from-flush_ma.patch @@ -0,0 +1,131 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Wed, 6 Dec 2023 17:22:02 -0500 +Subject: [PATCH] multipathd: remove nopath flushing code from flush_map() + +Instead of flush_map() handling both user requested flushes and +automatic flushes when the last path has been deleted, make +flush_map_nopaths() handle the automatic flushes itself, since a later +patch will change the behavior of flush_map(). + +Reviewed-by: Martin Wilck +Signed-off-by: Benjamin Marzinski +--- + multipathd/cli_handlers.c | 2 +- + multipathd/main.c | 45 +++++++++++++++++---------------------- + multipathd/main.h | 2 +- + 3 files changed, 21 insertions(+), 28 deletions(-) + +diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c +index 53bebc8d..f04fb558 100644 +--- a/multipathd/cli_handlers.c ++++ b/multipathd/cli_handlers.c +@@ -796,7 +796,7 @@ cli_del_maps (void *v, char **reply, int *len, void *data) + + condlog(2, "remove maps (operator)"); + vector_foreach_slot(vecs->mpvec, mpp, i) { +- if (flush_map(mpp, vecs, 0)) ++ if (flush_map(mpp, vecs)) + ret++; + else + i--; +diff --git a/multipathd/main.c b/multipathd/main.c +index 6d1a5e4e..1b5f82e7 100644 +--- a/multipathd/main.c ++++ b/multipathd/main.c +@@ -490,12 +490,11 @@ int update_multipath (struct vectors *vecs, char *mapname, int reset) + + static bool + flush_map_nopaths(struct multipath *mpp, struct vectors *vecs) { +- char alias[WWID_SIZE]; ++ int r; + + /* + * flush_map will fail if the device is open + */ +- strlcpy(alias, mpp->alias, WWID_SIZE); + if (mpp->flush_on_last_del 
== FLUSH_ENABLED) { + condlog(2, "%s Last path deleted, disabling queueing", + mpp->alias); +@@ -505,11 +504,20 @@ flush_map_nopaths(struct multipath *mpp, struct vectors *vecs) { + mpp->stat_map_failures++; + dm_queue_if_no_path(mpp->alias, 0); + } +- if (!flush_map(mpp, vecs, 1)) { +- condlog(2, "%s: removed map after removing all paths", alias); +- return true; ++ r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove); ++ if (r) { ++ if (r == 1) ++ condlog(0, "%s: can't flush", mpp->alias); ++ else { ++ condlog(2, "%s: devmap deferred remove", mpp->alias); ++ mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS; ++ } ++ return false; + } +- return false; ++ ++ condlog(2, "%s: map flushed after removing all paths", mpp->alias); ++ remove_map_and_stop_waiter(mpp, vecs); ++ return true; + } + + static void +@@ -685,30 +693,15 @@ sync_maps_state(vector mpvec) + } + + int +-flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths) ++flush_map(struct multipath * mpp, struct vectors * vecs) + { +- int r; +- +- if (nopaths) +- r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove); +- else +- r = dm_flush_map(mpp->alias); +- /* +- * clear references to this map before flushing so we can ignore +- * the spurious uevent we may generate with the dm_flush_map call below +- */ ++ int r = dm_flush_map(mpp->alias); + if (r) { +- if (r == 1) +- condlog(0, "%s: can't flush", mpp->alias); +- else { +- condlog(2, "%s: devmap deferred remove", mpp->alias); +- mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS; +- } ++ condlog(0, "%s: can't flush", mpp->alias); + return r; + } +- else +- condlog(2, "%s: map flushed", mpp->alias); + ++ condlog(2, "%s: map flushed", mpp->alias); + remove_map_and_stop_waiter(mpp, vecs); + + return 0; +@@ -866,7 +859,7 @@ ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs) + mpp->alias, mpp->dmi->minor, minor); + return 1; + } +- return flush_map(mpp, vecs, 0); ++ return flush_map(mpp, vecs); + } + + static 
void +diff --git a/multipathd/main.h b/multipathd/main.h +index dbae4935..4138faa4 100644 +--- a/multipathd/main.h ++++ b/multipathd/main.h +@@ -43,7 +43,7 @@ int ev_add_path (struct path *, struct vectors *, int); + int ev_remove_path (struct path *, struct vectors *, int); + int ev_add_map (char *, const char *, struct vectors *); + int ev_remove_map (char *, char *, int, struct vectors *); +-int flush_map(struct multipath *, struct vectors *, int); ++int flush_map(struct multipath *, struct vectors *); + int set_config_state(enum daemon_status); + void * mpath_alloc_prin_response(int prin_sa); + int prin_do_scsi_ioctl(char *, int rq_servact, struct prin_resp * resp, diff --git a/0103-multipathd-make-flush_map-delete-maps-like-the-multi.patch b/0103-multipathd-make-flush_map-delete-maps-like-the-multi.patch new file mode 100644 index 0000000..d3fa6e3 --- /dev/null +++ b/0103-multipathd-make-flush_map-delete-maps-like-the-multi.patch @@ -0,0 +1,39 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Thu, 7 Dec 2023 11:23:18 -0500 +Subject: [PATCH] multipathd: make flush_map() delete maps like the multipath + command + +When the multipath command tries to delete a multipath device, it first +disables queueing and then suspends the device to force the IOs to get +flushed. Then it attempts to delete the device and any kpartx +partitions. multipathd, on the other hand, simply tries to delete the +device and kpartx partitions, without disabling queueing or suspending. +If there are no paths but there is outstanding IO, multipathd will hang +trying to delete the last kpartx device. This is because it must be the +last opener of the multipath device (multipath won't try to delete the +device if it has any openers besides the kpartx devices) and the kernel +will not allow the last opener of a block device to close until all the +outstanding IO is flushed. 
This hang can be avoided if multipathd calls +dm_suspend_and_flush_map() like the multipath command does, instead of +dm_flush_map(). + +Reviewed-by: Martin Wilck +Signed-off-by: Benjamin Marzinski +--- + multipathd/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/multipathd/main.c b/multipathd/main.c +index 1b5f82e7..3eeca82f 100644 +--- a/multipathd/main.c ++++ b/multipathd/main.c +@@ -695,7 +695,7 @@ sync_maps_state(vector mpvec) + int + flush_map(struct multipath * mpp, struct vectors * vecs) + { +- int r = dm_flush_map(mpp->alias); ++ int r = dm_suspend_and_flush_map(mpp->alias, 0); + if (r) { + condlog(0, "%s: can't flush", mpp->alias); + return r; diff --git a/0104-multipathd-disable-queueing-when-removing-unknown-ma.patch b/0104-multipathd-disable-queueing-when-removing-unknown-ma.patch new file mode 100644 index 0000000..51d68ae --- /dev/null +++ b/0104-multipathd-disable-queueing-when-removing-unknown-ma.patch @@ -0,0 +1,83 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Fri, 8 Dec 2023 14:50:31 -0500 +Subject: [PATCH] multipathd: disable queueing when removing unknown maps + +Make cli_del_maps() call dm_suspend_and_flush_map() for the unknown +multipath devices as well. + +After this change, all callers of dm_flush_maps() set need_suspend, so +simplify dm_flush_maps(). 
+ +Signed-off-by: Benjamin Marzinski +--- + libmultipath/devmapper.c | 7 ++----- + libmultipath/devmapper.h | 2 +- + multipath/main.c | 2 +- + multipathd/cli_handlers.c | 2 +- + 4 files changed, 5 insertions(+), 8 deletions(-) + +diff --git a/libmultipath/devmapper.c b/libmultipath/devmapper.c +index 4b2e8a15..f9de3358 100644 +--- a/libmultipath/devmapper.c ++++ b/libmultipath/devmapper.c +@@ -1145,7 +1145,7 @@ dm_flush_map_nopaths(const char * mapname, + + #endif + +-int dm_flush_maps (int need_suspend, int retries) ++int dm_flush_maps (int retries) + { + int r = 1; + struct dm_task *dmt; +@@ -1170,10 +1170,7 @@ int dm_flush_maps (int need_suspend, int retries) + goto out; + + do { +- if (need_suspend) +- r |= dm_suspend_and_flush_map(names->name, retries); +- else +- r |= dm_flush_map(names->name); ++ r |= dm_suspend_and_flush_map(names->name, retries); + next = names->next; + names = (void *) names + next; + } while (next); +diff --git a/libmultipath/devmapper.h b/libmultipath/devmapper.h +index 45a676de..808da28d 100644 +--- a/libmultipath/devmapper.h ++++ b/libmultipath/devmapper.h +@@ -55,7 +55,7 @@ int dm_flush_map_nopaths(const char * mapname, int deferred_remove); + #define dm_suspend_and_flush_map(mapname, retries) \ + _dm_flush_map(mapname, 1, 0, 1, retries) + int dm_cancel_deferred_remove(struct multipath *mpp); +-int dm_flush_maps (int need_suspend, int retries); ++int dm_flush_maps (int retries); + int dm_fail_path(const char * mapname, char * path); + int dm_reinstate_path(const char * mapname, char * path); + int dm_queue_if_no_path(const char *mapname, int enable); +diff --git a/multipath/main.c b/multipath/main.c +index f1077421..e296be6e 100644 +--- a/multipath/main.c ++++ b/multipath/main.c +@@ -1126,7 +1126,7 @@ main (int argc, char *argv[]) + goto out; + } + else if (cmd == CMD_FLUSH_ALL) { +- r = dm_flush_maps(1, retries) ? RTVL_FAIL : RTVL_OK; ++ r = dm_flush_maps(retries) ? 
RTVL_FAIL : RTVL_OK; + goto out; + } + while ((r = configure(conf, cmd, dev_type, dev)) == RTVL_RETRY) +diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c +index f04fb558..aca8e2df 100644 +--- a/multipathd/cli_handlers.c ++++ b/multipathd/cli_handlers.c +@@ -802,7 +802,7 @@ cli_del_maps (void *v, char **reply, int *len, void *data) + i--; + } + /* flush any multipath maps that aren't currently known by multipathd */ +- ret |= dm_flush_maps(0, 0); ++ ret |= dm_flush_maps(0); + return ret; + } + diff --git a/device-mapper-multipath.spec b/device-mapper-multipath.spec index ed73eb1..b2854d7 100644 --- a/device-mapper-multipath.spec +++ b/device-mapper-multipath.spec @@ -1,6 +1,6 @@ Name: device-mapper-multipath Version: 0.8.7 -Release: 25%{?dist} +Release: 26%{?dist} Summary: Tools to manage multipath devices using device-mapper License: GPLv2 URL: http://christophe.varoqui.free.fr/ @@ -109,6 +109,11 @@ Patch0096: 0096-multipathd-check-and-update-all-paths-when-in-cli_re.patch Patch0097: 0097-multipathd-move-post-reloading-commands-into-resize_.patch Patch0098: 0098-multipathd-move-resize_map-to-multipathd-main.c.patch Patch0099: 0099-multipathd-Add-auto_resize-config-option.patch +Patch0100: 0100-libmultipath-avoid-temporarily-enabling-queueing-on-.patch +Patch0101: 0101-multipathd-Make-sure-to-disable-queueing-if-recovery.patch +Patch0102: 0102-multipathd-remove-nopath-flushing-code-from-flush_ma.patch +Patch0103: 0103-multipathd-make-flush_map-delete-maps-like-the-multi.patch +Patch0104: 0104-multipathd-disable-queueing-when-removing-unknown-ma.patch # runtime @@ -312,6 +317,19 @@ fi %{_pkgconfdir}/libdmmp.pc %changelog +* Wed Jan 3 2024 Benjamin Marzinski - 0.8.7-26 +- Add 0100-libmultipath-avoid-temporarily-enabling-queueing-on-.patch +- Add 0101-multipathd-Make-sure-to-disable-queueing-if-recovery.patch + * Fixes RHEL-17234 ("RHEL9 dm-multipath no_path_retry [retry number] is + undone if paths are later lost for an open map.") +- Add 
0102-multipathd-remove-nopath-flushing-code-from-flush_ma.patch +- Add 0103-multipathd-make-flush_map-delete-maps-like-the-multi.patch +- Add 0104-multipathd-disable-queueing-when-removing-unknown-ma.patch + * Fixes RHEL-4998 ("When remove external lun from host, rescan lun status + will cause the OS hang and no response") +- Resolves: RHEL-4998 +- Resolves: RHEL-17234 + * Mon Nov 20 2023 Benjamin Marzinski - 0.8.7-25 - Add 0094-libmultipath-Add-max_retries-config-option.patch * Fixes RHEL-1729 ("Allow multipathd to set the max_retries of the