diff --git a/SOURCES/0197-multipath-tools-update-NFINIDAT-InfiniBox-config-in-.patch b/SOURCES/0197-multipath-tools-update-NFINIDAT-InfiniBox-config-in-.patch new file mode 100644 index 0000000..6e6747a --- /dev/null +++ b/SOURCES/0197-multipath-tools-update-NFINIDAT-InfiniBox-config-in-.patch @@ -0,0 +1,65 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Xose Vazquez Perez +Date: Sat, 16 Aug 2025 20:16:23 +0200 +Subject: [PATCH] multipath-tools: update NFINIDAT/InfiniBox config in hwtable + +New recommended values for SLES-15, RHEL-8, and Ubuntu-22, or above: +https://lh3.googleusercontent.com/pw/AP1GczMchJ6bcMIohp_g3Ik5DS6BZv_IW0iwaEXf968sJsR8fr_p3mR4ThRpmHpZE_VMnlcV8j0uuuI2kz-eoqekRCUBxyTBKS7n-4WFMsUiecq8i7nIjEuhfZFrV54DBQLDzGt6ofakAAF1L-ZcAuCWW18=w0-h0 + +device { + vendor "NFINIDAT" + product "InfiniBox" + path_grouping_policy "group_by_prio" + path_checker "tur" + features 0 + hardware_handler "1 alua" + prio "alua" + rr_weight "priorities" + no_path_retry "queue" + rr_min_io 1 + rr_min_io_rq 1 + flush_on_last_del "yes" + fast_io_fail_tmo 15 + dev_loss_tmo "infinity" + path_selector "service-time 0" + failback "immediate" + detect_prio "no" + user_friendly_names "no" + } + +Cc: Martin Wilck +Cc: Benjamin Marzinski +Cc: Christophe Varoqui +Cc: DM_DEVEL-ML +Signed-off-by: Xose Vazquez Perez +Reviewed-by: Martin Wilck +Signed-off-by: Benjamin Marzinski +--- + libmultipath/hwtable.c | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +diff --git a/libmultipath/hwtable.c b/libmultipath/hwtable.c +index 2d359829..188ad5e0 100644 +--- a/libmultipath/hwtable.c ++++ b/libmultipath/hwtable.c +@@ -1124,16 +1124,13 @@ static struct hwentry default_hw[] = { + .vendor = "NFINIDAT", + .product = "InfiniBox", + .pgpolicy = GROUP_BY_PRIO, +- .pgfailback = 30, ++ .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, +- .selector = "round-robin 0", +- .rr_weight = RR_WEIGHT_PRIO, +- .no_path_retry = NO_PATH_RETRY_FAIL, +- 
.minio = 1, +- .minio_rq = 1, ++ .no_path_retry = NO_PATH_RETRY_QUEUE, + .flush_on_last_del = FLUSH_ALWAYS, + .fast_io_fail = 15, +- .dev_loss = 15, ++ .dev_loss = MAX_DEV_LOSS_TMO, ++ .detect_prio = DETECT_PRIO_OFF, + }, + /* + * Kaminario diff --git a/SOURCES/0198-multipathd-print-path-offline-message-even-without-a.patch b/SOURCES/0198-multipathd-print-path-offline-message-even-without-a.patch new file mode 100644 index 0000000..637a4c2 --- /dev/null +++ b/SOURCES/0198-multipathd-print-path-offline-message-even-without-a.patch @@ -0,0 +1,36 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Wed, 21 Jan 2026 16:03:13 -0500 +Subject: [PATCH] multipathd: print path offline message even without a checker + +If a path has a checker selected and is offline, multipathd will print a +"path offline" message. However if the checker isn't selected, for +instance because multipathd was started or reconfigured while the path +was offline, multipathd was not printing the "path offline" message. +Fix that. 
+ +Signed-off-by: Benjamin Marzinski +Reviewed-by: Martin Wilck +--- + multipathd/main.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/multipathd/main.c b/multipathd/main.c +index a85c0db4..9beb0e06 100644 +--- a/multipathd/main.c ++++ b/multipathd/main.c +@@ -97,12 +97,11 @@ mpath_pr_event_handle(struct path *pp, unsigned int nr_keys_needed, + + #define LOG_MSG(lvl, pp) \ + do { \ +- if (pp->mpp && checker_selected(&pp->checker) && \ +- lvl <= libmp_verbosity) { \ ++ if (pp->mpp && lvl <= libmp_verbosity) { \ + if (pp->offline) \ + condlog(lvl, "%s: %s - path offline", \ + pp->mpp->alias, pp->dev); \ +- else { \ ++ else if (checker_selected(&pp->checker)) { \ + const char *__m = \ + checker_message(&pp->checker); \ + \ diff --git a/SOURCES/0199-libmultipath-improve-cleanup-of-uevent-queues-on-exi.patch b/SOURCES/0199-libmultipath-improve-cleanup-of-uevent-queues-on-exi.patch new file mode 100644 index 0000000..ae579aa --- /dev/null +++ b/SOURCES/0199-libmultipath-improve-cleanup-of-uevent-queues-on-exi.patch @@ -0,0 +1,133 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Martin Wilck +Date: Thu, 9 Sep 2021 23:59:42 +0200 +Subject: [PATCH] libmultipath: improve cleanup of uevent queues on exit + +uevents listed on merge_node must be cleaned up, too. uevents +cancelled while being serviced and temporary queues, likewise. +The global uevq must be cleaned out in the uevent listener thread, +because it might have added events after the dispatcher thread +had already finished. 
+ +Reviewed-by: Benjamin Marzinski +Signed-off-by: Benjamin Marzinski +--- + libmultipath/uevent.c | 49 ++++++++++++++++++++++++++++++++----------- + 1 file changed, 37 insertions(+), 12 deletions(-) + +diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c +index 4265904b..80941f87 100644 +--- a/libmultipath/uevent.c ++++ b/libmultipath/uevent.c +@@ -91,16 +91,25 @@ struct uevent * alloc_uevent (void) + return uev; + } + ++static void uevq_cleanup(struct list_head *tmpq); ++ ++static void cleanup_uev(void *arg) ++{ ++ struct uevent *uev = arg; ++ ++ uevq_cleanup(&uev->merge_node); ++ if (uev->udev) ++ udev_device_unref(uev->udev); ++ free(uev); ++} ++ + static void uevq_cleanup(struct list_head *tmpq) + { + struct uevent *uev, *tmp; + + list_for_each_entry_safe(uev, tmp, tmpq, node) { + list_del_init(&uev->node); +- +- if (uev->udev) +- udev_device_unref(uev->udev); +- FREE(uev); ++ cleanup_uev(uev); + } + } + +@@ -384,14 +393,10 @@ service_uevq(struct list_head *tmpq) + list_for_each_entry_safe(uev, tmp, tmpq, node) { + list_del_init(&uev->node); + ++ pthread_cleanup_push(cleanup_uev, uev); + if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data)) + condlog(0, "uevent trigger error"); +- +- uevq_cleanup(&uev->merge_node); +- +- if (uev->udev) +- udev_device_unref(uev->udev); +- FREE(uev); ++ pthread_cleanup_pop(1); + } + } + +@@ -411,6 +416,18 @@ static void monitor_cleanup(void *arg) + udev_monitor_unref(monitor); + } + ++static void cleanup_uevq(void *arg) ++{ ++ uevq_cleanup(arg); ++} ++ ++static void cleanup_global_uevq(void *arg __attribute__((unused))) ++{ ++ pthread_mutex_lock(uevq_lockp); ++ uevq_cleanup(&uevq); ++ pthread_mutex_unlock(uevq_lockp); ++} ++ + /* + * Service the uevent queue. 
+ */ +@@ -425,6 +442,7 @@ int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data), + while (1) { + LIST_HEAD(uevq_tmp); + ++ pthread_cleanup_push(cleanup_mutex, uevq_lockp); + pthread_mutex_lock(uevq_lockp); + servicing_uev = 0; + /* +@@ -436,14 +454,17 @@ int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data), + } + servicing_uev = 1; + list_splice_init(&uevq, &uevq_tmp); +- pthread_mutex_unlock(uevq_lockp); ++ pthread_cleanup_pop(1); ++ + if (!my_uev_trigger) + break; ++ ++ pthread_cleanup_push(cleanup_uevq, &uevq_tmp); + merge_uevq(&uevq_tmp); + service_uevq(&uevq_tmp); ++ pthread_cleanup_pop(1); + } + condlog(3, "Terminating uev service queue"); +- uevq_cleanup(&uevq); + return 0; + } + +@@ -600,6 +621,8 @@ int uevent_listen(struct udev *udev) + + events = 0; + gettimeofday(&start_time, NULL); ++ pthread_cleanup_push(cleanup_global_uevq, NULL); ++ pthread_cleanup_push(cleanup_uevq, &uevlisten_tmp); + while (1) { + struct uevent *uev; + struct udev_device *dev; +@@ -650,6 +673,8 @@ int uevent_listen(struct udev *udev) + gettimeofday(&start_time, NULL); + timeout = 30; + } ++ pthread_cleanup_pop(1); ++ pthread_cleanup_pop(1); + out: + pthread_cleanup_pop(1); + out_udev: diff --git a/SOURCES/0200-uevent_dispatch-use-while-in-wait-loop.patch b/SOURCES/0200-uevent_dispatch-use-while-in-wait-loop.patch new file mode 100644 index 0000000..b8aed11 --- /dev/null +++ b/SOURCES/0200-uevent_dispatch-use-while-in-wait-loop.patch @@ -0,0 +1,38 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Martin Wilck +Date: Tue, 29 Mar 2022 16:06:25 +0200 +Subject: [PATCH] uevent_dispatch(): use while in wait loop + +Callers of pthread_cond_wait() should generally use a while loop +to test the condition. Also, remove the misleading comment. +Condition variables aren't unreliable, they're just not strictly +tied to the condition tested. 
+ +Signed-off-by: Martin Wilck +Reviewed-by: Benjamin Marzinski +Signed-off-by: Benjamin Marzinski +--- + libmultipath/uevent.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c +index 80941f87..e3ec1ac1 100644 +--- a/libmultipath/uevent.c ++++ b/libmultipath/uevent.c +@@ -445,13 +445,10 @@ int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data), + pthread_cleanup_push(cleanup_mutex, uevq_lockp); + pthread_mutex_lock(uevq_lockp); + servicing_uev = 0; +- /* +- * Condition signals are unreliable, +- * so make sure we only wait if we have to. +- */ +- if (list_empty(&uevq)) { ++ ++ while (list_empty(&uevq)) + pthread_cond_wait(uev_condp, uevq_lockp); +- } ++ + servicing_uev = 1; + list_splice_init(&uevq, &uevq_tmp); + pthread_cleanup_pop(1); diff --git a/SOURCES/0201-libmultipath-uevent_dispatch-process-uevents-one-by-.patch b/SOURCES/0201-libmultipath-uevent_dispatch-process-uevents-one-by-.patch new file mode 100644 index 0000000..4b14f78 --- /dev/null +++ b/SOURCES/0201-libmultipath-uevent_dispatch-process-uevents-one-by-.patch @@ -0,0 +1,319 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Martin Wilck +Date: Tue, 29 Mar 2022 18:04:42 +0200 +Subject: [PATCH] libmultipath: uevent_dispatch(): process uevents one by one + +The main rationale for delaying uevents is that the +uevent dispatcher may have to wait for other threads to release the +vecs lock, may hold the vecs lock for an extended amount of time, and +even sleep occasionally. By delaying them, we have the chance +to accumulate events for the same path device ("filtering") or +WWID ("merging"), thus avoiding duplicate work if we merge these +into one. + +A similar effect can be obtained in the uevent dispatcher itself +by looking for new uevents after each dispatched event, and trying +to merge the newly arrived events with those that remained +in the queue. 
+ +When uevq_work is non-empty and we append a list of new events, +we don't need to check the entire list for filterable and mergeable +uevents. uevq_work had been filtered and merged already. So we just +need to check the newly appended events. These must of course be +checked for merges with earlier events, too. + +We must deal with some special cases here, like previously merged +uevents being filtered later. + +Signed-off-by: Martin Wilck +Reviewed-by: Benjamin Marzinski +Signed-off-by: Benjamin Marzinski +--- + libmultipath/list.h | 53 +++++++++++++++++ + libmultipath/uevent.c | 129 ++++++++++++++++++++++++++++++------------ + 2 files changed, 147 insertions(+), 35 deletions(-) + +diff --git a/libmultipath/list.h b/libmultipath/list.h +index ced021f5..248f72bc 100644 +--- a/libmultipath/list.h ++++ b/libmultipath/list.h +@@ -246,6 +246,35 @@ static inline void list_splice_tail_init(struct list_head *list, + #define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + ++ ++/** ++ * list_pop - unlink and return the first list element ++ * @head: the &struct list_head pointer. ++ */ ++static inline struct list_head *list_pop(struct list_head *head) ++{ ++ struct list_head *tmp; ++ ++ if (list_empty(head)) ++ return NULL; ++ tmp = head->next; ++ list_del_init(tmp); ++ return tmp; ++} ++ ++/** ++ * list_pop_entry - unlink and return the entry of the first list element ++ * @head: the &struct list_head pointer. ++ * @type: the type of the struct this is embedded in. ++ * @member: the name of the list_struct within the struct. ++ */ ++#define list_pop_entry(head, type, member) \ ++({ \ ++ struct list_head *__h = list_pop(head); \ ++ \ ++ (__h ? container_of(__h, type, member) : NULL); \ ++}) ++ + /** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. 
+@@ -334,6 +363,30 @@ static inline void list_splice_tail_init(struct list_head *list, + &pos->member != (head); \ + pos = n, n = list_entry(n->member.prev, typeof(*n), member)) + ++/** ++ * list_for_some_entry - iterate list from the given begin node to the given end node ++ * @pos: the type * to use as a loop counter. ++ * @from: the begin node of the iteration. ++ * @to: the end node of the iteration. ++ * @member: the name of the list_struct within the struct. ++ */ ++#define list_for_some_entry(pos, from, to, member) \ ++ for (pos = list_entry((from)->next, typeof(*pos), member); \ ++ &pos->member != (to); \ ++ pos = list_entry(pos->member.next, typeof(*pos), member)) ++ ++/** ++ * list_for_some_entry_reverse - iterate backwards list from the given begin node to the given end node ++ * @pos: the type * to use as a loop counter. ++ * @from: the begin node of the iteration. ++ * @to: the end node of the iteration. ++ * @member: the name of the list_struct within the struct. ++ */ ++#define list_for_some_entry_reverse(pos, from, to, member) \ ++ for (pos = list_entry((from)->prev, typeof(*pos), member); \ ++ &pos->member != (to); \ ++ pos = list_entry(pos->member.prev, typeof(*pos), member)) ++ + /** + * list_for_some_entry_safe - iterate list from the given begin node to the given end node safe against removal of list entry + * @pos: the type * to use as a loop counter. +diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c +index e3ec1ac1..2198e254 100644 +--- a/libmultipath/uevent.c ++++ b/libmultipath/uevent.c +@@ -308,17 +308,64 @@ uevent_can_merge(struct uevent *earlier, struct uevent *later) + return false; + } + ++static void uevent_delete_from_list(struct uevent *to_delete, ++ struct uevent **previous, ++ struct list_head **old_tail) ++{ ++ /* ++ * "old_tail" is the list_head before the last list element to which ++ * the caller iterates (the list anchor if the caller iterates over ++ * the entire list). 
If this element is removed (which can't happen ++ * for the anchor), "old_tail" must be moved. It can happen that ++ * "old_tail" ends up pointing at the anchor. ++ */ ++ if (*old_tail == &to_delete->node) ++ *old_tail = to_delete->node.prev; ++ ++ list_del_init(&to_delete->node); ++ ++ /* ++ * The "to_delete" uevent has been merged with other uevents ++ * previously. Re-insert them into the list, at the point we're ++ * currently at. This must be done after the list_del_init() above, ++ * otherwise previous->next would still point to to_delete. ++ */ ++ if (!list_empty(&to_delete->merge_node)) { ++ struct uevent *last = list_entry(to_delete->merge_node.prev, ++ typeof(*last), node); ++ ++ list_splice(&to_delete->merge_node, &(*previous)->node); ++ *previous = last; ++ } ++ if (to_delete->udev) ++ udev_device_unref(to_delete->udev); ++ ++ free(to_delete); ++} ++ ++/* ++ * Use this function to delete events that are known not to ++ * be equal to old_tail, and have an empty merge_node list. ++ * For others, use uevent_delete_from_list(). 
++ */ ++static void uevent_delete_simple(struct uevent *to_delete) ++{ ++ list_del_init(&to_delete->node); ++ ++ if (to_delete->udev) ++ udev_device_unref(to_delete->udev); ++ ++ free(to_delete); ++} ++ + static void +-uevent_prepare(struct list_head *tmpq) ++uevent_prepare(struct list_head *tmpq, const struct list_head *stop) + { + struct uevent *uev, *tmp; + +- list_for_each_entry_reverse_safe(uev, tmp, tmpq, node) { ++ list_for_some_entry_reverse_safe(uev, tmp, tmpq, stop, node) { + if (uevent_can_discard(uev)) { +- list_del_init(&uev->node); +- if (uev->udev) +- udev_device_unref(uev->udev); +- FREE(uev); ++ uevent_delete_simple(uev); + continue; + } + +@@ -329,7 +376,7 @@ uevent_prepare(struct list_head *tmpq) + } + + static void +-uevent_filter(struct uevent *later, struct list_head *tmpq) ++uevent_filter(struct uevent *later, struct list_head *tmpq, struct list_head **stop) + { + struct uevent *earlier, *tmp; + +@@ -343,16 +390,13 @@ uevent_filter(struct uevent *later, struct list_head *tmpq) + earlier->kernel, earlier->action, + later->kernel, later->action); + +- list_del_init(&earlier->node); +- if (earlier->udev) +- udev_device_unref(earlier->udev); +- FREE(earlier); ++ uevent_delete_from_list(earlier, &tmp, stop); + } + } + } + + static void +-uevent_merge(struct uevent *later, struct list_head *tmpq) ++uevent_merge(struct uevent *later, struct list_head *tmpq, struct list_head **stop) + { + struct uevent *earlier, *tmp; + +@@ -367,37 +411,42 @@ uevent_merge(struct uevent *later, struct list_head *tmpq) + earlier->action, earlier->kernel, earlier->wwid, + later->action, later->kernel, later->wwid); + ++ /* See comment in uevent_delete_from_list() */ ++ if (&earlier->node == *stop) ++ *stop = earlier->node.prev; ++ + list_move(&earlier->node, &later->merge_node); ++ list_splice_init(&earlier->merge_node, ++ &later->merge_node); + } + } + } + + static void +-merge_uevq(struct list_head *tmpq) ++merge_uevq(struct list_head *tmpq, struct list_head *stop) + 
{ + struct uevent *later; + +- uevent_prepare(tmpq); +- list_for_each_entry_reverse(later, tmpq, node) { +- uevent_filter(later, tmpq); ++ uevent_prepare(tmpq, stop); ++ list_for_some_entry_reverse(later, tmpq, stop, node) { ++ uevent_filter(later, tmpq, &stop); + if(uevent_need_merge()) +- uevent_merge(later, tmpq); ++ uevent_merge(later, tmpq, &stop); + } + } + + static void + service_uevq(struct list_head *tmpq) + { +- struct uevent *uev, *tmp; +- +- list_for_each_entry_safe(uev, tmp, tmpq, node) { +- list_del_init(&uev->node); +- +- pthread_cleanup_push(cleanup_uev, uev); +- if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data)) +- condlog(0, "uevent trigger error"); +- pthread_cleanup_pop(1); +- } ++ struct uevent *uev = list_pop_entry(tmpq, typeof(*uev), node); ++ ++ if (uev == NULL) ++ return; ++ condlog(4, "servicing uevent '%s %s'", uev->action, uev->kernel); ++ pthread_cleanup_push(cleanup_uev, uev); ++ if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data)) ++ condlog(0, "uevent trigger error"); ++ pthread_cleanup_pop(1); + } + + static void uevent_cleanup(void *arg) +@@ -434,33 +483,43 @@ static void cleanup_global_uevq(void *arg __attribute__((unused))) + int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data), + void * trigger_data) + { ++ LIST_HEAD(uevq_work); ++ + my_uev_trigger = uev_trigger; + my_trigger_data = trigger_data; + + mlockall(MCL_CURRENT | MCL_FUTURE); + ++ pthread_cleanup_push(cleanup_uevq, &uevq_work); + while (1) { +- LIST_HEAD(uevq_tmp); ++ struct list_head *stop; + + pthread_cleanup_push(cleanup_mutex, uevq_lockp); + pthread_mutex_lock(uevq_lockp); +- servicing_uev = 0; + +- while (list_empty(&uevq)) ++ servicing_uev = !list_empty(&uevq_work); ++ ++ while (list_empty(&uevq_work) && list_empty(&uevq)) + pthread_cond_wait(uev_condp, uevq_lockp); + + servicing_uev = 1; +- list_splice_init(&uevq, &uevq_tmp); ++ /* ++ * "stop" is the list element towards which merge_uevq() ++ * will iterate: the last 
element of uevq_work before ++ * appending new uevents. If uveq_is empty, uevq_work.prev ++ * equals &uevq_work, which is what we need. ++ */ ++ stop = uevq_work.prev; ++ list_splice_tail_init(&uevq, &uevq_work); + pthread_cleanup_pop(1); + + if (!my_uev_trigger) + break; + +- pthread_cleanup_push(cleanup_uevq, &uevq_tmp); +- merge_uevq(&uevq_tmp); +- service_uevq(&uevq_tmp); +- pthread_cleanup_pop(1); ++ merge_uevq(&uevq_work, stop); ++ service_uevq(&uevq_work); + } ++ pthread_cleanup_pop(1); + condlog(3, "Terminating uev service queue"); + return 0; + } diff --git a/SOURCES/0202-libmultipath-uevent_listen-don-t-delay-uevents.patch b/SOURCES/0202-libmultipath-uevent_listen-don-t-delay-uevents.patch new file mode 100644 index 0000000..8cee865 --- /dev/null +++ b/SOURCES/0202-libmultipath-uevent_listen-don-t-delay-uevents.patch @@ -0,0 +1,204 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Martin Wilck +Date: Tue, 29 Mar 2022 23:25:48 +0200 +Subject: [PATCH] libmultipath: uevent_listen(): don't delay uevents + +When multipathd starts up early, basically all devices are added +through uevent processing. This takes much more time than necessary +because of the artificial delays introduced for passing uevents +between the listener and the receiver thread in ee8888f +("multipath-tools: improve processing efficiency for addition and deletion of +multipath devices"). This delay could be up to 30s. + +It's generally not a good idea to delay uevent processing in multipathd. +ADD events must normally be handled ASAP in order to avoid maps entering +queueing mode or eventually failing. Handling REMOVE events quickly is +also important to make multipathd aware of deleted devices and keep +kernel and multipathd state in sync. + +If uevents arrive quickly, the assumption is that the dispatcher will process +them more slowly than the listener. 
This was the idea of commit ee8888f, +AFAIU: if a queue of unprocessed events piles up because the dispatcher is +too slow, use filtering and merging to reduce the length of the queue, and +thus the work to be done for the uevent dispatcher, especially the work +that needs to be done while holding the vecs lock. In ee8888f, the +queue was created by allowing uevents to accumulate in the listener. + +This patch changes the logic of ee8888f, while keeping the uevent +filtering and discarding features. The idea is that the uevent dispatcher +shouldn't be idle if there are uevents to process. Therefore uevents +are passed to it immediately. But it now checks for new uevents after +processing every individual event, before processing the entire queue, +and it applies filtering and merging to the queue as it grows. + +This patch set avoids any delay when the uevent dispatcher is idle or +able to keep up with the rate of incoming uevents, while applying an +increasing amount of filtering and merging as pressure on the uevent +dispatcher increases. It's reasonable to assume that filtering and +merging get more efficient with increasing queue length, because the +probability of finding matching events will increase. 
+ +Signed-off-by: Martin Wilck +Reviewed-by: Benjamin Marzinski +Signed-off-by: Benjamin Marzinski +--- + libmultipath/uevent.c | 108 +++++++++++++++--------------------------- + 1 file changed, 37 insertions(+), 71 deletions(-) + +diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c +index 2198e254..c3984fef 100644 +--- a/libmultipath/uevent.c ++++ b/libmultipath/uevent.c +@@ -54,10 +54,6 @@ + #include "blacklist.h" + #include "devmapper.h" + +-#define MAX_ACCUMULATION_COUNT 2048 +-#define MAX_ACCUMULATION_TIME 30*1000 +-#define MIN_BURST_SPEED 10 +- + typedef int (uev_trigger)(struct uevent *, void * trigger_data); + + static LIST_HEAD(uevq); +@@ -586,44 +582,43 @@ static struct uevent *uevent_from_udev_device(struct udev_device *dev) + return uev; + } + +-static bool uevent_burst(struct timeval *start_time, int events) ++#define MAX_UEVENTS 1000 ++static int uevent_receive_events(int fd, struct list_head *tmpq, ++ struct udev_monitor *monitor) + { +- struct timeval diff_time, end_time; +- unsigned long speed; +- unsigned long eclipse_ms; +- +- if(events > MAX_ACCUMULATION_COUNT) { +- condlog(2, "burst got %u uevents, too much uevents, stopped", events); +- return false; +- } ++ struct pollfd ev_poll; ++ int n = 0; + +- gettimeofday(&end_time, NULL); +- timersub(&end_time, start_time, &diff_time); ++ do { ++ struct uevent *uev; ++ struct udev_device *dev; + +- eclipse_ms = diff_time.tv_sec * 1000 + diff_time.tv_usec / 1000; ++ dev = udev_monitor_receive_device(monitor); ++ if (!dev) { ++ condlog(0, "failed getting udev device"); ++ break; ++ } ++ uev = uevent_from_udev_device(dev); ++ if (!uev) ++ break; + +- if (eclipse_ms == 0) +- return true; ++ list_add_tail(&uev->node, tmpq); ++ n++; ++ condlog(4, "received uevent \"%s %s\"", uev->action, uev->kernel); + +- if (eclipse_ms > MAX_ACCUMULATION_TIME) { +- condlog(2, "burst continued %lu ms, too long time, stopped", eclipse_ms); +- return false; +- } ++ ev_poll.fd = fd; ++ ev_poll.events = POLLIN; + +- 
speed = (events * 1000) / eclipse_ms; +- if (speed > MIN_BURST_SPEED) +- return true; ++ } while (n < MAX_UEVENTS && poll(&ev_poll, 1, 0) > 0); + +- return false; ++ return n; + } + + int uevent_listen(struct udev *udev) + { + int err = 2; + struct udev_monitor *monitor = NULL; +- int fd, socket_flags, events; +- struct timeval start_time; +- int timeout = 30; ++ int fd, socket_flags; + LIST_HEAD(uevlisten_tmp); + + /* +@@ -675,59 +670,30 @@ int uevent_listen(struct udev *udev) + goto out; + } + +- events = 0; +- gettimeofday(&start_time, NULL); + pthread_cleanup_push(cleanup_global_uevq, NULL); + pthread_cleanup_push(cleanup_uevq, &uevlisten_tmp); + while (1) { +- struct uevent *uev; +- struct udev_device *dev; +- struct pollfd ev_poll; +- int poll_timeout; +- int fdcount; ++ int fdcount, events; ++ struct pollfd ev_poll = { .fd = fd, .events = POLLIN, }; + +- memset(&ev_poll, 0, sizeof(struct pollfd)); +- ev_poll.fd = fd; +- ev_poll.events = POLLIN; +- poll_timeout = timeout * 1000; +- errno = 0; +- fdcount = poll(&ev_poll, 1, poll_timeout); +- if (fdcount > 0 && ev_poll.revents & POLLIN) { +- timeout = uevent_burst(&start_time, events + 1) ? 1 : 0; +- dev = udev_monitor_receive_device(monitor); +- if (!dev) { +- condlog(0, "failed getting udev device"); +- continue; +- } +- uev = uevent_from_udev_device(dev); +- if (!uev) +- continue; +- list_add_tail(&uev->node, &uevlisten_tmp); +- events++; +- continue; +- } ++ fdcount = poll(&ev_poll, 1, -1); + if (fdcount < 0) { + if (errno == EINTR) + continue; + +- condlog(0, "error receiving " +- "uevent message: %m"); ++ condlog(0, "error receiving uevent message: %m"); + err = -errno; + break; + } +- if (!list_empty(&uevlisten_tmp)) { +- /* +- * Queue uevents and poke service pthread. 
+- */ +- condlog(3, "Forwarding %d uevents", events); +- pthread_mutex_lock(uevq_lockp); +- list_splice_tail_init(&uevlisten_tmp, &uevq); +- pthread_cond_signal(uev_condp); +- pthread_mutex_unlock(uevq_lockp); +- events = 0; +- } +- gettimeofday(&start_time, NULL); +- timeout = 30; ++ events = uevent_receive_events(fd, &uevlisten_tmp, monitor); ++ if (events <= 0) ++ continue; ++ ++ condlog(4, "Forwarding %d uevents", events); ++ pthread_mutex_lock(uevq_lockp); ++ list_splice_tail_init(&uevlisten_tmp, &uevq); ++ pthread_cond_signal(uev_condp); ++ pthread_mutex_unlock(uevq_lockp); + } + pthread_cleanup_pop(1); + pthread_cleanup_pop(1); diff --git a/SOURCES/0203-libmultipath-uevent-use-struct-to-pass-parameters-ar.patch b/SOURCES/0203-libmultipath-uevent-use-struct-to-pass-parameters-ar.patch new file mode 100644 index 0000000..51baf0e --- /dev/null +++ b/SOURCES/0203-libmultipath-uevent-use-struct-to-pass-parameters-ar.patch @@ -0,0 +1,356 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Martin Wilck +Date: Wed, 30 Mar 2022 00:06:15 +0200 +Subject: [PATCH] libmultipath: uevent: use struct to pass parameters around + +libmultipath: uevent_dispatch(): just grab config once + +Introduce struct uevent_filter_state to pass parameters around. +This simplifies the function signatures and allows for easy extension +later. + +Instead of grabbing multipath config repeatedly, do it just +once per dispatcher iteration, and pass the pointer around in +struct uevent_filter_state. We shouldn't use different configs +for different paths in a single iteration, anyway. + +Also, properly constify get_uid_attribute_by_attrs() and +pp->uid_attribute. 
+ +Signed-off-by: Martin Wilck +Reviewed-by: Benjamin Marzinski +Signed-off-by: Benjamin Marzinski +--- + libmultipath/config.c | 6 +-- + libmultipath/config.h | 4 +- + libmultipath/discovery.c | 2 +- + libmultipath/structs.h | 2 +- + libmultipath/uevent.c | 110 +++++++++++++++++---------------------- + libmultipath/uevent.h | 3 +- + tests/uevent.c | 2 +- + 7 files changed, 58 insertions(+), 71 deletions(-) + +diff --git a/libmultipath/config.c b/libmultipath/config.c +index f31200a3..bd8296bf 100644 +--- a/libmultipath/config.c ++++ b/libmultipath/config.c +@@ -1112,10 +1112,10 @@ out: + return 1; + } + +-char *get_uid_attribute_by_attrs(struct config *conf, +- const char *path_dev) ++const char *get_uid_attribute_by_attrs(const struct config *conf, ++ const char *path_dev) + { +- vector uid_attrs = &conf->uid_attrs; ++ const struct _vector *uid_attrs = &conf->uid_attrs; + int j; + char *att, *col; + +diff --git a/libmultipath/config.h b/libmultipath/config.h +index 5807ac68..d3abbaea 100644 +--- a/libmultipath/config.h ++++ b/libmultipath/config.h +@@ -329,7 +329,7 @@ void libmp_put_multipath_config(void *); + void put_multipath_config(void *); + + int parse_uid_attrs(char *uid_attrs, struct config *conf); +-char *get_uid_attribute_by_attrs(struct config *conf, +- const char *path_dev); ++const char *get_uid_attribute_by_attrs(const struct config *conf, ++ const char *path_dev); + + #endif +diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c +index 22d114b3..186423e0 100644 +--- a/libmultipath/discovery.c ++++ b/libmultipath/discovery.c +@@ -2071,7 +2071,7 @@ fix_broken_nvme_wwid(struct path *pp, const char *value, size_t size) + } + + static int +-get_udev_uid(struct path * pp, char *uid_attribute, struct udev_device *udev) ++get_udev_uid(struct path * pp, const char *uid_attribute, struct udev_device *udev) + { + ssize_t len; + const char *value; +diff --git a/libmultipath/structs.h b/libmultipath/structs.h +index 2f69e831..423c8b78 100644 +--- 
a/libmultipath/structs.h ++++ b/libmultipath/structs.h +@@ -350,7 +350,7 @@ struct path { + int detect_prio; + int detect_checker; + int tpgs; +- char * uid_attribute; ++ const char *uid_attribute; + char * getuid; + struct prio prio; + struct checker checker; +diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c +index c3984fef..4ef7181c 100644 +--- a/libmultipath/uevent.c ++++ b/libmultipath/uevent.c +@@ -65,6 +65,12 @@ static uev_trigger *my_uev_trigger; + static void *my_trigger_data; + static int servicing_uev; + ++struct uevent_filter_state { ++ struct list_head uevq; ++ struct list_head *old_tail; ++ struct config *conf; ++}; ++ + int is_uevent_busy(void) + { + int empty; +@@ -160,40 +166,24 @@ int uevent_get_env_positive_int(const struct uevent *uev, + } + + void +-uevent_get_wwid(struct uevent *uev) ++uevent_get_wwid(struct uevent *uev, const struct config *conf) + { +- char *uid_attribute; ++ const char *uid_attribute; + const char *val; +- struct config * conf; + +- conf = get_multipath_config(); +- pthread_cleanup_push(put_multipath_config, conf); + uid_attribute = get_uid_attribute_by_attrs(conf, uev->kernel); +- pthread_cleanup_pop(1); +- + val = uevent_get_env_var(uev, uid_attribute); + if (val) + uev->wwid = val; + } + +-static bool uevent_need_merge(void) ++static bool uevent_need_merge(const struct config *conf) + { +- struct config * conf; +- bool need_merge = false; +- +- conf = get_multipath_config(); +- if (VECTOR_SIZE(&conf->uid_attrs) > 0) +- need_merge = true; +- put_multipath_config(conf); +- +- return need_merge; ++ return VECTOR_SIZE(&conf->uid_attrs) > 0; + } + +-static bool uevent_can_discard(struct uevent *uev) ++static bool uevent_can_discard(struct uevent *uev, const struct config *conf) + { +- int invalid = 0; +- struct config * conf; +- + /* + * do not filter dm devices by devnode + */ +@@ -202,15 +192,10 @@ static bool uevent_can_discard(struct uevent *uev) + /* + * filter paths devices by devnode + */ +- conf = 
get_multipath_config(); +- pthread_cleanup_push(put_multipath_config, conf); + if (filter_devnode(conf->blist_devnode, conf->elist_devnode, + uev->kernel) > 0) +- invalid = 1; +- pthread_cleanup_pop(1); +- +- if (invalid) + return true; ++ + return false; + } + +@@ -354,29 +339,28 @@ static void uevent_delete_simple(struct uevent *to_delete) + free(to_delete); + } + +-static void +-uevent_prepare(struct list_head *tmpq, const struct list_head *stop) ++static void uevent_prepare(struct uevent_filter_state *st) + { + struct uevent *uev, *tmp; + +- list_for_some_entry_reverse_safe(uev, tmp, tmpq, stop, node) { +- if (uevent_can_discard(uev)) { ++ list_for_some_entry_reverse_safe(uev, tmp, &st->uevq, st->old_tail, node) { ++ if (uevent_can_discard(uev, st->conf)) { + uevent_delete_simple(uev); + continue; + } + + if (strncmp(uev->kernel, "dm-", 3) && +- uevent_need_merge()) +- uevent_get_wwid(uev); ++ uevent_need_merge(st->conf)) ++ uevent_get_wwid(uev, st->conf); + } + } + + static void +-uevent_filter(struct uevent *later, struct list_head *tmpq, struct list_head **stop) ++uevent_filter(struct uevent *later, struct uevent_filter_state *st) + { + struct uevent *earlier, *tmp; + +- list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) { ++ list_for_some_entry_reverse_safe(earlier, tmp, &later->node, &st->uevq, node) { + /* + * filter unnessary earlier uevents + * by the later uevent +@@ -386,17 +370,16 @@ uevent_filter(struct uevent *later, struct list_head *tmpq, struct list_head **s + earlier->kernel, earlier->action, + later->kernel, later->action); + +- uevent_delete_from_list(earlier, &tmp, stop); ++ uevent_delete_from_list(earlier, &tmp, &st->old_tail); + } + } + } + +-static void +-uevent_merge(struct uevent *later, struct list_head *tmpq, struct list_head **stop) ++static void uevent_merge(struct uevent *later, struct uevent_filter_state *st) + { + struct uevent *earlier, *tmp; + +- list_for_some_entry_reverse_safe(earlier, tmp, &later->node, 
tmpq, node) { ++ list_for_some_entry_reverse_safe(earlier, tmp, &later->node, &st->uevq, node) { + if (merge_need_stop(earlier, later)) + break; + /* +@@ -408,8 +391,8 @@ uevent_merge(struct uevent *later, struct list_head *tmpq, struct list_head **st + later->action, later->kernel, later->wwid); + + /* See comment in uevent_delete_from_list() */ +- if (&earlier->node == *stop) +- *stop = earlier->node.prev; ++ if (&earlier->node == st->old_tail) ++ st->old_tail = earlier->node.prev; + + list_move(&earlier->node, &later->merge_node); + list_splice_init(&earlier->merge_node, +@@ -418,16 +401,15 @@ uevent_merge(struct uevent *later, struct list_head *tmpq, struct list_head **st + } + } + +-static void +-merge_uevq(struct list_head *tmpq, struct list_head *stop) ++static void merge_uevq(struct uevent_filter_state *st) + { + struct uevent *later; + +- uevent_prepare(tmpq, stop); +- list_for_some_entry_reverse(later, tmpq, stop, node) { +- uevent_filter(later, tmpq, &stop); +- if(uevent_need_merge()) +- uevent_merge(later, tmpq, &stop); ++ uevent_prepare(st); ++ list_for_some_entry_reverse(later, &st->uevq, st->old_tail, node) { ++ uevent_filter(later, st); ++ if(uevent_need_merge(st->conf)) ++ uevent_merge(later, st); + } + } + +@@ -479,41 +461,45 @@ static void cleanup_global_uevq(void *arg __attribute__((unused))) + int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data), + void * trigger_data) + { +- LIST_HEAD(uevq_work); ++ struct uevent_filter_state filter_state; + ++ INIT_LIST_HEAD(&filter_state.uevq); + my_uev_trigger = uev_trigger; + my_trigger_data = trigger_data; + + mlockall(MCL_CURRENT | MCL_FUTURE); + +- pthread_cleanup_push(cleanup_uevq, &uevq_work); ++ pthread_cleanup_push(cleanup_uevq, &filter_state.uevq); + while (1) { +- struct list_head *stop; +- + pthread_cleanup_push(cleanup_mutex, uevq_lockp); + pthread_mutex_lock(uevq_lockp); + +- servicing_uev = !list_empty(&uevq_work); ++ servicing_uev = !list_empty(&filter_state.uevq); + 
+- while (list_empty(&uevq_work) && list_empty(&uevq)) ++ while (list_empty(&filter_state.uevq) && list_empty(&uevq)) + pthread_cond_wait(uev_condp, uevq_lockp); + + servicing_uev = 1; + /* +- * "stop" is the list element towards which merge_uevq() +- * will iterate: the last element of uevq_work before +- * appending new uevents. If uveq_is empty, uevq_work.prev +- * equals &uevq_work, which is what we need. ++ * "old_tail" is the list element towards which merge_uevq() ++ * will iterate: the last element of uevq before ++ * appending new uevents. If uveq empty, uevq.prev ++ * equals &uevq, which is what we need. + */ +- stop = uevq_work.prev; +- list_splice_tail_init(&uevq, &uevq_work); ++ filter_state.old_tail = filter_state.uevq.prev; ++ list_splice_tail_init(&uevq, &filter_state.uevq); + pthread_cleanup_pop(1); + + if (!my_uev_trigger) + break; + +- merge_uevq(&uevq_work, stop); +- service_uevq(&uevq_work); ++ ++ pthread_cleanup_push(put_multipath_config, filter_state.conf); ++ filter_state.conf = get_multipath_config(); ++ merge_uevq(&filter_state); ++ pthread_cleanup_pop(1); ++ ++ service_uevq(&filter_state.uevq); + } + pthread_cleanup_pop(1); + condlog(3, "Terminating uev service queue"); +diff --git a/libmultipath/uevent.h b/libmultipath/uevent.h +index 61ca1b56..53a7ca29 100644 +--- a/libmultipath/uevent.h ++++ b/libmultipath/uevent.h +@@ -10,6 +10,7 @@ + #define OBJECT_SIZE 512 + + struct udev; ++struct config; + + struct uevent { + struct list_head node; +@@ -31,7 +32,7 @@ int uevent_listen(struct udev *udev); + int uevent_dispatch(int (*store_uev)(struct uevent *, void * trigger_data), + void * trigger_data); + bool uevent_is_mpath(const struct uevent *uev); +-void uevent_get_wwid(struct uevent *uev); ++void uevent_get_wwid(struct uevent *uev, const struct config *conf); + + int uevent_get_env_positive_int(const struct uevent *uev, + const char *attr); +diff --git a/tests/uevent.c b/tests/uevent.c +index 648ff268..e237a208 100644 +--- a/tests/uevent.c 
++++ b/tests/uevent.c +@@ -111,7 +111,7 @@ static void test_uid_attrs(void **state) + static void test_wwid(void **state) + { + struct uevent *uev = *state; +- uevent_get_wwid(uev); ++ uevent_get_wwid(uev, &conf); + + assert_string_equal(uev->wwid, WWID); + } diff --git a/SOURCES/0204-libmultipath-is_uevent_busy-check-servicing_uev-unde.patch b/SOURCES/0204-libmultipath-is_uevent_busy-check-servicing_uev-unde.patch new file mode 100644 index 0000000..c5b65de --- /dev/null +++ b/SOURCES/0204-libmultipath-is_uevent_busy-check-servicing_uev-unde.patch @@ -0,0 +1,37 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Martin Wilck +Date: Fri, 5 Jan 2024 18:51:02 +0100 +Subject: [PATCH] libmultipath: is_uevent_busy(): check servicing_uev under + lock + +This fixes a coverity-reported defect (413384 Data race condition). +Indeed, we always set servicing_uev with the lock held, so it makes +sense to read it with the lock held, too. + +Signed-off-by: Martin Wilck +Reviewed-by: Benjamin Marzinski +Signed-off-by: Benjamin Marzinski +--- + libmultipath/uevent.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c +index 4ef7181c..8cd928a9 100644 +--- a/libmultipath/uevent.c ++++ b/libmultipath/uevent.c +@@ -73,12 +73,13 @@ struct uevent_filter_state { + + int is_uevent_busy(void) + { +- int empty; ++ int empty, servicing; + + pthread_mutex_lock(uevq_lockp); + empty = list_empty(&uevq); ++ servicing = servicing_uev; + pthread_mutex_unlock(uevq_lockp); +- return (!empty || servicing_uev); ++ return (!empty || servicing); + } + + struct uevent * alloc_uevent (void) diff --git a/SOURCES/0205-multipathd-make-multipathd-show-status-busy-checker-.patch b/SOURCES/0205-multipathd-make-multipathd-show-status-busy-checker-.patch new file mode 100644 index 0000000..72d64e2 --- /dev/null +++ b/SOURCES/0205-multipathd-make-multipathd-show-status-busy-checker-.patch @@ -0,0 +1,63 @@ +From 
0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Wed, 21 Jan 2026 16:03:12 -0500 +Subject: [PATCH] multipathd: make "multipathd show status" busy checker better + +while uevent_listen() was grabbing new uevents, "multipathd show status" +would still show busy as "False". Add a check there, to catch +multipathd's uevent processing earlier. Also, access servicing_uev (as +well as the new variable, adding_uev) atomically, just to make sure that +the compiler doesn't do stupid things trying to optimize them. + +Signed-off-by: Benjamin Marzinski +Reviewed-by: Martin Wilck +--- + libmultipath/uevent.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c +index 8cd928a9..c230e963 100644 +--- a/libmultipath/uevent.c ++++ b/libmultipath/uevent.c +@@ -64,6 +64,7 @@ static pthread_cond_t *uev_condp = &uev_cond; + static uev_trigger *my_uev_trigger; + static void *my_trigger_data; + static int servicing_uev; ++static int adding_uev; /* uatomic access only */ + + struct uevent_filter_state { + struct list_head uevq; +@@ -73,13 +74,14 @@ struct uevent_filter_state { + + int is_uevent_busy(void) + { +- int empty, servicing; ++ int empty, servicing, adding; + + pthread_mutex_lock(uevq_lockp); + empty = list_empty(&uevq); + servicing = servicing_uev; ++ adding = uatomic_read(&adding_uev); + pthread_mutex_unlock(uevq_lockp); +- return (!empty || servicing); ++ return (!empty || servicing || adding); + } + + struct uevent * alloc_uevent (void) +@@ -663,6 +665,7 @@ int uevent_listen(struct udev *udev) + int fdcount, events; + struct pollfd ev_poll = { .fd = fd, .events = POLLIN, }; + ++ uatomic_set(&adding_uev, 0); + fdcount = poll(&ev_poll, 1, -1); + if (fdcount < 0) { + if (errno == EINTR) +@@ -672,6 +675,8 @@ int uevent_listen(struct udev *udev) + err = -errno; + break; + } ++ uatomic_set(&adding_uev, 1); ++ + events = uevent_receive_events(fd, 
&uevlisten_tmp, monitor); + if (events <= 0) + continue; diff --git a/SOURCES/0206-libmultipath-add-purge_disconnected-configuration-op.patch b/SOURCES/0206-libmultipath-add-purge_disconnected-configuration-op.patch new file mode 100644 index 0000000..86fd877 --- /dev/null +++ b/SOURCES/0206-libmultipath-add-purge_disconnected-configuration-op.patch @@ -0,0 +1,279 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Brian Bunker +Date: Fri, 9 Jan 2026 16:50:43 -0800 +Subject: [PATCH] libmultipath: add purge_disconnected configuration option + +Add a new configuration option 'purge_disconnected' that can be set +per multipath device, hardware entry, or globally. This option will +be used to control whether multipathd should automatically remove +paths that are in a disconnected state. + +The option is disabled by default (PURGE_DISCONNECTED_OFF). + +This patch only adds the configuration infrastructure. The actual +purge functionality will be implemented in a subsequent patch. 
+ +Signed-off-by: Brian Bunker +Reviewed-by: Benjamin Marzinski +Reviewed-by: Martin Wilck +Signed-off-by: Benjamin Marzinski +--- + libmultipath/config.c | 2 ++ + libmultipath/config.h | 3 +++ + libmultipath/configure.c | 1 + + libmultipath/defaults.h | 1 + + libmultipath/dict.c | 14 ++++++++++++++ + libmultipath/propsel.c | 16 ++++++++++++++++ + libmultipath/propsel.h | 1 + + libmultipath/structs.h | 12 ++++++++++++ + multipath/multipath.conf.5 | 22 ++++++++++++++++++++++ + 9 files changed, 72 insertions(+) + +diff --git a/libmultipath/config.c b/libmultipath/config.c +index bd8296bf..2aa69cb4 100644 +--- a/libmultipath/config.c ++++ b/libmultipath/config.c +@@ -476,6 +476,7 @@ merge_hwe (struct hwentry * dst, struct hwentry * src) + merge_num(marginal_path_err_rate_threshold); + merge_num(marginal_path_err_recheck_gap_time); + merge_num(marginal_path_double_failed_time); ++ merge_num(purge_disconnected); + + snprintf(id, sizeof(id), "%s/%s", dst->vendor, dst->product); + reconcile_features_with_options(id, &dst->features, +@@ -524,6 +525,7 @@ merge_mpe(struct mpentry *dst, struct mpentry *src) + merge_num(skip_kpartx); + merge_num(max_sectors_kb); + merge_num(ghost_delay); ++ merge_num(purge_disconnected); + merge_num(uid); + merge_num(gid); + merge_num(mode); +diff --git a/libmultipath/config.h b/libmultipath/config.h +index d3abbaea..e0b1e4c8 100644 +--- a/libmultipath/config.h ++++ b/libmultipath/config.h +@@ -87,6 +87,7 @@ struct hwentry { + int marginal_path_err_rate_threshold; + int marginal_path_err_recheck_gap_time; + int marginal_path_double_failed_time; ++ int purge_disconnected; + int skip_kpartx; + int max_sectors_kb; + int ghost_delay; +@@ -130,6 +131,7 @@ struct mpentry { + int marginal_path_err_rate_threshold; + int marginal_path_err_recheck_gap_time; + int marginal_path_double_failed_time; ++ int purge_disconnected; + int skip_kpartx; + int max_sectors_kb; + int ghost_delay; +@@ -186,6 +188,7 @@ struct config { + int 
marginal_path_err_rate_threshold; + int marginal_path_err_recheck_gap_time; + int marginal_path_double_failed_time; ++ int purge_disconnected; + int uxsock_timeout; + int strict_timing; + int retrigger_tries; +diff --git a/libmultipath/configure.c b/libmultipath/configure.c +index 71acb968..c1b6a077 100644 +--- a/libmultipath/configure.c ++++ b/libmultipath/configure.c +@@ -389,6 +389,7 @@ int setup_map(struct multipath *mpp, char **params, struct vectors *vecs) + select_max_sectors_kb(conf, mpp); + select_ghost_delay(conf, mpp); + select_flush_on_last_del(conf, mpp); ++ select_purge_disconnected(conf, mpp); + + sysfs_set_scsi_tmo(conf, mpp); + marginal_pathgroups = conf->marginal_pathgroups; +diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h +index 1ab5b99c..caf4ccce 100644 +--- a/libmultipath/defaults.h ++++ b/libmultipath/defaults.h +@@ -55,6 +55,7 @@ + #define DEFAULT_ALL_TG_PT ALL_TG_PT_OFF + #define DEFAULT_RECHECK_WWID RECHECK_WWID_OFF + #define DEFAULT_AUTO_RESIZE AUTO_RESIZE_NEVER ++#define DEFAULT_PURGE_DISCONNECTED PURGE_DISCONNECTED_OFF + /* Enable no foreign libraries by default */ + #define DEFAULT_ENABLE_FOREIGN "NONE" + +diff --git a/libmultipath/dict.c b/libmultipath/dict.c +index 3c011ece..421070df 100644 +--- a/libmultipath/dict.c ++++ b/libmultipath/dict.c +@@ -979,6 +979,16 @@ static int snprint_def_disable_changed_wwids(struct config *conf, + return print_ignored(buff); + } + ++declare_def_handler(purge_disconnected, set_yes_no_undef) ++declare_def_snprint_defint(purge_disconnected, print_yes_no_undef, ++ DEFAULT_PURGE_DISCONNECTED) ++declare_ovr_handler(purge_disconnected, set_yes_no_undef) ++declare_ovr_snprint(purge_disconnected, print_yes_no_undef) ++declare_hw_handler(purge_disconnected, set_yes_no_undef) ++declare_hw_snprint(purge_disconnected, print_yes_no_undef) ++declare_mp_handler(purge_disconnected, set_yes_no_undef) ++declare_mp_snprint(purge_disconnected, print_yes_no_undef) ++ + 
declare_def_range_handler(remove_retries, 0, INT_MAX) + declare_def_snprint(remove_retries, print_int) + +@@ -2289,6 +2299,7 @@ init_keywords(vector keywords) + install_keyword("retrigger_delay", &def_retrigger_delay_handler, &snprint_def_retrigger_delay); + install_keyword("missing_uev_wait_timeout", &def_uev_wait_timeout_handler, &snprint_def_uev_wait_timeout); + install_keyword("skip_kpartx", &def_skip_kpartx_handler, &snprint_def_skip_kpartx); ++ install_keyword("purge_disconnected", &def_purge_disconnected_handler, &snprint_def_purge_disconnected); + install_keyword("disable_changed_wwids", &def_disable_changed_wwids_handler, &snprint_def_disable_changed_wwids); + install_keyword("remove_retries", &def_remove_retries_handler, &snprint_def_remove_retries); + install_keyword("max_sectors_kb", &def_max_sectors_kb_handler, &snprint_def_max_sectors_kb); +@@ -2386,6 +2397,7 @@ init_keywords(vector keywords) + install_keyword("marginal_path_err_recheck_gap_time", &hw_marginal_path_err_recheck_gap_time_handler, &snprint_hw_marginal_path_err_recheck_gap_time); + install_keyword("marginal_path_double_failed_time", &hw_marginal_path_double_failed_time_handler, &snprint_hw_marginal_path_double_failed_time); + install_keyword("skip_kpartx", &hw_skip_kpartx_handler, &snprint_hw_skip_kpartx); ++ install_keyword("purge_disconnected", &hw_purge_disconnected_handler, &snprint_hw_purge_disconnected); + install_keyword("max_sectors_kb", &hw_max_sectors_kb_handler, &snprint_hw_max_sectors_kb); + install_keyword("ghost_delay", &hw_ghost_delay_handler, &snprint_hw_ghost_delay); + install_keyword("all_tg_pt", &hw_all_tg_pt_handler, &snprint_hw_all_tg_pt); +@@ -2429,6 +2441,7 @@ init_keywords(vector keywords) + install_keyword("marginal_path_double_failed_time", &ovr_marginal_path_double_failed_time_handler, &snprint_ovr_marginal_path_double_failed_time); + + install_keyword("skip_kpartx", &ovr_skip_kpartx_handler, &snprint_ovr_skip_kpartx); ++ install_keyword("purge_disconnected", 
&ovr_purge_disconnected_handler, &snprint_ovr_purge_disconnected); + install_keyword("max_sectors_kb", &ovr_max_sectors_kb_handler, &snprint_ovr_max_sectors_kb); + install_keyword("ghost_delay", &ovr_ghost_delay_handler, &snprint_ovr_ghost_delay); + install_keyword("all_tg_pt", &ovr_all_tg_pt_handler, &snprint_ovr_all_tg_pt); +@@ -2474,6 +2487,7 @@ init_keywords(vector keywords) + install_keyword("marginal_path_err_recheck_gap_time", &mp_marginal_path_err_recheck_gap_time_handler, &snprint_mp_marginal_path_err_recheck_gap_time); + install_keyword("marginal_path_double_failed_time", &mp_marginal_path_double_failed_time_handler, &snprint_mp_marginal_path_double_failed_time); + install_keyword("skip_kpartx", &mp_skip_kpartx_handler, &snprint_mp_skip_kpartx); ++ install_keyword("purge_disconnected", &mp_purge_disconnected_handler, &snprint_mp_purge_disconnected); + install_keyword("max_sectors_kb", &mp_max_sectors_kb_handler, &snprint_mp_max_sectors_kb); + install_keyword("ghost_delay", &mp_ghost_delay_handler, &snprint_mp_ghost_delay); + install_sublevel_end(); +diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c +index 0b6e22c1..c45488f7 100644 +--- a/libmultipath/propsel.c ++++ b/libmultipath/propsel.c +@@ -1306,6 +1306,22 @@ out: + return 0; + } + ++int select_purge_disconnected(struct config *conf, struct multipath *mp) ++{ ++ const char *origin; ++ ++ mp_set_mpe(purge_disconnected); ++ mp_set_ovr(purge_disconnected); ++ mp_set_hwe(purge_disconnected); ++ mp_set_conf(purge_disconnected); ++ mp_set_default(purge_disconnected, DEFAULT_PURGE_DISCONNECTED); ++out: ++ condlog(3, "%s: purge_disconnected = %s %s", mp->alias, ++ (mp->purge_disconnected == PURGE_DISCONNECTED_ON) ? 
"yes" : "no", ++ origin); ++ return 0; ++} ++ + int select_max_sectors_kb(struct config *conf, struct multipath * mp) + { + const char *origin; +diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h +index 152ca44c..325c1ee3 100644 +--- a/libmultipath/propsel.h ++++ b/libmultipath/propsel.h +@@ -35,6 +35,7 @@ int select_marginal_path_err_rate_threshold(struct config *conf, struct multipat + int select_marginal_path_err_recheck_gap_time(struct config *conf, struct multipath *mp); + int select_marginal_path_double_failed_time(struct config *conf, struct multipath *mp); + int select_ghost_delay(struct config *conf, struct multipath * mp); ++int select_purge_disconnected(struct config *conf, struct multipath *mp); + void reconcile_features_with_options(const char *id, char **features, + int* no_path_retry, + int *retain_hwhandler); +diff --git a/libmultipath/structs.h b/libmultipath/structs.h +index 423c8b78..e1969b95 100644 +--- a/libmultipath/structs.h ++++ b/libmultipath/structs.h +@@ -174,6 +174,17 @@ enum auto_resize_state { + AUTO_RESIZE_GROW_SHRINK, + }; + ++/* ++ * purge_disconnected configuration option (per multipath device) ++ * Controls whether paths that become disconnected at the storage target ++ * should be automatically removed from the system via sysfs. ++ */ ++enum purge_disconnected_states { ++ PURGE_DISCONNECTED_UNDEF = YNU_UNDEF, ++ PURGE_DISCONNECTED_OFF = YNU_NO, /* Don't purge */ ++ PURGE_DISCONNECTED_ON = YNU_YES, /* Purge disconnected paths */ ++}; ++ + #define PROTOCOL_UNSET -1 + + enum scsi_protocol { +@@ -427,6 +438,7 @@ struct multipath { + int ghost_delay; + int ghost_delay_tick; + int queue_mode; ++ int purge_disconnected; + uid_t uid; + gid_t gid; + mode_t mode; +diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5 +index 8684bd63..529bd98e 100644 +--- a/multipath/multipath.conf.5 ++++ b/multipath/multipath.conf.5 +@@ -1265,6 +1265,22 @@ The default is: \fBno\fR + . + . 
+ .TP ++.B purge_disconnected ++If set to ++.I yes ++, multipathd will automatically remove devices that are in a disconnected state. ++A path is considered disconnected when the TUR (Test Unit Ready) path checker ++receives the SCSI sense code "LOGICAL UNIT NOT SUPPORTED" (sense key 0x5, ++ASC/ASCQ 0x25/0x00). This typically indicates that the LUN has been unmapped ++or is no longer presented by the storage array. This option helps clean up ++stale device entries that would otherwise remain in the system. ++.RS ++.TP ++The default is: \fBno\fR ++.RE ++. ++. ++.TP + .B disable_changed_wwids + This option is deprecated and ignored. If the WWID of a path suddenly changes, + multipathd handles it as if it was removed and then added again. +@@ -1540,6 +1556,8 @@ section: + .TP + .B skip_kpartx + .TP ++.B purge_disconnected ++.TP + .B max_sectors_kb + .TP + .B ghost_delay +@@ -1715,6 +1733,8 @@ section: + .TP + .B skip_kpartx + .TP ++.B purge_disconnected ++.TP + .B max_sectors_kb + .TP + .B ghost_delay +@@ -1801,6 +1821,8 @@ the values are taken from the \fIdevices\fR or \fIdefaults\fR sections: + .TP + .B skip_kpartx + .TP ++.B purge_disconnected ++.TP + .B max_sectors_kb + .TP + .B ghost_delay diff --git a/SOURCES/0207-multipathd-implement-purge-functionality-for-disconn.patch b/SOURCES/0207-multipathd-implement-purge-functionality-for-disconn.patch new file mode 100644 index 0000000..c6ae129 --- /dev/null +++ b/SOURCES/0207-multipathd-implement-purge-functionality-for-disconn.patch @@ -0,0 +1,752 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Brian Bunker +Date: Fri, 9 Jan 2026 16:50:43 -0800 +Subject: [PATCH] multipathd: implement purge functionality for disconnected + paths + +Implement automatic purging of paths that have been disconnected at the +storage target (e.g., LUN unmapped). This builds on the purge_disconnected +configuration option added in the previous patch. 
+ +This adds: +- New PATH_DISCONNECTED checker state to signal disconnection +- TUR checker support for detecting LUN NOT SUPPORTED (ASC/ASCQ 0x25/0x00) +- Purge thread (purgeloop) that removes paths via sysfs delete attribute +- State machine to track disconnection and delay purging +- Conversion of PATH_DISCONNECTED to PATH_DOWN for normal processing + +The purge thread runs independently and processes paths that have been +marked for purging by the checker thread. Paths are only purged after +remaining disconnected for delay_wait_checks intervals to avoid removing +paths that are temporarily flapping. + +Signed-off-by: Brian Bunker +Signed-off-by: Krishna Kant +Reviewed-by: Benjamin Marzinski +Reviewed-by: Martin Wilck +Signed-off-by: Benjamin Marzinski +--- + libmultipath/checkers.c | 2 + + libmultipath/checkers.h | 15 +- + libmultipath/checkers/tur.c | 10 ++ + libmultipath/discovery.c | 17 ++ + libmultipath/io_err_stat.c | 1 + + libmultipath/print.c | 2 + + libmultipath/structs.h | 14 ++ + multipathd/Makefile | 2 +- + multipathd/main.c | 74 +++++++- + multipathd/purge.c | 326 ++++++++++++++++++++++++++++++++++++ + multipathd/purge.h | 41 +++++ + 11 files changed, 496 insertions(+), 8 deletions(-) + create mode 100644 multipathd/purge.c + create mode 100644 multipathd/purge.h + +diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c +index 9eb3e261..e1b84df0 100644 +--- a/libmultipath/checkers.c ++++ b/libmultipath/checkers.c +@@ -39,6 +39,7 @@ static const char *checker_state_names[PATH_MAX_STATE] = { + [PATH_TIMEOUT] = "timeout", + [PATH_REMOVED] = "removed", + [PATH_DELAYED] = "delayed", ++ [PATH_DISCONNECTED] = "disconnected", + }; + + static LIST_HEAD(checkers); +@@ -343,6 +344,7 @@ static const char *generic_msg[CHECKER_GENERIC_MSGTABLE_SIZE] = { + [CHECKER_MSGID_DOWN] = " reports path is down", + [CHECKER_MSGID_GHOST] = " reports path is ghost", + [CHECKER_MSGID_UNSUPPORTED] = " doesn't support this device", ++ [CHECKER_MSGID_DISCONNECTED] = " no 
access to this device", + }; + + const char *checker_message(const struct checker *c) +diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h +index 2fd1d1c6..2f32f612 100644 +--- a/libmultipath/checkers.h ++++ b/libmultipath/checkers.h +@@ -66,6 +66,15 @@ + * delay_watch_checks checks, when it comes back up again, it will not + * be marked as up until it has been up for delay_wait_checks checks. + * During this time, it is marked as "delayed" ++ * ++ * PATH_DISCONNECTED is a special ephemeral state used to signal that a path ++ * has been disconnected at the storage target (e.g., LUN unmapped). When a ++ * checker returns PATH_DISCONNECTED: ++ * 1. The path's pp->disconnected field is set to track purge state ++ * 2. The state is immediately converted to PATH_DOWN for normal processing ++ * 3. If purge_disconnected is enabled, the path will be removed via sysfs ++ * This state should never be stored in pp->state or pp->chkrstate; it exists ++ * only as a transient return value from checkers to trigger special handling. 
+ */ + enum path_check_state { + PATH_WILD = 0, +@@ -78,6 +87,7 @@ enum path_check_state { + PATH_TIMEOUT, + PATH_REMOVED, + PATH_DELAYED, ++ PATH_DISCONNECTED, /* Ephemeral: mapped to PATH_DOWN */ + PATH_MAX_STATE + }; + +@@ -113,9 +123,10 @@ enum { + CHECKER_MSGID_DOWN, + CHECKER_MSGID_GHOST, + CHECKER_MSGID_UNSUPPORTED, ++ CHECKER_MSGID_DISCONNECTED, + CHECKER_GENERIC_MSGTABLE_SIZE, +- CHECKER_FIRST_MSGID = 100, /* lowest msgid for checkers */ +- CHECKER_MSGTABLE_SIZE = 100, /* max msg table size for checkers */ ++ CHECKER_FIRST_MSGID = 100, /* lowest msgid for checkers */ ++ CHECKER_MSGTABLE_SIZE = 100, /* max msg table size for checkers */ + }; + + struct checker_class; +diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c +index d82f7dbc..370a02a6 100644 +--- a/libmultipath/checkers/tur.c ++++ b/libmultipath/checkers/tur.c +@@ -188,6 +188,16 @@ retry: + *msgid = CHECKER_MSGID_GHOST; + return PATH_GHOST; + } ++ } else if (key == 0x5) { ++ /* Illegal request */ ++ if (asc == 0x25 && ascq == 0x00) { ++ /* ++ * LUN NOT SUPPORTED: unmapped at target. ++ * Signals pp->disconnected, becomes PATH_DOWN. ++ */ ++ *msgid = CHECKER_MSGID_DISCONNECTED; ++ return PATH_DISCONNECTED; ++ } + } + *msgid = CHECKER_MSGID_DOWN; + return PATH_DOWN; +diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c +index 186423e0..c529f336 100644 +--- a/libmultipath/discovery.c ++++ b/libmultipath/discovery.c +@@ -2416,8 +2416,25 @@ int pathinfo(struct path *pp, struct config *conf, int mask) + pp->state == PATH_UNCHECKED || + pp->state == PATH_WILD) + pp->chkrstate = pp->state = newstate; ++ /* ++ * PATH_TIMEOUT and PATH_DISCONNECTED are ephemeral ++ * states that should never be stored in pp->state. ++ * Convert them to PATH_DOWN immediately. 
++ */ + if (pp->state == PATH_TIMEOUT) + pp->state = PATH_DOWN; ++ if (pp->state == PATH_DISCONNECTED) { ++ int purge_enabled = pp->mpp && ++ pp->mpp->purge_disconnected == ++ PURGE_DISCONNECTED_ON; ++ if (purge_enabled && ++ pp->disconnected == NOT_DISCONNECTED) { ++ condlog(2, "%s: mark path for purge", ++ pp->dev); ++ pp->disconnected = DISCONNECTED_READY_FOR_PURGE; ++ } ++ pp->state = PATH_DOWN; ++ } + if (pp->state == PATH_UP && !pp->size) { + condlog(3, "%s: device size is 0, " + "path unusable", pp->dev); +diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c +index d8d91f64..d744d50e 100644 +--- a/libmultipath/io_err_stat.c ++++ b/libmultipath/io_err_stat.c +@@ -380,6 +380,7 @@ static void account_async_io_state(struct io_err_stat_path *pp, int rc) + switch (rc) { + case PATH_DOWN: + case PATH_TIMEOUT: ++ case PATH_DISCONNECTED: + pp->io_err_nr++; + break; + case PATH_UNCHECKED: +diff --git a/libmultipath/print.c b/libmultipath/print.c +index ff224bc4..42d1d44c 100644 +--- a/libmultipath/print.c ++++ b/libmultipath/print.c +@@ -487,6 +487,8 @@ snprint_chk_state (struct strbuf *buff, const struct path * pp) + return append_strbuf_str(buff, "i/o timeout"); + case PATH_DELAYED: + return append_strbuf_str(buff, "delayed"); ++ case PATH_DISCONNECTED: ++ return append_strbuf_str(buff, "disconnected"); + default: + return append_strbuf_str(buff, "undef"); + } +diff --git a/libmultipath/structs.h b/libmultipath/structs.h +index e1969b95..32643684 100644 +--- a/libmultipath/structs.h ++++ b/libmultipath/structs.h +@@ -185,6 +185,18 @@ enum purge_disconnected_states { + PURGE_DISCONNECTED_ON = YNU_YES, /* Purge disconnected paths */ + }; + ++/* ++ * Path disconnection state (per path) ++ * Tracks whether a path has been marked for purge and whether it's already queued. 
++ */ ++enum path_disconnected_state { ++ NOT_DISCONNECTED, /* Path is not disconnected */ ++ DISCONNECTED_READY_FOR_PURGE, /* Path is disconnected and ready to be ++ queued for purge */ ++ DISCONNECTED_QUEUED_FOR_PURGE, /* Path is disconnected and already ++ queued for purge */ ++}; ++ + #define PROTOCOL_UNSET -1 + + enum scsi_protocol { +@@ -355,6 +367,8 @@ struct path { + int state; + int dmstate; + int chkrstate; ++ enum path_disconnected_state disconnected; /* Marked for purge due to ++ disconnection */ + int failcount; + int priority; + int pgindex; +diff --git a/multipathd/Makefile b/multipathd/Makefile +index 00342464..a49c4973 100644 +--- a/multipathd/Makefile ++++ b/multipathd/Makefile +@@ -46,7 +46,7 @@ ifeq ($(ENABLE_DMEVENTS_POLL),0) + endif + + OBJS = main.o pidfile.o uxlsnr.o uxclnt.o cli.o cli_handlers.o waiter.o \ +- dmevents.o init_unwinder.o ++ dmevents.o init_unwinder.o purge.o + + ifeq ($(FPIN_SUPPORT),1) + OBJS += fpin_handlers.o +diff --git a/multipathd/main.c b/multipathd/main.c +index 9beb0e06..d91a4d49 100644 +--- a/multipathd/main.c ++++ b/multipathd/main.c +@@ -84,6 +84,7 @@ + #include "io_err_stat.h" + #include "wwids.h" + #include "foreign.h" ++#include "purge.h" + #include "../third-party/valgrind/drd.h" + #include "init_unwinder.h" + +@@ -135,11 +136,11 @@ static volatile enum daemon_status running_state = DAEMON_INIT; + pid_t daemon_pid; + static pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER; + static pthread_cond_t config_cond; +-static pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr, dmevent_thr, +- fpin_thr, fpin_consumer_thr; +-static bool check_thr_started, uevent_thr_started, uxlsnr_thr_started, +- uevq_thr_started, dmevent_thr_started, fpin_thr_started, +- fpin_consumer_thr_started; ++static pthread_t check_thr, purge_thr, uevent_thr, uxlsnr_thr, uevq_thr, ++ dmevent_thr, fpin_thr, fpin_consumer_thr; ++static bool check_thr_started, purge_thr_started, uevent_thr_started, ++ uxlsnr_thr_started, 
uevq_thr_started, dmevent_thr_started, ++ fpin_thr_started, fpin_consumer_thr_started; + static int pid_fd = -1; + + static inline enum daemon_status get_running_state(void) +@@ -2377,6 +2378,28 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) + if (newstate == PATH_REMOVED) + newstate = PATH_DOWN; + ++ /* ++ * PATH_DISCONNECTED is an ephemeral state used to signal that a path ++ * has been disconnected at the storage target (LUN unmapped). We use ++ * it to set pp->disconnected for purge tracking, then immediately ++ * convert it to PATH_DOWN for normal path failure handling. ++ * ++ * This ensures PATH_DISCONNECTED never gets stored in pp->state or ++ * pp->chkrstate - it exists only as a transient signal from the ++ * checker to trigger special handling before becoming PATH_DOWN. ++ */ ++ if (newstate == PATH_DISCONNECTED) { ++ if (pp->mpp && ++ pp->mpp->purge_disconnected == PURGE_DISCONNECTED_ON && ++ pp->disconnected == NOT_DISCONNECTED) { ++ condlog(2, "%s: mark (%s) path for purge", pp->dev, ++ checker_state_name(newstate)); ++ pp->disconnected = DISCONNECTED_READY_FOR_PURGE; ++ } ++ /* Always convert to PATH_DOWN for normal processing */ ++ newstate = PATH_DOWN; ++ } ++ + if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) { + condlog(2, "%s: unusable path (%s) - checker failed", + pp->dev, checker_state_name(newstate)); +@@ -2684,6 +2707,7 @@ checkerloop (void *ap) + struct timespec diff_time, start_time, end_time; + int num_paths = 0, strict_timing, rc = 0; + unsigned int ticks = 0; ++ LIST_HEAD(purge_list); + + get_monotonic_time(&start_time); + if (start_time.tv_sec && last_time.tv_sec) { +@@ -2724,6 +2748,12 @@ checkerloop (void *ap) + } + lock_cleanup_pop(vecs->lock); + ++ /* ++ * Cleanup handler to free purge_list if thread is cancelled. ++ * This prevents memory leaks during shutdown. 
++ */ ++ pthread_cleanup_push(cleanup_purge_list, &purge_list); ++ + pthread_cleanup_push(cleanup_lock, &vecs->lock); + lock(&vecs->lock); + pthread_testcancel(); +@@ -2731,6 +2761,11 @@ checkerloop (void *ap) + retry_count_tick(vecs->mpvec); + missing_uev_wait_tick(vecs); + ghost_delay_tick(vecs); ++ /* ++ * Build purge list for disconnected paths. ++ * The caller will queue it after releasing vecs->lock. ++ */ ++ build_purge_list(vecs, &purge_list); + lock_cleanup_pop(vecs->lock); + + if (count) +@@ -2745,6 +2780,26 @@ checkerloop (void *ap) + lock_cleanup_pop(vecs->lock); + } + ++ /* ++ * Queue purge work for disconnected paths. ++ * This is done after releasing vecs->lock to avoid holding ++ * the lock while signaling the purge thread. ++ */ ++ if (!list_empty(&purge_list)) { ++ pthread_cleanup_push(cleanup_mutex, &purge_mutex); ++ pthread_mutex_lock(&purge_mutex); ++ pthread_testcancel(); ++ list_splice_tail_init(&purge_list, &purge_queue); ++ pthread_cond_signal(&purge_cond); ++ pthread_cleanup_pop(1); ++ } ++ ++ /* ++ * Pop cleanup handler. Execute it (arg=1) to free purge_list ++ * at the end of each iteration. 
++ */ ++ pthread_cleanup_pop(1); ++ + diff_time.tv_nsec = 0; + if (start_time.tv_sec) { + get_monotonic_time(&end_time); +@@ -3225,6 +3280,8 @@ static void cleanup_threads(void) + + if (check_thr_started) + pthread_cancel(check_thr); ++ if (purge_thr_started) ++ pthread_cancel(purge_thr); + if (uevent_thr_started) + pthread_cancel(uevent_thr); + if (uxlsnr_thr_started) +@@ -3241,6 +3298,8 @@ static void cleanup_threads(void) + + if (check_thr_started) + pthread_join(check_thr, NULL); ++ if (purge_thr_started) ++ pthread_join(purge_thr, NULL); + if (uevent_thr_started) + pthread_join(uevent_thr, NULL); + if (uxlsnr_thr_started) +@@ -3496,6 +3555,11 @@ child (__attribute__((unused)) void *param) + goto failed; + } else + check_thr_started = true; ++ if ((rc = pthread_create(&purge_thr, &misc_attr, purgeloop, vecs))) { ++ condlog(0, "failed to create purge loop thread: %d", rc); ++ goto failed; ++ } else ++ purge_thr_started = true; + if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) { + condlog(0, "failed to create uevent dispatcher: %d", rc); + goto failed; +diff --git a/multipathd/purge.c b/multipathd/purge.c +new file mode 100644 +index 00000000..44f0c905 +--- /dev/null ++++ b/multipathd/purge.c +@@ -0,0 +1,326 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Copyright (C) 2025 Brian Bunker ++ * Copyright (C) 2025 Krishna Kant ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "vector.h" ++#include "structs.h" ++#include "structs_vec.h" ++#include "debug.h" ++#include "util.h" ++#include "lock.h" ++#include "sysfs.h" ++#include "list.h" ++#include "purge.h" ++ ++pthread_mutex_t purge_mutex = PTHREAD_MUTEX_INITIALIZER; ++pthread_cond_t purge_cond = PTHREAD_COND_INITIALIZER; ++LIST_HEAD(purge_queue); ++ ++/* ++ * Information needed to purge a path. We copy this data while holding ++ * vecs->lock, then release the lock before doing the actual sysfs write. 
++ * This prevents blocking other operations while waiting for sysfs I/O. ++ * ++ * The udev device reference captures the sysfs path (including H:C:T:L). ++ * The duplicated fd prevents device name/number reuse: the kernel will not ++ * reuse the device's minor number (which maps to the device name) for a new ++ * device while we hold an open file descriptor, even if the original device ++ * has been removed. This protects against deleting a new device that reused ++ * the same name after the original was removed externally. ++ */ ++struct purge_path_info { ++ struct list_head node; /* List linkage */ ++ struct udev_device *udev; /* Udev device (refcounted) */ ++ int fd; /* Dup'd fd prevents device reuse */ ++}; ++ ++/* ++ * Attempt to delete a path by writing to the SCSI device's sysfs delete ++ * attribute. This triggers kernel-level device removal. The actual cleanup ++ * of the path structure from pathvec happens later when a uevent arrives ++ * (handled by uev_remove_path). ++ * ++ * This function does NOT require vecs->lock to be held, as it operates on ++ * copied data. This function may block while writing to sysfs, which is ++ * why it's called without holding any locks. ++ * ++ * Protection against device reuse: ++ * The duplicated fd in purge_path_info prevents the kernel from reusing ++ * the device's minor number (and thus the device name like /dev/sdd) for ++ * a new device, even if the original device has been removed externally. ++ * This ensures we cannot accidentally delete a new device that reused the ++ * same name. The kernel maintains this guarantee as long as we hold the ++ * open file descriptor. ++ */ ++static void delete_path_sysfs(struct purge_path_info *info) ++{ ++ struct udev_device *ud; ++ const char *devname; ++ ++ if (!info->udev) ++ goto out; ++ ++ devname = udev_device_get_devnode(info->udev); ++ ++ /* ++ * Get the SCSI device parent. This is where we'll write to the ++ * "delete" attribute to trigger device removal. 
++ */ ++ ud = udev_device_get_parent_with_subsystem_devtype(info->udev, "scsi", ++ "scsi_device"); ++ if (!ud) { ++ condlog(3, "%s: failed to purge, no SCSI parent found", devname); ++ goto out; ++ } ++ ++ /* ++ * Write "1" to the SCSI device's delete attribute to trigger ++ * kernel-level device removal. ++ */ ++ if (sysfs_attr_set_value(ud, "delete", "1", 1) < 0) ++ condlog(3, "%s: failed to purge", devname); ++ else ++ condlog(2, "%s: purged", devname); ++ ++out: ++ return; ++} ++ ++/* ++ * Prepare purge info for a path while holding vecs->lock. ++ * Takes a reference on the udev device and duplicates the fd. ++ * Returns allocated purge_path_info on success, NULL on failure. ++ * ++ * We require a valid fd because it prevents the kernel from reusing ++ * the device's minor number (and device name) for a new device while ++ * we hold it open. This protects against accidentally deleting a new ++ * device that reused the same name after the original was removed. ++ */ ++static struct purge_path_info *prepare_purge_path_info(struct path *pp) ++{ ++ struct purge_path_info *info = NULL; ++ ++ if (!pp->udev || !pp->mpp) ++ goto out; ++ ++ /* ++ * We require a valid fd to prevent device name reuse. ++ * Without it, we cannot safely purge the device. ++ */ ++ if (pp->fd < 0) { ++ condlog(3, "%s: no fd available, cannot safely purge", pp->dev); ++ goto out; ++ } ++ ++ info = calloc(1, sizeof(*info)); ++ if (!info) ++ goto out; ++ ++ INIT_LIST_HEAD(&info->node); ++ info->udev = udev_device_ref(pp->udev); ++ if (!info->udev) ++ goto out_free; ++ ++ info->fd = dup(pp->fd); ++ if (info->fd < 0) { ++ condlog(3, "%s: failed to dup fd: %s, cannot safely purge", ++ pp->dev, strerror(errno)); ++ goto out_unref; ++ } ++ ++ return info; ++ ++out_unref: ++ udev_device_unref(info->udev); ++out_free: ++ free(info); ++ info = NULL; ++out: ++ return info; ++} ++ ++/* ++ * Clean up and free purge info. 
++ */ ++static void free_purge_path_info(struct purge_path_info *info) ++{ ++ if (!info) ++ return; ++ ++ if (info->fd >= 0) ++ close(info->fd); ++ if (info->udev) ++ udev_device_unref(info->udev); ++ free(info); ++} ++ ++/* ++ * Build a list of purge_path_info for all paths marked for purge. ++ * This should be called while holding vecs->lock. It clears the ++ * disconnected flag and prepares purge info for each path, adding ++ * them to tmpq. ++ */ ++void build_purge_list(struct vectors *vecs, struct list_head *tmpq) ++{ ++ struct path *pp; ++ unsigned int i; ++ ++ vector_foreach_slot (vecs->pathvec, pp, i) { ++ struct purge_path_info *info; ++ ++ if (pp->disconnected != DISCONNECTED_READY_FOR_PURGE) ++ continue; ++ ++ /* ++ * Mark as queued whether we succeed or fail. ++ * On success, we're purging it now. ++ * On failure, retrying is unlikely to help until ++ * the checker re-evaluates the path. ++ */ ++ pp->disconnected = DISCONNECTED_QUEUED_FOR_PURGE; ++ ++ info = prepare_purge_path_info(pp); ++ if (info) { ++ condlog(2, "%s: queuing path for purge", pp->dev); ++ list_add_tail(&info->node, tmpq); ++ } else ++ condlog(3, "%s: failed to prepare purge info", pp->dev); ++ } ++} ++ ++static void rcu_unregister(__attribute__((unused)) void *param) ++{ ++ rcu_unregister_thread(); ++} ++ ++/* ++ * Cleanup handler for a single purge_path_info. ++ * Used to prevent memory leaks if thread is cancelled while processing. ++ */ ++static void cleanup_purge_path_info(void *arg) ++{ ++ struct purge_path_info *info = arg; ++ ++ free_purge_path_info(info); ++} ++ ++/* ++ * Cleanup handler for purge list. Frees all purge_path_info entries. ++ * Can be called as a pthread cleanup handler or directly. 
++ */ ++void cleanup_purge_list(void *arg) ++{ ++ struct list_head *purge_list = arg; ++ struct purge_path_info *info, *tmp; ++ ++ list_for_each_entry_safe(info, tmp, purge_list, node) ++ { ++ list_del_init(&info->node); ++ free_purge_path_info(info); ++ } ++} ++ ++/* ++ * Cleanup handler for the global purge queue. ++ * Used during shutdown to free any remaining queued items. ++ */ ++static void cleanup_global_purge_queue(void *arg __attribute__((unused))) ++{ ++ pthread_mutex_lock(&purge_mutex); ++ cleanup_purge_list(&purge_queue); ++ pthread_mutex_unlock(&purge_mutex); ++} ++ ++/* ++ * Main purge thread loop. ++ * ++ * This thread waits for purge_path_info structs to be queued by the checker ++ * thread, then processes them by writing to their sysfs delete attributes. ++ * The checker thread builds the list while holding vecs->lock, so this ++ * thread doesn't need to grab that lock at all. ++ * ++ * Uses list_splice_tail_init() like uevent_dispatch() to safely transfer ++ * items from the global queue to a local list for processing. ++ * ++ * Cleanup handlers are registered for both the local purge_list and the ++ * global purge_queue (similar to uevent_listen), and for each individual ++ * purge_path_info after it's popped off the list (similar to service_uevq). ++ * This ensures no memory leaks if the thread is cancelled at any point. ++ */ ++void *purgeloop(void *ap __attribute__((unused))) ++{ ++ pthread_cleanup_push(rcu_unregister, NULL); ++ rcu_register_thread(); ++ mlockall(MCL_CURRENT | MCL_FUTURE); ++ ++ /* ++ * Cleanup handler for global purge_queue. ++ * This handles items that were queued but not yet moved to purge_list. ++ */ ++ pthread_cleanup_push(cleanup_global_purge_queue, NULL); ++ ++ while (1) { ++ LIST_HEAD(purge_list); ++ struct purge_path_info *info; ++ ++ /* ++ * Cleanup handler for local purge_list. ++ * This handles items that were moved from purge_queue but ++ * not yet processed. 
++ */ ++ pthread_cleanup_push(cleanup_purge_list, &purge_list); ++ ++ /* ++ * Cleanup handler for purge_mutex. ++ * Note: pthread_cond_wait() reacquires the mutex before ++ * returning, even on cancellation, so this cleanup handler ++ * will properly unlock it if we're cancelled. ++ */ ++ pthread_cleanup_push(cleanup_mutex, &purge_mutex); ++ pthread_mutex_lock(&purge_mutex); ++ pthread_testcancel(); ++ while (list_empty(&purge_queue)) { ++ condlog(4, "purgeloop waiting for work"); ++ pthread_cond_wait(&purge_cond, &purge_mutex); ++ } ++ list_splice_tail_init(&purge_queue, &purge_list); ++ pthread_cleanup_pop(1); ++ ++ /* ++ * Process all paths in the list without holding any locks. ++ * The sysfs operations may block, but that's fine since we're ++ * not holding vecs->lock. ++ * ++ * After popping each info off the list, we immediately push ++ * a cleanup handler for it. This ensures it gets freed even ++ * if we're cancelled inside delete_path_sysfs(). ++ */ ++ while ((info = list_pop_entry(&purge_list, typeof(*info), node))) { ++ pthread_cleanup_push(cleanup_purge_path_info, info); ++ delete_path_sysfs(info); ++ pthread_cleanup_pop(1); ++ } ++ ++ /* ++ * Pop cleanup handler without executing it (0) since we've ++ * already freed everything above. The handler only runs if ++ * the thread is cancelled during processing. ++ */ ++ pthread_cleanup_pop(0); ++ } ++ ++ pthread_cleanup_pop(1); ++ pthread_cleanup_pop(1); ++ return NULL; ++} +diff --git a/multipathd/purge.h b/multipathd/purge.h +new file mode 100644 +index 00000000..1fe755f3 +--- /dev/null ++++ b/multipathd/purge.h +@@ -0,0 +1,41 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Copyright (C) 2025 Brian Bunker ++ * Copyright (C) 2025 Krishna Kant ++ */ ++ ++#ifndef PURGE_H_INCLUDED ++#define PURGE_H_INCLUDED ++ ++#include ++#include "list.h" ++ ++struct vectors; ++ ++/* ++ * Purge thread synchronization. ++ * The checker thread builds a list of paths to purge and queues them here. 
++ * The purge thread picks up the queue and processes it. ++ */ ++extern pthread_mutex_t purge_mutex; ++extern pthread_cond_t purge_cond; ++extern struct list_head purge_queue; ++ ++/* ++ * Build a list of paths to purge and add them to tmpq. Called by checker ++ * thread while holding vecs->lock. ++ */ ++void build_purge_list(struct vectors *vecs, struct list_head *tmpq); ++ ++/* ++ * Cleanup handler for purge list. Frees all purge_path_info entries. ++ * Can be called as a pthread cleanup handler or directly for shutdown cleanup. ++ */ ++void cleanup_purge_list(void *arg); ++ ++/* ++ * Main purge thread loop ++ */ ++void *purgeloop(void *ap); ++ ++#endif /* PURGE_H_INCLUDED */ diff --git a/SOURCES/0208-libmpathpersist-fix-register-retry-status-checking.patch b/SOURCES/0208-libmpathpersist-fix-register-retry-status-checking.patch new file mode 100644 index 0000000..2dceac3 --- /dev/null +++ b/SOURCES/0208-libmpathpersist-fix-register-retry-status-checking.patch @@ -0,0 +1,47 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Mon, 1 Dec 2025 22:02:10 -0500 +Subject: [PATCH] libmpathpersist: fix register retry status checking + +If there libmpathpersist failed to create a thread to retry the register +and ignore command, mpath_prout_reg should fail. Instead, the code was +simply ignoring the failed threads. Fix that. 
+ +Fixes: 2a4ca250 ("libmpathpersist: change how reservation conflicts are handled") +Signed-off-by: Benjamin Marzinski +Reviewed-by: Martin Wilck +--- + libmpathpersist/mpath_persist.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/libmpathpersist/mpath_persist.c b/libmpathpersist/mpath_persist.c +index 72be48c1..6cb56dab 100644 +--- a/libmpathpersist/mpath_persist.c ++++ b/libmpathpersist/mpath_persist.c +@@ -858,18 +858,19 @@ int mpath_prout_reg(struct multipath *mpp,int rq_servact, int rq_scope, + } + } + for (i = 0; i < count; i++) { +- if (thread[i].param.status != MPATH_PR_SKIP && +- thread[i].param.status != MPATH_PR_THREAD_ERROR) { ++ if (thread[i].param.status == MPATH_PR_SKIP) ++ continue; ++ if (thread[i].param.status != MPATH_PR_THREAD_ERROR) { + rc = pthread_join(thread[i].id, NULL); + if (rc) { + condlog(3, "%s: failed to join thread while retrying %d", +- mpp->wwid, i); ++ mpp->wwid, i); + } +- if (thread[i].param.status == MPATH_PR_RETRYABLE_ERROR) +- retryable_error = true; +- else if (status == MPATH_PR_SUCCESS) +- status = thread[i].param.status; + } ++ if (thread[i].param.status == MPATH_PR_RETRYABLE_ERROR) ++ retryable_error = true; ++ else if (status == MPATH_PR_SUCCESS) ++ status = thread[i].param.status; + } + need_retry = false; + } diff --git a/SOURCES/0209-multipathd-remember-number-of-registered-keys-when-i.patch b/SOURCES/0209-multipathd-remember-number-of-registered-keys-when-i.patch new file mode 100644 index 0000000..6911cdb --- /dev/null +++ b/SOURCES/0209-multipathd-remember-number-of-registered-keys-when-i.patch @@ -0,0 +1,59 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Mon, 1 Dec 2025 22:02:12 -0500 +Subject: [PATCH] multipathd: remember number of registered keys when ioctl + fails + +If prin_do_scsi_ioctl() fails in update_map_pr() for some reason other +than Persistent Reservations not being supported, It shouldn't clear the 
+number of registered keys, since there's no reason to think that it has +changed. Similarly, if update_map_pr() fails in mpath_pr_event_handle(), +don't assume that the nr_keys_needed was cleared. Just return whatever +the value is now. This saves multipathd from doing pointless calls to +update_map_pr(), if one of the paths is failing. + +Signed-off-by: Benjamin Marzinski +Reviewed-by: Martin Wilck +--- + multipathd/main.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/multipathd/main.c b/multipathd/main.c +index d91a4d49..2526d8d1 100644 +--- a/multipathd/main.c ++++ b/multipathd/main.c +@@ -3842,7 +3842,9 @@ void unset_pr(struct multipath *mpp) + * The number of found keys must be at least as large as *nr_keys, + * and if MPATH_PR_SUCCESS is returned and mpp->prflag is PR_SET after + * the call, *nr_keys will be set to the number of found keys. Otherwise +- * it will be set to 0. ++ * if mpp->prflag is PR_UNSET it will be set to 0. If MPATH_PR_SUCCESS ++ * is not returned and mpp->prflag is not PR_UNSET, nr_keys will not be ++ * changed. + */ + static int update_map_pr(struct multipath *mpp, struct path *pp, unsigned int *nr_keys) + { +@@ -3871,11 +3873,12 @@ static int update_map_pr(struct multipath *mpp, struct path *pp, unsigned int *n + + ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, &resp, 0); + if (ret != MPATH_PR_SUCCESS) { +- if (ret == MPATH_PR_ILLEGAL_REQ) ++ if (ret == MPATH_PR_ILLEGAL_REQ) { + unset_pr(mpp); ++ *nr_keys = 0; ++ } + condlog(0, "%s : pr in read keys service action failed Error=%d", + mpp->alias, ret); +- *nr_keys = 0; + return ret; + } + +@@ -3990,7 +3993,7 @@ retry: + clear_reg ? 
"Clearing" : "Setting", pp->dev, ret); + } else if (!clear_reg) { + if (update_map_pr(mpp, pp, &nr_keys_needed) != MPATH_PR_SUCCESS) +- return 0; ++ return nr_keys_needed; + if (mpp->prflag != PR_SET) { + memset(&param, 0, sizeof(param)); + clear_reg = true; diff --git a/SOURCES/0210-libmpathpersist-fix-code-for-skipping-multipathd-pat.patch b/SOURCES/0210-libmpathpersist-fix-code-for-skipping-multipathd-pat.patch new file mode 100644 index 0000000..90b99d5 --- /dev/null +++ b/SOURCES/0210-libmpathpersist-fix-code-for-skipping-multipathd-pat.patch @@ -0,0 +1,459 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Mon, 15 Dec 2025 15:29:58 -0500 +Subject: [PATCH] libmpathpersist: fix code for skipping multipathd path + registration + +When libmpathpersist notifies multipathd that a key has been registered, +cli_setprstatus() calls pr_register_active_paths() with a flag to let it +know that the paths are likely already registered, and it can skip +re-registering them, as long as the number of active paths matches the +number of registered keys. This shortcut can fail, causing multipathd to +not register needed paths, if either a path becomes usable and another +becomes unusable while libmpathpersist is running or if there already +were registered keys for I_T Nexus's that don't correspond to path +devices. + +To make this shortcut work in cases like that, this commit adds a new +multipathd command "setprstatus map <map> pathlist <pathlist>", where +<pathlist> is a quoted, whitespace separated list of scsi path devices. +libmpathpersist will send out the list of paths it registered the key +on. pr_register_active_paths() will skip calling mpath_pr_event_handle() +for paths on that list. + +In order to deal with the possibility of a preempt occurring while +libmpathpersist was running, the code still needs to check that it has +the expected number of keys. 
+ +Fixes: f7d6cd17 ("multipathd: Fix race while registering PR key") +Signed-off-by: Benjamin Marzinski +Reviewed-by: Martin Wilck +--- + libmpathpersist/mpath_persist.c | 6 +-- + libmpathpersist/mpath_updatepr.c | 50 ++++++++++++++++++------- + libmpathpersist/mpathpr.h | 2 +- + multipathd/cli.c | 2 + + multipathd/cli.h | 2 + + multipathd/cli_handlers.c | 37 +++++++++++++++++-- + multipathd/cli_handlers.h | 1 + + multipathd/main.c | 63 +++++++++++++++++++++----------- + multipathd/main.h | 4 +- + multipathd/multipathd.8 | 10 ++++- + 10 files changed, 132 insertions(+), 45 deletions(-) + +diff --git a/libmpathpersist/mpath_persist.c b/libmpathpersist/mpath_persist.c +index 6cb56dab..ab8fa630 100644 +--- a/libmpathpersist/mpath_persist.c ++++ b/libmpathpersist/mpath_persist.c +@@ -527,12 +527,12 @@ static int do_mpath_persistent_reserve_out(vector curmp, vector pathvec, int fd, + case MPATH_PROUT_REG_SA: + case MPATH_PROUT_REG_IGN_SA: + if (unregistering) +- update_prflag(alias, 0); ++ update_prflag(mpp, 0); + else +- update_prflag(alias, 1); ++ update_prflag(mpp, 1); + break; + case MPATH_PROUT_CLEAR_SA: +- update_prflag(alias, 0); ++ update_prflag(mpp, 0); + if (mpp->prkey_source == PRKEY_SOURCE_FILE) + update_prkey(alias, 0); + break; +diff --git a/libmpathpersist/mpath_updatepr.c b/libmpathpersist/mpath_updatepr.c +index bfa6e089..e24432bb 100644 +--- a/libmpathpersist/mpath_updatepr.c ++++ b/libmpathpersist/mpath_updatepr.c +@@ -19,9 +19,9 @@ + #include "memory.h" + #include "mpathpr.h" + #include "structs.h" ++#include "strbuf.h" + +- +-static char *do_pr(char *alias, char *str) ++static char *do_pr(char *alias, const char *str) + { + int fd; + char *reply; +@@ -51,24 +51,26 @@ static char *do_pr(char *alias, char *str) + return reply; + } + +-static int do_update_pr(char *alias, char *cmd, char *key) ++static int do_update_pr(char *alias, char *cmd, const char *data) + { +- char str[256]; ++ STRBUF_ON_STACK(buf); + char *reply = NULL; + int ret = -1; + +- if 
(key) +- snprintf(str,sizeof(str),"%s map %s key %s", cmd, alias, key); ++ if (data) ++ print_strbuf(&buf, "%s map %s %s %s", cmd, alias, ++ strcmp(cmd, "setprkey") ? "pathlist" : "key", data); + else +- snprintf(str,sizeof(str),"%s map %s", cmd, alias); ++ print_strbuf(&buf, "%s map %s", cmd, alias); + +- reply = do_pr(alias, str); ++ reply = do_pr(alias, get_strbuf_str(&buf)); + if (reply) { +- condlog (2, "%s: message=%s reply=%s", alias, str, reply); +- if (reply && strncmp(reply,"ok", 2) == 0) ++ if (strncmp(reply, "ok", 2) == 0) + ret = 0; + else + ret = -1; ++ condlog(ret ? 0 : 4, "%s: message=%s reply=%s", alias, ++ get_strbuf_str(&buf), reply); + } + + free(reply); +@@ -104,9 +106,31 @@ int get_prhold(char *mapname) { + return do_get_pr(mapname, "getprhold"); + } + +-int update_prflag(char *mapname, int set) { +- return do_update_pr(mapname, (set)? "setprstatus" : "unsetprstatus", +- NULL); ++int update_prflag(struct multipath *mpp, int set) ++{ ++ STRBUF_ON_STACK(buf); ++ int i, j; ++ bool first = true; ++ struct pathgroup *pgp = NULL; ++ struct path *pp = NULL; ++ ++ if (!set) ++ return do_update_pr(mpp->alias, "unsetprstatus", NULL); ++ ++ append_strbuf_str(&buf, "\""); ++ vector_foreach_slot (mpp->pg, pgp, j) { ++ vector_foreach_slot (pgp->paths, pp, i) { ++ if (pp->state == PATH_UP || pp->state == PATH_GHOST) { ++ if (first) { ++ append_strbuf_str(&buf, pp->dev_t); ++ first = false; ++ } else ++ print_strbuf(&buf, " %s", pp->dev_t); ++ } ++ } ++ } ++ append_strbuf_str(&buf, "\""); ++ return do_update_pr(mpp->alias, "setprstatus", get_strbuf_str(&buf)); ++} + + int update_prhold(char *mapname, bool set) { +diff --git a/libmpathpersist/mpathpr.h b/libmpathpersist/mpathpr.h +index 69f402f2..67d04247 100644 +--- a/libmpathpersist/mpathpr.h ++++ b/libmpathpersist/mpathpr.h +@@ -40,7 +40,7 @@ int mpath_prout_rel(struct multipath *mpp,int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor *paramp, int noisy, + bool unregister); + 
+-int update_prflag(char *mapname, int set); ++int update_prflag(struct multipath *mpp, int set); + int update_prkey_flags(char *mapname, uint64_t prkey, uint8_t sa_flags); + int get_prflag(char *mapname); + int get_prhold(char *mapname); +diff --git a/multipathd/cli.c b/multipathd/cli.c +index d33b571d..9e833b02 100644 +--- a/multipathd/cli.c ++++ b/multipathd/cli.c +@@ -226,6 +226,7 @@ load_keys (void) + r += add_key(keys, "getprhold", GETPRHOLD, 0); + r += add_key(keys, "setprhold", SETPRHOLD, 0); + r += add_key(keys, "unsetprhold", UNSETPRHOLD, 0); ++ r += add_key(keys, "pathlist", PATHLIST, 1); + + if (r) { + free_keys(keys); +@@ -572,6 +573,7 @@ cli_init (void) { + add_handler(SHUTDOWN, NULL); + add_handler(GETPRSTATUS+MAP, NULL); + add_handler(SETPRSTATUS+MAP, NULL); ++ add_handler(SETPRSTATUS+MAP+PATHLIST, NULL); + add_handler(UNSETPRSTATUS+MAP, NULL); + add_handler(GETPRKEY+MAP, NULL); + add_handler(SETPRKEY+MAP+KEY, NULL); +diff --git a/multipathd/cli.h b/multipathd/cli.h +index 4d12f8fd..17344cb4 100644 +--- a/multipathd/cli.h ++++ b/multipathd/cli.h +@@ -50,6 +50,7 @@ enum { + __GETPRHOLD, + __SETPRHOLD, + __UNSETPRHOLD, ++ __PATHLIST, + }; + + #define LIST (1 << __LIST) +@@ -99,6 +100,7 @@ enum { + #define GETPRHOLD (1ULL << __GETPRHOLD) + #define SETPRHOLD (1ULL << __SETPRHOLD) + #define UNSETPRHOLD (1ULL << __UNSETPRHOLD) ++#define PATHLIST (1ULL << __PATHLIST) + + #define INITIAL_REPLY_LEN 1200 + +diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c +index 0c63ca9a..f183fceb 100644 +--- a/multipathd/cli_handlers.c ++++ b/multipathd/cli_handlers.c +@@ -32,6 +32,7 @@ + #include "foreign.h" + #include "strbuf.h" + #include "cli_handlers.h" ++#include <ctype.h> + + #define SET_REPLY_AND_LEN(__rep, __len, string_literal) \ + do { \ +@@ -1308,8 +1309,8 @@ cli_getprstatus (void * v, char ** reply, int * len, void * data) + return 0; + } + +-int +-cli_setprstatus(void * v, char ** reply, int * len, void * data) ++static int do_setprstatus(void * v, char 
** reply, int * len, void * data, ++ const struct _vector *registered_paths) + { + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; +@@ -1324,7 +1325,7 @@ cli_setprstatus(void * v, char ** reply, int * len, void * data) + + if (mpp->prflag != PR_SET) { + set_pr(mpp); +- pr_register_active_paths(mpp, true); ++ pr_register_active_paths(mpp, registered_paths); + if (mpp->prflag == PR_SET) + condlog(2, "%s: prflag set", param); + else +@@ -1336,6 +1337,36 @@ cli_setprstatus(void * v, char ** reply, int * len, void * data) + return 0; + } + ++int ++cli_setprstatus(void * v, char ** reply, int * len, void * data) ++{ ++ return do_setprstatus(v, reply, len, data, NULL); ++} ++ ++int ++cli_setprstatus_list(void * v, char ** reply, int * len, void *data) ++{ ++ int r; ++ struct _vector registered_paths_vec = {.allocated = 0}; ++ vector registered_paths ++ __attribute__((cleanup(cleanup_reset_vec))) = &registered_paths_vec; ++ char *ptr = get_keyparam(v, PATHLIST); ++ ++ while (isspace(*ptr)) ++ ptr++; ++ while (*ptr) { ++ if (!vector_alloc_slot(registered_paths)) ++ return -ENOMEM; ++ vector_set_slot(registered_paths, ptr); ++ while (*ptr && !isspace(*ptr)) ++ ptr++; ++ while (isspace(*ptr)) ++ *ptr++ = '\0'; ++ } ++ r = do_setprstatus(v, reply, len, data, registered_paths); ++ return r; ++} ++ + int + cli_unsetprstatus(void * v, char ** reply, int * len, void * data) + { +diff --git a/multipathd/cli_handlers.h b/multipathd/cli_handlers.h +index 348c8485..28b6f2bb 100644 +--- a/multipathd/cli_handlers.h ++++ b/multipathd/cli_handlers.h +@@ -46,6 +46,7 @@ int cli_shutdown(void * v, char ** reply, int * len, void * data); + int cli_reassign (void * v, char ** reply, int * len, void * data); + int cli_getprstatus(void * v, char ** reply, int * len, void * data); + int cli_setprstatus(void * v, char ** reply, int * len, void * data); ++int cli_setprstatus_list(void * v, char ** reply, int * len, void * data); + int cli_unsetprstatus(void * v, char ** 
reply, int * len, void * data); + int cli_getprkey(void * v, char ** reply, int * len, void * data); + int cli_setprkey(void * v, char ** reply, int * len, void * data); +diff --git a/multipathd/main.c b/multipathd/main.c +index 2526d8d1..09d26fe2 100644 +--- a/multipathd/main.c ++++ b/multipathd/main.c +@@ -548,28 +548,47 @@ flush_map_nopaths(struct multipath *mpp, struct vectors *vecs) { + return true; + } + +-void pr_register_active_paths(struct multipath *mpp, bool check_nr_active) ++/* ++ * If reg_paths in non-NULL, it is a vector of paths that libmpathpersist ++ * registered. If the number of registered keys is smaller than the number ++ * of registered paths, then likely a preempt that occurred while ++ * libmpathpersist was registering the key. As long as there are still some ++ * registered keys, treat the preempt as happening first, and make sure to ++ * register keys on all the paths. If the number of registered keys is at ++ * least as large as the number of registered paths, then no preempt happened, ++ * and multipathd does not need to re-register the paths that libmpathpersist ++ * handled ++ */ ++void pr_register_active_paths(struct multipath *mpp, const struct _vector *reg_paths) + { +- unsigned int i, j, nr_keys = 0; +- unsigned int nr_active = 0; ++ unsigned int i, j, k, nr_keys = 0; ++ unsigned int wanted_nr = VECTOR_SIZE(reg_paths); + struct path *pp; + struct pathgroup *pgp; +- +- if (check_nr_active) { +- nr_active = count_active_paths(mpp); +- if (!nr_active) +- return; +- } ++ char *pathname; + + vector_foreach_slot (mpp->pg, pgp, i) { + vector_foreach_slot (pgp->paths, pp, j) { + if (mpp->prflag == PR_UNSET) + return; +- if (pp->state == PATH_UP || pp->state == PATH_GHOST) { +- nr_keys = mpath_pr_event_handle(pp, nr_keys, nr_active); +- if (check_nr_active && nr_keys == nr_active) +- return; ++ if (pp->state != PATH_UP && pp->state != PATH_GHOST) ++ continue; ++ if (wanted_nr && nr_keys) { ++ vector_foreach_slot (reg_paths, pathname, k) { 
++ if (strcmp(pp->dev_t, pathname) == 0) { ++ goto skip; ++ } ++ } + } ++ nr_keys = mpath_pr_event_handle(pp, nr_keys, wanted_nr); ++ if (nr_keys && nr_keys < wanted_nr) { ++ /* ++ * Incorrect number of registered keys. Need ++ * to register all devices ++ */ ++ wanted_nr = 0; ++ } ++ skip:; /* a statement must follow a label on pre C23 clang */ + } + } + } +@@ -599,8 +618,7 @@ handle_orphaned_offline_paths(vector offline_paths) + pp->add_when_online = true; + } + +-static void +-cleanup_reset_vec(struct _vector **v) ++void cleanup_reset_vec(struct _vector **v) + { + vector_reset(*v); + } +@@ -656,7 +674,7 @@ fail: + + sync_map_state(mpp); + +- pr_register_active_paths(mpp, false); ++ pr_register_active_paths(mpp, NULL); + + if (VECTOR_SIZE(offline_paths) != 0) + handle_orphaned_offline_paths(offline_paths); +@@ -1279,7 +1297,7 @@ rescan: + + if (retries >= 0) { + if ((mpp->prflag == PR_SET && prflag != PR_SET) || start_waiter) +- pr_register_active_paths(mpp, false); ++ pr_register_active_paths(mpp, NULL); + condlog(2, "%s [%s]: path added to devmap %s", + pp->dev, pp->dev_t, mpp->alias); + return 0; +@@ -1875,6 +1893,7 @@ uxlsnrloop (void * ap) + set_unlocked_handler_callback(SHUTDOWN, cli_shutdown); + set_handler_callback(GETPRSTATUS|MAP, cli_getprstatus); + set_handler_callback(SETPRSTATUS|MAP, cli_setprstatus); ++ set_handler_callback(SETPRSTATUS|MAP|PATHLIST, cli_setprstatus_list); + set_handler_callback(UNSETPRSTATUS|MAP, cli_unsetprstatus); + set_handler_callback(FORCEQ|DAEMON, cli_force_no_daemon_q); + set_handler_callback(RESTOREQ|DAEMON, cli_restore_no_daemon_q); +@@ -2581,7 +2600,7 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) + mpath_pr_event_handle(pp, 0, 0); + if (pp->mpp->prflag == PR_SET && + prflag != PR_SET) +- pr_register_active_paths(pp->mpp, false); ++ pr_register_active_paths(pp->mpp, NULL); + } + } + +@@ -2950,7 +2969,7 @@ configure (struct vectors * vecs) + vector_foreach_slot(mpvec, mpp, i){ + if 
(remember_wwid(mpp->wwid) == 1) + trigger_paths_udev_change(mpp, true); +- pr_register_active_paths(mpp, false); ++ pr_register_active_paths(mpp, NULL); + } + + /* +@@ -3933,8 +3952,8 @@ static int update_map_pr(struct multipath *mpp, struct path *pp, unsigned int *n + * + * nr_keys_wanted: Only used if nr_keys_needed is 0, so we don't know how + * many keys we currently have. If nr_keys_wanted in non-zero and the +- * number of keys found by the initial call to update_map_pr() matches it, +- * exit early, since we have all the keys we are expecting. ++ * number of keys found by the initial call to update_map_pr() is at least ++ * as large as it, exit early, since we have all the keys we are expecting. + * + * The function returns the number of keys that are registered or 0 if + * it's unknown. +@@ -3957,7 +3976,7 @@ mpath_pr_event_handle(struct path *pp, unsigned int nr_keys_needed, + nr_keys_needed = 1; + if (update_map_pr(mpp, pp, &nr_keys_needed) != MPATH_PR_SUCCESS) + return 0; +- if (nr_keys_wanted && nr_keys_wanted == nr_keys_needed) ++ if (nr_keys_wanted && nr_keys_wanted <= nr_keys_needed) + return nr_keys_needed; + } + +diff --git a/multipathd/main.h b/multipathd/main.h +index 8a4c5f88..ea12455f 100644 +--- a/multipathd/main.h ++++ b/multipathd/main.h +@@ -60,5 +60,7 @@ int resize_map(struct multipath *mpp, unsigned long long size, + struct vectors *vecs); + void set_pr(struct multipath *mpp); + void unset_pr(struct multipath *mpp); +-void pr_register_active_paths(struct multipath *mpp, bool check_active_nr); ++void pr_register_active_paths(struct multipath *mpp, ++ const struct _vector *registered_paths); ++void cleanup_reset_vec(struct _vector **v); + #endif /* MAIN_H */ +diff --git a/multipathd/multipathd.8 b/multipathd/multipathd.8 +index 2ed036d4..0a76f3e3 100644 +--- a/multipathd/multipathd.8 ++++ b/multipathd/multipathd.8 +@@ -324,11 +324,17 @@ will not be disabled when the daemon stops. + Restores configured queue_without_daemon mode. + . 
+ .TP +-.B map|multipath $map setprstatus ++.B setprstatus map|multipath $map + Enable persistent reservation management on $map. + . + .TP +-.B map|multipath $map unsetprstatus ++.B setprstatus map|multipath $map pathlist $pathlist ++Enable persistent reservation management on $map, and notify multipathd of ++the paths that have been registered, so it doesn't attempt to re-register ++them. ++. ++.TP ++.B unsetprstatus map|multipath $map + Disable persistent reservation management on $map. + . + .TP diff --git a/SPECS/device-mapper-multipath.spec b/SPECS/device-mapper-multipath.spec index 44fac7e..53810a0 100644 --- a/SPECS/device-mapper-multipath.spec +++ b/SPECS/device-mapper-multipath.spec @@ -1,6 +1,6 @@ Name: device-mapper-multipath Version: 0.8.7 -Release: 39%{?dist}.1 +Release: 45%{?dist} Summary: Tools to manage multipath devices using device-mapper License: GPLv2 URL: http://christophe.varoqui.free.fr/ @@ -206,7 +206,20 @@ Patch0193: 0193-libmpathpersist-Fix-race-between-restoring-a-path-an.patch Patch0194: 0194-multipathd-Fix-tracking-of-old-PR-key.patch Patch0195: 0195-multipathd-Fix-race-while-registering-PR-key.patch Patch0196: 0196-mpathpersist-Fix-REPORT-CAPABILITIES-output.patch - +Patch0197: 0197-multipath-tools-update-NFINIDAT-InfiniBox-config-in-.patch +Patch0198: 0198-multipathd-print-path-offline-message-even-without-a.patch +Patch0199: 0199-libmultipath-improve-cleanup-of-uevent-queues-on-exi.patch +Patch0200: 0200-uevent_dispatch-use-while-in-wait-loop.patch +Patch0201: 0201-libmultipath-uevent_dispatch-process-uevents-one-by-.patch +Patch0202: 0202-libmultipath-uevent_listen-don-t-delay-uevents.patch +Patch0203: 0203-libmultipath-uevent-use-struct-to-pass-parameters-ar.patch +Patch0204: 0204-libmultipath-is_uevent_busy-check-servicing_uev-unde.patch +Patch0205: 0205-multipathd-make-multipathd-show-status-busy-checker-.patch +Patch0206: 0206-libmultipath-add-purge_disconnected-configuration-op.patch +Patch0207: 
0207-multipathd-implement-purge-functionality-for-disconn.patch +Patch0208: 0208-libmpathpersist-fix-register-retry-status-checking.patch +Patch0209: 0209-multipathd-remember-number-of-registered-keys-when-i.patch +Patch0210: 0210-libmpathpersist-fix-code-for-skipping-multipathd-pat.patch # runtime Requires: %{name}-libs = %{version}-%{release} @@ -409,7 +422,51 @@ fi %{_pkgconfdir}/libdmmp.pc %changelog -* Tue Nov 11 2025 Benjamin Marzinski - 0.8.7-39.1 +* Thu Feb 19 2026 Benjamin Marzinski - 0.8.7-45 +- Add 0208-libmpathpersist-fix-register-retry-status-checking.patch +- Add 0209-multipathd-remember-number-of-registered-keys-when-i.patch +- Add 0210-libmpathpersist-fix-code-for-skipping-multipathd-pat.patch + * Fixes RHEL-148462 ("Improve multipathd's handling of updating + persistent reservations on restored paths. [rhel-9]") +- Resolves: RHEL-148462 + +* Thu Jan 29 2026 Benjamin Marzinski - 0.8.7-44 +- Add 0206-libmultipath-add-purge_disconnected-configuration-op.patch +- Add 0207-multipathd-implement-purge-functionality-for-disconn.patch + * Fixes RHEL-141291 ("Add purge_disconnected support to multipathd + [rhel-9]") +- Resolves: RHEL-141291 + +* Thu Jan 22 2026 Benjamin Marzinski - 0.8.7-43 +- Add 0198-multipathd-print-path-offline-message-even-without-a.patch + * Fixes RHEL-133814 ("log_checker_err is not printing messages + repeatedly for failed path [rhel-9]") +- Add 0199-libmultipath-improve-cleanup-of-uevent-queues-on-exi.patch +- Add 0200-uevent_dispatch-use-while-in-wait-loop.patch +- Add 0201-libmultipath-uevent_dispatch-process-uevents-one-by-.patch +- Add 0202-libmultipath-uevent_listen-don-t-delay-uevents.patch +- Add 0203-libmultipath-uevent-use-struct-to-pass-parameters-ar.patch +- Add 0204-libmultipath-is_uevent_busy-check-servicing_uev-unde.patch +- Add 0205-multipathd-make-multipathd-show-status-busy-checker-.patch + * Fixes RHEL-135904 (VM reboot in RHOSP environment fails with error + "Could not open '/dev/dm-95': No such file or 
directory") +- Resolves: RHEL-133814 +- Resolves: RHEL-135904 + +* Wed Nov 19 2025 Benjamin Marzinski - 0.8.7-42 +- Add 0197-multipath-tools-update-NFINIDAT-InfiniBox-config-in-.patch + * Fixes RHEL-128396 ("Update the multipath.conf stanza for Infinidat + storage [rhel-9]") +- Resolves: RHEL-128396 + +* Tue Nov 11 2025 Benjamin Marzinski - 0.8.7-41 +- Add 0195-multipathd-Fix-race-while-registering-PR-key.patch +- Add 0196-mpathpersist-Fix-REPORT-CAPABILITIES-output.patch + * Fixes RHEL-118515 ("There are many bugs in multipath's persistent + reservation handling") +- Resolves: RHEL-118515 + +* Wed Oct 1 2025 Benjamin Marzinski - 0.8.7-40 - Add 0150-libmpathpersist-retry-commands-on-other-paths-in-mpa.patch - Add 0151-libmpathpersist-check-released-key-against-the-reser.patch - Add 0152-multipathd-remove-thread-from-mpath_pr_event_handle.patch @@ -455,11 +512,9 @@ fi - Add 0192-libmpathpersist-Fix-unregistering-while-holding-the-.patch - Add 0193-libmpathpersist-Fix-race-between-restoring-a-path-an.patch - Add 0194-multipathd-Fix-tracking-of-old-PR-key.patch -- Add 0195-multipathd-Fix-race-while-registering-PR-key.patch -- Add 0196-mpathpersist-Fix-REPORT-CAPABILITIES-output.patch - * Fixes RHEL-118723 ("There are many bugs in multipath's persistent - reservation handling [rhel-9.7.z]") -- Resolves: RHEL-118723 + * Fixes RHEL-118515 ("There are many bugs in multipath's persistent + reservation handling") +- Resolves: RHEL-118515 * Mon Jul 14 2025 Benjamin Marzinski - 0.8.7-39 - Add 0145-multipath-tools-add-DellEMC-ME4-PowerVault-ME4-to-ha.patch