import UBI device-mapper-multipath-0.8.7-45.el9

This commit is contained in:
AlmaLinux RelEng Bot 2026-05-19 20:27:22 -04:00
parent daa7dc6b75
commit c7cbc2e47d
15 changed files with 2910 additions and 8 deletions

View File

@ -0,0 +1,65 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Xose Vazquez Perez <xose.vazquez@gmail.com>
Date: Sat, 16 Aug 2025 20:16:23 +0200
Subject: [PATCH] multipath-tools: update NFINIDAT/InfiniBox config in hwtable
New recommended values for SLES-15, RHEL-8, and Ubuntu-22, or above:
https://lh3.googleusercontent.com/pw/AP1GczMchJ6bcMIohp_g3Ik5DS6BZv_IW0iwaEXf968sJsR8fr_p3mR4ThRpmHpZE_VMnlcV8j0uuuI2kz-eoqekRCUBxyTBKS7n-4WFMsUiecq8i7nIjEuhfZFrV54DBQLDzGt6ofakAAF1L-ZcAuCWW18=w0-h0
device {
vendor "NFINIDAT"
product "InfiniBox"
path_grouping_policy "group_by_prio"
path_checker "tur"
features 0
hardware_handler "1 alua"
prio "alua"
rr_weight "priorities"
no_path_retry "queue"
rr_min_io 1
rr_min_io_rq 1
flush_on_last_del "yes"
fast_io_fail_tmo 15
dev_loss_tmo "infinity"
path_selector "service-time 0"
failback "immediate"
detect_prio "no"
user_friendly_names "no"
}
Cc: Martin Wilck <mwilck@suse.com>
Cc: Benjamin Marzinski <bmarzins@redhat.com>
Cc: Christophe Varoqui <christophe.varoqui@opensvc.com>
Cc: DM_DEVEL-ML <dm-devel@lists.linux.dev>
Signed-off-by: Xose Vazquez Perez <xose.vazquez@gmail.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
libmultipath/hwtable.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/libmultipath/hwtable.c b/libmultipath/hwtable.c
index 2d359829..188ad5e0 100644
--- a/libmultipath/hwtable.c
+++ b/libmultipath/hwtable.c
@@ -1124,16 +1124,13 @@ static struct hwentry default_hw[] = {
.vendor = "NFINIDAT",
.product = "InfiniBox",
.pgpolicy = GROUP_BY_PRIO,
- .pgfailback = 30,
+ .pgfailback = -FAILBACK_IMMEDIATE,
.prio_name = PRIO_ALUA,
- .selector = "round-robin 0",
- .rr_weight = RR_WEIGHT_PRIO,
- .no_path_retry = NO_PATH_RETRY_FAIL,
- .minio = 1,
- .minio_rq = 1,
+ .no_path_retry = NO_PATH_RETRY_QUEUE,
.flush_on_last_del = FLUSH_ALWAYS,
.fast_io_fail = 15,
- .dev_loss = 15,
+ .dev_loss = MAX_DEV_LOSS_TMO,
+ .detect_prio = DETECT_PRIO_OFF,
},
/*
* Kaminario

View File

@ -0,0 +1,36 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Wed, 21 Jan 2026 16:03:13 -0500
Subject: [PATCH] multipathd: print path offline message even without a checker
If a path has a checker selected and is offline, multipathd will print a
"path offline" message. However if the checker isn't selected, for
instance because multipathd was started or reconfigured while the path
was offline, multipathd was not printing the "path offline" message.
Fix that.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
---
multipathd/main.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/multipathd/main.c b/multipathd/main.c
index a85c0db4..9beb0e06 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -97,12 +97,11 @@ mpath_pr_event_handle(struct path *pp, unsigned int nr_keys_needed,
#define LOG_MSG(lvl, pp) \
do { \
- if (pp->mpp && checker_selected(&pp->checker) && \
- lvl <= libmp_verbosity) { \
+ if (pp->mpp && lvl <= libmp_verbosity) { \
if (pp->offline) \
condlog(lvl, "%s: %s - path offline", \
pp->mpp->alias, pp->dev); \
- else { \
+ else if (checker_selected(&pp->checker)) { \
const char *__m = \
checker_message(&pp->checker); \
\

View File

@ -0,0 +1,133 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Thu, 9 Sep 2021 23:59:42 +0200
Subject: [PATCH] libmultipath: improve cleanup of uevent queues on exit
uevents listed on merge_node must be cleaned up, too. uevents
cancelled while being serviced and temporary queues, likewise.
The global uevq must be cleaned out in the uevent listener thread,
because it might have added events after the dispatcher thread
had already finished.
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
libmultipath/uevent.c | 49 ++++++++++++++++++++++++++++++++-----------
1 file changed, 37 insertions(+), 12 deletions(-)
diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c
index 4265904b..80941f87 100644
--- a/libmultipath/uevent.c
+++ b/libmultipath/uevent.c
@@ -91,16 +91,25 @@ struct uevent * alloc_uevent (void)
return uev;
}
+static void uevq_cleanup(struct list_head *tmpq);
+
+static void cleanup_uev(void *arg)
+{
+ struct uevent *uev = arg;
+
+ uevq_cleanup(&uev->merge_node);
+ if (uev->udev)
+ udev_device_unref(uev->udev);
+ free(uev);
+}
+
static void uevq_cleanup(struct list_head *tmpq)
{
struct uevent *uev, *tmp;
list_for_each_entry_safe(uev, tmp, tmpq, node) {
list_del_init(&uev->node);
-
- if (uev->udev)
- udev_device_unref(uev->udev);
- FREE(uev);
+ cleanup_uev(uev);
}
}
@@ -384,14 +393,10 @@ service_uevq(struct list_head *tmpq)
list_for_each_entry_safe(uev, tmp, tmpq, node) {
list_del_init(&uev->node);
+ pthread_cleanup_push(cleanup_uev, uev);
if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
condlog(0, "uevent trigger error");
-
- uevq_cleanup(&uev->merge_node);
-
- if (uev->udev)
- udev_device_unref(uev->udev);
- FREE(uev);
+ pthread_cleanup_pop(1);
}
}
@@ -411,6 +416,18 @@ static void monitor_cleanup(void *arg)
udev_monitor_unref(monitor);
}
+static void cleanup_uevq(void *arg)
+{
+ uevq_cleanup(arg);
+}
+
+static void cleanup_global_uevq(void *arg __attribute__((unused)))
+{
+ pthread_mutex_lock(uevq_lockp);
+ uevq_cleanup(&uevq);
+ pthread_mutex_unlock(uevq_lockp);
+}
+
/*
* Service the uevent queue.
*/
@@ -425,6 +442,7 @@ int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
while (1) {
LIST_HEAD(uevq_tmp);
+ pthread_cleanup_push(cleanup_mutex, uevq_lockp);
pthread_mutex_lock(uevq_lockp);
servicing_uev = 0;
/*
@@ -436,14 +454,17 @@ int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
}
servicing_uev = 1;
list_splice_init(&uevq, &uevq_tmp);
- pthread_mutex_unlock(uevq_lockp);
+ pthread_cleanup_pop(1);
+
if (!my_uev_trigger)
break;
+
+ pthread_cleanup_push(cleanup_uevq, &uevq_tmp);
merge_uevq(&uevq_tmp);
service_uevq(&uevq_tmp);
+ pthread_cleanup_pop(1);
}
condlog(3, "Terminating uev service queue");
- uevq_cleanup(&uevq);
return 0;
}
@@ -600,6 +621,8 @@ int uevent_listen(struct udev *udev)
events = 0;
gettimeofday(&start_time, NULL);
+ pthread_cleanup_push(cleanup_global_uevq, NULL);
+ pthread_cleanup_push(cleanup_uevq, &uevlisten_tmp);
while (1) {
struct uevent *uev;
struct udev_device *dev;
@@ -650,6 +673,8 @@ int uevent_listen(struct udev *udev)
gettimeofday(&start_time, NULL);
timeout = 30;
}
+ pthread_cleanup_pop(1);
+ pthread_cleanup_pop(1);
out:
pthread_cleanup_pop(1);
out_udev:

View File

@ -0,0 +1,38 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Tue, 29 Mar 2022 16:06:25 +0200
Subject: [PATCH] uevent_dispatch(): use while in wait loop
Callers of pthread_cond_wait() should generally use a while loop
to test the condition. Also, remove the misleading comment.
Condition variables aren't unreliable, they're just not strictly
tied to the condition tested.
Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
libmultipath/uevent.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c
index 80941f87..e3ec1ac1 100644
--- a/libmultipath/uevent.c
+++ b/libmultipath/uevent.c
@@ -445,13 +445,10 @@ int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
pthread_cleanup_push(cleanup_mutex, uevq_lockp);
pthread_mutex_lock(uevq_lockp);
servicing_uev = 0;
- /*
- * Condition signals are unreliable,
- * so make sure we only wait if we have to.
- */
- if (list_empty(&uevq)) {
+
+ while (list_empty(&uevq))
pthread_cond_wait(uev_condp, uevq_lockp);
- }
+
servicing_uev = 1;
list_splice_init(&uevq, &uevq_tmp);
pthread_cleanup_pop(1);

View File

@ -0,0 +1,319 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Tue, 29 Mar 2022 18:04:42 +0200
Subject: [PATCH] libmultipath: uevent_dispatch(): process uevents one by one
The main rationale for delaying uevents is that the
uevent dispatcher may have to wait for other threads to release the
vecs lock, may hold the vecs lock for an extended amount of time, and
even sleep occasionally. By delaying them, we have the chance
to accumulate events for the same path device ("filtering") or
WWID ("merging"), thus avoiding duplicate work if we merge these
into one.
A similar effect can be obtained in the uevent dispatcher itself
by looking for new uevents after each dispatched event, and trying
to merge the newly arrived events with those that remained
in the queue.
When uevq_work is non-empty and we append a list of new events,
we don't need to check the entire list for filterable and mergeable
uevents. uevq_work had been filtered and merged already. So we just
need to check the newly appended events. These must of course be
checked for merges with earlier events, too.
We must deal with some special cases here, like previously merged
uevents being filtered later.
Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
libmultipath/list.h | 53 +++++++++++++++++
libmultipath/uevent.c | 129 ++++++++++++++++++++++++++++++------------
2 files changed, 147 insertions(+), 35 deletions(-)
diff --git a/libmultipath/list.h b/libmultipath/list.h
index ced021f5..248f72bc 100644
--- a/libmultipath/list.h
+++ b/libmultipath/list.h
@@ -246,6 +246,35 @@ static inline void list_splice_tail_init(struct list_head *list,
#define list_entry(ptr, type, member) \
container_of(ptr, type, member)
+
+/**
+ * list_pop - unlink and return the first list element
+ * @head: the &struct list_head pointer.
+ */
+static inline struct list_head *list_pop(struct list_head *head)
+{
+ struct list_head *tmp;
+
+ if (list_empty(head))
+ return NULL;
+ tmp = head->next;
+ list_del_init(tmp);
+ return tmp;
+}
+
+/**
+ * list_pop_entry - unlink and return the entry of the first list element
+ * @head: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_pop_entry(head, type, member) \
+({ \
+ struct list_head *__h = list_pop(head); \
+ \
+ (__h ? container_of(__h, type, member) : NULL); \
+})
+
/**
* list_for_each - iterate over a list
* @pos: the &struct list_head to use as a loop counter.
@@ -334,6 +363,30 @@ static inline void list_splice_tail_init(struct list_head *list,
&pos->member != (head); \
pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+/**
+ * list_for_some_entry - iterate list from the given begin node to the given end node
+ * @pos: the type * to use as a loop counter.
+ * @from: the begin node of the iteration.
+ * @to: the end node of the iteration.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_some_entry(pos, from, to, member) \
+ for (pos = list_entry((from)->next, typeof(*pos), member); \
+ &pos->member != (to); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_some_entry_reverse - iterate backwards list from the given begin node to the given end node
+ * @pos: the type * to use as a loop counter.
+ * @from: the begin node of the iteration.
+ * @to: the end node of the iteration.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_some_entry_reverse(pos, from, to, member) \
+ for (pos = list_entry((from)->prev, typeof(*pos), member); \
+ &pos->member != (to); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
/**
* list_for_some_entry_safe - iterate list from the given begin node to the given end node safe against removal of list entry
* @pos: the type * to use as a loop counter.
diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c
index e3ec1ac1..2198e254 100644
--- a/libmultipath/uevent.c
+++ b/libmultipath/uevent.c
@@ -308,17 +308,64 @@ uevent_can_merge(struct uevent *earlier, struct uevent *later)
return false;
}
+static void uevent_delete_from_list(struct uevent *to_delete,
+ struct uevent **previous,
+ struct list_head **old_tail)
+{
+ /*
+ * "old_tail" is the list_head before the last list element to which
+ * the caller iterates (the list anchor if the caller iterates over
+ * the entire list). If this element is removed (which can't happen
+ * for the anchor), "old_tail" must be moved. It can happen that
+ * "old_tail" ends up pointing at the anchor.
+ */
+ if (*old_tail == &to_delete->node)
+ *old_tail = to_delete->node.prev;
+
+ list_del_init(&to_delete->node);
+
+ /*
+ * The "to_delete" uevent has been merged with other uevents
+ * previously. Re-insert them into the list, at the point we're
+ * currently at. This must be done after the list_del_init() above,
+ * otherwise previous->next would still point to to_delete.
+ */
+ if (!list_empty(&to_delete->merge_node)) {
+ struct uevent *last = list_entry(to_delete->merge_node.prev,
+ typeof(*last), node);
+
+ list_splice(&to_delete->merge_node, &(*previous)->node);
+ *previous = last;
+ }
+ if (to_delete->udev)
+ udev_device_unref(to_delete->udev);
+
+ free(to_delete);
+}
+
+/*
+ * Use this function to delete events that are known not to
+ * be equal to old_tail, and have an empty merge_node list.
+ * For others, use uevent_delete_from_list().
+ */
+static void uevent_delete_simple(struct uevent *to_delete)
+{
+ list_del_init(&to_delete->node);
+
+ if (to_delete->udev)
+ udev_device_unref(to_delete->udev);
+
+ free(to_delete);
+}
+
static void
-uevent_prepare(struct list_head *tmpq)
+uevent_prepare(struct list_head *tmpq, const struct list_head *stop)
{
struct uevent *uev, *tmp;
- list_for_each_entry_reverse_safe(uev, tmp, tmpq, node) {
+ list_for_some_entry_reverse_safe(uev, tmp, tmpq, stop, node) {
if (uevent_can_discard(uev)) {
- list_del_init(&uev->node);
- if (uev->udev)
- udev_device_unref(uev->udev);
- FREE(uev);
+ uevent_delete_simple(uev);
continue;
}
@@ -329,7 +376,7 @@ uevent_prepare(struct list_head *tmpq)
}
static void
-uevent_filter(struct uevent *later, struct list_head *tmpq)
+uevent_filter(struct uevent *later, struct list_head *tmpq, struct list_head **stop)
{
struct uevent *earlier, *tmp;
@@ -343,16 +390,13 @@ uevent_filter(struct uevent *later, struct list_head *tmpq)
earlier->kernel, earlier->action,
later->kernel, later->action);
- list_del_init(&earlier->node);
- if (earlier->udev)
- udev_device_unref(earlier->udev);
- FREE(earlier);
+ uevent_delete_from_list(earlier, &tmp, stop);
}
}
}
static void
-uevent_merge(struct uevent *later, struct list_head *tmpq)
+uevent_merge(struct uevent *later, struct list_head *tmpq, struct list_head **stop)
{
struct uevent *earlier, *tmp;
@@ -367,37 +411,42 @@ uevent_merge(struct uevent *later, struct list_head *tmpq)
earlier->action, earlier->kernel, earlier->wwid,
later->action, later->kernel, later->wwid);
+ /* See comment in uevent_delete_from_list() */
+ if (&earlier->node == *stop)
+ *stop = earlier->node.prev;
+
list_move(&earlier->node, &later->merge_node);
+ list_splice_init(&earlier->merge_node,
+ &later->merge_node);
}
}
}
static void
-merge_uevq(struct list_head *tmpq)
+merge_uevq(struct list_head *tmpq, struct list_head *stop)
{
struct uevent *later;
- uevent_prepare(tmpq);
- list_for_each_entry_reverse(later, tmpq, node) {
- uevent_filter(later, tmpq);
+ uevent_prepare(tmpq, stop);
+ list_for_some_entry_reverse(later, tmpq, stop, node) {
+ uevent_filter(later, tmpq, &stop);
if(uevent_need_merge())
- uevent_merge(later, tmpq);
+ uevent_merge(later, tmpq, &stop);
}
}
static void
service_uevq(struct list_head *tmpq)
{
- struct uevent *uev, *tmp;
-
- list_for_each_entry_safe(uev, tmp, tmpq, node) {
- list_del_init(&uev->node);
-
- pthread_cleanup_push(cleanup_uev, uev);
- if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
- condlog(0, "uevent trigger error");
- pthread_cleanup_pop(1);
- }
+ struct uevent *uev = list_pop_entry(tmpq, typeof(*uev), node);
+
+ if (uev == NULL)
+ return;
+ condlog(4, "servicing uevent '%s %s'", uev->action, uev->kernel);
+ pthread_cleanup_push(cleanup_uev, uev);
+ if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data))
+ condlog(0, "uevent trigger error");
+ pthread_cleanup_pop(1);
}
static void uevent_cleanup(void *arg)
@@ -434,33 +483,43 @@ static void cleanup_global_uevq(void *arg __attribute__((unused)))
int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
void * trigger_data)
{
+ LIST_HEAD(uevq_work);
+
my_uev_trigger = uev_trigger;
my_trigger_data = trigger_data;
mlockall(MCL_CURRENT | MCL_FUTURE);
+ pthread_cleanup_push(cleanup_uevq, &uevq_work);
while (1) {
- LIST_HEAD(uevq_tmp);
+ struct list_head *stop;
pthread_cleanup_push(cleanup_mutex, uevq_lockp);
pthread_mutex_lock(uevq_lockp);
- servicing_uev = 0;
- while (list_empty(&uevq))
+ servicing_uev = !list_empty(&uevq_work);
+
+ while (list_empty(&uevq_work) && list_empty(&uevq))
pthread_cond_wait(uev_condp, uevq_lockp);
servicing_uev = 1;
- list_splice_init(&uevq, &uevq_tmp);
+ /*
+ * "stop" is the list element towards which merge_uevq()
+ * will iterate: the last element of uevq_work before
+ * appending new uevents. If uveq_is empty, uevq_work.prev
+ * equals &uevq_work, which is what we need.
+ */
+ stop = uevq_work.prev;
+ list_splice_tail_init(&uevq, &uevq_work);
pthread_cleanup_pop(1);
if (!my_uev_trigger)
break;
- pthread_cleanup_push(cleanup_uevq, &uevq_tmp);
- merge_uevq(&uevq_tmp);
- service_uevq(&uevq_tmp);
- pthread_cleanup_pop(1);
+ merge_uevq(&uevq_work, stop);
+ service_uevq(&uevq_work);
}
+ pthread_cleanup_pop(1);
condlog(3, "Terminating uev service queue");
return 0;
}

View File

@ -0,0 +1,204 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Tue, 29 Mar 2022 23:25:48 +0200
Subject: [PATCH] libmultipath: uevent_listen(): don't delay uevents
When multipathd starts up early, basically all devices are added
through uevent processing. This takes much more time than necessary
because of the artificial delays introduced for passing uevents
between the listener and the receiver thread in ee8888f
("multipath-tools: improve processing efficiency for addition and deletion of
multipath devices"). This delay could be up to 30s.
It's generally not a good idea to delay uevent processing in multipathd.
ADD events must normally be handled ASAP in order to avoid maps entering
queueing mode or eventually failing. Handling REMOVE events quickly is
also important to make multipathd aware of deleted devices and keep
kernel and multipathd state in sync.
If uevents arrive quickly, the assumption is that the dispatcher will process
them more slowly than the listener. This was the idea of commit ee8888f,
AFAIU: if a queue of unprocessed events piles up because the dispatcher is
too slow, use filtering and merging to reduce the length of the queue, and
thus the work to be done for the uevent dispatcher, especially the work
that needs to be done while holding the vecs lock. In ee8888f, the
queue was created by allowing uevents to accumulate in the listener.
This patch changes the logic of ee8888f, while keeping the uevent
filtering and discarding features. The idea is that the uevent dispatcher
shouldn't be idle if there are uevents to process. Therefore uevents
are passed to it immediately. But it now checks for new uevents after
processing every individual event, before processing the entire queue,
and it applies filtering and merging to the queue as it grows.
This patch set avoids any delay when the uevent dispatcher is idle or
able to keep up with the rate of incoming uevents, while applying an
increasing amount of filtering and merging as pressure on the uevent
dispatcher increases. It's reasonable to assume that filtering and
merging get more efficient with increasing queue length, because the
probability of finding matching events will increase.
Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
libmultipath/uevent.c | 108 +++++++++++++++---------------------------
1 file changed, 37 insertions(+), 71 deletions(-)
diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c
index 2198e254..c3984fef 100644
--- a/libmultipath/uevent.c
+++ b/libmultipath/uevent.c
@@ -54,10 +54,6 @@
#include "blacklist.h"
#include "devmapper.h"
-#define MAX_ACCUMULATION_COUNT 2048
-#define MAX_ACCUMULATION_TIME 30*1000
-#define MIN_BURST_SPEED 10
-
typedef int (uev_trigger)(struct uevent *, void * trigger_data);
static LIST_HEAD(uevq);
@@ -586,44 +582,43 @@ static struct uevent *uevent_from_udev_device(struct udev_device *dev)
return uev;
}
-static bool uevent_burst(struct timeval *start_time, int events)
+#define MAX_UEVENTS 1000
+static int uevent_receive_events(int fd, struct list_head *tmpq,
+ struct udev_monitor *monitor)
{
- struct timeval diff_time, end_time;
- unsigned long speed;
- unsigned long eclipse_ms;
-
- if(events > MAX_ACCUMULATION_COUNT) {
- condlog(2, "burst got %u uevents, too much uevents, stopped", events);
- return false;
- }
+ struct pollfd ev_poll;
+ int n = 0;
- gettimeofday(&end_time, NULL);
- timersub(&end_time, start_time, &diff_time);
+ do {
+ struct uevent *uev;
+ struct udev_device *dev;
- eclipse_ms = diff_time.tv_sec * 1000 + diff_time.tv_usec / 1000;
+ dev = udev_monitor_receive_device(monitor);
+ if (!dev) {
+ condlog(0, "failed getting udev device");
+ break;
+ }
+ uev = uevent_from_udev_device(dev);
+ if (!uev)
+ break;
- if (eclipse_ms == 0)
- return true;
+ list_add_tail(&uev->node, tmpq);
+ n++;
+ condlog(4, "received uevent \"%s %s\"", uev->action, uev->kernel);
- if (eclipse_ms > MAX_ACCUMULATION_TIME) {
- condlog(2, "burst continued %lu ms, too long time, stopped", eclipse_ms);
- return false;
- }
+ ev_poll.fd = fd;
+ ev_poll.events = POLLIN;
- speed = (events * 1000) / eclipse_ms;
- if (speed > MIN_BURST_SPEED)
- return true;
+ } while (n < MAX_UEVENTS && poll(&ev_poll, 1, 0) > 0);
- return false;
+ return n;
}
int uevent_listen(struct udev *udev)
{
int err = 2;
struct udev_monitor *monitor = NULL;
- int fd, socket_flags, events;
- struct timeval start_time;
- int timeout = 30;
+ int fd, socket_flags;
LIST_HEAD(uevlisten_tmp);
/*
@@ -675,59 +670,30 @@ int uevent_listen(struct udev *udev)
goto out;
}
- events = 0;
- gettimeofday(&start_time, NULL);
pthread_cleanup_push(cleanup_global_uevq, NULL);
pthread_cleanup_push(cleanup_uevq, &uevlisten_tmp);
while (1) {
- struct uevent *uev;
- struct udev_device *dev;
- struct pollfd ev_poll;
- int poll_timeout;
- int fdcount;
+ int fdcount, events;
+ struct pollfd ev_poll = { .fd = fd, .events = POLLIN, };
- memset(&ev_poll, 0, sizeof(struct pollfd));
- ev_poll.fd = fd;
- ev_poll.events = POLLIN;
- poll_timeout = timeout * 1000;
- errno = 0;
- fdcount = poll(&ev_poll, 1, poll_timeout);
- if (fdcount > 0 && ev_poll.revents & POLLIN) {
- timeout = uevent_burst(&start_time, events + 1) ? 1 : 0;
- dev = udev_monitor_receive_device(monitor);
- if (!dev) {
- condlog(0, "failed getting udev device");
- continue;
- }
- uev = uevent_from_udev_device(dev);
- if (!uev)
- continue;
- list_add_tail(&uev->node, &uevlisten_tmp);
- events++;
- continue;
- }
+ fdcount = poll(&ev_poll, 1, -1);
if (fdcount < 0) {
if (errno == EINTR)
continue;
- condlog(0, "error receiving "
- "uevent message: %m");
+ condlog(0, "error receiving uevent message: %m");
err = -errno;
break;
}
- if (!list_empty(&uevlisten_tmp)) {
- /*
- * Queue uevents and poke service pthread.
- */
- condlog(3, "Forwarding %d uevents", events);
- pthread_mutex_lock(uevq_lockp);
- list_splice_tail_init(&uevlisten_tmp, &uevq);
- pthread_cond_signal(uev_condp);
- pthread_mutex_unlock(uevq_lockp);
- events = 0;
- }
- gettimeofday(&start_time, NULL);
- timeout = 30;
+ events = uevent_receive_events(fd, &uevlisten_tmp, monitor);
+ if (events <= 0)
+ continue;
+
+ condlog(4, "Forwarding %d uevents", events);
+ pthread_mutex_lock(uevq_lockp);
+ list_splice_tail_init(&uevlisten_tmp, &uevq);
+ pthread_cond_signal(uev_condp);
+ pthread_mutex_unlock(uevq_lockp);
}
pthread_cleanup_pop(1);
pthread_cleanup_pop(1);

View File

@ -0,0 +1,356 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Wed, 30 Mar 2022 00:06:15 +0200
Subject: [PATCH] libmultipath: uevent: use struct to pass parameters around
libmultipath: uevent_dispatch(): just grab config once
Introduce struct uevent_filter_state to pass parameters around.
This simplifies the function signatures and allows for easy extension
later.
Instead of grabbing multipath config repeatedly, do it just
once per dispatcher iteration, and pass the pointer around in
struct uevent_filter_state. We shouldn't use different configs
for different paths in a single iteration, anyway.
Also, properly constify get_uid_attribute_by_attrs() and
pp->uid_attribute.
Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
libmultipath/config.c | 6 +--
libmultipath/config.h | 4 +-
libmultipath/discovery.c | 2 +-
libmultipath/structs.h | 2 +-
libmultipath/uevent.c | 110 +++++++++++++++++----------------------
libmultipath/uevent.h | 3 +-
tests/uevent.c | 2 +-
7 files changed, 58 insertions(+), 71 deletions(-)
diff --git a/libmultipath/config.c b/libmultipath/config.c
index f31200a3..bd8296bf 100644
--- a/libmultipath/config.c
+++ b/libmultipath/config.c
@@ -1112,10 +1112,10 @@ out:
return 1;
}
-char *get_uid_attribute_by_attrs(struct config *conf,
- const char *path_dev)
+const char *get_uid_attribute_by_attrs(const struct config *conf,
+ const char *path_dev)
{
- vector uid_attrs = &conf->uid_attrs;
+ const struct _vector *uid_attrs = &conf->uid_attrs;
int j;
char *att, *col;
diff --git a/libmultipath/config.h b/libmultipath/config.h
index 5807ac68..d3abbaea 100644
--- a/libmultipath/config.h
+++ b/libmultipath/config.h
@@ -329,7 +329,7 @@ void libmp_put_multipath_config(void *);
void put_multipath_config(void *);
int parse_uid_attrs(char *uid_attrs, struct config *conf);
-char *get_uid_attribute_by_attrs(struct config *conf,
- const char *path_dev);
+const char *get_uid_attribute_by_attrs(const struct config *conf,
+ const char *path_dev);
#endif
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
index 22d114b3..186423e0 100644
--- a/libmultipath/discovery.c
+++ b/libmultipath/discovery.c
@@ -2071,7 +2071,7 @@ fix_broken_nvme_wwid(struct path *pp, const char *value, size_t size)
}
static int
-get_udev_uid(struct path * pp, char *uid_attribute, struct udev_device *udev)
+get_udev_uid(struct path * pp, const char *uid_attribute, struct udev_device *udev)
{
ssize_t len;
const char *value;
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 2f69e831..423c8b78 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -350,7 +350,7 @@ struct path {
int detect_prio;
int detect_checker;
int tpgs;
- char * uid_attribute;
+ const char *uid_attribute;
char * getuid;
struct prio prio;
struct checker checker;
diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c
index c3984fef..4ef7181c 100644
--- a/libmultipath/uevent.c
+++ b/libmultipath/uevent.c
@@ -65,6 +65,12 @@ static uev_trigger *my_uev_trigger;
static void *my_trigger_data;
static int servicing_uev;
+struct uevent_filter_state {
+ struct list_head uevq;
+ struct list_head *old_tail;
+ struct config *conf;
+};
+
int is_uevent_busy(void)
{
int empty;
@@ -160,40 +166,24 @@ int uevent_get_env_positive_int(const struct uevent *uev,
}
void
-uevent_get_wwid(struct uevent *uev)
+uevent_get_wwid(struct uevent *uev, const struct config *conf)
{
- char *uid_attribute;
+ const char *uid_attribute;
const char *val;
- struct config * conf;
- conf = get_multipath_config();
- pthread_cleanup_push(put_multipath_config, conf);
uid_attribute = get_uid_attribute_by_attrs(conf, uev->kernel);
- pthread_cleanup_pop(1);
-
val = uevent_get_env_var(uev, uid_attribute);
if (val)
uev->wwid = val;
}
-static bool uevent_need_merge(void)
+static bool uevent_need_merge(const struct config *conf)
{
- struct config * conf;
- bool need_merge = false;
-
- conf = get_multipath_config();
- if (VECTOR_SIZE(&conf->uid_attrs) > 0)
- need_merge = true;
- put_multipath_config(conf);
-
- return need_merge;
+ return VECTOR_SIZE(&conf->uid_attrs) > 0;
}
-static bool uevent_can_discard(struct uevent *uev)
+static bool uevent_can_discard(struct uevent *uev, const struct config *conf)
{
- int invalid = 0;
- struct config * conf;
-
/*
* do not filter dm devices by devnode
*/
@@ -202,15 +192,10 @@ static bool uevent_can_discard(struct uevent *uev)
/*
* filter paths devices by devnode
*/
- conf = get_multipath_config();
- pthread_cleanup_push(put_multipath_config, conf);
if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
uev->kernel) > 0)
- invalid = 1;
- pthread_cleanup_pop(1);
-
- if (invalid)
return true;
+
return false;
}
@@ -354,29 +339,28 @@ static void uevent_delete_simple(struct uevent *to_delete)
free(to_delete);
}
-static void
-uevent_prepare(struct list_head *tmpq, const struct list_head *stop)
+static void uevent_prepare(struct uevent_filter_state *st)
{
struct uevent *uev, *tmp;
- list_for_some_entry_reverse_safe(uev, tmp, tmpq, stop, node) {
- if (uevent_can_discard(uev)) {
+ list_for_some_entry_reverse_safe(uev, tmp, &st->uevq, st->old_tail, node) {
+ if (uevent_can_discard(uev, st->conf)) {
uevent_delete_simple(uev);
continue;
}
if (strncmp(uev->kernel, "dm-", 3) &&
- uevent_need_merge())
- uevent_get_wwid(uev);
+ uevent_need_merge(st->conf))
+ uevent_get_wwid(uev, st->conf);
}
}
static void
-uevent_filter(struct uevent *later, struct list_head *tmpq, struct list_head **stop)
+uevent_filter(struct uevent *later, struct uevent_filter_state *st)
{
struct uevent *earlier, *tmp;
- list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
+ list_for_some_entry_reverse_safe(earlier, tmp, &later->node, &st->uevq, node) {
/*
* filter unnessary earlier uevents
* by the later uevent
@@ -386,17 +370,16 @@ uevent_filter(struct uevent *later, struct list_head *tmpq, struct list_head **s
earlier->kernel, earlier->action,
later->kernel, later->action);
- uevent_delete_from_list(earlier, &tmp, stop);
+ uevent_delete_from_list(earlier, &tmp, &st->old_tail);
}
}
}
-static void
-uevent_merge(struct uevent *later, struct list_head *tmpq, struct list_head **stop)
+static void uevent_merge(struct uevent *later, struct uevent_filter_state *st)
{
struct uevent *earlier, *tmp;
- list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) {
+ list_for_some_entry_reverse_safe(earlier, tmp, &later->node, &st->uevq, node) {
if (merge_need_stop(earlier, later))
break;
/*
@@ -408,8 +391,8 @@ uevent_merge(struct uevent *later, struct list_head *tmpq, struct list_head **st
later->action, later->kernel, later->wwid);
/* See comment in uevent_delete_from_list() */
- if (&earlier->node == *stop)
- *stop = earlier->node.prev;
+ if (&earlier->node == st->old_tail)
+ st->old_tail = earlier->node.prev;
list_move(&earlier->node, &later->merge_node);
list_splice_init(&earlier->merge_node,
@@ -418,16 +401,15 @@ uevent_merge(struct uevent *later, struct list_head *tmpq, struct list_head **st
}
}
-static void
-merge_uevq(struct list_head *tmpq, struct list_head *stop)
+static void merge_uevq(struct uevent_filter_state *st)
{
struct uevent *later;
- uevent_prepare(tmpq, stop);
- list_for_some_entry_reverse(later, tmpq, stop, node) {
- uevent_filter(later, tmpq, &stop);
- if(uevent_need_merge())
- uevent_merge(later, tmpq, &stop);
+ uevent_prepare(st);
+ list_for_some_entry_reverse(later, &st->uevq, st->old_tail, node) {
+ uevent_filter(later, st);
+ if(uevent_need_merge(st->conf))
+ uevent_merge(later, st);
}
}
@@ -479,41 +461,45 @@ static void cleanup_global_uevq(void *arg __attribute__((unused)))
int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data),
void * trigger_data)
{
- LIST_HEAD(uevq_work);
+ struct uevent_filter_state filter_state;
+ INIT_LIST_HEAD(&filter_state.uevq);
my_uev_trigger = uev_trigger;
my_trigger_data = trigger_data;
mlockall(MCL_CURRENT | MCL_FUTURE);
- pthread_cleanup_push(cleanup_uevq, &uevq_work);
+ pthread_cleanup_push(cleanup_uevq, &filter_state.uevq);
while (1) {
- struct list_head *stop;
-
pthread_cleanup_push(cleanup_mutex, uevq_lockp);
pthread_mutex_lock(uevq_lockp);
- servicing_uev = !list_empty(&uevq_work);
+ servicing_uev = !list_empty(&filter_state.uevq);
- while (list_empty(&uevq_work) && list_empty(&uevq))
+ while (list_empty(&filter_state.uevq) && list_empty(&uevq))
pthread_cond_wait(uev_condp, uevq_lockp);
servicing_uev = 1;
/*
- * "stop" is the list element towards which merge_uevq()
- * will iterate: the last element of uevq_work before
- * appending new uevents. If uveq_is empty, uevq_work.prev
- * equals &uevq_work, which is what we need.
+ * "old_tail" is the list element towards which merge_uevq()
+ * will iterate: the last element of uevq before
+ * appending new uevents. If uveq empty, uevq.prev
+ * equals &uevq, which is what we need.
*/
- stop = uevq_work.prev;
- list_splice_tail_init(&uevq, &uevq_work);
+ filter_state.old_tail = filter_state.uevq.prev;
+ list_splice_tail_init(&uevq, &filter_state.uevq);
pthread_cleanup_pop(1);
if (!my_uev_trigger)
break;
- merge_uevq(&uevq_work, stop);
- service_uevq(&uevq_work);
+
+ pthread_cleanup_push(put_multipath_config, filter_state.conf);
+ filter_state.conf = get_multipath_config();
+ merge_uevq(&filter_state);
+ pthread_cleanup_pop(1);
+
+ service_uevq(&filter_state.uevq);
}
pthread_cleanup_pop(1);
condlog(3, "Terminating uev service queue");
diff --git a/libmultipath/uevent.h b/libmultipath/uevent.h
index 61ca1b56..53a7ca29 100644
--- a/libmultipath/uevent.h
+++ b/libmultipath/uevent.h
@@ -10,6 +10,7 @@
#define OBJECT_SIZE 512
struct udev;
+struct config;
struct uevent {
struct list_head node;
@@ -31,7 +32,7 @@ int uevent_listen(struct udev *udev);
int uevent_dispatch(int (*store_uev)(struct uevent *, void * trigger_data),
void * trigger_data);
bool uevent_is_mpath(const struct uevent *uev);
-void uevent_get_wwid(struct uevent *uev);
+void uevent_get_wwid(struct uevent *uev, const struct config *conf);
int uevent_get_env_positive_int(const struct uevent *uev,
const char *attr);
diff --git a/tests/uevent.c b/tests/uevent.c
index 648ff268..e237a208 100644
--- a/tests/uevent.c
+++ b/tests/uevent.c
@@ -111,7 +111,7 @@ static void test_uid_attrs(void **state)
static void test_wwid(void **state)
{
struct uevent *uev = *state;
- uevent_get_wwid(uev);
+ uevent_get_wwid(uev, &conf);
assert_string_equal(uev->wwid, WWID);
}

View File

@ -0,0 +1,37 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Fri, 5 Jan 2024 18:51:02 +0100
Subject: [PATCH] libmultipath: is_uevent_busy(): check servicing_uev under
lock
This fixes a coverity-reported defect (413384 Data race condition).
Indeed, we always set servicing_uev with the lock held, so it makes
sense to read it with the lock held, too.
Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
libmultipath/uevent.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c
index 4ef7181c..8cd928a9 100644
--- a/libmultipath/uevent.c
+++ b/libmultipath/uevent.c
@@ -73,12 +73,13 @@ struct uevent_filter_state {
int is_uevent_busy(void)
{
- int empty;
+ int empty, servicing;
pthread_mutex_lock(uevq_lockp);
empty = list_empty(&uevq);
+ servicing = servicing_uev;
pthread_mutex_unlock(uevq_lockp);
- return (!empty || servicing_uev);
+ return (!empty || servicing);
}
struct uevent * alloc_uevent (void)

View File

@ -0,0 +1,63 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Wed, 21 Jan 2026 16:03:12 -0500
Subject: [PATCH] multipathd: make "multipathd show status" busy checker better
While uevent_listen() was grabbing new uevents, "multipathd show status"
would still show busy as "False". Add a check there, to catch
multipathd's uevent processing earlier. Also, access servicing_uev (as
well as the new variable, adding_uev) atomically, just to make sure that
the compiler doesn't do stupid things trying to optimize them.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
---
libmultipath/uevent.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c
index 8cd928a9..c230e963 100644
--- a/libmultipath/uevent.c
+++ b/libmultipath/uevent.c
@@ -64,6 +64,7 @@ static pthread_cond_t *uev_condp = &uev_cond;
static uev_trigger *my_uev_trigger;
static void *my_trigger_data;
static int servicing_uev;
+static int adding_uev; /* uatomic access only */
struct uevent_filter_state {
struct list_head uevq;
@@ -73,13 +74,14 @@ struct uevent_filter_state {
int is_uevent_busy(void)
{
- int empty, servicing;
+ int empty, servicing, adding;
pthread_mutex_lock(uevq_lockp);
empty = list_empty(&uevq);
servicing = servicing_uev;
+ adding = uatomic_read(&adding_uev);
pthread_mutex_unlock(uevq_lockp);
- return (!empty || servicing);
+ return (!empty || servicing || adding);
}
struct uevent * alloc_uevent (void)
@@ -663,6 +665,7 @@ int uevent_listen(struct udev *udev)
int fdcount, events;
struct pollfd ev_poll = { .fd = fd, .events = POLLIN, };
+ uatomic_set(&adding_uev, 0);
fdcount = poll(&ev_poll, 1, -1);
if (fdcount < 0) {
if (errno == EINTR)
@@ -672,6 +675,8 @@ int uevent_listen(struct udev *udev)
err = -errno;
break;
}
+ uatomic_set(&adding_uev, 1);
+
events = uevent_receive_events(fd, &uevlisten_tmp, monitor);
if (events <= 0)
continue;

View File

@ -0,0 +1,279 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Brian Bunker <brian@purestorage.com>
Date: Fri, 9 Jan 2026 16:50:43 -0800
Subject: [PATCH] libmultipath: add purge_disconnected configuration option
Add a new configuration option 'purge_disconnected' that can be set
per multipath device, hardware entry, or globally. This option will
be used to control whether multipathd should automatically remove
paths that are in a disconnected state.
The option is disabled by default (PURGE_DISCONNECTED_OFF).
This patch only adds the configuration infrastructure. The actual
purge functionality will be implemented in a subsequent patch.
Signed-off-by: Brian Bunker <brian@purestorage.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
libmultipath/config.c | 2 ++
libmultipath/config.h | 3 +++
libmultipath/configure.c | 1 +
libmultipath/defaults.h | 1 +
libmultipath/dict.c | 14 ++++++++++++++
libmultipath/propsel.c | 16 ++++++++++++++++
libmultipath/propsel.h | 1 +
libmultipath/structs.h | 12 ++++++++++++
multipath/multipath.conf.5 | 22 ++++++++++++++++++++++
9 files changed, 72 insertions(+)
diff --git a/libmultipath/config.c b/libmultipath/config.c
index bd8296bf..2aa69cb4 100644
--- a/libmultipath/config.c
+++ b/libmultipath/config.c
@@ -476,6 +476,7 @@ merge_hwe (struct hwentry * dst, struct hwentry * src)
merge_num(marginal_path_err_rate_threshold);
merge_num(marginal_path_err_recheck_gap_time);
merge_num(marginal_path_double_failed_time);
+ merge_num(purge_disconnected);
snprintf(id, sizeof(id), "%s/%s", dst->vendor, dst->product);
reconcile_features_with_options(id, &dst->features,
@@ -524,6 +525,7 @@ merge_mpe(struct mpentry *dst, struct mpentry *src)
merge_num(skip_kpartx);
merge_num(max_sectors_kb);
merge_num(ghost_delay);
+ merge_num(purge_disconnected);
merge_num(uid);
merge_num(gid);
merge_num(mode);
diff --git a/libmultipath/config.h b/libmultipath/config.h
index d3abbaea..e0b1e4c8 100644
--- a/libmultipath/config.h
+++ b/libmultipath/config.h
@@ -87,6 +87,7 @@ struct hwentry {
int marginal_path_err_rate_threshold;
int marginal_path_err_recheck_gap_time;
int marginal_path_double_failed_time;
+ int purge_disconnected;
int skip_kpartx;
int max_sectors_kb;
int ghost_delay;
@@ -130,6 +131,7 @@ struct mpentry {
int marginal_path_err_rate_threshold;
int marginal_path_err_recheck_gap_time;
int marginal_path_double_failed_time;
+ int purge_disconnected;
int skip_kpartx;
int max_sectors_kb;
int ghost_delay;
@@ -186,6 +188,7 @@ struct config {
int marginal_path_err_rate_threshold;
int marginal_path_err_recheck_gap_time;
int marginal_path_double_failed_time;
+ int purge_disconnected;
int uxsock_timeout;
int strict_timing;
int retrigger_tries;
diff --git a/libmultipath/configure.c b/libmultipath/configure.c
index 71acb968..c1b6a077 100644
--- a/libmultipath/configure.c
+++ b/libmultipath/configure.c
@@ -389,6 +389,7 @@ int setup_map(struct multipath *mpp, char **params, struct vectors *vecs)
select_max_sectors_kb(conf, mpp);
select_ghost_delay(conf, mpp);
select_flush_on_last_del(conf, mpp);
+ select_purge_disconnected(conf, mpp);
sysfs_set_scsi_tmo(conf, mpp);
marginal_pathgroups = conf->marginal_pathgroups;
diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h
index 1ab5b99c..caf4ccce 100644
--- a/libmultipath/defaults.h
+++ b/libmultipath/defaults.h
@@ -55,6 +55,7 @@
#define DEFAULT_ALL_TG_PT ALL_TG_PT_OFF
#define DEFAULT_RECHECK_WWID RECHECK_WWID_OFF
#define DEFAULT_AUTO_RESIZE AUTO_RESIZE_NEVER
+#define DEFAULT_PURGE_DISCONNECTED PURGE_DISCONNECTED_OFF
/* Enable no foreign libraries by default */
#define DEFAULT_ENABLE_FOREIGN "NONE"
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
index 3c011ece..421070df 100644
--- a/libmultipath/dict.c
+++ b/libmultipath/dict.c
@@ -979,6 +979,16 @@ static int snprint_def_disable_changed_wwids(struct config *conf,
return print_ignored(buff);
}
+declare_def_handler(purge_disconnected, set_yes_no_undef)
+declare_def_snprint_defint(purge_disconnected, print_yes_no_undef,
+ DEFAULT_PURGE_DISCONNECTED)
+declare_ovr_handler(purge_disconnected, set_yes_no_undef)
+declare_ovr_snprint(purge_disconnected, print_yes_no_undef)
+declare_hw_handler(purge_disconnected, set_yes_no_undef)
+declare_hw_snprint(purge_disconnected, print_yes_no_undef)
+declare_mp_handler(purge_disconnected, set_yes_no_undef)
+declare_mp_snprint(purge_disconnected, print_yes_no_undef)
+
declare_def_range_handler(remove_retries, 0, INT_MAX)
declare_def_snprint(remove_retries, print_int)
@@ -2289,6 +2299,7 @@ init_keywords(vector keywords)
install_keyword("retrigger_delay", &def_retrigger_delay_handler, &snprint_def_retrigger_delay);
install_keyword("missing_uev_wait_timeout", &def_uev_wait_timeout_handler, &snprint_def_uev_wait_timeout);
install_keyword("skip_kpartx", &def_skip_kpartx_handler, &snprint_def_skip_kpartx);
+ install_keyword("purge_disconnected", &def_purge_disconnected_handler, &snprint_def_purge_disconnected);
install_keyword("disable_changed_wwids", &def_disable_changed_wwids_handler, &snprint_def_disable_changed_wwids);
install_keyword("remove_retries", &def_remove_retries_handler, &snprint_def_remove_retries);
install_keyword("max_sectors_kb", &def_max_sectors_kb_handler, &snprint_def_max_sectors_kb);
@@ -2386,6 +2397,7 @@ init_keywords(vector keywords)
install_keyword("marginal_path_err_recheck_gap_time", &hw_marginal_path_err_recheck_gap_time_handler, &snprint_hw_marginal_path_err_recheck_gap_time);
install_keyword("marginal_path_double_failed_time", &hw_marginal_path_double_failed_time_handler, &snprint_hw_marginal_path_double_failed_time);
install_keyword("skip_kpartx", &hw_skip_kpartx_handler, &snprint_hw_skip_kpartx);
+ install_keyword("purge_disconnected", &hw_purge_disconnected_handler, &snprint_hw_purge_disconnected);
install_keyword("max_sectors_kb", &hw_max_sectors_kb_handler, &snprint_hw_max_sectors_kb);
install_keyword("ghost_delay", &hw_ghost_delay_handler, &snprint_hw_ghost_delay);
install_keyword("all_tg_pt", &hw_all_tg_pt_handler, &snprint_hw_all_tg_pt);
@@ -2429,6 +2441,7 @@ init_keywords(vector keywords)
install_keyword("marginal_path_double_failed_time", &ovr_marginal_path_double_failed_time_handler, &snprint_ovr_marginal_path_double_failed_time);
install_keyword("skip_kpartx", &ovr_skip_kpartx_handler, &snprint_ovr_skip_kpartx);
+ install_keyword("purge_disconnected", &ovr_purge_disconnected_handler, &snprint_ovr_purge_disconnected);
install_keyword("max_sectors_kb", &ovr_max_sectors_kb_handler, &snprint_ovr_max_sectors_kb);
install_keyword("ghost_delay", &ovr_ghost_delay_handler, &snprint_ovr_ghost_delay);
install_keyword("all_tg_pt", &ovr_all_tg_pt_handler, &snprint_ovr_all_tg_pt);
@@ -2474,6 +2487,7 @@ init_keywords(vector keywords)
install_keyword("marginal_path_err_recheck_gap_time", &mp_marginal_path_err_recheck_gap_time_handler, &snprint_mp_marginal_path_err_recheck_gap_time);
install_keyword("marginal_path_double_failed_time", &mp_marginal_path_double_failed_time_handler, &snprint_mp_marginal_path_double_failed_time);
install_keyword("skip_kpartx", &mp_skip_kpartx_handler, &snprint_mp_skip_kpartx);
+ install_keyword("purge_disconnected", &mp_purge_disconnected_handler, &snprint_mp_purge_disconnected);
install_keyword("max_sectors_kb", &mp_max_sectors_kb_handler, &snprint_mp_max_sectors_kb);
install_keyword("ghost_delay", &mp_ghost_delay_handler, &snprint_mp_ghost_delay);
install_sublevel_end();
diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
index 0b6e22c1..c45488f7 100644
--- a/libmultipath/propsel.c
+++ b/libmultipath/propsel.c
@@ -1306,6 +1306,22 @@ out:
return 0;
}
+int select_purge_disconnected(struct config *conf, struct multipath *mp)
+{
+ const char *origin;
+
+ mp_set_mpe(purge_disconnected);
+ mp_set_ovr(purge_disconnected);
+ mp_set_hwe(purge_disconnected);
+ mp_set_conf(purge_disconnected);
+ mp_set_default(purge_disconnected, DEFAULT_PURGE_DISCONNECTED);
+out:
+ condlog(3, "%s: purge_disconnected = %s %s", mp->alias,
+ (mp->purge_disconnected == PURGE_DISCONNECTED_ON) ? "yes" : "no",
+ origin);
+ return 0;
+}
+
int select_max_sectors_kb(struct config *conf, struct multipath * mp)
{
const char *origin;
diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h
index 152ca44c..325c1ee3 100644
--- a/libmultipath/propsel.h
+++ b/libmultipath/propsel.h
@@ -35,6 +35,7 @@ int select_marginal_path_err_rate_threshold(struct config *conf, struct multipat
int select_marginal_path_err_recheck_gap_time(struct config *conf, struct multipath *mp);
int select_marginal_path_double_failed_time(struct config *conf, struct multipath *mp);
int select_ghost_delay(struct config *conf, struct multipath * mp);
+int select_purge_disconnected(struct config *conf, struct multipath *mp);
void reconcile_features_with_options(const char *id, char **features,
int* no_path_retry,
int *retain_hwhandler);
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 423c8b78..e1969b95 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -174,6 +174,17 @@ enum auto_resize_state {
AUTO_RESIZE_GROW_SHRINK,
};
+/*
+ * purge_disconnected configuration option (per multipath device)
+ * Controls whether paths that become disconnected at the storage target
+ * should be automatically removed from the system via sysfs.
+ */
+enum purge_disconnected_states {
+ PURGE_DISCONNECTED_UNDEF = YNU_UNDEF,
+ PURGE_DISCONNECTED_OFF = YNU_NO, /* Don't purge */
+ PURGE_DISCONNECTED_ON = YNU_YES, /* Purge disconnected paths */
+};
+
#define PROTOCOL_UNSET -1
enum scsi_protocol {
@@ -427,6 +438,7 @@ struct multipath {
int ghost_delay;
int ghost_delay_tick;
int queue_mode;
+ int purge_disconnected;
uid_t uid;
gid_t gid;
mode_t mode;
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
index 8684bd63..529bd98e 100644
--- a/multipath/multipath.conf.5
+++ b/multipath/multipath.conf.5
@@ -1265,6 +1265,22 @@ The default is: \fBno\fR
.
.
.TP
+.B purge_disconnected
+If set to
+.I yes
+, multipathd will automatically remove devices that are in a disconnected state.
+A path is considered disconnected when the TUR (Test Unit Ready) path checker
+receives the SCSI sense code "LOGICAL UNIT NOT SUPPORTED" (sense key 0x5,
+ASC/ASCQ 0x25/0x00). This typically indicates that the LUN has been unmapped
+or is no longer presented by the storage array. This option helps clean up
+stale device entries that would otherwise remain in the system.
+.RS
+.TP
+The default is: \fBno\fR
+.RE
+.
+.
+.TP
.B disable_changed_wwids
This option is deprecated and ignored. If the WWID of a path suddenly changes,
multipathd handles it as if it was removed and then added again.
@@ -1540,6 +1556,8 @@ section:
.TP
.B skip_kpartx
.TP
+.B purge_disconnected
+.TP
.B max_sectors_kb
.TP
.B ghost_delay
@@ -1715,6 +1733,8 @@ section:
.TP
.B skip_kpartx
.TP
+.B purge_disconnected
+.TP
.B max_sectors_kb
.TP
.B ghost_delay
@@ -1801,6 +1821,8 @@ the values are taken from the \fIdevices\fR or \fIdefaults\fR sections:
.TP
.B skip_kpartx
.TP
+.B purge_disconnected
+.TP
.B max_sectors_kb
.TP
.B ghost_delay

View File

@ -0,0 +1,752 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Brian Bunker <brian@purestorage.com>
Date: Fri, 9 Jan 2026 16:50:43 -0800
Subject: [PATCH] multipathd: implement purge functionality for disconnected
paths
Implement automatic purging of paths that have been disconnected at the
storage target (e.g., LUN unmapped). This builds on the purge_disconnected
configuration option added in the previous patch.
This adds:
- New PATH_DISCONNECTED checker state to signal disconnection
- TUR checker support for detecting LUN NOT SUPPORTED (ASC/ASCQ 0x25/0x00)
- Purge thread (purgeloop) that removes paths via sysfs delete attribute
- State machine to track disconnection and delay purging
- Conversion of PATH_DISCONNECTED to PATH_DOWN for normal processing
The purge thread runs independently and processes paths that have been
marked for purging by the checker thread. Paths are only purged after
remaining disconnected for delay_wait_checks intervals to avoid removing
paths that are temporarily flapping.
Signed-off-by: Brian Bunker <brian@purestorage.com>
Signed-off-by: Krishna Kant <krishna.kant@purestorage.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
libmultipath/checkers.c | 2 +
libmultipath/checkers.h | 15 +-
libmultipath/checkers/tur.c | 10 ++
libmultipath/discovery.c | 17 ++
libmultipath/io_err_stat.c | 1 +
libmultipath/print.c | 2 +
libmultipath/structs.h | 14 ++
multipathd/Makefile | 2 +-
multipathd/main.c | 74 +++++++-
multipathd/purge.c | 326 ++++++++++++++++++++++++++++++++++++
multipathd/purge.h | 41 +++++
11 files changed, 496 insertions(+), 8 deletions(-)
create mode 100644 multipathd/purge.c
create mode 100644 multipathd/purge.h
diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
index 9eb3e261..e1b84df0 100644
--- a/libmultipath/checkers.c
+++ b/libmultipath/checkers.c
@@ -39,6 +39,7 @@ static const char *checker_state_names[PATH_MAX_STATE] = {
[PATH_TIMEOUT] = "timeout",
[PATH_REMOVED] = "removed",
[PATH_DELAYED] = "delayed",
+ [PATH_DISCONNECTED] = "disconnected",
};
static LIST_HEAD(checkers);
@@ -343,6 +344,7 @@ static const char *generic_msg[CHECKER_GENERIC_MSGTABLE_SIZE] = {
[CHECKER_MSGID_DOWN] = " reports path is down",
[CHECKER_MSGID_GHOST] = " reports path is ghost",
[CHECKER_MSGID_UNSUPPORTED] = " doesn't support this device",
+ [CHECKER_MSGID_DISCONNECTED] = " no access to this device",
};
const char *checker_message(const struct checker *c)
diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
index 2fd1d1c6..2f32f612 100644
--- a/libmultipath/checkers.h
+++ b/libmultipath/checkers.h
@@ -66,6 +66,15 @@
* delay_watch_checks checks, when it comes back up again, it will not
* be marked as up until it has been up for delay_wait_checks checks.
* During this time, it is marked as "delayed"
+ *
+ * PATH_DISCONNECTED is a special ephemeral state used to signal that a path
+ * has been disconnected at the storage target (e.g., LUN unmapped). When a
+ * checker returns PATH_DISCONNECTED:
+ * 1. The path's pp->disconnected field is set to track purge state
+ * 2. The state is immediately converted to PATH_DOWN for normal processing
+ * 3. If purge_disconnected is enabled, the path will be removed via sysfs
+ * This state should never be stored in pp->state or pp->chkrstate; it exists
+ * only as a transient return value from checkers to trigger special handling.
*/
enum path_check_state {
PATH_WILD = 0,
@@ -78,6 +87,7 @@ enum path_check_state {
PATH_TIMEOUT,
PATH_REMOVED,
PATH_DELAYED,
+ PATH_DISCONNECTED, /* Ephemeral: mapped to PATH_DOWN */
PATH_MAX_STATE
};
@@ -113,9 +123,10 @@ enum {
CHECKER_MSGID_DOWN,
CHECKER_MSGID_GHOST,
CHECKER_MSGID_UNSUPPORTED,
+ CHECKER_MSGID_DISCONNECTED,
CHECKER_GENERIC_MSGTABLE_SIZE,
- CHECKER_FIRST_MSGID = 100, /* lowest msgid for checkers */
- CHECKER_MSGTABLE_SIZE = 100, /* max msg table size for checkers */
+ CHECKER_FIRST_MSGID = 100, /* lowest msgid for checkers */
+ CHECKER_MSGTABLE_SIZE = 100, /* max msg table size for checkers */
};
struct checker_class;
diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
index d82f7dbc..370a02a6 100644
--- a/libmultipath/checkers/tur.c
+++ b/libmultipath/checkers/tur.c
@@ -188,6 +188,16 @@ retry:
*msgid = CHECKER_MSGID_GHOST;
return PATH_GHOST;
}
+ } else if (key == 0x5) {
+ /* Illegal request */
+ if (asc == 0x25 && ascq == 0x00) {
+ /*
+ * LUN NOT SUPPORTED: unmapped at target.
+ * Signals pp->disconnected, becomes PATH_DOWN.
+ */
+ *msgid = CHECKER_MSGID_DISCONNECTED;
+ return PATH_DISCONNECTED;
+ }
}
*msgid = CHECKER_MSGID_DOWN;
return PATH_DOWN;
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
index 186423e0..c529f336 100644
--- a/libmultipath/discovery.c
+++ b/libmultipath/discovery.c
@@ -2416,8 +2416,25 @@ int pathinfo(struct path *pp, struct config *conf, int mask)
pp->state == PATH_UNCHECKED ||
pp->state == PATH_WILD)
pp->chkrstate = pp->state = newstate;
+ /*
+ * PATH_TIMEOUT and PATH_DISCONNECTED are ephemeral
+ * states that should never be stored in pp->state.
+ * Convert them to PATH_DOWN immediately.
+ */
if (pp->state == PATH_TIMEOUT)
pp->state = PATH_DOWN;
+ if (pp->state == PATH_DISCONNECTED) {
+ int purge_enabled = pp->mpp &&
+ pp->mpp->purge_disconnected ==
+ PURGE_DISCONNECTED_ON;
+ if (purge_enabled &&
+ pp->disconnected == NOT_DISCONNECTED) {
+ condlog(2, "%s: mark path for purge",
+ pp->dev);
+ pp->disconnected = DISCONNECTED_READY_FOR_PURGE;
+ }
+ pp->state = PATH_DOWN;
+ }
if (pp->state == PATH_UP && !pp->size) {
condlog(3, "%s: device size is 0, "
"path unusable", pp->dev);
diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
index d8d91f64..d744d50e 100644
--- a/libmultipath/io_err_stat.c
+++ b/libmultipath/io_err_stat.c
@@ -380,6 +380,7 @@ static void account_async_io_state(struct io_err_stat_path *pp, int rc)
switch (rc) {
case PATH_DOWN:
case PATH_TIMEOUT:
+ case PATH_DISCONNECTED:
pp->io_err_nr++;
break;
case PATH_UNCHECKED:
diff --git a/libmultipath/print.c b/libmultipath/print.c
index ff224bc4..42d1d44c 100644
--- a/libmultipath/print.c
+++ b/libmultipath/print.c
@@ -487,6 +487,8 @@ snprint_chk_state (struct strbuf *buff, const struct path * pp)
return append_strbuf_str(buff, "i/o timeout");
case PATH_DELAYED:
return append_strbuf_str(buff, "delayed");
+ case PATH_DISCONNECTED:
+ return append_strbuf_str(buff, "disconnected");
default:
return append_strbuf_str(buff, "undef");
}
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index e1969b95..32643684 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -185,6 +185,18 @@ enum purge_disconnected_states {
PURGE_DISCONNECTED_ON = YNU_YES, /* Purge disconnected paths */
};
+/*
+ * Path disconnection state (per path)
+ * Tracks whether a path has been marked for purge and whether it's already queued.
+ */
+enum path_disconnected_state {
+ NOT_DISCONNECTED, /* Path is not disconnected */
+ DISCONNECTED_READY_FOR_PURGE, /* Path is disconnected and ready to be
+ queued for purge */
+ DISCONNECTED_QUEUED_FOR_PURGE, /* Path is disconnected and already
+ queued for purge */
+};
+
#define PROTOCOL_UNSET -1
enum scsi_protocol {
@@ -355,6 +367,8 @@ struct path {
int state;
int dmstate;
int chkrstate;
+ enum path_disconnected_state disconnected; /* Marked for purge due to
+ disconnection */
int failcount;
int priority;
int pgindex;
diff --git a/multipathd/Makefile b/multipathd/Makefile
index 00342464..a49c4973 100644
--- a/multipathd/Makefile
+++ b/multipathd/Makefile
@@ -46,7 +46,7 @@ ifeq ($(ENABLE_DMEVENTS_POLL),0)
endif
OBJS = main.o pidfile.o uxlsnr.o uxclnt.o cli.o cli_handlers.o waiter.o \
- dmevents.o init_unwinder.o
+ dmevents.o init_unwinder.o purge.o
ifeq ($(FPIN_SUPPORT),1)
OBJS += fpin_handlers.o
diff --git a/multipathd/main.c b/multipathd/main.c
index 9beb0e06..d91a4d49 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -84,6 +84,7 @@
#include "io_err_stat.h"
#include "wwids.h"
#include "foreign.h"
+#include "purge.h"
#include "../third-party/valgrind/drd.h"
#include "init_unwinder.h"
@@ -135,11 +136,11 @@ static volatile enum daemon_status running_state = DAEMON_INIT;
pid_t daemon_pid;
static pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t config_cond;
-static pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr, dmevent_thr,
- fpin_thr, fpin_consumer_thr;
-static bool check_thr_started, uevent_thr_started, uxlsnr_thr_started,
- uevq_thr_started, dmevent_thr_started, fpin_thr_started,
- fpin_consumer_thr_started;
+static pthread_t check_thr, purge_thr, uevent_thr, uxlsnr_thr, uevq_thr,
+ dmevent_thr, fpin_thr, fpin_consumer_thr;
+static bool check_thr_started, purge_thr_started, uevent_thr_started,
+ uxlsnr_thr_started, uevq_thr_started, dmevent_thr_started,
+ fpin_thr_started, fpin_consumer_thr_started;
static int pid_fd = -1;
static inline enum daemon_status get_running_state(void)
@@ -2377,6 +2378,28 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
if (newstate == PATH_REMOVED)
newstate = PATH_DOWN;
+ /*
+ * PATH_DISCONNECTED is an ephemeral state used to signal that a path
+ * has been disconnected at the storage target (LUN unmapped). We use
+ * it to set pp->disconnected for purge tracking, then immediately
+ * convert it to PATH_DOWN for normal path failure handling.
+ *
+ * This ensures PATH_DISCONNECTED never gets stored in pp->state or
+ * pp->chkrstate - it exists only as a transient signal from the
+ * checker to trigger special handling before becoming PATH_DOWN.
+ */
+ if (newstate == PATH_DISCONNECTED) {
+ if (pp->mpp &&
+ pp->mpp->purge_disconnected == PURGE_DISCONNECTED_ON &&
+ pp->disconnected == NOT_DISCONNECTED) {
+ condlog(2, "%s: mark (%s) path for purge", pp->dev,
+ checker_state_name(newstate));
+ pp->disconnected = DISCONNECTED_READY_FOR_PURGE;
+ }
+ /* Always convert to PATH_DOWN for normal processing */
+ newstate = PATH_DOWN;
+ }
+
if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
condlog(2, "%s: unusable path (%s) - checker failed",
pp->dev, checker_state_name(newstate));
@@ -2684,6 +2707,7 @@ checkerloop (void *ap)
struct timespec diff_time, start_time, end_time;
int num_paths = 0, strict_timing, rc = 0;
unsigned int ticks = 0;
+ LIST_HEAD(purge_list);
get_monotonic_time(&start_time);
if (start_time.tv_sec && last_time.tv_sec) {
@@ -2724,6 +2748,12 @@ checkerloop (void *ap)
}
lock_cleanup_pop(vecs->lock);
+ /*
+ * Cleanup handler to free purge_list if thread is cancelled.
+ * This prevents memory leaks during shutdown.
+ */
+ pthread_cleanup_push(cleanup_purge_list, &purge_list);
+
pthread_cleanup_push(cleanup_lock, &vecs->lock);
lock(&vecs->lock);
pthread_testcancel();
@@ -2731,6 +2761,11 @@ checkerloop (void *ap)
retry_count_tick(vecs->mpvec);
missing_uev_wait_tick(vecs);
ghost_delay_tick(vecs);
+ /*
+ * Build purge list for disconnected paths.
+ * The caller will queue it after releasing vecs->lock.
+ */
+ build_purge_list(vecs, &purge_list);
lock_cleanup_pop(vecs->lock);
if (count)
@@ -2745,6 +2780,26 @@ checkerloop (void *ap)
lock_cleanup_pop(vecs->lock);
}
+ /*
+ * Queue purge work for disconnected paths.
+ * This is done after releasing vecs->lock to avoid holding
+ * the lock while signaling the purge thread.
+ */
+ if (!list_empty(&purge_list)) {
+ pthread_cleanup_push(cleanup_mutex, &purge_mutex);
+ pthread_mutex_lock(&purge_mutex);
+ pthread_testcancel();
+ list_splice_tail_init(&purge_list, &purge_queue);
+ pthread_cond_signal(&purge_cond);
+ pthread_cleanup_pop(1);
+ }
+
+ /*
+ * Pop cleanup handler. Execute it (arg=1) to free purge_list
+ * at the end of each iteration.
+ */
+ pthread_cleanup_pop(1);
+
diff_time.tv_nsec = 0;
if (start_time.tv_sec) {
get_monotonic_time(&end_time);
@@ -3225,6 +3280,8 @@ static void cleanup_threads(void)
if (check_thr_started)
pthread_cancel(check_thr);
+ if (purge_thr_started)
+ pthread_cancel(purge_thr);
if (uevent_thr_started)
pthread_cancel(uevent_thr);
if (uxlsnr_thr_started)
@@ -3241,6 +3298,8 @@ static void cleanup_threads(void)
if (check_thr_started)
pthread_join(check_thr, NULL);
+ if (purge_thr_started)
+ pthread_join(purge_thr, NULL);
if (uevent_thr_started)
pthread_join(uevent_thr, NULL);
if (uxlsnr_thr_started)
@@ -3496,6 +3555,11 @@ child (__attribute__((unused)) void *param)
goto failed;
} else
check_thr_started = true;
+ if ((rc = pthread_create(&purge_thr, &misc_attr, purgeloop, vecs))) {
+ condlog(0, "failed to create purge loop thread: %d", rc);
+ goto failed;
+ } else
+ purge_thr_started = true;
if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
condlog(0, "failed to create uevent dispatcher: %d", rc);
goto failed;
diff --git a/multipathd/purge.c b/multipathd/purge.c
new file mode 100644
index 00000000..44f0c905
--- /dev/null
+++ b/multipathd/purge.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Brian Bunker <brian@purestorage.com>
+ * Copyright (C) 2025 Krishna Kant <krishna.kant@purestorage.com>
+ */
+
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <libudev.h>
+#include <urcu.h>
+
+#include "vector.h"
+#include "structs.h"
+#include "structs_vec.h"
+#include "debug.h"
+#include "util.h"
+#include "lock.h"
+#include "sysfs.h"
+#include "list.h"
+#include "purge.h"
+
+pthread_mutex_t purge_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t purge_cond = PTHREAD_COND_INITIALIZER;
+LIST_HEAD(purge_queue);
+
+/*
+ * Information needed to purge a path. We copy this data while holding
+ * vecs->lock, then release the lock before doing the actual sysfs write.
+ * This prevents blocking other operations while waiting for sysfs I/O.
+ *
+ * The udev device reference captures the sysfs path (including H:C:T:L).
+ * The duplicated fd prevents device name/number reuse: the kernel will not
+ * reuse the device's minor number (which maps to the device name) for a new
+ * device while we hold an open file descriptor, even if the original device
+ * has been removed. This protects against deleting a new device that reused
+ * the same name after the original was removed externally.
+ */
+struct purge_path_info {
+ struct list_head node; /* List linkage */
+ struct udev_device *udev; /* Udev device (refcounted) */
+ int fd; /* Dup'd fd prevents device reuse */
+};
+
+/*
+ * Attempt to delete a path by writing to the SCSI device's sysfs delete
+ * attribute. This triggers kernel-level device removal. The actual cleanup
+ * of the path structure from pathvec happens later when a uevent arrives
+ * (handled by uev_remove_path).
+ *
+ * This function does NOT require vecs->lock to be held, as it operates on
+ * copied data. This function may block while writing to sysfs, which is
+ * why it's called without holding any locks.
+ *
+ * Protection against device reuse:
+ * The duplicated fd in purge_path_info prevents the kernel from reusing
+ * the device's minor number (and thus the device name like /dev/sdd) for
+ * a new device, even if the original device has been removed externally.
+ * This ensures we cannot accidentally delete a new device that reused the
+ * same name. The kernel maintains this guarantee as long as we hold the
+ * open file descriptor.
+ */
+static void delete_path_sysfs(struct purge_path_info *info)
+{
+ struct udev_device *ud;
+ const char *devname;
+
+ if (!info->udev)
+ goto out;
+
+ devname = udev_device_get_devnode(info->udev);
+
+ /*
+ * Get the SCSI device parent. This is where we'll write to the
+ * "delete" attribute to trigger device removal.
+ */
+ ud = udev_device_get_parent_with_subsystem_devtype(info->udev, "scsi",
+ "scsi_device");
+ if (!ud) {
+ condlog(3, "%s: failed to purge, no SCSI parent found", devname);
+ goto out;
+ }
+
+ /*
+ * Write "1" to the SCSI device's delete attribute to trigger
+ * kernel-level device removal.
+ */
+ if (sysfs_attr_set_value(ud, "delete", "1", 1) < 0)
+ condlog(3, "%s: failed to purge", devname);
+ else
+ condlog(2, "%s: purged", devname);
+
+out:
+ return;
+}
+
+/*
+ * Prepare purge info for a path while holding vecs->lock.
+ * Takes a reference on the udev device and duplicates the fd.
+ * Returns allocated purge_path_info on success, NULL on failure.
+ *
+ * We require a valid fd because it prevents the kernel from reusing
+ * the device's minor number (and device name) for a new device while
+ * we hold it open. This protects against accidentally deleting a new
+ * device that reused the same name after the original was removed.
+ */
+static struct purge_path_info *prepare_purge_path_info(struct path *pp)
+{
+ struct purge_path_info *info = NULL;
+
+ if (!pp->udev || !pp->mpp)
+ goto out;
+
+ /*
+ * We require a valid fd to prevent device name reuse.
+ * Without it, we cannot safely purge the device.
+ */
+ if (pp->fd < 0) {
+ condlog(3, "%s: no fd available, cannot safely purge", pp->dev);
+ goto out;
+ }
+
+ info = calloc(1, sizeof(*info));
+ if (!info)
+ goto out;
+
+ INIT_LIST_HEAD(&info->node);
+ info->udev = udev_device_ref(pp->udev);
+ if (!info->udev)
+ goto out_free;
+
+ info->fd = dup(pp->fd);
+ if (info->fd < 0) {
+ condlog(3, "%s: failed to dup fd: %s, cannot safely purge",
+ pp->dev, strerror(errno));
+ goto out_unref;
+ }
+
+ return info;
+
+out_unref:
+ udev_device_unref(info->udev);
+out_free:
+ free(info);
+ info = NULL;
+out:
+ return info;
+}
+
+/*
+ * Clean up and free purge info.
+ */
+static void free_purge_path_info(struct purge_path_info *info)
+{
+ if (!info)
+ return;
+
+ if (info->fd >= 0)
+ close(info->fd);
+ if (info->udev)
+ udev_device_unref(info->udev);
+ free(info);
+}
+
+/*
+ * Build a list of purge_path_info for all paths marked for purge.
+ * This should be called while holding vecs->lock. It clears the
+ * disconnected flag and prepares purge info for each path, adding
+ * them to tmpq.
+ */
+void build_purge_list(struct vectors *vecs, struct list_head *tmpq)
+{
+ struct path *pp;
+ unsigned int i;
+
+ vector_foreach_slot (vecs->pathvec, pp, i) {
+ struct purge_path_info *info;
+
+ if (pp->disconnected != DISCONNECTED_READY_FOR_PURGE)
+ continue;
+
+ /*
+ * Mark as queued whether we succeed or fail.
+ * On success, we're purging it now.
+ * On failure, retrying is unlikely to help until
+ * the checker re-evaluates the path.
+ */
+ pp->disconnected = DISCONNECTED_QUEUED_FOR_PURGE;
+
+ info = prepare_purge_path_info(pp);
+ if (info) {
+ condlog(2, "%s: queuing path for purge", pp->dev);
+ list_add_tail(&info->node, tmpq);
+ } else
+ condlog(3, "%s: failed to prepare purge info", pp->dev);
+ }
+}
+
+static void rcu_unregister(__attribute__((unused)) void *param)
+{
+ rcu_unregister_thread();
+}
+
+/*
+ * Cleanup handler for a single purge_path_info.
+ * Used to prevent memory leaks if thread is cancelled while processing.
+ */
+static void cleanup_purge_path_info(void *arg)
+{
+ struct purge_path_info *info = arg;
+
+ free_purge_path_info(info);
+}
+
+/*
+ * Cleanup handler for purge list. Frees all purge_path_info entries.
+ * Can be called as a pthread cleanup handler or directly.
+ */
+void cleanup_purge_list(void *arg)
+{
+ struct list_head *purge_list = arg;
+ struct purge_path_info *info, *tmp;
+
+ list_for_each_entry_safe(info, tmp, purge_list, node)
+ {
+ list_del_init(&info->node);
+ free_purge_path_info(info);
+ }
+}
+
+/*
+ * Cleanup handler for the global purge queue.
+ * Used during shutdown to free any remaining queued items.
+ */
+static void cleanup_global_purge_queue(void *arg __attribute__((unused)))
+{
+ pthread_mutex_lock(&purge_mutex);
+ cleanup_purge_list(&purge_queue);
+ pthread_mutex_unlock(&purge_mutex);
+}
+
+/*
+ * Main purge thread loop.
+ *
+ * This thread waits for purge_path_info structs to be queued by the checker
+ * thread, then processes them by writing to their sysfs delete attributes.
+ * The checker thread builds the list while holding vecs->lock, so this
+ * thread doesn't need to grab that lock at all.
+ *
+ * Uses list_splice_tail_init() like uevent_dispatch() to safely transfer
+ * items from the global queue to a local list for processing.
+ *
+ * Cleanup handlers are registered for both the local purge_list and the
+ * global purge_queue (similar to uevent_listen), and for each individual
+ * purge_path_info after it's popped off the list (similar to service_uevq).
+ * This ensures no memory leaks if the thread is cancelled at any point.
+ */
+void *purgeloop(void *ap __attribute__((unused)))
+{
+ pthread_cleanup_push(rcu_unregister, NULL);
+ rcu_register_thread();
+ mlockall(MCL_CURRENT | MCL_FUTURE);
+
+ /*
+ * Cleanup handler for global purge_queue.
+ * This handles items that were queued but not yet moved to purge_list.
+ */
+ pthread_cleanup_push(cleanup_global_purge_queue, NULL);
+
+ while (1) {
+ LIST_HEAD(purge_list);
+ struct purge_path_info *info;
+
+ /*
+ * Cleanup handler for local purge_list.
+ * This handles items that were moved from purge_queue but
+ * not yet processed.
+ */
+ pthread_cleanup_push(cleanup_purge_list, &purge_list);
+
+ /*
+ * Cleanup handler for purge_mutex.
+ * Note: pthread_cond_wait() reacquires the mutex before
+ * returning, even on cancellation, so this cleanup handler
+ * will properly unlock it if we're cancelled.
+ */
+ pthread_cleanup_push(cleanup_mutex, &purge_mutex);
+ pthread_mutex_lock(&purge_mutex);
+ pthread_testcancel();
+ while (list_empty(&purge_queue)) {
+ condlog(4, "purgeloop waiting for work");
+ pthread_cond_wait(&purge_cond, &purge_mutex);
+ }
+ list_splice_tail_init(&purge_queue, &purge_list);
+ pthread_cleanup_pop(1);
+
+ /*
+ * Process all paths in the list without holding any locks.
+ * The sysfs operations may block, but that's fine since we're
+ * not holding vecs->lock.
+ *
+ * After popping each info off the list, we immediately push
+ * a cleanup handler for it. This ensures it gets freed even
+ * if we're cancelled inside delete_path_sysfs().
+ */
+ while ((info = list_pop_entry(&purge_list, typeof(*info), node))) {
+ pthread_cleanup_push(cleanup_purge_path_info, info);
+ delete_path_sysfs(info);
+ pthread_cleanup_pop(1);
+ }
+
+ /*
+ * Pop cleanup handler without executing it (0) since we've
+ * already freed everything above. The handler only runs if
+ * the thread is cancelled during processing.
+ */
+ pthread_cleanup_pop(0);
+ }
+
+ pthread_cleanup_pop(1);
+ pthread_cleanup_pop(1);
+ return NULL;
+}
diff --git a/multipathd/purge.h b/multipathd/purge.h
new file mode 100644
index 00000000..1fe755f3
--- /dev/null
+++ b/multipathd/purge.h
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Brian Bunker <brian@purestorage.com>
+ * Copyright (C) 2025 Krishna Kant <krishna.kant@purestorage.com>
+ */
+
+#ifndef PURGE_H_INCLUDED
+#define PURGE_H_INCLUDED
+
+#include <pthread.h>
+#include "list.h"
+
+struct vectors;
+
+/*
+ * Purge thread synchronization.
+ * The checker thread builds a list of paths to purge and queues them here.
+ * The purge thread picks up the queue and processes it.
+ */
+extern pthread_mutex_t purge_mutex;
+extern pthread_cond_t purge_cond;
+extern struct list_head purge_queue;
+
+/*
+ * Build a list of paths to purge and add them to tmpq. Called by checker
+ * thread while holding vecs->lock.
+ */
+void build_purge_list(struct vectors *vecs, struct list_head *tmpq);
+
+/*
+ * Cleanup handler for purge list. Frees all purge_path_info entries.
+ * Can be called as a pthread cleanup handler or directly for shutdown cleanup.
+ */
+void cleanup_purge_list(void *arg);
+
+/*
+ * Main purge thread loop
+ */
+void *purgeloop(void *ap);
+
+#endif /* PURGE_H_INCLUDED */

View File

@ -0,0 +1,47 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Mon, 1 Dec 2025 22:02:10 -0500
Subject: [PATCH] libmpathpersist: fix register retry status checking
If libmpathpersist failed to create a thread to retry the register
and ignore command, mpath_prout_reg should fail. Instead, the code was
simply ignoring the failed threads. Fix that.
Fixes: 2a4ca250 ("libmpathpersist: change how reservation conflicts are handled")
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
---
libmpathpersist/mpath_persist.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/libmpathpersist/mpath_persist.c b/libmpathpersist/mpath_persist.c
index 72be48c1..6cb56dab 100644
--- a/libmpathpersist/mpath_persist.c
+++ b/libmpathpersist/mpath_persist.c
@@ -858,18 +858,19 @@ int mpath_prout_reg(struct multipath *mpp,int rq_servact, int rq_scope,
}
}
for (i = 0; i < count; i++) {
- if (thread[i].param.status != MPATH_PR_SKIP &&
- thread[i].param.status != MPATH_PR_THREAD_ERROR) {
+ if (thread[i].param.status == MPATH_PR_SKIP)
+ continue;
+ if (thread[i].param.status != MPATH_PR_THREAD_ERROR) {
rc = pthread_join(thread[i].id, NULL);
if (rc) {
condlog(3, "%s: failed to join thread while retrying %d",
- mpp->wwid, i);
+ mpp->wwid, i);
}
- if (thread[i].param.status == MPATH_PR_RETRYABLE_ERROR)
- retryable_error = true;
- else if (status == MPATH_PR_SUCCESS)
- status = thread[i].param.status;
}
+ if (thread[i].param.status == MPATH_PR_RETRYABLE_ERROR)
+ retryable_error = true;
+ else if (status == MPATH_PR_SUCCESS)
+ status = thread[i].param.status;
}
need_retry = false;
}

View File

@ -0,0 +1,59 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Mon, 1 Dec 2025 22:02:12 -0500
Subject: [PATCH] multipathd: remember number of registered keys when ioctl
fails
If prin_do_scsi_ioctl() fails in update_map_pr() for some reason other
than Persistent Reservations not being supported, it shouldn't clear the
number of registered keys, since there's no reason to think that it has
changed. Similarly, if update_map_pr() fails in mpath_pr_event_handle(),
don't assume that the nr_keys_needed was cleared. Just return whatever
the value is now. This saves multipathd from doing pointless calls to
update_map_pr(), if one of the paths is failing.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
---
multipathd/main.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/multipathd/main.c b/multipathd/main.c
index d91a4d49..2526d8d1 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -3842,7 +3842,9 @@ void unset_pr(struct multipath *mpp)
* The number of found keys must be at least as large as *nr_keys,
* and if MPATH_PR_SUCCESS is returned and mpp->prflag is PR_SET after
* the call, *nr_keys will be set to the number of found keys. Otherwise
- * it will be set to 0.
+ * if mpp->prflag is PR_UNSET it will be set to 0. If MPATH_PR_SUCCESS
+ * is not returned and mpp->prflag is not PR_UNSET, nr_keys will not be
+ * changed.
*/
static int update_map_pr(struct multipath *mpp, struct path *pp, unsigned int *nr_keys)
{
@@ -3871,11 +3873,12 @@ static int update_map_pr(struct multipath *mpp, struct path *pp, unsigned int *n
ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, &resp, 0);
if (ret != MPATH_PR_SUCCESS) {
- if (ret == MPATH_PR_ILLEGAL_REQ)
+ if (ret == MPATH_PR_ILLEGAL_REQ) {
unset_pr(mpp);
+ *nr_keys = 0;
+ }
condlog(0, "%s : pr in read keys service action failed Error=%d",
mpp->alias, ret);
- *nr_keys = 0;
return ret;
}
@@ -3990,7 +3993,7 @@ retry:
clear_reg ? "Clearing" : "Setting", pp->dev, ret);
} else if (!clear_reg) {
if (update_map_pr(mpp, pp, &nr_keys_needed) != MPATH_PR_SUCCESS)
- return 0;
+ return nr_keys_needed;
if (mpp->prflag != PR_SET) {
memset(&param, 0, sizeof(param));
clear_reg = true;

View File

@ -0,0 +1,459 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Mon, 15 Dec 2025 15:29:58 -0500
Subject: [PATCH] libmpathpersist: fix code for skipping multipathd path
registration
When libmpathpersist notifies multipathd that a key has been registered,
cli_setprstatus() calls pr_register_active_paths() with a flag to let it
know that the paths are likely already registered, and it can skip
re-registering them, as long as the number of active paths matches the
number of registered keys. This shortcut can fail, causing multipathd to
not register needed paths, if either a path becomes usable and another
becomes unusable while libmpathpersist is running or if there already
were registered keys for I_T Nexus's that don't correspond to path
devices.
To make this shortcut work in cases like that, this commit adds a new
multipathd command "setprstatus map <map> pathlist <pathlist>", where
<pathlist> is a quoted, whitespace separated list of scsi path devices.
libmpathpersist will send out the list of paths it registered the key
on. pr_register_active_paths() will skip calling mpath_pr_event_handle()
for paths on that list.
In order to deal with the possibility of a preempt occurring while
libmpathpersist was running, the code still needs to check that it has
the expected number of keys.
Fixes: f7d6cd17 ("multipathd: Fix race while registering PR key")
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
---
libmpathpersist/mpath_persist.c | 6 +--
libmpathpersist/mpath_updatepr.c | 50 ++++++++++++++++++-------
libmpathpersist/mpathpr.h | 2 +-
multipathd/cli.c | 2 +
multipathd/cli.h | 2 +
multipathd/cli_handlers.c | 37 +++++++++++++++++--
multipathd/cli_handlers.h | 1 +
multipathd/main.c | 63 +++++++++++++++++++++-----------
multipathd/main.h | 4 +-
multipathd/multipathd.8 | 10 ++++-
10 files changed, 132 insertions(+), 45 deletions(-)
diff --git a/libmpathpersist/mpath_persist.c b/libmpathpersist/mpath_persist.c
index 6cb56dab..ab8fa630 100644
--- a/libmpathpersist/mpath_persist.c
+++ b/libmpathpersist/mpath_persist.c
@@ -527,12 +527,12 @@ static int do_mpath_persistent_reserve_out(vector curmp, vector pathvec, int fd,
case MPATH_PROUT_REG_SA:
case MPATH_PROUT_REG_IGN_SA:
if (unregistering)
- update_prflag(alias, 0);
+ update_prflag(mpp, 0);
else
- update_prflag(alias, 1);
+ update_prflag(mpp, 1);
break;
case MPATH_PROUT_CLEAR_SA:
- update_prflag(alias, 0);
+ update_prflag(mpp, 0);
if (mpp->prkey_source == PRKEY_SOURCE_FILE)
update_prkey(alias, 0);
break;
diff --git a/libmpathpersist/mpath_updatepr.c b/libmpathpersist/mpath_updatepr.c
index bfa6e089..e24432bb 100644
--- a/libmpathpersist/mpath_updatepr.c
+++ b/libmpathpersist/mpath_updatepr.c
@@ -19,9 +19,9 @@
#include "memory.h"
#include "mpathpr.h"
#include "structs.h"
+#include "strbuf.h"
-
-static char *do_pr(char *alias, char *str)
+static char *do_pr(char *alias, const char *str)
{
int fd;
char *reply;
@@ -51,24 +51,26 @@ static char *do_pr(char *alias, char *str)
return reply;
}
-static int do_update_pr(char *alias, char *cmd, char *key)
+static int do_update_pr(char *alias, char *cmd, const char *data)
{
- char str[256];
+ STRBUF_ON_STACK(buf);
char *reply = NULL;
int ret = -1;
- if (key)
- snprintf(str,sizeof(str),"%s map %s key %s", cmd, alias, key);
+ if (data)
+ print_strbuf(&buf, "%s map %s %s %s", cmd, alias,
+ strcmp(cmd, "setprkey") ? "pathlist" : "key", data);
else
- snprintf(str,sizeof(str),"%s map %s", cmd, alias);
+ print_strbuf(&buf, "%s map %s", cmd, alias);
- reply = do_pr(alias, str);
+ reply = do_pr(alias, get_strbuf_str(&buf));
if (reply) {
- condlog (2, "%s: message=%s reply=%s", alias, str, reply);
- if (reply && strncmp(reply,"ok", 2) == 0)
+ if (strncmp(reply, "ok", 2) == 0)
ret = 0;
else
ret = -1;
+ condlog(ret ? 0 : 4, "%s: message=%s reply=%s", alias,
+ get_strbuf_str(&buf), reply);
}
free(reply);
@@ -104,9 +106,31 @@ int get_prhold(char *mapname) {
return do_get_pr(mapname, "getprhold");
}
-int update_prflag(char *mapname, int set) {
- return do_update_pr(mapname, (set)? "setprstatus" : "unsetprstatus",
- NULL);
+int update_prflag(struct multipath *mpp, int set)
+{
+ STRBUF_ON_STACK(buf);
+ int i, j;
+ bool first = true;
+ struct pathgroup *pgp = NULL;
+ struct path *pp = NULL;
+
+ if (!set)
+ return do_update_pr(mpp->alias, "unsetprstatus", NULL);
+ /* Send a quoted, space-separated list with the dev_t of every PATH_UP/PATH_GHOST path. */
+ append_strbuf_str(&buf, "\"");
+ vector_foreach_slot (mpp->pg, pgp, j) {
+ vector_foreach_slot (pgp->paths, pp, i) {
+ if (pp->state == PATH_UP || pp->state == PATH_GHOST) {
+ if (first) {
+ append_strbuf_str(&buf, pp->dev_t); /* dev_t, not dev: multipathd compares entries against pp->dev_t */
+ first = false;
+ } else
+ print_strbuf(&buf, " %s", pp->dev_t);
+ }
+ }
+ }
+ append_strbuf_str(&buf, "\"");
+ return do_update_pr(mpp->alias, "setprstatus", get_strbuf_str(&buf));
}
int update_prhold(char *mapname, bool set) {
diff --git a/libmpathpersist/mpathpr.h b/libmpathpersist/mpathpr.h
index 69f402f2..67d04247 100644
--- a/libmpathpersist/mpathpr.h
+++ b/libmpathpersist/mpathpr.h
@@ -40,7 +40,7 @@ int mpath_prout_rel(struct multipath *mpp,int rq_servact, int rq_scope,
unsigned int rq_type, struct prout_param_descriptor *paramp, int noisy,
bool unregister);
-int update_prflag(char *mapname, int set);
+int update_prflag(struct multipath *mpp, int set);
int update_prkey_flags(char *mapname, uint64_t prkey, uint8_t sa_flags);
int get_prflag(char *mapname);
int get_prhold(char *mapname);
diff --git a/multipathd/cli.c b/multipathd/cli.c
index d33b571d..9e833b02 100644
--- a/multipathd/cli.c
+++ b/multipathd/cli.c
@@ -226,6 +226,7 @@ load_keys (void)
r += add_key(keys, "getprhold", GETPRHOLD, 0);
r += add_key(keys, "setprhold", SETPRHOLD, 0);
r += add_key(keys, "unsetprhold", UNSETPRHOLD, 0);
+ r += add_key(keys, "pathlist", PATHLIST, 1);
if (r) {
free_keys(keys);
@@ -572,6 +573,7 @@ cli_init (void) {
add_handler(SHUTDOWN, NULL);
add_handler(GETPRSTATUS+MAP, NULL);
add_handler(SETPRSTATUS+MAP, NULL);
+ add_handler(SETPRSTATUS+MAP+PATHLIST, NULL);
add_handler(UNSETPRSTATUS+MAP, NULL);
add_handler(GETPRKEY+MAP, NULL);
add_handler(SETPRKEY+MAP+KEY, NULL);
diff --git a/multipathd/cli.h b/multipathd/cli.h
index 4d12f8fd..17344cb4 100644
--- a/multipathd/cli.h
+++ b/multipathd/cli.h
@@ -50,6 +50,7 @@ enum {
__GETPRHOLD,
__SETPRHOLD,
__UNSETPRHOLD,
+ __PATHLIST,
};
#define LIST (1 << __LIST)
@@ -99,6 +100,7 @@ enum {
#define GETPRHOLD (1ULL << __GETPRHOLD)
#define SETPRHOLD (1ULL << __SETPRHOLD)
#define UNSETPRHOLD (1ULL << __UNSETPRHOLD)
+#define PATHLIST (1ULL << __PATHLIST)
#define INITIAL_REPLY_LEN 1200
diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c
index 0c63ca9a..f183fceb 100644
--- a/multipathd/cli_handlers.c
+++ b/multipathd/cli_handlers.c
@@ -32,6 +32,7 @@
#include "foreign.h"
#include "strbuf.h"
#include "cli_handlers.h"
+#include <ctype.h>
#define SET_REPLY_AND_LEN(__rep, __len, string_literal) \
do { \
@@ -1308,8 +1309,8 @@ cli_getprstatus (void * v, char ** reply, int * len, void * data)
return 0;
}
-int
-cli_setprstatus(void * v, char ** reply, int * len, void * data)
+static int do_setprstatus(void * v, char ** reply, int * len, void * data,
+ const struct _vector *registered_paths)
{
struct multipath * mpp;
struct vectors * vecs = (struct vectors *)data;
@@ -1324,7 +1325,7 @@ cli_setprstatus(void * v, char ** reply, int * len, void * data)
if (mpp->prflag != PR_SET) {
set_pr(mpp);
- pr_register_active_paths(mpp, true);
+ pr_register_active_paths(mpp, registered_paths);
if (mpp->prflag == PR_SET)
condlog(2, "%s: prflag set", param);
else
@@ -1336,6 +1337,36 @@ cli_setprstatus(void * v, char ** reply, int * len, void * data)
return 0;
}
+int
+cli_setprstatus(void * v, char ** reply, int * len, void * data)
+{
+ return do_setprstatus(v, reply, len, data, NULL);
+}
+
+int
+cli_setprstatus_list(void * v, char ** reply, int * len, void *data)
+{
+ int r;
+ struct _vector registered_paths_vec = {.allocated = 0};
+ vector registered_paths
+ __attribute__((cleanup(cleanup_reset_vec))) = &registered_paths_vec;
+ char *ptr = get_keyparam(v, PATHLIST);
+
+ while (isspace((unsigned char)*ptr)) /* skip leading whitespace; cast keeps isspace() defined for bytes >= 0x80 */
+ ptr++;
+ while (*ptr) {
+ if (!vector_alloc_slot(registered_paths))
+ return -ENOMEM;
+ vector_set_slot(registered_paths, ptr); /* slot points into the parameter string, nothing is copied */
+ while (*ptr && !isspace((unsigned char)*ptr))
+ ptr++;
+ while (isspace((unsigned char)*ptr))
+ *ptr++ = '\0'; /* terminate the token in place and skip the separator run */
+ }
+ r = do_setprstatus(v, reply, len, data, registered_paths);
+ return r;
+}
+
int
cli_unsetprstatus(void * v, char ** reply, int * len, void * data)
{
diff --git a/multipathd/cli_handlers.h b/multipathd/cli_handlers.h
index 348c8485..28b6f2bb 100644
--- a/multipathd/cli_handlers.h
+++ b/multipathd/cli_handlers.h
@@ -46,6 +46,7 @@ int cli_shutdown(void * v, char ** reply, int * len, void * data);
int cli_reassign (void * v, char ** reply, int * len, void * data);
int cli_getprstatus(void * v, char ** reply, int * len, void * data);
int cli_setprstatus(void * v, char ** reply, int * len, void * data);
+int cli_setprstatus_list(void * v, char ** reply, int * len, void * data);
int cli_unsetprstatus(void * v, char ** reply, int * len, void * data);
int cli_getprkey(void * v, char ** reply, int * len, void * data);
int cli_setprkey(void * v, char ** reply, int * len, void * data);
diff --git a/multipathd/main.c b/multipathd/main.c
index 2526d8d1..09d26fe2 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -548,28 +548,47 @@ flush_map_nopaths(struct multipath *mpp, struct vectors *vecs) {
return true;
}
-void pr_register_active_paths(struct multipath *mpp, bool check_nr_active)
+/*
+ * If reg_paths is non-NULL, it is a vector of paths that libmpathpersist
+ * registered. If the number of registered keys is smaller than the number
+ * of registered paths, then a preempt likely occurred while
+ * libmpathpersist was registering the key. As long as there are still some
+ * registered keys, treat the preempt as happening first, and make sure to
+ * register keys on all the paths. If the number of registered keys is at
+ * least as large as the number of registered paths, then no preempt happened,
+ * and multipathd does not need to re-register the paths that libmpathpersist
+ * handled.
+ */
+void pr_register_active_paths(struct multipath *mpp, const struct _vector *reg_paths)
{
- unsigned int i, j, nr_keys = 0;
- unsigned int nr_active = 0;
+ unsigned int i, j, k, nr_keys = 0;
+ unsigned int wanted_nr = VECTOR_SIZE(reg_paths);
struct path *pp;
struct pathgroup *pgp;
-
- if (check_nr_active) {
- nr_active = count_active_paths(mpp);
- if (!nr_active)
- return;
- }
+ char *pathname;
vector_foreach_slot (mpp->pg, pgp, i) {
vector_foreach_slot (pgp->paths, pp, j) {
if (mpp->prflag == PR_UNSET)
return;
- if (pp->state == PATH_UP || pp->state == PATH_GHOST) {
- nr_keys = mpath_pr_event_handle(pp, nr_keys, nr_active);
- if (check_nr_active && nr_keys == nr_active)
- return;
+ if (pp->state != PATH_UP && pp->state != PATH_GHOST)
+ continue;
+ if (wanted_nr && nr_keys) {
+ vector_foreach_slot (reg_paths, pathname, k) {
+ if (strcmp(pp->dev_t, pathname) == 0) {
+ goto skip;
+ }
+ }
}
+ nr_keys = mpath_pr_event_handle(pp, nr_keys, wanted_nr);
+ if (nr_keys && nr_keys < wanted_nr) {
+ /*
+ * Incorrect number of registered keys. Need
+ * to register all devices
+ */
+ wanted_nr = 0;
+ }
+ skip:; /* a statement must follow a label on pre C23 clang */
}
}
}
@@ -599,8 +618,7 @@ handle_orphaned_offline_paths(vector offline_paths)
pp->add_when_online = true;
}
-static void
-cleanup_reset_vec(struct _vector **v)
+void cleanup_reset_vec(struct _vector **v)
{
vector_reset(*v);
}
@@ -656,7 +674,7 @@ fail:
sync_map_state(mpp);
- pr_register_active_paths(mpp, false);
+ pr_register_active_paths(mpp, NULL);
if (VECTOR_SIZE(offline_paths) != 0)
handle_orphaned_offline_paths(offline_paths);
@@ -1279,7 +1297,7 @@ rescan:
if (retries >= 0) {
if ((mpp->prflag == PR_SET && prflag != PR_SET) || start_waiter)
- pr_register_active_paths(mpp, false);
+ pr_register_active_paths(mpp, NULL);
condlog(2, "%s [%s]: path added to devmap %s",
pp->dev, pp->dev_t, mpp->alias);
return 0;
@@ -1875,6 +1893,7 @@ uxlsnrloop (void * ap)
set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
set_handler_callback(GETPRSTATUS|MAP, cli_getprstatus);
set_handler_callback(SETPRSTATUS|MAP, cli_setprstatus);
+ set_handler_callback(SETPRSTATUS|MAP|PATHLIST, cli_setprstatus_list);
set_handler_callback(UNSETPRSTATUS|MAP, cli_unsetprstatus);
set_handler_callback(FORCEQ|DAEMON, cli_force_no_daemon_q);
set_handler_callback(RESTOREQ|DAEMON, cli_restore_no_daemon_q);
@@ -2581,7 +2600,7 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
mpath_pr_event_handle(pp, 0, 0);
if (pp->mpp->prflag == PR_SET &&
prflag != PR_SET)
- pr_register_active_paths(pp->mpp, false);
+ pr_register_active_paths(pp->mpp, NULL);
}
}
@@ -2950,7 +2969,7 @@ configure (struct vectors * vecs)
vector_foreach_slot(mpvec, mpp, i){
if (remember_wwid(mpp->wwid) == 1)
trigger_paths_udev_change(mpp, true);
- pr_register_active_paths(mpp, false);
+ pr_register_active_paths(mpp, NULL);
}
/*
@@ -3933,8 +3952,8 @@ static int update_map_pr(struct multipath *mpp, struct path *pp, unsigned int *n
*
* nr_keys_wanted: Only used if nr_keys_needed is 0, so we don't know how
* many keys we currently have. If nr_keys_wanted in non-zero and the
- * number of keys found by the initial call to update_map_pr() matches it,
- * exit early, since we have all the keys we are expecting.
+ * number of keys found by the initial call to update_map_pr() is at least
+ * as large as it, exit early, since we have all the keys we are expecting.
*
* The function returns the number of keys that are registered or 0 if
* it's unknown.
@@ -3957,7 +3976,7 @@ mpath_pr_event_handle(struct path *pp, unsigned int nr_keys_needed,
nr_keys_needed = 1;
if (update_map_pr(mpp, pp, &nr_keys_needed) != MPATH_PR_SUCCESS)
return 0;
- if (nr_keys_wanted && nr_keys_wanted == nr_keys_needed)
+ if (nr_keys_wanted && nr_keys_wanted <= nr_keys_needed)
return nr_keys_needed;
}
diff --git a/multipathd/main.h b/multipathd/main.h
index 8a4c5f88..ea12455f 100644
--- a/multipathd/main.h
+++ b/multipathd/main.h
@@ -60,5 +60,7 @@ int resize_map(struct multipath *mpp, unsigned long long size,
struct vectors *vecs);
void set_pr(struct multipath *mpp);
void unset_pr(struct multipath *mpp);
-void pr_register_active_paths(struct multipath *mpp, bool check_active_nr);
+void pr_register_active_paths(struct multipath *mpp,
+ const struct _vector *registered_paths);
+void cleanup_reset_vec(struct _vector **v);
#endif /* MAIN_H */
diff --git a/multipathd/multipathd.8 b/multipathd/multipathd.8
index 2ed036d4..0a76f3e3 100644
--- a/multipathd/multipathd.8
+++ b/multipathd/multipathd.8
@@ -324,11 +324,17 @@ will not be disabled when the daemon stops.
Restores configured queue_without_daemon mode.
.
.TP
-.B map|multipath $map setprstatus
+.B setprstatus map|multipath $map
Enable persistent reservation management on $map.
.
.TP
-.B map|multipath $map unsetprstatus
+.B setprstatus map|multipath $map pathlist $pathlist
+Enable persistent reservation management on $map, and notify multipathd of
+the paths that have been registered, so it doesn't attempt to re-register
+them.
+.
+.TP
+.B unsetprstatus map|multipath $map
Disable persistent reservation management on $map.
.
.TP

View File

@ -1,6 +1,6 @@
Name: device-mapper-multipath
Version: 0.8.7
Release: 39%{?dist}.1
Release: 45%{?dist}
Summary: Tools to manage multipath devices using device-mapper
License: GPLv2
URL: http://christophe.varoqui.free.fr/
@ -206,7 +206,20 @@ Patch0193: 0193-libmpathpersist-Fix-race-between-restoring-a-path-an.patch
Patch0194: 0194-multipathd-Fix-tracking-of-old-PR-key.patch
Patch0195: 0195-multipathd-Fix-race-while-registering-PR-key.patch
Patch0196: 0196-mpathpersist-Fix-REPORT-CAPABILITIES-output.patch
Patch0197: 0197-multipath-tools-update-NFINIDAT-InfiniBox-config-in-.patch
Patch0198: 0198-multipathd-print-path-offline-message-even-without-a.patch
Patch0199: 0199-libmultipath-improve-cleanup-of-uevent-queues-on-exi.patch
Patch0200: 0200-uevent_dispatch-use-while-in-wait-loop.patch
Patch0201: 0201-libmultipath-uevent_dispatch-process-uevents-one-by-.patch
Patch0202: 0202-libmultipath-uevent_listen-don-t-delay-uevents.patch
Patch0203: 0203-libmultipath-uevent-use-struct-to-pass-parameters-ar.patch
Patch0204: 0204-libmultipath-is_uevent_busy-check-servicing_uev-unde.patch
Patch0205: 0205-multipathd-make-multipathd-show-status-busy-checker-.patch
Patch0206: 0206-libmultipath-add-purge_disconnected-configuration-op.patch
Patch0207: 0207-multipathd-implement-purge-functionality-for-disconn.patch
Patch0208: 0208-libmpathpersist-fix-register-retry-status-checking.patch
Patch0209: 0209-multipathd-remember-number-of-registered-keys-when-i.patch
Patch0210: 0210-libmpathpersist-fix-code-for-skipping-multipathd-pat.patch
# runtime
Requires: %{name}-libs = %{version}-%{release}
@ -409,7 +422,51 @@ fi
%{_pkgconfdir}/libdmmp.pc
%changelog
* Tue Nov 11 2025 Benjamin Marzinski <bmarzins@redhat.com> - 0.8.7-39.1
* Thu Feb 19 2026 Benjamin Marzinski <bmarzins@redhat.com> - 0.8.7-45
- Add 0208-libmpathpersist-fix-register-retry-status-checking.patch
- Add 0209-multipathd-remember-number-of-registered-keys-when-i.patch
- Add 0210-libmpathpersist-fix-code-for-skipping-multipathd-pat.patch
* Fixes RHEL-148462 ("Improve multipathd's handling of updating
persistent reservations on restored paths. [rhel-9]")
- Resolves: RHEL-148462
* Thu Jan 29 2026 Benjamin Marzinski <bmarzins@redhat.com> - 0.8.7-44
- Add 0206-libmultipath-add-purge_disconnected-configuration-op.patch
- Add 0207-multipathd-implement-purge-functionality-for-disconn.patch
* Fixes RHEL-141291 ("Add purge_disconnected support to multipathd
[rhel-9]")
- Resolves: RHEL-141291
* Thu Jan 22 2026 Benjamin Marzinski <bmarzins@redhat.com> - 0.8.7-43
- Add 0198-multipathd-print-path-offline-message-even-without-a.patch
* Fixes RHEL-133814 ("log_checker_err is not printing messages
repeatedly for failed path [rhel-9]")
- Add 0199-libmultipath-improve-cleanup-of-uevent-queues-on-exi.patch
- Add 0200-uevent_dispatch-use-while-in-wait-loop.patch
- Add 0201-libmultipath-uevent_dispatch-process-uevents-one-by-.patch
- Add 0202-libmultipath-uevent_listen-don-t-delay-uevents.patch
- Add 0203-libmultipath-uevent-use-struct-to-pass-parameters-ar.patch
- Add 0204-libmultipath-is_uevent_busy-check-servicing_uev-unde.patch
- Add 0205-multipathd-make-multipathd-show-status-busy-checker-.patch
* Fixes RHEL-135904 (VM reboot in RHOSP environment fails with error
"Could not open '/dev/dm-95': No such file or directory")
- Resolves: RHEL-133814
- Resolves: RHEL-135904
* Wed Nov 19 2025 Benjamin Marzinski <bmarzins@redhat.com> - 0.8.7-42
- Add 0197-multipath-tools-update-NFINIDAT-InfiniBox-config-in-.patch
* Fixes RHEL-128396 ("Update the multipath.conf stanza for Infinidat
storage [rhel-9]")
- Resolves: RHEL-128396
* Tue Nov 11 2025 Benjamin Marzinski <bmarzins@redhat.com> - 0.8.7-41
- Add 0195-multipathd-Fix-race-while-registering-PR-key.patch
- Add 0196-mpathpersist-Fix-REPORT-CAPABILITIES-output.patch
* Fixes RHEL-118515 ("There are many bugs in multipath's persistent
reservation handling")
- Resolves: RHEL-118515
* Wed Oct 1 2025 Benjamin Marzinski <bmarzins@redhat.com> - 0.8.7-40
- Add 0150-libmpathpersist-retry-commands-on-other-paths-in-mpa.patch
- Add 0151-libmpathpersist-check-released-key-against-the-reser.patch
- Add 0152-multipathd-remove-thread-from-mpath_pr_event_handle.patch
@ -455,11 +512,9 @@ fi
- Add 0192-libmpathpersist-Fix-unregistering-while-holding-the-.patch
- Add 0193-libmpathpersist-Fix-race-between-restoring-a-path-an.patch
- Add 0194-multipathd-Fix-tracking-of-old-PR-key.patch
- Add 0195-multipathd-Fix-race-while-registering-PR-key.patch
- Add 0196-mpathpersist-Fix-REPORT-CAPABILITIES-output.patch
* Fixes RHEL-118723 ("There are many bugs in multipath's persistent
reservation handling [rhel-9.7.z]")
- Resolves: RHEL-118723
* Fixes RHEL-118515 ("There are many bugs in multipath's persistent
reservation handling")
- Resolves: RHEL-118515
* Mon Jul 14 2025 Benjamin Marzinski <bmarzins@redhat.com> - 0.8.7-39
- Add 0145-multipath-tools-add-DellEMC-ME4-PowerVault-ME4-to-ha.patch