device-mapper-multipath-0.8.7-8
Add 0044-multipathd-Don-t-keep-starting-TUR-threads-if-they-a.patch Resolves: bz #2070266
This commit is contained in:
parent
8daba4e745
commit
4828101630
@ -0,0 +1,86 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Benjamin Marzinski <bmarzins@redhat.com>
|
||||
Date: Tue, 29 Mar 2022 22:22:10 -0500
|
||||
Subject: [PATCH] multipathd: Don't keep starting TUR threads, if they always
|
||||
hang.
|
||||
|
||||
If a tur thread hangs, multipathd was simply creating a new thread, and
|
||||
assuming that the old thread would get cleaned up eventually. I have
|
||||
seen a case recently where there were 26000 multipathd threads on a
|
||||
system, all stuck trying to send TUR commands to path devices. The root
|
||||
cause of the issue was a scsi kernel issue, but it shows that the way
|
||||
multipathd currently deals with stuck threads could use some refinement.
|
||||
|
||||
Now, when one tur thread hangs, multipathd will act as it did before.
|
||||
If a second one in a row hangs, multipathd will instead wait for it to
|
||||
complete before starting another thread. Once the thread completes, the
|
||||
count is reset.
|
||||
|
||||
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
|
||||
Reviewed-by: Martin Wilck <mwilck@suse.com>
|
||||
---
|
||||
libmultipath/checkers/tur.c | 23 +++++++++++++++++++++--
|
||||
1 file changed, 21 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
|
||||
index a4b4a213..d82f7dbc 100644
|
||||
--- a/libmultipath/checkers/tur.c
|
||||
+++ b/libmultipath/checkers/tur.c
|
||||
@@ -27,6 +27,7 @@
|
||||
|
||||
#define TUR_CMD_LEN 6
|
||||
#define HEAVY_CHECK_COUNT 10
|
||||
+#define MAX_NR_TIMEOUTS 1
|
||||
|
||||
enum {
|
||||
MSG_TUR_RUNNING = CHECKER_FIRST_MSGID,
|
||||
@@ -55,6 +56,7 @@ struct tur_checker_context {
|
||||
int holders; /* uatomic access only */
|
||||
int msgid;
|
||||
struct checker_context ctx;
|
||||
+ unsigned int nr_timeouts;
|
||||
};
|
||||
|
||||
int libcheck_init (struct checker * c)
|
||||
@@ -359,8 +361,23 @@ int libcheck_check(struct checker * c)
|
||||
}
|
||||
} else {
|
||||
if (uatomic_read(&ct->holders) > 1) {
|
||||
+ /* The thread has been cancelled but hasn't quit. */
|
||||
+ if (ct->nr_timeouts == MAX_NR_TIMEOUTS) {
|
||||
+ condlog(2, "%d:%d : waiting for stalled tur thread to finish",
|
||||
+ major(ct->devt), minor(ct->devt));
|
||||
+ ct->nr_timeouts++;
|
||||
+ }
|
||||
/*
|
||||
- * The thread has been cancelled but hasn't quit.
|
||||
+ * Don't start new threads until the last one has
|
||||
+ * finished.
|
||||
+ */
|
||||
+ if (ct->nr_timeouts > MAX_NR_TIMEOUTS) {
|
||||
+ c->msgid = MSG_TUR_TIMEOUT;
|
||||
+ return PATH_TIMEOUT;
|
||||
+ }
|
||||
+ ct->nr_timeouts++;
|
||||
+ /*
|
||||
+ * Start a new thread while the old one is stalled.
|
||||
* We have to prevent it from interfering with the new
|
||||
* thread. We create a new context and leave the old
|
||||
* one with the stale thread, hoping it will clean up
|
||||
@@ -376,13 +393,15 @@ int libcheck_check(struct checker * c)
|
||||
*/
|
||||
if (libcheck_init(c) != 0)
|
||||
return PATH_UNCHECKED;
|
||||
+ ((struct tur_checker_context *)c->context)->nr_timeouts = ct->nr_timeouts;
|
||||
|
||||
if (!uatomic_sub_return(&ct->holders, 1))
|
||||
/* It did terminate, eventually */
|
||||
cleanup_context(ct);
|
||||
|
||||
ct = c->context;
|
||||
- }
|
||||
+ } else
|
||||
+ ct->nr_timeouts = 0;
|
||||
/* Start new TUR checker */
|
||||
pthread_mutex_lock(&ct->lock);
|
||||
tur_status = ct->state = PATH_PENDING;
|
@ -1,6 +1,6 @@
|
||||
Name: device-mapper-multipath
|
||||
Version: 0.8.7
|
||||
Release: 7%{?dist}
|
||||
Release: 8%{?dist}
|
||||
Summary: Tools to manage multipath devices using device-mapper
|
||||
License: GPLv2
|
||||
URL: http://christophe.varoqui.free.fr/
|
||||
@ -53,6 +53,7 @@ Patch0040: 0040-RH-add-support-to-mpathconf-for-setting-recheck_wwid.patch
|
||||
Patch0041: 0041-multipathd-handle-fpin-events.patch
|
||||
Patch0042: 0042-multipathd-disallow-changing-to-from-fpin-marginal-p.patch
|
||||
Patch0043: 0043-libmultipath-fix-printing-native-nvme-multipath-topo.patch
|
||||
Patch0044: 0044-multipathd-Don-t-keep-starting-TUR-threads-if-they-a.patch
|
||||
|
||||
|
||||
# runtime
|
||||
@ -252,6 +253,10 @@ fi
|
||||
%{_pkgconfdir}/libdmmp.pc
|
||||
|
||||
%changelog
|
||||
* Tue May 10 2022 Benjamin Marzinski <bmarzins@redhat.com> - 0.8.7-8
|
||||
- Add 0044-multipathd-Don-t-keep-starting-TUR-threads-if-they-a.patch
|
||||
- Resolves: bz #2070266
|
||||
|
||||
* Wed Feb 16 2022 Benjamin Marzinski <bmarzins@redhat.com> - 0.8.7-7
|
||||
- Add 0043-libmultipath-fix-printing-native-nvme-multipath-topo.patch
|
||||
- Resolves: bz #2054839
|
||||
|
Loading…
Reference in New Issue
Block a user