270 lines
8.2 KiB
Diff
270 lines
8.2 KiB
Diff
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||
|
From: Benjamin Marzinski <bmarzins@redhat.com>
|
||
|
Date: Thu, 8 Feb 2018 16:56:45 -0600
|
||
|
Subject: [PATCH] libmultipath: fix tur checker locking
|
||
|
|
||
|
Commit 6e2423fd fixed a bug where the tur checker could cancel a
|
||
|
detached thread after it had exitted. However in fixing this, the new
|
||
|
code grabbed a mutex (to call condlog) while holding a spin_lock. To
|
||
|
deal with this, I've done away with the holder spin_lock completely, and
|
||
|
replaced it with two atomic variables, based on a suggestion by Martin
|
||
|
Wilck.
|
||
|
|
||
|
The holder variable works exactly like before. When the checker is
|
||
|
initialized, it is set to 1. When a thread is created it is incremented.
|
||
|
When either the thread or the checker are done with the context, they
|
||
|
atomically decrement the holder variable and check its value. If it
|
||
|
is 0, they free the context. If it is 1, they never touch the context
|
||
|
again.
|
||
|
|
||
|
The other variable has changed. First, ct->running and ct->thread have
|
||
|
switched uses. ct->thread is now only ever accessed by the checker,
|
||
|
never the thread. If it is non-NULL, a thread has started up, but
|
||
|
hasn't been dealt with by the checker yet. It is also obviously used
|
||
|
by the checker to cancel the thread.
|
||
|
|
||
|
ct->running is now an atomic variable. When the thread is started
|
||
|
it is set to 1. When the checker wants to kill a thread, it atomically
|
||
|
sets the value to 0 and reads the previous value. If it was 1,
|
||
|
the checker cancels the thread. If it was 0, the nothing needs to be
|
||
|
done. After the checker has dealt with the thread, it sets ct->thread
|
||
|
to NULL.
|
||
|
|
||
|
Right before the thread finishes and pops the cleanup handler, it
|
||
|
atomically sets the value of ct->running to 0 and reads the previous
|
||
|
value. If it was 1, the thread just pops the cleanup handler and exits.
|
||
|
If it was 0, then the checker is trying to cancel the thread, and so the
|
||
|
thread calls pause(), which is a cancellation point.
|
||
|
|
||
|
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
|
||
|
---
|
||
|
libmultipath/checkers/tur.c | 107 ++++++++++++++++++--------------------------
|
||
|
1 file changed, 43 insertions(+), 64 deletions(-)
|
||
|
|
||
|
diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
|
||
|
index b4a5cb2..9155960 100644
|
||
|
--- a/libmultipath/checkers/tur.c
|
||
|
+++ b/libmultipath/checkers/tur.c
|
||
|
@@ -15,6 +15,7 @@
|
||
|
#include <errno.h>
|
||
|
#include <sys/time.h>
|
||
|
#include <pthread.h>
|
||
|
+#include <urcu/uatomic.h>
|
||
|
|
||
|
#include "checkers.h"
|
||
|
|
||
|
@@ -44,7 +45,6 @@ struct tur_checker_context {
|
||
|
pthread_t thread;
|
||
|
pthread_mutex_t lock;
|
||
|
pthread_cond_t active;
|
||
|
- pthread_spinlock_t hldr_lock;
|
||
|
int holders;
|
||
|
char message[CHECKER_MSG_LEN];
|
||
|
};
|
||
|
@@ -74,13 +74,12 @@ int libcheck_init (struct checker * c)
|
||
|
|
||
|
ct->state = PATH_UNCHECKED;
|
||
|
ct->fd = -1;
|
||
|
- ct->holders = 1;
|
||
|
+ uatomic_set(&ct->holders, 1);
|
||
|
pthread_cond_init_mono(&ct->active);
|
||
|
pthread_mutexattr_init(&attr);
|
||
|
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
|
||
|
pthread_mutex_init(&ct->lock, &attr);
|
||
|
pthread_mutexattr_destroy(&attr);
|
||
|
- pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE);
|
||
|
c->context = ct;
|
||
|
|
||
|
return 0;
|
||
|
@@ -90,7 +89,6 @@ static void cleanup_context(struct tur_checker_context *ct)
|
||
|
{
|
||
|
pthread_mutex_destroy(&ct->lock);
|
||
|
pthread_cond_destroy(&ct->active);
|
||
|
- pthread_spin_destroy(&ct->hldr_lock);
|
||
|
free(ct);
|
||
|
}
|
||
|
|
||
|
@@ -99,16 +97,14 @@ void libcheck_free (struct checker * c)
|
||
|
if (c->context) {
|
||
|
struct tur_checker_context *ct = c->context;
|
||
|
int holders;
|
||
|
- pthread_t thread;
|
||
|
-
|
||
|
- pthread_spin_lock(&ct->hldr_lock);
|
||
|
- ct->holders--;
|
||
|
- holders = ct->holders;
|
||
|
- thread = ct->thread;
|
||
|
- pthread_spin_unlock(&ct->hldr_lock);
|
||
|
- if (holders)
|
||
|
- pthread_cancel(thread);
|
||
|
- else
|
||
|
+ int running;
|
||
|
+
|
||
|
+ running = uatomic_xchg(&ct->running, 0);
|
||
|
+ if (running)
|
||
|
+ pthread_cancel(ct->thread);
|
||
|
+ ct->thread = 0;
|
||
|
+ holders = uatomic_sub_return(&ct->holders, 1);
|
||
|
+ if (!holders)
|
||
|
cleanup_context(ct);
|
||
|
c->context = NULL;
|
||
|
}
|
||
|
@@ -220,26 +216,12 @@ static void cleanup_func(void *data)
|
||
|
{
|
||
|
int holders;
|
||
|
struct tur_checker_context *ct = data;
|
||
|
- pthread_spin_lock(&ct->hldr_lock);
|
||
|
- ct->holders--;
|
||
|
- holders = ct->holders;
|
||
|
- ct->thread = 0;
|
||
|
- pthread_spin_unlock(&ct->hldr_lock);
|
||
|
+
|
||
|
+ holders = uatomic_sub_return(&ct->holders, 1);
|
||
|
if (!holders)
|
||
|
cleanup_context(ct);
|
||
|
}
|
||
|
|
||
|
-static int tur_running(struct tur_checker_context *ct)
|
||
|
-{
|
||
|
- pthread_t thread;
|
||
|
-
|
||
|
- pthread_spin_lock(&ct->hldr_lock);
|
||
|
- thread = ct->thread;
|
||
|
- pthread_spin_unlock(&ct->hldr_lock);
|
||
|
-
|
||
|
- return thread != 0;
|
||
|
-}
|
||
|
-
|
||
|
static void copy_msg_to_tcc(void *ct_p, const char *msg)
|
||
|
{
|
||
|
struct tur_checker_context *ct = ct_p;
|
||
|
@@ -252,7 +234,7 @@ static void copy_msg_to_tcc(void *ct_p, const char *msg)
|
||
|
static void *tur_thread(void *ctx)
|
||
|
{
|
||
|
struct tur_checker_context *ct = ctx;
|
||
|
- int state;
|
||
|
+ int state, running;
|
||
|
char devt[32];
|
||
|
|
||
|
condlog(3, "%s: tur checker starting up",
|
||
|
@@ -278,6 +260,11 @@ static void *tur_thread(void *ctx)
|
||
|
|
||
|
condlog(3, "%s: tur checker finished, state %s",
|
||
|
tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
|
||
|
+
|
||
|
+ running = uatomic_xchg(&ct->running, 0);
|
||
|
+ if (!running)
|
||
|
+ pause();
|
||
|
+
|
||
|
tur_thread_cleanup_pop(ct);
|
||
|
|
||
|
return ((void *)0);
|
||
|
@@ -325,7 +312,6 @@ int libcheck_check(struct checker * c)
|
||
|
int tur_status, r;
|
||
|
char devt[32];
|
||
|
|
||
|
-
|
||
|
if (!ct)
|
||
|
return PATH_UNCHECKED;
|
||
|
|
||
|
@@ -349,38 +335,29 @@ int libcheck_check(struct checker * c)
|
||
|
return PATH_WILD;
|
||
|
}
|
||
|
|
||
|
- if (ct->running) {
|
||
|
- /*
|
||
|
- * Check if TUR checker is still running. Hold hldr_lock
|
||
|
- * around the pthread_cancel() call to avoid that
|
||
|
- * pthread_cancel() gets called after the (detached) TUR
|
||
|
- * thread has exited.
|
||
|
- */
|
||
|
- pthread_spin_lock(&ct->hldr_lock);
|
||
|
- if (ct->thread) {
|
||
|
- if (tur_check_async_timeout(c)) {
|
||
|
- condlog(3, "%s: tur checker timeout",
|
||
|
- tur_devt(devt, sizeof(devt), ct));
|
||
|
+ if (ct->thread) {
|
||
|
+ if (tur_check_async_timeout(c)) {
|
||
|
+ int running = uatomic_xchg(&ct->running, 0);
|
||
|
+ if (running)
|
||
|
pthread_cancel(ct->thread);
|
||
|
- ct->running = 0;
|
||
|
- MSG(c, MSG_TUR_TIMEOUT);
|
||
|
- tur_status = PATH_TIMEOUT;
|
||
|
- } else {
|
||
|
- condlog(3, "%s: tur checker not finished",
|
||
|
+ condlog(3, "%s: tur checker timeout",
|
||
|
+ tur_devt(devt, sizeof(devt), ct));
|
||
|
+ ct->thread = 0;
|
||
|
+ MSG(c, MSG_TUR_TIMEOUT);
|
||
|
+ tur_status = PATH_TIMEOUT;
|
||
|
+ } else if (uatomic_read(&ct->running) != 0) {
|
||
|
+ condlog(3, "%s: tur checker not finished",
|
||
|
tur_devt(devt, sizeof(devt), ct));
|
||
|
- ct->running++;
|
||
|
- tur_status = PATH_PENDING;
|
||
|
- }
|
||
|
+ tur_status = PATH_PENDING;
|
||
|
} else {
|
||
|
/* TUR checker done */
|
||
|
- ct->running = 0;
|
||
|
+ ct->thread = 0;
|
||
|
tur_status = ct->state;
|
||
|
strlcpy(c->message, ct->message, sizeof(c->message));
|
||
|
}
|
||
|
- pthread_spin_unlock(&ct->hldr_lock);
|
||
|
pthread_mutex_unlock(&ct->lock);
|
||
|
} else {
|
||
|
- if (tur_running(ct)) {
|
||
|
+ if (uatomic_read(&ct->running) != 0) {
|
||
|
/* pthread cancel failed. continue in sync mode */
|
||
|
pthread_mutex_unlock(&ct->lock);
|
||
|
condlog(3, "%s: tur thread not responding",
|
||
|
@@ -391,19 +368,17 @@ int libcheck_check(struct checker * c)
|
||
|
ct->state = PATH_UNCHECKED;
|
||
|
ct->fd = c->fd;
|
||
|
ct->timeout = c->timeout;
|
||
|
- pthread_spin_lock(&ct->hldr_lock);
|
||
|
- ct->holders++;
|
||
|
- pthread_spin_unlock(&ct->hldr_lock);
|
||
|
+ uatomic_add(&ct->holders, 1);
|
||
|
+ uatomic_set(&ct->running, 1);
|
||
|
tur_set_async_timeout(c);
|
||
|
setup_thread_attr(&attr, 32 * 1024, 1);
|
||
|
r = pthread_create(&ct->thread, &attr, tur_thread, ct);
|
||
|
pthread_attr_destroy(&attr);
|
||
|
if (r) {
|
||
|
- pthread_spin_lock(&ct->hldr_lock);
|
||
|
- ct->holders--;
|
||
|
- pthread_spin_unlock(&ct->hldr_lock);
|
||
|
- pthread_mutex_unlock(&ct->lock);
|
||
|
+ uatomic_sub(&ct->holders, 1);
|
||
|
+ uatomic_set(&ct->running, 0);
|
||
|
ct->thread = 0;
|
||
|
+ pthread_mutex_unlock(&ct->lock);
|
||
|
condlog(3, "%s: failed to start tur thread, using"
|
||
|
" sync mode", tur_devt(devt, sizeof(devt), ct));
|
||
|
return tur_check(c->fd, c->timeout,
|
||
|
@@ -414,12 +389,16 @@ int libcheck_check(struct checker * c)
|
||
|
tur_status = ct->state;
|
||
|
strlcpy(c->message, ct->message, sizeof(c->message));
|
||
|
pthread_mutex_unlock(&ct->lock);
|
||
|
- if (tur_running(ct) &&
|
||
|
+ if (uatomic_read(&ct->running) != 0 &&
|
||
|
(tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
|
||
|
condlog(3, "%s: tur checker still running",
|
||
|
tur_devt(devt, sizeof(devt), ct));
|
||
|
- ct->running = 1;
|
||
|
tur_status = PATH_PENDING;
|
||
|
+ } else {
|
||
|
+ int running = uatomic_xchg(&ct->running, 0);
|
||
|
+ if (running)
|
||
|
+ pthread_cancel(ct->thread);
|
||
|
+ ct->thread = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
--
|
||
|
2.7.4
|
||
|
|