From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 14 Jan 2021 20:20:23 -0600 Subject: [PATCH] multipathd: avoid io_err_stat crash during shutdown The checker thread is reponsible for enqueueing paths for the io_err_stat thread to check. During shutdown, the io_err_stat thread is shut down and cleaned up before the checker thread. There is no code to make sure that the checker thread isn't accessing the io_err_stat pathvec or its mutex while they are being freed, which can lead to memory corruption crashes. To solve this, get rid of the io_err_stat_pathvec structure, and statically define the mutex. This means that the mutex is always valid to access, and the io_err_stat pathvec can only be accessed while holding it. If the io_err_stat thread has already been cleaned up when the checker tries to access the pathvec, it will be NULL, and the checker will simply fail to enqueue the path. This change also fixes a bug in free_io_err_pathvec(), which previously only attempted to free the pathvec if it was not set, instead of when it was set. Signed-off-by: Benjamin Marzinski Reviewed-by: Martin Wilck --- libmultipath/io_err_stat.c | 112 +++++++++++++++---------------------- libmultipath/util.c | 5 ++ libmultipath/util.h | 1 + 3 files changed, 50 insertions(+), 68 deletions(-) diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c index 449760a0..f6c564f0 100644 --- a/libmultipath/io_err_stat.c +++ b/libmultipath/io_err_stat.c @@ -34,6 +34,7 @@ #include "lock.h" #include "time-util.h" #include "io_err_stat.h" +#include "util.h" #define IOTIMEOUT_SEC 60 #define TIMEOUT_NO_IO_NSEC 10000000 /*10ms = 10000000ns*/ @@ -46,12 +47,6 @@ #define io_err_stat_log(prio, fmt, args...) \ condlog(prio, "io error statistic: " fmt, ##args) - -struct io_err_stat_pathvec { - pthread_mutex_t mutex; - vector pathvec; -}; - struct dio_ctx { struct timespec io_starttime; unsigned int blksize; @@ -76,9 +71,10 @@ pthread_attr_t io_err_stat_attr; static pthread_mutex_t io_err_thread_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t io_err_thread_cond = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t io_err_pathvec_lock = PTHREAD_MUTEX_INITIALIZER; static int io_err_thread_running = 0; -static struct io_err_stat_pathvec *paths; +static vector io_err_pathvec; struct vectors *vecs; io_context_t ioctx; @@ -208,46 +204,23 @@ static void free_io_err_stat_path(struct io_err_stat_path *p) FREE(p); } -static struct io_err_stat_pathvec *alloc_pathvec(void) -{ - struct io_err_stat_pathvec *p; - int r; - - p = (struct io_err_stat_pathvec *)MALLOC(sizeof(*p)); - if (!p) - return NULL; - p->pathvec = vector_alloc(); - if (!p->pathvec) - goto out_free_struct_pathvec; - r = pthread_mutex_init(&p->mutex, NULL); - if (r) - goto out_free_member_pathvec; - - return p; - -out_free_member_pathvec: - vector_free(p->pathvec); -out_free_struct_pathvec: - FREE(p); - return NULL; -} - -static void free_io_err_pathvec(struct io_err_stat_pathvec *p) +static void free_io_err_pathvec(void) { struct io_err_stat_path *path; int i; - if (!p) - return; - pthread_mutex_destroy(&p->mutex); - if (!p->pathvec) { - vector_foreach_slot(p->pathvec, path, i) { - destroy_directio_ctx(path); - free_io_err_stat_path(path); - } - vector_free(p->pathvec); + pthread_mutex_lock(&io_err_pathvec_lock); + pthread_cleanup_push(cleanup_mutex, &io_err_pathvec_lock); + if (!io_err_pathvec) + goto out; + vector_foreach_slot(io_err_pathvec, path, i) { + destroy_directio_ctx(path); + free_io_err_stat_path(path); } - FREE(p); + vector_free(io_err_pathvec); + io_err_pathvec = NULL; +out: + pthread_cleanup_pop(1); } /* @@ -259,13 +232,13 @@ static int enqueue_io_err_stat_by_path(struct path *path) { struct io_err_stat_path *p; - pthread_mutex_lock(&paths->mutex); - p = find_err_path_by_dev(paths->pathvec, path->dev); + pthread_mutex_lock(&io_err_pathvec_lock); + p = find_err_path_by_dev(io_err_pathvec, path->dev); if (p) { - pthread_mutex_unlock(&paths->mutex); + pthread_mutex_unlock(&io_err_pathvec_lock); return 0; } - pthread_mutex_unlock(&paths->mutex); + pthread_mutex_unlock(&io_err_pathvec_lock); p = alloc_io_err_stat_path(); if (!p) @@ -277,18 +250,18 @@ static int enqueue_io_err_stat_by_path(struct path *path) if (setup_directio_ctx(p)) goto free_ioerr_path; - pthread_mutex_lock(&paths->mutex); - if (!vector_alloc_slot(paths->pathvec)) + pthread_mutex_lock(&io_err_pathvec_lock); + if (!vector_alloc_slot(io_err_pathvec)) goto unlock_destroy; - vector_set_slot(paths->pathvec, p); - pthread_mutex_unlock(&paths->mutex); + vector_set_slot(io_err_pathvec, p); + pthread_mutex_unlock(&io_err_pathvec_lock); io_err_stat_log(2, "%s: enqueue path %s to check", path->mpp->alias, path->dev); return 0; unlock_destroy: - pthread_mutex_unlock(&paths->mutex); + pthread_mutex_unlock(&io_err_pathvec_lock); destroy_directio_ctx(p); free_ioerr_path: free_io_err_stat_path(p); @@ -421,9 +394,9 @@ static int delete_io_err_stat_by_addr(struct io_err_stat_path *p) { int i; - i = find_slot(paths->pathvec, p); + i = find_slot(io_err_pathvec, p); if (i != -1) - vector_del_slot(paths->pathvec, i); + vector_del_slot(io_err_pathvec, i); destroy_directio_ctx(p); free_io_err_stat_path(p); @@ -594,7 +567,7 @@ static void poll_async_io_timeout(void) if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0) return; - vector_foreach_slot(paths->pathvec, pp, i) { + vector_foreach_slot(io_err_pathvec, pp, i) { for (j = 0; j < CONCUR_NR_EVENT; j++) { rc = try_to_cancel_timeout_io(pp->dio_ctx_array + j, &curr_time, pp->devname); @@ -640,7 +613,7 @@ static void handle_async_io_done_event(struct io_event *io_evt) int rc = PATH_UNCHECKED; int i, j; - vector_foreach_slot(paths->pathvec, pp, i) { + vector_foreach_slot(io_err_pathvec, pp, i) { for (j = 0; j < CONCUR_NR_EVENT; j++) { ct = pp->dio_ctx_array + j; if (&ct->io == io_evt->obj) { @@ -674,19 +647,14 @@ static void service_paths(void) struct io_err_stat_path *pp; int i; - pthread_mutex_lock(&paths->mutex); - vector_foreach_slot(paths->pathvec, pp, i) { + pthread_mutex_lock(&io_err_pathvec_lock); + vector_foreach_slot(io_err_pathvec, pp, i) { send_batch_async_ios(pp); process_async_ios_event(TIMEOUT_NO_IO_NSEC, pp->devname); poll_async_io_timeout(); poll_io_err_stat(vecs, pp); } - pthread_mutex_unlock(&paths->mutex); -} - -static void cleanup_unlock(void *arg) -{ - pthread_mutex_unlock((pthread_mutex_t*) arg); + pthread_mutex_unlock(&io_err_pathvec_lock); } static void cleanup_exited(__attribute__((unused)) void *arg) @@ -744,12 +712,17 @@ int start_io_err_stat_thread(void *data) io_err_stat_log(4, "io_setup failed"); return 1; } - paths = alloc_pathvec(); - if (!paths) + + pthread_mutex_lock(&io_err_pathvec_lock); + io_err_pathvec = vector_alloc(); + if (!io_err_pathvec) { + pthread_mutex_unlock(&io_err_pathvec_lock); goto destroy_ctx; + } + pthread_mutex_unlock(&io_err_pathvec_lock); pthread_mutex_lock(&io_err_thread_lock); - pthread_cleanup_push(cleanup_unlock, &io_err_thread_lock); + pthread_cleanup_push(cleanup_mutex, &io_err_thread_lock); ret = pthread_create(&io_err_stat_thr, &io_err_stat_attr, io_err_stat_loop, data); @@ -769,7 +742,10 @@ int start_io_err_stat_thread(void *data) return 0; out_free: - free_io_err_pathvec(paths); + pthread_mutex_lock(&io_err_pathvec_lock); + vector_free(io_err_pathvec); + io_err_pathvec = NULL; + pthread_mutex_unlock(&io_err_pathvec_lock); destroy_ctx: io_destroy(ioctx); io_err_stat_log(0, "failed to start io_error statistic thread"); @@ -785,6 +761,6 @@ void stop_io_err_stat_thread(void) pthread_cancel(io_err_stat_thr); pthread_join(io_err_stat_thr, NULL); - free_io_err_pathvec(paths); + free_io_err_pathvec(); io_destroy(ioctx); } diff --git a/libmultipath/util.c b/libmultipath/util.c index 51c38c87..dd30a46e 100644 --- a/libmultipath/util.c +++ b/libmultipath/util.c @@ -469,3 +469,8 @@ void close_fd(void *arg) { close((long)arg); } + +void cleanup_mutex(void *arg) +{ + pthread_mutex_unlock(arg); +} diff --git a/libmultipath/util.h b/libmultipath/util.h index 56bd78c7..ce277680 100644 --- a/libmultipath/util.h +++ b/libmultipath/util.h @@ -44,6 +44,7 @@ void set_max_fds(rlim_t max_fds); pthread_cleanup_push(((void (*)(void *))&f), (arg)) void close_fd(void *arg); +void cleanup_mutex(void *arg); struct scandir_result { struct dirent **di; -- 2.17.2