From 993954fd8d7afc7aa19320029ad0a3962cc9a9c1 Mon Sep 17 00:00:00 2001 From: Rinku Kothiya Date: Wed, 19 Feb 2020 22:52:20 -0500 Subject: [PATCH] autobuild v6.0-30.1 Resolves: bz#1800703 Signed-off-by: Rinku Kothiya --- ...ore-fix-memory-pool-management-races.patch | 466 ++++++++++++++++++ ...Prevent-crash-on-process-termination.patch | 74 +++ glusterfs.spec | 7 +- 3 files changed, 546 insertions(+), 1 deletion(-) create mode 100644 0354-core-fix-memory-pool-management-races.patch create mode 100644 0355-core-Prevent-crash-on-process-termination.patch diff --git a/0354-core-fix-memory-pool-management-races.patch b/0354-core-fix-memory-pool-management-races.patch new file mode 100644 index 0000000..a7cdfc0 --- /dev/null +++ b/0354-core-fix-memory-pool-management-races.patch @@ -0,0 +1,466 @@ +From 75a9d946d252ce70460144615ca17dbdf2e80fab Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 7 Feb 2020 10:19:57 +0100 +Subject: [PATCH 354/355] core: fix memory pool management races + +Objects allocated from a per-thread memory pool keep a reference to it +to be able to return the object to the pool when not used anymore. The +object holding this reference can have a long life cycle that could +survive a glfs_fini() call. + +This means that it's unsafe to destroy memory pools from glfs_fini(). + +Another side effect of destroying memory pools from glfs_fini() is that +the TLS variable that points to one of those pools cannot be reset for +all alive threads. This means that any attempt to allocate memory from +those threads will access already free'd memory, which is very +dangerous. + +To fix these issues, mem_pools_fini() doesn't destroy pool lists +anymore. Only at process termination the pools are destroyed. + +Upatream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24099 +> Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965 +> Fixes: bz#1801684 +> Signed-off-by: Xavi Hernandez + +Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965 +BUG: 1800703 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/192262 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/globals.c | 13 ++- + libglusterfs/src/glusterfs/globals.h | 3 + + libglusterfs/src/glusterfs/mem-pool.h | 28 ++--- + libglusterfs/src/mem-pool.c | 201 ++++++++++++++++++---------------- + libglusterfs/src/syncop.c | 7 ++ + 5 files changed, 146 insertions(+), 106 deletions(-) + +diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c +index 02098e6..e433ee8 100644 +--- a/libglusterfs/src/globals.c ++++ b/libglusterfs/src/globals.c +@@ -319,7 +319,18 @@ glusterfs_cleanup(void *ptr) + GF_FREE(thread_syncopctx.groups); + } + +- mem_pool_thread_destructor(); ++ mem_pool_thread_destructor(NULL); ++} ++ ++void ++gf_thread_needs_cleanup(void) ++{ ++ /* The value stored in free_key TLS is not really used for anything, but ++ * pthread implementation doesn't call the TLS destruction function unless ++ * it's != NULL. This function must be called whenever something is ++ * allocated for this thread so that glusterfs_cleanup() will be called ++ * and resources can be released. 
*/ ++ (void)pthread_setspecific(free_key, (void *)1); + } + + static void +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index e218285..31717ed 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -181,6 +181,9 @@ glusterfs_leaseid_exist(void); + int + glusterfs_globals_init(glusterfs_ctx_t *ctx); + ++void ++gf_thread_needs_cleanup(void); ++ + struct tvec_base * + glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx); + void +diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h +index be0a26d..97bf76c 100644 +--- a/libglusterfs/src/glusterfs/mem-pool.h ++++ b/libglusterfs/src/glusterfs/mem-pool.h +@@ -245,24 +245,26 @@ typedef struct per_thread_pool { + } per_thread_pool_t; + + typedef struct per_thread_pool_list { +- /* +- * These first two members are protected by the global pool lock. When +- * a thread first tries to use any pool, we create one of these. We +- * link it into the global list using thr_list so the pool-sweeper +- * thread can find it, and use pthread_setspecific so this thread can +- * find it. When the per-thread destructor runs, we "poison" the pool +- * list to prevent further allocations. This also signals to the +- * pool-sweeper thread that the list should be detached and freed after +- * the next time it's swept. +- */ ++ /* thr_list is used to place the TLS pool_list into the active global list ++ * (pool_threads) or the inactive global list (pool_free_threads). It's ++ * protected by the global pool_lock. */ + struct list_head thr_list; +- unsigned int poison; ++ ++ /* This lock is used to update poison and the hot/cold lists of members ++ * of 'pools' array. */ ++ pthread_spinlock_t lock; ++ ++ /* This field is used to mark a pool_list as not being owned by any thread. ++ * This means that the sweeper thread won't be cleaning objects stored in ++ * its pools. mem_put() uses it to decide if the object being released is ++ * placed into its original pool_list or directly destroyed. */ ++ bool poison; ++ + /* + * There's really more than one pool, but the actual number is hidden + * in the implementation code so we just make it a single-element array + * here. 
+ */ +- pthread_spinlock_t lock; + per_thread_pool_t pools[1]; + } per_thread_pool_list_t; + +@@ -307,7 +309,7 @@ void + mem_pool_destroy(struct mem_pool *pool); + + void +-mem_pool_thread_destructor(void); ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list); + + void + gf_mem_acct_enable_set(void *ctx); +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index d88041d..2b41c01 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -367,7 +367,6 @@ static __thread per_thread_pool_list_t *thread_pool_list = NULL; + #define POOL_SWEEP_SECS 30 + + typedef struct { +- struct list_head death_row; + pooled_obj_hdr_t *cold_lists[N_COLD_LISTS]; + unsigned int n_cold_lists; + } sweep_state_t; +@@ -384,36 +383,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; + static unsigned int init_count = 0; + static pthread_t sweeper_tid; + +-gf_boolean_t ++static bool + collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list) + { + unsigned int i; + per_thread_pool_t *pt_pool; +- gf_boolean_t poisoned; + + (void)pthread_spin_lock(&pool_list->lock); + +- poisoned = pool_list->poison != 0; +- if (!poisoned) { +- for (i = 0; i < NPOOLS; ++i) { +- pt_pool = &pool_list->pools[i]; +- if (pt_pool->cold_list) { +- if (state->n_cold_lists >= N_COLD_LISTS) { +- break; +- } +- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; ++ for (i = 0; i < NPOOLS; ++i) { ++ pt_pool = &pool_list->pools[i]; ++ if (pt_pool->cold_list) { ++ if (state->n_cold_lists >= N_COLD_LISTS) { ++ (void)pthread_spin_unlock(&pool_list->lock); ++ return true; + } +- pt_pool->cold_list = pt_pool->hot_list; +- pt_pool->hot_list = NULL; ++ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; + } ++ pt_pool->cold_list = pt_pool->hot_list; ++ pt_pool->hot_list = NULL; + } + + (void)pthread_spin_unlock(&pool_list->lock); + +- return poisoned; ++ return false; + } + +-void ++static void + free_obj_list(pooled_obj_hdr_t *victim) + { + pooled_obj_hdr_t *next; +@@ -425,82 +421,96 @@ free_obj_list(pooled_obj_hdr_t *victim) + } + } + +-void * ++static void * + pool_sweeper(void *arg) + { + sweep_state_t state; + per_thread_pool_list_t *pool_list; +- per_thread_pool_list_t *next_pl; +- per_thread_pool_t *pt_pool; +- unsigned int i; +- gf_boolean_t poisoned; ++ uint32_t i; ++ bool pending; + + /* + * This is all a bit inelegant, but the point is to avoid doing + * expensive things (like freeing thousands of objects) while holding a +- * global lock. Thus, we split each iteration into three passes, with ++ * global lock. Thus, we split each iteration into two passes, with + * only the first and fastest holding the lock. + */ + ++ pending = true; ++ + for (;;) { +- sleep(POOL_SWEEP_SECS); ++ /* If we know there's pending work to do (or it's the first run), we ++ * do collect garbage more often. */ ++ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS); ++ + (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); +- INIT_LIST_HEAD(&state.death_row); + state.n_cold_lists = 0; ++ pending = false; + + /* First pass: collect stuff that needs our attention. 
*/ + (void)pthread_mutex_lock(&pool_lock); +- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list) ++ list_for_each_entry(pool_list, &pool_threads, thr_list) + { +- (void)pthread_mutex_unlock(&pool_lock); +- poisoned = collect_garbage(&state, pool_list); +- (void)pthread_mutex_lock(&pool_lock); +- +- if (poisoned) { +- list_move(&pool_list->thr_list, &state.death_row); ++ if (collect_garbage(&state, pool_list)) { ++ pending = true; + } + } + (void)pthread_mutex_unlock(&pool_lock); + +- /* Second pass: free dead pools. */ +- (void)pthread_mutex_lock(&pool_free_lock); +- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list) +- { +- for (i = 0; i < NPOOLS; ++i) { +- pt_pool = &pool_list->pools[i]; +- free_obj_list(pt_pool->cold_list); +- free_obj_list(pt_pool->hot_list); +- pt_pool->hot_list = pt_pool->cold_list = NULL; +- } +- list_del(&pool_list->thr_list); +- list_add(&pool_list->thr_list, &pool_free_threads); +- } +- (void)pthread_mutex_unlock(&pool_free_lock); +- +- /* Third pass: free cold objects from live pools. */ ++ /* Second pass: free cold objects from live pools. */ + for (i = 0; i < state.n_cold_lists; ++i) { + free_obj_list(state.cold_lists[i]); + } + (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + } ++ ++ return NULL; + } + + void +-mem_pool_thread_destructor(void) ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) + { +- per_thread_pool_list_t *pool_list = thread_pool_list; +- +- /* The pool-sweeper thread will take it from here. +- * +- * We can change 'poison' here without taking locks because the change +- * itself doesn't interact with other parts of the code and a simple write +- * is already atomic from the point of view of the processor. +- * +- * This change can modify what mem_put() does, but both possibilities are +- * fine until the sweeper thread kicks in. The real synchronization must be +- * between mem_put() and the sweeper thread. */ ++ per_thread_pool_t *pt_pool; ++ uint32_t i; ++ ++ if (pool_list == NULL) { ++ pool_list = thread_pool_list; ++ } ++ ++ /* The current thread is terminating. None of the allocated objects will ++ * be used again. We can directly destroy them here instead of delaying ++ * it until the next sweeper loop. */ + if (pool_list != NULL) { +- pool_list->poison = 1; ++ /* Remove pool_list from the global list to avoid that sweeper ++ * could touch it. */ ++ pthread_mutex_lock(&pool_lock); ++ list_del(&pool_list->thr_list); ++ pthread_mutex_unlock(&pool_lock); ++ ++ /* We need to protect hot/cold changes from potential mem_put() calls ++ * that reference this pool_list. Once poison is set to true, we are ++ * sure that no one else will touch hot/cold lists. The only possible ++ * race is when at the same moment a mem_put() is adding a new item ++ * to the hot list. We protect from that by taking pool_list->lock. ++ * After that we don't need the lock to destroy the hot/cold lists. 
*/ ++ pthread_spin_lock(&pool_list->lock); ++ pool_list->poison = true; ++ pthread_spin_unlock(&pool_list->lock); ++ ++ for (i = 0; i < NPOOLS; i++) { ++ pt_pool = &pool_list->pools[i]; ++ ++ free_obj_list(pt_pool->hot_list); ++ pt_pool->hot_list = NULL; ++ ++ free_obj_list(pt_pool->cold_list); ++ pt_pool->cold_list = NULL; ++ } ++ ++ pthread_mutex_lock(&pool_free_lock); ++ list_add(&pool_list->thr_list, &pool_free_threads); ++ pthread_mutex_unlock(&pool_free_lock); ++ + thread_pool_list = NULL; + } + } +@@ -528,6 +538,30 @@ mem_pools_preinit(void) + init_done = GF_MEMPOOL_INIT_EARLY; + } + ++static __attribute__((destructor)) void ++mem_pools_postfini(void) ++{ ++ per_thread_pool_list_t *pool_list, *next; ++ ++ /* This is part of a process shutdown (or dlclose()) which means that ++ * most probably all threads should be stopped. However this is not the ++ * case for gluster and there are even legitimate situations in which we ++ * could have some threads alive. What is sure is that none of those ++ * threads should be using anything from this library, so destroying ++ * everything here should be fine and safe. */ ++ ++ list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list) ++ { ++ mem_pool_thread_destructor(pool_list); ++ } ++ ++ list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list) ++ { ++ list_del(&pool_list->thr_list); ++ FREE(pool_list); ++ } ++} ++ + /* Call mem_pools_init() once threading has been configured completely. This + * prevent the pool_sweeper thread from getting killed once the main() thread + * exits during deamonizing. */ +@@ -560,10 +594,6 @@ mem_pools_fini(void) + */ + break; + case 1: { +- per_thread_pool_list_t *pool_list; +- per_thread_pool_list_t *next_pl; +- unsigned int i; +- + /* if mem_pools_init() was not called, sweeper_tid will be invalid + * and the functions will error out. That is not critical. In all + * other cases, the sweeper_tid will be valid and the thread gets +@@ -571,32 +601,11 @@ mem_pools_fini(void) + (void)pthread_cancel(sweeper_tid); + (void)pthread_join(sweeper_tid, NULL); + +- /* At this point all threads should have already terminated, so +- * it should be safe to destroy all pending per_thread_pool_list_t +- * structures that are stored for each thread. */ +- mem_pool_thread_destructor(); +- +- /* free all objects from all pools */ +- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, +- thr_list) +- { +- for (i = 0; i < NPOOLS; ++i) { +- free_obj_list(pool_list->pools[i].hot_list); +- free_obj_list(pool_list->pools[i].cold_list); +- pool_list->pools[i].hot_list = NULL; +- pool_list->pools[i].cold_list = NULL; +- } +- +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } +- +- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads, +- thr_list) +- { +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } ++ /* There could be threads still running in some cases, so we can't ++ * destroy pool_lists in use. We can also not destroy unused ++ * pool_lists because some allocated objects may still be pointing ++ * to them. */ ++ mem_pool_thread_destructor(NULL); + + init_done = GF_MEMPOOL_INIT_DESTROY; + /* Fall through. 
*/ +@@ -617,7 +626,7 @@ mem_pools_fini(void) + { + } + void +-mem_pool_thread_destructor(void) ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) + { + } + +@@ -738,13 +747,21 @@ mem_get_pool_list(void) + } + } + ++ /* There's no need to take pool_list->lock, because this is already an ++ * atomic operation and we don't need to synchronize it with any change ++ * in hot/cold lists. */ ++ pool_list->poison = false; ++ + (void)pthread_mutex_lock(&pool_lock); +- pool_list->poison = 0; + list_add(&pool_list->thr_list, &pool_threads); + (void)pthread_mutex_unlock(&pool_lock); + + thread_pool_list = pool_list; + ++ /* Ensure that all memory objects associated to the new pool_list are ++ * destroyed when the thread terminates. */ ++ gf_thread_needs_cleanup(); ++ + return pool_list; + } + +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index 2eb7b49..0de53c6 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -97,6 +97,13 @@ syncopctx_setfsgroups(int count, const void *groups) + + /* set/reset the ngrps, this is where reset of groups is handled */ + opctx->ngrps = count; ++ ++ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) { ++ /* This is the first time we are storing groups into the TLS structure ++ * so we mark the current thread so that it will be properly cleaned ++ * up when the thread terminates. */ ++ gf_thread_needs_cleanup(); ++ } + opctx->valid |= SYNCOPCTX_GROUPS; + + out: +-- +1.8.3.1 + diff --git a/0355-core-Prevent-crash-on-process-termination.patch b/0355-core-Prevent-crash-on-process-termination.patch new file mode 100644 index 0000000..fca3f2c --- /dev/null +++ b/0355-core-Prevent-crash-on-process-termination.patch @@ -0,0 +1,74 @@ +From 10f1730073b9fb02d2ed7f7de855afd6df0e5202 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Wed, 19 Feb 2020 12:24:15 +0100 +Subject: [PATCH 355/355] core: Prevent crash on process termination + +A previous patch (ce61da816a) has fixed a use-after-free issue, +but it doesn't work well when the final cleanup is done at process +termination because gluster doesn't stop other threads before +calling exit(). + +For this reason, the final cleanup is removed to avoid the crash, +at least until the termination sequence properly stops all gluster +threads before exiting the program. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24138 +> Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b +> Fixes: bz#1801684 +> Signed-off-by: Xavi Hernandez + +Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b +BUG: 1800703 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/192344 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/mem-pool.c | 30 +++++++++++------------------- + 1 file changed, 11 insertions(+), 19 deletions(-) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 2b41c01..73503e0 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -541,25 +541,17 @@ mem_pools_preinit(void) + static __attribute__((destructor)) void + mem_pools_postfini(void) + { +- per_thread_pool_list_t *pool_list, *next; +- +- /* This is part of a process shutdown (or dlclose()) which means that +- * most probably all threads should be stopped. However this is not the +- * case for gluster and there are even legitimate situations in which we +- * could have some threads alive. 
What is sure is that none of those +- * threads should be using anything from this library, so destroying +- * everything here should be fine and safe. */ +- +- list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list) +- { +- mem_pool_thread_destructor(pool_list); +- } +- +- list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list) +- { +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } ++ /* TODO: This function should destroy all per thread memory pools that ++ * are still alive, but this is not possible right now because glibc ++ * starts calling destructors as soon as exit() is called, and ++ * gluster doesn't ensure that all threads have been stopped before ++ * calling exit(). Existing threads would crash when they try to use ++ * memory or they terminate if we destroy things here. ++ * ++ * When we propertly terminate all threads, we can add the needed ++ * code here. Till then we need to leave the memory allocated. Most ++ * probably this function will be executed on process termination, ++ * so the memory will be released anyway by the system. */ + } + + /* Call mem_pools_init() once threading has been configured completely. This +-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index 71f08ab..691a98b 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -231,7 +231,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 30%{?dist} +Release: 30.1%{?dist} ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ @@ -662,6 +662,8 @@ Patch0350: 0350-tools-glusterfind-Remove-an-extra-argument.patch Patch0351: 0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch Patch0352: 0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch Patch0353: 0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch +Patch0354: 0354-core-fix-memory-pool-management-races.patch +Patch0355: 0355-core-Prevent-crash-on-process-termination.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -2391,6 +2393,9 @@ fi %endif %changelog +* Thu Feb 20 2020 Rinku Kothiya - 6.0-30.1 +- fixes bugs bz#1800703 + * Sat Feb 01 2020 Rinku Kothiya - 6.0-30 - fixes bugs bz#1775564 bz#1794153
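
For readers unfamiliar with the TLS trick that gf_thread_needs_cleanup() in patch 0354 relies on, the following is a minimal standalone sketch (not taken from the patch; every name other than the pthread API is hypothetical). POSIX only invokes the destructor registered with pthread_key_create() at thread exit when the key's current value is non-NULL, which is why the patch stores a dummy non-NULL pointer via pthread_setspecific() whenever a thread allocates per-thread resources.

/* Illustrative sketch only: demonstrates that a pthread TLS destructor runs
 * at thread exit only if the key holds a non-NULL value, so a dummy value is
 * stored to guarantee the cleanup callback fires.
 * Build with: cc -pthread tls_cleanup_demo.c */
#include <pthread.h>
#include <stdio.h>

static pthread_key_t cleanup_key;

static void
thread_cleanup(void *value)
{
    /* Runs automatically when a thread that stored a non-NULL value exits. */
    (void)value;
    printf("per-thread cleanup executed\n");
}

static void
mark_thread_for_cleanup(void)
{
    /* The stored value itself is never used; it only needs to be != NULL
     * so that thread_cleanup() is invoked when this thread terminates. */
    (void)pthread_setspecific(cleanup_key, (void *)1);
}

static void *
worker(void *arg)
{
    (void)arg;
    mark_thread_for_cleanup();
    return NULL;
}

int
main(void)
{
    pthread_t tid;

    (void)pthread_key_create(&cleanup_key, thread_cleanup);
    (void)pthread_create(&tid, NULL, worker, NULL);
    (void)pthread_join(tid, NULL);
    (void)pthread_key_delete(cleanup_key);
    return 0;
}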