1f2f23ddef
Resolves: bz#1350745 bz#1362129 bz#1541568 bz#1597252 bz#1599220 Resolves: bz#1633177 bz#1637564 bz#1639476 bz#1639568 bz#1643370 Resolves: bz#1645480 bz#1648296 bz#1648893 bz#1651040 bz#1651460 Resolves: bz#1652466 bz#1652537 bz#1653224 bz#1653613 bz#1654103 Resolves: bz#1654161 bz#1655385 bz#1655578 bz#1656357 bz#1659439 Signed-off-by: Milind Changire <mchangir@redhat.com>
290 lines
11 KiB
Diff
290 lines
11 KiB
Diff
From a5471a84069631ab0d0605cf7b68f16285f5079f Mon Sep 17 00:00:00 2001
|
|
From: Xavi Hernandez <xhernandez@redhat.com>
|
|
Date: Fri, 14 Dec 2018 11:26:36 +0100
|
|
Subject: [PATCH 478/493] libglusterfs: fix memory corruption caused by
|
|
per-thread mem pools
|
|
|
|
There was a race in the per-thread memory pool management that could lead
|
|
to memory corruption. The race appeared when the following sequence of
|
|
events happened:
|
|
|
|
1. Thread T1 allocated a memory object O1 from its own private pool P1
|
|
2. T1 terminates and P1 is marked to be destroyed
|
|
3. The mem-sweeper thread is woken up and scans all private pools
|
|
4. It detects that P1 needs to be destroyed and starts releasing the
|
|
objects from hot and cold lists.
|
|
5. Thread T2 releases O1
|
|
6. O1 is added to the hot list of P1
|
|
|
|
The problem happens because steps 4 and 6 are protected by diferent locks,
|
|
so they can run concurrently. This means that both T1 and T2 are modifying
|
|
the same list at the same time, potentially causing corruption.
|
|
|
|
This patch fixes the problem using the following approach:
|
|
|
|
1. When an object is released, it's only returned to the hot list of the
|
|
corresponding memory pool if it's not marked to be destroyed. Otherwise
|
|
the memory is released to the system.
|
|
2. Object release and mem-sweeper thread synchronize access to the deletion
|
|
mark of the memory pool to prevent simultaneous access to the list.
|
|
|
|
Some other minor adjustments are made to reduce the lengths of the locked
|
|
regions.
|
|
|
|
This patch is not 100% identical to upstream version because changes
|
|
coming from https://github.com/gluster/glusterfs/issues/307 are not
|
|
backported.
|
|
|
|
Upstream patch: https://review.gluster.org/c/glusterfs/+/21583
|
|
> Fixes: bz#1651165
|
|
> Change-Id: I63be3893f92096e57f54a6150e0461340084ddde
|
|
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
|
Upstream patch: https://review.gluster.org/c/glusterfs/+/21727
|
|
> Change-Id: Idbf23bda7f9228d60c644a1bea4b6c2cfc582090
|
|
> updates: bz#1193929
|
|
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
|
Change-Id: I63be3893f92096e57f54a6150e0461340084ddde
|
|
BUG: 1647499
|
|
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/158658
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
---
|
|
libglusterfs/src/mem-pool.c | 137 ++++++++++++++++++++++++++------------------
|
|
1 file changed, 81 insertions(+), 56 deletions(-)
|
|
|
|
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
|
|
index ba29137..8ff261c 100644
|
|
--- a/libglusterfs/src/mem-pool.c
|
|
+++ b/libglusterfs/src/mem-pool.c
|
|
@@ -411,37 +411,34 @@ static unsigned int init_count = 0;
|
|
static pthread_t sweeper_tid;
|
|
|
|
|
|
-void
|
|
+gf_boolean_t
|
|
collect_garbage (sweep_state_t *state, per_thread_pool_list_t *pool_list)
|
|
{
|
|
unsigned int i;
|
|
per_thread_pool_t *pt_pool;
|
|
-
|
|
- if (pool_list->poison) {
|
|
- list_del (&pool_list->thr_list);
|
|
- list_add (&pool_list->thr_list, &state->death_row);
|
|
- return;
|
|
- }
|
|
-
|
|
- if (state->n_cold_lists >= N_COLD_LISTS) {
|
|
- return;
|
|
- }
|
|
+ gf_boolean_t poisoned;
|
|
|
|
(void) pthread_spin_lock (&pool_list->lock);
|
|
- for (i = 0; i < NPOOLS; ++i) {
|
|
- pt_pool = &pool_list->pools[i];
|
|
- if (pt_pool->cold_list) {
|
|
- state->cold_lists[state->n_cold_lists++]
|
|
- = pt_pool->cold_list;
|
|
- }
|
|
- pt_pool->cold_list = pt_pool->hot_list;
|
|
- pt_pool->hot_list = NULL;
|
|
- if (state->n_cold_lists >= N_COLD_LISTS) {
|
|
- /* We'll just catch up on a future pass. */
|
|
- break;
|
|
+
|
|
+ poisoned = pool_list->poison != 0;
|
|
+ if (!poisoned) {
|
|
+ for (i = 0; i < NPOOLS; ++i) {
|
|
+ pt_pool = &pool_list->pools[i];
|
|
+ if (pt_pool->cold_list) {
|
|
+ if (state->n_cold_lists >= N_COLD_LISTS) {
|
|
+ break;
|
|
+ }
|
|
+ state->cold_lists[state->n_cold_lists++]
|
|
+ = pt_pool->cold_list;
|
|
+ }
|
|
+ pt_pool->cold_list = pt_pool->hot_list;
|
|
+ pt_pool->hot_list = NULL;
|
|
}
|
|
}
|
|
+
|
|
(void) pthread_spin_unlock (&pool_list->lock);
|
|
+
|
|
+ return poisoned;
|
|
}
|
|
|
|
|
|
@@ -469,6 +466,7 @@ pool_sweeper (void *arg)
|
|
struct timeval begin_time;
|
|
struct timeval end_time;
|
|
struct timeval elapsed;
|
|
+ gf_boolean_t poisoned;
|
|
|
|
/*
|
|
* This is all a bit inelegant, but the point is to avoid doing
|
|
@@ -488,7 +486,13 @@ pool_sweeper (void *arg)
|
|
(void) pthread_mutex_lock (&pool_lock);
|
|
list_for_each_entry_safe (pool_list, next_pl,
|
|
&pool_threads, thr_list) {
|
|
- collect_garbage (&state, pool_list);
|
|
+ (void) pthread_mutex_unlock (&pool_lock);
|
|
+ poisoned = collect_garbage (&state, pool_list);
|
|
+ (void) pthread_mutex_lock (&pool_lock);
|
|
+ if (poisoned) {
|
|
+ list_move(&pool_list->thr_list,
|
|
+ &state.death_row);
|
|
+ }
|
|
}
|
|
(void) pthread_mutex_unlock (&pool_lock);
|
|
(void) gettimeofday (&end_time, NULL);
|
|
@@ -525,7 +529,15 @@ pool_destructor (void *arg)
|
|
{
|
|
per_thread_pool_list_t *pool_list = arg;
|
|
|
|
- /* The pool-sweeper thread will take it from here. */
|
|
+ /* The pool-sweeper thread will take it from here.
|
|
+ *
|
|
+ * We can change 'poison' here without taking locks because the change
|
|
+ * itself doesn't interact with other parts of the code and a simple
|
|
+ * write is already atomic from the point of view of the processor.
|
|
+ *
|
|
+ * This change can modify what mem_put() does, but both possibilities
|
|
+ * are fine until the sweeper thread kicks in. The real synchronization
|
|
+ * must be between mem_put() and the sweeper thread. */
|
|
pool_list->poison = 1;
|
|
}
|
|
|
|
@@ -736,7 +748,7 @@ mem_get_pool_list (void)
|
|
(void) pthread_mutex_unlock (&pool_free_lock);
|
|
|
|
if (!pool_list) {
|
|
- pool_list = CALLOC (pool_list_size, 1);
|
|
+ pool_list = MALLOC (pool_list_size);
|
|
if (!pool_list) {
|
|
return NULL;
|
|
}
|
|
@@ -761,26 +773,47 @@ mem_get_pool_list (void)
|
|
}
|
|
|
|
pooled_obj_hdr_t *
|
|
-mem_get_from_pool (per_thread_pool_t *pt_pool)
|
|
+mem_get_from_pool (struct mem_pool *mem_pool)
|
|
{
|
|
+ per_thread_pool_list_t *pool_list;
|
|
+ per_thread_pool_t *pt_pool;
|
|
pooled_obj_hdr_t *retval;
|
|
|
|
+ pool_list = mem_get_pool_list ();
|
|
+ if (!pool_list || pool_list->poison) {
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST];
|
|
+
|
|
+ (void) pthread_spin_lock (&pool_list->lock);
|
|
+
|
|
retval = pt_pool->hot_list;
|
|
if (retval) {
|
|
- GF_ATOMIC_INC (pt_pool->parent->allocs_hot);
|
|
pt_pool->hot_list = retval->next;
|
|
- return retval;
|
|
+ (void) pthread_spin_unlock (&pool_list->lock);
|
|
+ GF_ATOMIC_INC (pt_pool->parent->allocs_hot);
|
|
+ } else {
|
|
+ retval = pt_pool->cold_list;
|
|
+ if (retval) {
|
|
+ pt_pool->cold_list = retval->next;
|
|
+ (void) pthread_spin_unlock (&pool_list->lock);
|
|
+ GF_ATOMIC_INC (pt_pool->parent->allocs_cold);
|
|
+ } else {
|
|
+ (void) pthread_spin_unlock (&pool_list->lock);
|
|
+ GF_ATOMIC_INC (pt_pool->parent->allocs_stdc);
|
|
+ retval = malloc (1 << mem_pool->power_of_two);
|
|
+ }
|
|
}
|
|
|
|
- retval = pt_pool->cold_list;
|
|
if (retval) {
|
|
- GF_ATOMIC_INC (pt_pool->parent->allocs_cold);
|
|
- pt_pool->cold_list = retval->next;
|
|
- return retval;
|
|
+ retval->magic = GF_MEM_HEADER_MAGIC;
|
|
+ retval->next = NULL;
|
|
+ retval->pool_list = pool_list;
|
|
+ retval->power_of_two = mem_pool->power_of_two;
|
|
}
|
|
|
|
- GF_ATOMIC_INC (pt_pool->parent->allocs_stdc);
|
|
- return malloc (1 << pt_pool->parent->power_of_two);
|
|
+ return retval;
|
|
}
|
|
|
|
|
|
@@ -791,8 +824,6 @@ mem_get (struct mem_pool *mem_pool)
|
|
return GF_CALLOC (1, AVAILABLE_SIZE (mem_pool->power_of_two),
|
|
gf_common_mt_mem_pool);
|
|
#else
|
|
- per_thread_pool_list_t *pool_list;
|
|
- per_thread_pool_t *pt_pool;
|
|
pooled_obj_hdr_t *retval;
|
|
|
|
if (!mem_pool) {
|
|
@@ -801,25 +832,11 @@ mem_get (struct mem_pool *mem_pool)
|
|
return NULL;
|
|
}
|
|
|
|
- pool_list = mem_get_pool_list ();
|
|
- if (!pool_list || pool_list->poison) {
|
|
- return NULL;
|
|
- }
|
|
-
|
|
- (void) pthread_spin_lock (&pool_list->lock);
|
|
- pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST];
|
|
- retval = mem_get_from_pool (pt_pool);
|
|
- (void) pthread_spin_unlock (&pool_list->lock);
|
|
-
|
|
+ retval = mem_get_from_pool (mem_pool);
|
|
if (!retval) {
|
|
return NULL;
|
|
}
|
|
|
|
- retval->magic = GF_MEM_HEADER_MAGIC;
|
|
- retval->next = NULL;
|
|
- retval->pool_list = pool_list;;
|
|
- retval->power_of_two = mem_pool->power_of_two;
|
|
-
|
|
return retval + 1;
|
|
#endif /* GF_DISABLE_MEMPOOL */
|
|
}
|
|
@@ -849,12 +866,20 @@ mem_put (void *ptr)
|
|
pool_list = hdr->pool_list;
|
|
pt_pool = &pool_list->pools[hdr->power_of_two-POOL_SMALLEST];
|
|
|
|
- (void) pthread_spin_lock (&pool_list->lock);
|
|
hdr->magic = GF_MEM_INVALID_MAGIC;
|
|
- hdr->next = pt_pool->hot_list;
|
|
- pt_pool->hot_list = hdr;
|
|
- GF_ATOMIC_INC (pt_pool->parent->frees_to_list);
|
|
- (void) pthread_spin_unlock (&pool_list->lock);
|
|
+
|
|
+ (void) pthread_spin_lock (&pool_list->lock);
|
|
+ if (!pool_list->poison) {
|
|
+ hdr->next = pt_pool->hot_list;
|
|
+ pt_pool->hot_list = hdr;
|
|
+ (void) pthread_spin_unlock (&pool_list->lock);
|
|
+ GF_ATOMIC_INC (pt_pool->parent->frees_to_list);
|
|
+ } else {
|
|
+ (void) pthread_spin_unlock (&pool_list->lock);
|
|
+ /* If the owner thread of this element has terminated, we
|
|
+ * simply release its memory. */
|
|
+ free(hdr);
|
|
+ }
|
|
#endif /* GF_DISABLE_MEMPOOL */
|
|
}
|
|
|
|
--
|
|
1.8.3.1
|
|
|