diff --git a/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch b/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch
new file mode 100644
index 0000000..b295fc2
--- /dev/null
+++ b/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch
@@ -0,0 +1,70 @@
+From 341d75642ecc4e27bc6fecb56eb98a0ba03d8544 Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay
+Date: Mon, 23 Mar 2020 11:47:10 +0530
+Subject: [PATCH 376/379] features/shard: Fix crash during shards cleanup in
+ error cases
+
+Backport of:
+> https://review.gluster.org/c/glusterfs/+/24244
+> Change-Id: I0b49f2b58becd0d8874b3d4b14ff8d92a89d02d5
+> Fixes: #1127
+> Signed-off-by: Krutika Dhananjay
+
+A crash is seen during a reattempt to clean up shards in background
+upon remount. And this happens even on remount (which means a remount
+is no workaround for the crash).
+
+In such a situation, the in-memory base inode object will not be
+existent (new process, non-existent base shard).
+So local->resolver_base_inode will be NULL.
+
+In the event of an error (in this case, of space running out), the
+process would crash at the time of logging the error in the following line -
+
+    gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
+           "failed to delete shards of %s",
+           uuid_utoa(local->resolver_base_inode->gfid));
+
+Fixed that by using local->base_gfid as the source of gfid when
+local->resolver_base_inode is NULL.
+
+Change-Id: I0b49f2b58becd0d8874b3d4b14ff8d92a89d02d5
+BUG: 1836233
+Signed-off-by: Krutika Dhananjay
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200689
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/features/shard/src/shard.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 9ed597b..ee38ed2 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -2729,13 +2729,20 @@ int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode);
+ int shard_post_lookup_shards_unlink_handler(call_frame_t *frame,
+                                             xlator_t *this) {
+   shard_local_t *local = NULL;
++  uuid_t gfid = {
++      0,
++  };
+ 
+   local = frame->local;
+ 
++  if (local->resolver_base_inode)
++    gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++  else
++    gf_uuid_copy(gfid, local->base_gfid);
++
+   if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
+     gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
+-           "failed to delete shards of %s",
+-           uuid_utoa(local->resolver_base_inode->gfid));
++           "failed to delete shards of %s", uuid_utoa(gfid));
+     return 0;
+   }
+   local->op_ret = 0;
+-- 
+1.8.3.1
+
diff --git a/0377-syncop-improve-scaling-and-implement-more-tools.patch b/0377-syncop-improve-scaling-and-implement-more-tools.patch
new file mode 100644
index 0000000..66cccc3
--- /dev/null
+++ b/0377-syncop-improve-scaling-and-implement-more-tools.patch
@@ -0,0 +1,862 @@
+From 66600fb55522d405a68d7340a5680a2633c4237e Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez
+Date: Thu, 30 Apr 2020 11:19:01 +0200
+Subject: [PATCH 377/379] syncop: improve scaling and implement more tools
+
+The current scaling of the syncop thread pool is not working properly
+and can leave some tasks in the run queue more time than necessary
+when the maximum number of threads is not reached.
+
+This patch provides a better scaling condition to react faster to
+pending work.
+
+Condition variables and sleep in the context of a synctask have also
+been implemented. Their purpose is to replace regular condition
+variables and sleeps that block synctask threads and prevent other
+tasks to be executed.
+
+The new features have been applied to several places in glusterd.
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/24396/
+
+> Change-Id: Ic50b7c73c104f9e41f08101a357d30b95efccfbf
+> Fixes: #1116
+> Signed-off-by: Xavi Hernandez
+
+Change-Id: Ic50b7c73c104f9e41f08101a357d30b95efccfbf
+BUG: 1810516
+Signed-off-by: Sanju Rakonde
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200409
+Tested-by: Sanju Rakonde
+Tested-by: RHGS Build Bot
+Reviewed-by: Xavi Hernandez Juan
+---
+ libglusterfs/src/glusterfs/syncop.h             |  52 +++-
+ libglusterfs/src/libglusterfs.sym               |   7 +
+ libglusterfs/src/syncop.c                       | 306 ++++++++++++++++-----
+ xlators/cluster/dht/src/dht-rebalance.c         |   2 +-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c      |   9 +-
+ xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c  |   2 +-
+ .../mgmt/glusterd/src/glusterd-snapshot-utils.c |   5 +-
+ xlators/mgmt/glusterd/src/glusterd-syncop.h     |   2 +-
+ xlators/mgmt/glusterd/src/glusterd-utils.c      |  29 +-
+ xlators/mgmt/glusterd/src/glusterd.c            |   2 +
+ xlators/mgmt/glusterd/src/glusterd.h            |   2 +
+ 11 files changed, 317 insertions(+), 101 deletions(-)
+
+diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h
+index e0f1017..3011b4c 100644
+--- a/libglusterfs/src/glusterfs/syncop.h
++++ b/libglusterfs/src/glusterfs/syncop.h
+@@ -15,6 +15,7 @@
+ #include
+ #include
+ #include
++#include "glusterfs/timer.h"
+ 
+ #define SYNCENV_PROC_MAX 16
+ #define SYNCENV_PROC_MIN 2
+@@ -32,6 +33,7 @@
+ struct synctask;
+ struct syncproc;
+ struct syncenv;
++struct synccond;
+ 
+ typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque);
+ 
+@@ -55,9 +57,12 @@ struct synctask {
+     call_frame_t *opframe;
+     synctask_cbk_t synccbk;
+     synctask_fn_t syncfn;
+-    synctask_state_t state;
++    struct timespec *delta;
++    gf_timer_t *timer;
++    struct synccond *synccond;
+     void *opaque;
+     void *stack;
++    synctask_state_t state;
+     int woken;
+     int slept;
+     int ret;
+@@ -85,19 +90,21 @@ struct syncproc {
+ /* hosts the scheduler thread and framework for executing synctasks */
+ struct syncenv {
+     struct syncproc proc[SYNCENV_PROC_MAX];
+-    int procs;
++
++    pthread_mutex_t mutex;
++    pthread_cond_t cond;
+ 
+     struct list_head runq;
+-    int runcount;
+     struct list_head waitq;
+-    int waitcount;
++
++    int procs;
++    int procs_idle;
++
++    int runcount;
+ 
+     int procmin;
+     int procmax;
+ 
+-    pthread_mutex_t mutex;
+-    pthread_cond_t cond;
+-
+     size_t stacksize;
+ 
+     int destroy; /* FLAG to mark syncenv is in destroy mode
+@@ -123,6 +130,13 @@ struct synclock {
+ };
+ typedef struct synclock synclock_t;
+ 
++struct synccond {
++    pthread_mutex_t pmutex;
++    pthread_cond_t pcond;
++    struct list_head waitq;
++};
++typedef struct synccond synccond_t;
++
+ struct syncbarrier {
+     gf_boolean_t initialized; /*Set on successful initialization*/
+     pthread_mutex_t guard; /* guard the remaining members, pair @cond */
+@@ -219,7 +233,7 @@ struct syncopctx {
+ #define __yield(args)                                                        \
+     do {                                                                     \
+         if (args->task) {                                                    \
+-            synctask_yield(args->task);                                      \
++            synctask_yield(args->task, NULL);                                \
+         } else {                                                             \
+             pthread_mutex_lock(&args->mutex);                                \
+             {                                                                \
+@@ -307,7 +321,9 @@ synctask_join(struct synctask *task);
+ void
+ synctask_wake(struct synctask *task);
+ void
+-synctask_yield(struct synctask *task);
++synctask_yield(struct synctask *task, struct
timespec *delta); ++void ++synctask_sleep(int32_t secs); + void + synctask_waitfor(struct synctask *task, int count); + +@@ -405,6 +421,24 @@ synclock_trylock(synclock_t *lock); + int + synclock_unlock(synclock_t *lock); + ++int32_t ++synccond_init(synccond_t *cond); ++ ++void ++synccond_destroy(synccond_t *cond); ++ ++int ++synccond_wait(synccond_t *cond, synclock_t *lock); ++ ++int ++synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta); ++ ++void ++synccond_signal(synccond_t *cond); ++ ++void ++synccond_broadcast(synccond_t *cond); ++ + int + syncbarrier_init(syncbarrier_t *barrier); + int +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 467a1b7..5a721e0 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -938,6 +938,12 @@ syncbarrier_destroy + syncbarrier_init + syncbarrier_wait + syncbarrier_wake ++synccond_init ++synccond_destroy ++synccond_wait ++synccond_timedwait ++synccond_signal ++synccond_broadcast + syncenv_destroy + syncenv_new + synclock_destroy +@@ -1015,6 +1021,7 @@ synctask_new + synctask_new1 + synctask_set + synctask_setid ++synctask_sleep + synctask_wake + synctask_yield + sys_access +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index 693970f..71d37b7 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -154,10 +154,14 @@ out: + return ret; + } + ++void * ++syncenv_processor(void *thdata); ++ + static void + __run(struct synctask *task) + { + struct syncenv *env = NULL; ++ int32_t total, ret, i; + + env = task->env; + +@@ -173,7 +177,6 @@ __run(struct synctask *task) + env->runcount--; + break; + case SYNCTASK_WAIT: +- env->waitcount--; + break; + case SYNCTASK_DONE: + gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK, +@@ -187,8 +190,27 @@ __run(struct synctask *task) + } + + list_add_tail(&task->all_tasks, &env->runq); +- env->runcount++; + task->state = SYNCTASK_RUN; ++ ++ env->runcount++; ++ ++ total = env->procs + env->runcount - env->procs_idle; ++ if (total > env->procmax) { ++ total = env->procmax; ++ } ++ if (total > env->procs) { ++ for (i = 0; i < env->procmax; i++) { ++ if (env->proc[i].env == NULL) { ++ env->proc[i].env = env; ++ ret = gf_thread_create(&env->proc[i].processor, NULL, ++ syncenv_processor, &env->proc[i], ++ "sproc%d", i); ++ if ((ret < 0) || (++env->procs >= total)) { ++ break; ++ } ++ } ++ } ++ } + } + + static void +@@ -210,7 +232,6 @@ __wait(struct synctask *task) + gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_REWAITING_TASK, + "re-waiting already waiting " + "task"); +- env->waitcount--; + break; + case SYNCTASK_DONE: + gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK, +@@ -223,12 +244,11 @@ __wait(struct synctask *task) + } + + list_add_tail(&task->all_tasks, &env->waitq); +- env->waitcount++; + task->state = SYNCTASK_WAIT; + } + + void +-synctask_yield(struct synctask *task) ++synctask_yield(struct synctask *task, struct timespec *delta) + { + xlator_t *oldTHIS = THIS; + +@@ -237,6 +257,8 @@ synctask_yield(struct synctask *task) + task->proc->sched.uc_flags &= ~_UC_TLSBASE; + #endif + ++ task->delta = delta; ++ + if (task->state != SYNCTASK_DONE) { + task->state = SYNCTASK_SUSPEND; + } +@@ -249,6 +271,35 @@ synctask_yield(struct synctask *task) + } + + void ++synctask_sleep(int32_t secs) ++{ ++ struct timespec delta; ++ struct synctask *task; ++ ++ task = synctask_get(); ++ ++ if (task == NULL) { ++ sleep(secs); ++ } else { ++ delta.tv_sec = secs; ++ 
delta.tv_nsec = 0; ++ ++ synctask_yield(task, &delta); ++ } ++} ++ ++static void ++__synctask_wake(struct synctask *task) ++{ ++ task->woken = 1; ++ ++ if (task->slept) ++ __run(task); ++ ++ pthread_cond_broadcast(&task->env->cond); ++} ++ ++void + synctask_wake(struct synctask *task) + { + struct syncenv *env = NULL; +@@ -257,13 +308,18 @@ synctask_wake(struct synctask *task) + + pthread_mutex_lock(&env->mutex); + { +- task->woken = 1; ++ if (task->timer != NULL) { ++ if (gf_timer_call_cancel(task->xl->ctx, task->timer) != 0) { ++ goto unlock; ++ } + +- if (task->slept) +- __run(task); ++ task->timer = NULL; ++ task->synccond = NULL; ++ } + +- pthread_cond_broadcast(&env->cond); ++ __synctask_wake(task); + } ++unlock: + pthread_mutex_unlock(&env->mutex); + } + +@@ -282,7 +338,7 @@ synctask_wrap(void) + + task->state = SYNCTASK_DONE; + +- synctask_yield(task); ++ synctask_yield(task, NULL); + } + + void +@@ -422,11 +478,6 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn, + } + + synctask_wake(newtask); +- /* +- * Make sure someone's there to execute anything we just put on the +- * run queue. +- */ +- syncenv_scale(env); + + return newtask; + err: +@@ -520,8 +571,12 @@ syncenv_task(struct syncproc *proc) + goto unlock; + } + ++ env->procs_idle++; ++ + sleep_till.tv_sec = time(NULL) + SYNCPROC_IDLE_TIME; + ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till); ++ ++ env->procs_idle--; + } + + task = list_entry(env->runq.next, struct synctask, all_tasks); +@@ -540,6 +595,34 @@ unlock: + return task; + } + ++static void ++synctask_timer(void *data) ++{ ++ struct synctask *task = data; ++ struct synccond *cond; ++ ++ cond = task->synccond; ++ if (cond != NULL) { ++ pthread_mutex_lock(&cond->pmutex); ++ ++ list_del_init(&task->waitq); ++ task->synccond = NULL; ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ task->ret = -ETIMEDOUT; ++ } ++ ++ pthread_mutex_lock(&task->env->mutex); ++ ++ gf_timer_call_cancel(task->xl->ctx, task->timer); ++ task->timer = NULL; ++ ++ __synctask_wake(task); ++ ++ pthread_mutex_unlock(&task->env->mutex); ++} ++ + void + synctask_switchto(struct synctask *task) + { +@@ -572,7 +655,14 @@ synctask_switchto(struct synctask *task) + } else { + task->slept = 1; + __wait(task); ++ ++ if (task->delta != NULL) { ++ task->timer = gf_timer_call_after(task->xl->ctx, *task->delta, ++ synctask_timer, task); ++ } + } ++ ++ task->delta = NULL; + } + pthread_mutex_unlock(&env->mutex); + } +@@ -580,65 +670,18 @@ synctask_switchto(struct synctask *task) + void * + syncenv_processor(void *thdata) + { +- struct syncenv *env = NULL; + struct syncproc *proc = NULL; + struct synctask *task = NULL; + + proc = thdata; +- env = proc->env; +- +- for (;;) { +- task = syncenv_task(proc); +- if (!task) +- break; + ++ while ((task = syncenv_task(proc)) != NULL) { + synctask_switchto(task); +- +- syncenv_scale(env); + } + + return NULL; + } + +-void +-syncenv_scale(struct syncenv *env) +-{ +- int diff = 0; +- int scale = 0; +- int i = 0; +- int ret = 0; +- +- pthread_mutex_lock(&env->mutex); +- { +- if (env->procs > env->runcount) +- goto unlock; +- +- scale = env->runcount; +- if (scale > env->procmax) +- scale = env->procmax; +- if (scale > env->procs) +- diff = scale - env->procs; +- while (diff) { +- diff--; +- for (; (i < env->procmax); i++) { +- if (env->proc[i].processor == 0) +- break; +- } +- +- env->proc[i].env = env; +- ret = gf_thread_create(&env->proc[i].processor, NULL, +- syncenv_processor, &env->proc[i], +- "sproc%03hx", env->procs & 0x3ff); +- 
if (ret) +- break; +- env->procs++; +- i++; +- } +- } +-unlock: +- pthread_mutex_unlock(&env->mutex); +-} +- + /* The syncenv threads are cleaned up in this routine. + */ + void +@@ -715,12 +758,13 @@ syncenv_new(size_t stacksize, int procmin, int procmax) + newenv->stacksize = stacksize; + newenv->procmin = procmin; + newenv->procmax = procmax; ++ newenv->procs_idle = 0; + + for (i = 0; i < newenv->procmin; i++) { + newenv->proc[i].env = newenv; + ret = gf_thread_create(&newenv->proc[i].processor, NULL, + syncenv_processor, &newenv->proc[i], "sproc%d", +- newenv->procs); ++ i); + if (ret) + break; + newenv->procs++; +@@ -810,7 +854,7 @@ __synclock_lock(struct synclock *lock) + task->woken = 0; + list_add_tail(&task->waitq, &lock->waitq); + pthread_mutex_unlock(&lock->guard); +- synctask_yield(task); ++ synctask_yield(task, NULL); + /* task is removed from waitq in unlock, + * under lock->guard.*/ + pthread_mutex_lock(&lock->guard); +@@ -963,6 +1007,136 @@ synclock_unlock(synclock_t *lock) + return ret; + } + ++/* Condition variables */ ++ ++int32_t ++synccond_init(synccond_t *cond) ++{ ++ int32_t ret; ++ ++ INIT_LIST_HEAD(&cond->waitq); ++ ++ ret = pthread_mutex_init(&cond->pmutex, NULL); ++ if (ret != 0) { ++ return -ret; ++ } ++ ++ ret = pthread_cond_init(&cond->pcond, NULL); ++ if (ret != 0) { ++ pthread_mutex_destroy(&cond->pmutex); ++ } ++ ++ return -ret; ++} ++ ++void ++synccond_destroy(synccond_t *cond) ++{ ++ pthread_cond_destroy(&cond->pcond); ++ pthread_mutex_destroy(&cond->pmutex); ++} ++ ++int ++synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta) ++{ ++ struct timespec now; ++ struct synctask *task = NULL; ++ int ret; ++ ++ task = synctask_get(); ++ ++ if (task == NULL) { ++ if (delta != NULL) { ++ timespec_now_realtime(&now); ++ timespec_adjust_delta(&now, *delta); ++ } ++ ++ pthread_mutex_lock(&cond->pmutex); ++ ++ if (delta == NULL) { ++ ret = -pthread_cond_wait(&cond->pcond, &cond->pmutex); ++ } else { ++ ret = -pthread_cond_timedwait(&cond->pcond, &cond->pmutex, &now); ++ } ++ } else { ++ pthread_mutex_lock(&cond->pmutex); ++ ++ list_add_tail(&task->waitq, &cond->waitq); ++ task->synccond = cond; ++ ++ ret = synclock_unlock(lock); ++ if (ret == 0) { ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ synctask_yield(task, delta); ++ ++ ret = synclock_lock(lock); ++ if (ret == 0) { ++ ret = task->ret; ++ } ++ task->ret = 0; ++ ++ return ret; ++ } ++ ++ list_del_init(&task->waitq); ++ } ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ return ret; ++} ++ ++int ++synccond_wait(synccond_t *cond, synclock_t *lock) ++{ ++ return synccond_timedwait(cond, lock, NULL); ++} ++ ++void ++synccond_signal(synccond_t *cond) ++{ ++ struct synctask *task; ++ ++ pthread_mutex_lock(&cond->pmutex); ++ ++ if (!list_empty(&cond->waitq)) { ++ task = list_first_entry(&cond->waitq, struct synctask, waitq); ++ list_del_init(&task->waitq); ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ synctask_wake(task); ++ } else { ++ pthread_cond_signal(&cond->pcond); ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ } ++} ++ ++void ++synccond_broadcast(synccond_t *cond) ++{ ++ struct list_head list; ++ struct synctask *task; ++ ++ INIT_LIST_HEAD(&list); ++ ++ pthread_mutex_lock(&cond->pmutex); ++ ++ list_splice_init(&cond->waitq, &list); ++ pthread_cond_broadcast(&cond->pcond); ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ while (!list_empty(&list)) { ++ task = list_first_entry(&list, struct synctask, waitq); ++ list_del_init(&task->waitq); ++ ++ synctask_wake(task); ++ } ++} ++ + /* 
Barriers */ + + int +@@ -1032,7 +1206,7 @@ __syncbarrier_wait(struct syncbarrier *barrier, int waitfor) + /* called within a synctask */ + list_add_tail(&task->waitq, &barrier->waitq); + pthread_mutex_unlock(&barrier->guard); +- synctask_yield(task); ++ synctask_yield(task, NULL); + pthread_mutex_lock(&barrier->guard); + } else { + /* called by a non-synctask */ +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index c692119..957deaa 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -5224,7 +5224,7 @@ gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag) + defrag->tier_conf.pause_timer = gf_timer_call_after( + this->ctx, delta, gf_defrag_pause_tier_timeout, this); + +- synctask_yield(defrag->tier_conf.pause_synctask); ++ synctask_yield(defrag->tier_conf.pause_synctask, NULL); + + if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 0d29de2..6475611 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -6076,13 +6076,8 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr, + static void + glusterd_wait_for_blockers(glusterd_conf_t *priv) + { +- uint64_t blockers = GF_ATOMIC_GET(priv->blockers); +- +- while (blockers) { +- synclock_unlock(&priv->big_lock); +- sleep(1); +- blockers = GF_ATOMIC_GET(priv->blockers); +- synclock_lock(&priv->big_lock); ++ while (GF_ATOMIC_GET(priv->blockers)) { ++ synccond_wait(&priv->cond_blockers, &priv->big_lock); + } + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +index 36018a0..f55a5fd 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +@@ -112,7 +112,7 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags) + goto out; + + synclock_unlock(&conf->big_lock); +- sleep(1); ++ synctask_sleep(1); + synclock_lock(&conf->big_lock); + if (gf_is_service_running(proc->pidfile, &pid)) { + ret = kill(pid, SIGKILL); +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index d225854..386eed2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -1961,9 +1961,7 @@ glusterd_update_snaps_synctask(void *opaque) + synclock_lock(&conf->big_lock); + + while (conf->restart_bricks) { +- synclock_unlock(&conf->big_lock); +- sleep(2); +- synclock_lock(&conf->big_lock); ++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + +@@ -2070,6 +2068,7 @@ out: + if (dict) + dict_unref(dict); + conf->restart_bricks = _gf_false; ++ synccond_broadcast(&conf->cond_restart_bricks); + + return ret; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h +index ce4a940..a265f21 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.h ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h +@@ -32,7 +32,7 @@ + ret = gd_syncop_submit_request(rpc, req, stb, cookie, prog, procnum, \ + cbk, (xdrproc_t)xdrproc); \ + if (!ret) \ +- synctask_yield(stb->task); \ ++ synctask_yield(stb->task, NULL); \ + else \ + gf_asprintf(&stb->errstr, \ + "%s failed. 
Check log file" \ +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 812c698..ce9931c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5068,22 +5068,22 @@ glusterd_import_friend_volumes_synctask(void *opaque) + * restarted (refer glusterd_restart_bricks ()) + */ + while (conf->restart_bricks) { +- synclock_unlock(&conf->big_lock); +- sleep(2); +- synclock_lock(&conf->big_lock); ++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + + while (i <= count) { + ret = glusterd_import_friend_volume(peer_data, i); + if (ret) { +- conf->restart_bricks = _gf_false; +- goto out; ++ break; + } + i++; + } +- glusterd_svcs_manager(NULL); ++ if (i > count) { ++ glusterd_svcs_manager(NULL); ++ } + conf->restart_bricks = _gf_false; ++ synccond_broadcast(&conf->cond_restart_bricks); + out: + if (peer_data) + dict_unref(peer_data); +@@ -5769,7 +5769,9 @@ my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame) + call_frame_t *frame = v_frame; + glusterd_conf_t *conf = frame->this->private; + +- GF_ATOMIC_DEC(conf->blockers); ++ if (GF_ATOMIC_DEC(conf->blockers) == 0) { ++ synccond_broadcast(&conf->cond_blockers); ++ } + + STACK_DESTROY(frame->root); + return 0; +@@ -5865,7 +5867,9 @@ attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count, + } + } + out: +- GF_ATOMIC_DEC(conf->blockers); ++ if (GF_ATOMIC_DEC(conf->blockers) == 0) { ++ synccond_broadcast(&conf->cond_blockers); ++ } + STACK_DESTROY(frame->root); + return 0; + } +@@ -6053,7 +6057,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo, + * TBD: see if there's a better way + */ + synclock_unlock(&conf->big_lock); +- sleep(1); ++ synctask_sleep(1); + synclock_lock(&conf->big_lock); + } + +@@ -6193,7 +6197,7 @@ find_compat_brick_in_vol(glusterd_conf_t *conf, + "brick %s is still" + " starting, waiting for 2 seconds ", + other_brick->path); +- sleep(2); ++ synctask_sleep(2); + synclock_lock(&conf->big_lock); + retries--; + } +@@ -6680,9 +6684,7 @@ glusterd_restart_bricks(void *opaque) + * glusterd_compare_friend_data ()) + */ + while (conf->restart_bricks) { +- synclock_unlock(&conf->big_lock); +- sleep(2); +- synclock_lock(&conf->big_lock); ++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + +@@ -6798,6 +6800,7 @@ out: + GF_ATOMIC_DEC(conf->blockers); + conf->restart_done = _gf_true; + conf->restart_bricks = _gf_false; ++ synccond_broadcast(&conf->cond_restart_bricks); + + return_block: + return ret; +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index d360312..a01034a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1845,6 +1845,8 @@ init(xlator_t *this) + (void)strncpy(conf->rundir, rundir, sizeof(conf->rundir)); + + synclock_init(&conf->big_lock, SYNC_LOCK_RECURSIVE); ++ synccond_init(&conf->cond_restart_bricks); ++ synccond_init(&conf->cond_blockers); + pthread_mutex_init(&conf->xprt_lock, NULL); + INIT_LIST_HEAD(&conf->xprt_list); + pthread_mutex_init(&conf->import_volumes, NULL); +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 2be005c..1c6c3b1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -209,6 +209,8 @@ typedef struct { + dict_t *opts; + synclock_t big_lock; + gf_boolean_t 
restart_done;
++    synccond_t cond_restart_bricks;
++    synccond_t cond_blockers;
+     rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */
+     uint32_t base_port;
+     uint32_t max_port;
+-- 
+1.8.3.1
+
diff --git a/0378-Revert-open-behind-fix-missing-fd-reference.patch b/0378-Revert-open-behind-fix-missing-fd-reference.patch
new file mode 100644
index 0000000..e228be2
--- /dev/null
+++ b/0378-Revert-open-behind-fix-missing-fd-reference.patch
@@ -0,0 +1,120 @@
+From d79660ccc65f163e0d9cf91cc13a199bec04d5f1 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez Juan
+Date: Wed, 20 May 2020 12:55:43 +0000
+Subject: [PATCH 378/379] Revert "open-behind: fix missing fd reference"
+
+This reverts commit 30cbdf8c06145a0c290da42ecc0a7eae928200b7.
+
+The patch is not complete because there have been some crash reports
+upstream recently after the patch was released. A new patch that should
+cover all corner cases is under review (), but it's a big change and it
+could be risky to backport it without enough testing.
+
+Since there exists a workaround to avoid the problem (disable
+open-behind), for now we revert the patch.
+
+Change-Id: I9cfc55623c33758cf5530b18f03c0d795b0f650b
+BUG: 1830713
+Signed-off-by: Xavi Hernandez
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200952
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/performance/open-behind/src/open-behind.c | 27 +++++++++++----------------
+ 1 file changed, 11 insertions(+), 16 deletions(-)
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index 14ebc12..268c717 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -206,13 +206,8 @@ ob_fd_free(ob_fd_t *ob_fd)
+     if (ob_fd->xdata)
+         dict_unref(ob_fd->xdata);
+ 
+-    if (ob_fd->open_frame) {
+-        /* If we sill have a frame it means that background open has never
+-         * been triggered. We need to release the pending reference. */
+-        fd_unref(ob_fd->fd);
+-
++    if (ob_fd->open_frame)
+         STACK_DESTROY(ob_fd->open_frame->root);
+-    }
+ 
+     GF_FREE(ob_fd);
+ }
+@@ -302,7 +297,6 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+         call_resume(stub);
+     }
+ 
+-    /* The background open is completed. We can release the 'fd' reference. */
+     fd_unref(fd);
+ 
+     STACK_DESTROY(frame->root);
+@@ -337,9 +331,7 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+     }
+ 
+     if (frame) {
+-        /* We don't need to take a reference here. We already have a reference
+-         * while the open is pending. */
+-        frame->local = fd;
++        frame->local = fd_ref(fd);
+ 
+         STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd,
+@@ -353,12 +345,15 @@ void
+ ob_inode_wake(xlator_t *this, struct list_head *ob_fds)
+ {
+     ob_fd_t *ob_fd = NULL, *tmp = NULL;
++    fd_t *fd = NULL;
+ 
+     if (!list_empty(ob_fds)) {
+         list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode)
+         {
+             ob_fd_wake(this, ob_fd->fd, ob_fd);
++            fd = ob_fd->fd;
+             ob_fd_free(ob_fd);
++            fd_unref(fd);
+         }
+     }
+ }
+@@ -370,7 +365,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst)
+     if (!src || !dst)
+         goto out;
+ 
+-    dst->fd = src->fd;
++    dst->fd = __fd_ref(src->fd);
+     dst->loc.inode = inode_ref(src->loc.inode);
+     gf_uuid_copy(dst->loc.gfid, src->loc.gfid);
+     dst->flags = src->flags;
+@@ -514,6 +509,7 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ 
+     ob_fd->ob_inode = ob_inode;
+ 
++    /* don't do fd_ref, it'll cause leaks */
+     ob_fd->fd = fd;
+ 
+     ob_fd->open_frame = copy_frame(frame);
+@@ -543,16 +539,15 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+     }
+     UNLOCK(&fd->inode->lock);
+ 
+-    /* We take a reference while the background open is pending or being
+-     * processed. If we finally wind the request in the foreground, then
+-     * ob_fd_free() will take care of this additional reference. */
+-    fd_ref(fd);
+-
+     if (!open_in_progress && !unlinked) {
++        fd_ref(fd);
++
+         STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata);
+ 
+         if (!conf->lazy_open)
+             ob_fd_wake(this, fd, NULL);
++
++        fd_unref(fd);
+     } else {
+         ob_fd_free(ob_fd);
+         STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+-- 
+1.8.3.1
+
diff --git a/0379-glusterd-add-missing-synccond_broadcast.patch b/0379-glusterd-add-missing-synccond_broadcast.patch
new file mode 100644
index 0000000..cd51c6d
--- /dev/null
+++ b/0379-glusterd-add-missing-synccond_broadcast.patch
@@ -0,0 +1,45 @@
+From e06882a7fea9720a2899f7d52d5d3866ff098866 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez
+Date: Thu, 21 May 2020 08:26:11 +0200
+Subject: [PATCH 379/379] glusterd: add missing synccond_broadcast()
+
+After the changes in commit 3da22f8cb08b05562a4c6bd2694f2f19199cff7f,
+there was a place where synccond_broadcast() was missing. It could
+cause a hang if another synctask was waiting on the condition variable.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24476
+> Change-Id: I92bfe4e15c5c3591e4854a64aa9e1566d50dd204
+> Fixes: #1116
+> Signed-off-by: Xavi Hernandez
+
+Change-Id: I92bfe4e15c5c3591e4854a64aa9e1566d50dd204
+BUG: 1810516
+Signed-off-by: Xavi Hernandez
+Reviewed-on: https://code.engineering.redhat.com/gerrit/201057
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index ce9931c..c92cdf3 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -6797,9 +6797,11 @@ glusterd_restart_bricks(void *opaque)
+     ret = 0;
+ 
+ out:
+-    GF_ATOMIC_DEC(conf->blockers);
+     conf->restart_done = _gf_true;
+     conf->restart_bricks = _gf_false;
++    if (GF_ATOMIC_DEC(conf->blockers) == 0) {
++        synccond_broadcast(&conf->cond_blockers);
++    }
+     synccond_broadcast(&conf->cond_restart_bricks);
+ 
+ return_block:
+     return ret;
+-- 
+1.8.3.1
+
diff --git a/glusterfs.spec b/glusterfs.spec
index 8fa530c..48d6be0 100644
--- a/glusterfs.spec
+++ b/glusterfs.spec
@@ -237,7 +237,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
 %else
 Name: glusterfs
 Version: 6.0
-Release: 34%{?dist}
+Release: 35%{?dist}
 ExcludeArch: i686
 %endif
 License: GPLv2 or LGPLv3+
@@ -690,6 +690,10 @@ Patch0372: 0372-posix-fix-seek-functionality.patch
 Patch0373: 0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch
 Patch0374: 0374-open-behind-fix-missing-fd-reference.patch
 Patch0375: 0375-features-shard-Send-correct-size-when-reads-are-sent.patch
+Patch0376: 0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch
+Patch0377: 0377-syncop-improve-scaling-and-implement-more-tools.patch
+Patch0378: 0378-Revert-open-behind-fix-missing-fd-reference.patch
+Patch0379: 0379-glusterd-add-missing-synccond_broadcast.patch
 
 %description
 GlusterFS is a distributed file-system capable of scaling to several
@@ -2430,9 +2434,21 @@ fi
 %endif
 
 %changelog
-* Fri May 15 2020 Rinku Kothiya - 6.0-34
+* Fri May 22 2020 Rinku Kothiya - 6.0-35
+- fixes bugs bz#1810516 bz#1830713 bz#1836233
+
+* Sun May 17 2020 Rinku Kothiya - 6.0-34
 - fixes bugs bz#1802013 bz#1823706 bz#1825177 bz#1830713 bz#1831403 bz#1833017
 
+* Wed Apr 29 2020 Rinku Kothiya - 6.0-33
+- fixes bugs bz#1812789 bz#1813917 bz#1823703 bz#1823706 bz#1825195
+
+* Sat Apr 04 2020 Rinku Kothiya - 6.0-32
+- fixes bugs bz#1781543 bz#1812789 bz#1812824 bz#1817369 bz#1819059
+
+* Tue Mar 17 2020 Rinku Kothiya - 6.0-31
+- fixes bugs bz#1802727
+
 * Thu Feb 20 2020 Rinku Kothiya - 6.0-30.1
 - fixes bugs bz#1800703