From dfef94e4a2313b399d93f4dbc368651478eb4f2c Mon Sep 17 00:00:00 2001
From: Milind Changire
Date: Mon, 14 Jan 2019 08:07:43 +0530
Subject: [PATCH] autobuild v3.12.2-37

Resolves: bz#1662059 bz#1662828 bz#1664529
Signed-off-by: Milind Changire
---
 ...th_check_thread_proc-crash-due-to-pr.patch |  92 +++++
 ...ss-is-crashed-at-the-time-of-spawn-t.patch | 202 ++++++++++
 ...ck-for-stbuf-in-dht_rmdir_lookup_cbk.patch |  39 ++
 ...ix-launch-of-multiple-synctasks-for-.patch | 378 ++++++++++++++++++
 ...ssign-fop-id-during-background-delet.patch |  49 +++
 glusterfs.spec                                |  12 +-
 6 files changed, 770 insertions(+), 2 deletions(-)
 create mode 100644 0502-posix-posix_health_check_thread_proc-crash-due-to-pr.patch
 create mode 100644 0503-core-brick-process-is-crashed-at-the-time-of-spawn-t.patch
 create mode 100644 0504-dht-Add-NULL-check-for-stbuf-in-dht_rmdir_lookup_cbk.patch
 create mode 100644 0505-features-shard-Fix-launch-of-multiple-synctasks-for-.patch
 create mode 100644 0506-features-shard-Assign-fop-id-during-background-delet.patch

diff --git a/0502-posix-posix_health_check_thread_proc-crash-due-to-pr.patch b/0502-posix-posix_health_check_thread_proc-crash-due-to-pr.patch
new file mode 100644
index 0000000..8c3df64
--- /dev/null
+++ b/0502-posix-posix_health_check_thread_proc-crash-due-to-pr.patch
@@ -0,0 +1,92 @@
+From 11f9ffcd733c95e8a728c150ff2ffc3dbeaddac1 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal
+Date: Fri, 11 Jan 2019 11:57:20 +0530
+Subject: [PATCH 502/506] posix: posix_health_check_thread_proc crash due to
+ priv is NULL
+
+Problem: posix_fini sends a cancellation request to the health_check
+         thread and cleans up priv without ensuring that the thread
+         has actually stopped running
+
+Solution: Make the health_check and disk_space threads joinable and call
+          gf_thread_cleanup_xint to wait until each thread has finished
+
+> Change-Id: I4d37b08138766881dab0922a47ed68a2c3411f13
+> fixes: bz#1636570
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21717/)
+> (Cherry pick from commit e82bcc33ed2d5cd54d3f918397f31818089299ad)
+
+Change-Id: I9edadd5bc445549b5f45bab98e4794d62a185f1c
+BUG: 1662828
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/160404
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/storage/posix/src/posix-helpers.c |  8 +++-----
+ xlators/storage/posix/src/posix.c         | 15 +++++++++++----
+ 2 files changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index cddf02e63..ed5d3e55e 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -1854,8 +1854,6 @@ posix_spawn_health_check_thread (xlator_t *xl)
+                         goto unlock;
+                 }
+ 
+-                /* run the thread detached, resources will be freed on exit */
+-                pthread_detach (priv->health_check);
+                 priv->health_check_active = _gf_true;
+         }
+ unlock:
+@@ -1958,9 +1956,9 @@ posix_spawn_disk_space_check_thread (xlator_t *xl)
+                 priv->disk_space_check_active = _gf_false;
+         }
+ 
+-        ret = gf_thread_create_detached (&priv->disk_space_check,
+-                                         posix_disk_space_check_thread_proc,
+-                                         xl, "posix_reserve");
++        ret = gf_thread_create (&priv->disk_space_check, NULL,
++                                posix_disk_space_check_thread_proc,
++                                xl, "posix_reserve");
+         if (ret < 0) {
+                 priv->disk_space_check_active = _gf_false;
+                 gf_msg (xl->name, GF_LOG_ERROR, errno,
+diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
+index 13b4aa6b6..591119ea9 100644
+--- a/xlators/storage/posix/src/posix.c
++++ b/xlators/storage/posix/src/posix.c
+@@ -7999,18 +7999,25 @@ void
+ fini (xlator_t *this)
+ {
+         struct posix_private *priv = this->private;
++        gf_boolean_t health_check = _gf_false;
++
+         if (!priv)
+                 return;
+         LOCK (&priv->lock);
+-        if (priv->health_check_active) {
++        {
++                health_check = priv->health_check_active;
+                 priv->health_check_active = _gf_false;
+-                pthread_cancel (priv->health_check);
+-                priv->health_check = 0;
+         }
+         UNLOCK (&priv->lock);
++
++        if (health_check) {
++                (void)gf_thread_cleanup_xint(priv->health_check);
++                priv->health_check = 0;
++        }
++
+         if (priv->disk_space_check) {
+                 priv->disk_space_check_active = _gf_false;
+-                pthread_cancel (priv->disk_space_check);
++                (void)gf_thread_cleanup_xint(priv->disk_space_check);
+                 priv->disk_space_check = 0;
+         }
+         if (priv->janitor) {
+-- 
+2.20.1
+
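
The shutdown pattern 0502 adopts, distilled into a minimal self-contained
pthreads sketch: keep the worker joinable, then cancel and *join* before
freeing the shared state. This assumes gf_thread_cleanup_xint wraps exactly
this cancel-then-join sequence; all other names below are hypothetical.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct priv { int active; pthread_t worker; };

static void *worker_proc(void *data)
{
        struct priv *p = data;   /* would be a use-after-free if priv were
                                    freed while this thread still runs */
        for (;;) {
                sleep(1);        /* sleep() is a cancellation point */
                printf("health check, active=%d\n", p->active);
        }
        return NULL;
}

int main(void)
{
        struct priv *p = calloc(1, sizeof(*p));
        if (!p || pthread_create(&p->worker, NULL, worker_proc, p) != 0)
                return 1;
        p->active = 1;
        sleep(2);

        /* fini(): cancel and join before freeing priv, so the worker can
         * no longer be running when its state disappears. A detached
         * thread offers no such rendezvous point. */
        pthread_cancel(p->worker);
        pthread_join(p->worker, NULL);
        free(p);
        return 0;
}
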
diff --git a/0503-core-brick-process-is-crashed-at-the-time-of-spawn-t.patch b/0503-core-brick-process-is-crashed-at-the-time-of-spawn-t.patch
new file mode 100644
index 0000000..a9df84b
--- /dev/null
+++ b/0503-core-brick-process-is-crashed-at-the-time-of-spawn-t.patch
@@ -0,0 +1,202 @@
+From 5989899b7aa5cc86e589c5ff20560476b959d98b Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal
+Date: Fri, 11 Jan 2019 12:42:20 +0530
+Subject: [PATCH 503/506] core: brick process is crashed at the time of spawn
+ thread
+
+Problem: the brick process crashes when pthread_detach is called
+         right after gf_thread_create. If sufficient resources are
+         not available on the system, pthread_create returns EAGAIN
+         (non-negative), but the caller function expects a negative
+         error code in case of failure
+
+Solution: Change the condition in the caller functions to avoid the crash
+
+> Change-Id: Ifeaa49f809957eb6c33aa9792f5af1b55566756d
+> fixes: bz#1662906
+> (Cherry pick from commit 1e28c54c5ec8d84ec8a22493161314010992918e)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21976/)
+
+Change-Id: I9e5c3de4b98236de22f834d66268ab21001817a1
+BUG: 1662828
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/160409
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/storage/posix/src/posix-helpers.c | 15 ++++++-----
+ xlators/storage/posix/src/posix.c         | 31 +++++++++++++++++------
+ xlators/storage/posix/src/posix.h         |  6 ++---
+ 3 files changed, 35 insertions(+), 17 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index ed5d3e55e..1137f1c41 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -1322,7 +1322,7 @@ posix_janitor_thread_proc (void *data)
+ }
+ 
+ 
+-void
++int
+ posix_spawn_janitor_thread (xlator_t *this)
+ {
+         struct posix_private *priv = NULL;
+@@ -1337,7 +1337,7 @@ posix_spawn_janitor_thread (xlator_t *this)
+                                 posix_janitor_thread_proc,
+                                 this, "posixjan");
+ 
+-        if (ret < 0) {
++        if (ret) {
+                 gf_msg (this->name, GF_LOG_ERROR, errno,
+                         P_MSG_THREAD_FAILED, "spawning janitor "
+                         "thread failed");
+@@ -1349,6 +1349,7 @@ posix_spawn_janitor_thread (xlator_t *this)
+         }
+ unlock:
+         UNLOCK (&priv->lock);
++        return ret;
+ }
+ 
+ static int
+@@ -1822,7 +1823,7 @@ abort:
+         return NULL;
+ }
+ 
+-void
++int
+ posix_spawn_health_check_thread (xlator_t *xl)
+ {
+         struct posix_private *priv = NULL;
+@@ -1845,7 +1846,7 @@ posix_spawn_health_check_thread (xlator_t *xl)
+         ret = gf_thread_create (&priv->health_check, NULL,
+                                 posix_health_check_thread_proc,
+                                 xl, "posixhc");
+-        if (ret < 0) {
++        if (ret) {
+                 priv->health_check_interval = 0;
+                 priv->health_check_active = _gf_false;
+                 gf_msg (xl->name, GF_LOG_ERROR, errno,
+@@ -1858,6 +1859,7 @@
+         }
+ unlock:
+         UNLOCK (&priv->lock);
++        return ret;
+ }
+ 
+ void
+@@ -1940,7 +1942,7 @@ out:
+         return NULL;
+ }
+ 
+-void
++int
+ posix_spawn_disk_space_check_thread (xlator_t *xl)
+ {
+         struct posix_private *priv = NULL;
+@@ -1959,7 +1961,7 @@ posix_spawn_disk_space_check_thread (xlator_t *xl)
+         ret = gf_thread_create (&priv->disk_space_check, NULL,
+                                 posix_disk_space_check_thread_proc,
+                                 xl, "posix_reserve");
+-        if (ret < 0) {
++        if (ret) {
+                 priv->disk_space_check_active = _gf_false;
+                 gf_msg (xl->name, GF_LOG_ERROR, errno,
+                         P_MSG_DISK_SPACE_CHECK_FAILED,
+@@ -1971,6 +1973,7 @@
+         }
+ unlock:
+         UNLOCK (&priv->lock);
++        return ret;
+ }
+ 
+ int
+diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
+index 591119ea9..8a6282d29 100644
+--- a/xlators/storage/posix/src/posix.c
++++ b/xlators/storage/posix/src/posix.c
+@@ -7317,12 +7317,19 @@ reconfigure (xlator_t *this, dict_t *options)
+ 
+         GF_OPTION_RECONF ("reserve", priv->disk_reserve,
+                           options, uint32, out);
+-        if (priv->disk_reserve)
+-                posix_spawn_disk_space_check_thread (this);
++        if (priv->disk_reserve) {
++                ret = posix_spawn_disk_space_check_thread (this);
++                if (ret)
++                        goto out;
++        }
+ 
+         GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
+                           options, uint32, out);
+-        posix_spawn_health_check_thread (this);
++        if (priv->health_check_interval) {
++                ret = posix_spawn_health_check_thread (this);
++                if (ret)
++                        goto out;
++        }
+ 
+         GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count,
+                           options, int32, out);
+@@ -7925,20 +7932,28 @@ init (xlator_t *this)
+         _private->disk_space_full = 0;
+         GF_OPTION_INIT ("reserve",
+                         _private->disk_reserve, uint32, out);
+-        if (_private->disk_reserve)
+-                posix_spawn_disk_space_check_thread (this);
++        if (_private->disk_reserve) {
++                ret = posix_spawn_disk_space_check_thread (this);
++                if (ret)
++                        goto out;
++        }
+ 
+         _private->health_check_active = _gf_false;
+         GF_OPTION_INIT ("health-check-interval",
+                         _private->health_check_interval, uint32, out);
+-        if (_private->health_check_interval)
+-                posix_spawn_health_check_thread (this);
++        if (_private->health_check_interval) {
++                ret = posix_spawn_health_check_thread (this);
++                if (ret)
++                        goto out;
++        }
+ 
+         pthread_mutex_init (&_private->janitor_lock, NULL);
+         pthread_cond_init (&_private->janitor_cond, NULL);
+         INIT_LIST_HEAD (&_private->janitor_fds);
+ 
+-        posix_spawn_janitor_thread (this);
++        ret = posix_spawn_janitor_thread (this);
++        if (ret)
++                goto out;
+ 
+         pthread_mutex_init (&_private->fsync_mutex, NULL);
+         pthread_cond_init (&_private->fsync_cond, NULL);
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index bda41726c..cb8dc8acc 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -305,7 +305,7 @@ int posix_handle_pair (xlator_t *this, const char *real_path, char *key,
+                        data_t *value, int flags, struct iatt *stbuf);
+ int posix_fhandle_pair (xlator_t *this, int fd, char *key, data_t *value,
+                         int flags, struct iatt *stbuf);
+-void posix_spawn_janitor_thread (xlator_t *this);
++int posix_spawn_janitor_thread (xlator_t *this);
+ int posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req);
+ int posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req);
+ int posix_entry_create_xattr_set (xlator_t *this, const char *path,
+@@ -320,9 +320,9 @@ gf_boolean_t posix_special_xattr (char **pattern, char *key);
+ void
+ __posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
+                         off_t offset, size_t size);
+-void posix_spawn_health_check_thread (xlator_t *this);
++int posix_spawn_health_check_thread (xlator_t *this);
+ 
+-void posix_spawn_disk_space_check_thread (xlator_t *this);
++int posix_spawn_disk_space_check_thread (xlator_t *this);
+ 
+ void *posix_fsyncer (void *);
+ int
+-- 
+2.20.1
+
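
The crux of 0503 in isolation: pthread_create reports failure with a
*positive* errno value (e.g. EAGAIN), never a negative one, so a "ret < 0"
check silently treats failure as success. A small standalone illustration,
assuming gf_thread_create passes that return value through unchanged:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <string.h>

static void *noop(void *arg) { return arg; }

static int spawn_checked(pthread_t *tid)
{
        int ret = pthread_create(tid, NULL, noop, NULL);

        if (ret < 0)            /* the buggy check: can never fire */
                fprintf(stderr, "old check would fire: %d\n", ret);
        if (ret)                /* the fixed check: catches EAGAIN etc. */
                fprintf(stderr, "spawn failed: %s\n", strerror(ret));
        return ret;             /* 0 on success, positive errno on failure */
}

int main(void)
{
        pthread_t tid;
        if (spawn_checked(&tid) == 0)
                pthread_join(tid, NULL);
        return 0;
}
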
diff --git a/0504-dht-Add-NULL-check-for-stbuf-in-dht_rmdir_lookup_cbk.patch b/0504-dht-Add-NULL-check-for-stbuf-in-dht_rmdir_lookup_cbk.patch
new file mode 100644
index 0000000..03cd944
--- /dev/null
+++ b/0504-dht-Add-NULL-check-for-stbuf-in-dht_rmdir_lookup_cbk.patch
@@ -0,0 +1,39 @@
+From b6feae228a82f34b88b67b8a8f8fa55189dcdb72 Mon Sep 17 00:00:00 2001
+From: Susant Palai
+Date: Fri, 11 Jan 2019 13:00:59 +0530
+Subject: [PATCH 504/506] dht: Add NULL check for stbuf in dht_rmdir_lookup_cbk
+
+> Change-Id: I2ced288113a369cc6497a77ac1871007df434da4
+> fixes: bz#1664647
+> Signed-off-by: Susant Palai
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22004/
+
+Change-Id: Ib53b06f6bbf20e0fbf1663cddf7fd76b37f23186
+BUG: 1664529
+Signed-off-by: Susant Palai
+Reviewed-on: https://code.engineering.redhat.com/gerrit/160438
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/cluster/dht/src/dht-common.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index ff0099c1d..767c6a868 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -9842,8 +9842,8 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ 
+                 gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                         DHT_MSG_FILE_LOOKUP_FAILED,
+-                        "lookup failed for %s on %s (type=0%o)",
+-                        local->loc.path, src->name, stbuf->ia_type);
++                        "lookup failed for %s on %s", local->loc.path,
++                        src->name);
+                 goto err;
+             }
+ 
+-- 
+2.20.1
+
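
The defect class behind 0504, reduced to a standalone sketch: on the
failure path of a lookup callback the out-parameters (here the stat
buffer) may legitimately be NULL, so the error log must not dereference
them. Names below are illustrative, not the dht_rmdir_lookup_cbk signature:

#include <stdio.h>

struct iatt_like { unsigned int ia_type; };

static void lookup_cbk(int op_ret, int op_errno, const char *path,
                       const struct iatt_like *stbuf)
{
        if (op_ret < 0) {
                /* buggy form: printf("... type=0%o", stbuf->ia_type)
                 * dereferences a possibly-NULL stbuf just to enrich the
                 * message; log only what is guaranteed to exist. */
                fprintf(stderr, "lookup failed for %s (errno=%d)\n",
                        path, op_errno);
                return;
        }
        printf("lookup of %s succeeded, type=0%o\n", path, stbuf->ia_type);
}

int main(void)
{
        struct iatt_like st = { .ia_type = 040 };
        lookup_cbk(-1, 2, "/dir", NULL);       /* failure: stbuf is NULL */
        lookup_cbk(0, 0, "/dir", &st);         /* success path */
        return 0;
}
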
diff --git a/0505-features-shard-Fix-launch-of-multiple-synctasks-for-.patch b/0505-features-shard-Fix-launch-of-multiple-synctasks-for-.patch
new file mode 100644
index 0000000..b471bac
--- /dev/null
+++ b/0505-features-shard-Fix-launch-of-multiple-synctasks-for-.patch
@@ -0,0 +1,378 @@
+From a7ade5267ebaf4bf318ee2aebe48000cee583e3b Mon Sep 17 00:00:00 2001
+From: Krutika Dhananjay
+Date: Fri, 28 Dec 2018 18:53:15 +0530
+Subject: [PATCH 505/506] features/shard: Fix launch of multiple synctasks for
+ background deletion
+
+> Upstream: https://review.gluster.org/21957
+> BUG: 1662368
+> Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e
+
+PROBLEM:
+
+When multiple sharded files are deleted in quick succession, several
+issues were observed:
+1. misleading logs corresponding to a sharded file: while one log
+   message said the shards corresponding to the file were deleted
+   successfully, it was followed by multiple logs suggesting the very
+   same operation failed. This was because multiple synctasks
+   attempted to clean up shards of the same file and only one of them
+   succeeded (the one that acquired the ENTRYLK successfully), while
+   the rest of them logged failure.
+
+2. multiple synctasks to do background deletion would be launched, one
+   for each deleted file, and all of them could readdir entries from
+   .remove_me at the same time and contend for ENTRYLK on .shard for
+   each of the entry names. This is undesirable and wasteful.
+
+FIX:
+Background deletion will now follow a state machine. In the event that
+there are multiple attempts to launch a synctask for background deletion,
+one for each file deleted, only the first task is launched. And if, while
+this task is doing the cleanup, more attempts are made to delete other
+files, the state of the synctask is adjusted so that it restarts the
+crawl even after reaching end-of-directory to pick up any files it may
+have missed in the previous iteration.
+
+This patch also fixes an uninitialized lk-owner during syncop_entrylk()
+which was leading to multiple background deletion synctasks entering
+the critical section at the same time and to illegal memory access
+of the base inode in the second synctask after it was destroyed post
+shard deletion by the first synctask.
+
+Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e
+BUG: 1662059
+Signed-off-by: Krutika Dhananjay
+Reviewed-on: https://code.engineering.redhat.com/gerrit/160437
+Tested-by: RHGS Build Bot
+Reviewed-by: Xavi Hernandez
+---
+ xlators/features/shard/src/shard.c | 199 +++++++++++++++++++----------
+ xlators/features/shard/src/shard.h |  12 +-
+ 2 files changed, 136 insertions(+), 75 deletions(-)
+
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 5b72399f5..8aed1a386 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -1465,16 +1465,45 @@ int
+ shard_start_background_deletion (xlator_t *this)
+ {
+         int              ret           = 0;
++        gf_boolean_t     i_cleanup     = _gf_true;
++        shard_priv_t    *priv          = NULL;
+         call_frame_t    *cleanup_frame = NULL;
+ 
++        priv = this->private;
++
++        LOCK(&priv->lock);
++        {
++                switch (priv->bg_del_state) {
++                case SHARD_BG_DELETION_NONE:
++                        i_cleanup = _gf_true;
++                        priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
++                        break;
++                case SHARD_BG_DELETION_LAUNCHING:
++                        i_cleanup = _gf_false;
++                        break;
++                case SHARD_BG_DELETION_IN_PROGRESS:
++                        priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
++                        i_cleanup = _gf_false;
++                        break;
++                default:
++                        break;
++                }
++        }
++        UNLOCK(&priv->lock);
++        if (!i_cleanup)
++                return 0;
++
+         cleanup_frame = create_frame (this, this->ctx->pool);
+         if (!cleanup_frame) {
+                 gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+                         SHARD_MSG_MEMALLOC_FAILED, "Failed to create "
+                         "new frame to delete shards");
+-                return -ENOMEM;
++                ret = -ENOMEM;
++                goto err;
+         }
+ 
++        set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
++
+         ret = synctask_new (this->ctx->env, shard_delete_shards,
+                             shard_delete_shards_cbk, cleanup_frame,
+                             cleanup_frame);
+@@ -1484,7 +1513,16 @@ shard_start_background_deletion (xlator_t *this)
+                         "failed to create task to do background "
+                         "cleanup of shards");
+                 STACK_DESTROY (cleanup_frame->root);
++                goto err;
+         }
++        return 0;
++
++err:
++        LOCK(&priv->lock);
++        {
++                priv->bg_del_state = SHARD_BG_DELETION_NONE;
++        }
++        UNLOCK(&priv->lock);
+         return ret;
+ }
+ 
+@@ -1493,7 +1531,7 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, inode_t *inode,
+                   struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+ {
+-        int             ret             = 0;
++        int             ret             = -1;
+         shard_priv_t   *priv            = NULL;
+         gf_boolean_t    i_start_cleanup = _gf_false;
+ 
+@@ -1526,22 +1564,23 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ 
+         LOCK (&priv->lock);
+         {
+-                if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) {
+-                        priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS;
++                if (priv->first_lookup_done == _gf_false) {
++                        priv->first_lookup_done = _gf_true;
+                         i_start_cleanup = _gf_true;
+                 }
+         }
+         UNLOCK (&priv->lock);
+ 
+-        if (i_start_cleanup) {
+-                ret = shard_start_background_deletion (this);
+-                if (ret) {
+-                        LOCK (&priv->lock);
+-                        {
+-                                priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
+-                        }
+-                        UNLOCK (&priv->lock);
++        if (!i_start_cleanup)
++                goto unwind;
++
++        ret = shard_start_background_deletion(this);
++        if (ret < 0) {
++                LOCK(&priv->lock);
++                {
++                        priv->first_lookup_done = _gf_false;
+                 }
++                UNLOCK(&priv->lock);
+         }
+ unwind:
+         SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+@@ -2940,9 +2979,10 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
+                 if (ctx->fsync_needed) {
+                         unref_base_inode++;
+                         list_del_init (&ctx->to_fsync_list);
+-                        if (base_inode)
++                        if (base_inode) {
+                                 __shard_inode_ctx_get (base_inode, this, &base_ictx);
+-                        base_ictx->fsync_count--;
++                                base_ictx->fsync_count--;
++                        }
+                 }
+         }
+         UNLOCK(&inode->lock);
+@@ -3334,10 +3374,15 @@ shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this,
+         loc.inode = inode_ref (priv->dot_shard_rm_inode);
+ 
+         ret = syncop_entrylk (FIRST_CHILD(this), this->name, &loc,
+-                              entry->d_name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL,
+-                              NULL);
+-        if (ret)
++                              entry->d_name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
++                              NULL, NULL);
++        if (ret < 0) {
++                if (ret == -EAGAIN) {
++                        ret = 0;
++                }
+                 goto out;
++        }
++
+         {
+                 ret = __shard_delete_shards_of_entry (cleanup_frame, this,
+                                                       entry, inode);
+@@ -3352,20 +3397,6 @@ out:
+ int
+ shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data)
+ {
+-        xlator_t *this = NULL;
+-        shard_priv_t *priv = NULL;
+-
+-        this = frame->this;
+-        priv = this->private;
+-
+-        if (ret < 0) {
+-                gf_msg (this->name, GF_LOG_WARNING, -ret,
+-                        SHARD_MSG_SHARDS_DELETION_FAILED,
+-                        "Background deletion of shards failed");
+-                priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
+-        } else {
+-                priv->first_lookup = SHARD_FIRST_LOOKUP_DONE;
+-        }
+         SHARD_STACK_DESTROY (frame);
+         return 0;
+ }
+@@ -3482,6 +3513,7 @@ shard_delete_shards (void *opaque)
+         gf_dirent_t      entries;
+         gf_dirent_t     *entry         = NULL;
+         call_frame_t    *cleanup_frame = NULL;
++        gf_boolean_t     done          = _gf_false;
+ 
+         this = THIS;
+         priv = this->private;
+@@ -3534,52 +3566,81 @@ shard_delete_shards (void *opaque)
+                 goto err;
+         }
+ 
+-        while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, 131072,
+-                                       offset, &entries, local->xattr_req,
+-                                       NULL))) {
+-                if (ret > 0)
+-                        ret = 0;
+-                list_for_each_entry (entry, &entries.list, list) {
+-                        offset = entry->d_off;
+-
+-                        if (!strcmp (entry->d_name, ".") ||
+-                            !strcmp (entry->d_name, ".."))
+-                                continue;
++        for (;;) {
++                offset = 0;
++                LOCK(&priv->lock);
++                {
++                        if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
++                                priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
++                        } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
++                                priv->bg_del_state = SHARD_BG_DELETION_NONE;
++                                done = _gf_true;
++                        }
++                }
++                UNLOCK(&priv->lock);
++                if (done)
++                        break;
++                while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd,
++                                               131072, offset, &entries,
++                                               local->xattr_req, NULL))) {
++                        if (ret > 0)
++                                ret = 0;
++                        list_for_each_entry (entry, &entries.list, list) {
++                                offset = entry->d_off;
++
++                                if (!strcmp (entry->d_name, ".") ||
++                                    !strcmp (entry->d_name, ".."))
++                                        continue;
+ 
+-                        if (!entry->inode) {
+-                                ret = shard_lookup_marker_entry (this, local,
+-                                                                 entry);
+-                                if (ret < 0)
++                                if (!entry->inode) {
++                                        ret = shard_lookup_marker_entry (this,
++                                                                         local,
++                                                                         entry);
++                                        if (ret < 0)
++                                                continue;
++                                }
++                                link_inode = inode_link (entry->inode,
++                                                         local->fd->inode,
++                                                         entry->d_name,
++                                                         &entry->d_stat);
++
++                                gf_msg_debug (this->name, 0, "Initiating "
++                                              "deletion of shards of gfid %s",
++                                              entry->d_name);
++                                ret = shard_delete_shards_of_entry (cleanup_frame,
++                                                                    this,
++                                                                    entry,
++                                                                    link_inode);
++                                inode_unlink (link_inode, local->fd->inode,
++                                              entry->d_name);
++                                inode_unref (link_inode);
++                                if (ret) {
++                                        gf_msg (this->name, GF_LOG_ERROR, -ret,
++                                                SHARD_MSG_SHARDS_DELETION_FAILED,
++                                                "Failed to clean up shards of "
++                                                "gfid %s", entry->d_name);
+                                         continue;
+-                        }
+-                        link_inode = inode_link (entry->inode, local->fd->inode,
+-                                                 entry->d_name, &entry->d_stat);
+-
+-                        gf_msg_debug (this->name, 0, "Initiating deletion of "
+-                                      "shards of gfid %s", entry->d_name);
+-                        ret = shard_delete_shards_of_entry (cleanup_frame, this,
+-                                                            entry, link_inode);
+-                        inode_unlink (link_inode, local->fd->inode,
+-                                      entry->d_name);
+-                        inode_unref (link_inode);
+-                        if (ret) {
+-                                gf_msg (this->name, GF_LOG_ERROR, -ret,
+-                                        SHARD_MSG_SHARDS_DELETION_FAILED,
+-                                        "Failed to clean up shards of gfid %s",
++                                }
++                                gf_msg (this->name, GF_LOG_INFO, 0,
++                                        SHARD_MSG_SHARDS_DELETION_COMPLETED,
++                                        "Deleted shards of gfid=%s from backend",
+                                         entry->d_name);
+-                                continue;
+                         }
+-                        gf_msg (this->name, GF_LOG_INFO, 0,
+-                                SHARD_MSG_SHARDS_DELETION_COMPLETED, "Deleted "
+-                                "shards of gfid=%s from backend",
+-                                entry->d_name);
++                        gf_dirent_free (&entries);
++                        if (ret)
++                                break;
+                 }
+-                gf_dirent_free (&entries);
+-                if (ret)
+-                        break;
+         }
+         ret = 0;
++        loc_wipe(&loc);
++        return ret;
++
+ err:
++        LOCK(&priv->lock);
++        {
++                priv->bg_del_state = SHARD_BG_DELETION_NONE;
++        }
++        UNLOCK(&priv->lock);
+         loc_wipe (&loc);
+         return ret;
+ }
+diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
+index ac3813c8c..37934f3a2 100644
+--- a/xlators/features/shard/src/shard.h
++++ b/xlators/features/shard/src/shard.h
+@@ -196,11 +196,10 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
+         } while (0)
+ 
+ typedef enum {
+-        SHARD_FIRST_LOOKUP_PENDING = 0,
+-        SHARD_FIRST_LOOKUP_IN_PROGRESS,
+-        SHARD_FIRST_LOOKUP_DONE,
+-} shard_first_lookup_state_t;
+-
++        SHARD_BG_DELETION_NONE = 0,
++        SHARD_BG_DELETION_LAUNCHING,
++        SHARD_BG_DELETION_IN_PROGRESS,
++} shard_bg_deletion_state_t;
+ /* rm = "remove me" */
+ 
+ typedef struct shard_priv {
+@@ -213,7 +212,8 @@ typedef struct shard_priv {
+         int inode_count;
+         struct list_head ilist_head;
+         uint32_t deletion_rate;
+-        shard_first_lookup_state_t first_lookup;
++        shard_bg_deletion_state_t bg_del_state;
++        gf_boolean_t first_lookup_done;
+         uint64_t lru_limit;
+ } shard_priv_t;
+ 
+-- 
+2.20.1
+
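
The launch state machine 0505 introduces, reduced to a standalone pthreads
sketch under hypothetical names: concurrent start requests collapse into a
single worker, and a request arriving mid-crawl flips the state back to
LAUNCHING so the worker rescans instead of exiting:

#include <pthread.h>
#include <stdio.h>

enum bg_state { BG_NONE, BG_LAUNCHING, BG_IN_PROGRESS };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static enum bg_state state = BG_NONE;

static void crawl_once(void) { puts("scanning .remove_me ..."); }

static void *deleter(void *arg)
{
        (void)arg;
        for (;;) {
                int done = 0;
                pthread_mutex_lock(&lock);
                if (state == BG_LAUNCHING)
                        state = BG_IN_PROGRESS;    /* fresh work queued */
                else if (state == BG_IN_PROGRESS) {
                        state = BG_NONE;           /* nothing new: stop */
                        done = 1;
                }
                pthread_mutex_unlock(&lock);
                if (done)
                        break;
                crawl_once();
        }
        return NULL;
}

/* Called once per deleted file; only the first caller spawns the worker. */
static int start_background_deletion(pthread_t *tid)
{
        int i_cleanup = 0;
        pthread_mutex_lock(&lock);
        if (state == BG_NONE) {
                state = BG_LAUNCHING;
                i_cleanup = 1;             /* we won the race: spawn */
        } else if (state == BG_IN_PROGRESS) {
                state = BG_LAUNCHING;      /* ask the worker to rescan */
        }                                  /* BG_LAUNCHING: nothing to do */
        pthread_mutex_unlock(&lock);
        if (!i_cleanup)
                return 0;
        return pthread_create(tid, NULL, deleter, NULL);
}

int main(void)
{
        pthread_t tid;
        if (start_background_deletion(&tid) != 0)
                return 1;
        start_background_deletion(&tid);   /* second unlink: no new task */
        pthread_join(tid, NULL);
        return 0;
}
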
"..")) ++ continue; + +- if (!entry->inode) { +- ret = shard_lookup_marker_entry (this, local, +- entry); +- if (ret < 0) ++ if (!entry->inode) { ++ ret = shard_lookup_marker_entry (this, ++ local, ++ entry); ++ if (ret < 0) ++ continue; ++ } ++ link_inode = inode_link (entry->inode, ++ local->fd->inode, ++ entry->d_name, ++ &entry->d_stat); ++ ++ gf_msg_debug (this->name, 0, "Initiating " ++ "deletion of shards of gfid %s", ++ entry->d_name); ++ ret = shard_delete_shards_of_entry (cleanup_frame, ++ this, ++ entry, ++ link_inode); ++ inode_unlink (link_inode, local->fd->inode, ++ entry->d_name); ++ inode_unref (link_inode); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to clean up shards of " ++ "gfid %s", entry->d_name); + continue; +- } +- link_inode = inode_link (entry->inode, local->fd->inode, +- entry->d_name, &entry->d_stat); +- +- gf_msg_debug (this->name, 0, "Initiating deletion of " +- "shards of gfid %s", entry->d_name); +- ret = shard_delete_shards_of_entry (cleanup_frame, this, +- entry, link_inode); +- inode_unlink (link_inode, local->fd->inode, +- entry->d_name); +- inode_unref (link_inode); +- if (ret) { +- gf_msg (this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to clean up shards of gfid %s", ++ } ++ gf_msg (this->name, GF_LOG_INFO, 0, ++ SHARD_MSG_SHARDS_DELETION_COMPLETED, ++ "Deleted shards of gfid=%s from backend", + entry->d_name); +- continue; + } +- gf_msg (this->name, GF_LOG_INFO, 0, +- SHARD_MSG_SHARDS_DELETION_COMPLETED, "Deleted " +- "shards of gfid=%s from backend", +- entry->d_name); ++ gf_dirent_free (&entries); ++ if (ret) ++ break; + } +- gf_dirent_free (&entries); +- if (ret) +- break; + } + ret = 0; ++ loc_wipe(&loc); ++ return ret; ++ + err: ++ LOCK(&priv->lock); ++ { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ } ++ UNLOCK(&priv->lock); + loc_wipe (&loc); + return ret; + } +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index ac3813c8c..37934f3a2 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -196,11 +196,10 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); + } while (0) + + typedef enum { +- SHARD_FIRST_LOOKUP_PENDING = 0, +- SHARD_FIRST_LOOKUP_IN_PROGRESS, +- SHARD_FIRST_LOOKUP_DONE, +-} shard_first_lookup_state_t; +- ++ SHARD_BG_DELETION_NONE = 0, ++ SHARD_BG_DELETION_LAUNCHING, ++ SHARD_BG_DELETION_IN_PROGRESS, ++} shard_bg_deletion_state_t; + /* rm = "remove me" */ + + typedef struct shard_priv { +@@ -213,7 +212,8 @@ typedef struct shard_priv { + int inode_count; + struct list_head ilist_head; + uint32_t deletion_rate; +- shard_first_lookup_state_t first_lookup; ++ shard_bg_deletion_state_t bg_del_state; ++ gf_boolean_t first_lookup_done; + uint64_t lru_limit; + } shard_priv_t; + +-- +2.20.1 + diff --git a/0506-features-shard-Assign-fop-id-during-background-delet.patch b/0506-features-shard-Assign-fop-id-during-background-delet.patch new file mode 100644 index 0000000..01cda18 --- /dev/null +++ b/0506-features-shard-Assign-fop-id-during-background-delet.patch @@ -0,0 +1,49 @@ +From 6f7a336da731a5113d8fdf9632f37ef181f04f9c Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Fri, 28 Dec 2018 07:27:11 +0530 +Subject: [PATCH 506/506] features/shard: Assign fop id during background + deletion to prevent excessive logging + +> Upstream: https://review.gluster.org/21946 +> BUG: 1662368 +> Change-Id: I0ca8d3b3bfbcd354b4a555eee520eb0479bcda35 + +... 
diff --git a/glusterfs.spec b/glusterfs.spec
index 1aeb724..2c8ec6c 100644
--- a/glusterfs.spec
+++ b/glusterfs.spec
@@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
 %else
 Name: glusterfs
 Version: 3.12.2
-Release: 36%{?dist}
+Release: 37%{?dist}
 %endif
 License: GPLv2 or LGPLv3+
 Group: System Environment/Base
@@ -766,6 +766,11 @@ Patch0498: 0498-mem-pool-Code-refactor-in-mem_pool.c.patch
 Patch0499: 0499-cluster-dht-Fix-incorrect-backport.patch
 Patch0500: 0500-extras-Add-readdir-ahead-to-samba-group-command.patch
 Patch0501: 0501-glusterd-aggregate-rsp-from-peers-for-profile-comman.patch
+Patch0502: 0502-posix-posix_health_check_thread_proc-crash-due-to-pr.patch
+Patch0503: 0503-core-brick-process-is-crashed-at-the-time-of-spawn-t.patch
+Patch0504: 0504-dht-Add-NULL-check-for-stbuf-in-dht_rmdir_lookup_cbk.patch
+Patch0505: 0505-features-shard-Fix-launch-of-multiple-synctasks-for-.patch
+Patch0506: 0506-features-shard-Assign-fop-id-during-background-delet.patch
 
 %description
 GlusterFS is a distributed file-system capable of scaling to several
@@ -1216,7 +1221,7 @@ while read p
 do
         # if the destination file exists, its most probably stale
         # so we must remove it
-        rename_to=( $(grep -i 'rename to' $p | cut -f 3 -d ' ') )
+        rename_to=( $(egrep '^rename to' $p | cut -f 3 -d ' ') )
         if [ ${#rename_to[*]} -gt 0 ]; then
                 for f in ${rename_to[*]}
                 do
@@ -2726,6 +2731,9 @@ fi
 %endif
 
 %changelog
+* Mon Jan 14 2019 Milind Changire - 3.12.2-37
+- fixes bugs bz#1662059 bz#1662828 bz#1664529
+
 * Fri Jan 04 2019 Milind Changire - 3.12.2-36
 - fixes bugs bz#1290124 bz#1655385 bz#1663232