dfef94e4a2
Resolves: bz#1662059 bz#1662828 bz#1664529 Signed-off-by: Milind Changire <mchangir@redhat.com>
379 lines
16 KiB
Diff
379 lines
16 KiB
Diff
From a7ade5267ebaf4bf318ee2aebe48000cee583e3b Mon Sep 17 00:00:00 2001
|
|
From: Krutika Dhananjay <kdhananj@redhat.com>
|
|
Date: Fri, 28 Dec 2018 18:53:15 +0530
|
|
Subject: [PATCH 505/506] features/shard: Fix launch of multiple synctasks for
|
|
background deletion
|
|
|
|
> Upstream: https://review.gluster.org/21957
|
|
> BUG: 1662368
|
|
> Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e
|
|
|
|
PROBLEM:
|
|
|
|
When multiple sharded files are deleted in quick succession, multiple
|
|
issues were observed:
|
|
1. misleading logs corresponding to a sharded file where while one log
|
|
message said the shards corresponding to the file were deleted
|
|
successfully, this was followed by multiple logs suggesting the very
|
|
same operation failed. This was because of multiple synctasks
|
|
attempting to clean up shards of the same file and only one of them
|
|
succeeding (the one that gets ENTRYLK successfully), and the rest of
|
|
them logging failure.
|
|
|
|
2. multiple synctasks to do background deletion would be launched, one
|
|
for each deleted file but all of them could readdir entries from
|
|
.remove_me at the same time could potentially contend for ENTRYLK on
|
|
.shard for each of the entry names. This is undesirable and wasteful.
|
|
|
|
FIX:
|
|
Background deletion will now follow a state machine. In the event that
|
|
there are multiple attempts to launch synctask for background deletion,
|
|
one for each file deleted, only the first task is launched. And if while
|
|
this task is doing the cleanup, more attempts are made to delete other
|
|
files, the state of the synctask is adjusted so that it restarts the
|
|
crawl even after reaching end-of-directory to pick up any files it may
|
|
have missed in the previous iteration.
|
|
|
|
This patch also fixes uninitialized lk-owner during syncop_entrylk()
|
|
which was leading to multiple background deletion synctasks entering
|
|
the critical section at the same time and leading to illegal memory access
|
|
of base inode in the second syntcask after it was destroyed post shard deletion
|
|
by the first synctask.
|
|
|
|
Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e
|
|
BUG: 1662059
|
|
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/160437
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
---
|
|
xlators/features/shard/src/shard.c | 199 +++++++++++++++++++----------
|
|
xlators/features/shard/src/shard.h | 12 +-
|
|
2 files changed, 136 insertions(+), 75 deletions(-)
|
|
|
|
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
|
|
index 5b72399f5..8aed1a386 100644
|
|
--- a/xlators/features/shard/src/shard.c
|
|
+++ b/xlators/features/shard/src/shard.c
|
|
@@ -1465,16 +1465,45 @@ int
|
|
shard_start_background_deletion (xlator_t *this)
|
|
{
|
|
int ret = 0;
|
|
+ gf_boolean_t i_cleanup = _gf_true;
|
|
+ shard_priv_t *priv = NULL;
|
|
call_frame_t *cleanup_frame = NULL;
|
|
|
|
+ priv = this->private;
|
|
+
|
|
+ LOCK(&priv->lock);
|
|
+ {
|
|
+ switch (priv->bg_del_state) {
|
|
+ case SHARD_BG_DELETION_NONE:
|
|
+ i_cleanup = _gf_true;
|
|
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
|
|
+ break;
|
|
+ case SHARD_BG_DELETION_LAUNCHING:
|
|
+ i_cleanup = _gf_false;
|
|
+ break;
|
|
+ case SHARD_BG_DELETION_IN_PROGRESS:
|
|
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
|
|
+ i_cleanup = _gf_false;
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ UNLOCK(&priv->lock);
|
|
+ if (!i_cleanup)
|
|
+ return 0;
|
|
+
|
|
cleanup_frame = create_frame (this, this->ctx->pool);
|
|
if (!cleanup_frame) {
|
|
gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
|
|
SHARD_MSG_MEMALLOC_FAILED, "Failed to create "
|
|
"new frame to delete shards");
|
|
- return -ENOMEM;
|
|
+ ret = -ENOMEM;
|
|
+ goto err;
|
|
}
|
|
|
|
+ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
|
|
+
|
|
ret = synctask_new (this->ctx->env, shard_delete_shards,
|
|
shard_delete_shards_cbk, cleanup_frame,
|
|
cleanup_frame);
|
|
@@ -1484,7 +1513,16 @@ shard_start_background_deletion (xlator_t *this)
|
|
"failed to create task to do background "
|
|
"cleanup of shards");
|
|
STACK_DESTROY (cleanup_frame->root);
|
|
+ goto err;
|
|
}
|
|
+ return 0;
|
|
+
|
|
+err:
|
|
+ LOCK(&priv->lock);
|
|
+ {
|
|
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
|
|
+ }
|
|
+ UNLOCK(&priv->lock);
|
|
return ret;
|
|
}
|
|
|
|
@@ -1493,7 +1531,7 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
int32_t op_ret, int32_t op_errno, inode_t *inode,
|
|
struct iatt *buf, dict_t *xdata, struct iatt *postparent)
|
|
{
|
|
- int ret = 0;
|
|
+ int ret = -1;
|
|
shard_priv_t *priv = NULL;
|
|
gf_boolean_t i_start_cleanup = _gf_false;
|
|
|
|
@@ -1526,22 +1564,23 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
|
LOCK (&priv->lock);
|
|
{
|
|
- if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) {
|
|
- priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS;
|
|
+ if (priv->first_lookup_done == _gf_false) {
|
|
+ priv->first_lookup_done = _gf_true;
|
|
i_start_cleanup = _gf_true;
|
|
}
|
|
}
|
|
UNLOCK (&priv->lock);
|
|
|
|
- if (i_start_cleanup) {
|
|
- ret = shard_start_background_deletion (this);
|
|
- if (ret) {
|
|
- LOCK (&priv->lock);
|
|
- {
|
|
- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
|
|
- }
|
|
- UNLOCK (&priv->lock);
|
|
+ if (!i_start_cleanup)
|
|
+ goto unwind;
|
|
+
|
|
+ ret = shard_start_background_deletion(this);
|
|
+ if (ret < 0) {
|
|
+ LOCK(&priv->lock);
|
|
+ {
|
|
+ priv->first_lookup_done = _gf_false;
|
|
}
|
|
+ UNLOCK(&priv->lock);
|
|
}
|
|
unwind:
|
|
SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
|
|
@@ -2940,9 +2979,10 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
|
|
if (ctx->fsync_needed) {
|
|
unref_base_inode++;
|
|
list_del_init (&ctx->to_fsync_list);
|
|
- if (base_inode)
|
|
+ if (base_inode) {
|
|
__shard_inode_ctx_get (base_inode, this, &base_ictx);
|
|
- base_ictx->fsync_count--;
|
|
+ base_ictx->fsync_count--;
|
|
+ }
|
|
}
|
|
}
|
|
UNLOCK(&inode->lock);
|
|
@@ -3334,10 +3374,15 @@ shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this,
|
|
loc.inode = inode_ref (priv->dot_shard_rm_inode);
|
|
|
|
ret = syncop_entrylk (FIRST_CHILD(this), this->name, &loc,
|
|
- entry->d_name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL,
|
|
- NULL);
|
|
- if (ret)
|
|
+ entry->d_name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
|
|
+ NULL, NULL);
|
|
+ if (ret < 0) {
|
|
+ if (ret == -EAGAIN) {
|
|
+ ret = 0;
|
|
+ }
|
|
goto out;
|
|
+ }
|
|
+
|
|
{
|
|
ret = __shard_delete_shards_of_entry (cleanup_frame, this,
|
|
entry, inode);
|
|
@@ -3352,20 +3397,6 @@ out:
|
|
int
|
|
shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data)
|
|
{
|
|
- xlator_t *this = NULL;
|
|
- shard_priv_t *priv = NULL;
|
|
-
|
|
- this = frame->this;
|
|
- priv = this->private;
|
|
-
|
|
- if (ret < 0) {
|
|
- gf_msg (this->name, GF_LOG_WARNING, -ret,
|
|
- SHARD_MSG_SHARDS_DELETION_FAILED,
|
|
- "Background deletion of shards failed");
|
|
- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
|
|
- } else {
|
|
- priv->first_lookup = SHARD_FIRST_LOOKUP_DONE;
|
|
- }
|
|
SHARD_STACK_DESTROY (frame);
|
|
return 0;
|
|
}
|
|
@@ -3482,6 +3513,7 @@ shard_delete_shards (void *opaque)
|
|
gf_dirent_t entries;
|
|
gf_dirent_t *entry = NULL;
|
|
call_frame_t *cleanup_frame = NULL;
|
|
+ gf_boolean_t done = _gf_false;
|
|
|
|
this = THIS;
|
|
priv = this->private;
|
|
@@ -3534,52 +3566,81 @@ shard_delete_shards (void *opaque)
|
|
goto err;
|
|
}
|
|
|
|
- while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, 131072,
|
|
- offset, &entries, local->xattr_req,
|
|
- NULL))) {
|
|
- if (ret > 0)
|
|
- ret = 0;
|
|
- list_for_each_entry (entry, &entries.list, list) {
|
|
- offset = entry->d_off;
|
|
-
|
|
- if (!strcmp (entry->d_name, ".") ||
|
|
- !strcmp (entry->d_name, ".."))
|
|
- continue;
|
|
+ for (;;) {
|
|
+ offset = 0;
|
|
+ LOCK(&priv->lock);
|
|
+ {
|
|
+ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
|
|
+ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
|
|
+ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
|
|
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
|
|
+ done = _gf_true;
|
|
+ }
|
|
+ }
|
|
+ UNLOCK(&priv->lock);
|
|
+ if (done)
|
|
+ break;
|
|
+ while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd,
|
|
+ 131072, offset, &entries,
|
|
+ local->xattr_req, NULL))) {
|
|
+ if (ret > 0)
|
|
+ ret = 0;
|
|
+ list_for_each_entry (entry, &entries.list, list) {
|
|
+ offset = entry->d_off;
|
|
+
|
|
+ if (!strcmp (entry->d_name, ".") ||
|
|
+ !strcmp (entry->d_name, ".."))
|
|
+ continue;
|
|
|
|
- if (!entry->inode) {
|
|
- ret = shard_lookup_marker_entry (this, local,
|
|
- entry);
|
|
- if (ret < 0)
|
|
+ if (!entry->inode) {
|
|
+ ret = shard_lookup_marker_entry (this,
|
|
+ local,
|
|
+ entry);
|
|
+ if (ret < 0)
|
|
+ continue;
|
|
+ }
|
|
+ link_inode = inode_link (entry->inode,
|
|
+ local->fd->inode,
|
|
+ entry->d_name,
|
|
+ &entry->d_stat);
|
|
+
|
|
+ gf_msg_debug (this->name, 0, "Initiating "
|
|
+ "deletion of shards of gfid %s",
|
|
+ entry->d_name);
|
|
+ ret = shard_delete_shards_of_entry (cleanup_frame,
|
|
+ this,
|
|
+ entry,
|
|
+ link_inode);
|
|
+ inode_unlink (link_inode, local->fd->inode,
|
|
+ entry->d_name);
|
|
+ inode_unref (link_inode);
|
|
+ if (ret) {
|
|
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
|
|
+ SHARD_MSG_SHARDS_DELETION_FAILED,
|
|
+ "Failed to clean up shards of "
|
|
+ "gfid %s", entry->d_name);
|
|
continue;
|
|
- }
|
|
- link_inode = inode_link (entry->inode, local->fd->inode,
|
|
- entry->d_name, &entry->d_stat);
|
|
-
|
|
- gf_msg_debug (this->name, 0, "Initiating deletion of "
|
|
- "shards of gfid %s", entry->d_name);
|
|
- ret = shard_delete_shards_of_entry (cleanup_frame, this,
|
|
- entry, link_inode);
|
|
- inode_unlink (link_inode, local->fd->inode,
|
|
- entry->d_name);
|
|
- inode_unref (link_inode);
|
|
- if (ret) {
|
|
- gf_msg (this->name, GF_LOG_ERROR, -ret,
|
|
- SHARD_MSG_SHARDS_DELETION_FAILED,
|
|
- "Failed to clean up shards of gfid %s",
|
|
+ }
|
|
+ gf_msg (this->name, GF_LOG_INFO, 0,
|
|
+ SHARD_MSG_SHARDS_DELETION_COMPLETED,
|
|
+ "Deleted shards of gfid=%s from backend",
|
|
entry->d_name);
|
|
- continue;
|
|
}
|
|
- gf_msg (this->name, GF_LOG_INFO, 0,
|
|
- SHARD_MSG_SHARDS_DELETION_COMPLETED, "Deleted "
|
|
- "shards of gfid=%s from backend",
|
|
- entry->d_name);
|
|
+ gf_dirent_free (&entries);
|
|
+ if (ret)
|
|
+ break;
|
|
}
|
|
- gf_dirent_free (&entries);
|
|
- if (ret)
|
|
- break;
|
|
}
|
|
ret = 0;
|
|
+ loc_wipe(&loc);
|
|
+ return ret;
|
|
+
|
|
err:
|
|
+ LOCK(&priv->lock);
|
|
+ {
|
|
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
|
|
+ }
|
|
+ UNLOCK(&priv->lock);
|
|
loc_wipe (&loc);
|
|
return ret;
|
|
}
|
|
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
|
|
index ac3813c8c..37934f3a2 100644
|
|
--- a/xlators/features/shard/src/shard.h
|
|
+++ b/xlators/features/shard/src/shard.h
|
|
@@ -196,11 +196,10 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
|
|
} while (0)
|
|
|
|
typedef enum {
|
|
- SHARD_FIRST_LOOKUP_PENDING = 0,
|
|
- SHARD_FIRST_LOOKUP_IN_PROGRESS,
|
|
- SHARD_FIRST_LOOKUP_DONE,
|
|
-} shard_first_lookup_state_t;
|
|
-
|
|
+ SHARD_BG_DELETION_NONE = 0,
|
|
+ SHARD_BG_DELETION_LAUNCHING,
|
|
+ SHARD_BG_DELETION_IN_PROGRESS,
|
|
+} shard_bg_deletion_state_t;
|
|
/* rm = "remove me" */
|
|
|
|
typedef struct shard_priv {
|
|
@@ -213,7 +212,8 @@ typedef struct shard_priv {
|
|
int inode_count;
|
|
struct list_head ilist_head;
|
|
uint32_t deletion_rate;
|
|
- shard_first_lookup_state_t first_lookup;
|
|
+ shard_bg_deletion_state_t bg_del_state;
|
|
+ gf_boolean_t first_lookup_done;
|
|
uint64_t lru_limit;
|
|
} shard_priv_t;
|
|
|
|
--
|
|
2.20.1
|
|
|