7674b73703
Resolves: bz#1668304 bz#1669020 Signed-off-by: Milind Changire <mchangir@redhat.com>
184 lines
7.1 KiB
Diff
184 lines
7.1 KiB
Diff
From f4d1a1683882a4da81240413dae1f6a390ee2442 Mon Sep 17 00:00:00 2001
|
|
From: Krutika Dhananjay <kdhananj@redhat.com>
|
|
Date: Thu, 24 Jan 2019 14:14:39 +0530
|
|
Subject: [PATCH 510/510] features/shard: Ref shard inode while adding to fsync
|
|
list
|
|
|
|
> Upstream: https://review.gluster.org/22091
|
|
> BUG: 1669077
|
|
> Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2
|
|
|
|
PROBLEM:
|
|
|
|
Lot of the earlier changes in the management of shards in lru, fsync
|
|
lists assumed that if a given shard exists in fsync list, it must be
|
|
part of lru list as well. This was found to be not true.
|
|
|
|
Consider this - a file is FALLOCATE'd to a size which would make the
|
|
number of participant shards to be greater than the lru list size.
|
|
In this case, some of the resolved shards that are to participate in
|
|
this fop will be evicted from lru list to give way to the rest of the
|
|
shards. And once FALLOCATE completes, these shards are added to fsync
|
|
list but without a ref. After the fop completes, these shard inodes
|
|
are unref'd and destroyed while their inode ctxs are still part of
|
|
fsync list. Now when an FSYNC is called on the base file and the
|
|
fsync-list traversed, the client crashes due to illegal memory access.
|
|
|
|
FIX:
|
|
|
|
Hold a ref on the shard inode when adding to fsync list as well.
|
|
And unref under following conditions:
|
|
1. when the shard is evicted from lru list
|
|
2. when the base file is fsync'd
|
|
3. when the shards are deleted.
|
|
|
|
Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2
|
|
BUG: 1668304
|
|
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/161397
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
---
|
|
tests/bugs/shard/bug-1669077.t | 29 +++++++++++++++++++++++++++++
|
|
xlators/features/shard/src/shard.c | 29 +++++++++++++++++++++--------
|
|
2 files changed, 50 insertions(+), 8 deletions(-)
|
|
create mode 100644 tests/bugs/shard/bug-1669077.t
|
|
|
|
diff --git a/tests/bugs/shard/bug-1669077.t b/tests/bugs/shard/bug-1669077.t
|
|
new file mode 100644
|
|
index 000000000..8d3a67a36
|
|
--- /dev/null
|
|
+++ b/tests/bugs/shard/bug-1669077.t
|
|
@@ -0,0 +1,29 @@
|
|
+#!/bin/bash
|
|
+
|
|
+. $(dirname $0)/../../include.rc
|
|
+. $(dirname $0)/../../volume.rc
|
|
+
|
|
+SHARD_COUNT_TIME=5
|
|
+
|
|
+cleanup
|
|
+
|
|
+TEST glusterd
|
|
+TEST pidof glusterd
|
|
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
|
|
+TEST $CLI volume set $V0 features.shard on
|
|
+TEST $CLI volume set $V0 features.shard-block-size 4MB
|
|
+TEST $CLI volume set $V0 features.shard-lru-limit 25
|
|
+
|
|
+TEST $CLI volume start $V0
|
|
+
|
|
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
|
|
+
|
|
+# If the bug still exists, client should crash during fallocate below
|
|
+TEST fallocate -l 200M $M0/foo
|
|
+
|
|
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
+
|
|
+TEST $CLI volume stop $V0
|
|
+TEST $CLI volume delete $V0
|
|
+
|
|
+cleanup
|
|
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
|
|
index 19dd3e4ba..cd388500d 100644
|
|
--- a/xlators/features/shard/src/shard.c
|
|
+++ b/xlators/features/shard/src/shard.c
|
|
@@ -272,6 +272,7 @@ shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this,
|
|
* of the to_fsync_list.
|
|
*/
|
|
inode_ref (base_inode);
|
|
+ inode_ref(shard_inode);
|
|
|
|
LOCK (&base_inode->lock);
|
|
LOCK (&shard_inode->lock);
|
|
@@ -285,8 +286,10 @@ shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this,
|
|
/* Unref the base inode corresponding to the ref above, if the shard is
|
|
* found to be already part of the fsync list.
|
|
*/
|
|
- if (ret != 0)
|
|
+ if (ret != 0) {
|
|
inode_unref (base_inode);
|
|
+ inode_unref(shard_inode);
|
|
+ }
|
|
return ret;
|
|
}
|
|
|
|
@@ -735,6 +738,10 @@ after_fsync_check:
|
|
block_bname);
|
|
inode_forget (lru_inode, 0);
|
|
} else {
|
|
+ /* The following unref corresponds to the ref
|
|
+ * held when the shard was added to fsync list.
|
|
+ */
|
|
+ inode_unref(lru_inode);
|
|
fsync_inode = lru_inode;
|
|
if (lru_base_inode)
|
|
inode_unref (lru_base_inode);
|
|
@@ -2947,7 +2954,7 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
|
|
shard_priv_t *priv = NULL;
|
|
shard_inode_ctx_t *ctx = NULL;
|
|
shard_inode_ctx_t *base_ictx = NULL;
|
|
- gf_boolean_t unlink_unref_forget = _gf_false;
|
|
+ int unref_shard_inode = 0;
|
|
|
|
this = THIS;
|
|
priv = this->private;
|
|
@@ -2973,11 +2980,12 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
|
|
list_del_init (&ctx->ilist);
|
|
priv->inode_count--;
|
|
unref_base_inode++;
|
|
+ unref_shard_inode++;
|
|
GF_ASSERT (priv->inode_count >= 0);
|
|
- unlink_unref_forget = _gf_true;
|
|
}
|
|
if (ctx->fsync_needed) {
|
|
unref_base_inode++;
|
|
+ unref_shard_inode++;
|
|
list_del_init (&ctx->to_fsync_list);
|
|
if (base_inode) {
|
|
__shard_inode_ctx_get (base_inode, this, &base_ictx);
|
|
@@ -2988,11 +2996,11 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
|
|
UNLOCK(&inode->lock);
|
|
if (base_inode)
|
|
UNLOCK(&base_inode->lock);
|
|
- if (unlink_unref_forget) {
|
|
- inode_unlink (inode, priv->dot_shard_inode, block_bname);
|
|
- inode_unref (inode);
|
|
- inode_forget (inode, 0);
|
|
- }
|
|
+
|
|
+ inode_unlink(inode, priv->dot_shard_inode, block_bname);
|
|
+ inode_ref_reduce_by_n(inode, unref_shard_inode);
|
|
+ inode_forget(inode, 0);
|
|
+
|
|
if (base_inode && unref_base_inode)
|
|
inode_ref_reduce_by_n (base_inode, unref_base_inode);
|
|
UNLOCK(&priv->lock);
|
|
@@ -5824,6 +5832,7 @@ shard_fsync_shards_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
shard_inode_ctx_t *ctx = NULL;
|
|
shard_inode_ctx_t *base_ictx = NULL;
|
|
inode_t *base_inode = NULL;
|
|
+ gf_boolean_t unref_shard_inode = _gf_false;
|
|
|
|
local = frame->local;
|
|
base_inode = local->fd->inode;
|
|
@@ -5858,11 +5867,15 @@ out:
|
|
list_add_tail (&ctx->to_fsync_list,
|
|
&base_ictx->to_fsync_list);
|
|
base_ictx->fsync_count++;
|
|
+ } else {
|
|
+ unref_shard_inode = _gf_true;
|
|
}
|
|
}
|
|
UNLOCK (&anon_fd->inode->lock);
|
|
UNLOCK (&base_inode->lock);
|
|
}
|
|
+ if (unref_shard_inode)
|
|
+ inode_unref(anon_fd->inode);
|
|
if (anon_fd)
|
|
fd_unref (anon_fd);
|
|
|
|
--
|
|
2.20.1
|
|
|