44012ad580
Resolves: bz#1378371 bz#1384983 bz#1472445 bz#1493085 bz#1508999 Resolves: bz#1516638 bz#1518260 bz#1529072 bz#1530519 bz#1537357 Resolves: bz#1540908 bz#1541122 bz#1541932 bz#1543068 bz#1544382 Resolves: bz#1544852 bz#1545570 bz#1546075 bz#1546945 bz#1546960 Resolves: bz#1547012 bz#1549497 Signed-off-by: Milind Changire <mchangir@redhat.com>
888 lines
35 KiB
Diff
888 lines
35 KiB
Diff
From 1c094d1dc70dbc5d08181ef64fb300c95f331aec Mon Sep 17 00:00:00 2001
|
|
From: Krutika Dhananjay <kdhananj@redhat.com>
|
|
Date: Wed, 6 Dec 2017 16:55:33 +0530
|
|
Subject: [PATCH 171/180] features/shard: Upon FSYNC from upper layers, wind
|
|
fsync on all changed shards
|
|
|
|
> Upstream: https://review.gluster.org/19566
|
|
> BUG: 1468483
|
|
> Change-Id: Ib74354f57a18569762ad45a51f182822a2537421
|
|
|
|
Change-Id: I93797a60e3449d02413d171babfb8e4292e3f2f6
|
|
BUG: 1493085
|
|
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/131737
|
|
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
---
|
|
tests/bugs/shard/bug-1468483.t | 58 +++
|
|
tests/bugs/shard/shard-inode-refcount-test.t | 2 +-
|
|
xlators/features/shard/src/shard-messages.h | 9 +-
|
|
xlators/features/shard/src/shard.c | 533 +++++++++++++++++++++++++--
|
|
xlators/features/shard/src/shard.h | 6 +
|
|
5 files changed, 569 insertions(+), 39 deletions(-)
|
|
create mode 100644 tests/bugs/shard/bug-1468483.t
|
|
|
|
diff --git a/tests/bugs/shard/bug-1468483.t b/tests/bugs/shard/bug-1468483.t
|
|
new file mode 100644
|
|
index 0000000..e462b8d
|
|
--- /dev/null
|
|
+++ b/tests/bugs/shard/bug-1468483.t
|
|
@@ -0,0 +1,58 @@
|
|
+#!/bin/bash
|
|
+
|
|
+. $(dirname $0)/../../include.rc
|
|
+. $(dirname $0)/../../volume.rc
|
|
+. $(dirname $0)/../../common-utils.rc
|
|
+
|
|
+cleanup
|
|
+
|
|
+TEST glusterd
|
|
+TEST pidof glusterd
|
|
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
|
|
+TEST $CLI volume set $V0 performance.write-behind off
|
|
+TEST $CLI volume set $V0 features.shard on
|
|
+TEST $CLI volume set $V0 features.shard-block-size 16MB
|
|
+TEST $CLI volume start $V0
|
|
+TEST $CLI volume profile $V0 start
|
|
+
|
|
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
|
|
+TEST dd if=/dev/zero conv=fsync of=$M0/foo bs=1M count=100
|
|
+
|
|
+#This should ensure /.shard is created on the bricks.
|
|
+TEST stat $B0/${V0}0/.shard
|
|
+
|
|
+gfid_foo=$(get_gfid_string $M0/foo)
|
|
+
|
|
+TEST stat $B0/${V0}0/.shard/$gfid_foo.1
|
|
+TEST stat $B0/${V0}0/.shard/$gfid_foo.2
|
|
+TEST stat $B0/${V0}0/.shard/$gfid_foo.3
|
|
+TEST stat $B0/${V0}0/.shard/$gfid_foo.4
|
|
+TEST stat $B0/${V0}0/.shard/$gfid_foo.5
|
|
+TEST stat $B0/${V0}0/.shard/$gfid_foo.6
|
|
+
|
|
+# For a file with 7 shards, there should be 7 fsyncs on the brick. Without this
|
|
+# fix, I was seeing only 1 fsync (on the base shard alone).
|
|
+
|
|
+EXPECT "7" echo `$CLI volume profile $V0 info incremental | grep -w FSYNC | awk '{print $8}'`
|
|
+
|
|
+useradd -M test_user 2>/dev/null
|
|
+
|
|
+TEST touch $M0/bar
|
|
+
|
|
+# Change ownership to non-root on bar.
|
|
+TEST chown test_user:test_user $M0/bar
|
|
+
|
|
+TEST $CLI volume profile $V0 stop
|
|
+TEST $CLI volume profile $V0 start
|
|
+
|
|
+# Write 100M of data on bar as non-root.
|
|
+TEST run_cmd_as_user test_user "dd if=/dev/zero conv=fsync of=$M0/bar bs=1M count=100"
|
|
+
|
|
+EXPECT "7" echo `$CLI volume profile $V0 info incremental | grep -w FSYNC | awk '{print $8}'`
|
|
+
|
|
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
+userdel test_user
|
|
+TEST $CLI volume stop $V0
|
|
+TEST $CLI volume delete $V0
|
|
+
|
|
+cleanup
|
|
diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t
|
|
index 6358097..03e0cc9 100644
|
|
--- a/tests/bugs/shard/shard-inode-refcount-test.t
|
|
+++ b/tests/bugs/shard/shard-inode-refcount-test.t
|
|
@@ -14,7 +14,7 @@ TEST $CLI volume start $V0
|
|
|
|
TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
|
|
|
|
-TEST dd if=/dev/zero of=$M0/one-plus-five-shards bs=1M count=23
|
|
+TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23
|
|
|
|
ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0)
|
|
TEST rm -f $M0/one-plus-five-shards
|
|
diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h
|
|
index 588cb68..8e61630 100644
|
|
--- a/xlators/features/shard/src/shard-messages.h
|
|
+++ b/xlators/features/shard/src/shard-messages.h
|
|
@@ -40,7 +40,7 @@
|
|
*/
|
|
|
|
#define GLFS_COMP_BASE_SHARD GLFS_MSGID_COMP_SHARD
|
|
-#define GLFS_NUM_MESSAGES 18
|
|
+#define GLFS_NUM_MESSAGES 19
|
|
#define GLFS_MSGID_END (GLFS_COMP_BASE_SHARD + GLFS_NUM_MESSAGES + 1)
|
|
|
|
#define glfs_msg_start_x GLFS_COMP_BASE_SHARD, "Invalid: Start of messages"
|
|
@@ -180,5 +180,12 @@
|
|
*/
|
|
#define SHARD_MSG_INVALID_FOP (GLFS_COMP_BASE_SHARD + 18)
|
|
|
|
+/*!
|
|
+ * @messageid 133019
|
|
+ * @diagnosis
|
|
+ * @recommendedaction
|
|
+*/
|
|
+#define SHARD_MSG_MEMALLOC_FAILED (GLFS_COMP_BASE_SHARD + 19)
|
|
+
|
|
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
|
|
#endif /* !_SHARD_MESSAGES_H_ */
|
|
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
|
|
index 49cf04a..a661345 100644
|
|
--- a/xlators/features/shard/src/shard.c
|
|
+++ b/xlators/features/shard/src/shard.c
|
|
@@ -76,6 +76,7 @@ __shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
|
|
return ret;
|
|
|
|
INIT_LIST_HEAD (&ctx_p->ilist);
|
|
+ INIT_LIST_HEAD (&ctx_p->to_fsync_list);
|
|
|
|
ret = __inode_ctx_set (inode, this, (uint64_t *)&ctx_p);
|
|
if (ret < 0) {
|
|
@@ -205,6 +206,65 @@ shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)
|
|
return ret;
|
|
}
|
|
|
|
+int
|
|
+__shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this,
|
|
+ inode_t *shard_inode)
|
|
+{
|
|
+ int ret = -1;
|
|
+ shard_inode_ctx_t *base_ictx = NULL;
|
|
+ shard_inode_ctx_t *shard_ictx = NULL;
|
|
+
|
|
+ ret = __shard_inode_ctx_get (base_inode, this, &base_ictx);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ ret = __shard_inode_ctx_get (shard_inode, this, &shard_ictx);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ if (shard_ictx->fsync_needed) {
|
|
+ shard_ictx->fsync_needed++;
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ list_add_tail (&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list);
|
|
+ shard_ictx->inode = shard_inode;
|
|
+ shard_ictx->fsync_needed++;
|
|
+ base_ictx->fsync_count++;
|
|
+ shard_ictx->base_inode = base_inode;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this,
|
|
+ inode_t *shard_inode)
|
|
+{
|
|
+ int ret = -1;
|
|
+
|
|
+        /* This ref acts as a ref-keeper on the base inode. We
|
|
+ * need to keep this inode alive as it holds the head
|
|
+ * of the to_fsync_list.
|
|
+ */
|
|
+ inode_ref (base_inode);
|
|
+
|
|
+ LOCK (&base_inode->lock);
|
|
+ LOCK (&shard_inode->lock);
|
|
+ {
|
|
+ ret = __shard_inode_ctx_add_to_fsync_list (base_inode, this,
|
|
+ shard_inode);
|
|
+ }
|
|
+ UNLOCK (&shard_inode->lock);
|
|
+ UNLOCK (&base_inode->lock);
|
|
+
|
|
+ /* Unref the base inode corresponding to the ref above, if the shard is
|
|
+ * found to be already part of the fsync list.
|
|
+ */
|
|
+ if (ret != 0)
|
|
+ inode_unref (base_inode);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
gf_boolean_t
|
|
__shard_inode_ctx_needs_lookup (inode_t *inode, xlator_t *this)
|
|
{
|
|
@@ -301,6 +361,40 @@ shard_inode_ctx_get_block_size (inode_t *inode, xlator_t *this,
|
|
}
|
|
|
|
int
|
|
+__shard_inode_ctx_get_fsync_count (inode_t *inode, xlator_t *this,
|
|
+ int *fsync_count)
|
|
+{
|
|
+ int ret = -1;
|
|
+ uint64_t ctx_uint = 0;
|
|
+ shard_inode_ctx_t *ctx = NULL;
|
|
+
|
|
+ ret = __inode_ctx_get (inode, this, &ctx_uint);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ ctx = (shard_inode_ctx_t *) ctx_uint;
|
|
+
|
|
+ *fsync_count = ctx->fsync_needed;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+shard_inode_ctx_get_fsync_count (inode_t *inode, xlator_t *this,
|
|
+ int *fsync_count)
|
|
+{
|
|
+ int ret = -1;
|
|
+
|
|
+ LOCK (&inode->lock);
|
|
+ {
|
|
+ ret = __shard_inode_ctx_get_fsync_count (inode, this,
|
|
+ fsync_count);
|
|
+ }
|
|
+ UNLOCK (&inode->lock);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+int
|
|
__shard_inode_ctx_get_all (inode_t *inode, xlator_t *this,
|
|
shard_inode_ctx_t *ctx_out)
|
|
{
|
|
@@ -482,15 +576,19 @@ out:
|
|
return ret;
|
|
}
|
|
|
|
-void
|
|
+inode_t *
|
|
__shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
|
|
inode_t *base_inode, int block_num)
|
|
{
|
|
- char block_bname[256] = {0,};
|
|
- inode_t *lru_inode = NULL;
|
|
- shard_priv_t *priv = NULL;
|
|
- shard_inode_ctx_t *ctx = NULL;
|
|
- shard_inode_ctx_t *lru_inode_ctx = NULL;
|
|
+ char block_bname[256] = {0,};
|
|
+ inode_t *lru_inode = NULL;
|
|
+ shard_priv_t *priv = NULL;
|
|
+ shard_inode_ctx_t *ctx = NULL;
|
|
+ shard_inode_ctx_t *lru_inode_ctx = NULL;
|
|
+ shard_inode_ctx_t *lru_base_inode_ctx = NULL;
|
|
+ inode_t *fsync_inode = NULL;
|
|
+ inode_t *lru_base_inode = NULL;
|
|
+ gf_boolean_t do_fsync = _gf_false;
|
|
|
|
priv = this->private;
|
|
|
|
@@ -510,6 +608,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
|
|
ctx->block_num = block_num;
|
|
list_add_tail (&ctx->ilist, &priv->ilist_head);
|
|
priv->inode_count++;
|
|
+ ctx->base_inode = base_inode;
|
|
} else {
|
|
/*If on the other hand there is no available slot for this inode
|
|
* in the list, delete the lru inode from the head of the list,
|
|
@@ -519,30 +618,56 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
|
|
shard_inode_ctx_t,
|
|
ilist);
|
|
GF_ASSERT (lru_inode_ctx->block_num > 0);
|
|
+ lru_base_inode = lru_inode_ctx->base_inode;
|
|
list_del_init (&lru_inode_ctx->ilist);
|
|
lru_inode = inode_find (linked_inode->table,
|
|
lru_inode_ctx->stat.ia_gfid);
|
|
- shard_make_block_bname (lru_inode_ctx->block_num,
|
|
- lru_inode_ctx->base_gfid,
|
|
- block_bname,
|
|
- sizeof (block_bname));
|
|
- inode_unlink (lru_inode, priv->dot_shard_inode,
|
|
- block_bname);
|
|
- /* The following unref corresponds to the ref held by
|
|
- * inode_find() above.
|
|
+ /* If the lru inode was part of the pending-fsync list,
|
|
+ * the base inode needs to be unref'd, the lru inode
|
|
+ * deleted from fsync list and fsync'd in a new frame,
|
|
+ * and then unlinked in memory and forgotten.
|
|
*/
|
|
- inode_unref (lru_inode);
|
|
+ LOCK (&lru_base_inode->lock);
|
|
+ LOCK (&lru_inode->lock);
|
|
+ {
|
|
+ if (!list_empty(&lru_inode_ctx->to_fsync_list)) {
|
|
+ list_del_init (&lru_inode_ctx->to_fsync_list);
|
|
+ lru_inode_ctx->fsync_needed = 0;
|
|
+ do_fsync = _gf_true;
|
|
+ __shard_inode_ctx_get (lru_base_inode, this, &lru_base_inode_ctx);
|
|
+ lru_base_inode_ctx->fsync_count--;
|
|
+ }
|
|
+ }
|
|
+ UNLOCK (&lru_inode->lock);
|
|
+ UNLOCK (&lru_base_inode->lock);
|
|
+
|
|
+ if (!do_fsync) {
|
|
+ shard_make_block_bname (lru_inode_ctx->block_num,
|
|
+ lru_inode_ctx->base_gfid,
|
|
+ block_bname,
|
|
+ sizeof (block_bname));
|
|
/* The following unref corresponds to the ref held at
|
|
- * the time the shard was created or looked up
|
|
+ * the time the shard was added to the lru list.
|
|
+ */
|
|
+ inode_unref (lru_inode);
|
|
+ inode_unlink (lru_inode, priv->dot_shard_inode,
|
|
+ block_bname);
|
|
+ inode_forget (lru_inode, 0);
|
|
+ } else {
|
|
+ fsync_inode = lru_inode;
|
|
+ inode_unref (lru_base_inode);
|
|
+ }
|
|
+ /* The following unref corresponds to the ref
|
|
+ * held by inode_find() above.
|
|
*/
|
|
inode_unref (lru_inode);
|
|
- inode_forget (lru_inode, 0);
|
|
/* For as long as an inode is in lru list, we try to
|
|
* keep it alive by holding a ref on it.
|
|
*/
|
|
inode_ref (linked_inode);
|
|
gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
|
|
ctx->block_num = block_num;
|
|
+ ctx->base_inode = base_inode;
|
|
list_add_tail (&ctx->ilist, &priv->ilist_head);
|
|
}
|
|
} else {
|
|
@@ -551,6 +676,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
|
|
*/
|
|
list_move_tail (&ctx->ilist, &priv->ilist_head);
|
|
}
|
|
+ return fsync_inode;
|
|
}
|
|
|
|
int
|
|
@@ -617,6 +743,85 @@ shard_common_inode_write_success_unwind (glusterfs_fop_t fop,
|
|
}
|
|
|
|
int
|
|
+shard_evicted_inode_fsync_cbk (call_frame_t *frame, void *cookie,
|
|
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
|
|
+ struct iatt *prebuf, struct iatt *postbuf,
|
|
+ dict_t *xdata)
|
|
+{
|
|
+ char block_bname[256] = {0,};
|
|
+ fd_t *anon_fd = cookie;
|
|
+ inode_t *shard_inode = NULL;
|
|
+ shard_inode_ctx_t *ctx = NULL;
|
|
+ shard_priv_t *priv = NULL;
|
|
+
|
|
+ priv = this->private;
|
|
+ shard_inode = anon_fd->inode;
|
|
+
|
|
+ if (op_ret < 0) {
|
|
+ gf_msg (this->name, GF_LOG_WARNING, op_errno,
|
|
+ SHARD_MSG_MEMALLOC_FAILED, "fsync failed on shard");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ LOCK (&priv->lock);
|
|
+ LOCK(&shard_inode->lock);
|
|
+ {
|
|
+ __shard_inode_ctx_get (shard_inode, this, &ctx);
|
|
+ if ((list_empty(&ctx->to_fsync_list)) &&
|
|
+ (list_empty(&ctx->ilist))) {
|
|
+ shard_make_block_bname (ctx->block_num,
|
|
+ shard_inode->gfid, block_bname,
|
|
+ sizeof (block_bname));
|
|
+ inode_unlink (shard_inode, priv->dot_shard_inode,
|
|
+ block_bname);
|
|
+ /* The following unref corresponds to the ref held by
|
|
+ * inode_link() at the time the shard was created or
|
|
+ * looked up
|
|
+ */
|
|
+ inode_unref (shard_inode);
|
|
+ inode_forget (shard_inode, 0);
|
|
+ }
|
|
+ }
|
|
+ UNLOCK(&shard_inode->lock);
|
|
+ UNLOCK(&priv->lock);
|
|
+
|
|
+out:
|
|
+ if (anon_fd)
|
|
+ fd_unref (anon_fd);
|
|
+ STACK_DESTROY (frame->root);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+shard_initiate_evicted_inode_fsync (xlator_t *this, inode_t *inode)
|
|
+{
|
|
+ fd_t *anon_fd = NULL;
|
|
+ call_frame_t *fsync_frame = NULL;
|
|
+
|
|
+ fsync_frame = create_frame (this, this->ctx->pool);
|
|
+ if (!fsync_frame) {
|
|
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
|
|
+ SHARD_MSG_MEMALLOC_FAILED, "Failed to create new frame "
|
|
+ "to fsync shard");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ anon_fd = fd_anonymous (inode);
|
|
+ if (!anon_fd) {
|
|
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
|
|
+ SHARD_MSG_MEMALLOC_FAILED, "Failed to create anon fd to"
|
|
+ " fsync shard");
|
|
+ STACK_DESTROY (fsync_frame->root);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ STACK_WIND_COOKIE (fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd,
|
|
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
|
|
+ anon_fd, 1, NULL);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
|
|
shard_post_resolve_fop_handler_t post_res_handler)
|
|
{
|
|
@@ -625,6 +830,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
|
|
char path[PATH_MAX] = {0,};
|
|
inode_t *inode = NULL;
|
|
inode_t *res_inode = NULL;
|
|
+ inode_t *fsync_inode = NULL;
|
|
shard_priv_t *priv = NULL;
|
|
shard_local_t *local = NULL;
|
|
|
|
@@ -661,20 +867,22 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
|
|
*/
|
|
LOCK(&priv->lock);
|
|
{
|
|
- __shard_update_shards_inode_list (inode, this,
|
|
+ fsync_inode = __shard_update_shards_inode_list (inode,
|
|
+ this,
|
|
res_inode,
|
|
shard_idx_iter);
|
|
}
|
|
UNLOCK(&priv->lock);
|
|
shard_idx_iter++;
|
|
-
|
|
+ if (fsync_inode)
|
|
+ shard_initiate_evicted_inode_fsync (this,
|
|
+ fsync_inode);
|
|
continue;
|
|
} else {
|
|
local->call_count++;
|
|
shard_idx_iter++;
|
|
}
|
|
}
|
|
-
|
|
out:
|
|
post_res_handler (frame, this);
|
|
return 0;
|
|
@@ -1657,6 +1865,7 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode,
|
|
char block_bname[256] = {0,};
|
|
inode_t *linked_inode = NULL;
|
|
xlator_t *this = NULL;
|
|
+ inode_t *fsync_inode = NULL;
|
|
shard_priv_t *priv = NULL;
|
|
|
|
this = THIS;
|
|
@@ -1674,10 +1883,14 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode,
|
|
|
|
LOCK(&priv->lock);
|
|
{
|
|
- __shard_update_shards_inode_list (linked_inode, this,
|
|
- local->loc.inode, block_num);
|
|
+ fsync_inode = __shard_update_shards_inode_list (linked_inode,
|
|
+ this,
|
|
+ local->loc.inode,
|
|
+ block_num);
|
|
}
|
|
UNLOCK(&priv->lock);
|
|
+ if (fsync_inode)
|
|
+ shard_initiate_evicted_inode_fsync (this, fsync_inode);
|
|
}
|
|
|
|
int
|
|
@@ -2120,6 +2333,7 @@ shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
|
|
local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
|
|
if (!local->xattr_req)
|
|
goto err;
|
|
+ local->resolver_base_inode = loc->inode;
|
|
|
|
shard_lookup_base_file (frame, this, &local->loc,
|
|
shard_post_lookup_truncate_handler);
|
|
@@ -2172,6 +2386,7 @@ shard_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
|
|
|
|
local->loc.inode = inode_ref (fd->inode);
|
|
gf_uuid_copy (local->loc.gfid, fd->inode->gfid);
|
|
+ local->resolver_base_inode = fd->inode;
|
|
|
|
shard_lookup_base_file (frame, this, &local->loc,
|
|
shard_post_lookup_truncate_handler);
|
|
@@ -2509,32 +2724,48 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
|
|
{
|
|
char block_bname[256] = {0,};
|
|
inode_t *inode = NULL;
|
|
+ inode_t *base_inode = NULL;
|
|
xlator_t *this = NULL;
|
|
shard_priv_t *priv = NULL;
|
|
shard_inode_ctx_t *ctx = NULL;
|
|
+ shard_inode_ctx_t *base_ictx = NULL;
|
|
+ gf_boolean_t unlink_unref_forget = _gf_false;
|
|
|
|
this = THIS;
|
|
priv = this->private;
|
|
|
|
inode = local->inode_list[shard_block_num - local->first_block];
|
|
+ base_inode = local->resolver_base_inode;
|
|
|
|
shard_make_block_bname (shard_block_num, (local->loc.inode)->gfid,
|
|
block_bname, sizeof (block_bname));
|
|
|
|
LOCK(&priv->lock);
|
|
+ LOCK(&base_inode->lock);
|
|
+ LOCK(&inode->lock);
|
|
{
|
|
- shard_inode_ctx_get (inode, this, &ctx);
|
|
+ __shard_inode_ctx_get (inode, this, &ctx);
|
|
if (!list_empty (&ctx->ilist)) {
|
|
list_del_init (&ctx->ilist);
|
|
priv->inode_count--;
|
|
GF_ASSERT (priv->inode_count >= 0);
|
|
- inode_unlink (inode, priv->dot_shard_inode, block_bname);
|
|
- inode_unref (inode);
|
|
- inode_forget (inode, 0);
|
|
+ unlink_unref_forget = _gf_true;
|
|
+ }
|
|
+ if (ctx->fsync_needed) {
|
|
+ inode_unref (base_inode);
|
|
+ list_del_init (&ctx->to_fsync_list);
|
|
+ __shard_inode_ctx_get (base_inode, this, &base_ictx);
|
|
+ base_ictx->fsync_count--;
|
|
}
|
|
}
|
|
+ UNLOCK(&inode->lock);
|
|
+ UNLOCK(&base_inode->lock);
|
|
+ if (unlink_unref_forget) {
|
|
+ inode_unlink (inode, priv->dot_shard_inode, block_bname);
|
|
+ inode_unref (inode);
|
|
+ inode_forget (inode, 0);
|
|
+ }
|
|
UNLOCK(&priv->lock);
|
|
-
|
|
}
|
|
|
|
int
|
|
@@ -2752,6 +2983,7 @@ shard_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
|
|
local->xflag = xflag;
|
|
local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
|
|
local->block_size = block_size;
|
|
+ local->resolver_base_inode = loc->inode;
|
|
local->fop = GF_FOP_UNLINK;
|
|
if (!this->itable)
|
|
this->itable = (local->loc.inode)->table;
|
|
@@ -2988,6 +3220,7 @@ shard_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
|
|
frame->local = local;
|
|
loc_copy (&local->loc, oldloc);
|
|
loc_copy (&local->loc2, newloc);
|
|
+ local->resolver_base_inode = newloc->inode;
|
|
local->fop = GF_FOP_RENAME;
|
|
local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new();
|
|
if (!local->xattr_req)
|
|
@@ -3754,6 +3987,10 @@ shard_common_inode_write_do_cbk (call_frame_t *frame, void *cookie,
|
|
local->delta_size += (post->ia_size - pre->ia_size);
|
|
shard_inode_ctx_set (local->fd->inode, this, post, 0,
|
|
SHARD_MASK_TIMES);
|
|
+ if (local->fd->inode != anon_fd->inode)
|
|
+ shard_inode_ctx_add_to_fsync_list (local->fd->inode,
|
|
+ this,
|
|
+ anon_fd->inode);
|
|
}
|
|
}
|
|
UNLOCK (&frame->lock);
|
|
@@ -4204,18 +4441,198 @@ shard_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
|
|
}
|
|
|
|
int
|
|
-shard_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
|
|
- struct iatt *postbuf, dict_t *xdata)
|
|
+__shard_get_timestamps_from_inode_ctx (shard_local_t *local, inode_t *inode,
|
|
+ xlator_t *this)
|
|
{
|
|
- if (op_ret < 0)
|
|
+ int ret = -1;
|
|
+ uint64_t ctx_uint = 0;
|
|
+ shard_inode_ctx_t *ctx = NULL;
|
|
+
|
|
+ ret = __inode_ctx_get (inode, this, &ctx_uint);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ ctx = (shard_inode_ctx_t *) ctx_uint;
|
|
+
|
|
+ local->postbuf.ia_ctime = ctx->stat.ia_ctime;
|
|
+ local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec;
|
|
+ local->postbuf.ia_atime = ctx->stat.ia_atime;
|
|
+ local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec;
|
|
+ local->postbuf.ia_mtime = ctx->stat.ia_mtime;
|
|
+ local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+shard_get_timestamps_from_inode_ctx (shard_local_t *local, inode_t *inode,
|
|
+ xlator_t *this)
|
|
+{
|
|
+ int ret = 0;
|
|
+
|
|
+ LOCK (&inode->lock);
|
|
+ {
|
|
+ ret = __shard_get_timestamps_from_inode_ctx (local, inode,
|
|
+ this);
|
|
+ }
|
|
+ UNLOCK (&inode->lock);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int
|
|
+shard_fsync_shards_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
|
|
+ struct iatt *postbuf, dict_t *xdata)
|
|
+{
|
|
+ int call_count = 0;
|
|
+ uint64_t fsync_count = 0;
|
|
+ fd_t *anon_fd = cookie;
|
|
+ shard_local_t *local = NULL;
|
|
+ shard_inode_ctx_t *ctx = NULL;
|
|
+ shard_inode_ctx_t *base_ictx = NULL;
|
|
+ inode_t *base_inode = NULL;
|
|
+
|
|
+ local = frame->local;
|
|
+ base_inode = local->fd->inode;
|
|
+
|
|
+ if (local->op_ret < 0)
|
|
goto out;
|
|
|
|
- /* To-Do: Wind fsync on all shards of the file */
|
|
- postbuf->ia_ctime = 0;
|
|
+ LOCK (&frame->lock);
|
|
+ {
|
|
+ if (op_ret < 0) {
|
|
+ local->op_ret = op_ret;
|
|
+ local->op_errno = op_errno;
|
|
+ goto out;
|
|
+ }
|
|
+ shard_inode_ctx_set (local->fd->inode, this, postbuf, 0,
|
|
+ SHARD_MASK_TIMES);
|
|
+ }
|
|
+ UNLOCK (&frame->lock);
|
|
+ fd_ctx_get (anon_fd, this, &fsync_count);
|
|
out:
|
|
- SHARD_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
|
|
- xdata);
|
|
+ if (base_inode != anon_fd->inode) {
|
|
+ LOCK (&base_inode->lock);
|
|
+ LOCK (&anon_fd->inode->lock);
|
|
+ {
|
|
+ __shard_inode_ctx_get (anon_fd->inode, this, &ctx);
|
|
+ __shard_inode_ctx_get (base_inode, this, &base_ictx);
|
|
+ if (op_ret == 0)
|
|
+ ctx->fsync_needed -= fsync_count;
|
|
+ GF_ASSERT (ctx->fsync_needed >= 0);
|
|
+ list_del_init (&ctx->to_fsync_list);
|
|
+ if (ctx->fsync_needed != 0) {
|
|
+ list_add_tail (&ctx->to_fsync_list,
|
|
+ &base_ictx->to_fsync_list);
|
|
+ base_ictx->fsync_count++;
|
|
+ }
|
|
+ }
|
|
+ UNLOCK (&anon_fd->inode->lock);
|
|
+ UNLOCK (&base_inode->lock);
|
|
+ }
|
|
+ if (anon_fd)
|
|
+ fd_unref (anon_fd);
|
|
+
|
|
+ call_count = shard_call_count_return (frame);
|
|
+ if (call_count != 0)
|
|
+ return 0;
|
|
+
|
|
+ if (local->op_ret < 0) {
|
|
+ SHARD_STACK_UNWIND (fsync, frame, local->op_ret,
|
|
+ local->op_errno, NULL, NULL, NULL);
|
|
+ } else {
|
|
+ shard_get_timestamps_from_inode_ctx (local, base_inode, this);
|
|
+ SHARD_STACK_UNWIND (fsync, frame, local->op_ret,
|
|
+ local->op_errno, &local->prebuf,
|
|
+ &local->postbuf, local->xattr_rsp);
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+shard_post_lookup_fsync_handler (call_frame_t *frame, xlator_t *this)
|
|
+{
|
|
+ int ret = 0;
|
|
+ int call_count = 0;
|
|
+ int fsync_count = 0;
|
|
+ fd_t *anon_fd = NULL;
|
|
+ inode_t *base_inode = NULL;
|
|
+ shard_local_t *local = NULL;
|
|
+ shard_inode_ctx_t *ctx = NULL;
|
|
+ shard_inode_ctx_t *iter = NULL;
|
|
+ struct list_head copy = {0,};
|
|
+ shard_inode_ctx_t *tmp = NULL;
|
|
+
|
|
+ local = frame->local;
|
|
+ base_inode = local->fd->inode;
|
|
+ local->postbuf = local->prebuf;
|
|
+        INIT_LIST_HEAD (&copy);
|
|
+
|
|
+ if (local->op_ret < 0) {
|
|
+ SHARD_STACK_UNWIND (fsync, frame, local->op_ret,
|
|
+ local->op_errno, NULL, NULL, NULL);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ LOCK (&base_inode->lock);
|
|
+ {
|
|
+ __shard_inode_ctx_get (base_inode, this, &ctx);
|
|
+                list_splice_init (&ctx->to_fsync_list, &copy);
|
|
+ call_count = ctx->fsync_count;
|
|
+ ctx->fsync_count = 0;
|
|
+ }
|
|
+ UNLOCK (&base_inode->lock);
|
|
+
|
|
+ local->call_count = ++call_count;
|
|
+
|
|
+ /* Send fsync() on the base shard first */
|
|
+ anon_fd = fd_ref (local->fd);
|
|
+ STACK_WIND_COOKIE (frame, shard_fsync_shards_cbk, anon_fd,
|
|
+ FIRST_CHILD(this),
|
|
+ FIRST_CHILD(this)->fops->fsync, anon_fd,
|
|
+ local->datasync, local->xattr_req);
|
|
+ call_count--;
|
|
+ anon_fd = NULL;
|
|
+
|
|
+        list_for_each_entry_safe (iter, tmp, &copy, to_fsync_list) {
|
|
+ fsync_count = 0;
|
|
+ shard_inode_ctx_get_fsync_count (iter->inode, this,
|
|
+ &fsync_count);
|
|
+ GF_ASSERT (fsync_count > 0);
|
|
+ anon_fd = fd_anonymous (iter->inode);
|
|
+ if (!anon_fd) {
|
|
+ local->op_ret = -1;
|
|
+ local->op_errno = ENOMEM;
|
|
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
|
|
+ SHARD_MSG_MEMALLOC_FAILED, "Failed to create "
|
|
+ "anon fd to fsync shard");
|
|
+ shard_fsync_shards_cbk (frame, (void *) (long) anon_fd,
|
|
+ this, -1, ENOMEM, NULL, NULL,
|
|
+ NULL);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ ret = fd_ctx_set (anon_fd, this, fsync_count);
|
|
+ if (ret) {
|
|
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
+ SHARD_MSG_FD_CTX_SET_FAILED, "Failed to set fd "
|
|
+ "ctx for shard inode gfid=%s",
|
|
+ uuid_utoa (iter->inode->gfid));
|
|
+ local->op_ret = -1;
|
|
+ local->op_errno = ENOMEM;
|
|
+ shard_fsync_shards_cbk (frame, (void *) (long) anon_fd,
|
|
+ this, -1, ENOMEM, NULL, NULL,
|
|
+ NULL);
|
|
+ continue;
|
|
+ }
|
|
+ STACK_WIND_COOKIE (frame, shard_fsync_shards_cbk, anon_fd,
|
|
+ FIRST_CHILD(this),
|
|
+ FIRST_CHILD(this)->fops->fsync, anon_fd,
|
|
+ local->datasync, local->xattr_req);
|
|
+ call_count--;
|
|
+ }
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
@@ -4223,8 +4640,50 @@ int
|
|
shard_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
|
|
dict_t *xdata)
|
|
{
|
|
- STACK_WIND (frame, shard_fsync_cbk, FIRST_CHILD(this),
|
|
- FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
|
|
+ int ret = 0;
|
|
+ uint64_t block_size = 0;
|
|
+ shard_local_t *local = NULL;
|
|
+
|
|
+ ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size);
|
|
+ if (ret) {
|
|
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
+ SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
|
|
+ "size for %s from its inode ctx",
|
|
+ uuid_utoa (fd->inode->gfid));
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
|
|
+ STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
|
|
+ FIRST_CHILD(this)->fops->fsync, fd, datasync,
|
|
+ xdata);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ if (!this->itable)
|
|
+ this->itable = fd->inode->table;
|
|
+
|
|
+ local = mem_get0 (this->local_pool);
|
|
+ if (!local)
|
|
+ goto err;
|
|
+
|
|
+ frame->local = local;
|
|
+
|
|
+ local->fd = fd_ref (fd);
|
|
+ local->fop = GF_FOP_FSYNC;
|
|
+ local->datasync = datasync;
|
|
+ local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
|
|
+ if (!local->xattr_req)
|
|
+ goto err;
|
|
+
|
|
+ local->loc.inode = inode_ref (fd->inode);
|
|
+ gf_uuid_copy (local->loc.gfid, fd->inode->gfid);
|
|
+
|
|
+ shard_lookup_base_file (frame, this, &local->loc,
|
|
+ shard_post_lookup_fsync_handler);
|
|
+ return 0;
|
|
+err:
|
|
+ SHARD_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
|
|
return 0;
|
|
}
|
|
|
|
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
|
|
index 7319598..75d39a1 100644
|
|
--- a/xlators/features/shard/src/shard.h
|
|
+++ b/xlators/features/shard/src/shard.h
|
|
@@ -215,6 +215,7 @@ typedef struct shard_local {
|
|
uint32_t gid;
|
|
uint64_t block_size;
|
|
uint64_t dst_block_size;
|
|
+ int32_t datasync;
|
|
off_t offset;
|
|
size_t total_size;
|
|
size_t written_size;
|
|
@@ -270,6 +271,11 @@ typedef struct shard_inode_ctx {
|
|
uuid_t base_gfid;
|
|
int block_num;
|
|
gf_boolean_t refreshed;
|
|
+ struct list_head to_fsync_list;
|
|
+ int fsync_needed;
|
|
+ inode_t *inode;
|
|
+ int fsync_count;
|
|
+ inode_t *base_inode;
|
|
} shard_inode_ctx_t;
|
|
|
|
#endif /* __SHARD_H__ */
|
|
--
|
|
1.8.3.1
|
|
|