164 lines
5.7 KiB
Diff
164 lines
5.7 KiB
Diff
|
From 2b6e6c234dffa72c9f2af747908b1e1f29080698 Mon Sep 17 00:00:00 2001
|
||
|
From: Ravishankar N <ravishankar@redhat.com>
|
||
|
Date: Thu, 25 Mar 2021 11:52:13 +0530
|
||
|
Subject: [PATCH 559/584] afr: make fsync post-op aware of inodelk count
|
||
|
(#2273)
|
||
|
|
||
|
Problem:
|
||
|
Since commit bd540db1e, eager-locking was enabled for fsync. But on
|
||
|
certain VM workloads with sharding enabled, shard xlator keeps sending
|
||
|
fsync on the base shard. This can cause blocked inodelks from other
|
||
|
clients (including shd) to time out due to call bail.
|
||
|
|
||
|
Fix:
|
||
|
Make afr fsync aware of inodelk count and not delay post-op + unlock
|
||
|
when inodelk count > 1, just like writev.
|
||
|
|
||
|
Code is restructured so that any fd based AFR_DATA_TRANSACTION can be made
|
||
|
aware by setting GLUSTERFS_INODELK_DOM_COUNT in xdata request.
|
||
|
|
||
|
Note: We do not know yet why VMs go into a paused state because of the
|
||
|
blocked inodelks but this patch should be a first step in reducing the
|
||
|
occurrence.
|
||
|
|
||
|
Upstream patch details:
|
||
|
> https://github.com/gluster/glusterfs/pull/2273/
|
||
|
> Updates: #2198
|
||
|
> Change-Id: Ib91ebdd3101d590c326e69c829cf9335003e260b
|
||
|
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
||
|
|
||
|
BUG: 1943467
|
||
|
Change-Id: Id407ca54007e3bbb206a1d9431ebaf89a2167f74
|
||
|
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
||
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244516
|
||
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
||
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
||
|
---
|
||
|
xlators/cluster/afr/src/afr-inode-write.c | 40 ++++++++++++++++++-------------
|
||
|
xlators/features/locks/src/posix.c | 1 +
|
||
|
2 files changed, 24 insertions(+), 17 deletions(-)
|
||
|
|
||
|
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
|
||
|
index df82b6e..962a7b1 100644
|
||
|
--- a/xlators/cluster/afr/src/afr-inode-write.c
|
||
|
+++ b/xlators/cluster/afr/src/afr-inode-write.c
|
||
|
@@ -42,6 +42,7 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
|
||
|
struct iatt *stbuf = NULL;
|
||
|
afr_local_t *local = NULL;
|
||
|
afr_private_t *priv = NULL;
|
||
|
+ afr_lock_t *lock = NULL;
|
||
|
afr_read_subvol_args_t args = {
|
||
|
0,
|
||
|
};
|
||
|
@@ -50,6 +51,12 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
|
||
|
priv = this->private;
|
||
|
GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
|
||
|
|
||
|
+ if (local->update_num_inodelks &&
|
||
|
+ local->transaction.type == AFR_DATA_TRANSACTION) {
|
||
|
+ lock = &local->inode_ctx->lock[local->transaction.type];
|
||
|
+ lock->num_inodelks = local->num_inodelks;
|
||
|
+ }
|
||
|
+
|
||
|
/*This code needs to stay till DHT sends fops on linked
|
||
|
* inodes*/
|
||
|
if (!inode_is_linked(local->inode)) {
|
||
|
@@ -134,6 +141,7 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
|
||
|
{
|
||
|
afr_local_t *local = NULL;
|
||
|
afr_private_t *priv = NULL;
|
||
|
+ int num_inodelks = 0;
|
||
|
|
||
|
local = frame->local;
|
||
|
priv = this->private;
|
||
|
@@ -146,8 +154,16 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
|
||
|
|
||
|
local->replies[child_index].op_ret = op_ret;
|
||
|
local->replies[child_index].op_errno = op_errno;
|
||
|
- if (xdata)
|
||
|
+ if (xdata) {
|
||
|
local->replies[child_index].xdata = dict_ref(xdata);
|
||
|
+ if (dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
|
||
|
+ &num_inodelks) == 0) {
|
||
|
+ if (num_inodelks > local->num_inodelks) {
|
||
|
+ local->num_inodelks = num_inodelks;
|
||
|
+ local->update_num_inodelks = _gf_true;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
|
||
|
if (op_ret >= 0) {
|
||
|
if (prebuf)
|
||
|
@@ -284,7 +300,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
|
||
|
afr_local_t *local = frame->local;
|
||
|
uint32_t open_fd_count = 0;
|
||
|
uint32_t write_is_append = 0;
|
||
|
- int32_t num_inodelks = 0;
|
||
|
|
||
|
LOCK(&frame->lock);
|
||
|
{
|
||
|
@@ -306,15 +321,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
|
||
|
local->open_fd_count = open_fd_count;
|
||
|
local->update_open_fd_count = _gf_true;
|
||
|
}
|
||
|
-
|
||
|
- ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
|
||
|
- &num_inodelks);
|
||
|
- if (ret < 0)
|
||
|
- goto unlock;
|
||
|
- if (num_inodelks > local->num_inodelks) {
|
||
|
- local->num_inodelks = num_inodelks;
|
||
|
- local->update_num_inodelks = _gf_true;
|
||
|
- }
|
||
|
}
|
||
|
unlock:
|
||
|
UNLOCK(&frame->lock);
|
||
|
@@ -324,7 +330,6 @@ void
|
||
|
afr_process_post_writev(call_frame_t *frame, xlator_t *this)
|
||
|
{
|
||
|
afr_local_t *local = NULL;
|
||
|
- afr_lock_t *lock = NULL;
|
||
|
|
||
|
local = frame->local;
|
||
|
|
||
|
@@ -343,11 +348,6 @@ afr_process_post_writev(call_frame_t *frame, xlator_t *this)
|
||
|
|
||
|
if (local->update_open_fd_count)
|
||
|
local->inode_ctx->open_fd_count = local->open_fd_count;
|
||
|
- if (local->update_num_inodelks &&
|
||
|
- local->transaction.type == AFR_DATA_TRANSACTION) {
|
||
|
- lock = &local->inode_ctx->lock[local->transaction.type];
|
||
|
- lock->num_inodelks = local->num_inodelks;
|
||
|
- }
|
||
|
}
|
||
|
|
||
|
int
|
||
|
@@ -2516,6 +2516,12 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
|
||
|
if (!local->xdata_req)
|
||
|
goto out;
|
||
|
|
||
|
+ if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
|
||
|
+ this->name)) {
|
||
|
+ op_errno = ENOMEM;
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
local->fd = fd_ref(fd);
|
||
|
ret = afr_set_inode_local(this, local, fd->inode);
|
||
|
if (ret)
|
||
|
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
|
||
|
index cdd1ff7..22ef5b8 100644
|
||
|
--- a/xlators/features/locks/src/posix.c
|
||
|
+++ b/xlators/features/locks/src/posix.c
|
||
|
@@ -4943,6 +4943,7 @@ struct xlator_fops fops = {
|
||
|
.rchecksum = pl_rchecksum,
|
||
|
.statfs = pl_statfs,
|
||
|
.fsyncdir = pl_fsyncdir,
|
||
|
+ .fsync = pl_fsync,
|
||
|
.readdir = pl_readdir,
|
||
|
.symlink = pl_symlink,
|
||
|
.link = pl_link,
|
||
|
--
|
||
|
1.8.3.1
|
||
|
|