glusterfs/0559-afr-make-fsync-post-op-aware-of-inodelk-count-2273.patch
Gluster Jenkins af562eab56 autobuild v6.0-57
Related: rhbz#2055630
Resolves: bz#1600379 bz#1689375 bz#1782428 bz#1798897 bz#1815462
Resolves: bz#1889966 bz#1891403 bz#1901468 bz#1903911 bz#1908635
Resolves: bz#1917488 bz#1918018 bz#1919132 bz#1925425 bz#1927411
Resolves: bz#1927640 bz#1928676 bz#1942816 bz#1943467 bz#1945143
Resolves: bz#1946171 bz#1957191 bz#1957641
Signed-off-by: Gluster Jenkins <dkhandel+glusterjenkins@redhat.com>
2022-02-28 21:15:34 +02:00

164 lines
5.7 KiB
Diff

From 2b6e6c234dffa72c9f2af747908b1e1f29080698 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Thu, 25 Mar 2021 11:52:13 +0530
Subject: [PATCH 559/584] afr: make fsync post-op aware of inodelk count
(#2273)
Problem:
Since commit bd540db1e, eager-locking was enabled for fsync. But on
certain VM workloads with sharding enabled, shard xlator keeps sending
fsync on the base shard. This can cause blocked inodelks from other
clients (including shd) to time out due to call bail.
Fix:
Make afr fsync aware of inodelk count and not delay post-op + unlock
when inodelk count > 1, just like writev.
Code is restructured so that any fd based AFR_DATA_TRANSACTION can be made
aware by setting GLUSTERFS_INODELK_DOM_COUNT in xdata request.
Note: We do not know yet why VMs go in to paused state because of the
blocked inodelks but this patch should be a first step in reducing the
occurrence.
Upstream patch details:
> https://github.com/gluster/glusterfs/pull/2273/
> Updates: #2198
> Change-Id: Ib91ebdd3101d590c326e69c829cf9335003e260b
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
BUG: 1943467
Change-Id: Id407ca54007e3bbb206a1d9431ebaf89a2167f74
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244516
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/afr/src/afr-inode-write.c | 40 ++++++++++++++++++-------------
xlators/features/locks/src/posix.c | 1 +
2 files changed, 24 insertions(+), 17 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index df82b6e..962a7b1 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -42,6 +42,7 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
struct iatt *stbuf = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_lock_t *lock = NULL;
afr_read_subvol_args_t args = {
0,
};
@@ -50,6 +51,12 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
+ if (local->update_num_inodelks &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ lock->num_inodelks = local->num_inodelks;
+ }
+
/*This code needs to stay till DHT sends fops on linked
* inodes*/
if (!inode_is_linked(local->inode)) {
@@ -134,6 +141,7 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int num_inodelks = 0;
local = frame->local;
priv = this->private;
@@ -146,8 +154,16 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
local->replies[child_index].op_ret = op_ret;
local->replies[child_index].op_errno = op_errno;
- if (xdata)
+ if (xdata) {
local->replies[child_index].xdata = dict_ref(xdata);
+ if (dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
+ &num_inodelks) == 0) {
+ if (num_inodelks > local->num_inodelks) {
+ local->num_inodelks = num_inodelks;
+ local->update_num_inodelks = _gf_true;
+ }
+ }
+ }
if (op_ret >= 0) {
if (prebuf)
@@ -284,7 +300,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
afr_local_t *local = frame->local;
uint32_t open_fd_count = 0;
uint32_t write_is_append = 0;
- int32_t num_inodelks = 0;
LOCK(&frame->lock);
{
@@ -306,15 +321,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
local->open_fd_count = open_fd_count;
local->update_open_fd_count = _gf_true;
}
-
- ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
- &num_inodelks);
- if (ret < 0)
- goto unlock;
- if (num_inodelks > local->num_inodelks) {
- local->num_inodelks = num_inodelks;
- local->update_num_inodelks = _gf_true;
- }
}
unlock:
UNLOCK(&frame->lock);
@@ -324,7 +330,6 @@ void
afr_process_post_writev(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- afr_lock_t *lock = NULL;
local = frame->local;
@@ -343,11 +348,6 @@ afr_process_post_writev(call_frame_t *frame, xlator_t *this)
if (local->update_open_fd_count)
local->inode_ctx->open_fd_count = local->open_fd_count;
- if (local->update_num_inodelks &&
- local->transaction.type == AFR_DATA_TRANSACTION) {
- lock = &local->inode_ctx->lock[local->transaction.type];
- lock->num_inodelks = local->num_inodelks;
- }
}
int
@@ -2516,6 +2516,12 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local->xdata_req)
goto out;
+ if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
+ this->name)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index cdd1ff7..22ef5b8 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -4943,6 +4943,7 @@ struct xlator_fops fops = {
.rchecksum = pl_rchecksum,
.statfs = pl_statfs,
.fsyncdir = pl_fsyncdir,
+ .fsync = pl_fsync,
.readdir = pl_readdir,
.symlink = pl_symlink,
.link = pl_link,
--
1.8.3.1