107 lines
3.9 KiB
Diff
107 lines
3.9 KiB
Diff
From 87d8070f80487322a1736846a78725fd88f8de34 Mon Sep 17 00:00:00 2001
|
|
From: Pranith Kumar K <pkarampu@redhat.com>
|
|
Date: Tue, 20 Aug 2019 13:27:24 +0530
|
|
Subject: [PATCH 291/297] cluster/ec: Mark release only when it is acquired
|
|
|
|
Problem:
|
|
Mount-1                                  Mount-2
|
|
1)Tries to acquire lock on 'dir1'        1)Tries to acquire lock on 'dir1'
|
|
2)Lock is granted on brick-0             2)Lock gets EAGAIN on brick-0 and
|
|
                                           leads to blocking lock on brick-0
|
|
3)Gets a lock-contention                 3)Doesn't matter what happens on mount-2
|
|
  notification, marks lock->release        from here on.
|
|
  to true.
|
|
4)New fop comes on 'dir1' which will
|
|
be put in frozen list as lock->release
|
|
is set to true.
|
|
5) Lock acquisition from step-2 fails because
|
|
3 bricks went down in 4+2 setup.
|
|
|
|
Fop on mount-1 which is put in frozen list will hang because no codepath will
|
|
move it from frozen list to any other list and the lock will not be retried.
|
|
|
|
Fix:
|
|
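Don't set lock->release to true if the lock is not yet acquired at the time of
|
|
the lock-contention notification. Instead, remember the contention in
lock->contention and set lock->release once the lock is actually acquired;
clear lock->contention if the lock acquisition fails.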
|
|
|
|
Upstream-patch: https://review.gluster.org/c/glusterfs/+/23272
|
|
fixes: bz#1731896
|
|
Change-Id: Ie6630db8735ccf372cc54b873a3a3aed7a6082b7
|
|
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/180870
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
|
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
---
|
|
xlators/cluster/ec/src/ec-common.c | 20 ++++++++++++++++++--
|
|
xlators/cluster/ec/src/ec-types.h | 1 +
|
|
2 files changed, 19 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
|
|
index 2e59180..5cae37b 100644
|
|
--- a/xlators/cluster/ec/src/ec-common.c
|
|
+++ b/xlators/cluster/ec/src/ec-common.c
|
|
@@ -1867,6 +1867,10 @@ ec_lock_acquired(ec_lock_link_t *link)
|
|
LOCK(&lock->loc.inode->lock);
|
|
|
|
lock->acquired = _gf_true;
|
|
+ if (lock->contention) {
|
|
+ lock->release = _gf_true;
|
|
+ lock->contention = _gf_false;
|
|
+ }
|
|
|
|
ec_lock_update_fd(lock, fop);
|
|
ec_lock_wake_shared(lock, &list);
|
|
@@ -1892,15 +1896,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
|
|
ec_lock_link_t *link = NULL;
|
|
ec_lock_t *lock = NULL;
|
|
|
|
+ link = fop->data;
|
|
+ lock = link->lock;
|
|
if (op_ret >= 0) {
|
|
- link = fop->data;
|
|
- lock = link->lock;
|
|
lock->mask = lock->good_mask = fop->good;
|
|
lock->healing = 0;
|
|
|
|
ec_lock_acquired(link);
|
|
ec_lock(fop->parent);
|
|
} else {
|
|
+ LOCK(&lock->loc.inode->lock);
|
|
+ {
|
|
+ lock->contention = _gf_false;
|
|
+ }
|
|
+ UNLOCK(&lock->loc.inode->lock);
|
|
gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,
|
|
"Failed to complete preop lock");
|
|
}
|
|
@@ -2547,6 +2556,13 @@ ec_lock_release(ec_t *ec, inode_t *inode)
|
|
gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",
|
|
inode);
|
|
|
|
+ if (!lock->acquired) {
|
|
+ /* This happens if some bricks already got the lock while inodelk is in
|
|
+ * progress. Set release to true after lock is acquired*/
|
|
+ lock->contention = _gf_true;
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
/* The lock is not marked to be released, so the frozen list should be
|
|
* empty. */
|
|
GF_ASSERT(list_empty(&lock->frozen));
|
|
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
|
|
index ea4f6ad..34a9768 100644
|
|
--- a/xlators/cluster/ec/src/ec-types.h
|
|
+++ b/xlators/cluster/ec/src/ec-types.h
|
|
@@ -267,6 +267,7 @@ struct _ec_lock {
|
|
uint32_t refs_pending; /* Refs assigned to fops being prepared */
|
|
uint32_t waiting_flags; /*Track xattrop/dirty marking*/
|
|
gf_boolean_t acquired;
|
|
+ gf_boolean_t contention;
|
|
gf_boolean_t unlock_now;
|
|
gf_boolean_t release;
|
|
gf_boolean_t query;
|
|
--
|
|
1.8.3.1
|
|
|