389 lines
13 KiB
Diff
389 lines
13 KiB
Diff
From e3813685237dbdf8dc7cf28726fff2caf2288706 Mon Sep 17 00:00:00 2001
|
|
From: Xavi Hernandez <xhernandez@redhat.com>
|
|
Date: Mon, 19 Jul 2021 15:37:02 +0200
|
|
Subject: [PATCH 588/610] locks: Fix null gfid in lock contention notifications
|
|
|
|
This patch fixes 3 problems:
|
|
|
|
First problem:
|
|
|
|
After commit c0bd592e, the pl_inode_t object was also created in the
|
|
cbk of lookup requests. Lookup requests are a bit different than any
|
|
other request because the inode received may not be completely
|
|
initialized. In particular, inode->gfid may be null.
|
|
|
|
This caused that the gfid stored in the pl_inode_t object was null in
|
|
some cases. This gfid is used mostly for logs, but also to send lock
|
|
contention notifications. This meant that some notifications could be
|
|
sent with a null gfid, making impossible for the client xlator to
|
|
correctly identify the contending inode, so the lock was not released
|
|
immediately when eager-lock was also enabled.
|
|
|
|
Second problem:
|
|
|
|
The feature introduced by c0bd592e needed to track the number of
|
|
hardlinks of each inode to detect when it was deleted. However it
|
|
was done using the 'get-link-count' special xattr on lookup, while
|
|
posix only implements it for unlink and rename.
|
|
|
|
Also, the number of hardlinks was not incremented for mkdir, mknod,
|
|
rename, ..., so it didn't work correctly for directories.
|
|
|
|
Third problem:
|
|
|
|
When the last hardlink of an open file is deleted, all locks will be
|
|
denied with ESTALE error, but that's not correct. Access to the open
|
|
fd must succeed.
|
|
|
|
The first problem is fixed by avoiding creating pl_inode_t objects
|
|
during lookup. Second and third problems are fixed by completely
|
|
ignoring if the file has been deleted or not. Even if we grant a
|
|
lock on a non-existing file, the next operation done by the client
|
|
inside the lock will return the correct error, which should be enough.
|
|
|
|
Upstream patch:
|
|
> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2553
|
|
> Fixes: #2551
|
|
> Change-Id: Ic73e82f6b725b838c1600b6a128ea36a75f13253
|
|
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
|
BUG: 1962972
|
|
Change-Id: Ic73e82f6b725b838c1600b6a128ea36a75f13253
|
|
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279192
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
---
|
|
tests/bugs/locks/issue-2551.t | 58 ++++++++++++++++++
|
|
xlators/features/locks/src/common.c | 31 +++-------
|
|
xlators/features/locks/src/locks.h | 2 -
|
|
xlators/features/locks/src/posix.c | 118 +++---------------------------------
|
|
4 files changed, 74 insertions(+), 135 deletions(-)
|
|
create mode 100644 tests/bugs/locks/issue-2551.t
|
|
|
|
diff --git a/tests/bugs/locks/issue-2551.t b/tests/bugs/locks/issue-2551.t
|
|
new file mode 100644
|
|
index 0000000..a32af02
|
|
--- /dev/null
|
|
+++ b/tests/bugs/locks/issue-2551.t
|
|
@@ -0,0 +1,58 @@
|
|
+#!/bin/bash
|
|
+
|
|
+. $(dirname $0)/../../include.rc
|
|
+. $(dirname $0)/../../volume.rc
|
|
+
|
|
+function check_time() {
|
|
+ local max="${1}"
|
|
+ local start="$(date +"%s")"
|
|
+
|
|
+ shift
|
|
+
|
|
+ if "${@}"; then
|
|
+ if [[ $(($(date +"%s") - ${start})) -lt ${max} ]]; then
|
|
+ return 0
|
|
+ fi
|
|
+ fi
|
|
+
|
|
+ return 1
|
|
+}
|
|
+
|
|
+cleanup
|
|
+
|
|
+TEST glusterd
|
|
+TEST pidof glusterd
|
|
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/brick{0..2}
|
|
+TEST $CLI volume set $V0 disperse.eager-lock on
|
|
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 30
|
|
+TEST $CLI volume set $V0 features.locks-notify-contention on
|
|
+TEST $CLI volume set $V0 performance.write-behind off
|
|
+TEST $CLI volume set $V0 performance.open-behind off
|
|
+TEST $CLI volume set $V0 performance.quick-read off
|
|
+
|
|
+TEST $CLI volume start $V0
|
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0
|
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
|
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2
|
|
+
|
|
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
|
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0
|
|
+
|
|
+TEST mkdir $M0/dir
|
|
+TEST dd if=/dev/zero of=$M0/dir/test bs=4k count=1
|
|
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
+
|
|
+TEST $CLI volume stop $V0
|
|
+TEST $CLI volume start $V0
|
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0
|
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
|
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2
|
|
+
|
|
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
|
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0
|
|
+
|
|
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
|
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M1
|
|
+
|
|
+TEST dd if=/dev/zero of=$M0/dir/test bs=4k count=1 conv=notrunc
|
|
+TEST check_time 5 dd if=/dev/zero of=$M1/dir/test bs=4k count=1 conv=notrunc
|
|
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
|
|
index cddbfa6..5403086 100644
|
|
--- a/xlators/features/locks/src/common.c
|
|
+++ b/xlators/features/locks/src/common.c
|
|
@@ -468,9 +468,7 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local)
|
|
pl_inode->check_mlock_info = _gf_true;
|
|
pl_inode->mlock_enforced = _gf_false;
|
|
|
|
- /* -2 means never looked up. -1 means something went wrong and link
|
|
- * tracking is disabled. */
|
|
- pl_inode->links = -2;
|
|
+ pl_inode->remove_running = 0;
|
|
|
|
ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode));
|
|
if (ret) {
|
|
@@ -1403,11 +1401,6 @@ pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
|
|
|
|
pthread_mutex_lock(&pl_inode->mutex);
|
|
|
|
- if (pl_inode->removed) {
|
|
- error = ESTALE;
|
|
- goto unlock;
|
|
- }
|
|
-
|
|
if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) {
|
|
error = -1;
|
|
/* We skip the unlock here because the caller must create a stub when
|
|
@@ -1420,7 +1413,6 @@ pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
|
|
pl_inode->is_locked = _gf_true;
|
|
pl_inode->remove_running++;
|
|
|
|
-unlock:
|
|
pthread_mutex_unlock(&pl_inode->mutex);
|
|
|
|
done:
|
|
@@ -1490,20 +1482,18 @@ pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error)
|
|
|
|
pthread_mutex_lock(&pl_inode->mutex);
|
|
|
|
- if (error == 0) {
|
|
- if (pl_inode->links >= 0) {
|
|
- pl_inode->links--;
|
|
- }
|
|
- if (pl_inode->links == 0) {
|
|
- pl_inode->removed = _gf_true;
|
|
- }
|
|
- }
|
|
-
|
|
pl_inode->remove_running--;
|
|
|
|
if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) {
|
|
pl_inode->is_locked = _gf_false;
|
|
|
|
+ /* At this point it's possible that the inode has been deleted, but
|
|
+ * there could be open fd's still referencing it, so we can't prevent
|
|
+ * pending locks from being granted. If the file has really been
|
|
+ * deleted, whatever the client does once the lock is granted will
|
|
+ * fail with the appropriate error, so we don't need to worry about
|
|
+ * it here. */
|
|
+
|
|
list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
|
|
{
|
|
__grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now,
|
|
@@ -1555,11 +1545,6 @@ pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock)
|
|
pl_dom_list_t *dom;
|
|
pl_inode_lock_t *ilock;
|
|
|
|
- /* If the inode has been deleted, we won't allow any lock. */
|
|
- if (pl_inode->removed) {
|
|
- return -ESTALE;
|
|
- }
|
|
-
|
|
/* We only synchronize with locks made for regular operations coming from
|
|
* the user. Locks done for internal purposes are hard to control and could
|
|
* lead to long delays or deadlocks quite easily. */
|
|
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
|
|
index 6666feb..2406dcd 100644
|
|
--- a/xlators/features/locks/src/locks.h
|
|
+++ b/xlators/features/locks/src/locks.h
|
|
@@ -202,10 +202,8 @@ struct __pl_inode {
|
|
int fop_wind_count;
|
|
pthread_cond_t check_fop_wind_count;
|
|
|
|
- int32_t links; /* Number of hard links the inode has. */
|
|
uint32_t remove_running; /* Number of remove operations running. */
|
|
gf_boolean_t is_locked; /* Regular locks will be blocked. */
|
|
- gf_boolean_t removed; /* The inode has been deleted. */
|
|
};
|
|
typedef struct __pl_inode pl_inode_t;
|
|
|
|
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
|
|
index 22ef5b8..d5effef 100644
|
|
--- a/xlators/features/locks/src/posix.c
|
|
+++ b/xlators/features/locks/src/posix.c
|
|
@@ -2975,104 +2975,24 @@ out:
|
|
return ret;
|
|
}
|
|
|
|
-static int32_t
|
|
-pl_request_link_count(dict_t **pxdata)
|
|
-{
|
|
- dict_t *xdata;
|
|
-
|
|
- xdata = *pxdata;
|
|
- if (xdata == NULL) {
|
|
- xdata = dict_new();
|
|
- if (xdata == NULL) {
|
|
- return ENOMEM;
|
|
- }
|
|
- } else {
|
|
- dict_ref(xdata);
|
|
- }
|
|
-
|
|
- if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) {
|
|
- dict_unref(xdata);
|
|
- return ENOMEM;
|
|
- }
|
|
-
|
|
- *pxdata = xdata;
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static int32_t
|
|
-pl_check_link_count(dict_t *xdata)
|
|
-{
|
|
- int32_t count;
|
|
-
|
|
- /* In case we are unable to read the link count from xdata, we take a
|
|
- * conservative approach and return -2, which will prevent the inode from
|
|
- * being considered deleted. In fact it will cause link tracking for this
|
|
- * inode to be disabled completely to avoid races. */
|
|
-
|
|
- if (xdata == NULL) {
|
|
- return -2;
|
|
- }
|
|
-
|
|
- if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) {
|
|
- return -2;
|
|
- }
|
|
-
|
|
- return count;
|
|
-}
|
|
-
|
|
int32_t
|
|
pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
|
|
int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
|
|
struct iatt *postparent)
|
|
{
|
|
- pl_inode_t *pl_inode;
|
|
-
|
|
- if (op_ret >= 0) {
|
|
- pl_inode = pl_inode_get(this, inode, NULL);
|
|
- if (pl_inode == NULL) {
|
|
- PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL,
|
|
- NULL);
|
|
- return 0;
|
|
- }
|
|
-
|
|
- pthread_mutex_lock(&pl_inode->mutex);
|
|
-
|
|
- /* We only update the link count if we previously didn't know it.
|
|
- * Doing it always can lead to races since lookup is not executed
|
|
- * atomically most of the times. */
|
|
- if (pl_inode->links == -2) {
|
|
- pl_inode->links = pl_check_link_count(xdata);
|
|
- if (buf->ia_type == IA_IFDIR) {
|
|
- /* Directories have at least 2 links. To avoid special handling
|
|
- * for directories, we simply decrement the value here to make
|
|
- * them equivalent to regular files. */
|
|
- pl_inode->links--;
|
|
- }
|
|
- }
|
|
-
|
|
- pthread_mutex_unlock(&pl_inode->mutex);
|
|
- }
|
|
-
|
|
PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata,
|
|
postparent);
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
int32_t
|
|
pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
|
|
{
|
|
- int32_t error;
|
|
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
|
|
+ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
|
|
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
|
|
|
|
- error = pl_request_link_count(&xdata);
|
|
- if (error == 0) {
|
|
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
|
|
- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
|
|
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
|
|
- dict_unref(xdata);
|
|
- } else {
|
|
- STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL);
|
|
- }
|
|
return 0;
|
|
}
|
|
|
|
@@ -3881,9 +3801,7 @@ unlock:
|
|
__dump_posixlks(pl_inode);
|
|
}
|
|
|
|
- gf_proc_dump_write("links", "%d", pl_inode->links);
|
|
gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running);
|
|
- gf_proc_dump_write("removed", "%u", pl_inode->removed);
|
|
}
|
|
pthread_mutex_unlock(&pl_inode->mutex);
|
|
|
|
@@ -4508,21 +4426,9 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
|
|
int32_t op_errno, inode_t *inode, struct iatt *buf,
|
|
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
|
|
{
|
|
- pl_inode_t *pl_inode = (pl_inode_t *)cookie;
|
|
-
|
|
- if (op_ret >= 0) {
|
|
- pthread_mutex_lock(&pl_inode->mutex);
|
|
-
|
|
- /* TODO: can happen pl_inode->links == 0 ? */
|
|
- if (pl_inode->links >= 0) {
|
|
- pl_inode->links++;
|
|
- }
|
|
-
|
|
- pthread_mutex_unlock(&pl_inode->mutex);
|
|
- }
|
|
-
|
|
PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf,
|
|
preparent, postparent, xdata);
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
@@ -4530,18 +4436,10 @@ int
|
|
pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
|
|
dict_t *xdata)
|
|
{
|
|
- pl_inode_t *pl_inode;
|
|
-
|
|
- pl_inode = pl_inode_get(this, oldloc->inode, NULL);
|
|
- if (pl_inode == NULL) {
|
|
- STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
|
|
- NULL);
|
|
- return 0;
|
|
- }
|
|
-
|
|
PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
|
|
- STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this),
|
|
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
|
|
+ STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this),
|
|
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
--
|
|
1.8.3.1
|
|
|