glusterfs/SOURCES/0577-protocol-client-don-t-reopen-fds-on-which-POSIX-lock.patch
2022-02-25 05:25:05 +00:00

473 lines
18 KiB
Diff

From d7665cf3249310c5faf87368f395b4e25cb86b48 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Thu, 15 Apr 2021 10:29:06 +0530
Subject: [PATCH 577/584] protocol/client: don't reopen fds on which POSIX
locks are held after a reconnect
XXXXXXXXXXXXXXXXXXX
IMPORTANT:
XXXXXXXXXXXXXXXXXXX
As a best pratice, with this patch we are bumping up the op-version
from GD_OP_VERSION_7_1 to GD_OP_VERSION_7_2 since it introduces a
new volume option. Enabling the new option will have effect only
after all the servers and clients are upgraded to this version.
----------------------------------------------------------------------
Bricks cleanup any granted locks after a client disconnects and
currently these locks are not healed after a reconnect. This means
post reconnect a competing process could be granted a lock even though
the first process which was granted locks has not unlocked. By not
re-opening fds, subsequent operations on such fds will fail forcing
the application to close the current fd and reopen a new one. This way
we prevent any silent corruption.
A new option "client.strict-locks" is introduced to control this
behaviour. This option is set to "off" by default.
> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22712/
> Change-Id: Ieed545efea466cb5e8f5a36199aa26380c301b9e
> Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
> updates: bz#1694920
BUG: 1689375
Change-Id: Ieed545efea466cb5e8f5a36199aa26380c301b9e
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244909
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
---
libglusterfs/src/glusterfs/globals.h | 4 +-
tests/bugs/bug-1694920.t | 63 ++++++++++++++++++++++++
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 14 ++++++
xlators/protocol/client/src/client-handshake.c | 3 +-
xlators/protocol/client/src/client-helpers.c | 5 +-
xlators/protocol/client/src/client-lk.c | 2 +-
xlators/protocol/client/src/client-rpc-fops.c | 45 ++++++++++++++++-
xlators/protocol/client/src/client-rpc-fops_v2.c | 32 +++++++++++-
xlators/protocol/client/src/client.c | 13 +++++
xlators/protocol/client/src/client.h | 16 ++++++
10 files changed, 190 insertions(+), 7 deletions(-)
create mode 100644 tests/bugs/bug-1694920.t
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
index 33fb023..ce2d110 100644
--- a/libglusterfs/src/glusterfs/globals.h
+++ b/libglusterfs/src/glusterfs/globals.h
@@ -50,7 +50,7 @@
1 /* MIN is the fresh start op-version, mostly \
should not change */
#define GD_OP_VERSION_MAX \
- GD_OP_VERSION_7_1 /* MAX VERSION is the maximum \
+ GD_OP_VERSION_7_2 /* MAX VERSION is the maximum \
count in VME table, should \
keep changing with \
introduction of newer \
@@ -140,6 +140,8 @@
#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */
+#define GD_OP_VERSION_7_2 70200 /* Op-version for GlusterFS 7.2 */
+
#include "glusterfs/xlator.h"
#include "glusterfs/options.h"
diff --git a/tests/bugs/bug-1694920.t b/tests/bugs/bug-1694920.t
new file mode 100644
index 0000000..5bf93c9
--- /dev/null
+++ b/tests/bugs/bug-1694920.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should succeed by default
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_write $fd1 "data"
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should fail when client.strict-locks is on
+TEST $CLI volume set $V0 client.strict-locks on
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST ! fd_write $fd1 "data"
+TEST fd_close $fd1
+
+cleanup
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index c1ca190..01f3912 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2022,6 +2022,20 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.value = "9",
.flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "client.strict-locks",
+ .voltype = "protocol/client",
+ .option = "strict-locks",
+ .value = "off",
+ .op_version = GD_OP_VERSION_7_2,
+ .validate_fn = validate_boolean,
+ .type = GLOBAL_DOC,
+ .description = "When set, doesn't reopen saved fds after reconnect "
+ "if POSIX locks are held on them. Hence subsequent "
+ "operations on these fds will fail. This is "
+ "necessary for stricter lock complaince as bricks "
+ "cleanup any granted locks when a client "
+ "disconnects."},
+
/* Server xlator options */
{.key = "network.tcp-window-size",
.voltype = "protocol/server",
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index 6b20d92..a12472b 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -910,7 +910,8 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
{
list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
{
- if (fdctx->remote_fd != -1)
+ if (fdctx->remote_fd != -1 ||
+ (!list_empty(&fdctx->lock_list) && conf->strict_locks))
continue;
fdctx->reopen_done = client_child_up_reopen_done;
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index 53b4484..6543100 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -410,6 +410,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
{
clnt_fd_ctx_t *fdctx = NULL;
clnt_conf_t *conf = NULL;
+ gf_boolean_t locks_held = _gf_false;
GF_VALIDATE_OR_GOTO(this->name, fd, out);
GF_VALIDATE_OR_GOTO(this->name, remote_fd, out);
@@ -431,11 +432,13 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
*remote_fd = -1;
else
*remote_fd = fdctx->remote_fd;
+
+ locks_held = !list_empty(&fdctx->lock_list);
}
}
pthread_spin_unlock(&conf->fd_lock);
- if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1))
+ if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && (!locks_held))
*remote_fd = GF_ANON_FD_NO;
return 0;
diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
index 679e198..c1fb055 100644
--- a/xlators/protocol/client/src/client-lk.c
+++ b/xlators/protocol/client/src/client-lk.c
@@ -351,7 +351,7 @@ delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner)
list_for_each_entry_safe(lock, tmp, &fdctx->lock_list, list)
{
- if (!is_same_lkowner(&lock->owner, owner)) {
+ if (is_same_lkowner(&lock->owner, owner)) {
list_del_init(&lock->list);
list_add_tail(&lock->list, &delete_list);
count++;
diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
index 1c8b31b..3110c78 100644
--- a/xlators/protocol/client/src/client-rpc-fops.c
+++ b/xlators/protocol/client/src/client-rpc-fops.c
@@ -22,8 +22,18 @@ int32_t
client3_getspec(call_frame_t *frame, xlator_t *this, void *data);
rpc_clnt_prog_t clnt3_3_fop_prog;
-/* CBK */
+int
+client_is_setlk(int32_t cmd)
+{
+ if ((cmd == F_SETLK) || (cmd == F_SETLK64) || (cmd == F_SETLKW) ||
+ (cmd == F_SETLKW64)) {
+ return 1;
+ }
+ return 0;
+}
+
+/* CBK */
int
client3_3_symlink_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
@@ -816,7 +826,8 @@ client3_3_flush_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- if (rsp.op_ret >= 0 && !fd_is_anonymous(local->fd)) {
+ if ((rsp.op_ret >= 0 || (rsp.op_errno == ENOTCONN)) &&
+ !fd_is_anonymous(local->fd)) {
/* Delete all saved locks of the owner issuing flush */
ret = delete_granted_locks_owner(local->fd, &local->owner);
gf_msg_trace(this->name, 0, "deleting locks of owner (%s) returned %d",
@@ -2388,10 +2399,12 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
int ret = 0;
xlator_t *this = NULL;
dict_t *xdata = NULL;
+ clnt_local_t *local = NULL;
this = THIS;
frame = myframe;
+ local = frame->local;
if (-1 == req->rpc_status) {
rsp.op_ret = -1;
@@ -2412,6 +2425,18 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = client_post_lk(this, &rsp, &lock, &xdata);
if (ret < 0)
goto out;
+
+ /* Save the lock to the client lock cache to be able
+ to recover in the case of server reboot.*/
+
+ if (client_is_setlk(local->cmd)) {
+ ret = client_add_lock_for_recovery(local->fd, &lock, &local->owner,
+ local->cmd);
+ if (ret < 0) {
+ rsp.op_ret = -1;
+ rsp.op_errno = -ret;
+ }
+ }
}
out:
@@ -4263,8 +4288,16 @@ client3_3_flush(call_frame_t *frame, xlator_t *this, void *data)
ret = client_pre_flush(this, &req, args->fd, args->xdata);
if (ret) {
op_errno = -ret;
+ if (op_errno == EBADF) {
+ ret = delete_granted_locks_owner(local->fd, &local->owner);
+ gf_msg_trace(this->name, 0,
+ "deleting locks of owner (%s) returned %d",
+ lkowner_utoa(&local->owner), ret);
+ }
+
goto unwind;
}
+
ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH,
client3_3_flush_cbk, NULL,
(xdrproc_t)xdr_gfs3_flush_req);
@@ -5199,8 +5232,16 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
args->xdata);
if (ret) {
op_errno = -ret;
+
+ if ((op_errno == EBADF) && (args->flock->l_type == F_UNLCK) &&
+ client_is_setlk(local->cmd)) {
+ client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
+ local->cmd);
+ }
+
goto unwind;
}
+
ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK,
client3_3_lk_cbk, NULL,
(xdrproc_t)xdr_gfs3_lk_req);
diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c
index 613dda8..954fc58 100644
--- a/xlators/protocol/client/src/client-rpc-fops_v2.c
+++ b/xlators/protocol/client/src/client-rpc-fops_v2.c
@@ -723,7 +723,8 @@ client4_0_flush_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- if (rsp.op_ret >= 0 && !fd_is_anonymous(local->fd)) {
+ if ((rsp.op_ret >= 0 || (rsp.op_errno == ENOTCONN)) &&
+ !fd_is_anonymous(local->fd)) {
/* Delete all saved locks of the owner issuing flush */
ret = delete_granted_locks_owner(local->fd, &local->owner);
gf_msg_trace(this->name, 0, "deleting locks of owner (%s) returned %d",
@@ -2193,10 +2194,12 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
int ret = 0;
xlator_t *this = NULL;
dict_t *xdata = NULL;
+ clnt_local_t *local = NULL;
this = THIS;
frame = myframe;
+ local = frame->local;
if (-1 == req->rpc_status) {
rsp.op_ret = -1;
@@ -2217,6 +2220,18 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = client_post_lk_v2(this, &rsp, &lock, &xdata);
if (ret < 0)
goto out;
+
+ /* Save the lock to the client lock cache to be able
+ to recover in the case of server reboot.*/
+
+ if (client_is_setlk(local->cmd)) {
+ ret = client_add_lock_for_recovery(local->fd, &lock, &local->owner,
+ local->cmd);
+ if (ret < 0) {
+ rsp.op_ret = -1;
+ rsp.op_errno = -ret;
+ }
+ }
}
out:
@@ -3998,6 +4013,13 @@ client4_0_flush(call_frame_t *frame, xlator_t *this, void *data)
ret = client_pre_flush_v2(this, &req, args->fd, args->xdata);
if (ret) {
op_errno = -ret;
+ if (op_errno == EBADF) {
+ ret = delete_granted_locks_owner(local->fd, &local->owner);
+ gf_msg_trace(this->name, 0,
+ "deleting locks of owner (%s) returned %d",
+ lkowner_utoa(&local->owner), ret);
+ }
+
goto unwind;
}
ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH,
@@ -4771,8 +4793,16 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
args->xdata);
if (ret) {
op_errno = -ret;
+
+ if ((op_errno == EBADF) && (args->flock->l_type == F_UNLCK) &&
+ client_is_setlk(local->cmd)) {
+ client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
+ local->cmd);
+ }
+
goto unwind;
}
+
ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK,
client4_0_lk_cbk, NULL,
(xdrproc_t)xdr_gfx_lk_req);
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index ed855ca..63c90ea 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -2491,6 +2491,7 @@ build_client_config(xlator_t *this, clnt_conf_t *conf)
GF_OPTION_INIT("filter-O_DIRECT", conf->filter_o_direct, bool, out);
GF_OPTION_INIT("send-gids", conf->send_gids, bool, out);
+ GF_OPTION_INIT("strict-locks", conf->strict_locks, bool, out);
conf->client_id = glusterfs_leaf_position(this);
@@ -2676,6 +2677,7 @@ reconfigure(xlator_t *this, dict_t *options)
out);
GF_OPTION_RECONF("send-gids", conf->send_gids, options, bool, out);
+ GF_OPTION_RECONF("strict-locks", conf->strict_locks, options, bool, out);
ret = 0;
out:
@@ -3032,6 +3034,17 @@ struct volume_options options[] = {
" power. Range 1-32 threads.",
.op_version = {GD_OP_VERSION_RHS_3_0},
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"strict-locks"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_7_2},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "When set, doesn't reopen saved fds after reconnect "
+ "if POSIX locks are held on them. Hence subsequent "
+ "operations on these fds will fail. This is "
+ "necessary for stricter lock complaince as bricks "
+ "cleanup any granted locks when a client "
+ "disconnects."},
{.key = {NULL}},
};
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index f12fa61..bde3d1a 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -235,6 +235,15 @@ typedef struct clnt_conf {
* up, disconnects can be
* logged
*/
+
+ gf_boolean_t strict_locks; /* When set, doesn't reopen saved fds after
+ reconnect if POSIX locks are held on them.
+ Hence subsequent operations on these fds will
+ fail. This is necessary for stricter lock
+ complaince as bricks cleanup any granted
+ locks when a client disconnects.
+ */
+
} clnt_conf_t;
typedef struct _client_fd_ctx {
@@ -513,4 +522,11 @@ compound_request_cleanup_v2(gfx_compound_req *req);
void
client_compound_rsp_cleanup_v2(gfx_compound_rsp *rsp, int len);
+int
+client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
+ gf_lkowner_t *owner, int32_t cmd);
+
+int
+client_is_setlk(int32_t cmd);
+
#endif /* !_CLIENT_H */
--
1.8.3.1