From 1b5eaf155e66e58904df0561528715b0c872f303 Mon Sep 17 00:00:00 2001 From: Gluster Jenkins Date: Tue, 6 Jul 2021 08:47:32 +0000 Subject: [PATCH] autobuild v6.0-59 Related: rhbz#2055630 Resolves: bz#1689375 Signed-off-by: Gluster Jenkins --- ...Do-not-reopen-fd-post-handshake-if-p.patch | 298 ++++++++++++++++++ glusterfs.spec | 7 +- 2 files changed, 303 insertions(+), 2 deletions(-) create mode 100644 0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch diff --git a/0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch b/0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch new file mode 100644 index 0000000..62c574d --- /dev/null +++ b/0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch @@ -0,0 +1,298 @@ +From e431321f1348b5d51733a6b6c5e046fd8c6e28cc Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Mon, 5 Jul 2021 10:52:10 +0530 +Subject: [PATCH 586/586] protocol/client: Do not reopen fd post handshake if + posix lock is held + +Problem: +With client.strict-locks enabled, in some cases where the posix lock is +taken after a brick gets disconnected, the fd is getting reopened when +the brick gets reconnected to the client as part of client_post_handshake. +In such cases the saved fdctx's lock_list may not have the latest +information. + +Fix: +Check the lock information in the fdctx->lk_ctx as well post handshake +which will have the latest information on the locks. +Also check for this field in other places as well to prevent writes +happening with anonymous fd even without re-opening the fd on the +restarted brick. + +> Upstream patch: https://github.com/gluster/glusterfs/pull/2582 +> Fixes: #2581 +> Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b +> Signed-off-by: karthik-us ksubrahm@redhat.com + +BUG: 1689375 +Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/252588 +Tested-by: RHGS Build Bot +Reviewed-by: Ravishankar Narayanankutty +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/replicate/do-not-reopen-fd.t | 76 ++++++++++++++++++-------- + xlators/protocol/client/src/client-handshake.c | 2 +- + xlators/protocol/client/src/client-helpers.c | 11 +++- + xlators/protocol/client/src/client.c | 2 +- + xlators/protocol/client/src/client.h | 3 + + 5 files changed, 67 insertions(+), 27 deletions(-) + +diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t +index 13b5218..f346709 100644 +--- a/tests/bugs/replicate/do-not-reopen-fd.t ++++ b/tests/bugs/replicate/do-not-reopen-fd.t +@@ -20,10 +20,41 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1 + + TEST touch $M0/a ++gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a) ++gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a) ++ ++ ++# Open fd from a client, check for open fd on all the bricks. ++TEST fd1=`fd_available` ++TEST fd_open $fd1 'rw' $M0/a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ ++# Kill a brick and take lock on the fd ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++TEST flock -x $fd1 ++ ++# Restart the brick and check for no open fd on the restarted brick. 
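++# (With client.strict-locks in effect, the reopen is expected to be skipped
++# here: the flock taken above is still tracked in fdctx->lk_ctx, which the
++# post-handshake path now consults via fdctx_lock_lists_empty().)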
++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a ++EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++ ++# Write on the fd. It should fail on the restarted brick. ++TEST fd_write $fd1 "data-0" ++EXPECT "" cat $B0/${V0}0/a ++EXPECT "data-0" cat $B0/${V0}1/a ++EXPECT "data-0" cat $B0/${V0}2/a ++ ++TEST fd_close $fd1 + + # Kill one brick and take lock on the fd and do a write. + TEST kill_brick $V0 $H0 $B0/${V0}0 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd1=`fd_available` + TEST fd_open $fd1 'rw' $M0/a + +@@ -34,7 +65,7 @@ TEST fd_write $fd1 "data-1" + # should still succeed as there were no quorum disconnects. + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd_write $fd1 "data-2" + EXPECT "" cat $B0/${V0}0/a + EXPECT "data-2" cat $B0/${V0}1/a +@@ -42,9 +73,6 @@ EXPECT "data-2" cat $B0/${V0}2/a + + # Check there is no fd opened on the 1st brick by checking for the gfid inside + # /proc/pid-of-brick/fd/ directory +-gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a) +-gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a) +- + EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a +@@ -59,7 +87,7 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + # Kill 2nd brick and try writing to the file. The write should fail due to + # quorum failure. + TEST kill_brick $V0 $H0 $B0/${V0}1 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 1 + TEST ! fd_write $fd1 "data-3" + TEST ! fd_cat $fd1 + +@@ -67,7 +95,7 @@ TEST ! fd_cat $fd1 + # which were down previously, will return EBADFD now. + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1 + TEST ! fd_write $fd1 "data-4" + TEST ! 
fd_cat $fd1 + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +@@ -79,9 +107,9 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 + TEST $CLI volume heal $V0 enable + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 + + TEST $CLI volume heal $V0 + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +@@ -103,7 +131,7 @@ TEST ! fd_write $fd1 "data-5" + # Kill the only brick that is having lock and try taking lock on another client + # which should succeed. + TEST kill_brick $V0 $H0 $B0/${V0}2 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 2 + TEST flock -x $fd2 + TEST fd_write $fd2 "data-6" + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +@@ -114,17 +142,17 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a + # fail and operations on the 2nd fd should succeed. + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M1 $V0-replicate-0 2 + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + TEST ! fd_write $fd1 "data-7" + + TEST ! 
fd_cat $fd1 + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a ++EXPECT "^0" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + TEST fd_cat $fd2 + + # Close both the fds which will release the locks and then re-open and take lock +@@ -159,9 +187,9 @@ EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0 + # Heal the volume + TEST $CLI volume heal $V0 enable + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 + + TEST $CLI volume heal $V0 + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +@@ -169,7 +197,7 @@ TEST $CLI volume heal $V0 disable + + # Kill one brick and open a fd. + TEST kill_brick $V0 $H0 $B0/${V0}0 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd1=`fd_available` + TEST fd_open $fd1 'rw' $M0/a + +@@ -182,7 +210,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + # any of the bricks. + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd_write $fd1 "data-10" + EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + +@@ -193,7 +221,7 @@ TEST fd_close $fd1 + + # Kill one brick, open and take lock on a fd. + TEST kill_brick $V0 $H0 $B0/${V0}0 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0 + TEST fd1=`fd_available` + TEST fd_open $fd1 'rw' $M0/a + TEST flock -x $fd1 +@@ -204,7 +232,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + + # Kill & restart another brick so that it will return EBADFD + TEST kill_brick $V0 $H0 $B0/${V0}1 +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" brick_up_status $V0 $H0 $B0/${V0}1 + + # Restart the bricks and then write. Now fd should not get re-opened since lock + # is still held on one brick and write should also fail as there is no quorum. 
+@@ -212,8 +240,8 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1 + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1 + TEST ! fd_write $fd1 "data-11" + EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index a12472b..20e03d8 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -911,7 +911,7 @@ client_post_handshake(call_frame_t *frame, xlator_t *this) + list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos) + { + if (fdctx->remote_fd != -1 || +- (!list_empty(&fdctx->lock_list) && conf->strict_locks)) ++ (!fdctx_lock_lists_empty(fdctx) && conf->strict_locks)) + continue; + + fdctx->reopen_done = client_child_up_reopen_done; +diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c +index a80f303..b4a7294 100644 +--- a/xlators/protocol/client/src/client-helpers.c ++++ b/xlators/protocol/client/src/client-helpers.c +@@ -15,6 +15,15 @@ + #include + #include + ++gf_boolean_t ++fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx) ++{ ++ if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx)) ++ return _gf_true; ++ ++ return _gf_false; ++} ++ + int + client_fd_lk_list_empty(fd_lk_ctx_t *lk_ctx, gf_boolean_t try_lock) + { +@@ -441,7 +450,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd, + *remote_fd = fdctx->remote_fd; + } + +- locks_involved = !list_empty(&fdctx->lock_list); ++ locks_involved = !fdctx_lock_lists_empty(fdctx); + } + } + pthread_spin_unlock(&conf->fd_lock); +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 35a5340..6df2ed1 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -881,7 +881,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + pthread_spin_lock(&conf->fd_lock); + { + fdctx = this_fd_get_ctx(fd, this); +- if (fdctx && !list_empty(&fdctx->lock_list)) { ++ if (fdctx && !fdctx_lock_lists_empty(fdctx)) { + ret = -1; + op_errno = EBADFD; + } +diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h +index f952aea..799fe6e 100644 +--- a/xlators/protocol/client/src/client.h ++++ b/xlators/protocol/client/src/client.h +@@ -535,4 +535,7 @@ client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock, + int + client_is_setlk(int32_t cmd); + ++gf_boolean_t ++fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx); ++ + #endif /* !_CLIENT_H */ +-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index a8575fa..898c5fd 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -237,7 +237,7 @@ Release: 0.2%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 58%{?dist} +Release: 59%{?dist} ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ @@ 
-902,6 +902,7 @@ Patch0582: 0582-protocol-client-Fix-lock-memory-leak.patch Patch0583: 0583-protocol-client-Initialize-list-head-to-prevent-NULL.patch Patch0584: 0584-dht-fixing-xattr-inconsistency.patch Patch0585: 0585-ganesha_ha-ganesha_grace-RA-fails-in-start-and-or-fa.patch +Patch0586: 0586-protocol-client-Do-not-reopen-fd-post-handshake-if-p.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -2648,7 +2649,6 @@ fi %endif %changelog -<<<<<<< HEAD * Mon Aug 09 2021 Mohan Boddu - 6.0-57.4 - Rebuilt for IMA sigs, glibc 2.34, aarch64 flags Related: rhbz#1991688 @@ -2662,6 +2662,9 @@ fi - Fix changlog chronological order by removing unneeded changelogs - fixes bug bz#1939340 +* Tue Jul 06 2021 Gluster Jenkins - 6.0-59 +- fixes bugs bz#1689375 + * Wed Jun 16 2021 Gluster Jenkins - 6.0-58 - fixes bugs bz#1945143
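For readers who want the essence of patch 0586 without tracing the diff: the
reopen decision in client_post_handshake previously consulted only
fdctx->lock_list, so a posix lock taken while a brick was down (recorded in
fdctx->lk_ctx instead) did not block the reopen. The new helper
fdctx_lock_lists_empty() merges both checks. The standalone C sketch below
models that decision; the struct and function names (fdctx_sketch,
lock_lists_empty, should_reopen) are illustrative stand-ins rather than
GlusterFS types — only remote_fd, lock_list, lk_ctx, strict_locks, and
fdctx_lock_lists_empty come from the patch itself.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for clnt_fd_ctx_t and its two lock stores. In
 * GlusterFS these are a list head (lock_list) and an fd_lk_ctx_t
 * (lk_ctx); here booleans model whether each store is empty. */
struct fdctx_sketch {
    int remote_fd;        /* -1 means "not open on the brick" */
    bool lock_list_empty; /* locks recorded in fdctx->lock_list */
    bool lk_ctx_empty;    /* locks recorded in fdctx->lk_ctx */
};

/* Mirrors fdctx_lock_lists_empty(): the fd counts as lock-free only
 * when BOTH stores are empty. Checking lock_list alone misses locks
 * taken while the brick was down, which land in lk_ctx. */
static bool lock_lists_empty(const struct fdctx_sketch *fdctx)
{
    return fdctx->lock_list_empty && fdctx->lk_ctx_empty;
}

/* The post-handshake rule with strict-locks on: skip reopening any fd
 * that is already open on the brick or still holds a lock in either
 * store, matching the "continue" branch in client_post_handshake(). */
static bool should_reopen(const struct fdctx_sketch *fdctx, bool strict_locks)
{
    if (fdctx->remote_fd != -1)
        return false;
    if (strict_locks && !lock_lists_empty(fdctx))
        return false;
    return true;
}

int main(void)
{
    /* The bug scenario: a lock taken after the disconnect lives only
     * in lk_ctx, so the old lock_list-only check would wrongly reopen. */
    struct fdctx_sketch fdctx = { .remote_fd = -1,
                                  .lock_list_empty = true,
                                  .lk_ctx_empty = false };
    printf("reopen? %s\n", should_reopen(&fdctx, true) ? "yes" : "no");
    return 0;
}

Compiled with, say, gcc -std=c99, the sample prints "reopen? no": with strict
locks on, a lock present in either store keeps the fd from being reopened on
the reconnected brick, so subsequent writes on that fd fail with EBADFD (as
the updated do-not-reopen-fd.t asserts) instead of silently proceeding on a
brick where the lock was lost.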