b7dd6f45c1
Resolves: bz#1479446 bz#1520882 bz#1579758 bz#1598407 bz#1599808 Resolves: bz#1603118 bz#1619357 bz#1622001 bz#1622308 bz#1631166 Resolves: bz#1631418 bz#1632563 bz#1634649 bz#1635071 bz#1635100 Resolves: bz#1635136 bz#1636291 bz#1638069 bz#1640347 bz#1642854 Resolves: bz#1643035 bz#1644120 bz#1644279 bz#1645916 bz#1647675 Signed-off-by: Milind Changire <mchangir@redhat.com>
273 lines
9.6 KiB
Diff
273 lines
9.6 KiB
Diff
From 68b0db385ce968547349b187222b9a9401faee12 Mon Sep 17 00:00:00 2001
|
|
From: Pranith Kumar K <pkarampu@redhat.com>
|
|
Date: Mon, 27 Aug 2018 11:46:33 +0530
|
|
Subject: [PATCH 424/444] cluster/afr: Delegate metadata heal with pending
|
|
xattrs to SHD
|
|
|
|
Problem:
|
|
When metadata-self-heal is triggered on the mount, it blocks
|
|
lookup until metadata-self-heal completes. But that can lead
|
|
to hangs when lot of clients are accessing a directory which
|
|
needs metadata heal and all of them trigger heals waiting
|
|
for other clients to complete heal.
|
|
|
|
Fix:
|
|
Only when the heal is needed but the pending xattrs are not set,
|
|
trigger metadata heal that could block lookup. This is the only
|
|
case where different clients may give different metadata to the
|
|
clients without heals, which should be avoided.
|
|
|
|
BUG: 1619357
|
|
Upstream Patch: https://review.gluster.org/c/glusterfs/+/21086
|
|
Change-Id: I6089e9fda0770a83fb287941b229c882711f4e66
|
|
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/155028
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
---
|
|
tests/basic/afr/client-side-heal.t | 28 ++++++++++------
|
|
tests/bugs/glusterfs/bug-906646.t | 10 ++++--
|
|
xlators/cluster/afr/src/afr-common.c | 44 ++++++++++++++++++++++++++
|
|
xlators/cluster/afr/src/afr-self-heal-common.c | 38 ----------------------
|
|
xlators/cluster/afr/src/afr.h | 3 ++
|
|
5 files changed, 72 insertions(+), 51 deletions(-)
|
|
|
|
diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t
|
|
index eba7dc2..1e93361 100755
|
|
--- a/tests/basic/afr/client-side-heal.t
|
|
+++ b/tests/basic/afr/client-side-heal.t
|
|
@@ -17,6 +17,7 @@ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
echo "some data" > $M0/datafile
|
|
EXPECT 0 echo $?
|
|
TEST touch $M0/mdatafile
|
|
+TEST touch $M0/mdatafile-backend-direct-modify
|
|
TEST mkdir $M0/dir
|
|
|
|
#Kill a brick and perform I/O to have pending heals.
|
|
@@ -29,6 +30,7 @@ EXPECT 0 echo $?
|
|
|
|
#pending metadata heal
|
|
TEST chmod +x $M0/mdatafile
|
|
+TEST chmod +x $B0/${V0}0/mdatafile-backend-direct-modify
|
|
|
|
#pending entry heal. Also causes pending metadata/data heals on file{1..5}
|
|
TEST touch $M0/dir/file{1..5}
|
|
@@ -40,9 +42,12 @@ TEST $CLI volume start $V0 force
|
|
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
|
#Medatada heal via explicit lookup must not happen
|
|
-TEST ls $M0/mdatafile
|
|
+TEST getfattr -d -m. -e hex $M0/mdatafile
|
|
+TEST ls $M0/mdatafile-backend-direct-modify
|
|
|
|
-#Inode refresh must not trigger data and entry heals.
|
|
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" != "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
|
|
+
|
|
+#Inode refresh must not trigger data metadata and entry heals.
|
|
#To trigger inode refresh for sure, the volume is unmounted and mounted each time.
|
|
#Check that data heal does not happen.
|
|
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
@@ -52,7 +57,6 @@ TEST cat $M0/datafile
|
|
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
TEST ls $M0/dir
|
|
-
|
|
#No heal must have happened
|
|
EXPECT 8 get_pending_heal_count $V0
|
|
|
|
@@ -61,21 +65,25 @@ TEST $CLI volume set $V0 cluster.data-self-heal on
|
|
TEST $CLI volume set $V0 cluster.metadata-self-heal on
|
|
TEST $CLI volume set $V0 cluster.entry-self-heal on
|
|
|
|
-#Metadata heal is triggered by lookup without need for inode refresh.
|
|
-TEST ls $M0/mdatafile
|
|
-EXPECT 7 get_pending_heal_count $V0
|
|
-
|
|
-#Inode refresh must trigger data and entry heals.
|
|
+#Inode refresh must trigger data metadata and entry heals.
|
|
#To trigger inode refresh for sure, the volume is unmounted and mounted each time.
|
|
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
+TEST ls $M0/mdatafile-backend-direct-modify
|
|
+
|
|
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" == "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
|
|
+
|
|
+
|
|
+TEST getfattr -d -m. -e hex $M0/mdatafile
|
|
+EXPECT_WITHIN $HEAL_TIMEOUT 7 get_pending_heal_count $V0
|
|
+
|
|
TEST cat $M0/datafile
|
|
EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0
|
|
|
|
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
TEST ls $M0/dir
|
|
-EXPECT 5 get_pending_heal_count $V0
|
|
+EXPECT_WITHIN $HEAL_TIMEOUT 5 get_pending_heal_count $V0
|
|
|
|
TEST cat $M0/dir/file1
|
|
TEST cat $M0/dir/file2
|
|
@@ -83,5 +91,5 @@ TEST cat $M0/dir/file3
|
|
TEST cat $M0/dir/file4
|
|
TEST cat $M0/dir/file5
|
|
|
|
-EXPECT 0 get_pending_heal_count $V0
|
|
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
|
|
cleanup;
|
|
diff --git a/tests/bugs/glusterfs/bug-906646.t b/tests/bugs/glusterfs/bug-906646.t
|
|
index 45c85d9..37b8fe5 100644
|
|
--- a/tests/bugs/glusterfs/bug-906646.t
|
|
+++ b/tests/bugs/glusterfs/bug-906646.t
|
|
@@ -13,7 +13,6 @@ TEST pidof glusterd
|
|
TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11
|
|
TEST $CLI volume start $V0
|
|
|
|
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
|
|
TEST $CLI volume set $V0 cluster.background-self-heal-count 0
|
|
|
|
## Mount FUSE with caching disabled
|
|
@@ -82,10 +81,15 @@ EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
|
|
# restart the brick process
|
|
TEST $CLI volume start $V0 force
|
|
|
|
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 `expr $brick_id - 1`
|
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
|
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
|
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
|
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
|
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
|
|
|
|
-cat $pth >/dev/null
|
|
+TEST $CLI volume heal $V0
|
|
|
|
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
|
|
# check backends - xattr should not be present anywhere
|
|
EXPECT 1 xattr_query_check ${backend_paths_array[0]} "trusted.name"
|
|
EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
|
|
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
|
|
index e8107c9..e74fdec 100644
|
|
--- a/xlators/cluster/afr/src/afr-common.c
|
|
+++ b/xlators/cluster/afr/src/afr-common.c
|
|
@@ -2571,6 +2571,42 @@ out:
|
|
return 0;
|
|
}
|
|
|
|
+gf_boolean_t
|
|
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
|
|
+{
|
|
+ int idx = -1;
|
|
+ afr_private_t *priv = NULL;
|
|
+ void *pending_raw = NULL;
|
|
+ int *pending_int = NULL;
|
|
+ int i = 0;
|
|
+
|
|
+ priv = this->private;
|
|
+ idx = afr_index_for_transaction_type (type);
|
|
+
|
|
+ if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
|
|
+ if (pending_raw) {
|
|
+ pending_int = pending_raw;
|
|
+
|
|
+ if (ntoh32 (pending_int[idx]))
|
|
+ return _gf_true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < priv->child_count; i++) {
|
|
+ if (dict_get_ptr (xdata, priv->pending_key[i],
|
|
+ &pending_raw))
|
|
+ continue;
|
|
+ if (!pending_raw)
|
|
+ continue;
|
|
+ pending_int = pending_raw;
|
|
+
|
|
+ if (ntoh32 (pending_int[idx]))
|
|
+ return _gf_true;
|
|
+ }
|
|
+
|
|
+ return _gf_false;
|
|
+}
|
|
+
|
|
static gf_boolean_t
|
|
afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
|
|
{
|
|
@@ -2597,6 +2633,14 @@ afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
|
|
continue;
|
|
}
|
|
|
|
+ if (afr_is_pending_set (this, replies[i].xdata,
|
|
+ AFR_METADATA_TRANSACTION)) {
|
|
+ /* Let shd do the heal so that lookup is not blocked
|
|
+ * on getting metadata lock/doing the heal */
|
|
+ start = _gf_false;
|
|
+ break;
|
|
+ }
|
|
+
|
|
if (gf_uuid_compare (stbuf.ia_gfid, replies[i].poststat.ia_gfid)) {
|
|
start = _gf_false;
|
|
break;
|
|
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
index d04f11d..c6ee75b 100644
|
|
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
@@ -2182,44 +2182,6 @@ afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
|
return 0;
|
|
}
|
|
|
|
-
|
|
-gf_boolean_t
|
|
-afr_is_pending_set (xlator_t *this, dict_t *xdata, int type)
|
|
-{
|
|
- int idx = -1;
|
|
- afr_private_t *priv = NULL;
|
|
- void *pending_raw = NULL;
|
|
- int *pending_int = NULL;
|
|
- int i = 0;
|
|
-
|
|
- priv = this->private;
|
|
- idx = afr_index_for_transaction_type (type);
|
|
-
|
|
- if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) {
|
|
- if (pending_raw) {
|
|
- pending_int = pending_raw;
|
|
-
|
|
- if (ntoh32 (pending_int[idx]))
|
|
- return _gf_true;
|
|
- }
|
|
- }
|
|
-
|
|
- for (i = 0; i < priv->child_count; i++) {
|
|
- if (dict_get_ptr (xdata, priv->pending_key[i],
|
|
- &pending_raw))
|
|
- continue;
|
|
- if (!pending_raw)
|
|
- continue;
|
|
- pending_int = pending_raw;
|
|
-
|
|
- if (ntoh32 (pending_int[idx]))
|
|
- return _gf_true;
|
|
- }
|
|
-
|
|
- return _gf_false;
|
|
-}
|
|
-
|
|
-
|
|
gf_boolean_t
|
|
afr_is_data_set (xlator_t *this, dict_t *xdata)
|
|
{
|
|
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
|
|
index 2e6d995..af9dbc8 100644
|
|
--- a/xlators/cluster/afr/src/afr.h
|
|
+++ b/xlators/cluster/afr/src/afr.h
|
|
@@ -1225,4 +1225,7 @@ afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode);
|
|
|
|
gf_boolean_t
|
|
afr_is_symmetric_error (call_frame_t *frame, xlator_t *this);
|
|
+
|
|
+gf_boolean_t
|
|
+afr_is_pending_set (xlator_t *this, dict_t *xdata, int type);
|
|
#endif /* __AFR_H__ */
|
|
--
|
|
1.8.3.1
|
|
|