166 lines
6.1 KiB
Diff
166 lines
6.1 KiB
Diff
From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001
|
|
From: karthik-us <ksubrahm@redhat.com>
|
|
Date: Wed, 20 Nov 2019 12:26:11 +0530
|
|
Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no
|
|
healed_sinks
|
|
|
|
Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/
|
|
|
|
Problem:
|
|
In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame
|
|
anything for entry heal, heal will not complete even though we have
|
|
clear source and sinks. This will happen because while doing
|
|
afr_selfheal_find_direction() only the bricks which are blamed by
|
|
non-accused bricks are considered as sinks. Later in
|
|
__afr_selfheal_entry_finalize_source() when it tries to mark all the
|
|
non-sources as sinks it fails to do so because there won't be any
|
|
healed_sinks marked, no witness present and there will be a source.
|
|
|
|
Fix:
|
|
If there is a source and no healed_sinks, then reset all the locked
|
|
sources to 0 and healed sinks to 1 to do conservative merge.
|
|
|
|
Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57
|
|
Signed-off-by: karthik-us <ksubrahm@redhat.com>
|
|
BUG: 1764095
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/185834
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
---
|
|
.../bug-1749322-entry-heal-not-happening.t | 89 ++++++++++++++++++++++
|
|
xlators/cluster/afr/src/afr-self-heal-entry.c | 15 ++++
|
|
2 files changed, 104 insertions(+)
|
|
create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
|
|
|
|
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
|
|
new file mode 100644
|
|
index 0000000..9627908
|
|
--- /dev/null
|
|
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
|
|
@@ -0,0 +1,89 @@
|
|
+#!/bin/bash
|
|
+
|
|
+. $(dirname $0)/../../include.rc
|
|
+. $(dirname $0)/../../volume.rc
|
|
+. $(dirname $0)/../../afr.rc
|
|
+
|
|
+cleanup
|
|
+
|
|
+function check_gfid_and_link_count
|
|
+{
|
|
+ local file=$1
|
|
+
|
|
+ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
|
|
+ TEST [ ! -z $file_gfid_b0 ]
|
|
+ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
|
|
+ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
|
|
+ EXPECT $file_gfid_b0 echo $file_gfid_b1
|
|
+ EXPECT $file_gfid_b0 echo $file_gfid_b2
|
|
+
|
|
+ EXPECT "2" stat -c %h $B0/${V0}0/$file
|
|
+ EXPECT "2" stat -c %h $B0/${V0}1/$file
|
|
+ EXPECT "2" stat -c %h $B0/${V0}2/$file
|
|
+}
|
|
+TESTS_EXPECTED_IN_LOOP=18
|
|
+
|
|
+################################################################################
|
|
+## Start and create a volume
|
|
+TEST glusterd;
|
|
+TEST pidof glusterd;
|
|
+TEST $CLI volume info;
|
|
+
|
|
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
|
|
+TEST $CLI volume start $V0;
|
|
+TEST $CLI volume set $V0 cluster.heal-timeout 5
|
|
+TEST $CLI volume heal $V0 disable
|
|
+EXPECT 'Started' volinfo_field $V0 'Status';
|
|
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
|
|
+
|
|
+TEST mkdir $M0/dir
|
|
+TEST `echo "File 1 " > $M0/dir/file1`
|
|
+TEST touch $M0/dir/file{2..4}
|
|
+
|
|
+# Remove file2 from 1st & 3rd bricks
|
|
+TEST rm -f $B0/$V0"0"/dir/file2
|
|
+TEST rm -f $B0/$V0"2"/dir/file2
|
|
+
|
|
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
|
|
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
|
|
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
|
|
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
|
|
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
|
|
+TEST rm -f $B0/$V0"0"/dir/file3
|
|
+TEST rm -f $B0/$V0"1"/dir/file3
|
|
+
|
|
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
|
|
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
|
|
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
|
|
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
|
|
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
|
|
+
|
|
+# B0 and B2 blame each other
|
|
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
|
|
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
+
|
|
+# Add entry to xattrop dir on first brick.
|
|
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
|
|
+base_entry_b0=`ls $xattrop_dir0`
|
|
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
|
|
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
+
|
|
+EXPECT "^1$" get_pending_heal_count $V0
|
|
+
|
|
+# Launch heal
|
|
+TEST $CLI volume heal $V0 enable
|
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
|
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
|
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
|
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
|
|
+TEST $CLI volume heal $V0
|
|
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
+
|
|
+# All the files must be present on all the bricks after conservative merge and
|
|
+# should have the gfid xattr and the .glusterfs hardlink.
|
|
+check_gfid_and_link_count dir/file1
|
|
+check_gfid_and_link_count dir/file2
|
|
+check_gfid_and_link_count dir/file3
|
|
+check_gfid_and_link_count dir/file4
|
|
+
|
|
+cleanup
|
|
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
index 35b600f..3ce882e 100644
|
|
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
|
|
afr_private_t *priv = NULL;
|
|
int source = -1;
|
|
int sources_count = 0;
|
|
+ int i = 0;
|
|
|
|
priv = this->private;
|
|
|
|
@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
|
|
}
|
|
|
|
source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
|
|
+
|
|
+ /*If the selected source does not blame any other brick, then mark
|
|
+ * everything as sink to trigger conservative merge.
|
|
+ */
|
|
+ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
|
|
+ for (i = 0; i < priv->child_count; i++) {
|
|
+ if (locked_on[i]) {
|
|
+ sources[i] = 0;
|
|
+ healed_sinks[i] = 1;
|
|
+ }
|
|
+ }
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
return source;
|
|
}
|
|
|
|
--
|
|
1.8.3.1
|
|
|