glusterfs/0323-cluster-afr-Mark-dirty-for-entry-transactions-for-qu.patch
Milind Changire 0820681560 autobuild v3.12.2-14
Resolves: bz#1547903 bz#1566336 bz#1568896 bz#1578716 bz#1581047
Resolves: bz#1581231 bz#1582066 bz#1593865 bz#1597506 bz#1597511
Resolves: bz#1597654 bz#1597768 bz#1598105 bz#1598356 bz#1599037
Resolves: bz#1599823 bz#1600057 bz#1601314
Signed-off-by: Milind Changire <mchangir@redhat.com>
2018-07-18 08:38:52 -04:00

229 lines
7.8 KiB
Diff

From 77c33f6c257928576d328e6e735f7e7a086202a3 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Tue, 17 Jul 2018 11:56:10 +0530
Subject: [PATCH 323/325] cluster/afr: Mark dirty for entry transactions for
quorum failures
Backport of:https://review.gluster.org/#/c/20153/
Problem:
If an entry creation transaction fails on quprum number of bricks
it might end up setting the pending changelogs on the file itself
on the brick where it got created. But the parent does not have
any entry pending marker set. This will lead to the entry not
getting healed by the self heal daemon automatically.
Fix:
For entry transactions mark dirty on the parent if it fails on
quorum number of bricks, so that the heal can do conservative
merge and entry gets healed by shd.
Change-Id: I8bbd02da7c4c9edd9c3f947e9a4ed3d37c9bec1c
BUG: 1566336
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/144145
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 73 ++++++++++++++++++++++
xlators/cluster/afr/src/afr-transaction.c | 62 ++++++++++++++----
2 files changed, 124 insertions(+), 11 deletions(-)
create mode 100644 tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
new file mode 100644
index 0000000..7fec3b4
--- /dev/null
+++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function create_files {
+ local i=1
+ while (true)
+ do
+ dd if=/dev/zero of=$M0/file$i bs=1M count=10
+ if [ -e $B0/${V0}0/file$i ] && [ -e $B0/${V0}1/file$i ]; then
+ ((i++))
+ else
+ break
+ fi
+ done
+ echo $i
+}
+
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 100M $B0/brick0
+TEST truncate -s 100M $B0/brick1
+#Have the 3rd brick of a higher size to test the scenario of entry transaction
+#passing on only one brick and not on other bricks.
+TEST truncate -s 110M $B0/brick2
+LO1=`SETUP_LOOP $B0/brick0`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+LO2=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+LO3=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO3
+TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+TEST MOUNT_LOOP $LO1 $B0/${V0}0
+TEST MOUNT_LOOP $LO2 $B0/${V0}1
+TEST MOUNT_LOOP $LO3 $B0/${V0}2
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+i=$(create_files)
+TEST ! ls $B0/${V0}0/file$i
+TEST ! ls $B0/${V0}1/file$i
+TEST ls $B0/${V0}2/file$i
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST rm -f $M0/file1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+UMOUNT_LOOP ${B0}/${V0}{0,1,2}
+rm -f ${B0}/brick{0,1,2}
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 5b18f63..321b6f1 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -774,8 +774,38 @@ afr_has_fop_cbk_quorum (call_frame_t *frame)
return afr_has_quorum (success, this);
}
+gf_boolean_t
+afr_need_dirty_marking (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ gf_boolean_t need_dirty = _gf_false;
+
+ local = frame->local;
+
+ if (!priv->quorum_count || !local->optimistic_change_log)
+ return _gf_false;
+
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION)
+ return _gf_false;
+
+ if (AFR_COUNT (local->transaction.failed_subvols, priv->child_count) ==
+ priv->child_count)
+ return _gf_false;
+
+ if (priv->arbiter_count) {
+ if (!afr_has_arbiter_fop_cbk_quorum (frame))
+ need_dirty = _gf_true;
+ } else if (!afr_has_fop_cbk_quorum (frame)) {
+ need_dirty = _gf_true;
+ }
+
+ return need_dirty;
+}
+
void
-afr_handle_quorum (call_frame_t *frame)
+afr_handle_quorum (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -826,11 +856,15 @@ afr_handle_quorum (call_frame_t *frame)
return;
}
+ if (afr_need_dirty_marking (frame, this))
+ goto set_response;
+
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i])
afr_transaction_fop_failed (frame, frame->this, i);
}
+set_response:
local->op_ret = -1;
local->op_errno = afr_final_errno (local, priv);
if (local->op_errno == 0)
@@ -874,9 +908,17 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
int nothing_failed = 1;
gf_boolean_t need_undirty = _gf_false;
- afr_handle_quorum (frame);
+ afr_handle_quorum (frame, this);
local = frame->local;
- idx = afr_index_for_transaction_type (local->transaction.type);
+ idx = afr_index_for_transaction_type (local->transaction.type);
+
+ xattr = dict_new ();
+ if (!xattr) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ afr_changelog_post_op_done (frame, this);
+ goto out;
+ }
nothing_failed = afr_txn_nothing_failed (frame, this);
@@ -886,6 +928,11 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
need_undirty = _gf_true;
if (local->op_ret < 0 && !nothing_failed) {
+ if (afr_need_dirty_marking (frame, this)) {
+ local->dirty[idx] = hton32(1);
+ goto set_dirty;
+ }
+
afr_changelog_post_op_done (frame, this);
goto out;
}
@@ -902,14 +949,6 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
goto out;
}
- xattr = dict_new ();
- if (!xattr) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- afr_changelog_post_op_done (frame, this);
- goto out;
- }
-
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.failed_subvols[i])
local->pending[i][idx] = hton32(1);
@@ -928,6 +967,7 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
else
local->dirty[idx] = hton32(0);
+set_dirty:
ret = dict_set_static_bin (xattr, AFR_DIRTY, local->dirty,
sizeof(int) * AFR_NUM_CHANGE_LOGS);
if (ret) {
--
1.8.3.1