import glusterfs-6.0-37.2.el8

This commit is contained in:
CentOS Sources 2020-09-17 00:08:53 +00:00 committed by Andrew Lukoshko
parent 9190a18465
commit 02ae2cd7e2
10 changed files with 1330 additions and 2 deletions

View File

@ -0,0 +1,44 @@
From 2948ee521316d40384130138233178ba940b175f Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Mon, 4 May 2020 14:30:57 +0530
Subject: [PATCH 384/392] extras: Modify group 'virt' to include
network-related options
This is needed to work around an issue seen where vms running on
online hosts are getting killed when a different host is rebooted
in ovirt-gluster hyperconverged environments. Actual RCA is quite
lengthy and documented in the github issue. Please refer to it
for more details.
Upstream patch:
> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24400
> Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1
> Fixes: #1217
> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1
BUG: 1848899
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/203685
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
extras/group-virt.example | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/extras/group-virt.example b/extras/group-virt.example
index c2ce89d..3a441eb 100644
--- a/extras/group-virt.example
+++ b/extras/group-virt.example
@@ -16,3 +16,8 @@ cluster.choose-local=off
client.event-threads=4
server.event-threads=4
performance.client-io-threads=on
+network.ping-timeout=20
+server.tcp-user-timeout=20
+server.keepalive-time=10
+server.keepalive-interval=2
+server.keepalive-count=5
--
1.8.3.1

View File

@ -0,0 +1,237 @@
From cdd067dcc0cd70d4f57e173b4050d8e2eb79725a Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Fri, 5 Jun 2020 17:20:04 +0530
Subject: [PATCH 385/392] cluster/afr: Prioritize ENOSPC over other errors
Backport of: https://review.gluster.org/#/c/glusterfs/+/24477/
Problem:
In a replicate/arbiter volume if file creations or writes fails on
quorum number of bricks and on one brick it is due to ENOSPC and
on other brick it fails for a different reason, it may fail with
errors other than ENOSPC in some cases.
Fix:
Prioritize ENOSPC over other lesser priority errors and do not set
op_errno in posix_gfid_set if op_ret is 0 to avoid receiving any
error_no which can be misinterpreted by __afr_dir_write_finalize().
Also removing the function afr_has_arbiter_fop_cbk_quorum() which
might consider a successful reply form a single brick as quorum
success in some cases, whereas we always need fop to be successful
on quorum number of bricks in arbiter configuration.
Change-Id: I4dd2bff17e6812bc7c8372130976e365e2407d88
Signed-off-by: karthik-us <ksubrahm@redhat.com>
BUG: 1848895
(cherry picked from commit 8b11ac1575ef167af2a47a96f7b7ed0f32bb5897)
Reviewed-on: https://code.engineering.redhat.com/gerrit/203691
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
.../bugs/replicate/issue-1254-prioritize-enospc.t | 80 ++++++++++++++++++++++
xlators/cluster/afr/src/afr-common.c | 4 +-
xlators/cluster/afr/src/afr-transaction.c | 48 +------------
xlators/storage/posix/src/posix-helpers.c | 2 +-
4 files changed, 86 insertions(+), 48 deletions(-)
create mode 100644 tests/bugs/replicate/issue-1254-prioritize-enospc.t
diff --git a/tests/bugs/replicate/issue-1254-prioritize-enospc.t b/tests/bugs/replicate/issue-1254-prioritize-enospc.t
new file mode 100644
index 0000000..fab94b7
--- /dev/null
+++ b/tests/bugs/replicate/issue-1254-prioritize-enospc.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function create_bricks {
+ TEST truncate -s 100M $B0/brick0
+ TEST truncate -s 100M $B0/brick1
+ TEST truncate -s 20M $B0/brick2
+ LO1=`SETUP_LOOP $B0/brick0`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO1
+ LO2=`SETUP_LOOP $B0/brick1`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO2
+ LO3=`SETUP_LOOP $B0/brick2`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO3
+ TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+ TEST MOUNT_LOOP $LO1 $B0/${V0}0
+ TEST MOUNT_LOOP $LO2 $B0/${V0}1
+ TEST MOUNT_LOOP $LO3 $B0/${V0}2
+}
+
+function create_files {
+ local i=1
+ while (true)
+ do
+ touch $M0/file$i
+ if [ -e $B0/${V0}2/file$i ];
+ then
+ ((i++))
+ else
+ break
+ fi
+ done
+}
+
+TESTS_EXPECTED_IN_LOOP=13
+
+#Arbiter volume: Check for ENOSPC when arbiter brick becomes full#
+TEST glusterd
+create_bricks
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+create_files
+TEST kill_brick $V0 $H0 $B0/${V0}1
+error1=$(touch $M0/file-1 2>&1)
+EXPECT "No space left on device" echo $error1
+error2=$(mkdir $M0/dir-1 2>&1)
+EXPECT "No space left on device" echo $error2
+error3=$((echo "Test" > $M0/file-3) 2>&1)
+EXPECT "No space left on device" echo $error3
+
+cleanup
+
+#Replica-3 volume: Check for ENOSPC when one of the brick becomes full#
+#Keeping the third brick of lower size to simulate disk full scenario#
+TEST glusterd
+create_bricks
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+create_files
+TEST kill_brick $V0 $H0 $B0/${V0}1
+error1=$(touch $M0/file-1 2>&1)
+EXPECT "No space left on device" echo $error1
+error2=$(mkdir $M0/dir-1 2>&1)
+EXPECT "No space left on device" echo $error2
+error3=$((cat /dev/zero > $M0/file1) 2>&1)
+EXPECT "No space left on device" echo $error3
+
+cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 3690b84..d6b70e9 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2463,7 +2463,7 @@ error:
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ENODATA > ENOENT > ESTALE > others
+ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC others
*/
int
@@ -2475,6 +2475,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno)
return ENOENT;
if (old_errno == ESTALE || new_errno == ESTALE)
return ESTALE;
+ if (old_errno == ENOSPC || new_errno == ENOSPC)
+ return ENOSPC;
return new_errno;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 15f3a7e..8e65ae2 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -514,42 +514,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this)
local->transaction.pre_op_sources[j] = 0;
}
-gf_boolean_t
-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- xlator_t *this = NULL;
- gf_boolean_t fop_failed = _gf_false;
- unsigned char *pre_op_sources = NULL;
- int i = 0;
-
- local = frame->local;
- this = frame->this;
- priv = this->private;
- pre_op_sources = local->transaction.pre_op_sources;
-
- /* If the fop failed on the brick, it is not a source. */
- for (i = 0; i < priv->child_count; i++)
- if (local->transaction.failed_subvols[i])
- pre_op_sources[i] = 0;
-
- switch (AFR_COUNT(pre_op_sources, priv->child_count)) {
- case 1:
- if (pre_op_sources[ARBITER_BRICK_INDEX])
- fop_failed = _gf_true;
- break;
- case 0:
- fop_failed = _gf_true;
- break;
- }
-
- if (fop_failed)
- return _gf_false;
-
- return _gf_true;
-}
-
void
afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this)
{
@@ -968,12 +932,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this)
priv->child_count)
return _gf_false;
- if (priv->arbiter_count) {
- if (!afr_has_arbiter_fop_cbk_quorum(frame))
- need_dirty = _gf_true;
- } else if (!afr_has_fop_cbk_quorum(frame)) {
+ if (!afr_has_fop_cbk_quorum(frame))
need_dirty = _gf_true;
- }
return need_dirty;
}
@@ -1023,12 +983,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this)
* no split-brain with the fix. The problem is eliminated completely.
*/
- if (priv->arbiter_count) {
- if (afr_has_arbiter_fop_cbk_quorum(frame))
- return;
- } else if (afr_has_fop_cbk_quorum(frame)) {
+ if (afr_has_fop_cbk_quorum(frame))
return;
- }
if (afr_need_dirty_marking(frame, this))
goto set_response;
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 35dd3b6..aca0df6 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1059,7 +1059,7 @@ verify_handle:
ret = posix_handle_soft(this, path, loc, uuid_curr, &stat);
out:
- if (!(*op_errno))
+ if (ret && !(*op_errno))
*op_errno = errno;
return ret;
}
--
1.8.3.1

View File

@ -0,0 +1,251 @@
From 7689fbb4be83f0e0657ec2729c4d66ed341b5751 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Tue, 11 Feb 2020 14:34:48 +0530
Subject: [PATCH 386/392] afr: prevent spurious entry heals leading to gfid
split-brain
Problem:
In a hyperconverged setup with granular-entry-heal enabled, if a file is
recreated while one of the bricks is down, and an index heal is triggered
(with the brick still down), entry-self heal was doing a spurious heal
with just the 2 good bricks. It was doing a post-op leading to removal
of the filename from .glusterfs/indices/entry-changes as well as
erroneous setting of afr xattrs on the parent. When the brick came up,
the xattrs were cleared, resulting in the renamed file not getting
healed and leading to gfid split-brain and EIO on the mount.
Fix:
Proceed with entry heal only when shd can connect to all bricks of the replica,
just like in data and metadata heal.
BUG: 1848893
> Upstream patch:https://review.gluster.org/#/c/glusterfs/+/24109/
> fixes: bz#1801624
> Change-Id: I916ae26ad1fabf259bc6362da52d433b7223b17e
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Change-Id: I23f57e543cff1e3f35eb8dbc60a2babfae6838c7
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
(cherry picked from commit 2b2eb846c49caba13ab92ec66af20292e7780fc1)
Reviewed-on: https://code.engineering.redhat.com/gerrit/203692
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Tested-by: Karthik Subrahmanya <ksubrahm@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
.../bug-1433571-undo-pending-only-on-up-bricks.t | 18 ++-----
tests/bugs/replicate/bug-1801624-entry-heal.t | 58 ++++++++++++++++++++++
xlators/cluster/afr/src/afr-common.c | 4 +-
xlators/cluster/afr/src/afr-self-heal-common.c | 8 +--
xlators/cluster/afr/src/afr-self-heal-entry.c | 6 +--
xlators/cluster/afr/src/afr-self-heal-name.c | 2 +-
xlators/cluster/afr/src/afr-self-heal.h | 2 -
7 files changed, 69 insertions(+), 29 deletions(-)
create mode 100644 tests/bugs/replicate/bug-1801624-entry-heal.t
diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
index 0767f47..10ce013 100644
--- a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
+++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
@@ -49,25 +49,15 @@ TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
-#Kill brick 0 and turn on the client side heal and do ls to trigger the heal.
-#The pending xattrs on bricks 1 & 2 should have pending entry on brick 0.
-TEST kill_brick $V0 $H0 $B0/${V0}0
+# We were killing one brick and checking that entry heal does not reset the
+# pending xattrs for the down brick. Now that we need all bricks to be up for
+# entry heal, I'm removing that test from the .t
+
TEST $CLI volume set $V0 cluster.data-self-heal on
TEST $CLI volume set $V0 cluster.metadata-self-heal on
TEST $CLI volume set $V0 cluster.entry-self-heal on
TEST ls $M0
-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
-
-#Bring back all the bricks and trigger the heal again by doing ls. Now the
-#pending xattrs on all the bricks should be 0.
-TEST $CLI volume start $V0 force
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
-TEST ls $M0
-
TEST cat $M0/f1
TEST cat $M0/f2
TEST cat $M0/f3
diff --git a/tests/bugs/replicate/bug-1801624-entry-heal.t b/tests/bugs/replicate/bug-1801624-entry-heal.t
new file mode 100644
index 0000000..94b4651
--- /dev/null
+++ b/tests/bugs/replicate/bug-1801624-entry-heal.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+TEST $CLI volume set $V0 heal-timeout 5
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+echo "Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+
+# Re-create the file when a brick is down.
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST rm $M0/FILE
+echo "New Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0
+
+# Launching index heal must not reset parent dir afr xattrs or remove granular entry indices.
+$CLI volume heal $V0 # CLI will fail but heal is launched anyway.
+TEST sleep 5 # give index heal a chance to do one run.
+brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick0/)
+brick2_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick2/)
+TEST [ $brick0_pending -eq "000000000000000000000002" ]
+TEST [ $brick2_pending -eq "000000000000000000000002" ]
+EXPECT "FILE" ls $B0/brick0/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/
+EXPECT "FILE" ls $B0/brick2/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+$CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# No gfid-split-brain (i.e. EIO) must be seen. Try on fresh mount to avoid cached values.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST cat $M0/FILE
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index d6b70e9..939246e 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -6632,7 +6632,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS)
+ if (ret < priv->child_count)
goto data_unlock;
ret = __afr_selfheal_data_prepare(
heal_frame, this, inode, locked_on, sources, sinks,
@@ -6649,7 +6649,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,
LLONG_MAX - 1, 0, locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS)
+ if (ret < priv->child_count)
goto mdata_unlock;
ret = __afr_selfheal_metadata_prepare(
heal_frame, this, inode, locked_on, sources, sinks,
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 81ef38a..ce1ea50 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1575,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
char *accused = NULL; /* Accused others without any self-accusal */
char *pending = NULL; /* Have pending operations on others */
char *self_accused = NULL; /* Accused itself */
- int min_participants = -1;
priv = this->private;
@@ -1599,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) {
- min_participants = priv->child_count;
- } else {
- min_participants = AFR_SH_MIN_PARTICIPANTS;
- }
- if (afr_success_count(replies, priv->child_count) < min_participants) {
+ if (afr_success_count(replies, priv->child_count) < priv->child_count) {
/* Treat this just like locks not being acquired */
return -ENOTCONN;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 3ce882e..40be898 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -597,7 +597,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes "
@@ -991,7 +991,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
data_lock);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes could "
@@ -1115,7 +1115,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain,
NULL, locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes could "
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 36640b5..7d4f208 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -514,7 +514,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
ret = -ENOTCONN;
goto unlock;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 6555ec5..8234cec 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -11,8 +11,6 @@
#ifndef _AFR_SELFHEAL_H
#define _AFR_SELFHEAL_H
-#define AFR_SH_MIN_PARTICIPANTS 2
-
/* Perform fop on all UP subvolumes and wait for all callbacks to return */
#define AFR_ONALL(frame, rfn, fop, args...) \
--
1.8.3.1

View File

@ -0,0 +1,440 @@
From 399fad1ac0f9273483270e8af06a5b2d28927533 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Fri, 29 May 2020 14:24:53 +0530
Subject: [PATCH 387/392] cluster/afr: Delay post-op for fsync
Problem:
AFR doesn't delay post-op for fsync fop. For fsync heavy workloads
this leads to un-necessary fxattrop/finodelk for every fsync leading
to bad performance.
Fix:
Have delayed post-op for fsync. Add special flag in xdata to indicate
that afr shouldn't delay post-op in cases where either the
process will terminate or graph-switch would happen. Otherwise it leads
to un-necessary heals when the graph-switch/process-termination
happens before delayed-post-op completes.
> Upstream-patch: https://review.gluster.org/c/glusterfs/+/24473
> Fixes: #1253
BUG: 1848896
Change-Id: I531940d13269a111c49e0510d49514dc169f4577
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
(cherry picked from commit 3ed98fc9dcb39223032e343fd5b0ad17fa3cae14)
Reviewed-on: https://code.engineering.redhat.com/gerrit/203694
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Tested-by: Karthik Subrahmanya <ksubrahm@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
api/src/glfs-resolve.c | 14 ++-
tests/basic/afr/durability-off.t | 2 +
tests/basic/gfapi/gfapi-graph-switch-open-fd.t | 44 +++++++++
tests/basic/gfapi/gfapi-keep-writing.c | 129 +++++++++++++++++++++++++
xlators/cluster/afr/src/afr-inode-write.c | 11 ++-
xlators/cluster/afr/src/afr-transaction.c | 9 +-
xlators/cluster/afr/src/afr.h | 2 +-
xlators/cluster/dht/src/dht-rebalance.c | 15 ++-
xlators/mount/fuse/src/fuse-bridge.c | 23 ++++-
9 files changed, 239 insertions(+), 10 deletions(-)
create mode 100644 tests/basic/gfapi/gfapi-graph-switch-open-fd.t
create mode 100644 tests/basic/gfapi/gfapi-keep-writing.c
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
index a79f490..062b7dc 100644
--- a/api/src/glfs-resolve.c
+++ b/api/src/glfs-resolve.c
@@ -722,6 +722,7 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
0,
};
char uuid1[64];
+ dict_t *xdata = NULL;
oldinode = oldfd->inode;
oldsubvol = oldinode->table->xl;
@@ -730,7 +731,15 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
return fd_ref(oldfd);
if (!oldsubvol->switched) {
- ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, NULL, NULL);
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_msg(fs->volname, GF_LOG_WARNING, ENOMEM, API_MSG_FSYNC_FAILED,
+ "last-fsync set failed on %s graph %s (%d)",
+ uuid_utoa_r(oldfd->inode->gfid, uuid1),
+ graphid_str(oldsubvol), oldsubvol->graph->id);
+ }
+
+ ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, xdata, NULL);
DECODE_SYNCOP_ERR(ret);
if (ret) {
gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED,
@@ -809,6 +818,9 @@ out:
newfd = NULL;
}
+ if (xdata)
+ dict_unref(xdata);
+
return newfd;
}
diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t
index 155ffa0..6e0f18b 100644
--- a/tests/basic/afr/durability-off.t
+++ b/tests/basic/afr/durability-off.t
@@ -26,6 +26,8 @@ TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l)
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
#Test that fsyncs happen when durability is on
TEST $CLI volume set $V0 cluster.ensure-durability on
TEST $CLI volume set $V0 performance.strict-write-ordering on
diff --git a/tests/basic/gfapi/gfapi-graph-switch-open-fd.t b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t
new file mode 100644
index 0000000..2e666be
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{0..2};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/sync
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-keep-writing.c -lgfapi
+
+
+#Launch a program to keep doing writes on an fd
+./$(dirname $0)/gfapi-keep-writing ${H0} $V0 $logdir/gfapi-async-calls-test.log sync &
+p=$!
+sleep 1 #Let some writes go through
+#Check if graph switch will lead to any pending markers for ever
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+
+TEST rm -f $M0/sync #Make sure the glfd is closed
+TEST wait #Wait for background process to die
+#Goal is to check if there is permanent FOOL changelog
+sleep 5
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick0/glfs_test.txt trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick1/glfs_test.txt trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick2/glfs_test.txt trusted.afr.dirty
+
+cleanup_tester $(dirname $0)/gfapi-async-calls-test
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-keep-writing.c b/tests/basic/gfapi/gfapi-keep-writing.c
new file mode 100644
index 0000000..91b59ce
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-keep-writing.c
@@ -0,0 +1,129 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+glfs_test_function(const char *hostname, const char *volname,
+ const char *logfile, const char *syncfile)
+{
+ int ret = -1;
+ int flags = O_CREAT | O_RDWR;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ const char *buff = "This is from my prog\n";
+ const char *filename = "glfs_test.txt";
+ struct stat buf = {0};
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ goto out;
+ }
+
+ while (glfs_stat(fs, syncfile, &buf) == 0) {
+ ret = glfs_write(glfd, buff, strlen(buff), flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+out:
+ ret = glfs_fini(fs);
+ if (ret) {
+ LOG_ERR("glfs_fini failed");
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *syncfile = NULL;
+
+ if (argc != 5) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+ syncfile = argv[4];
+
+ ret = glfs_test_function(hostname, volname, logfile, syncfile);
+ if (ret) {
+ LOG_ERR("glfs_test_function failed");
+ }
+
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 7fcc9d4..df82b6e 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -2492,6 +2492,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
call_frame_t *transaction_frame = NULL;
int ret = -1;
int32_t op_errno = ENOMEM;
+ int8_t last_fsync = 0;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
@@ -2501,10 +2502,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local)
goto out;
- if (xdata)
+ if (xdata) {
local->xdata_req = dict_copy_with_ref(xdata, NULL);
- else
+ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
+ if (last_fsync) {
+ local->transaction.disable_delayed_post_op = _gf_true;
+ }
+ }
+ } else {
local->xdata_req = dict_new();
+ }
if (!local->xdata_req)
goto out;
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 8e65ae2..ffd0ab8 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -2385,8 +2385,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
goto out;
}
- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) {
- /*Only allow writes but shard does [f]xattrops on writes, so
+ if (local->transaction.disable_delayed_post_op) {
+ goto out;
+ }
+
+ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
+ (local->op != GF_FOP_FSYNC)) {
+ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so
* they are fine too*/
goto out;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index e731cfa..6bc4721 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -854,7 +854,7 @@ typedef struct _afr_local {
int (*unwind)(call_frame_t *frame, xlator_t *this);
- /* post-op hook */
+ gf_boolean_t disable_delayed_post_op;
} transaction;
syncbarrier_t barrier;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 8f31dca..145e616 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -1564,6 +1564,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
xlator_t *old_target = NULL;
xlator_t *hashed_subvol = NULL;
fd_t *linkto_fd = NULL;
+ dict_t *xdata = NULL;
if (from == to) {
gf_msg_debug(this->name, 0,
@@ -1882,7 +1883,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
/* TODO: Sync the locks */
- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL);
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set last-fsync flag on "
+ "%s (%s)",
+ loc->path, to->name, strerror(ENOMEM));
+ }
+
+ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
if (ret) {
gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
loc->path, to->name, strerror(-ret));
@@ -2356,11 +2365,15 @@ out:
if (dst_fd)
syncop_close(dst_fd);
+
if (src_fd)
syncop_close(src_fd);
if (linkto_fd)
syncop_close(linkto_fd);
+ if (xdata)
+ dict_unref(xdata);
+
loc_wipe(&tmp_loc);
loc_wipe(&parent_loc);
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 6e99053..1592067 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -5551,6 +5551,7 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
char create_in_progress = 0;
fuse_fd_ctx_t *basefd_ctx = NULL;
fd_t *oldfd = NULL;
+ dict_t *xdata = NULL;
basefd_ctx = fuse_fd_ctx_get(this, basefd);
GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out);
@@ -5587,10 +5588,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
}
if (oldfd->inode->table->xl == old_subvol) {
- if (IA_ISDIR(oldfd->inode->ia_type))
+ if (IA_ISDIR(oldfd->inode->ia_type)) {
ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL);
- else
- ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL);
+ } else {
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log("glusterfs-fuse", GF_LOG_WARNING,
+ "last-fsync set failed (%s) on fd (%p)"
+ "(basefd:%p basefd-inode.gfid:%s) "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+ strerror(ENOMEM), oldfd, basefd,
+ uuid_utoa(basefd->inode->gfid), old_subvol->name,
+ old_subvol->graph->id, new_subvol->name,
+ new_subvol->graph->id);
+ }
+
+ ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL);
+ }
if (ret < 0) {
gf_log("glusterfs-fuse", GF_LOG_WARNING,
@@ -5645,6 +5659,9 @@ out:
fd_unref(oldfd);
+ if (xdata)
+ dict_unref(xdata);
+
return ret;
}
--
1.8.3.1

View File

@ -0,0 +1,142 @@
From be6fafebe1e391e9d9f14d9aed18adbfda8a262b Mon Sep 17 00:00:00 2001
From: l17zhou <cynthia.zhou@nokia-sbell.com.cn>
Date: Mon, 4 Nov 2019 08:45:52 +0200
Subject: [PATCH 388/392] rpc: Cleanup SSL specific data at the time of freeing
rpc object
Problem: At the time of cleanup rpc object ssl specific data
is not freeing so it has become a leak.
Solution: To avoid the leak cleanup ssl specific data at the
time of cleanup rpc object
> Credits: l17zhou <cynthia.zhou@nokia-sbell.com.cn>
> Fixes: bz#1768407
> Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0
> (Cherry pick from commit 54ed71dba174385ab0d8fa415e09262f6250430c)
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23650/)
Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0
BUG: 1848891
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/203698
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
rpc/rpc-transport/socket/src/socket.c | 22 ++++++++++++++++++++--
tests/features/ssl-authz.t | 23 ++++++++++++++++++++---
2 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index 65845ea..226b2e2 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -446,6 +446,7 @@ ssl_setup_connection_postfix(rpc_transport_t *this)
gf_log(this->name, GF_LOG_DEBUG,
"SSL verification succeeded (client: %s) (server: %s)",
this->peerinfo.identifier, this->myinfo.identifier);
+ X509_free(peer);
return gf_strdup(peer_CN);
/* Error paths. */
@@ -1157,7 +1158,15 @@ __socket_reset(rpc_transport_t *this)
memset(&priv->incoming, 0, sizeof(priv->incoming));
event_unregister_close(this->ctx->event_pool, priv->sock, priv->idx);
-
+ if (priv->use_ssl && priv->ssl_ssl) {
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ }
+ if (priv->use_ssl && priv->ssl_ctx) {
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ctx = NULL;
+ }
priv->sock = -1;
priv->idx = -1;
priv->connected = -1;
@@ -3217,7 +3226,6 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
new_priv->sock = new_sock;
new_priv->ssl_enabled = priv->ssl_enabled;
- new_priv->ssl_ctx = priv->ssl_ctx;
new_priv->connected = 1;
new_priv->is_server = _gf_true;
@@ -4672,6 +4680,16 @@ fini(rpc_transport_t *this)
pthread_mutex_destroy(&priv->out_lock);
pthread_mutex_destroy(&priv->cond_lock);
pthread_cond_destroy(&priv->cond);
+ if (priv->use_ssl && priv->ssl_ssl) {
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ }
+ if (priv->use_ssl && priv->ssl_ctx) {
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ctx = NULL;
+ }
+
if (priv->ssl_private_key) {
GF_FREE(priv->ssl_private_key);
}
diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t
index cae010c..132b598 100755
--- a/tests/features/ssl-authz.t
+++ b/tests/features/ssl-authz.t
@@ -25,6 +25,7 @@ TEST glusterd
TEST pidof glusterd
TEST $CLI volume info;
+TEST $CLI v set all cluster.brick-multiplex on
# Construct a cipher list that excludes CBC because of POODLE.
# http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2014-3566
#
@@ -45,12 +46,12 @@ TEST openssl genrsa -out $SSL_KEY 2048
TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
ln $SSL_CERT $SSL_CA
-TEST $CLI volume create $V0 $H0:$B0/1
+TEST $CLI volume create $V0 replica 3 $H0:$B0/{1,2,3} force
TEST $CLI volume set $V0 server.ssl on
TEST $CLI volume set $V0 client.ssl on
TEST $CLI volume set $V0 ssl.cipher-list $(valid_ciphers)
TEST $CLI volume start $V0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count
# This mount should SUCCEED because ssl-allow=* by default. This effectively
# disables SSL authorization, though authentication and encryption might still
@@ -59,11 +60,27 @@ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
TEST ping_file $M0/before
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+glusterfsd_pid=`pgrep glusterfsd`
+TEST [ $glusterfsd_pid != 0 ]
+start=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
+echo "Memory consumption for glusterfsd process"
+for i in $(seq 1 100); do
+ gluster v heal $V0 info >/dev/null
+done
+
+end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
+diff=$((end-start))
+
+# If memory consumption is more than 5M some leak in SSL code path
+
+TEST [ $diff -lt 5000 ]
+
+
# Set ssl-allow to a wildcard that includes our identity.
TEST $CLI volume stop $V0
TEST $CLI volume set $V0 auth.ssl-allow Any*
TEST $CLI volume start $V0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count
# This mount should SUCCEED because we match the wildcard.
TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
--
1.8.3.1

View File

@ -0,0 +1,54 @@
From d3558cfbded7e973fae45ce2196767611336e351 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Mon, 8 Jun 2020 13:27:50 +0530
Subject: [PATCH 389/392] socket: Resolve ssl_ctx leak for a brick while only
mgmt SSL is enabled
Problem: While only mgmt SSL is enabled for a brick process use_ssl flag
is false for a brick process and socket api's cleanup ssl_ctx only
while use_ssl and ssl_ctx both are valid
Solution: To avoid a leak check only ssl_ctx, if it is valid cleanup
ssl_ctx
> Fixes: #1196
> Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> (Cherry pick from commit 9873baee34afdf0c20f5fc98a7dbf2a9f07447e2)
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24366/)
BUG: 1848894
Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/203705
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
rpc/rpc-transport/socket/src/socket.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index 226b2e2..54cd5df 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -1163,7 +1163,7 @@ __socket_reset(rpc_transport_t *this)
SSL_free(priv->ssl_ssl);
priv->ssl_ssl = NULL;
}
- if (priv->use_ssl && priv->ssl_ctx) {
+ if (priv->ssl_ctx) {
SSL_CTX_free(priv->ssl_ctx);
priv->ssl_ctx = NULL;
}
@@ -4685,7 +4685,7 @@ fini(rpc_transport_t *this)
SSL_free(priv->ssl_ssl);
priv->ssl_ssl = NULL;
}
- if (priv->use_ssl && priv->ssl_ctx) {
+ if (priv->ssl_ctx) {
SSL_CTX_free(priv->ssl_ctx);
priv->ssl_ctx = NULL;
}
--
1.8.3.1

View File

@ -0,0 +1,51 @@
From b68fa363c5981441c20fbc78b6dc00437bd698a7 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Mon, 22 Jun 2020 11:35:29 +0530
Subject: [PATCH 390/392] tests: Avoid ssl-authz.t failure
Problem: ssl-authz.t is failing at the time of checking memory
consumption if brick is consuming more than 5M
Solution: Update the check to avoid a failure.
> Change-Id: Iffb031f0695a7da83d5a2f6bac8863dad225317e
> Fixes: bz#1811631
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
> Reviewd on upstream link https://review.gluster.org/#/c/glusterfs/+/24221/)
> (Cherry pick from commit fb20713b380e1df8d7f9e9df96563be2f9144fd6)
BUG: 1848894
Change-Id: I4fc5d2e2597abfafc1e26d908c8c4184ab82afd5
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/203844
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/features/ssl-authz.t | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t
index 132b598..ab05c49 100755
--- a/tests/features/ssl-authz.t
+++ b/tests/features/ssl-authz.t
@@ -67,13 +67,15 @@ echo "Memory consumption for glusterfsd process"
for i in $(seq 1 100); do
gluster v heal $V0 info >/dev/null
done
+#Wait to cleanup memory
+sleep 10
end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
diff=$((end-start))
-# If memory consumption is more than 5M some leak in SSL code path
+# If memory consumption is more than 15M some leak in SSL code path
-TEST [ $diff -lt 5000 ]
+TEST [ $diff -lt 15000 ]
# Set ssl-allow to a wildcard that includes our identity.
--
1.8.3.1

View File

@ -0,0 +1,54 @@
From e9cb714d66a7926a746b8cd5f9288d59aefee918 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Mon, 18 Mar 2019 20:47:54 +0800
Subject: [PATCH 391/392] cluster-syncop: avoid duplicate unlock of
inodelk/entrylk
When using ec, there are many messages at brick log as,
[inodelk.c:514:__inode_unlock_lock] 0-test-locks: Matching lock not found for unlock 0-9223372036854775807, lo=68e040a84b7f0000 on 0x7f208c006f78
[MSGID: 115053] [server-rpc-fops_v2.c:280:server4_inodelk_cbk] 0-test-server: 2557439: INODELK <gfid:df4e41be-723f-4289-b7af-b4272b3e880c> (df4e41be-723f-4289-b7af-b4272b3e880c), client: CTX_ID:67d4a7f3-605a-4965-89a5-31309d62d1fa-GRAPH_ID:0-PID:1659-HOST:openfs-node2-PC_NAME:test-client-1-RECON_NO:-28, error-xlator: test-locks [Invalid argument]
> Change-Id: Ib164d29ebb071f620a4ca9679c4345ef7c88512a
> Updates: bz#1689920
> Signed-off-by: Kinglong Mee <mijinlong@open-fs.com>
> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22377/
BUG: 1848890
Change-Id: Ib164d29ebb071f620a4ca9679c4345ef7c88512a
Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/203852
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/cluster-syncop.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c
index 5a08f26..6ee89dd 100644
--- a/libglusterfs/src/cluster-syncop.c
+++ b/libglusterfs/src/cluster-syncop.c
@@ -1203,6 +1203,10 @@ cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on,
if (num_success) {
FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame,
inodelk, dom, &loc, F_SETLKW, &flock, NULL);
+ } else {
+ loc_wipe(&loc);
+ memset(locked_on, 0, numsubvols);
+ return 0;
}
break;
}
@@ -1244,7 +1248,9 @@ cluster_tiebreaker_entrylk(xlator_t **subvols, unsigned char *on,
entrylk, dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK,
NULL);
} else {
+ loc_wipe(&loc);
memset(locked_on, 0, numsubvols);
+ return 0;
}
break;
}
--
1.8.3.1

View File

@ -0,0 +1,39 @@
From 6a2cef546457e6dc9a2268cc2f5cc11b850b7f5c Mon Sep 17 00:00:00 2001
From: Sanju Rakonde <srakonde@redhat.com>
Date: Tue, 17 Dec 2019 15:52:30 +0530
Subject: [PATCH 392/392] glusterd: unlink the file after killing the process
In glusterd_proc_stop(), after killing the pid
we should remove the pidfile.
> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23890/
> fixes: bz#1784375
> Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686
> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
BUG: 1849533
Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686
Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/203871
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
index f55a5fd..a05c90d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
@@ -107,6 +107,8 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags)
"service, reason:%s",
proc->name, strerror(errno));
}
+ } else {
+ (void)glusterd_unlink_file(proc->pidfile);
}
if (flags != PROC_STOP_FORCE)
goto out;
--
1.8.3.1

View File

@ -237,7 +237,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
%else
Name: glusterfs
Version: 6.0
Release: 37%{?dist}
Release: 37.2%{?dist}
ExcludeArch: i686
%endif
License: GPLv2 or LGPLv3+
@ -698,6 +698,15 @@ Patch0380: 0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch
Patch0381: 0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch
Patch0382: 0382-features-shard-Aggregate-file-size-block-count-befor.patch
Patch0383: 0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch
Patch0384: 0384-extras-Modify-group-virt-to-include-network-related-.patch
Patch0385: 0385-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch
Patch0386: 0386-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch
Patch0387: 0387-cluster-afr-Delay-post-op-for-fsync.patch
Patch0388: 0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch
Patch0389: 0389-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch
Patch0390: 0390-tests-Avoid-ssl-authz.t-failure.patch
Patch0391: 0391-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch
Patch0392: 0392-glusterd-unlink-the-file-after-killing-the-process.patch
%description
GlusterFS is a distributed file-system capable of scaling to several
@ -2438,6 +2447,13 @@ fi
%endif
%changelog
* Tue Sep 08 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-37.2
- fixes bugs bz#1876857
* Wed Jun 24 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-37.1
- fixes bugs bz#1848890 bz#1848891 bz#1848893 bz#1848894 bz#1848895
bz#1848896 bz#1848899 bz#1849533
* Fri May 29 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-37
- fixes bugs bz#1840794