From 02ae2cd7e275b6a910625b277c6b9a9bf4df2d6b Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Thu, 17 Sep 2020 00:08:53 +0000 Subject: [PATCH] import glusterfs-6.0-37.2.el8 --- ...oup-virt-to-include-network-related-.patch | 44 ++ ...-Prioritize-ENOSPC-over-other-errors.patch | 237 ++++++++++ ...ious-entry-heals-leading-to-gfid-spl.patch | 251 ++++++++++ ...-cluster-afr-Delay-post-op-for-fsync.patch | 440 ++++++++++++++++++ ...specific-data-at-the-time-of-freeing.patch | 142 ++++++ ...sl_ctx-leak-for-a-brick-while-only-m.patch | 54 +++ ...0390-tests-Avoid-ssl-authz.t-failure.patch | 51 ++ ...void-duplicate-unlock-of-inodelk-ent.patch | 54 +++ ...k-the-file-after-killing-the-process.patch | 39 ++ SPECS/glusterfs.spec | 20 +- 10 files changed, 1330 insertions(+), 2 deletions(-) create mode 100644 SOURCES/0384-extras-Modify-group-virt-to-include-network-related-.patch create mode 100644 SOURCES/0385-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch create mode 100644 SOURCES/0386-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch create mode 100644 SOURCES/0387-cluster-afr-Delay-post-op-for-fsync.patch create mode 100644 SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch create mode 100644 SOURCES/0389-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch create mode 100644 SOURCES/0390-tests-Avoid-ssl-authz.t-failure.patch create mode 100644 SOURCES/0391-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch create mode 100644 SOURCES/0392-glusterd-unlink-the-file-after-killing-the-process.patch diff --git a/SOURCES/0384-extras-Modify-group-virt-to-include-network-related-.patch b/SOURCES/0384-extras-Modify-group-virt-to-include-network-related-.patch new file mode 100644 index 0000000..45684e1 --- /dev/null +++ b/SOURCES/0384-extras-Modify-group-virt-to-include-network-related-.patch @@ -0,0 +1,44 @@ +From 2948ee521316d40384130138233178ba940b175f Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Mon, 4 May 2020 
14:30:57 +0530 +Subject: [PATCH 384/392] extras: Modify group 'virt' to include + network-related options + +This is needed to work around an issue seen where vms running on +online hosts are getting killed when a different host is rebooted +in ovirt-gluster hyperconverged environments. Actual RCA is quite +lengthy and documented in the github issue. Please refer to it +for more details. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24400 +> Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1 +> Fixes: #1217 +> Signed-off-by: Krutika Dhananjay + +Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1 +BUG: 1848899 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/203685 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/group-virt.example | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/extras/group-virt.example b/extras/group-virt.example +index c2ce89d..3a441eb 100644 +--- a/extras/group-virt.example ++++ b/extras/group-virt.example +@@ -16,3 +16,8 @@ cluster.choose-local=off + client.event-threads=4 + server.event-threads=4 + performance.client-io-threads=on ++network.ping-timeout=20 ++server.tcp-user-timeout=20 ++server.keepalive-time=10 ++server.keepalive-interval=2 ++server.keepalive-count=5 +-- +1.8.3.1 + diff --git a/SOURCES/0385-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch b/SOURCES/0385-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch new file mode 100644 index 0000000..5572e7f --- /dev/null +++ b/SOURCES/0385-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch @@ -0,0 +1,237 @@ +From cdd067dcc0cd70d4f57e173b4050d8e2eb79725a Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Fri, 5 Jun 2020 17:20:04 +0530 +Subject: [PATCH 385/392] cluster/afr: Prioritize ENOSPC over other errors + +Backport of: https://review.gluster.org/#/c/glusterfs/+/24477/ + +Problem: +In a replicate/arbiter volume if file creations or writes 
fail on +quorum number of bricks and on one brick it is due to ENOSPC and +on other brick it fails for a different reason, it may fail with +errors other than ENOSPC in some cases. + +Fix: +Prioritize ENOSPC over other lesser priority errors and do not set +op_errno in posix_gfid_set if op_ret is 0 to avoid receiving any +error_no which can be misinterpreted by __afr_dir_write_finalize(). + +Also removing the function afr_has_arbiter_fop_cbk_quorum() which +might consider a successful reply from a single brick as quorum +success in some cases, whereas we always need fop to be successful +on quorum number of bricks in arbiter configuration. + +Change-Id: I4dd2bff17e6812bc7c8372130976e365e2407d88 +Signed-off-by: karthik-us +BUG: 1848895 +(cherry picked from commit 8b11ac1575ef167af2a47a96f7b7ed0f32bb5897) +Reviewed-on: https://code.engineering.redhat.com/gerrit/203691 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bugs/replicate/issue-1254-prioritize-enospc.t | 80 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 4 +- + xlators/cluster/afr/src/afr-transaction.c | 48 +------------ + xlators/storage/posix/src/posix-helpers.c | 2 +- + 4 files changed, 86 insertions(+), 48 deletions(-) + create mode 100644 tests/bugs/replicate/issue-1254-prioritize-enospc.t + +diff --git a/tests/bugs/replicate/issue-1254-prioritize-enospc.t b/tests/bugs/replicate/issue-1254-prioritize-enospc.t +new file mode 100644 +index 0000000..fab94b7 +--- /dev/null ++++ b/tests/bugs/replicate/issue-1254-prioritize-enospc.t +@@ -0,0 +1,80 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup ++ ++function create_bricks { ++ TEST truncate -s 100M $B0/brick0 ++ TEST truncate -s 100M $B0/brick1 ++ TEST truncate -s 20M $B0/brick2 ++ LO1=`SETUP_LOOP $B0/brick0` ++ TEST [ $? -eq 0 ] ++ TEST MKFS_LOOP $LO1 ++ LO2=`SETUP_LOOP $B0/brick1` ++ TEST [ $? 
-eq 0 ] ++ TEST MKFS_LOOP $LO2 ++ LO3=`SETUP_LOOP $B0/brick2` ++ TEST [ $? -eq 0 ] ++ TEST MKFS_LOOP $LO3 ++ TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2 ++ TEST MOUNT_LOOP $LO1 $B0/${V0}0 ++ TEST MOUNT_LOOP $LO2 $B0/${V0}1 ++ TEST MOUNT_LOOP $LO3 $B0/${V0}2 ++} ++ ++function create_files { ++ local i=1 ++ while (true) ++ do ++ touch $M0/file$i ++ if [ -e $B0/${V0}2/file$i ]; ++ then ++ ((i++)) ++ else ++ break ++ fi ++ done ++} ++ ++TESTS_EXPECTED_IN_LOOP=13 ++ ++#Arbiter volume: Check for ENOSPC when arbiter brick becomes full# ++TEST glusterd ++create_bricks ++TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 ++ ++create_files ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++error1=$(touch $M0/file-1 2>&1) ++EXPECT "No space left on device" echo $error1 ++error2=$(mkdir $M0/dir-1 2>&1) ++EXPECT "No space left on device" echo $error2 ++error3=$((echo "Test" > $M0/file-3) 2>&1) ++EXPECT "No space left on device" echo $error3 ++ ++cleanup ++ ++#Replica-3 volume: Check for ENOSPC when one of the brick becomes full# ++#Keeping the third brick of lower size to simulate disk full scenario# ++TEST glusterd ++create_bricks ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 ++ ++create_files ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++error1=$(touch $M0/file-1 2>&1) ++EXPECT "No space left on device" echo $error1 ++error2=$(mkdir $M0/dir-1 2>&1) ++EXPECT "No space left on device" echo $error2 ++error3=$((cat /dev/zero > $M0/file1) 2>&1) ++EXPECT "No space left on device" echo $error3 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 3690b84..d6b70e9 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ 
b/xlators/cluster/afr/src/afr-common.c +@@ -2463,7 +2463,7 @@ error: + * others in that they must be given higher priority while + * returning to the user. + * +- * The hierarchy is ENODATA > ENOENT > ESTALE > others ++ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC > others + */ + + int +@@ -2475,6 +2475,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno) + return ENOENT; + if (old_errno == ESTALE || new_errno == ESTALE) + return ESTALE; ++ if (old_errno == ENOSPC || new_errno == ENOSPC) ++ return ENOSPC; + + return new_errno; + } +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 15f3a7e..8e65ae2 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -514,42 +514,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this) + local->transaction.pre_op_sources[j] = 0; + } + +-gf_boolean_t +-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame) +-{ +- afr_local_t *local = NULL; +- afr_private_t *priv = NULL; +- xlator_t *this = NULL; +- gf_boolean_t fop_failed = _gf_false; +- unsigned char *pre_op_sources = NULL; +- int i = 0; +- +- local = frame->local; +- this = frame->this; +- priv = this->private; +- pre_op_sources = local->transaction.pre_op_sources; +- +- /* If the fop failed on the brick, it is not a source. 
*/ +- for (i = 0; i < priv->child_count; i++) +- if (local->transaction.failed_subvols[i]) +- pre_op_sources[i] = 0; +- +- switch (AFR_COUNT(pre_op_sources, priv->child_count)) { +- case 1: +- if (pre_op_sources[ARBITER_BRICK_INDEX]) +- fop_failed = _gf_true; +- break; +- case 0: +- fop_failed = _gf_true; +- break; +- } +- +- if (fop_failed) +- return _gf_false; +- +- return _gf_true; +-} +- + void + afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this) + { +@@ -968,12 +932,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this) + priv->child_count) + return _gf_false; + +- if (priv->arbiter_count) { +- if (!afr_has_arbiter_fop_cbk_quorum(frame)) +- need_dirty = _gf_true; +- } else if (!afr_has_fop_cbk_quorum(frame)) { ++ if (!afr_has_fop_cbk_quorum(frame)) + need_dirty = _gf_true; +- } + + return need_dirty; + } +@@ -1023,12 +983,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this) + * no split-brain with the fix. The problem is eliminated completely. + */ + +- if (priv->arbiter_count) { +- if (afr_has_arbiter_fop_cbk_quorum(frame)) +- return; +- } else if (afr_has_fop_cbk_quorum(frame)) { ++ if (afr_has_fop_cbk_quorum(frame)) + return; +- } + + if (afr_need_dirty_marking(frame, this)) + goto set_response; +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 35dd3b6..aca0df6 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1059,7 +1059,7 @@ verify_handle: + ret = posix_handle_soft(this, path, loc, uuid_curr, &stat); + + out: +- if (!(*op_errno)) ++ if (ret && !(*op_errno)) + *op_errno = errno; + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0386-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch b/SOURCES/0386-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch new file mode 100644 index 0000000..9ba5451 --- /dev/null +++ b/SOURCES/0386-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch @@ -0,0 +1,251 
@@ +From 7689fbb4be83f0e0657ec2729c4d66ed341b5751 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Tue, 11 Feb 2020 14:34:48 +0530 +Subject: [PATCH 386/392] afr: prevent spurious entry heals leading to gfid + split-brain + +Problem: +In a hyperconverged setup with granular-entry-heal enabled, if a file is +recreated while one of the bricks is down, and an index heal is triggered +(with the brick still down), entry-self heal was doing a spurious heal +with just the 2 good bricks. It was doing a post-op leading to removal +of the filename from .glusterfs/indices/entry-changes as well as +erroneous setting of afr xattrs on the parent. When the brick came up, +the xattrs were cleared, resulting in the renamed file not getting +healed and leading to gfid split-brain and EIO on the mount. + +Fix: +Proceed with entry heal only when shd can connect to all bricks of the replica, +just like in data and metadata heal. + +BUG: 1848893 + +> Upstream patch:https://review.gluster.org/#/c/glusterfs/+/24109/ +> fixes: bz#1801624 +> Change-Id: I916ae26ad1fabf259bc6362da52d433b7223b17e +> Signed-off-by: Ravishankar N + +Change-Id: I23f57e543cff1e3f35eb8dbc60a2babfae6838c7 +Signed-off-by: Ravishankar N +(cherry picked from commit 2b2eb846c49caba13ab92ec66af20292e7780fc1) +Reviewed-on: https://code.engineering.redhat.com/gerrit/203692 +Tested-by: RHGS Build Bot +Tested-by: Karthik Subrahmanya +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bug-1433571-undo-pending-only-on-up-bricks.t | 18 ++----- + tests/bugs/replicate/bug-1801624-entry-heal.t | 58 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 4 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 8 +-- + xlators/cluster/afr/src/afr-self-heal-entry.c | 6 +-- + xlators/cluster/afr/src/afr-self-heal-name.c | 2 +- + xlators/cluster/afr/src/afr-self-heal.h | 2 - + 7 files changed, 69 insertions(+), 29 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1801624-entry-heal.t + +diff --git 
a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t +index 0767f47..10ce013 100644 +--- a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t ++++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t +@@ -49,25 +49,15 @@ TEST $CLI volume start $V0 force + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +-#Kill brick 0 and turn on the client side heal and do ls to trigger the heal. +-#The pending xattrs on bricks 1 & 2 should have pending entry on brick 0. +-TEST kill_brick $V0 $H0 $B0/${V0}0 ++# We were killing one brick and checking that entry heal does not reset the ++# pending xattrs for the down brick. Now that we need all bricks to be up for ++# entry heal, I'm removing that test from the .t ++ + TEST $CLI volume set $V0 cluster.data-self-heal on + TEST $CLI volume set $V0 cluster.metadata-self-heal on + TEST $CLI volume set $V0 cluster.entry-self-heal on + + TEST ls $M0 +-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1 +-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2 +-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 +-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2 +- +-#Bring back all the bricks and trigger the heal again by doing ls. Now the +-#pending xattrs on all the bricks should be 0. 
+-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +-TEST ls $M0 +- + TEST cat $M0/f1 + TEST cat $M0/f2 + TEST cat $M0/f3 +diff --git a/tests/bugs/replicate/bug-1801624-entry-heal.t b/tests/bugs/replicate/bug-1801624-entry-heal.t +new file mode 100644 +index 0000000..94b4651 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1801624-entry-heal.t +@@ -0,0 +1,58 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2} ++TEST $CLI volume set $V0 heal-timeout 5 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 granular-entry-heal enable ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++echo "Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++ ++# Re-create the file when a brick is down. ++TEST kill_brick $V0 $H0 $B0/brick1 ++TEST rm $M0/FILE ++echo "New Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0 ++ ++# Launching index heal must not reset parent dir afr xattrs or remove granular entry indices. ++$CLI volume heal $V0 # CLI will fail but heal is launched anyway. ++TEST sleep 5 # give index heal a chance to do one run. 
++brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick0/) ++brick2_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick2/) ++TEST [ $brick0_pending -eq "000000000000000000000002" ] ++TEST [ $brick2_pending -eq "000000000000000000000002" ] ++EXPECT "FILE" ls $B0/brick0/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/ ++EXPECT "FILE" ls $B0/brick2/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/ ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++$CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++# No gfid-split-brain (i.e. EIO) must be seen. Try on fresh mount to avoid cached values. ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++TEST cat $M0/FILE ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index d6b70e9..939246e 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -6632,7 +6632,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) + ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) ++ if (ret < priv->child_count) + goto data_unlock; + ret = __afr_selfheal_data_prepare( + heal_frame, this, inode, locked_on, sources, sinks, +@@ -6649,7 +6649,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) + ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, + LLONG_MAX - 1, 0, locked_on); + { +- if (ret < 
AFR_SH_MIN_PARTICIPANTS) ++ if (ret < priv->child_count) + goto mdata_unlock; + ret = __afr_selfheal_metadata_prepare( + heal_frame, this, inode, locked_on, sources, sinks, +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 81ef38a..ce1ea50 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1575,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + char *accused = NULL; /* Accused others without any self-accusal */ + char *pending = NULL; /* Have pending operations on others */ + char *self_accused = NULL; /* Accused itself */ +- int min_participants = -1; + + priv = this->private; + +@@ -1599,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + } + } + +- if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) { +- min_participants = priv->child_count; +- } else { +- min_participants = AFR_SH_MIN_PARTICIPANTS; +- } +- if (afr_success_count(replies, priv->child_count) < min_participants) { ++ if (afr_success_count(replies, priv->child_count) < priv->child_count) { + /* Treat this just like locks not being acquired */ + return -ENOTCONN; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 3ce882e..40be898 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -597,7 +597,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes " +@@ -991,7 +991,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd, + ret = afr_selfheal_entrylk(frame, this, fd->inode, 
this->name, NULL, + data_lock); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes could " +@@ -1115,7 +1115,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode) + ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain, + NULL, locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes could " +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 36640b5..7d4f208 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -514,7 +514,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + ret = -ENOTCONN; + goto unlock; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index 6555ec5..8234cec 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -11,8 +11,6 @@ + #ifndef _AFR_SELFHEAL_H + #define _AFR_SELFHEAL_H + +-#define AFR_SH_MIN_PARTICIPANTS 2 +- + /* Perform fop on all UP subvolumes and wait for all callbacks to return */ + + #define AFR_ONALL(frame, rfn, fop, args...) 
\ +-- +1.8.3.1 + diff --git a/SOURCES/0387-cluster-afr-Delay-post-op-for-fsync.patch b/SOURCES/0387-cluster-afr-Delay-post-op-for-fsync.patch new file mode 100644 index 0000000..3913c14 --- /dev/null +++ b/SOURCES/0387-cluster-afr-Delay-post-op-for-fsync.patch @@ -0,0 +1,440 @@ +From 399fad1ac0f9273483270e8af06a5b2d28927533 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Fri, 29 May 2020 14:24:53 +0530 +Subject: [PATCH 387/392] cluster/afr: Delay post-op for fsync + +Problem: +AFR doesn't delay post-op for fsync fop. For fsync heavy workloads +this leads to un-necessary fxattrop/finodelk for every fsync leading +to bad performance. + +Fix: +Have delayed post-op for fsync. Add special flag in xdata to indicate +that afr shouldn't delay post-op in cases where either the +process will terminate or graph-switch would happen. Otherwise it leads +to un-necessary heals when the graph-switch/process-termination +happens before delayed-post-op completes. + +> Upstream-patch: https://review.gluster.org/c/glusterfs/+/24473 +> Fixes: #1253 + +BUG: 1848896 +Change-Id: I531940d13269a111c49e0510d49514dc169f4577 +Signed-off-by: Pranith Kumar K +(cherry picked from commit 3ed98fc9dcb39223032e343fd5b0ad17fa3cae14) +Reviewed-on: https://code.engineering.redhat.com/gerrit/203694 +Tested-by: RHGS Build Bot +Tested-by: Karthik Subrahmanya +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/glfs-resolve.c | 14 ++- + tests/basic/afr/durability-off.t | 2 + + tests/basic/gfapi/gfapi-graph-switch-open-fd.t | 44 +++++++++ + tests/basic/gfapi/gfapi-keep-writing.c | 129 +++++++++++++++++++++++++ + xlators/cluster/afr/src/afr-inode-write.c | 11 ++- + xlators/cluster/afr/src/afr-transaction.c | 9 +- + xlators/cluster/afr/src/afr.h | 2 +- + xlators/cluster/dht/src/dht-rebalance.c | 15 ++- + xlators/mount/fuse/src/fuse-bridge.c | 23 ++++- + 9 files changed, 239 insertions(+), 10 deletions(-) + create mode 100644 tests/basic/gfapi/gfapi-graph-switch-open-fd.t + create mode 100644 
tests/basic/gfapi/gfapi-keep-writing.c + +diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c +index a79f490..062b7dc 100644 +--- a/api/src/glfs-resolve.c ++++ b/api/src/glfs-resolve.c +@@ -722,6 +722,7 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) + 0, + }; + char uuid1[64]; ++ dict_t *xdata = NULL; + + oldinode = oldfd->inode; + oldsubvol = oldinode->table->xl; +@@ -730,7 +731,15 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) + return fd_ref(oldfd); + + if (!oldsubvol->switched) { +- ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, NULL, NULL); ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ gf_msg(fs->volname, GF_LOG_WARNING, ENOMEM, API_MSG_FSYNC_FAILED, ++ "last-fsync set failed on %s graph %s (%d)", ++ uuid_utoa_r(oldfd->inode->gfid, uuid1), ++ graphid_str(oldsubvol), oldsubvol->graph->id); ++ } ++ ++ ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, xdata, NULL); + DECODE_SYNCOP_ERR(ret); + if (ret) { + gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED, +@@ -809,6 +818,9 @@ out: + newfd = NULL; + } + ++ if (xdata) ++ dict_unref(xdata); ++ + return newfd; + } + +diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t +index 155ffa0..6e0f18b 100644 +--- a/tests/basic/afr/durability-off.t ++++ b/tests/basic/afr/durability-off.t +@@ -26,6 +26,8 @@ TEST $CLI volume heal $V0 + EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 + EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l) + ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + #Test that fsyncs happen when durability is on + TEST $CLI volume set $V0 cluster.ensure-durability on + TEST $CLI volume set $V0 performance.strict-write-ordering on +diff --git a/tests/basic/gfapi/gfapi-graph-switch-open-fd.t b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t +new file 
mode 100644 +index 0000000..2e666be +--- /dev/null ++++ b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t +@@ -0,0 +1,44 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{0..2}; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST touch $M0/sync ++logdir=`gluster --print-logdir` ++ ++TEST build_tester $(dirname $0)/gfapi-keep-writing.c -lgfapi ++ ++ ++#Launch a program to keep doing writes on an fd ++./$(dirname $0)/gfapi-keep-writing ${H0} $V0 $logdir/gfapi-async-calls-test.log sync & ++p=$! ++sleep 1 #Let some writes go through ++#Check if graph switch will lead to any pending markers for ever ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++ ++ ++TEST rm -f $M0/sync #Make sure the glfd is closed ++TEST wait #Wait for background process to die ++#Goal is to check if there is permanent FOOL changelog ++sleep 5 ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick0/glfs_test.txt trusted.afr.dirty ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick1/glfs_test.txt trusted.afr.dirty ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick2/glfs_test.txt trusted.afr.dirty ++ ++cleanup_tester $(dirname $0)/gfapi-async-calls-test ++ ++cleanup; +diff --git a/tests/basic/gfapi/gfapi-keep-writing.c b/tests/basic/gfapi/gfapi-keep-writing.c +new file mode 100644 +index 0000000..91b59ce +--- /dev/null ++++ b/tests/basic/gfapi/gfapi-keep-writing.c +@@ -0,0 +1,129 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define 
LOG_ERR(msg) \ ++ do { \ ++ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \ ++ } while (0) ++ ++glfs_t * ++init_glfs(const char *hostname, const char *volname, const char *logfile) ++{ ++ int ret = -1; ++ glfs_t *fs = NULL; ++ ++ fs = glfs_new(volname); ++ if (!fs) { ++ LOG_ERR("glfs_new failed"); ++ return NULL; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); ++ if (ret < 0) { ++ LOG_ERR("glfs_set_volfile_server failed"); ++ goto out; ++ } ++ ++ ret = glfs_set_logging(fs, logfile, 7); ++ if (ret < 0) { ++ LOG_ERR("glfs_set_logging failed"); ++ goto out; ++ } ++ ++ ret = glfs_init(fs); ++ if (ret < 0) { ++ LOG_ERR("glfs_init failed"); ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ if (ret) { ++ glfs_fini(fs); ++ fs = NULL; ++ } ++ ++ return fs; ++} ++ ++int ++glfs_test_function(const char *hostname, const char *volname, ++ const char *logfile, const char *syncfile) ++{ ++ int ret = -1; ++ int flags = O_CREAT | O_RDWR; ++ glfs_t *fs = NULL; ++ glfs_fd_t *glfd = NULL; ++ const char *buff = "This is from my prog\n"; ++ const char *filename = "glfs_test.txt"; ++ struct stat buf = {0}; ++ ++ fs = init_glfs(hostname, volname, logfile); ++ if (fs == NULL) { ++ LOG_ERR("init_glfs failed"); ++ return -1; ++ } ++ ++ glfd = glfs_creat(fs, filename, flags, 0644); ++ if (glfd == NULL) { ++ LOG_ERR("glfs_creat failed"); ++ goto out; ++ } ++ ++ while (glfs_stat(fs, syncfile, &buf) == 0) { ++ ret = glfs_write(glfd, buff, strlen(buff), flags); ++ if (ret < 0) { ++ LOG_ERR("glfs_write failed"); ++ goto out; ++ } ++ } ++ ++ ret = glfs_close(glfd); ++ if (ret < 0) { ++ LOG_ERR("glfs_write failed"); ++ goto out; ++ } ++ ++out: ++ ret = glfs_fini(fs); ++ if (ret) { ++ LOG_ERR("glfs_fini failed"); ++ } ++ ++ return ret; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ int ret = 0; ++ char *hostname = NULL; ++ char *volname = NULL; ++ char *logfile = NULL; ++ char *syncfile = NULL; ++ ++ if (argc != 5) { ++ fprintf(stderr, "Invalid argument\n"); ++ 
exit(1); ++ } ++ ++ hostname = argv[1]; ++ volname = argv[2]; ++ logfile = argv[3]; ++ syncfile = argv[4]; ++ ++ ret = glfs_test_function(hostname, volname, logfile, syncfile); ++ if (ret) { ++ LOG_ERR("glfs_test_function failed"); ++ } ++ ++ return ret; ++} +diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c +index 7fcc9d4..df82b6e 100644 +--- a/xlators/cluster/afr/src/afr-inode-write.c ++++ b/xlators/cluster/afr/src/afr-inode-write.c +@@ -2492,6 +2492,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + call_frame_t *transaction_frame = NULL; + int ret = -1; + int32_t op_errno = ENOMEM; ++ int8_t last_fsync = 0; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) +@@ -2501,10 +2502,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + if (!local) + goto out; + +- if (xdata) ++ if (xdata) { + local->xdata_req = dict_copy_with_ref(xdata, NULL); +- else ++ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) { ++ if (last_fsync) { ++ local->transaction.disable_delayed_post_op = _gf_true; ++ } ++ } ++ } else { + local->xdata_req = dict_new(); ++ } + + if (!local->xdata_req) + goto out; +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 8e65ae2..ffd0ab8 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -2385,8 +2385,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this, + goto out; + } + +- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) { +- /*Only allow writes but shard does [f]xattrops on writes, so ++ if (local->transaction.disable_delayed_post_op) { ++ goto out; ++ } ++ ++ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) && ++ (local->op != GF_FOP_FSYNC)) { ++ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so + * they are fine too*/ + goto out; 
+ } +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index e731cfa..6bc4721 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -854,7 +854,7 @@ typedef struct _afr_local { + + int (*unwind)(call_frame_t *frame, xlator_t *this); + +- /* post-op hook */ ++ gf_boolean_t disable_delayed_post_op; + } transaction; + + syncbarrier_t barrier; +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 8f31dca..145e616 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -1564,6 +1564,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + xlator_t *old_target = NULL; + xlator_t *hashed_subvol = NULL; + fd_t *linkto_fd = NULL; ++ dict_t *xdata = NULL; + + if (from == to) { + gf_msg_debug(this->name, 0, +@@ -1882,7 +1883,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + + /* TODO: Sync the locks */ + +- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL); ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "%s: failed to set last-fsync flag on " ++ "%s (%s)", ++ loc->path, to->name, strerror(ENOMEM)); ++ } ++ ++ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)", + loc->path, to->name, strerror(-ret)); +@@ -2356,11 +2365,15 @@ out: + + if (dst_fd) + syncop_close(dst_fd); ++ + if (src_fd) + syncop_close(src_fd); + if (linkto_fd) + syncop_close(linkto_fd); + ++ if (xdata) ++ dict_unref(xdata); ++ + loc_wipe(&tmp_loc); + loc_wipe(&parent_loc); + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 6e99053..1592067 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -5551,6 +5551,7 @@ fuse_migrate_fd(xlator_t 
*this, fd_t *basefd, xlator_t *old_subvol, + char create_in_progress = 0; + fuse_fd_ctx_t *basefd_ctx = NULL; + fd_t *oldfd = NULL; ++ dict_t *xdata = NULL; + + basefd_ctx = fuse_fd_ctx_get(this, basefd); + GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out); +@@ -5587,10 +5588,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol, + } + + if (oldfd->inode->table->xl == old_subvol) { +- if (IA_ISDIR(oldfd->inode->ia_type)) ++ if (IA_ISDIR(oldfd->inode->ia_type)) { + ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL); +- else +- ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL); ++ } else { ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ gf_log("glusterfs-fuse", GF_LOG_WARNING, ++ "last-fsync set failed (%s) on fd (%p)" ++ "(basefd:%p basefd-inode.gfid:%s) " ++ "(old-subvolume:%s-%d new-subvolume:%s-%d)", ++ strerror(ENOMEM), oldfd, basefd, ++ uuid_utoa(basefd->inode->gfid), old_subvol->name, ++ old_subvol->graph->id, new_subvol->name, ++ new_subvol->graph->id); ++ } ++ ++ ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL); ++ } + + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, +@@ -5645,6 +5659,9 @@ out: + + fd_unref(oldfd); + ++ if (xdata) ++ dict_unref(xdata); ++ + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch b/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch new file mode 100644 index 0000000..094a484 --- /dev/null +++ b/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch @@ -0,0 +1,142 @@ +From be6fafebe1e391e9d9f14d9aed18adbfda8a262b Mon Sep 17 00:00:00 2001 +From: l17zhou +Date: Mon, 4 Nov 2019 08:45:52 +0200 +Subject: [PATCH 388/392] rpc: Cleanup SSL specific data at the time of freeing + rpc object + +Problem: At the time of cleanup rpc object ssl specific data + is not freeing so it has become a leak. 
+ +Solution: To avoid the leak cleanup ssl specific data at the + time of cleanup rpc object + +> Credits: l17zhou +> Fixes: bz#1768407 +> Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0 +> (Cherry pick from commit 54ed71dba174385ab0d8fa415e09262f6250430c) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23650/) + +Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0 +BUG: 1848891 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/203698 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/socket.c | 22 ++++++++++++++++++++-- + tests/features/ssl-authz.t | 23 ++++++++++++++++++++--- + 2 files changed, 40 insertions(+), 5 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 65845ea..226b2e2 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -446,6 +446,7 @@ ssl_setup_connection_postfix(rpc_transport_t *this) + gf_log(this->name, GF_LOG_DEBUG, + "SSL verification succeeded (client: %s) (server: %s)", + this->peerinfo.identifier, this->myinfo.identifier); ++ X509_free(peer); + return gf_strdup(peer_CN); + + /* Error paths. 
*/ +@@ -1157,7 +1158,15 @@ __socket_reset(rpc_transport_t *this) + memset(&priv->incoming, 0, sizeof(priv->incoming)); + + event_unregister_close(this->ctx->event_pool, priv->sock, priv->idx); +- ++ if (priv->use_ssl && priv->ssl_ssl) { ++ SSL_clear(priv->ssl_ssl); ++ SSL_free(priv->ssl_ssl); ++ priv->ssl_ssl = NULL; ++ } ++ if (priv->use_ssl && priv->ssl_ctx) { ++ SSL_CTX_free(priv->ssl_ctx); ++ priv->ssl_ctx = NULL; ++ } + priv->sock = -1; + priv->idx = -1; + priv->connected = -1; +@@ -3217,7 +3226,6 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + new_priv->sock = new_sock; + + new_priv->ssl_enabled = priv->ssl_enabled; +- new_priv->ssl_ctx = priv->ssl_ctx; + new_priv->connected = 1; + new_priv->is_server = _gf_true; + +@@ -4672,6 +4680,16 @@ fini(rpc_transport_t *this) + pthread_mutex_destroy(&priv->out_lock); + pthread_mutex_destroy(&priv->cond_lock); + pthread_cond_destroy(&priv->cond); ++ if (priv->use_ssl && priv->ssl_ssl) { ++ SSL_clear(priv->ssl_ssl); ++ SSL_free(priv->ssl_ssl); ++ priv->ssl_ssl = NULL; ++ } ++ if (priv->use_ssl && priv->ssl_ctx) { ++ SSL_CTX_free(priv->ssl_ctx); ++ priv->ssl_ctx = NULL; ++ } ++ + if (priv->ssl_private_key) { + GF_FREE(priv->ssl_private_key); + } +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index cae010c..132b598 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -25,6 +25,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume info; + ++TEST $CLI v set all cluster.brick-multiplex on + # Construct a cipher list that excludes CBC because of POODLE. 
+ # http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2014-3566 + # +@@ -45,12 +46,12 @@ TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +-TEST $CLI volume create $V0 $H0:$B0/1 ++TEST $CLI volume create $V0 replica 3 $H0:$B0/{1,2,3} force + TEST $CLI volume set $V0 server.ssl on + TEST $CLI volume set $V0 client.ssl on + TEST $CLI volume set $V0 ssl.cipher-list $(valid_ciphers) + TEST $CLI volume start $V0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count + + # This mount should SUCCEED because ssl-allow=* by default. This effectively + # disables SSL authorization, though authentication and encryption might still +@@ -59,11 +60,27 @@ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 + TEST ping_file $M0/before + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + ++glusterfsd_pid=`pgrep glusterfsd` ++TEST [ $glusterfsd_pid != 0 ] ++start=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` ++echo "Memory consumption for glusterfsd process" ++for i in $(seq 1 100); do ++ gluster v heal $V0 info >/dev/null ++done ++ ++end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` ++diff=$((end-start)) ++ ++# If memory consumption is more than 5M some leak in SSL code path ++ ++TEST [ $diff -lt 5000 ] ++ ++ + # Set ssl-allow to a wildcard that includes our identity. + TEST $CLI volume stop $V0 + TEST $CLI volume set $V0 auth.ssl-allow Any* + TEST $CLI volume start $V0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count + + # This mount should SUCCEED because we match the wildcard. 
+ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 +-- +1.8.3.1 + diff --git a/SOURCES/0389-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch b/SOURCES/0389-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch new file mode 100644 index 0000000..a4f9bf2 --- /dev/null +++ b/SOURCES/0389-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch @@ -0,0 +1,54 @@ +From d3558cfbded7e973fae45ce2196767611336e351 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 8 Jun 2020 13:27:50 +0530 +Subject: [PATCH 389/392] socket: Resolve ssl_ctx leak for a brick while only + mgmt SSL is enabled + +Problem: While only mgmt SSL is enabled for a brick process use_ssl flag + is false for a brick process and socket api's cleanup ssl_ctx only + while use_ssl and ssl_ctx both are valid + +Solution: To avoid a leak check only ssl_ctx, if it is valid cleanup + ssl_ctx + +> Fixes: #1196 +> Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 9873baee34afdf0c20f5fc98a7dbf2a9f07447e2) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24366/) + +BUG: 1848894 +Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/203705 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-transport/socket/src/socket.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 226b2e2..54cd5df 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -1163,7 +1163,7 @@ __socket_reset(rpc_transport_t *this) + SSL_free(priv->ssl_ssl); + priv->ssl_ssl = NULL; + } +- if (priv->use_ssl && priv->ssl_ctx) { ++ if (priv->ssl_ctx) { + SSL_CTX_free(priv->ssl_ctx); + priv->ssl_ctx = NULL; + } +@@ -4685,7 +4685,7 @@ fini(rpc_transport_t *this) + 
SSL_free(priv->ssl_ssl); + priv->ssl_ssl = NULL; + } +- if (priv->use_ssl && priv->ssl_ctx) { ++ if (priv->ssl_ctx) { + SSL_CTX_free(priv->ssl_ctx); + priv->ssl_ctx = NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/0390-tests-Avoid-ssl-authz.t-failure.patch b/SOURCES/0390-tests-Avoid-ssl-authz.t-failure.patch new file mode 100644 index 0000000..443cdb9 --- /dev/null +++ b/SOURCES/0390-tests-Avoid-ssl-authz.t-failure.patch @@ -0,0 +1,51 @@ +From b68fa363c5981441c20fbc78b6dc00437bd698a7 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 22 Jun 2020 11:35:29 +0530 +Subject: [PATCH 390/392] tests: Avoid ssl-authz.t failure + +Problem: ssl-authz.t is failing at the time of checking memory + consumption if brick is consuming more than 5M + +Solution: Update the check to avoid a failure. + +> Change-Id: Iffb031f0695a7da83d5a2f6bac8863dad225317e +> Fixes: bz#1811631 +> Signed-off-by: Mohit Agrawal +> Reviewd on upstream link https://review.gluster.org/#/c/glusterfs/+/24221/) +> (Cherry pick from commit fb20713b380e1df8d7f9e9df96563be2f9144fd6) + +BUG: 1848894 +Change-Id: I4fc5d2e2597abfafc1e26d908c8c4184ab82afd5 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/203844 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/features/ssl-authz.t | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index 132b598..ab05c49 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -67,13 +67,15 @@ echo "Memory consumption for glusterfsd process" + for i in $(seq 1 100); do + gluster v heal $V0 info >/dev/null + done ++#Wait to cleanup memory ++sleep 10 + + end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` + diff=$((end-start)) + +-# If memory consumption is more than 5M some leak in SSL code path ++# If memory consumption is more than 15M some leak in SSL code path + +-TEST [ $diff -lt 
5000 ] ++TEST [ $diff -lt 15000 ] + + + # Set ssl-allow to a wildcard that includes our identity. +-- +1.8.3.1 + diff --git a/SOURCES/0391-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch b/SOURCES/0391-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch new file mode 100644 index 0000000..414f259 --- /dev/null +++ b/SOURCES/0391-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch @@ -0,0 +1,54 @@ +From e9cb714d66a7926a746b8cd5f9288d59aefee918 Mon Sep 17 00:00:00 2001 +From: Kinglong Mee +Date: Mon, 18 Mar 2019 20:47:54 +0800 +Subject: [PATCH 391/392] cluster-syncop: avoid duplicate unlock of + inodelk/entrylk + +When using ec, there are many messages at brick log as, + +[inodelk.c:514:__inode_unlock_lock] 0-test-locks: Matching lock not found for unlock 0-9223372036854775807, lo=68e040a84b7f0000 on 0x7f208c006f78 +[MSGID: 115053] [server-rpc-fops_v2.c:280:server4_inodelk_cbk] 0-test-server: 2557439: INODELK (df4e41be-723f-4289-b7af-b4272b3e880c), client: CTX_ID:67d4a7f3-605a-4965-89a5-31309d62d1fa-GRAPH_ID:0-PID:1659-HOST:openfs-node2-PC_NAME:test-client-1-RECON_NO:-28, error-xlator: test-locks [Invalid argument] + +> Change-Id: Ib164d29ebb071f620a4ca9679c4345ef7c88512a +> Updates: bz#1689920 +> Signed-off-by: Kinglong Mee +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22377/ + +BUG: 1848890 +Change-Id: Ib164d29ebb071f620a4ca9679c4345ef7c88512a +Signed-off-by: Sheetal Pamecha +Reviewed-on: https://code.engineering.redhat.com/gerrit/203852 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/cluster-syncop.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c +index 5a08f26..6ee89dd 100644 +--- a/libglusterfs/src/cluster-syncop.c ++++ b/libglusterfs/src/cluster-syncop.c +@@ -1203,6 +1203,10 @@ cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on, + if (num_success) { + 
FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame, + inodelk, dom, &loc, F_SETLKW, &flock, NULL); ++ } else { ++ loc_wipe(&loc); ++ memset(locked_on, 0, numsubvols); ++ return 0; + } + break; + } +@@ -1244,7 +1248,9 @@ cluster_tiebreaker_entrylk(xlator_t **subvols, unsigned char *on, + entrylk, dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, + NULL); + } else { ++ loc_wipe(&loc); + memset(locked_on, 0, numsubvols); ++ return 0; + } + break; + } +-- +1.8.3.1 + diff --git a/SOURCES/0392-glusterd-unlink-the-file-after-killing-the-process.patch b/SOURCES/0392-glusterd-unlink-the-file-after-killing-the-process.patch new file mode 100644 index 0000000..03d67f8 --- /dev/null +++ b/SOURCES/0392-glusterd-unlink-the-file-after-killing-the-process.patch @@ -0,0 +1,39 @@ +From 6a2cef546457e6dc9a2268cc2f5cc11b850b7f5c Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Tue, 17 Dec 2019 15:52:30 +0530 +Subject: [PATCH 392/392] glusterd: unlink the file after killing the process + +In glusterd_proc_stop(), after killing the pid +we should remove the pidfile. 
+ +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23890/ +> fixes: bz#1784375 +> Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686 +> Signed-off-by: Sanju Rakonde + +BUG: 1849533 +Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/203871 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +index f55a5fd..a05c90d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +@@ -107,6 +107,8 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags) + "service, reason:%s", + proc->name, strerror(errno)); + } ++ } else { ++ (void)glusterd_unlink_file(proc->pidfile); + } + if (flags != PROC_STOP_FORCE) + goto out; +-- +1.8.3.1 + diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec index e97233c..ef7c804 100644 --- a/SPECS/glusterfs.spec +++ b/SPECS/glusterfs.spec @@ -237,7 +237,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 37%{?dist} +Release: 37.2%{?dist} ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ @@ -698,6 +698,15 @@ Patch0380: 0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch Patch0381: 0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch Patch0382: 0382-features-shard-Aggregate-file-size-block-count-befor.patch Patch0383: 0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch +Patch0384: 0384-extras-Modify-group-virt-to-include-network-related-.patch +Patch0385: 0385-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch +Patch0386: 0386-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch +Patch0387: 0387-cluster-afr-Delay-post-op-for-fsync.patch +Patch0388: 
0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch +Patch0389: 0389-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch +Patch0390: 0390-tests-Avoid-ssl-authz.t-failure.patch +Patch0391: 0391-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch +Patch0392: 0392-glusterd-unlink-the-file-after-killing-the-process.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -1154,7 +1163,7 @@ do DEST_FILES=( $(egrep '^\+\+\+ b/' $p | cut -f 2- -d '/') ) EXCLUDE_DOCS=() for idx in ${!SOURCE_FILES[@]}; do - # skip the doc + # skip the doc source_file=${SOURCE_FILES[$idx]} dest_file=${DEST_FILES[$idx]} if [[ "$dest_file" =~ ^doc/.+ ]]; then @@ -2438,6 +2447,13 @@ fi %endif %changelog +* Tue Sep 08 2020 Rinku Kothiya - 6.0-37.2 +- fixes bugs bz#1876857 + +* Wed Jun 24 2020 Deepshikha Khandelwal - 6.0-37.1 +- fixes bugs bz#1848890 bz#1848891 bz#1848893 bz#1848894 bz#1848895 + bz#1848896 bz#1848899 bz#1849533 + * Fri May 29 2020 Rinku Kothiya - 6.0-37 - fixes bugs bz#1840794