From 2f3c8edfdf517a7282b8742178b3904fc4e1945d Mon Sep 17 00:00:00 2001 From: Milind Changire Date: Mon, 11 Feb 2019 20:46:22 -0500 Subject: [PATCH] autobuild v3.12.2-41 Resolves: bz#1390151 bz#1410145 bz#1429190 bz#1510752 bz#1511779 Resolves: bz#1570958 bz#1574490 bz#1595246 bz#1618669 bz#1661393 Resolves: bz#1668989 bz#1669020 Signed-off-by: Milind Changire --- 0511-Update-rfc.sh-to-rhgs-3.4.4.patch | 27 + 0512-Test-fixes-for-rhgs-3.4-downstream.patch | 533 ++++++++ ...cli-is-throwing-error-target-is-busy.patch | 114 ++ ...-gluster-volume-status-when-quorum-t.patch | 49 + 0515-cli-change-the-warning-message.patch | 38 + ...-Fix-permissions-with-non-root-setup.patch | 84 ++ ...te-the-config-checkpoint-date-format.patch | 39 + ...g-create-parent-dir-if-not-available.patch | 57 + ...Delete-invalid-linkto-files-in-rmdir.patch | 118 ++ ...-creation-of-temp-file-in-lua-script.patch | 205 +++ ...-address-family-option-from-vol-file.patch | 276 ++++ 0522-fuse-add-lru-limit-option.patch | 1028 ++++++++++++++ ...ame-macros-roof-and-floor-to-not-con.patch | 225 +++ ...Shield-ping-processing-from-traffic-.patch | 266 ++++ ...ach-request-handler-thread-its-own-q.patch | 1211 +++++++++++++++++ 0526-fuse-interrupt-handling-framework.patch | 671 +++++++++ 0527-fuse-diagnostic-FLUSH-interrupt.patch | 568 ++++++++ ...e-clear-locks-xattr-in-fgetxattr-too.patch | 250 ++++ 0529-fuse-SETLKW-interrupt.patch | 222 +++ glusterfs.spec | 111 +- 20 files changed, 6018 insertions(+), 74 deletions(-) create mode 100644 0511-Update-rfc.sh-to-rhgs-3.4.4.patch create mode 100644 0512-Test-fixes-for-rhgs-3.4-downstream.patch create mode 100644 0513-core-heketi-cli-is-throwing-error-target-is-busy.patch create mode 100644 0514-glusterd-display-gluster-volume-status-when-quorum-t.patch create mode 100644 0515-cli-change-the-warning-message.patch create mode 100644 0516-geo-rep-Fix-permissions-with-non-root-setup.patch create mode 100644 0517-geo-rep-validate-the-config-checkpoint-date-format.patch 
create mode 100644 0518-logging-create-parent-dir-if-not-available.patch create mode 100644 0519-cluster-dht-Delete-invalid-linkto-files-in-rmdir.patch create mode 100644 0520-spec-avoid-creation-of-temp-file-in-lua-script.patch create mode 100644 0521-rpc-use-address-family-option-from-vol-file.patch create mode 100644 0522-fuse-add-lru-limit-option.patch create mode 100644 0523-libglusterfs-rename-macros-roof-and-floor-to-not-con.patch create mode 100644 0524-program-GF-DUMP-Shield-ping-processing-from-traffic-.patch create mode 100644 0525-rpcsvc-provide-each-request-handler-thread-its-own-q.patch create mode 100644 0526-fuse-interrupt-handling-framework.patch create mode 100644 0527-fuse-diagnostic-FLUSH-interrupt.patch create mode 100644 0528-locks-handle-clear-locks-xattr-in-fgetxattr-too.patch create mode 100644 0529-fuse-SETLKW-interrupt.patch diff --git a/0511-Update-rfc.sh-to-rhgs-3.4.4.patch b/0511-Update-rfc.sh-to-rhgs-3.4.4.patch new file mode 100644 index 0000000..5b21d6a --- /dev/null +++ b/0511-Update-rfc.sh-to-rhgs-3.4.4.patch @@ -0,0 +1,27 @@ +From 4049de578f44e028ebe6beab3b1b13ce4d3de954 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Fri, 1 Feb 2019 19:55:33 +0530 +Subject: [PATCH 511/529] Update rfc.sh to rhgs-3.4.4 + +Change-Id: I826c246fefecf8cf12999e3b4b307d0a29aef668 +Signed-off-by: Milind Changire +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index bd52851..6f5c77c 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -17,7 +17,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.4.3"; ++branch="rhgs-3.4.4"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/0512-Test-fixes-for-rhgs-3.4-downstream.patch b/0512-Test-fixes-for-rhgs-3.4-downstream.patch new file mode 100644 index 0000000..62b3ad5 --- /dev/null +++ b/0512-Test-fixes-for-rhgs-3.4-downstream.patch @@ -0,0 +1,533 @@ +From 5f8f80190c154bbb159a3cebbb7d3e12014275ed Mon Sep 17 00:00:00 2001 +From: Nigel Babu +Date: Mon, 30 Apr 
2018 11:28:06 +0530 +Subject: [PATCH 512/529] Test fixes for rhgs-3.4 downstream + +This patch includes test fixes and two tests are removed +because they're not supported downstream. + +Label: DOWNSTREAM ONLY +Change-Id: I99072130cea4780654980837522c76eab38e79d3 +Signed-off-by: Krutika Dhananjay +Signed-off-by: Sanju Rakonde +Signed-off-by: Ravishankar N +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/162177 +--- + tests/basic/bd.t | 142 --------------------- + tests/basic/ec/ec-1468261.t | 1 + + tests/bugs/cli/bug-1169302.t | 4 +- + tests/bugs/core/multiplex-limit-issue-151.t | 2 +- + tests/bugs/distribute/bug-882278.t | 73 ----------- + tests/bugs/glusterd/brick-mux-fd-cleanup.t | 3 + + .../glusterd/bug-1245045-remove-brick-validation.t | 16 ++- + .../glusterd/bug-1293414-import-brickinfo-uuid.t | 1 + + .../bug-1483058-replace-brick-quorum-validation.t | 9 +- + tests/bugs/glusterd/bug-1595320.t | 2 +- + .../df-results-post-replace-brick-operations.t | 3 + + tests/bugs/posix/bug-990028.t | 2 +- + tests/bugs/readdir-ahead/bug-1439640.t | 1 + + .../replicate/bug-1591193-assign-gfid-and-heal.t | 5 +- + .../bug-1637802-arbiter-stale-data-heal-lock.t | 1 + + tests/bugs/shard/zero-flag.t | 1 + + tests/cluster.rc | 10 ++ + tests/include.rc | 1 + + 18 files changed, 48 insertions(+), 229 deletions(-) + delete mode 100755 tests/basic/bd.t + delete mode 100755 tests/bugs/distribute/bug-882278.t + +diff --git a/tests/basic/bd.t b/tests/basic/bd.t +deleted file mode 100755 +index 63622ed..0000000 +--- a/tests/basic/bd.t ++++ /dev/null +@@ -1,142 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../include.rc +-. $(dirname $0)/../volume.rc +- +-function execute() +-{ +- cmd=$1 +- shift +- ${cmd} $@ >/dev/null 2>&1 +-} +- +-function bd_cleanup() +-{ +- execute vgremove -f ${V0} +- execute pvremove ${ld} +- execute losetup -d ${ld} +- execute rm ${BD_DISK} +- cleanup +-} +- +-function check() +-{ +- if [ $? 
-ne 0 ]; then +- echo prerequsite $@ failed +- bd_cleanup +- exit +- fi +-} +- +-SIZE=256 #in MB +- +-bd_cleanup; +- +-## Configure environment needed for BD backend volumes +-## Create a file with configured size and +-## set it as a temporary loop device to create +-## physical volume & VG. These are basic things needed +-## for testing BD xlator if anyone of these steps fail, +-## test script exits +-function configure() +-{ +- GLDIR=`$CLI system:: getwd` +- BD_DISK=${GLDIR}/bd_disk +- +- execute truncate -s${SIZE}M ${BD_DISK} +- check ${BD_DISK} creation +- +- execute losetup -f +- check losetup +- ld=`losetup -f` +- +- execute losetup ${ld} ${BD_DISK} +- check losetup ${BD_DISK} +- execute pvcreate -f ${ld} +- check pvcreate ${ld} +- execute vgcreate ${V0} ${ld} +- check vgcreate ${V0} +- execute lvcreate --thin ${V0}/pool --size 128M +-} +- +-function volinfo_field() +-{ +- local vol=$1; +- local field=$2; +- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; +-} +- +-function volume_type() +-{ +- getfattr -n volume.type $M0/. 
--only-values --absolute-names -e text +-} +- +-case $OSTYPE in +-NetBSD) +- echo "Skip test on LVM which is not available on NetBSD" >&2 +- SKIP_TESTS +- exit 0 +- ;; +-*) +- ;; +-esac +- +-TEST glusterd +-TEST pidof glusterd +-configure +- +-TEST $CLI volume create $V0 ${H0}:/$B0/$V0?${V0} +-EXPECT "$V0" volinfo_field $V0 'Volume Name'; +-EXPECT 'Created' volinfo_field $V0 'Status'; +- +-## Start volume and verify +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +-EXPECT '1' volume_type +- +-## Create posix file +-TEST touch $M0/posix +- +-TEST touch $M0/lv +-gfid=`getfattr -n glusterfs.gfid.string $M0/lv --only-values --absolute-names` +-TEST setfattr -n user.glusterfs.bd -v "lv:4MB" $M0/lv +-# Check if LV is created +-TEST stat /dev/$V0/${gfid} +- +-## Create filesystem +-sleep 1 +-TEST mkfs.ext4 -qF $M0/lv +-# Cloning +-TEST touch $M0/lv_clone +-gfid=`getfattr -n glusterfs.gfid.string $M0/lv_clone --only-values --absolute-names` +-TEST setfattr -n clone -v ${gfid} $M0/lv +-TEST stat /dev/$V0/${gfid} +- +-sleep 1 +-## Check mounting +-TEST mount -o loop $M0/lv $M1 +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1 +- +-# Snapshot +-TEST touch $M0/lv_sn +-gfid=`getfattr -n glusterfs.gfid.string $M0/lv_sn --only-values --absolute-names` +-TEST setfattr -n snapshot -v ${gfid} $M0/lv +-TEST stat /dev/$V0/${gfid} +- +-# Merge +-sleep 1 +-TEST setfattr -n merge -v "$M0/lv_sn" $M0/lv_sn +-TEST ! stat $M0/lv_sn +-TEST ! 
stat /dev/$V0/${gfid} +- +- +-rm $M0/* -f +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +-TEST $CLI volume stop ${V0} +-EXPECT 'Stopped' volinfo_field $V0 'Status'; +-TEST $CLI volume delete ${V0} +- +-bd_cleanup +diff --git a/tests/basic/ec/ec-1468261.t b/tests/basic/ec/ec-1468261.t +index d687d7b..b2d92fc 100644 +--- a/tests/basic/ec/ec-1468261.t ++++ b/tests/basic/ec/ec-1468261.t +@@ -14,6 +14,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} + TEST $CLI volume set $V0 disperse.optimistic-change-log on ++TEST $CLI volume set $V0 disperse.other-eager-lock on + TEST $CLI volume start $V0 + + #Mount the volume +diff --git a/tests/bugs/cli/bug-1169302.t b/tests/bugs/cli/bug-1169302.t +index 24355e5..05c006c 100755 +--- a/tests/bugs/cli/bug-1169302.t ++++ b/tests/bugs/cli/bug-1169302.t +@@ -40,7 +40,9 @@ cleanup_statedump + # hostname or IP-address with the connection from the bug-1169302 executable. + # In our CI it seems not possible to use $H0, 'localhost', $(hostname --fqdn) + # or even "127.0.0.1".... 
+-TEST $CLI_3 volume statedump $V0 client $H1:$GFAPI_PID ++sleep 2 ++host=`netstat -nap | grep $GFAPI_PID | grep 24007 | awk '{print $4}' | cut -d: -f1` ++TEST $CLI_3 volume statedump $V0 client $host:$GFAPI_PID + EXPECT_WITHIN $STATEDUMP_TIMEOUT "Y" path_exists $statedumpdir/glusterdump.$GFAPI_PID* + + kill $GFAPI_PID +diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/bugs/core/multiplex-limit-issue-151.t +index 9511756..c5bbbda 100644 +--- a/tests/bugs/core/multiplex-limit-issue-151.t ++++ b/tests/bugs/core/multiplex-limit-issue-151.t +@@ -50,7 +50,7 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks + + TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start +-TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 commit ++TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 force + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids +diff --git a/tests/bugs/distribute/bug-882278.t b/tests/bugs/distribute/bug-882278.t +deleted file mode 100755 +index 8cb5147..0000000 +--- a/tests/bugs/distribute/bug-882278.t ++++ /dev/null +@@ -1,73 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-cleanup +- +-# Is there a good reason to require --fqdn elsewhere? It's worse than useless +-# here. +-H0=$(hostname -s) +- +-function recreate { +- # The rm is necessary so we don't get fooled by leftovers from old runs. 
+- rm -rf $1 && mkdir -p $1 +-} +- +-function count_lines { +- grep "$1" $2/* | wc -l +-} +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume info; +- +-## Start and create a volume +-TEST recreate ${B0}/${V0}-0 +-TEST recreate ${B0}/${V0}-1 +-TEST $CLI volume create $V0 $H0:$B0/${V0}-{0,1} +-TEST $CLI volume set $V0 cluster.nufa on +- +-function volinfo_field() +-{ +- local vol=$1; +- local field=$2; +- +- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; +-} +- +- +-## Verify volume is created +-EXPECT "$V0" volinfo_field $V0 'Volume Name'; +-EXPECT 'Created' volinfo_field $V0 'Status'; +- +-## Start volume and verify +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-## Mount native +-special_option="--xlator-option ${V0}-dht.local-volume-name=${V0}-client-1" +-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $special_option $M0 +- +-## Create a bunch of test files. +-for i in $(seq 0 99); do +- echo hello > $(printf $M0/file%02d $i) +-done +- +-## Make sure the files went to the right place. There might be link files in +-## the other brick, but they won't have any contents. +-EXPECT "0" count_lines hello ${B0}/${V0}-0 +-EXPECT "100" count_lines hello ${B0}/${V0}-1 +- +-if [ "$EXIT_EARLY" = "1" ]; then +- exit 0; +-fi +- +-## Finish up +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +-TEST $CLI volume stop $V0; +-EXPECT 'Stopped' volinfo_field $V0 'Status'; +- +-TEST $CLI volume delete $V0; +-TEST ! 
$CLI volume info $V0; +- +-cleanup; +diff --git a/tests/bugs/glusterd/brick-mux-fd-cleanup.t b/tests/bugs/glusterd/brick-mux-fd-cleanup.t +index de11c17..2ac7f9c 100644 +--- a/tests/bugs/glusterd/brick-mux-fd-cleanup.t ++++ b/tests/bugs/glusterd/brick-mux-fd-cleanup.t +@@ -76,3 +76,6 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1 + + cleanup ++ ++#delay-gen in not present downstream ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000 +diff --git a/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t +index 597c40c..a931d29 100644 +--- a/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t ++++ b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t +@@ -3,12 +3,16 @@ + . $(dirname $0)/../../include.rc + . $(dirname $0)/../../cluster.rc + ++function peer_count { ++eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l ++} ++ + cleanup + + TEST launch_cluster 3; + TEST $CLI_1 peer probe $H2; + TEST $CLI_1 peer probe $H3; +-EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1 + + TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 + TEST $CLI_1 volume start $V0 +@@ -21,7 +25,9 @@ TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start + TEST start_glusterd 2 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0} + +-EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1 ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 2 ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 3 + + #volume status should work + TEST $CLI_2 volume status +@@ -36,7 +42,7 @@ TEST ! 
$CLI_1 volume remove-brick $V0 $H2:$B2/${V0} commit + TEST start_glusterd 2 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0} + +-EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1 + + #volume status should work + TEST $CLI_2 volume status +@@ -44,12 +50,12 @@ TEST $CLI_2 volume status + TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} stop + + kill_glusterd 3 +-EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1 + + TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start + + TEST start_glusterd 3 +-EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1 + + TEST $CLI_3 volume status + +diff --git a/tests/bugs/glusterd/bug-1293414-import-brickinfo-uuid.t b/tests/bugs/glusterd/bug-1293414-import-brickinfo-uuid.t +index 9f67e4c..977276e 100755 +--- a/tests/bugs/glusterd/bug-1293414-import-brickinfo-uuid.t ++++ b/tests/bugs/glusterd/bug-1293414-import-brickinfo-uuid.t +@@ -24,6 +24,7 @@ EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count + TEST ! $CLI_3 peer detach $H1 + TEST ! $CLI_3 peer detach $H2 + ++EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count + + # peer not hosting bricks should be detachable + TEST $CLI_3 peer detach $H4 +diff --git a/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t b/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t +index 3dbe28a..2d9e528 100644 +--- a/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t ++++ b/tests/bugs/glusterd/bug-1483058-replace-brick-quorum-validation.t +@@ -45,7 +45,14 @@ TEST start_glusterd 2 + + EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + ++# checking peer_count is not enough to call that quorum is regained as ++# peer_count is based on peerinfo->connected where as quorum is calculated based ++# on peerinfo->quorum_contrib. 
To avoid this spurious race of replace brick ++# commit force to execute and fail before the quorum is regained run the command ++# in EXPECT_WITHIN to ensure that with multiple attempts the command goes ++# through once the quorum is regained. ++ + # Now quorum is met. replace-brick will execute successfuly +-TEST $CLI_1 volume replace-brick $V0 $H2:$B2/${V0}1 $H1:$B1/${V0}1_new commit force ++EXPECT_WITHIN $PEER_SYNC_TIMEOUT 0 attempt_replace_brick 1 $V0 $H2:$B2/${V0}1 $H1:$B1/${V0}1_new + + #cleanup; +diff --git a/tests/bugs/glusterd/bug-1595320.t b/tests/bugs/glusterd/bug-1595320.t +index f41df9d..3a289f3 100644 +--- a/tests/bugs/glusterd/bug-1595320.t ++++ b/tests/bugs/glusterd/bug-1595320.t +@@ -25,7 +25,7 @@ TEST pidof glusterd + + # Create volume and enable brick multiplexing + TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3 +-gluster v set all cluster.brick-multiplex on ++TEST $CLI v set all cluster.brick-multiplex on + + # Start the volume + TEST $CLI volume start $V0 +diff --git a/tests/bugs/glusterd/df-results-post-replace-brick-operations.t b/tests/bugs/glusterd/df-results-post-replace-brick-operations.t +index 443911c..04f7588 100644 +--- a/tests/bugs/glusterd/df-results-post-replace-brick-operations.t ++++ b/tests/bugs/glusterd/df-results-post-replace-brick-operations.t +@@ -53,6 +53,9 @@ total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') + TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1/brick1 $H0:$B0/${V0}4/brick1 commit force + TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}2/brick1 $H0:$B0/${V0}5/brick1 commit force + ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++ + # check for the size at mount point, it should be same as previous + total_space_new=$(df -P $M0 | tail -1 | awk '{ print $2}') + TEST [ $total_space -eq $total_space_new ] +diff --git a/tests/bugs/posix/bug-990028.t b/tests/bugs/posix/bug-990028.t +index c864214..bef36a8 100755 +--- 
a/tests/bugs/posix/bug-990028.t ++++ b/tests/bugs/posix/bug-990028.t +@@ -78,7 +78,7 @@ function links_across_directories() + TEST [ $LINES = 2 ] + + for i in $(seq 1 2); do +- HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir$i/file$i 2>&1 | grep "trusted.pgfid" | cut -d$'\n' -f$i | cut -d'=' -f2` ++ HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir$i/file$i 2>&1 | grep "trusted.pgfid" | awk -v n=$i 'NR==n' | cut -d'=' -f2` + TEST_IN_LOOP [ $HL = "0x00000001" ] + done + +diff --git a/tests/bugs/readdir-ahead/bug-1439640.t b/tests/bugs/readdir-ahead/bug-1439640.t +index cc6c829..dcd5407 100755 +--- a/tests/bugs/readdir-ahead/bug-1439640.t ++++ b/tests/bugs/readdir-ahead/bug-1439640.t +@@ -8,6 +8,7 @@ cleanup; + TEST glusterd + + TEST $CLI volume create $V0 $H0:$B{0..1}/$V0 ++TEST $CLI volume set $V0 readdir-ahead on + TEST $CLI volume start $V0 + + TEST ! $CLI volume set $V0 parallel-readdir sdf +diff --git a/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t +index d3b5f9a..a2abaf6 100644 +--- a/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t ++++ b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t +@@ -108,10 +108,7 @@ TEST stat $M0/file2 + + # Though file is created on all 3 bricks, lookup will fail as arbiter blames the + # other 2 bricks and ariter is not 'readable'. +-# TEST ! stat $M0/file3 +-# But the checks for failing lookups when quorum is not met is not yet there in +-# rhgs-3.4.0, so stat will succeed. +-TEST stat $M0/file3 ++TEST ! stat $M0/file3 + + # Launch index heal to complete any pending data/metadata heals. 
+ TEST $CLI volume heal $V0 +diff --git a/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t +index 91ed39b..d7d1f28 100644 +--- a/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t ++++ b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t +@@ -32,6 +32,7 @@ EXPECT 2 get_pending_heal_count $V0 + # Bring it back up and let heal complete. + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +diff --git a/tests/bugs/shard/zero-flag.t b/tests/bugs/shard/zero-flag.t +index 84cb963..1f39787 100644 +--- a/tests/bugs/shard/zero-flag.t ++++ b/tests/bugs/shard/zero-flag.t +@@ -14,6 +14,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3} + TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB + TEST $CLI volume start $V0 + + TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 +diff --git a/tests/cluster.rc b/tests/cluster.rc +index c1ff8ab..e258b58 100644 +--- a/tests/cluster.rc ++++ b/tests/cluster.rc +@@ -142,6 +142,16 @@ function peer_count() { + $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l + } + ++function attempt_replace_brick { ++ local cli_no=$1 ++ local vol=$2; ++ local src_brick=$3; ++ local dst_brick=$4; ++ ++ eval \$CLI_$cli_no volume replace-brick $vol $src_brick $dst_brick commit force; ++ echo $? ++} ++ + function cluster_rebalance_status_field { + #The rebalance status can be up to 3 words, (e.g.:'fix-layout in progress'), hence the awk-print $7 thru $9. 
+ #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(e.g.:'completed 3.00'). +diff --git a/tests/include.rc b/tests/include.rc +index aca4c4a..81146f4 100644 +--- a/tests/include.rc ++++ b/tests/include.rc +@@ -74,6 +74,7 @@ PROCESS_UP_TIMEOUT=30 + NFS_EXPORT_TIMEOUT=20 + CHILD_UP_TIMEOUT=20 + PROBE_TIMEOUT=60 ++PEER_SYNC_TIMEOUT=20 + REBALANCE_TIMEOUT=360 + REOPEN_TIMEOUT=20 + HEAL_TIMEOUT=80 +-- +1.8.3.1 + diff --git a/0513-core-heketi-cli-is-throwing-error-target-is-busy.patch b/0513-core-heketi-cli-is-throwing-error-target-is-busy.patch new file mode 100644 index 0000000..aa15a69 --- /dev/null +++ b/0513-core-heketi-cli-is-throwing-error-target-is-busy.patch @@ -0,0 +1,114 @@ +From 61d178c25468adfac4cbdfcef43a0d002c635466 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 5 Feb 2019 12:49:10 +0530 +Subject: [PATCH 513/529] core: heketi-cli is throwing error "target is busy" + +Problem: When rpc-transport-disconnect happens, server_connection_cleanup_flush_cbk() + is supposed to call rpc_transport_unref() after open-files on + that transport are flushed per transport.But open-fd-count is + maintained in bound_xl->fd_count, which can be incremented/decremented + cumulatively in server_connection_cleanup() by all transport + disconnect paths. So instead of rpc_transport_unref() happening + per transport, it ends up doing it only once after all the files + on all the transports for the brick are flushed leading to + rpc-leaks. 
+ +Solution: To avoid races maintain fd_cnt at client instead of maintaining + on brick + +Credits: Pranith Kumar Karampuri +> Change-Id: I6e8ea37a61f82d9aefb227c5b3ab57a7a36850e6 +> fixes: bz#1668190 +> (Cherry pick from commit b41cdeb638f9f9ec2fef13ec95c216faf52a9df9) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22108/) + +Change-Id: Ic810095ea1ce418836d240d411168df8be0e4a41 +BUG: 1669020 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/162251 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/client_t.c | 1 + + libglusterfs/src/client_t.h | 2 ++ + libglusterfs/src/xlator.c | 1 - + libglusterfs/src/xlator.h | 3 --- + xlators/protocol/server/src/server-helpers.c | 4 ++-- + 5 files changed, 5 insertions(+), 6 deletions(-) + +diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c +index a9ae62c..7d92e0d 100644 +--- a/libglusterfs/src/client_t.c ++++ b/libglusterfs/src/client_t.c +@@ -232,6 +232,7 @@ gf_client_get (xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid, + + GF_ATOMIC_INIT (client->bind, 1); + GF_ATOMIC_INIT (client->count, 1); ++ GF_ATOMIC_INIT(client->fd_cnt, 0); + + client->auth.flavour = cred->flavour; + if (cred->flavour != AUTH_NONE) { +diff --git a/libglusterfs/src/client_t.h b/libglusterfs/src/client_t.h +index 088508e..403c488 100644 +--- a/libglusterfs/src/client_t.h ++++ b/libglusterfs/src/client_t.h +@@ -46,6 +46,8 @@ typedef struct _client { + inode_t *subdir_inode; + uuid_t subdir_gfid; + int32_t opversion; ++ /* Variable to save fd_count for detach brick */ ++ gf_atomic_t fd_cnt; + } client_t; + + #define GF_CLIENTCTX_INITIAL_SIZE 8 +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 340d83d..7d90baa 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -490,7 +490,6 @@ xlator_init (xlator_t *xl) + + xl->instance_name = NULL; + 
GF_ATOMIC_INIT(xl->xprtrefcnt, 0); +- GF_ATOMIC_INIT(xl->fd_cnt, 0); + if (!xl->init) { + gf_msg (xl->name, GF_LOG_WARNING, 0, LG_MSG_INIT_FAILED, + "No init() found"); +diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h +index f8f2630..202bdca 100644 +--- a/libglusterfs/src/xlator.h ++++ b/libglusterfs/src/xlator.h +@@ -965,9 +965,6 @@ struct _xlator { + /* flag to avoid recall of xlator_mem_cleanup for xame xlator */ + uint32_t call_cleanup; + +- /* Variable to save fd_count for detach brick */ +- gf_atomic_t fd_cnt; +- + /* Variable to save xprt associated for detach brick */ + gf_atomic_t xprtrefcnt; + +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index 99256bf..30045ef 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ b/xlators/protocol/server/src/server-helpers.c +@@ -266,7 +266,7 @@ server_connection_cleanup_flush_cbk (call_frame_t *frame, void *cookie, + victim = client->bound_xl; + + if (victim) { +- fd_cnt = GF_ATOMIC_DEC(victim->fd_cnt); ++ fd_cnt = GF_ATOMIC_DEC(client->fd_cnt); + if (!fd_cnt && conf && detach) { + pthread_mutex_lock(&conf->mutex); + { +@@ -413,7 +413,7 @@ server_connection_cleanup (xlator_t *this, client_t *client, + if (fd_cnt) { + if (fd_exist) + (*fd_exist) = _gf_true; +- GF_ATOMIC_ADD(bound_xl->fd_cnt, fd_cnt); ++ GF_ATOMIC_ADD(client->fd_cnt, fd_cnt); + } + } + +-- +1.8.3.1 + diff --git a/0514-glusterd-display-gluster-volume-status-when-quorum-t.patch b/0514-glusterd-display-gluster-volume-status-when-quorum-t.patch new file mode 100644 index 0000000..17d7c3c --- /dev/null +++ b/0514-glusterd-display-gluster-volume-status-when-quorum-t.patch @@ -0,0 +1,49 @@ +From 4413ccd6818a8680c74bc072b784319ce8d8429f Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Thu, 9 Nov 2017 13:15:51 +0530 +Subject: [PATCH 514/529] glusterd: display gluster volume status, when quorum + type is server + +Problem: when server-quorum-type is server, after 
restarting glusterd +in the node which is up, gluster volume status is giving incorrect +information. + +Fix: check whether server is blank, before adding other keys into the +dictionary. + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/18703/ + +>Change-Id: I926ebdffab330ccef844f23f6d6556e137914047 +>BUG: 1511339 +>Signed-off-by: Sanju Rakonde + +Change-Id: I926ebdffab330ccef844f23f6d6556e137914047 +BUG: 1574490 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/162399 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index e21ec4e..bafc3af 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5204,6 +5204,12 @@ glusterd_add_node_to_dict (char *server, dict_t *dict, int count, + * the brick as hostname+path, so this will make more sense + * when output. + */ ++ ++ if (!strcmp(server, "")) { ++ ret = 0; ++ goto out; ++ } ++ + snprintf (key, sizeof (key), "brick%d.hostname", count); + if (!strcmp (server, priv->nfs_svc.name)) + ret = dict_set_str (dict, key, "NFS Server"); +-- +1.8.3.1 + diff --git a/0515-cli-change-the-warning-message.patch b/0515-cli-change-the-warning-message.patch new file mode 100644 index 0000000..7414d11 --- /dev/null +++ b/0515-cli-change-the-warning-message.patch @@ -0,0 +1,38 @@ +From d7d4504b6f3e7fb659dd20640b53922812d50445 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 6 Feb 2019 19:06:45 +0530 +Subject: [PATCH 515/529] cli: change the warning message + +This patch changes the warning message user gets, when enabling brick +multiplexing to reflect OCS instead of CNS/CRS. 
+ +Label: DOWNSTREAM ONLY + +Change-Id: Id5fd87955d5a692f8e57560245f8b0cf9882e1da +BUG: 1661393 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/162405 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + cli/src/cli-cmd-parser.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index e790d79..dcce9d3 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1742,8 +1742,8 @@ cli_cmd_volume_set_parse (struct cli_state *state, const char **words, + + if ((strcmp (key, "cluster.brick-multiplex") == 0)) { + question = "Brick-multiplexing is supported only for " +- "container workloads (CNS/CRS). Also it is " +- "advised to make sure that either all " ++ "OCS converged or independent mode. Also it" ++ " is advised to make sure that either all " + "volumes are in stopped state or no bricks " + "are running before this option is modified." + "Do you still want to continue?"; +-- +1.8.3.1 + diff --git a/0516-geo-rep-Fix-permissions-with-non-root-setup.patch b/0516-geo-rep-Fix-permissions-with-non-root-setup.patch new file mode 100644 index 0000000..12ab13c --- /dev/null +++ b/0516-geo-rep-Fix-permissions-with-non-root-setup.patch @@ -0,0 +1,84 @@ +From 39bf395e91021dd51d53c312d6e02638267c3a6b Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 20 Nov 2018 12:36:55 +0530 +Subject: [PATCH 516/529] geo-rep: Fix permissions with non-root setup + +Problem: +In non-root fail-over/fail-back(FO/FB), when slave is +promoted as master, the session goes to 'Faulty' + +Cause: +The command 'gluster-mountbroker ' +is run as a pre-requisite on slave in non-root setup. 
+It modifies the permission and group of following required +directories and files recursively + + [1] /var/lib/glusterd/geo-replication + [2] /var/log/glusterfs/geo-replication-slaves + +In a normal setup, this is executed on slave node and hence +doing it recursively is not an issue on [1]. But when original +master becomes slave in non-root during FO/FB, it contains +ssh public keys and modifying permissions on them causes +geo-rep to fail with incorrect permissions. + +Fix: +Don't do permission change recursively. Fix permissions for +required files. + +Backport of: + > Patch: https://review.gluster.org/#/c/glusterfs/+/21689/ + > BUG: bz#1651498 + > Change-Id: I68a744644842e3b00abc26c95c06f123aa78361d + > Signed-off-by: Kotresh HR + +BUG: 1510752 +Change-Id: I68a744644842e3b00abc26c95c06f123aa78361d +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/162463 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/src/peer_mountbroker.py.in | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/geo-replication/src/peer_mountbroker.py.in b/geo-replication/src/peer_mountbroker.py.in +index be182c5..5be16a2 100644 +--- a/geo-replication/src/peer_mountbroker.py.in ++++ b/geo-replication/src/peer_mountbroker.py.in +@@ -8,6 +8,7 @@ from gluster.cliutils import (execute, Cmd, node_output_ok, + from prettytable import PrettyTable + + LOG_DIR = "@localstatedir@/log/glusterfs/geo-replication-slaves" ++CLI_LOG = "@localstatedir@/log/glusterfs/cli.log" + GEOREP_DIR = "@GLUSTERD_WORKDIR@/geo-replication" + GLUSTERD_VOLFILE = "@GLUSTERD_VOLFILE@" + +@@ -142,7 +143,7 @@ class NodeSetup(Cmd): + # chgrp -R /var/log/glusterfs/geo-replication-slaves + # chgrp -R /var/lib/glusterd/geo-replication + # chmod -R 770 /var/log/glusterfs/geo-replication-slaves +- # chmod -R 770 /var/lib/glusterd/geo-replication ++ # chmod 770 /var/lib/glusterd/geo-replication + # mkdir -p + # chmod 0711 
+ # If selinux, +@@ -192,8 +193,13 @@ class NodeSetup(Cmd): + + execute(["chgrp", "-R", args.group, GEOREP_DIR]) + execute(["chgrp", "-R", args.group, LOG_DIR]) +- execute(["chmod", "-R", "770", GEOREP_DIR]) +- execute(["chmod", "-R", "770", args.group, LOG_DIR]) ++ execute(["chgrp", args.group, CLI_LOG]) ++ execute(["chmod", "770", args.group, GEOREP_DIR]) ++ execute(["find", LOG_DIR, "-type", "d", "-exec", "chmod", "770", "{}", ++ "+"]) ++ execute(["find", LOG_DIR, "-type", "f", "-exec", "chmod", "660", "{}", ++ "+"]) ++ execute(["chmod", "660", CLI_LOG]) + + m.set_mount_root_and_group(args.mount_root, args.group) + m.save() +-- +1.8.3.1 + diff --git a/0517-geo-rep-validate-the-config-checkpoint-date-format.patch b/0517-geo-rep-validate-the-config-checkpoint-date-format.patch new file mode 100644 index 0000000..f2ae211 --- /dev/null +++ b/0517-geo-rep-validate-the-config-checkpoint-date-format.patch @@ -0,0 +1,39 @@ +From ba30dc0db99f0cd6e83ba5085be43607e4710711 Mon Sep 17 00:00:00 2001 +From: Shwetha Acharya +Date: Wed, 21 Nov 2018 12:24:00 +0530 +Subject: [PATCH 517/529] geo-rep: validate the config checkpoint date format + +Added a strlen check to ensure that the format is (Y-m-d H:M:S). 
+ +>Change-Id: I8844aaa33418d43ffe2320c4a05eb1eddd306903 +>updates: bz#1651584 +>Signed-off-by: Shwetha Acharya + +backport of https://review.gluster.org/#/c/glusterfs/+/21692/ + +BUG: 1429190 +Change-Id: I70d56925abfffb02d2d4b7d6f570b2c063a8d9c2 +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/162467 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-parser.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index dcce9d3..a450797 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -2774,7 +2774,7 @@ config_parse (const char **words, int wordcount, dict_t *dict, + ret_chkpt = strptime(append_str, "%Y-%m-%d %H:%M:%S", + &checkpoint_time); + +- if (ret_chkpt == NULL) { ++ if (ret_chkpt == NULL || *ret_chkpt != '\0') { + ret = -1; + cli_err ("Invalid Checkpoint label. Use format " + "\"Y-m-d H:M:S\", Example: 2016-10-25 15:30:45"); +-- +1.8.3.1 + diff --git a/0518-logging-create-parent-dir-if-not-available.patch b/0518-logging-create-parent-dir-if-not-available.patch new file mode 100644 index 0000000..d1db729 --- /dev/null +++ b/0518-logging-create-parent-dir-if-not-available.patch @@ -0,0 +1,57 @@ +From 1e1bda15377a133e9a91e6f99d13e02bf4469269 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Thu, 7 Feb 2019 13:57:21 +0530 +Subject: [PATCH 518/529] logging: create parent dir if not available + +As glusterfs logging uses different directory than /var/log +(ie, /var/log/glusterfs), there is a chance it may not be +present when starting glusterfs. Create parent dir if it +doesn't exist. 
+ +Upstream fix: +>> URL: https://review.gluster.org/21536 + +BUG: 1570958 +Change-Id: I6efaffd1e7e8aee350afcf2ca354b27747ff5e50 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/162470 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/logging.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c +index 0f238d0..631bc98 100644 +--- a/libglusterfs/src/logging.c ++++ b/libglusterfs/src/logging.c +@@ -758,6 +758,26 @@ gf_log_init (void *data, const char *file, const char *ident) + goto out; + } + ++ /* Also create parent dir */ ++ char *logdir = gf_strdup(file); ++ if (!logdir) { ++ return -1; ++ } ++ char *tmp_index = rindex(logdir, '/'); ++ if (tmp_index) { ++ tmp_index[0] = '\0'; ++ } ++ if (mkdir_p(logdir, 0755, _gf_true)) { ++ /* EEXIST is handled in mkdir_p() itself */ ++ gf_msg("logging", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR, ++ "failed to create metrics dir %s (%s)", logdir, ++ strerror(errno)); ++ GF_FREE(logdir); ++ return -1; ++ } ++ /* no need of this variable */ ++ GF_FREE(logdir); ++ + ctx->log.filename = gf_strdup (file); + if (!ctx->log.filename) { + fprintf (stderr, "ERROR: updating log-filename failed: %s\n", +-- +1.8.3.1 + diff --git a/0519-cluster-dht-Delete-invalid-linkto-files-in-rmdir.patch b/0519-cluster-dht-Delete-invalid-linkto-files-in-rmdir.patch new file mode 100644 index 0000000..7c31115 --- /dev/null +++ b/0519-cluster-dht-Delete-invalid-linkto-files-in-rmdir.patch @@ -0,0 +1,118 @@ +From a902a17263648180bba8a0167a221e549ba5186a Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Wed, 6 Feb 2019 10:26:42 +0530 +Subject: [PATCH 519/529] cluster/dht: Delete invalid linkto files in rmdir + +rm -rf fails on dirs which contain linkto files +that point to themselves because dht incorrectly thought +that they were cached files after looking them up. 
+The fix now treats them as invalid linkto files +and deletes them. + +upstream master: https://review.gluster.org/#/c/glusterfs/+/22066/ + +> Change-Id: I376c72a5309714ee339c74485e02cfb4e29be643 +> fixes: bz#1667804 +> Signed-off-by: N Balachandran + +Change-Id: Ib759907131f791e5853b2e0cb38a68d94a3efd81 +BUG: 1668989 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/162342 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/distribute/bug-1667804.t | 63 ++++++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 6 ++-- + 2 files changed, 67 insertions(+), 2 deletions(-) + create mode 100644 tests/bugs/distribute/bug-1667804.t + +diff --git a/tests/bugs/distribute/bug-1667804.t b/tests/bugs/distribute/bug-1667804.t +new file mode 100644 +index 0000000..3f7c431 +--- /dev/null ++++ b/tests/bugs/distribute/bug-1667804.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../dht.rc ++ ++function confirm_all_linkto_files () ++{ ++ inpath=$1 ++ for infile in $inpath/* ++ do ++ echo $infile ++ ret1=$(is_dht_linkfile $infile) ++ if [ "$ret1" -eq 0 ]; then ++ echo "$infile is not a linkto file" ++ echo 0 ++ return ++ fi ++ done ++ echo 1 ++} ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++#Create a distributed volume ++TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2}; ++TEST $CLI volume start $V0 ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++#Create files and rename them in order to create linkto files ++TEST mkdir -p $M0/dir0/dir1 ++TEST touch $M0/dir0/dir1/file-{1..50} ++ ++for i in {1..50}; do ++ mv $M0/dir0/dir1/file-$i $M0/dir0/dir1/nfile-$i; ++done ++ ++#Remove the second brick to force the creation of linkto files ++#on the removed brick ++ ++TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}2" ++TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop ++ ++EXPECT "1" confirm_all_linkto_files $B0/${V0}2/dir0/dir1 ++ ++#Modify the xattrs of the linkto files on the removed brick to point to itself. ++ ++target=$(cat $M0/.meta/graphs/active/$V0-dht/subvolumes/1/name) ++ ++setfattr -n trusted.glusterfs.dht.linkto -v "$target\0" $B0/${V0}2/dir0/dir1/nfile* ++ ++ ++TEST rm -rf $M0/dir0 ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 767c6a8..1311a8d 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -10062,8 +10062,10 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, + + subvol = dht_linkfile_subvol (this, NULL, &trav->d_stat, + trav->dict); +- if (!subvol) { +- ++ if (!subvol || (subvol == src)) { ++ /* we need to delete the linkto file if it does not ++ * have a valid subvol or it points to itself. 
++ */ + gf_msg (this->name, GF_LOG_INFO, 0, + DHT_MSG_INVALID_LINKFILE, + "Linkfile does not have link subvolume. " +-- +1.8.3.1 + diff --git a/0520-spec-avoid-creation-of-temp-file-in-lua-script.patch b/0520-spec-avoid-creation-of-temp-file-in-lua-script.patch new file mode 100644 index 0000000..63a4700 --- /dev/null +++ b/0520-spec-avoid-creation-of-temp-file-in-lua-script.patch @@ -0,0 +1,205 @@ +From f807b5dd999808a8e56061690da01420d3cb4cc5 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Sat, 9 Feb 2019 14:01:28 +0530 +Subject: [PATCH 520/529] spec: avoid creation of temp file in lua script + +Avoiding creation of temporary file to execute bash shell script from a +lua scriptlet increases install time security. + +Label: DOWNSTREAM ONLY + +BUG: 1410145 +Change-Id: Ie5b9035f292402b18dea768aca8bc82a1e7fa615 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/162621 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 87 ++++++++++--------------------------------------------- + 1 file changed, 15 insertions(+), 72 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 0ad4ffc..2680bec 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1643,12 +1643,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. 
"\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1685,12 +1680,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1727,12 +1717,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1769,12 +1754,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1811,12 +1791,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. 
+-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1853,12 +1828,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1895,12 +1865,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1938,12 +1903,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. 
"\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1981,12 +1941,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2024,12 +1979,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2068,12 +2018,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2111,12 +2056,7 @@ fi + -- Since we run pretrans scripts only for RPMs built for a server build, + -- we can now use os.tmpname() since it is available on RHEL6 and later + -- platforms which are server platforms. 
+-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2173,6 +2113,9 @@ fi + %endif + + %changelog ++* Sat Feb 09 2019 Milind Changire ++- Avoid creation of temporary file in lua script during install (#1410145) ++ + * Wed Dec 19 2018 Milind Changire + - Add explicit package dependencies (#1656357) + - Remove absolute paths from spec file (#1350745) +-- +1.8.3.1 + diff --git a/0521-rpc-use-address-family-option-from-vol-file.patch b/0521-rpc-use-address-family-option-from-vol-file.patch new file mode 100644 index 0000000..a355469 --- /dev/null +++ b/0521-rpc-use-address-family-option-from-vol-file.patch @@ -0,0 +1,276 @@ +From ae01f8acacf8e51b6c3486e3349497bb4e982866 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Sat, 9 Feb 2019 13:38:40 +0530 +Subject: [PATCH 521/529] rpc: use address-family option from vol file + +This patch helps enable IPv6 connections in the cluster. +The default address-family is IPv4 without using this option explicitly. + +When address-family is set to "inet6" in the /etc/glusterfs/glusterd.vol +file, the mount command-line also needs to have +-o xlator-option="transport.address-family=inet6" added to it. + +This option also gets added to the brick command-line. +Snapshot and gfapi use-cases should also use this option to pass in the +inet6 address-family. 
+ +mainline: +> Change-Id: I97db91021af27bacb6d7578e33ea4817f66d7270 +> fixes: bz#1635863 +> Signed-off-by: Milind Changire +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/21948 + +Change-Id: I97db91021af27bacb6d7578e33ea4817f66d7270 +BUG: 1618669 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/162620 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi Suryanarayan +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + api/src/glfs-mgmt.c | 12 +++++++++--- + glusterfsd/src/glusterfsd-mgmt.c | 6 +++++- + libglusterfs/src/common-utils.c | 18 ++++++++++++++++-- + libglusterfs/src/common-utils.h | 3 +++ + rpc/rpc-lib/src/rpc-transport.c | 6 +++--- + rpc/rpc-lib/src/rpc-transport.h | 2 +- + .../snapview-server/src/snapview-server-mgmt.c | 5 ++++- + xlators/mgmt/glusterd/src/glusterd-handler.c | 11 ++++++++--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 7 +++++++ + 9 files changed, 56 insertions(+), 14 deletions(-) + +diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c +index b70dc35..f1281bb 100644 +--- a/api/src/glfs-mgmt.c ++++ b/api/src/glfs-mgmt.c +@@ -646,8 +646,10 @@ glfs_mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count, + * volfile if topology hasn't changed. 
+ * glusterfs_volfile_reconfigure returns 3 possible return states + * return 0 =======> reconfiguration of options has succeeded +- * return 1 =======> the graph has to be reconstructed and all the xlators should be inited +- * return -1(or -ve) =======> Some Internal Error occurred during the operation ++ * return 1 =======> the graph has to be reconstructed and all the ++ * xlators should be inited ++ * return -1(or -ve) =======> Some Internal Error occurred during the ++ * operation + */ + + ret = gf_volfile_reconfigure (fs->oldvollen, tmpfp, fs->ctx, +@@ -988,7 +990,11 @@ glfs_mgmt_init (struct glfs *fs) + !strcmp (cmd_args->volfile_server_transport, "unix")) { + ret = rpc_transport_unix_options_build (&options, host, 0); + } else { +- ret = rpc_transport_inet_options_build (&options, host, port); ++ xlator_cmdline_option_t *opt = ++ find_xlator_option_in_cmd_args_t("address-family", ++ cmd_args); ++ ret = rpc_transport_inet_options_build(&options, host, port, ++ (opt ? opt->value : NULL)); + } + + if (ret) +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index b952526..e38ad64 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -2552,6 +2552,7 @@ glusterfs_mgmt_init (glusterfs_ctx_t *ctx) + int ret = -1; + int port = GF_DEFAULT_BASE_PORT; + char *host = NULL; ++ xlator_cmdline_option_t *opt = NULL; + + cmd_args = &ctx->cmd_args; + GF_VALIDATE_OR_GOTO (THIS->name, cmd_args->volfile_server, out); +@@ -2570,7 +2571,10 @@ glusterfs_mgmt_init (glusterfs_ctx_t *ctx) + !strcmp (cmd_args->volfile_server_transport, "unix")) { + ret = rpc_transport_unix_options_build (&options, host, 0); + } else { +- ret = rpc_transport_inet_options_build (&options, host, port); ++ opt = find_xlator_option_in_cmd_args_t("address-family", ++ cmd_args); ++ ret = rpc_transport_inet_options_build(&options, host, port, ++ (opt ? 
opt->value : NULL)); + } + if (ret) + goto out; +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index 1243754..e3f3989 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -477,8 +477,9 @@ gf_resolve_ip6 (const char *hostname, + } + if ((ret = getaddrinfo(hostname, port_str, &hints, &cache->first)) != 0) { + gf_msg ("resolver", GF_LOG_ERROR, 0, +- LG_MSG_GETADDRINFO_FAILED, "getaddrinfo failed" +- " (%s)", gai_strerror (ret)); ++ LG_MSG_GETADDRINFO_FAILED, ++ "getaddrinfo failed (family:%d) (%s)", family, ++ gai_strerror (ret)); + + GF_FREE (*dnscache); + *dnscache = NULL; +@@ -5136,3 +5137,16 @@ out: + return NULL; + } + ++xlator_cmdline_option_t * ++find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args) ++{ ++ xlator_cmdline_option_t *pos = NULL; ++ xlator_cmdline_option_t *tmp = NULL; ++ ++ list_for_each_entry_safe(pos, tmp, &args->xlator_options, cmd_args) ++ { ++ if (strcmp(pos->key, option_name) == 0) ++ return pos; ++ } ++ return NULL; ++} +diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h +index 50c1f9a..15a31a3 100644 +--- a/libglusterfs/src/common-utils.h ++++ b/libglusterfs/src/common-utils.h +@@ -945,4 +945,7 @@ glusterfs_compute_sha256 (const unsigned char *content, size_t size, + char* + get_struct_variable (int mem_num, gf_gsync_status_t *sts_val); + ++xlator_cmdline_option_t * ++find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args); ++ + #endif /* _COMMON_UTILS_H */ +diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c +index 0c6ab66..b737ff2 100644 +--- a/rpc/rpc-lib/src/rpc-transport.c ++++ b/rpc/rpc-lib/src/rpc-transport.c +@@ -666,7 +666,7 @@ out: + + int + rpc_transport_inet_options_build (dict_t **options, const char *hostname, +- int port) ++ int port, char *af) + { + dict_t *dict = NULL; + char *host = NULL; +@@ -702,10 +702,10 @@ rpc_transport_inet_options_build (dict_t 
**options, const char *hostname, + goto out; + } + +- ret = dict_set_str (dict, "address-family", addr_family); ++ ret = dict_set_str (dict, "address-family", (af != NULL ? af : addr_family)); + if (ret) { + gf_log (THIS->name, GF_LOG_WARNING, +- "failed to set address-family to %s", addr_family); ++ "failed to set address-family to %s", (af != NULL ? af : addr_family)); + goto out; + } + +diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h +index f5fb6e1..c97f98d 100644 +--- a/rpc/rpc-lib/src/rpc-transport.h ++++ b/rpc/rpc-lib/src/rpc-transport.h +@@ -316,7 +316,7 @@ rpc_transport_unix_options_build (dict_t **options, char *filepath, + int frame_timeout); + + int +-rpc_transport_inet_options_build (dict_t **options, const char *hostname, int port); ++rpc_transport_inet_options_build (dict_t **options, const char *hostname, int port, char *af); + + void + rpc_transport_cleanup(rpc_transport_t *); +diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c +index 18c902d..f82c8a0 100644 +--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c ++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c +@@ -84,6 +84,7 @@ svs_mgmt_init (xlator_t *this) + char *host = NULL; + cmd_args_t *cmd_args = NULL; + glusterfs_ctx_t *ctx = NULL; ++ xlator_cmdline_option_t *opt = NULL; + + GF_VALIDATE_OR_GOTO ("snapview-server", this, out); + GF_VALIDATE_OR_GOTO (this->name, this->private, out); +@@ -98,7 +99,9 @@ svs_mgmt_init (xlator_t *this) + if (cmd_args->volfile_server) + host = cmd_args->volfile_server; + +- ret = rpc_transport_inet_options_build (&options, host, port); ++ opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args); ++ ret = rpc_transport_inet_options_build(&options, host, port, ++ (opt != NULL ? 
opt->value : NULL)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to build the " + "transport options"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 81b1c02..e92cb5f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3440,7 +3440,7 @@ out: + + int + glusterd_transport_inet_options_build (dict_t **options, const char *hostname, +- int port) ++ int port, char *af) + { + xlator_t *this = NULL; + dict_t *dict = NULL; +@@ -3458,7 +3458,7 @@ glusterd_transport_inet_options_build (dict_t **options, const char *hostname, + port = GLUSTERD_DEFAULT_PORT; + + /* Build default transport options */ +- ret = rpc_transport_inet_options_build (&dict, hostname, port); ++ ret = rpc_transport_inet_options_build (&dict, hostname, port, af); + if (ret) + goto out; + +@@ -3518,6 +3518,7 @@ glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + int ret = -1; + glusterd_peerctx_t *peerctx = NULL; + data_t *data = NULL; ++ char *af = NULL; + + peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); + if (!peerctx) +@@ -3532,10 +3533,14 @@ glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + number can be used to + uniquely identify a + peerinfo */ ++ ret = dict_get_str(this->options, "transport.address-family", &af); ++ if (ret) ++ gf_log(this->name, GF_LOG_TRACE, ++ "option transport.address-family is not set in xlator options"); + + ret = glusterd_transport_inet_options_build (&options, + peerinfo->hostname, +- peerinfo->port); ++ peerinfo->port, af); + if (ret) + goto out; + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index bafc3af..50758ca 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1975,6 +1975,7 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, + 
rpc_clnt_connection_t *conn = NULL; + int pid = -1; + glusterd_brick_proc_t *brick_proc = NULL; ++ char *inet_family = NULL; + + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); +@@ -2140,6 +2141,12 @@ retry: + runner_argprintf (&runner, + "--volfile-server-transport=socket,rdma"); + ++ ret = dict_get_str(this->options, "transport.address-family", &inet_family); ++ if (!ret) { ++ runner_add_arg(&runner, "--xlator-option"); ++ runner_argprintf(&runner, "transport.address-family=%s", inet_family); ++ } ++ + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); + +-- +1.8.3.1 + diff --git a/0522-fuse-add-lru-limit-option.patch b/0522-fuse-add-lru-limit-option.patch new file mode 100644 index 0000000..a625d91 --- /dev/null +++ b/0522-fuse-add-lru-limit-option.patch @@ -0,0 +1,1028 @@ +From 55e67fb41ae3b4388839723ac929cd239280a0fc Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Thu, 7 Feb 2019 18:06:43 +0530 +Subject: [PATCH 522/529] fuse: add --lru-limit option + +The inode LRU mechanism is moot in fuse xlator (ie. there is no +limit for the LRU list), as fuse inodes are referenced from +kernel context, and thus they can only be dropped on request of +the kernel. This might result in a high number of passive +inodes which are useless for the glusterfs client, causing a +significant memory overhead. + +This change tries to remedy this by extending the LRU semantics +and allowing to set a finite limit on the fuse inode LRU. + +A brief history of problem: + +When gluster's inode table was designed, fuse didn't have any +'invalidate' method, which means, userspace application could +never ask kernel to send a 'forget()' fop, instead had to wait +for kernel to send it based on kernel's parameters. Inode table +remembers the number of times kernel has cached the inode based +on the 'nlookup' parameter. And 'nlookup' field is not used by +any other entry points (like server-protocol, gfapi etc). 
+ +Hence the inode_table of fuse module always has to have lru-limit +as '0', which means no limit. GlusterFS always had to keep all +inodes in memory as kernel would have had a reference to it. +Again, the reason for this is, kernel's glusterfs inode reference +was a pointer to 'inode_t' structure in glusterfs. As it is a +pointer, we could never free it (to prevent segfault, or memory +corruption). + +Solution: + +In the inode table, handle the prune case of inodes with 'nlookup' +differently, and call an 'invalidator' method, which in this case is +fuse_invalidate(), and it sends the request to kernel for getting +the forget request. + +When the kernel sends the forget, it means, it has dropped all +the references to the inode, and it will send the forget with the +'nlookup' parameter too. We just need to make sure to reduce the +'nlookup' value we have when we get forget. That automatically +causes the relevant prune to happen. + +Credits: Csaba Henk, Xavier Hernandez, Raghavendra Gowdappa, Nithya B + +Upstream: +> URL: https://review.gluster.org/19778 + +BUG: 1511779 +Change-Id: Iabe22a62e0f819b7eb67d4ecb850dd559b0c937f +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/162494 +Reviewed-by: Nithya Balachandran +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + doc/mount.glusterfs.8 | 4 + + glusterfsd/src/glusterfsd.c | 24 +++ + glusterfsd/src/glusterfsd.h | 1 + + libglusterfs/src/glusterfs.h | 1 + + libglusterfs/src/inode.c | 256 ++++++++++++++++++++++++---- + libglusterfs/src/inode.h | 17 +- + tests/features/fuse-lru-limit.t | 42 +++++ + xlators/mount/fuse/src/fuse-bridge.c | 121 ++++++++----- + xlators/mount/fuse/src/fuse-bridge.h | 3 + + xlators/mount/fuse/utils/mount.glusterfs.in | 7 + + 10 files changed, 393 insertions(+), 83 deletions(-) + create mode 100644 tests/features/fuse-lru-limit.t + +diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 +index 95aad02..ed6b410 100644 +--- 
a/doc/mount.glusterfs.8 ++++ b/doc/mount.glusterfs.8 +@@ -119,6 +119,10 @@ Provide list of backup volfile servers in the following format [default: None] + \fBDeprecated\fR option - placed here for backward compatibility [default: 1] + .TP + .TP ++\fBlru-limit=\fRN ++Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072] ++.TP ++.TP + \fBbackground-qlen=\fRN + Set fuse module's background queue length to N [default: 64] + .TP +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 990036c..2e2cd77 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -203,6 +203,9 @@ static struct argp_option gf_options[] = { + "[default: 300]"}, + {"resolve-gids", ARGP_RESOLVE_GIDS_KEY, 0, 0, + "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"}, ++ {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, ++ "Set fuse module's limit for number of inodes kept in LRU list to N " ++ "[default: 131072]"}, + {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0, + "Set fuse module's background queue length to N " + "[default: 64]"}, +@@ -462,6 +465,15 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options) + } + } + ++ if (cmd_args->lru_limit >= 0) { ++ ret = dict_set_int32(options, "lru-limit", cmd_args->lru_limit); ++ if (ret < 0) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, ++ "lru-limit"); ++ goto err; ++ } ++ } ++ + if (cmd_args->background_qlen) { + ret = dict_set_int32 (options, "background-qlen", + cmd_args->background_qlen); +@@ -1169,6 +1181,13 @@ parse_opts (int key, char *arg, struct argp_state *state) + cmd_args->resolve_gids = 1; + break; + ++ case ARGP_FUSE_LRU_LIMIT_KEY: ++ if (!gf_string2int32(arg, &cmd_args->lru_limit)) ++ break; ++ ++ argp_failure(state, -1, 0, "unknown LRU limit option %s", arg); ++ break; ++ + case ARGP_FUSE_BACKGROUND_QLEN_KEY: + if (!gf_string2int (arg, &cmd_args->background_qlen)) + break; +@@ -1937,6 +1956,11 @@ parse_cmdline 
(int argc, char *argv[], glusterfs_ctx_t *ctx) + ctx->ssl_cert_depth = glusterfs_read_secure_access_file (); + } + ++ /* Need to set lru_limit to below 0 to indicate there was nothing ++ specified. This is needed as 0 is a valid option, and may not be ++ default value. */ ++ cmd_args->lru_limit = -1; ++ + argp_parse (&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args); + if (cmd_args->print_netgroups) { + /* When this option is set we don't want to do anything else +diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h +index 75cb1d8..1550a30 100644 +--- a/glusterfsd/src/glusterfsd.h ++++ b/glusterfsd/src/glusterfsd.h +@@ -100,6 +100,7 @@ enum argp_option_keys { + ARGP_SUBDIR_MOUNT_KEY = 178, + ARGP_FUSE_EVENT_HISTORY_KEY = 179, + ARGP_READER_THREAD_COUNT_KEY = 180, ++ ARGP_FUSE_LRU_LIMIT_KEY = 190, + }; + + struct _gfd_vol_top_priv { +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index 157437c..2690306 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -413,6 +413,7 @@ struct _cmd_args { + pid_t client_pid; + int client_pid_set; + unsigned uid_map_root; ++ int32_t lru_limit; + int background_qlen; + int congestion_threshold; + char *fuse_mountopts; +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 29d3c8f..f57020a 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -24,6 +24,100 @@ + move latest accessed dentry to list_head of inode + */ + ++/* clang-format off */ ++/* ++ ++Details as per Xavi: ++ ++ I think we should have 3 lists: active, lru and invalidate. ++ ++We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of ++refs, invalidate_sent flag and moving from one list to another must be done ++atomically. 
++ ++With this information, these are the states that cause a transition: ++ ++ refs nlookups inv_sent op ++ 1 0 0 unref -> refs = 0, active--->destroy ++ 1 1 0 unref -> refs = 0, active--->lru ++ 1 1 0 forget -> nlookups = 0, active--->active ++ *0 1 0 forget -> nlookups = 0, lru--->destroy ++ *0 1 1 forget -> nlookups = 0, invalidate--->destroy ++ 0 1 0 ref -> refs = 1, lru--->active ++ 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active ++ 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate ++ 1 1 1 unref -> refs = 0, invalidate--->invalidate ++ 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active ++ ++(*) technically these combinations cannot happen because a forget sent by the ++kernel first calls ref() and then unref(). However it's equivalent. ++ ++overflow means that lru list has grown beyond the limit and the inode needs to ++be invalidated. All other combinations do not cause a change in state or are not ++possible. ++ ++Based on this, the code could be similar to this: ++ ++ ref(inode, inv) ++ { ++ if (refs == 0) { ++ if (inv_sent) { ++ invalidate_count--; ++ inv_sent = 0; ++ } else { ++ lru_count--; ++ } ++ if (inv) { ++ inv_sent = 1; ++ invalidate_count++; ++ list_move(inode, invalidate); ++ } else { ++ active_count++; ++ list_move(inode, active); ++ } ++ } ++ refs++; ++ } ++ ++ unref(inode, clear) ++ { ++ if (clear && inv_sent) { ++ // there is a case of fuse itself sending forget, without ++ // invalidate, after entry delete, like unlink(), rmdir(). 
++ inv_sent = 0; ++ invalidate_count--; ++ active_count++; ++ list_move(inode, active); ++ } ++ refs--; ++ if ((refs == 0) && !inv_sent) { ++ active_count--; ++ if (nlookups == 0) { ++ destroy(inode); ++ } else { ++ lru_count++; ++ list_move(inode, lru); ++ } ++ } ++ } ++ ++ forget(inode) ++ { ++ ref(inode, false); ++ nlookups--; ++ unref(inode, true); ++ } ++ ++ overflow(inode) ++ { ++ ref(inode, true); ++ invalidator(inode); ++ unref(inode, false); ++ } ++ ++*/ ++/* clang-format on */ ++ + #define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \ + { \ + int i = 1; \ +@@ -37,7 +131,7 @@ + } + + static inode_t * +-__inode_unref (inode_t *inode); ++__inode_unref (inode_t *inode, gf_boolean_t clear); + + static int + inode_table_prune (inode_table_t *table); +@@ -138,7 +232,7 @@ __dentry_unset (dentry_t *dentry) + dentry->name = NULL; + + if (dentry->parent) { +- __inode_unref (dentry->parent); ++ __inode_unref (dentry->parent, _gf_false); + dentry->parent = NULL; + } + +@@ -465,7 +559,7 @@ out: + + + static inode_t * +-__inode_unref (inode_t *inode) ++__inode_unref (inode_t *inode, gf_boolean_t clear) + { + int index = 0; + xlator_t *this = NULL; +@@ -473,8 +567,6 @@ __inode_unref (inode_t *inode) + if (!inode) + return NULL; + +- this = THIS; +- + /* + * Root inode should always be in active list of inode table. So unrefs + * on root inode are no-ops. 
+@@ -482,6 +574,14 @@ __inode_unref (inode_t *inode) + if (__is_root_gfid(inode->gfid)) + return inode; + ++ this = THIS; ++ ++ if (clear && inode->invalidate_sent) { ++ inode->invalidate_sent = _gf_false; ++ inode->table->invalidate_size--; ++ __inode_activate(inode); ++ } ++ + GF_ASSERT (inode->ref); + + --inode->ref; +@@ -492,7 +592,7 @@ __inode_unref (inode_t *inode) + inode->_ctx[index].ref--; + } + +- if (!inode->ref) { ++ if (!inode->ref && !inode->invalidate_sent) { + inode->table->active_size--; + + if (inode->nlookup) +@@ -506,7 +606,7 @@ __inode_unref (inode_t *inode) + + + static inode_t * +-__inode_ref (inode_t *inode) ++__inode_ref (inode_t *inode, gf_boolean_t is_invalidate) + { + int index = 0; + xlator_t *this = NULL; +@@ -516,11 +616,6 @@ __inode_ref (inode_t *inode) + + this = THIS; + +- if (!inode->ref) { +- inode->table->lru_size--; +- __inode_activate (inode); +- } +- + /* + * Root inode should always be in active list of inode table. So unrefs + * on root inode are no-ops. 
If we do not allow unrefs but allow refs, +@@ -532,6 +627,22 @@ __inode_ref (inode_t *inode) + if (__is_root_gfid(inode->gfid) && inode->ref) + return inode; + ++ if (!inode->ref) { ++ if (inode->invalidate_sent) { ++ inode->invalidate_sent = _gf_false; ++ inode->table->invalidate_size--; ++ } else { ++ inode->table->lru_size--; ++ } ++ if (is_invalidate) { ++ inode->invalidate_sent = _gf_true; ++ inode->table->invalidate_size++; ++ list_move_tail(&inode->list, &inode->table->invalidate); ++ } else { ++ __inode_activate(inode); ++ } ++ } ++ + inode->ref++; + + index = __inode_get_xl_index (inode, this); +@@ -556,7 +667,7 @@ inode_unref (inode_t *inode) + + pthread_mutex_lock (&table->lock); + { +- inode = __inode_unref (inode); ++ inode = __inode_unref (inode, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -578,7 +689,7 @@ inode_ref (inode_t *inode) + + pthread_mutex_lock (&table->lock); + { +- inode = __inode_ref (inode); ++ inode = __inode_ref (inode, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -614,7 +725,7 @@ __dentry_create (inode_t *inode, inode_t *parent, const char *name) + } + + if (parent) +- newd->parent = __inode_ref (parent); ++ newd->parent = __inode_ref (parent, _gf_false); + + list_add (&newd->inode_list, &inode->dentry_list); + newd->inode = inode; +@@ -685,7 +796,7 @@ inode_new (inode_table_t *table) + { + inode = __inode_create (table); + if (inode != NULL) { +- __inode_ref (inode); ++ __inode_ref (inode, _gf_false); + } + } + pthread_mutex_unlock (&table->lock); +@@ -802,7 +913,7 @@ inode_grep (inode_table_t *table, inode_t *parent, const char *name) + inode = dentry->inode; + + if (inode) +- __inode_ref (inode); ++ __inode_ref (inode, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -947,7 +1058,7 @@ inode_find (inode_table_t *table, uuid_t gfid) + { + inode = __inode_find (table, gfid); + if (inode) +- __inode_ref (inode); ++ __inode_ref (inode, _gf_false); + } + pthread_mutex_unlock (&table->lock); + 
+@@ -1096,7 +1207,7 @@ inode_link (inode_t *inode, inode_t *parent, const char *name, + linked_inode = __inode_link (inode, parent, name, iatt); + + if (linked_inode) +- __inode_ref (linked_inode); ++ __inode_ref (linked_inode, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -1178,6 +1289,31 @@ inode_forget (inode_t *inode, uint64_t nlookup) + return 0; + } + ++int ++inode_forget_with_unref(inode_t *inode, uint64_t nlookup) ++{ ++ inode_table_t *table = NULL; ++ ++ if (!inode) { ++ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, ++ "inode not found"); ++ return -1; ++ } ++ ++ table = inode->table; ++ ++ pthread_mutex_lock(&table->lock); ++ { ++ __inode_forget(inode, nlookup); ++ __inode_unref(inode, _gf_true); ++ } ++ pthread_mutex_unlock(&table->lock); ++ ++ inode_table_prune(table); ++ ++ return 0; ++} ++ + /* + * Invalidate an inode. This is invoked when a translator decides that an inode's + * cache is no longer valid. Any translator interested in taking action in this +@@ -1356,7 +1492,7 @@ inode_parent (inode_t *inode, uuid_t pargfid, const char *name) + parent = dentry->parent; + + if (parent) +- __inode_ref (parent); ++ __inode_ref (parent, _gf_false); + } + pthread_mutex_unlock (&table->lock); + +@@ -1540,6 +1676,7 @@ inode_table_prune (inode_table_t *table) + inode_t *del = NULL; + inode_t *tmp = NULL; + inode_t *entry = NULL; ++ int64_t lru_size = 0; + + if (!table) + return -1; +@@ -1548,8 +1685,11 @@ inode_table_prune (inode_table_t *table) + + pthread_mutex_lock (&table->lock); + { +- while (table->lru_limit +- && table->lru_size > (table->lru_limit)) { ++ if (!table->lru_limit) ++ goto purge_list; ++ ++ lru_size = table->lru_size; ++ while (lru_size > (table->lru_limit)) { + if (list_empty (&table->lru)) { + gf_msg_callingfn (THIS->name, GF_LOG_WARNING, 0, + LG_MSG_INVALID_INODE_LIST, +@@ -1559,7 +1699,18 @@ inode_table_prune (inode_table_t *table) + break; + } + ++ lru_size--; + entry = list_entry 
(table->lru.next, inode_t, list); ++ /* The logic of invalidation is required only if invalidator_fn ++ is present */ ++ if (table->invalidator_fn) { ++ /* check for valid inode with 'nlookup' */ ++ if (entry->nlookup) { ++ __inode_ref(entry, _gf_true); ++ tmp = entry; ++ break; ++ } ++ } + + table->lru_size--; + __inode_retire (entry); +@@ -1567,17 +1718,25 @@ inode_table_prune (inode_table_t *table) + ret++; + } + ++ purge_list: + list_splice_init (&table->purge, &purge); + table->purge_size = 0; + } + pthread_mutex_unlock (&table->lock); + +- { +- list_for_each_entry_safe (del, tmp, &purge, list) { +- list_del_init (&del->list); +- __inode_forget (del, 0); +- __inode_destroy (del); +- } ++ /* Pick 1 inode for invalidation */ ++ if (tmp) { ++ xlator_t *old_THIS = THIS; ++ THIS = table->invalidator_xl; ++ table->invalidator_fn(table->invalidator_xl, tmp); ++ THIS = old_THIS; ++ inode_unref(tmp); ++ } ++ ++ list_for_each_entry_safe (del, tmp, &purge, list) { ++ list_del_init (&del->list); ++ __inode_forget (del, 0); ++ __inode_destroy (del); + } + + return ret; +@@ -1605,9 +1764,12 @@ __inode_table_init_root (inode_table_t *table) + + + inode_table_t * +-inode_table_new (size_t lru_limit, xlator_t *xl) ++inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, ++ int32_t (*invalidator_fn)(xlator_t *, inode_t *), ++ xlator_t *invalidator_xl) + { + inode_table_t *new = NULL; ++ uint32_t mem_pool_size = lru_limit; + int ret = -1; + int i = 0; + +@@ -1619,20 +1781,19 @@ inode_table_new (size_t lru_limit, xlator_t *xl) + new->ctxcount = xl->graph->xl_count + 1; + + new->lru_limit = lru_limit; ++ new->invalidator_fn = invalidator_fn; ++ new->invalidator_xl = invalidator_xl; + + new->hashsize = 14057; /* TODO: Random Number?? */ + +- /* In case FUSE is initing the inode table. 
*/ +- if (lru_limit == 0) +- lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES; +- +- new->inode_pool = mem_pool_new (inode_t, lru_limit); ++ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) ++ mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES; + ++ new->inode_pool = mem_pool_new(inode_t, mem_pool_size); + if (!new->inode_pool) + goto out; + +- new->dentry_pool = mem_pool_new (dentry_t, lru_limit); +- ++ new->dentry_pool = mem_pool_new (dentry_t, mem_pool_size); + if (!new->dentry_pool) + goto out; + +@@ -1667,6 +1828,7 @@ inode_table_new (size_t lru_limit, xlator_t *xl) + INIT_LIST_HEAD (&new->active); + INIT_LIST_HEAD (&new->lru); + INIT_LIST_HEAD (&new->purge); ++ INIT_LIST_HEAD(&new->invalidate); + + ret = gf_asprintf (&new->name, "%s/inode", xl->name); + if (-1 == ret) { +@@ -1696,6 +1858,14 @@ out: + return new; + } + ++inode_table_t * ++inode_table_new(uint32_t lru_limit, xlator_t *xl) ++{ ++ /* Only fuse for now requires the inode table with invalidator */ ++ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); ++} ++ ++ + int + inode_table_ctx_free (inode_table_t *table) + { +@@ -1830,6 +2000,15 @@ inode_table_destroy (inode_table_t *inode_table) { + inode_table->lru_size--; + } + ++ /* Same logic for invalidate list */ ++ while (!list_empty(&inode_table->invalidate)) { ++ trav = list_first_entry(&inode_table->invalidate, ++ inode_t, list); ++ __inode_forget(trav, 0); ++ __inode_retire(trav); ++ inode_table->invalidate_size--; ++ } ++ + while (!list_empty (&inode_table->active)) { + trav = list_first_entry (&inode_table->active, + inode_t, list); +@@ -2347,6 +2526,8 @@ inode_dump (inode_t *inode, char *prefix) + gf_proc_dump_write("active-fd-count", "%u", + inode->active_fd_count); + gf_proc_dump_write("ref", "%u", inode->ref); ++ gf_proc_dump_write("invalidate-sent", "%d", ++ inode->invalidate_sent); + gf_proc_dump_write("ia_type", "%d", inode->ia_type); + if (inode->_ctx) { + inode_ctx = GF_CALLOC (inode->table->ctxcount, +@@ 
-2427,10 +2608,13 @@ inode_table_dump (inode_table_t *itable, char *prefix) + gf_proc_dump_write(key, "%d", itable->lru_size); + gf_proc_dump_build_key(key, prefix, "purge_size"); + gf_proc_dump_write(key, "%d", itable->purge_size); ++ gf_proc_dump_build_key(key, prefix, "invalidate_size"); ++ gf_proc_dump_write(key, "%d", itable->invalidate_size); + + INODE_DUMP_LIST(&itable->active, key, prefix, "active"); + INODE_DUMP_LIST(&itable->lru, key, prefix, "lru"); + INODE_DUMP_LIST(&itable->purge, key, prefix, "purge"); ++ INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate"); + + pthread_mutex_unlock(&itable->lock); + } +diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h +index 7a87748..6a96447 100644 +--- a/libglusterfs/src/inode.h ++++ b/libglusterfs/src/inode.h +@@ -55,6 +55,13 @@ struct _inode_table { + struct mem_pool *dentry_pool; /* memory pool for dentrys */ + struct mem_pool *fd_mem_pool; /* memory pool for fd_t */ + int ctxcount; /* number of slots in inode->ctx */ ++ ++ /* This is required for 'invalidation' when 'nlookup' would be used, ++ specially in case of fuse-bridge */ ++ int32_t (*invalidator_fn)(xlator_t *, inode_t *); ++ xlator_t *invalidator_xl; ++ struct list_head invalidate; /* inodes which are in invalidation queue */ ++ uint32_t invalidate_size; /* count of inodes in invalidation list */ + }; + + +@@ -102,6 +109,7 @@ struct _inode { + struct list_head list; /* active/lru/purge */ + + struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ ++ gf_boolean_t invalidate_sent; /* Set it if invalidator_fn is called for inode */ + }; + + +@@ -110,7 +118,14 @@ struct _inode { + #define GFID_STR_PFX_LEN (sizeof (GFID_STR_PFX) - 1) + + inode_table_t * +-inode_table_new (size_t lru_limit, xlator_t *xl); ++inode_table_new(uint32_t lru_limit, xlator_t *xl); ++ ++inode_table_t * ++inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, ++ int32_t (*invalidator_fn)(xlator_t *, inode_t *), ++ xlator_t 
*invalidator_xl); ++int ++inode_forget_with_unref(inode_t *inode, uint64_t nlookup); + + void + inode_table_destroy_all (glusterfs_ctx_t *ctx); +diff --git a/tests/features/fuse-lru-limit.t b/tests/features/fuse-lru-limit.t +new file mode 100644 +index 0000000..9f12116 +--- /dev/null ++++ b/tests/features/fuse-lru-limit.t +@@ -0,0 +1,42 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} ++TEST $CLI volume start $V0 ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++EXPECT "1" get_mount_active_size_value $V0 $M0 ++EXPECT "0" get_mount_lru_size_value $V0 $M0 ++ ++mkdir ${M0}/dir-{1..9} ++for i in {1..9}; do ++ for j in {1..1000}; do ++ echo "Test file" > ${M0}/dir-$i/file-$j; ++ done; ++done ++lc=$(get_mount_lru_size_value $V0 ${M0}) ++# ideally it should be 9000+ ++TEST [ $lc -ge 9000 ] ++ ++TEST umount $M0 ++ ++TEST glusterfs -s $H0 --volfile-id $V0 --lru-limit 1000 $M0 ++ ++TEST find $M0 ++lc=$(get_mount_lru_size_value $V0 ${M0}) ++# ideally it should be <1000 ++# Not sure if there are any possibilities of buffer need. 
++TEST [ $lc -le 1000 ] ++ ++TEST rm -rf $M0/* ++ ++EXPECT "1" get_mount_active_size_value $V0 $M0 ++EXPECT "0" get_mount_lru_size_value $V0 $M0 ++ ++cleanup +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 8d1e3a0..f3188d6 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -279,29 +279,31 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) + send_fuse_data (this, finh, obj, sizeof (*(obj))) + + +-#if FUSE_KERNEL_MINOR_VERSION >= 11 + static void + fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) + { ++#if FUSE_KERNEL_MINOR_VERSION >= 11 + struct fuse_out_header *fouh = NULL; + struct fuse_notify_inval_entry_out *fnieo = NULL; + fuse_private_t *priv = NULL; + dentry_t *dentry = NULL; ++ dentry_t *tmp = NULL; + inode_t *inode = NULL; + size_t nlen = 0; + fuse_invalidate_node_t *node = NULL; ++ char gfid_str[UUID_CANONICAL_FORM_LEN + 1]; + + priv = this->private; + + if (!priv->reverse_fuse_thread_started) + return; + +- inode = fuse_ino_to_inode(fuse_ino, this); ++ inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) { + return; + } + +- list_for_each_entry (dentry, &inode->dentry_list, inode_list) { ++ list_for_each_entry_safe (dentry, tmp, &inode->dentry_list, inode_list) { + node = GF_CALLOC (1, sizeof (*node), + gf_fuse_mt_invalidate_node_t); + if (node == NULL) +@@ -315,14 +317,31 @@ fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_ENTRY; + +- nlen = strlen (dentry->name); +- fouh->len = sizeof (*fouh) + sizeof (*fnieo) + nlen + 1; +- fnieo->parent = inode_to_fuse_nodeid (dentry->parent); ++ if (dentry->name) { ++ nlen = strlen (dentry->name); ++ fouh->len = sizeof (*fouh) + sizeof (*fnieo) + nlen + 1; ++ fnieo->parent = inode_to_fuse_nodeid (dentry->parent); ++ ++ fnieo->namelen = nlen; ++ strcpy (node->inval_buf + sizeof (*fouh) + sizeof (*fnieo), ++ 
dentry->name); ++ } + +- fnieo->namelen = nlen; +- strcpy (node->inval_buf + sizeof (*fouh) + sizeof (*fnieo), +- dentry->name); ++ gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE entry: " ++ "%"PRIu64"/%s (gfid:%s)", fnieo->parent, dentry->name, ++ uuid_utoa(inode->gfid)); + ++ if (dentry->parent) { ++ fuse_log_eh (this, "Invalidated entry %s (parent: %s)" ++ "(gfid: %s)", dentry->name, ++ uuid_utoa (dentry->parent->gfid), ++ uuid_utoa_r(inode->gfid, gfid_str)); ++ } else { ++ fuse_log_eh (this, "Invalidated entry %s(nodeid: %" ++ PRIu64 ") gfid: %s", ++ dentry->name, fnieo->parent, ++ uuid_utoa (inode->gfid)); ++ } + pthread_mutex_lock (&priv->invalidate_mutex); + { + list_add_tail (&node->next, &priv->invalidate_list); +@@ -330,23 +349,10 @@ fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) + } + pthread_mutex_unlock (&priv->invalidate_mutex); + +- gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE entry: " +- "%"PRIu64"/%s", fnieo->parent, dentry->name); +- +- if (dentry->parent) { +- fuse_log_eh (this, "Invalidated entry %s (parent: %s)", +- dentry->name, +- uuid_utoa (dentry->parent->gfid)); +- } else { +- fuse_log_eh (this, "Invalidated entry %s(nodeid: %" PRIu64 ")", +- dentry->name, fnieo->parent); +- } + } +- +- if (inode) +- inode_unref (inode); ++#endif /* KERNEL_VERSION */ ++ return; + } +-#endif + + /* + * Send an inval inode notification to fuse. 
This causes an invalidation of the +@@ -367,6 +373,10 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + if (!priv->reverse_fuse_thread_started) + return; + ++ inode = (inode_t *)(unsigned long)fuse_ino; ++ if (inode == NULL) ++ return; ++ + node = GF_CALLOC (1, sizeof (*node), gf_fuse_mt_invalidate_node_t); + if (node == NULL) + return; +@@ -386,7 +396,11 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + fniio->off = 0; + fniio->len = -1; + +- inode = fuse_ino_to_inode (fuse_ino, this); ++ fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino, ++ uuid_utoa(inode->gfid)); ++ gf_log("glusterfs-fuse", GF_LOG_TRACE, ++ "INVALIDATE inode: %" PRIu64 "(gfid:%s)", fuse_ino, ++ uuid_utoa(inode->gfid)); + + pthread_mutex_lock (&priv->invalidate_mutex); + { +@@ -395,24 +409,23 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + } + pthread_mutex_unlock (&priv->invalidate_mutex); + +- gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %" PRIu64, +- fuse_ino); +- +- if (inode) { +- fuse_log_eh (this, "Invalidated inode %" PRIu64 " (gfid: %s)", +- fuse_ino, uuid_utoa (inode->gfid)); +- } else { +- fuse_log_eh (this, "Invalidated inode %" PRIu64, fuse_ino); +- } +- +- if (inode) +- inode_unref (inode); + #else + gf_log ("glusterfs-fuse", GF_LOG_WARNING, +- "fuse_invalidate_inode not implemented on OS X due to missing FUSE notification"); ++ "fuse_invalidate_inode not implemented on this system"); + #endif ++ return; + } + ++#if FUSE_KERNEL_MINOR_VERSION >= 11 ++/* Need this function for the signature (inode_t *, instead of uint64_t) */ ++static int32_t ++fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) ++{ ++ fuse_invalidate_entry(this, (uint64_t)inode); ++ return 0; ++} ++#endif ++ + + int + send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error) +@@ -686,11 +699,14 @@ do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup) + { + inode_t *fuse_inode = fuse_ino_to_inode(nodeid, 
this); + ++ gf_log("fuse", GF_LOG_TRACE, ++ "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)", unique, ++ nodeid, nlookup, uuid_utoa(fuse_inode->gfid)); ++ + fuse_log_eh(this, "%"PRIu64": FORGET %"PRIu64"/%"PRIu64" gfid: (%s)", + unique, nodeid, nlookup, uuid_utoa(fuse_inode->gfid)); + +- inode_forget(fuse_inode, nlookup); +- inode_unref(fuse_inode); ++ inode_forget_with_unref(fuse_inode, nlookup); + } + + static void +@@ -705,10 +721,6 @@ fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg, + return; + } + +- gf_log ("glusterfs-fuse", GF_LOG_TRACE, +- "%"PRIu64": FORGET %"PRIu64"/%"PRIu64, +- finh->unique, finh->nodeid, ffi->nlookup); +- + do_forget(this, finh->unique, finh->nodeid, ffi->nlookup); + + GF_FREE (finh); +@@ -4940,7 +4952,9 @@ fuse_thread_proc (void *data) + fuse_in_header_t *finh = NULL; + struct iovec iov_in[2]; + void *msg = NULL; +- const size_t msg0_size = sizeof (*finh) + 128; ++ /* we need 512 extra buffer size for BATCH_FORGET fop. By tests, it is ++ found to be reduces 'REALLOC()' in the loop */ ++ const size_t msg0_size = sizeof (*finh) + 512; + fuse_handler_t **fuse_ops = NULL; + struct pollfd pfd[2] = {{0,}}; + +@@ -5283,7 +5297,12 @@ fuse_graph_setup (xlator_t *this, glusterfs_graph_t *graph) + goto unlock; + } + +- itable = inode_table_new (0, graph->top); ++#if FUSE_KERNEL_MINOR_VERSION >= 11 ++ itable = inode_table_with_invalidator(priv->lru_limit, graph->top, ++ fuse_inode_invalidate_fn, this); ++#else ++ itable = inode_table_new(0, graph->top); ++#endif + if (!itable) { + ret = -1; + goto unlock; +@@ -5740,6 +5759,8 @@ init (xlator_t *this_xl) + } + } + ++ GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit); ++ + GF_OPTION_INIT("event-history", priv->event_history, bool, + cleanup_exit); + +@@ -6061,5 +6082,13 @@ struct volume_options options[] = { + .max = 64, + .description = "Sets fuse reader thread count.", + }, ++ { ++ .key = {"lru-limit"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = 
"131072", ++ .min = 0, ++ .description = "makes glusterfs invalidate kernel inodes after " ++ "reaching this limit (0 means 'unlimited')", ++ }, + { .key = {NULL} }, + }; +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 4ca76e9..4e32a7f 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -144,6 +144,9 @@ struct fuse_private { + gf_boolean_t mount_finished; + gf_boolean_t handle_graph_switch; + pthread_cond_t migrate_cond; ++ ++ /* LRU Limit, if not set, default is 128k for now */ ++ uint32_t lru_limit; + }; + typedef struct fuse_private fuse_private_t; + +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index 817619e..9a0404f 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -245,6 +245,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout"); + fi + ++ if [ -n "$lru_limit" ]; then ++ cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit"); ++ fi ++ + if [ -n "$bg_qlen" ]; then + cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); + fi +@@ -467,6 +471,9 @@ with_options() + "gid-timeout") + gid_timeout=$value + ;; ++ "lru-limit") ++ lru_limit=$value ++ ;; + "background-qlen") + bg_qlen=$value + ;; +-- +1.8.3.1 + diff --git a/0523-libglusterfs-rename-macros-roof-and-floor-to-not-con.patch b/0523-libglusterfs-rename-macros-roof-and-floor-to-not-con.patch new file mode 100644 index 0000000..5070d8a --- /dev/null +++ b/0523-libglusterfs-rename-macros-roof-and-floor-to-not-con.patch @@ -0,0 +1,225 @@ +From 60bd1dcd283b2d464968092711b658c12a900918 Mon Sep 17 00:00:00 2001 +From: Raghavendra Gowdappa +Date: Mon, 11 Feb 2019 16:42:48 +0530 +Subject: [PATCH 523/529] libglusterfs: rename macros roof and floor to not + conflict with math.h + +mainline: +> Change-Id: I666eeb63ebd000711b3f793b948d4e0c04b1a242 +> Signed-off-by: Raghavendra 
Gowdappa +> Updates: bz#1644629 +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/21703 + +Change-Id: I666eeb63ebd000711b3f793b948d4e0c04b1a242 +BUG: 1390151 +Signed-off-by: Raghavendra Gowdappa +Reviewed-on: https://code.engineering.redhat.com/gerrit/162678 +Tested-by: RHGS Build Bot +--- + libglusterfs/src/common-utils.h | 4 ++-- + rpc/rpc-transport/socket/src/socket.c | 2 +- + xlators/cluster/stripe/src/stripe.c | 18 +++++++++--------- + xlators/performance/io-cache/src/io-cache.c | 4 ++-- + xlators/performance/io-cache/src/page.c | 4 ++-- + xlators/performance/read-ahead/src/page.c | 4 ++-- + xlators/performance/read-ahead/src/read-ahead.c | 8 ++++---- + xlators/protocol/server/src/server-rpc-fops.c | 2 +- + 8 files changed, 23 insertions(+), 23 deletions(-) + +diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h +index 15a31a3..af2e0fd 100644 +--- a/libglusterfs/src/common-utils.h ++++ b/libglusterfs/src/common-utils.h +@@ -53,8 +53,8 @@ void trap (void); + + #define min(a,b) ((a)<(b)?(a):(b)) + #define max(a,b) ((a)>(b)?(a):(b)) +-#define roof(a,b) ((((a)+(b)-1)/((b)?(b):1))*(b)) +-#define floor(a,b) (((a)/((b)?(b):1))*(b)) ++#define gf_roof(a, b) ((((a) + (b) - 1)/((b) ? (b) : 1)) * (b)) ++#define gf_floor(a, b) (((a) / ((b) ? 
(b) : 1)) * (b)) + + #define IPv4_ADDR_SIZE 32 + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 34a937f..e28c5cd 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -1646,7 +1646,7 @@ __socket_read_accepted_successful_reply (rpc_transport_t *this) + + /* need to round off to proper roof (%4), as XDR packing pads + the end of opaque object with '0' */ +- size = roof (read_rsp.xdata.xdata_len, 4); ++ size = gf_roof (read_rsp.xdata.xdata_len, 4); + + if (!size) { + frag->call_body.reply.accepted_success_state +diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c +index 6b32f7f..fc809a0 100644 +--- a/xlators/cluster/stripe/src/stripe.c ++++ b/xlators/cluster/stripe/src/stripe.c +@@ -698,10 +698,10 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + * to the size of the previous stripe. + */ + if (i < eof_idx) +- tmp_offset = roof(offset, fctx->stripe_size * ++ tmp_offset = gf_roof(offset, fctx->stripe_size * + fctx->stripe_count); + else if (i > eof_idx) +- tmp_offset = floor(offset, fctx->stripe_size * ++ tmp_offset = gf_floor(offset, fctx->stripe_size * + fctx->stripe_count); + else + tmp_offset = offset; +@@ -3067,10 +3067,10 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d + + if (fctx->stripe_coalesce) { + if (i < eof_idx) +- tmp_offset = roof(offset, fctx->stripe_size * ++ tmp_offset = gf_roof(offset, fctx->stripe_size * + fctx->stripe_count); + else if (i > eof_idx) +- tmp_offset = floor(offset, fctx->stripe_size * ++ tmp_offset = gf_floor(offset, fctx->stripe_size * + fctx->stripe_count); + else + tmp_offset = offset; +@@ -3476,8 +3476,8 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, + * the file is in which child node. Always '0-' part of + * the file resides in the first child. 
+ */ +- rounded_start = floor (offset, stripe_size); +- rounded_end = roof (offset+size, stripe_size); ++ rounded_start = gf_floor (offset, stripe_size); ++ rounded_end = gf_roof (offset+size, stripe_size); + num_stripe = (rounded_end- rounded_start)/stripe_size; + + local = mem_get0 (this->local_pool); +@@ -3510,7 +3510,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, + goto err; + } + +- frame_size = min (roof (frame_offset+1, stripe_size), ++ frame_size = min (gf_roof (frame_offset+1, stripe_size), + (offset + size)) - frame_offset; + + rlocal->node_index = index - off_index; +@@ -3693,8 +3693,8 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + goto err; + } + +- rounded_start = floor(offset, stripe_size); +- rounded_end = roof(offset + total_size, stripe_size); ++ rounded_start = gf_floor(offset, stripe_size); ++ rounded_end = gf_roof(offset + total_size, stripe_size); + total_chunks = (rounded_end - rounded_start) / stripe_size; + local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies), + gf_stripe_mt_stripe_replies); +diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c +index d7b3b37..5ef77b0 100644 +--- a/xlators/performance/io-cache/src/io-cache.c ++++ b/xlators/performance/io-cache/src/io-cache.c +@@ -953,8 +953,8 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, + local = frame->local; + table = ioc_inode->table; + +- rounded_offset = floor (offset, table->page_size); +- rounded_end = roof (offset + size, table->page_size); ++ rounded_offset = gf_floor (offset, table->page_size); ++ rounded_end = gf_roof (offset + size, table->page_size); + trav_offset = rounded_offset; + + /* once a frame does read, it should be waiting on something */ +diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c +index 50f5e19..832c4ee 100644 +--- a/xlators/performance/io-cache/src/page.c ++++ 
b/xlators/performance/io-cache/src/page.c +@@ -43,7 +43,7 @@ __ioc_page_get (ioc_inode_t *ioc_inode, off_t offset) + table = ioc_inode->table; + GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out); + +- rounded_offset = floor (offset, table->page_size); ++ rounded_offset = gf_floor (offset, table->page_size); + + page = rbthash_get (ioc_inode->cache.page_table, &rounded_offset, + sizeof (rounded_offset)); +@@ -256,7 +256,7 @@ __ioc_page_create (ioc_inode_t *ioc_inode, off_t offset) + table = ioc_inode->table; + GF_VALIDATE_OR_GOTO ("io-cache", table, out); + +- rounded_offset = floor (offset, table->page_size); ++ rounded_offset = gf_floor (offset, table->page_size); + + newpage = GF_CALLOC (1, sizeof (*newpage), gf_ioc_mt_ioc_newpage_t); + if (newpage == NULL) { +diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c +index 17e346e..8a5ce58 100644 +--- a/xlators/performance/read-ahead/src/page.c ++++ b/xlators/performance/read-ahead/src/page.c +@@ -25,7 +25,7 @@ ra_page_get (ra_file_t *file, off_t offset) + GF_VALIDATE_OR_GOTO ("read-ahead", file, out); + + page = file->pages.next; +- rounded_offset = floor (offset, file->page_size); ++ rounded_offset = gf_floor (offset, file->page_size); + + while (page != &file->pages && page->offset < rounded_offset) + page = page->next; +@@ -48,7 +48,7 @@ ra_page_create (ra_file_t *file, off_t offset) + GF_VALIDATE_OR_GOTO ("read-ahead", file, out); + + page = file->pages.next; +- rounded_offset = floor (offset, file->page_size); ++ rounded_offset = gf_floor (offset, file->page_size); + + while (page != &file->pages && page->offset < rounded_offset) + page = page->next; +diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c +index e02ca9f..8adbd53 100644 +--- a/xlators/performance/read-ahead/src/read-ahead.c ++++ b/xlators/performance/read-ahead/src/read-ahead.c +@@ -283,7 +283,7 @@ read_ahead (call_frame_t *frame, ra_file_t 
*file) + } + + ra_size = file->page_size * file->page_count; +- ra_offset = floor (file->offset, file->page_size); ++ ra_offset = gf_floor (file->offset, file->page_size); + cap = file->size ? file->size : file->offset + ra_size; + + while (ra_offset < min (file->offset + ra_size, cap)) { +@@ -372,8 +372,8 @@ dispatch_requests (call_frame_t *frame, ra_file_t *file) + local = frame->local; + conf = file->conf; + +- rounded_offset = floor (local->offset, file->page_size); +- rounded_end = roof (local->offset + local->size, file->page_size); ++ rounded_offset = gf_floor (local->offset, file->page_size); ++ rounded_end = gf_roof (local->offset + local->size, file->page_size); + + trav_offset = rounded_offset; + +@@ -532,7 +532,7 @@ ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + + dispatch_requests (frame, file); + +- flush_region (frame, file, 0, floor (offset, file->page_size), 0); ++ flush_region (frame, file, 0, gf_floor (offset, file->page_size), 0); + + read_ahead (frame, file); + +diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c +index 35d0887..b7bb26a 100644 +--- a/xlators/protocol/server/src/server-rpc-fops.c ++++ b/xlators/protocol/server/src/server-rpc-fops.c +@@ -4123,7 +4123,7 @@ server3_3_writev_vecsizer (int state, ssize_t *readsize, char *base_addr, + + /* need to round off to proper roof (%4), as XDR packing pads + the end of opaque object with '0' */ +- size = roof (write_req.xdata.xdata_len, 4); ++ size = gf_roof (write_req.xdata.xdata_len, 4); + + *readsize = size; + +-- +1.8.3.1 + diff --git a/0524-program-GF-DUMP-Shield-ping-processing-from-traffic-.patch b/0524-program-GF-DUMP-Shield-ping-processing-from-traffic-.patch new file mode 100644 index 0000000..46de12e --- /dev/null +++ b/0524-program-GF-DUMP-Shield-ping-processing-from-traffic-.patch @@ -0,0 +1,266 @@ +From 3de9cc04cdf5a65825cc86c8239734a284775470 Mon Sep 17 00:00:00 2001 +From: Raghavendra G +Date: Wed, 
6 Feb 2019 17:30:30 +0530 +Subject: [PATCH 524/529] program/GF-DUMP: Shield ping processing from traffic + to Glusterfs Program + +Since poller thread bears the brunt of execution till the request is +handed over to io-threads, poller thread experiences lock +contention(s) in the control flow till io-threads, which slows it +down. This delay invariably affects reading ping requests from network +and responding to them, resulting in increased ping latencies, which +sometimes results in a ping-timer-expiry on client leading to +disconnect of transport. So, this patch aims to free up poller thread +from executing code of Glusterfs Program. We do this by making + +* Glusterfs Program registering itself asking rpcsvc to execute its + actors in its own threads. +* GF-DUMP Program registering itself asking rpcsvc to _NOT_ execute + its actors in its own threads. Otherwise program's ownthreads become + bottleneck in processing ping traffic. This means that poller thread + reads a ping packet, invokes its actor and hands the response msg to + transport queue. 
+ +Change-Id: I526268c10bdd5ef93f322a4f95385137550a6a49 +Signed-off-by: Raghavendra G +BUG: 1390151 +Reviewed-on: https://review.gluster.org/17105 +NetBSD-regression: NetBSD Build System +CentOS-regression: Gluster Build System +Smoke: Gluster Build System +Reviewed-by: Amar Tumballi +Reviewed-by: Jeff Darcy +(cherry picked from commit 2e72b24707f1886833db0b09e48b3f48b8d68d37) +Reviewed-on: https://code.engineering.redhat.com/gerrit/162426 +Tested-by: RHGS Build Bot +--- + rpc/rpc-lib/src/rpcsvc.c | 90 ++++++++++++++++++++++++++- + rpc/rpc-lib/src/rpcsvc.h | 18 +++++- + xlators/protocol/server/src/server-helpers.c | 4 -- + xlators/protocol/server/src/server-rpc-fops.c | 1 + + 4 files changed, 106 insertions(+), 7 deletions(-) + +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 695e9fb..faa1956 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -304,6 +304,7 @@ rpcsvc_program_actor (rpcsvc_request_t *req) + goto err; + } + ++ req->ownthread = program->ownthread; + req->synctask = program->synctask; + + err = SUCCESS; +@@ -411,6 +412,7 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans, + req->progver = rpc_call_progver (callmsg); + req->procnum = rpc_call_progproc (callmsg); + req->trans = rpc_transport_ref (trans); ++ gf_client_ref (req->trans->xl_private); + req->count = msg->count; + req->msg[0] = progmsg; + req->iobref = iobref_ref (msg->iobref); +@@ -426,6 +428,7 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans, + req->trans_private = msg->private; + + INIT_LIST_HEAD (&req->txlist); ++ INIT_LIST_HEAD (&req->request_list); + req->payloadsize = 0; + + /* By this time, the data bytes for the auth scheme would have already +@@ -576,7 +579,7 @@ rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans, + rpcsvc_request_t *req = NULL; + int ret = -1; + uint16_t port = 0; +- gf_boolean_t is_unix = _gf_false; ++ gf_boolean_t is_unix = _gf_false, empty = _gf_false; + gf_boolean_t unprivileged = 
_gf_false; + drc_cached_op_t *reply = NULL; + rpcsvc_drc_globals_t *drc = NULL; +@@ -692,6 +695,20 @@ rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans, + (synctask_fn_t) actor_fn, + rpcsvc_check_and_reply_error, NULL, + req); ++ } else if (req->ownthread) { ++ pthread_mutex_lock (&req->prog->queue_lock); ++ { ++ empty = list_empty (&req->prog->request_queue); ++ ++ list_add_tail (&req->request_list, ++ &req->prog->request_queue); ++ ++ if (empty) ++ pthread_cond_signal (&req->prog->queue_cond); ++ } ++ pthread_mutex_unlock (&req->prog->queue_lock); ++ ++ ret = 0; + } else { + ret = actor_fn (req); + } +@@ -1572,6 +1589,12 @@ rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t *program) + " Ver: %d, Port: %d", prog->progname, prog->prognum, + prog->progver, prog->progport); + ++ if (prog->ownthread) { ++ prog->alive = _gf_false; ++ ret = 0; ++ goto out; ++ } ++ + pthread_mutex_lock (&svc->rpclock); + { + list_del_init (&prog->program); +@@ -1838,6 +1861,56 @@ out: + return ret; + } + ++void * ++rpcsvc_request_handler (void *arg) ++{ ++ rpcsvc_program_t *program = arg; ++ rpcsvc_request_t *req = NULL; ++ rpcsvc_actor_t *actor = NULL; ++ gf_boolean_t done = _gf_false; ++ int ret = 0; ++ ++ if (!program) ++ return NULL; ++ ++ while (1) { ++ pthread_mutex_lock (&program->queue_lock); ++ { ++ if (!program->alive ++ && list_empty (&program->request_queue)) { ++ done = 1; ++ goto unlock; ++ } ++ ++ while (list_empty (&program->request_queue)) ++ pthread_cond_wait (&program->queue_cond, ++ &program->queue_lock); ++ ++ req = list_entry (program->request_queue.next, ++ typeof (*req), request_list); ++ ++ list_del_init (&req->request_list); ++ } ++ unlock: ++ pthread_mutex_unlock (&program->queue_lock); ++ ++ if (done) ++ break; ++ ++ THIS = req->svc->xl; ++ ++ actor = rpcsvc_program_actor (req); ++ ++ ret = actor->actor (req); ++ ++ if (ret != 0) { ++ rpcsvc_check_and_reply_error (ret, NULL, req); ++ } ++ } ++ ++ return NULL; ++} ++ + int + 
rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program) + { +@@ -1878,6 +1951,21 @@ rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program) + memcpy (newprog, program, sizeof (*program)); + + INIT_LIST_HEAD (&newprog->program); ++ INIT_LIST_HEAD (&newprog->request_queue); ++ pthread_mutex_init (&newprog->queue_lock, NULL); ++ pthread_cond_init (&newprog->queue_cond, NULL); ++ ++ newprog->alive = _gf_true; ++ ++ /* make sure synctask gets priority over ownthread */ ++ if (newprog->synctask) ++ newprog->ownthread = _gf_false; ++ ++ if (newprog->ownthread) { ++ gf_thread_create (&newprog->thread, NULL, ++ rpcsvc_request_handler, ++ newprog, "reqhnd"); ++ } + + pthread_mutex_lock (&svc->rpclock); + { +diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h +index d3aafac..58c0055 100644 +--- a/rpc/rpc-lib/src/rpcsvc.h ++++ b/rpc/rpc-lib/src/rpcsvc.h +@@ -233,7 +233,9 @@ struct rpcsvc_request { + */ + rpcsvc_auth_data_t verf; + +- /* Execute this request's actor function as a synctask?*/ ++ /* Execute this request's actor function in ownthread of program?*/ ++ gf_boolean_t ownthread; ++ + gf_boolean_t synctask; + /* Container for a RPC program wanting to store a temp + * request-specific item. +@@ -245,6 +247,10 @@ struct rpcsvc_request { + + /* pointer to cached reply for use in DRC */ + drc_cached_op_t *reply; ++ ++ /* request queue in rpcsvc */ ++ struct list_head request_list; ++ + }; + + #define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog)) +@@ -395,10 +401,18 @@ struct rpcsvc_program { + */ + int min_auth; + +- /* Execute actor function as a synctask? */ ++ /* Execute actor function in program's own thread? 
*/ ++ /* This will reduce the workload on poller threads */ ++ gf_boolean_t ownthread; ++ gf_boolean_t alive; ++ + gf_boolean_t synctask; + /* list member to link to list of registered services with rpcsvc */ + struct list_head program; ++ struct list_head request_queue; ++ pthread_mutex_t queue_lock; ++ pthread_cond_t queue_cond; ++ pthread_t thread; + }; + + typedef struct rpcsvc_cbk_program { +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index 30045ef..7cc3d15 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ b/xlators/protocol/server/src/server-helpers.c +@@ -557,10 +557,6 @@ get_frame_from_request (rpcsvc_request_t *req) + } + } + +- /* Add a ref for this fop */ +- if (client) +- gf_client_ref (client); +- + frame->root->uid = req->uid; + frame->root->gid = req->gid; + frame->root->pid = req->pid; +diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c +index b7bb26a..db4242d 100644 +--- a/xlators/protocol/server/src/server-rpc-fops.c ++++ b/xlators/protocol/server/src/server-rpc-fops.c +@@ -6143,4 +6143,5 @@ struct rpcsvc_program glusterfs3_3_fop_prog = { + .progver = GLUSTER_FOP_VERSION, + .numactors = GLUSTER_FOP_PROCCNT, + .actors = glusterfs3_3_fop_actors, ++ .ownthread = _gf_true, + }; +-- +1.8.3.1 + diff --git a/0525-rpcsvc-provide-each-request-handler-thread-its-own-q.patch b/0525-rpcsvc-provide-each-request-handler-thread-its-own-q.patch new file mode 100644 index 0000000..a0dc399 --- /dev/null +++ b/0525-rpcsvc-provide-each-request-handler-thread-its-own-q.patch @@ -0,0 +1,1211 @@ +From 667e92a8dd0a21902cef39a59bc6c6b77d1f3c26 Mon Sep 17 00:00:00 2001 +From: Raghavendra Gowdappa +Date: Mon, 11 Feb 2019 12:32:52 +0530 +Subject: [PATCH 525/529] rpcsvc: provide each request handler thread its own + queue + +A single global per program queue is contended by all request handler +threads and event threads. 
This can lead to high contention. So, +reduce the contention by providing each request handler thread its own +private queue. + +Thanks to "Manoj Pillai" for the idea of pairing a +single queue with a fixed request-handler-thread and event-thread, +which brought down the performance regression due to overhead of +queuing significantly. + +Thanks to "Xavi Hernandez" for discussion on +how to communicate the event-thread death to request-handler-thread. + +Thanks to "Karan Sandha" for voluntarily running +the perf benchmarks to qualify that performance regression introduced +by ping-timer-fixes is fixed with this patch and patiently running +many iterations of regression tests while RCAing the issue. + +Thanks to "Milind Changire" for patiently running +the many iterations of perf benchmarking tests while RCAing the +regression caused by ping-timer-expiry fixes. + +Change-Id: I578c3fc67713f4234bd3abbec5d3fbba19059ea5 +BUG: 1390151 +Signed-off-by: Raghavendra Gowdappa +(cherry picked from commit 95e380eca19b9f0d03a53429535f15556e5724ad) +Reviewed-on: https://code.engineering.redhat.com/gerrit/162427 +Tested-by: RHGS Build Bot +--- + cli/src/cli-rl.c | 4 +- + libglusterfs/src/event-epoll.c | 156 +++++++++--- + libglusterfs/src/event-poll.c | 14 +- + libglusterfs/src/event.c | 11 +- + libglusterfs/src/event.h | 19 +- + rpc/rpc-lib/src/rpc-clnt.c | 6 + + rpc/rpc-lib/src/rpc-transport.c | 4 + + rpc/rpc-lib/src/rpc-transport.h | 3 + + rpc/rpc-lib/src/rpcsvc.c | 339 +++++++++++++++++++++++---- + rpc/rpc-lib/src/rpcsvc.h | 32 ++- + rpc/rpc-transport/socket/src/socket.c | 29 ++- + xlators/protocol/server/src/server-helpers.c | 4 + + xlators/protocol/server/src/server.c | 3 + + 13 files changed, 530 insertions(+), 94 deletions(-) + +diff --git a/cli/src/cli-rl.c b/cli/src/cli-rl.c +index 4745cf4..cffd0a8 100644 +--- a/cli/src/cli-rl.c ++++ b/cli/src/cli-rl.c +@@ -109,7 +109,7 @@ cli_rl_process_line (char *line) + + int + cli_rl_stdin (int fd, int idx, int gen, void *data, +- 
int poll_out, int poll_in, int poll_err) ++ int poll_out, int poll_in, int poll_err, char event_thread_died) + { + struct cli_state *state = NULL; + +@@ -394,7 +394,7 @@ cli_rl_enable (struct cli_state *state) + } + + ret = event_register (state->ctx->event_pool, 0, cli_rl_stdin, state, +- 1, 0); ++ 1, 0, 0); + if (ret == -1) + goto out; + +diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c +index 7fc53ff..310bce3 100644 +--- a/libglusterfs/src/event-epoll.c ++++ b/libglusterfs/src/event-epoll.c +@@ -32,6 +32,7 @@ struct event_slot_epoll { + int fd; + int events; + int gen; ++ int idx; + int ref; + int do_close; + int in_handler; +@@ -39,6 +40,7 @@ struct event_slot_epoll { + void *data; + event_handler_t handler; + gf_lock_t lock; ++ struct list_head poller_death; + }; + + struct event_thread_data { +@@ -60,6 +62,7 @@ __event_newtable (struct event_pool *event_pool, int table_idx) + for (i = 0; i < EVENT_EPOLL_SLOTS; i++) { + table[i].fd = -1; + LOCK_INIT (&table[i].lock); ++ INIT_LIST_HEAD(&table[i].poller_death); + } + + event_pool->ereg[table_idx] = table; +@@ -70,7 +73,8 @@ __event_newtable (struct event_pool *event_pool, int table_idx) + + + static int +-__event_slot_alloc (struct event_pool *event_pool, int fd) ++__event_slot_alloc (struct event_pool *event_pool, int fd, ++ char notify_poller_death) + { + int i = 0; + int table_idx = -1; +@@ -105,34 +109,42 @@ __event_slot_alloc (struct event_pool *event_pool, int fd) + + table_idx = i; + +- for (i = 0; i < EVENT_EPOLL_SLOTS; i++) { +- if (table[i].fd == -1) { +- /* wipe everything except bump the generation */ +- gen = table[i].gen; +- memset (&table[i], 0, sizeof (table[i])); +- table[i].gen = gen + 1; +- +- LOCK_INIT (&table[i].lock); ++ for (i = 0; i < EVENT_EPOLL_SLOTS; i++) { ++ if (table[i].fd == -1) { ++ /* wipe everything except bump the generation */ ++ gen = table[i].gen; ++ memset (&table[i], 0, sizeof (table[i])); ++ table[i].gen = gen + 1; ++ ++ LOCK_INIT 
(&table[i].lock); ++ INIT_LIST_HEAD(&table[i].poller_death); ++ ++ table[i].fd = fd; ++ if (notify_poller_death) { ++ table[i].idx = table_idx * EVENT_EPOLL_SLOTS + i; ++ list_add_tail(&table[i].poller_death, ++ &event_pool->poller_death); ++ } + +- table[i].fd = fd; +- event_pool->slots_used[table_idx]++; ++ event_pool->slots_used[table_idx]++; + +- break; +- } +- } ++ break; ++ } ++ } + + return table_idx * EVENT_EPOLL_SLOTS + i; + } + + + static int +-event_slot_alloc (struct event_pool *event_pool, int fd) ++event_slot_alloc (struct event_pool *event_pool, int fd, ++ char notify_poller_death) + { + int idx = -1; + + pthread_mutex_lock (&event_pool->mutex); + { +- idx = __event_slot_alloc (event_pool, fd); ++ idx = __event_slot_alloc (event_pool, fd, notify_poller_death); + } + pthread_mutex_unlock (&event_pool->mutex); + +@@ -162,6 +174,7 @@ __event_slot_dealloc (struct event_pool *event_pool, int idx) + slot->fd = -1; + slot->handled_error = 0; + slot->in_handler = 0; ++ list_del_init(&slot->poller_death); + event_pool->slots_used[table_idx]--; + + return; +@@ -180,6 +193,23 @@ event_slot_dealloc (struct event_pool *event_pool, int idx) + return; + } + ++static int ++event_slot_ref(struct event_slot_epoll *slot) ++{ ++ int ref; ++ ++ if (!slot) ++ return -1; ++ ++ LOCK (&slot->lock); ++ { ++ slot->ref++; ++ ref = slot->ref; ++ } ++ UNLOCK (&slot->lock); ++ ++ return ref; ++} + + static struct event_slot_epoll * + event_slot_get (struct event_pool *event_pool, int idx) +@@ -198,15 +228,44 @@ event_slot_get (struct event_pool *event_pool, int idx) + + slot = &table[offset]; + ++ event_slot_ref (slot); ++ return slot; ++} ++ ++static void ++__event_slot_unref(struct event_pool *event_pool, struct event_slot_epoll *slot, ++ int idx) ++{ ++ int ref = -1; ++ int fd = -1; ++ int do_close = 0; ++ + LOCK (&slot->lock); + { +- slot->ref++; ++ --(slot->ref); ++ ref = slot->ref; + } + UNLOCK (&slot->lock); + +- return slot; +-} ++ if (ref) ++ /* slot still alive */ ++ 
goto done; ++ ++ LOCK(&slot->lock); ++ { ++ fd = slot->fd; ++ do_close = slot->do_close; ++ slot->do_close = 0; ++ } ++ UNLOCK(&slot->lock); ++ ++ __event_slot_dealloc(event_pool, idx); + ++ if (do_close) ++ sys_close(fd); ++done: ++ return; ++} + + static void + event_slot_unref (struct event_pool *event_pool, struct event_slot_epoll *slot, +@@ -264,7 +323,7 @@ event_pool_new_epoll (int count, int eventthreadcount) + event_pool->fd = epfd; + + event_pool->count = count; +- ++ INIT_LIST_HEAD(&event_pool->poller_death); + event_pool->eventthreadcount = eventthreadcount; + event_pool->auto_thread_count = 0; + +@@ -315,7 +374,8 @@ __slot_update_events (struct event_slot_epoll *slot, int poll_in, int poll_out) + int + event_register_epoll (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out) ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death) + { + int idx = -1; + int ret = -1; +@@ -345,7 +405,7 @@ event_register_epoll (struct event_pool *event_pool, int fd, + if (destroy == 1) + goto out; + +- idx = event_slot_alloc (event_pool, fd); ++ idx = event_slot_alloc (event_pool, fd, notify_poller_death); + if (idx == -1) { + gf_msg ("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, + "could not find slot for fd=%d", fd); +@@ -583,7 +643,7 @@ pre_unlock: + ret = handler (fd, idx, gen, data, + (event->events & (EPOLLIN|EPOLLPRI)), + (event->events & (EPOLLOUT)), +- (event->events & (EPOLLERR|EPOLLHUP))); ++ (event->events & (EPOLLERR|EPOLLHUP)), 0); + } + out: + event_slot_unref (event_pool, slot, idx); +@@ -600,7 +660,10 @@ event_dispatch_epoll_worker (void *data) + struct event_thread_data *ev_data = data; + struct event_pool *event_pool; + int myindex = -1; +- int timetodie = 0; ++ int timetodie = 0, gen = 0; ++ struct list_head poller_death_notify; ++ struct event_slot_epoll *slot = NULL, *tmp = NULL; ++ + + GF_VALIDATE_OR_GOTO ("event", ev_data, out); + +@@ -610,7 +673,7 @@ 
event_dispatch_epoll_worker (void *data) + GF_VALIDATE_OR_GOTO ("event", event_pool, out); + + gf_msg ("epoll", GF_LOG_INFO, 0, LG_MSG_STARTED_EPOLL_THREAD, "Started" +- " thread with index %d", myindex); ++ " thread with index %d", myindex - 1); + + pthread_mutex_lock (&event_pool->mutex); + { +@@ -627,21 +690,58 @@ event_dispatch_epoll_worker (void *data) + * reconfigured always */ + pthread_mutex_lock (&event_pool->mutex); + { +- if (event_pool->eventthreadcount < +- myindex) { ++ if (event_pool->eventthreadcount < myindex) { ++ while (event_pool->poller_death_sliced) { ++ pthread_cond_wait( ++ &event_pool->cond, ++ &event_pool->mutex); ++ } ++ ++ INIT_LIST_HEAD(&poller_death_notify); ++ + /* if found true in critical section, + * die */ + event_pool->pollers[myindex - 1] = 0; + event_pool->activethreadcount--; + timetodie = 1; ++ gen = ++event_pool->poller_gen; ++ list_for_each_entry(slot, &event_pool->poller_death, ++ poller_death) ++ { ++ event_slot_ref(slot); ++ } ++ ++ list_splice_init(&event_pool->poller_death, ++ &poller_death_notify); ++ event_pool->poller_death_sliced = 1; ++ + pthread_cond_broadcast (&event_pool->cond); + } + } + pthread_mutex_unlock (&event_pool->mutex); + if (timetodie) { ++ list_for_each_entry(slot, &poller_death_notify, poller_death) ++ { ++ slot->handler(slot->fd, 0, gen, slot->data, 0, 0, 0, 1); ++ } ++ ++ pthread_mutex_lock(&event_pool->mutex); ++ { ++ list_for_each_entry_safe(slot, tmp, &poller_death_notify, poller_death) ++ { ++ __event_slot_unref(event_pool, slot, slot->idx); ++ } ++ ++ list_splice(&poller_death_notify, ++ &event_pool->poller_death); ++ event_pool->poller_death_sliced = 0; ++ pthread_cond_broadcast(&event_pool->cond); ++ } ++ pthread_mutex_unlock(&event_pool->mutex); ++ + gf_msg ("epoll", GF_LOG_INFO, 0, + LG_MSG_EXITED_EPOLL_THREAD, "Exited " +- "thread with index %d", myindex); ++ "thread with index %d", myindex - 1); + goto out; + } + } +diff --git a/libglusterfs/src/event-poll.c 
b/libglusterfs/src/event-poll.c +index 3bffc47..ca00071 100644 +--- a/libglusterfs/src/event-poll.c ++++ b/libglusterfs/src/event-poll.c +@@ -36,12 +36,14 @@ struct event_slot_poll { + static int + event_register_poll (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out); ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death); + + + static int + __flush_fd (int fd, int idx, int gen, void *data, +- int poll_in, int poll_out, int poll_err) ++ int poll_in, int poll_out, int poll_err, ++ char notify_poller_death) + { + char buf[64]; + int ret = -1; +@@ -153,7 +155,7 @@ event_pool_new_poll (int count, int eventthreadcount) + } + + ret = event_register_poll (event_pool, event_pool->breaker[0], +- __flush_fd, NULL, 1, 0); ++ __flush_fd, NULL, 1, 0, 0); + if (ret == -1) { + gf_msg ("poll", GF_LOG_ERROR, 0, LG_MSG_REGISTER_PIPE_FAILED, + "could not register pipe fd with poll event loop"); +@@ -180,7 +182,8 @@ event_pool_new_poll (int count, int eventthreadcount) + static int + event_register_poll (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out) ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death) + { + int idx = -1; + +@@ -389,7 +392,8 @@ unlock: + ret = handler (ufds[i].fd, idx, 0, data, + (ufds[i].revents & (POLLIN|POLLPRI)), + (ufds[i].revents & (POLLOUT)), +- (ufds[i].revents & (POLLERR|POLLHUP|POLLNVAL))); ++ (ufds[i].revents & (POLLERR|POLLHUP|POLLNVAL)), ++ 0); + + return ret; + } +diff --git a/libglusterfs/src/event.c b/libglusterfs/src/event.c +index bba6f84..8463c19 100644 +--- a/libglusterfs/src/event.c ++++ b/libglusterfs/src/event.c +@@ -58,14 +58,16 @@ event_pool_new (int count, int eventthreadcount) + int + event_register (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out) ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death) + { + int ret 
= -1; + + GF_VALIDATE_OR_GOTO ("event", event_pool, out); + + ret = event_pool->ops->event_register (event_pool, fd, handler, data, +- poll_in, poll_out); ++ poll_in, poll_out, ++ notify_poller_death); + out: + return ret; + } +@@ -170,7 +172,8 @@ out: + + int + poller_destroy_handler (int fd, int idx, int gen, void *data, +- int poll_out, int poll_in, int poll_err) ++ int poll_out, int poll_in, int poll_err, ++ char event_thread_exit) + { + struct event_destroy_data *destroy = NULL; + int readfd = -1, ret = -1; +@@ -239,7 +242,7 @@ event_dispatch_destroy (struct event_pool *event_pool) + /* From the main thread register an event on the pipe fd[0], + */ + idx = event_register (event_pool, fd[0], poller_destroy_handler, +- &data, 1, 0); ++ &data, 1, 0, 0); + if (idx < 0) + goto out; + +diff --git a/libglusterfs/src/event.h b/libglusterfs/src/event.h +index c60b14a..875cd7d 100644 +--- a/libglusterfs/src/event.h ++++ b/libglusterfs/src/event.h +@@ -12,6 +12,7 @@ + #define _EVENT_H_ + + #include ++#include "list.h" + + struct event_pool; + struct event_ops; +@@ -24,7 +25,8 @@ struct event_data { + + + typedef int (*event_handler_t) (int fd, int idx, int gen, void *data, +- int poll_in, int poll_out, int poll_err); ++ int poll_in, int poll_out, int poll_err, ++ char event_thread_exit); + + #define EVENT_EPOLL_TABLES 1024 + #define EVENT_EPOLL_SLOTS 1024 +@@ -41,6 +43,13 @@ struct event_pool { + struct event_slot_epoll *ereg[EVENT_EPOLL_TABLES]; + int slots_used[EVENT_EPOLL_TABLES]; + ++ struct list_head poller_death; ++ int poller_death_sliced; /* track whether the list of fds interested ++ * poller_death is sliced. If yes, new thread ++ * death notification has to wait till the ++ * list is added back ++ */ ++ int poller_gen; + int used; + int changed; + +@@ -54,7 +63,7 @@ struct event_pool { + * epoll. */ + int eventthreadcount; /* number of event threads to execute. 
*/ + pthread_t pollers[EVENT_MAX_THREADS]; /* poller thread_id store, +- * and live status */ ++ * and live status */ + int destroy; + int activethreadcount; + +@@ -83,7 +92,8 @@ struct event_ops { + + int (*event_register) (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out); ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death); + + int (*event_select_on) (struct event_pool *event_pool, int fd, int idx, + int poll_in, int poll_out); +@@ -107,7 +117,8 @@ int event_select_on (struct event_pool *event_pool, int fd, int idx, + int poll_in, int poll_out); + int event_register (struct event_pool *event_pool, int fd, + event_handler_t handler, +- void *data, int poll_in, int poll_out); ++ void *data, int poll_in, int poll_out, ++ char notify_poller_death); + int event_unregister (struct event_pool *event_pool, int fd, int idx); + int event_unregister_close (struct event_pool *event_pool, int fd, int idx); + int event_dispatch (struct event_pool *event_pool); +diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c +index fd7e3ec..fe5e3fd 100644 +--- a/rpc/rpc-lib/src/rpc-clnt.c ++++ b/rpc/rpc-lib/src/rpc-clnt.c +@@ -1013,6 +1013,12 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, + */ + ret = 0; + break; ++ ++ case RPC_TRANSPORT_EVENT_THREAD_DIED: ++ /* only meaningful on a server, no need of handling this event on a ++ * client */ ++ ret = 0; ++ break; + } + + out: +diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c +index b737ff2..db02338 100644 +--- a/rpc/rpc-lib/src/rpc-transport.c ++++ b/rpc/rpc-lib/src/rpc-transport.c +@@ -294,6 +294,10 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name) + goto fail; + } + ++ if (dict_get(options, "notify-poller-death")) { ++ trans->notify_poller_death = 1; ++ } ++ + gf_log ("rpc-transport", GF_LOG_DEBUG, + "attempt to load file %s", name); + +diff --git 
a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h +index c97f98d..cf77c9d 100644 +--- a/rpc/rpc-lib/src/rpc-transport.h ++++ b/rpc/rpc-lib/src/rpc-transport.h +@@ -99,6 +99,7 @@ typedef enum { + RPC_TRANSPORT_MSG_RECEIVED, /* Complete rpc msg has been read */ + RPC_TRANSPORT_CONNECT, /* client is connected to server */ + RPC_TRANSPORT_MSG_SENT, ++ RPC_TRANSPORT_EVENT_THREAD_DIED /* event-thread has died */ + } rpc_transport_event_t; + + struct rpc_transport_msg { +@@ -218,6 +219,8 @@ struct rpc_transport { + */ + gf_boolean_t connect_failed; + gf_atomic_t disconnect_progress; ++ char notify_poller_death; ++ char poller_death_accept; + }; + + struct rpc_transport_ops { +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index faa1956..c769463 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -8,6 +8,7 @@ + cases as published by the Free Software Foundation. + */ + ++#include + #include "rpcsvc.h" + #include "rpc-transport.h" + #include "dict.h" +@@ -56,9 +57,76 @@ int + rpcsvc_notify (rpc_transport_t *trans, void *mydata, + rpc_transport_event_t event, void *data, ...); + ++void * ++rpcsvc_request_handler(void *arg); ++ + static int + rpcsvc_match_subnet_v4 (const char *addrtok, const char *ipaddr); + ++void ++rpcsvc_toggle_queue_status(rpcsvc_program_t *prog, ++ rpcsvc_request_queue_t *queue, char status[]) ++{ ++ int queue_index = 0, status_index = 0, set_bit = 0; ++ ++ if (queue != &prog->request_queue[0]) { ++ queue_index = (queue - &prog->request_queue[0]); ++ } ++ ++ status_index = queue_index / 8; ++ set_bit = queue_index % 8; ++ ++ status[status_index] ^= (1 << set_bit); ++ ++ return; ++} ++ ++static int ++get_rightmost_set_bit(int n) ++{ ++ return log2(n & -n); ++} ++ ++int ++rpcsvc_get_free_queue_index(rpcsvc_program_t *prog) ++{ ++ int queue_index = 0, max_index = 0, i = 0; ++ unsigned int right_most_unset_bit = 0; ++ ++ right_most_unset_bit = 8; ++ ++ max_index = gf_roof(EVENT_MAX_THREADS, 8) / 8; 
++ for (i = 0; i < max_index; i++) { ++ if (prog->request_queue_status[i] == 0) { ++ right_most_unset_bit = 0; ++ break; ++ } else { ++ right_most_unset_bit = get_rightmost_set_bit( ++ ~prog->request_queue_status[i]); ++ if (right_most_unset_bit < 8) { ++ break; ++ } ++ } ++ } ++ ++ if (right_most_unset_bit > 7) { ++ queue_index = -1; ++ } else { ++ queue_index = i * 8; ++ queue_index += right_most_unset_bit; ++ ++ if (queue_index > EVENT_MAX_THREADS) { ++ queue_index = -1; ++ } ++ } ++ ++ if (queue_index != -1) { ++ prog->request_queue_status[i] |= (0x1 << right_most_unset_bit); ++ } ++ ++ return queue_index; ++} ++ + rpcsvc_notify_wrapper_t * + rpcsvc_notify_wrapper_alloc (void) + { +@@ -412,7 +480,6 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans, + req->progver = rpc_call_progver (callmsg); + req->procnum = rpc_call_progproc (callmsg); + req->trans = rpc_transport_ref (trans); +- gf_client_ref (req->trans->xl_private); + req->count = msg->count; + req->msg[0] = progmsg; + req->iobref = iobref_ref (msg->iobref); +@@ -570,6 +637,73 @@ rpcsvc_check_and_reply_error (int ret, call_frame_t *frame, void *opaque) + return 0; + } + ++void ++rpcsvc_queue_event_thread_death(rpcsvc_t *svc, rpcsvc_program_t *prog, int gen) ++{ ++ rpcsvc_request_queue_t *queue = NULL; ++ int num = 0; ++ void *value = NULL; ++ rpcsvc_request_t *req = NULL; ++ char empty = 0; ++ ++ value = pthread_getspecific(prog->req_queue_key); ++ if (value == NULL) { ++ return; ++ } ++ ++ num = ((unsigned long)value) - 1; ++ ++ queue = &prog->request_queue[num]; ++ ++ if (queue->gen == gen) { ++ /* duplicate event */ ++ gf_log(GF_RPCSVC, GF_LOG_INFO, ++ "not queuing duplicate event thread death. 
" ++ "queue %d program %s", ++ num, prog->progname); ++ return; ++ } ++ ++ rpcsvc_alloc_request(svc, req); ++ req->prognum = RPCSVC_INFRA_PROGRAM; ++ req->procnum = RPCSVC_PROC_EVENT_THREAD_DEATH; ++ gf_log(GF_RPCSVC, GF_LOG_INFO, ++ "queuing event thread death request to queue %d of program %s", num, ++ prog->progname); ++ ++ pthread_mutex_lock(&queue->queue_lock); ++ { ++ empty = list_empty(&queue->request_queue); ++ ++ list_add_tail(&req->request_list, &queue->request_queue); ++ queue->gen = gen; ++ ++ if (empty && queue->waiting) ++ pthread_cond_signal(&queue->queue_cond); ++ } ++ pthread_mutex_unlock(&queue->queue_lock); ++ ++ return; ++} ++ ++int ++rpcsvc_handle_event_thread_death(rpcsvc_t *svc, rpc_transport_t *trans, int gen) ++{ ++ rpcsvc_program_t *prog = NULL; ++ ++ pthread_mutex_lock (&svc->rpclock); ++ { ++ list_for_each_entry(prog, &svc->programs, program) ++ { ++ if (prog->ownthread) ++ rpcsvc_queue_event_thread_death(svc, prog, gen); ++ } ++ } ++ pthread_mutex_unlock (&svc->rpclock); ++ ++ return 0; ++} ++ + int + rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans, + rpc_transport_pollin_t *msg) +@@ -581,8 +715,12 @@ rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans, + uint16_t port = 0; + gf_boolean_t is_unix = _gf_false, empty = _gf_false; + gf_boolean_t unprivileged = _gf_false; ++ gf_boolean_t spawn_request_handler = _gf_false; + drc_cached_op_t *reply = NULL; + rpcsvc_drc_globals_t *drc = NULL; ++ rpcsvc_request_queue_t *queue = NULL; ++ long num = 0; ++ void *value = NULL; + + if (!trans || !svc) + return -1; +@@ -696,20 +834,83 @@ rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans, + rpcsvc_check_and_reply_error, NULL, + req); + } else if (req->ownthread) { +- pthread_mutex_lock (&req->prog->queue_lock); ++ value = pthread_getspecific(req->prog->req_queue_key); ++ if (value == NULL) { ++ pthread_mutex_lock(&req->prog->thr_lock); ++ { ++ num = rpcsvc_get_free_queue_index(req->prog); ++ if (num != -1) { ++ 
num++; ++ value = (void *)num; ++ ret = pthread_setspecific(req->prog->req_queue_key, ++ value); ++ if (ret < 0) { ++ gf_log(GF_RPCSVC, GF_LOG_WARNING, ++ "setting request queue in TLS failed"); ++ rpcsvc_toggle_queue_status( ++ req->prog, &req->prog->request_queue[num - 1], ++ req->prog->request_queue_status); ++ num = -1; ++ } else { ++ spawn_request_handler = 1; ++ } ++ } ++ } ++ pthread_mutex_unlock(&req->prog->thr_lock); ++ } ++ ++ if (num == -1) ++ goto noqueue; ++ ++ num = ((unsigned long)value) - 1; ++ ++ queue = &req->prog->request_queue[num]; ++ ++ if (spawn_request_handler) { ++ ret = gf_thread_create(&queue->thread, NULL, ++ rpcsvc_request_handler, queue, ++ "rpcrqhnd"); ++ if (!ret) { ++ gf_log(GF_RPCSVC, GF_LOG_INFO, ++ "spawned a request handler " ++ "thread for queue %d", ++ (int)num); ++ ++ req->prog->threadcount++; ++ } else { ++ gf_log(GF_RPCSVC, GF_LOG_INFO, ++ "spawning a request handler " ++ "thread for queue %d failed", ++ (int)num); ++ ret = pthread_setspecific(req->prog->req_queue_key, 0); ++ if (ret < 0) { ++ gf_log(GF_RPCSVC, GF_LOG_WARNING, ++ "resetting request " ++ "queue in TLS failed"); ++ } ++ ++ rpcsvc_toggle_queue_status( ++ req->prog, &req->prog->request_queue[num - 1], ++ req->prog->request_queue_status); ++ ++ goto noqueue; ++ } ++ } ++ ++ pthread_mutex_lock(&queue->queue_lock); + { +- empty = list_empty (&req->prog->request_queue); ++ empty = list_empty(&queue->request_queue); + +- list_add_tail (&req->request_list, +- &req->prog->request_queue); ++ list_add_tail(&req->request_list, &queue->request_queue); + +- if (empty) +- pthread_cond_signal (&req->prog->queue_cond); ++ if (empty && queue->waiting) ++ pthread_cond_signal(&queue->queue_cond); + } +- pthread_mutex_unlock (&req->prog->queue_lock); ++ pthread_mutex_unlock(&queue->queue_lock); + + ret = 0; + } else { ++noqueue: + ret = actor_fn (req); + } + } +@@ -838,6 +1039,12 @@ rpcsvc_notify (rpc_transport_t *trans, void *mydata, + "got MAP_XID event, which should have not 
come"); + ret = 0; + break; ++ ++ case RPC_TRANSPORT_EVENT_THREAD_DIED: ++ rpcsvc_handle_event_thread_death(svc, trans, ++ (int)(unsigned long)data); ++ ret = 0; ++ break; + } + + out: +@@ -1779,6 +1986,7 @@ rpcsvc_create_listeners (rpcsvc_t *svc, dict_t *options, char *name) + goto out; + } + ++ dict_del(options, "notify-poller-death"); + GF_FREE (transport_name); + transport_name = NULL; + count++; +@@ -1864,50 +2072,87 @@ out: + void * + rpcsvc_request_handler (void *arg) + { +- rpcsvc_program_t *program = arg; +- rpcsvc_request_t *req = NULL; ++ rpcsvc_request_queue_t *queue = NULL; ++ rpcsvc_program_t *program = NULL; ++ rpcsvc_request_t *req = NULL, *tmp_req = NULL; + rpcsvc_actor_t *actor = NULL; + gf_boolean_t done = _gf_false; + int ret = 0; ++ struct list_head tmp_list = { ++ 0, ++ }; ++ ++ queue = arg; ++ program = queue->program; ++ ++ INIT_LIST_HEAD(&tmp_list); + + if (!program) + return NULL; + + while (1) { +- pthread_mutex_lock (&program->queue_lock); ++ pthread_mutex_lock(&queue->queue_lock); + { +- if (!program->alive +- && list_empty (&program->request_queue)) { ++ if (!program->alive && list_empty(&queue->request_queue)) { + done = 1; + goto unlock; + } +- +- while (list_empty (&program->request_queue)) +- pthread_cond_wait (&program->queue_cond, +- &program->queue_lock); +- +- req = list_entry (program->request_queue.next, +- typeof (*req), request_list); +- +- list_del_init (&req->request_list); ++ while (list_empty(&queue->request_queue)) { ++ queue->waiting = _gf_true; ++ pthread_cond_wait(&queue->queue_cond, &queue->queue_lock); ++ } ++ queue->waiting = _gf_false; ++ if (!list_empty(&queue->request_queue)) { ++ INIT_LIST_HEAD(&tmp_list); ++ list_splice_init(&queue->request_queue, &tmp_list); ++ } ++ } ++unlock: ++ pthread_mutex_unlock(&queue->queue_lock); ++ list_for_each_entry_safe(req, tmp_req, &tmp_list, request_list) ++ { ++ list_del_init(&req->request_list); ++ if (req) { ++ if (req->prognum == RPCSVC_INFRA_PROGRAM) { ++ switch 
(req->procnum) { ++ case RPCSVC_PROC_EVENT_THREAD_DEATH: ++ gf_log(GF_RPCSVC, GF_LOG_INFO, ++ "event thread died, exiting request handler " ++ "thread for queue %d of program %s", ++ (int)(queue - &program->request_queue[0]), ++ program->progname); ++ done = 1; ++ ++ pthread_mutex_lock(&program->thr_lock); ++ { ++ rpcsvc_toggle_queue_status( ++ program, queue, ++ program->request_queue_status); ++ program->threadcount--; ++ } ++ pthread_mutex_unlock(&program->thr_lock); ++ rpcsvc_request_destroy(req); ++ break; ++ ++ default: ++ break; ++ } ++ } else { ++ THIS = req->svc->xl; ++ actor = rpcsvc_program_actor(req); ++ ret = actor->actor(req); ++ ++ if (ret != 0) { ++ rpcsvc_check_and_reply_error(ret, NULL, req); ++ } ++ ++ req = NULL; ++ } ++ } + } +- unlock: +- pthread_mutex_unlock (&program->queue_lock); +- + if (done) + break; +- +- THIS = req->svc->xl; +- +- actor = rpcsvc_program_actor (req); +- +- ret = actor->actor (req); +- +- if (ret != 0) { +- rpcsvc_check_and_reply_error (ret, NULL, req); +- } + } +- + return NULL; + } + +@@ -1917,6 +2162,7 @@ rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program) + int ret = -1; + rpcsvc_program_t *newprog = NULL; + char already_registered = 0; ++ int i = 0; + + if (!svc) { + goto out; +@@ -1951,9 +2197,16 @@ rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program) + memcpy (newprog, program, sizeof (*program)); + + INIT_LIST_HEAD (&newprog->program); +- INIT_LIST_HEAD (&newprog->request_queue); +- pthread_mutex_init (&newprog->queue_lock, NULL); +- pthread_cond_init (&newprog->queue_cond, NULL); ++ ++ for (i = 0; i < EVENT_MAX_THREADS; i++) { ++ INIT_LIST_HEAD(&newprog->request_queue[i].request_queue); ++ pthread_mutex_init(&newprog->request_queue[i].queue_lock, NULL); ++ pthread_cond_init(&newprog->request_queue[i].queue_cond, NULL); ++ newprog->request_queue[i].program = newprog; ++ } ++ ++ pthread_mutex_init(&newprog->thr_lock, NULL); ++ pthread_cond_init(&newprog->thr_cond, NULL); + + 
newprog->alive = _gf_true; + +@@ -1962,9 +2215,11 @@ rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program) + newprog->ownthread = _gf_false; + + if (newprog->ownthread) { +- gf_thread_create (&newprog->thread, NULL, +- rpcsvc_request_handler, +- newprog, "reqhnd"); ++ struct event_pool *ep = svc->ctx->event_pool; ++ newprog->eventthreadcount = ep->eventthreadcount; ++ ++ pthread_key_create(&newprog->req_queue_key, NULL); ++ newprog->thr_queue = 1; + } + + pthread_mutex_lock (&svc->rpclock); +diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h +index 58c0055..f500bab 100644 +--- a/rpc/rpc-lib/src/rpcsvc.h ++++ b/rpc/rpc-lib/src/rpcsvc.h +@@ -33,6 +33,16 @@ + #define MAX_IOVEC 16 + #endif + ++/* TODO: we should store prognums at a centralized location to avoid conflict ++ or use a robust random number generator to avoid conflicts ++*/ ++ ++#define RPCSVC_INFRA_PROGRAM 7712846 /* random number */ ++ ++typedef enum { ++ RPCSVC_PROC_EVENT_THREAD_DEATH = 0, ++} rpcsvc_infra_procnum_t; ++ + #define RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT 64 /* Default for protocol/server */ + #define RPCSVC_DEF_NFS_OUTSTANDING_RPC_LIMIT 16 /* Default for nfs/server */ + #define RPCSVC_MAX_OUTSTANDING_RPC_LIMIT 65536 +@@ -349,6 +359,16 @@ typedef struct rpcsvc_actor_desc { + drc_op_type_t op_type; + } rpcsvc_actor_t; + ++typedef struct rpcsvc_request_queue { ++ int gen; ++ struct list_head request_queue; ++ pthread_mutex_t queue_lock; ++ pthread_cond_t queue_cond; ++ pthread_t thread; ++ struct rpcsvc_program *program; ++ gf_boolean_t waiting; ++} rpcsvc_request_queue_t; ++ + /* Describes a program and its version along with the function pointers + * required to handle the procedures/actors of each program/version. + * Never changed ever by any thread so no need for a lock. 
+@@ -409,10 +429,14 @@ struct rpcsvc_program { + gf_boolean_t synctask; + /* list member to link to list of registered services with rpcsvc */ + struct list_head program; +- struct list_head request_queue; +- pthread_mutex_t queue_lock; +- pthread_cond_t queue_cond; +- pthread_t thread; ++ rpcsvc_request_queue_t request_queue[EVENT_MAX_THREADS]; ++ char request_queue_status[EVENT_MAX_THREADS / 8 + 1]; ++ pthread_mutex_t thr_lock; ++ pthread_cond_t thr_cond; ++ int thr_queue; ++ pthread_key_t req_queue_key; ++ int threadcount; ++ int eventthreadcount; + }; + + typedef struct rpcsvc_cbk_program { +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index e28c5cd..df984f8 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -2419,7 +2419,8 @@ static int socket_disconnect (rpc_transport_t *this, gf_boolean_t wait); + /* reads rpc_requests during pollin */ + static int + socket_event_handler (int fd, int idx, int gen, void *data, +- int poll_in, int poll_out, int poll_err) ++ int poll_in, int poll_out, int poll_err, ++ char event_thread_died) + { + rpc_transport_t *this = NULL; + socket_private_t *priv = NULL; +@@ -2429,6 +2430,13 @@ socket_event_handler (int fd, int idx, int gen, void *data, + + this = data; + ++ if (event_thread_died) { ++ /* to avoid duplicate notifications, ++ * notify only for listener sockets ++ */ ++ return 0; ++ } ++ + GF_VALIDATE_OR_GOTO ("socket", this, out); + GF_VALIDATE_OR_GOTO ("socket", this->private, out); + GF_VALIDATE_OR_GOTO ("socket", this->xl, out); +@@ -2720,7 +2728,8 @@ socket_spawn (rpc_transport_t *this) + + static int + socket_server_event_handler (int fd, int idx, int gen, void *data, +- int poll_in, int poll_out, int poll_err) ++ int poll_in, int poll_out, int poll_err, ++ char event_thread_died) + { + rpc_transport_t *this = NULL; + socket_private_t *priv = NULL; +@@ -2742,6 +2751,12 @@ socket_server_event_handler (int fd, int idx, int 
gen, void *data, + priv = this->private; + ctx = this->ctx; + ++ if (event_thread_died) { ++ rpc_transport_notify(this, RPC_TRANSPORT_EVENT_THREAD_DIED, ++ (void *)(unsigned long)gen); ++ return 0; ++ } ++ + /* NOTE: + * We have done away with the critical section in this function. since + * there's little that it helps with. There's no other code that +@@ -2840,6 +2855,7 @@ socket_server_event_handler (int fd, int idx, int gen, void *data, + new_trans->mydata = this->mydata; + new_trans->notify = this->notify; + new_trans->listener = this; ++ new_trans->notify_poller_death = this->poller_death_accept; + new_priv = new_trans->private; + + if (new_sockaddr.ss_family == AF_UNIX) { +@@ -2935,7 +2951,8 @@ socket_server_event_handler (int fd, int idx, int gen, void *data, + new_sock, + socket_event_handler, + new_trans, +- 1, 0); ++ 1, 0, ++ new_trans->notify_poller_death); + if (new_priv->idx == -1) { + ret = -1; + gf_log(this->name, GF_LOG_ERROR, +@@ -3388,7 +3405,8 @@ handler: + else { + priv->idx = event_register (ctx->event_pool, priv->sock, + socket_event_handler, +- this, 1, 1); ++ this, 1, 1, ++ this->notify_poller_death); + if (priv->idx == -1) { + gf_log ("", GF_LOG_WARNING, + "failed to register the event"); +@@ -3560,7 +3578,8 @@ socket_listen (rpc_transport_t *this) + + priv->idx = event_register (ctx->event_pool, priv->sock, + socket_server_event_handler, +- this, 1, 0); ++ this, 1, 0, ++ this->notify_poller_death); + + if (priv->idx == -1) { + gf_log (this->name, GF_LOG_WARNING, +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index 7cc3d15..30045ef 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ b/xlators/protocol/server/src/server-helpers.c +@@ -557,6 +557,10 @@ get_frame_from_request (rpcsvc_request_t *req) + } + } + ++ /* Add a ref for this fop */ ++ if (client) ++ gf_client_ref (client); ++ + frame->root->uid = req->uid; + frame->root->gid = req->gid; + frame->root->pid = 
req->pid; +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index ba3b831..d32f5dd 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -1342,6 +1342,9 @@ init (xlator_t *this) + ret = -1; + goto out; + } ++ ++ ret = dict_set_int32(this->options, "notify-poller-death", 1); ++ + ret = rpcsvc_create_listeners (conf->rpc, this->options, + this->name); + if (ret < 1) { +-- +1.8.3.1 + diff --git a/0526-fuse-interrupt-handling-framework.patch b/0526-fuse-interrupt-handling-framework.patch new file mode 100644 index 0000000..1670ca3 --- /dev/null +++ b/0526-fuse-interrupt-handling-framework.patch @@ -0,0 +1,671 @@ +From 77716a11910ca2b88f37ff549776f7778cc17dae Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Thu, 9 Aug 2018 11:46:33 +0200 +Subject: [PATCH 526/529] fuse: interrupt handling framework + +- add sub-framework to send timed responses to kernel +- add interrupt handler queue +- implement INTERRUPT + +fuse_interrupt looks up handlers for interrupted messages +in the queue. If found, it invokes the handler function. +Else responds with EAGAIN with a delay. + +See spec at + +https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.17#n148 + +and explanation in comments. 
+ +Upstream: https://review.gluster.org/20686 +> Change-Id: I1a79d3679b31f36e14b4ac8f60b7f2c1ea2badfb +> updates: #465 +> Signed-off-by: Csaba Henk + +Change-Id: Idff76920aaa9f87b185dabb0b431a31fcd2a2c77 +BUG: 1595246 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/162549 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi Suryanarayan +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/timespec.c | 16 ++ + libglusterfs/src/timespec.h | 1 + + xlators/mount/fuse/src/fuse-bridge.c | 464 +++++++++++++++++++++++++++++++- + xlators/mount/fuse/src/fuse-bridge.h | 39 +++ + xlators/mount/fuse/src/fuse-mem-types.h | 2 + + 5 files changed, 521 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c +index 903303d..55f7575 100644 +--- a/libglusterfs/src/timespec.c ++++ b/libglusterfs/src/timespec.c +@@ -72,3 +72,19 @@ void timespec_sub (const struct timespec *begin, const struct timespec *end, + res->tv_nsec = end->tv_nsec - begin->tv_nsec; + } + } ++ ++int ++timespec_cmp(const struct timespec *lhs_ts, const struct timespec *rhs_ts) ++{ ++ if (lhs_ts->tv_sec < rhs_ts->tv_sec) { ++ return -1; ++ } else if (lhs_ts->tv_sec > rhs_ts->tv_sec) { ++ return 1; ++ } else if (lhs_ts->tv_nsec < rhs_ts->tv_nsec) { ++ return -1; ++ } else if (lhs_ts->tv_nsec > rhs_ts->tv_nsec) { ++ return 1; ++ } ++ ++ return 0; ++} +diff --git a/libglusterfs/src/timespec.h b/libglusterfs/src/timespec.h +index 9c393ee..aa37951 100644 +--- a/libglusterfs/src/timespec.h ++++ b/libglusterfs/src/timespec.h +@@ -23,5 +23,6 @@ void timespec_adjust_delta (struct timespec *ts, struct timespec delta); + void timespec_sub (const struct timespec *begin, + const struct timespec *end, + struct timespec *res); ++int timespec_cmp(const struct timespec *lhs_ts, const struct timespec *rhs_ts); + + #endif /* __INCLUDE_TIMESPEC_H__ */ +diff --git a/xlators/mount/fuse/src/fuse-bridge.c 
b/xlators/mount/fuse/src/fuse-bridge.c +index f3188d6..0d4b9db 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -15,6 +15,7 @@ + #include "compat-errno.h" + #include "glusterfs-acl.h" + #include "syscall.h" ++#include "timespec.h" + + #ifdef __NetBSD__ + #undef open /* in perfuse.h, pulled from mount-gluster-compat.h */ +@@ -426,6 +427,361 @@ fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) + } + #endif + ++static fuse_timed_message_t * ++fuse_timed_message_new (void) ++{ ++ fuse_timed_message_t *dmsg = NULL; ++ ++ dmsg = GF_MALLOC (sizeof (*dmsg), gf_fuse_mt_timed_message_t); ++ if (!dmsg) { ++ return NULL; ++ } ++ ++ /* should be NULL if not set */ ++ dmsg->fuse_message_body = NULL; ++ INIT_LIST_HEAD (&dmsg->next); ++ ++ return dmsg; ++} ++ ++static void ++fuse_timed_message_free (fuse_timed_message_t *dmsg) ++{ ++ GF_FREE (dmsg->fuse_message_body); ++ GF_FREE (dmsg); ++} ++ ++static void ++send_fuse_timed (xlator_t *this, fuse_timed_message_t *dmsg) ++{ ++ fuse_private_t *priv = NULL; ++ ++ priv = this->private; ++ ++ if (!priv->timed_response_fuse_thread_started) { ++ return; ++ } ++ ++ pthread_mutex_lock (&priv->timed_mutex); ++ { ++ list_add_tail (&dmsg->next, &priv->timed_list); ++ pthread_cond_signal (&priv->timed_cond); ++ } ++ pthread_mutex_unlock (&priv->timed_mutex); ++} ++ ++fuse_interrupt_record_t * ++fuse_interrupt_record_new (fuse_in_header_t *finh, ++ fuse_interrupt_handler_t handler) ++{ ++ fuse_interrupt_record_t *fir = NULL; ++ ++ fir = GF_MALLOC (sizeof (*fir), gf_fuse_mt_interrupt_record_t); ++ if (!fir) { ++ return NULL; ++ } ++ ++ fir->hit = _gf_false; ++ fir->interrupt_state = INTERRUPT_NONE; ++ fir->data = NULL; ++ ++ fir->interrupt_handler = handler; ++ memcpy (&fir->fuse_in_header, finh, sizeof (*finh)); ++ pthread_cond_init (&fir->handler_cond, NULL); ++ pthread_mutex_init (&fir->handler_mutex, NULL); ++ INIT_LIST_HEAD (&fir->next); ++ ++ return fir; ++} ++ ++static void 
++fuse_interrupt_record_free (fuse_interrupt_record_t *fir, void **datap) ++{ ++ /* ++ * If caller wishes, we give back the private data to let them deal with it ++ * however they want; otherwise we take care of freeing it. ++ */ ++ if (datap) { ++ *datap = fir->data; ++ } else { ++ GF_FREE (fir->data); ++ } ++ ++ GF_FREE (fir); ++} ++ ++void ++fuse_interrupt_record_insert (xlator_t *this, fuse_interrupt_record_t *fir) ++{ ++ fuse_private_t *priv = NULL; ++ ++ priv = this->private; ++ pthread_mutex_lock (&priv->interrupt_mutex); ++ { ++ list_add_tail (&fir->next, &priv->interrupt_list); ++ } ++ pthread_mutex_unlock (&priv->interrupt_mutex); ++} ++ ++static fuse_interrupt_record_t * ++fuse_interrupt_record_fetch (xlator_t *this, uint64_t unique, gf_boolean_t reap) ++{ ++ fuse_interrupt_record_t *fir = NULL; ++ gf_boolean_t found = _gf_false; ++ fuse_private_t *priv = NULL; ++ ++ priv = this->private; ++ pthread_mutex_lock (&priv->interrupt_mutex); ++ { ++ list_for_each_entry (fir, &priv->interrupt_list, next) ++ { ++ if (fir->fuse_in_header.unique == unique) { ++ /* ++ * If we are to reap, we do it regardless the ++ * hit flag; otherwise we take the record only ++ * hasn't yet flagged hit. ++ */ ++ if (reap || !fir->hit) { ++ found = _gf_true; ++ } ++ /* ++ * If we are not reaping (coming from handler ++ * context), we set the hit flag. 
++ */ ++ if (!reap) { ++ fir->hit = _gf_true; ++ } ++ break; ++ } ++ } ++ if (found && reap) { ++ list_del (&fir->next); ++ } ++ } ++ pthread_mutex_unlock (&priv->interrupt_mutex); ++ ++ if (found) { ++ return fir; ++ } ++ return NULL; ++} ++ ++static fuse_interrupt_record_t * ++fuse_interrupt_record_get (xlator_t *this, uint64_t unique) ++{ ++ return fuse_interrupt_record_fetch (this, unique, _gf_false); ++} ++ ++static fuse_interrupt_record_t * ++fuse_interrupt_record_reap (xlator_t *this, uint64_t unique) ++{ ++ return fuse_interrupt_record_fetch (this, unique, _gf_true); ++} ++ ++static void ++fuse_interrupt (xlator_t *this, fuse_in_header_t *finh, void *msg, ++ struct iobuf *iobuf) ++{ ++ struct fuse_interrupt_in *fii = msg; ++ fuse_interrupt_record_t *fir = NULL; ++ ++ gf_log ("glusterfs-fuse", GF_LOG_TRACE, ++ "unique %" PRIu64 " INTERRUPT for %" PRIu64, finh->unique, ++ fii->unique); ++ ++ fir = fuse_interrupt_record_get (this, fii->unique); ++ if (fir) { ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, ++ "unique %" PRIu64 " INTERRUPT for %" PRIu64 ++ ": handler triggered", ++ finh->unique, fii->unique); ++ ++ fir->interrupt_handler (this, fir); ++ } else { ++ fuse_timed_message_t *dmsg = NULL; ++ ++ /* ++ * No record found for this interrupt request. ++ * ++ * It's either because the handler for the interrupted message ++ * does not want to handle interrupt, or this interrupt ++ * message beat the interrupted which hasn't yet added a record ++ * to the interrupt queue. Either case we reply with error ++ * EAGAIN with some (0.01 sec) delay. That will have this ++ * interrupt request resent, unless the interrupted message ++ * has been already answered. ++ * ++ * So effectively we are looping in between kernel and ++ * userspace, which will be exited either when the interrupted ++ * message handler has added an interrupt record, or has ++ * replied to kernel. 
See ++ * ++ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/ ++ * linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.18#n148 ++ */ ++ ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, ++ "unique %" PRIu64 " INTERRUPT for %" PRIu64 ": no handler found", ++ finh->unique, fii->unique); ++ ++ dmsg = fuse_timed_message_new (); ++ if (!dmsg) { ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ++ "unique %" PRIu64 " INTERRUPT for %" PRIu64 ++ ":" ++ " failed to allocate timed message", ++ finh->unique, fii->unique); ++ ++ return; ++ } ++ ++ dmsg->fuse_out_header.unique = finh->unique; ++ dmsg->fuse_out_header.len = sizeof (dmsg->fuse_out_header); ++ dmsg->fuse_out_header.error = -EAGAIN; ++ timespec_now (&dmsg->scheduled_ts); ++ timespec_adjust_delta (&dmsg->scheduled_ts, ++ (struct timespec){0, 10000000}); ++ ++ send_fuse_timed (this, dmsg); ++ } ++} ++ ++/* ++ * Function to be called in fop cbk context (if the fop engages ++ * with interrupt handling). ++ */ ++gf_boolean_t ++fuse_interrupt_finish_fop (call_frame_t *frame, xlator_t *this, ++ gf_boolean_t sync, void **datap) ++{ ++ fuse_interrupt_record_t *fir = NULL; ++ fuse_state_t *state = frame->root->state; ++ fuse_in_header_t *finh = state->finh; ++ gf_boolean_t hit = _gf_false; ++ gf_boolean_t handled = _gf_false; ++ fuse_interrupt_state_t intstat_orig = INTERRUPT_NONE; ++ ++ fir = fuse_interrupt_record_reap (this, finh->unique); ++ if (!fir) { ++ /* ++ * No interrupt record was inserted (however, caller would usually know ++ * about that and there is no point then in calling this function). ++ */ ++ return _gf_false; ++ } ++ ++ /* ++ * The interrupt handler (if finds the record) modifies fir->hit; however, ++ * that could have occurred only before fuse_interrupt_record_reap (), so ++ * we are safe here with a lock-free access. 
++ */ ++ hit = fir->hit; ++ if (hit) { ++ pthread_mutex_lock (&fir->handler_mutex); ++ { ++ intstat_orig = fir->interrupt_state; ++ if (fir->interrupt_state == INTERRUPT_NONE) { ++ fir->interrupt_state = INTERRUPT_SQUELCHED; ++ if (sync) { ++ while (fir->interrupt_state == INTERRUPT_NONE) { ++ pthread_cond_wait (&fir->handler_cond, ++ &fir->handler_mutex); ++ } ++ } ++ } ++ } ++ pthread_mutex_unlock (&fir->handler_mutex); ++ } ++ ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig); ++ ++ /* ++ * From this on fir can only be referred under the conditions that imply ++ * we are to free it (otherwise interrupt handler might have already freed ++ * it). ++ */ ++ ++ if (/* there was no interrupt */ ++ !hit || ++ /* lost the race against interrupt handler */ ++ intstat_orig != INTERRUPT_NONE || ++ /* we took cleaning up on us */ ++ sync) { ++ /* cleaning up */ ++ fuse_interrupt_record_free (fir, datap); ++ } else if (datap) { ++ *datap = NULL; ++ } ++ ++ handled = (intstat_orig == INTERRUPT_HANDLED); ++ if (handled) { ++ /* ++ * Fuse request was answered already from interrupt context, we can do ++ * away with the stack. ++ */ ++ free_fuse_state (state); ++ STACK_DESTROY (frame->root); ++ } ++ ++ /* ++ * Let caller know if they have to answer the fuse request. ++ */ ++ return handled; ++} ++ ++/* ++ * Function to be called in interrupt handler context. 
++ */ ++void ++fuse_interrupt_finish_interrupt (xlator_t *this, fuse_interrupt_record_t *fir, ++ fuse_interrupt_state_t intstat, ++ gf_boolean_t sync, void **datap) ++{ ++ fuse_in_header_t finh = { ++ 0, ++ }; ++ fuse_interrupt_state_t intstat_orig = INTERRUPT_NONE; ++ ++ pthread_mutex_lock (&fir->handler_mutex); ++ { ++ intstat_orig = fir->interrupt_state; ++ if (fir->interrupt_state == INTERRUPT_NONE) { ++ fir->interrupt_state = intstat; ++ if (sync) { ++ pthread_cond_signal (&fir->handler_cond); ++ } ++ } ++ finh = fir->fuse_in_header; ++ } ++ pthread_mutex_unlock (&fir->handler_mutex); ++ ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig); ++ ++ /* ++ * From this on fir can only be referred under the conditions that imply ++ * we are to free it (otherwise fop handler might have already freed it). ++ */ ++ ++ if (/* we won the race, response is up to us */ ++ intstat_orig == INTERRUPT_NONE && ++ /* interrupt handling was successful, let the kernel know */ ++ intstat == INTERRUPT_HANDLED) { ++ send_fuse_err (this, &finh, EINTR); ++ } ++ ++ if (/* lost the race ... */ ++ intstat_orig != INTERRUPT_NONE && ++ /* ++ * ... and there is no contract with fop handler that it does the ++ * cleanup ... ++ */ ++ !sync) { ++ /* ... so we do! 
*/ ++ fuse_interrupt_record_free (fir, datap); ++ } else if (datap) { ++ *datap = NULL; ++ } ++} + + int + send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error) +@@ -4100,6 +4456,89 @@ notify_kernel_loop (void *data) + } + #endif + ++static void * ++timed_response_loop (void *data) ++{ ++ ssize_t rv = 0; ++ size_t len = 0; ++ xlator_t *this = NULL; ++ fuse_private_t *priv = NULL; ++ fuse_timed_message_t *dmsg = NULL; ++ fuse_timed_message_t *tmp = NULL; ++ struct timespec now = {0,}; ++ struct timespec delta = {0,}; ++ struct iovec iovs[2] = {{0,},}; ++ fuse_in_header_t finh = {0,}; ++ ++ this = data; ++ priv = this->private; ++ ++ for (;;) { ++ pthread_mutex_lock (&priv->timed_mutex); ++ { ++ while (list_empty (&priv->timed_list)) { ++ pthread_cond_wait (&priv->timed_cond, &priv->timed_mutex); ++ } ++ ++ dmsg = list_entry (priv->timed_list.next, fuse_timed_message_t, ++ next); ++ list_for_each_entry (tmp, &priv->timed_list, next) ++ { ++ if (timespec_cmp (&tmp->scheduled_ts, &dmsg->scheduled_ts) < 0) { ++ dmsg = tmp; ++ } ++ } ++ ++ list_del_init (&dmsg->next); ++ } ++ pthread_mutex_unlock (&priv->timed_mutex); ++ ++ timespec_now (&now); ++ if (timespec_cmp (&now, &dmsg->scheduled_ts) < 0) { ++ timespec_sub (&now, &dmsg->scheduled_ts, &delta); ++ nanosleep (&delta, NULL); ++ } ++ ++ gf_log ("glusterfs-fuse", GF_LOG_TRACE, ++ "sending timed message of unique %"PRIu64, ++ dmsg->fuse_out_header.unique); ++ ++ len = dmsg->fuse_out_header.len; ++ iovs[0] = (struct iovec){&dmsg->fuse_out_header, ++ sizeof (struct fuse_out_header)}; ++ iovs[1] = (struct iovec){dmsg->fuse_message_body, ++ len - sizeof (struct fuse_out_header)}; ++ /* ++ * Nasty hack to allow us to use the send_fuse_iov API, ++ * which we resort to, as the API used in original upstream ++ * code used is not available in this codebase. 
++ */ ++ finh.unique = dmsg->fuse_out_header.unique; ++ rv = send_fuse_iov (this, &finh, iovs, 2); ++ ++ fuse_timed_message_free (dmsg); ++ ++ if (rv == EBADF) { ++ break; ++ } ++ } ++ ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, "timed response loop terminated"); ++ ++ pthread_mutex_lock (&priv->timed_mutex); ++ { ++ priv->timed_response_fuse_thread_started = _gf_false; ++ list_for_each_entry_safe (dmsg, tmp, &priv->timed_list, next) ++ { ++ list_del_init (&dmsg->next); ++ fuse_timed_message_free (dmsg); ++ } ++ } ++ pthread_mutex_unlock (&priv->timed_mutex); ++ ++ return NULL; ++} ++ + static void + fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +@@ -4112,6 +4551,7 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg, + #if FUSE_KERNEL_MINOR_VERSION >= 9 + pthread_t messenger; + #endif ++ pthread_t delayer; + + priv = this->private; + +@@ -4160,6 +4600,19 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg, + fino.flags |= FUSE_BIG_WRITES; + } + ++ /* Start the thread processing timed responses */ ++ ret = gf_thread_create (&delayer, NULL, timed_response_loop, this, ++ "fusedlyd"); ++ if (ret != 0) { ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ++ "failed to start timed response thread (%s)", ++ strerror (errno)); ++ ++ sys_close (priv->fd); ++ goto out; ++ } ++ priv->timed_response_fuse_thread_started = _gf_true; ++ + /* Used for 'reverse invalidation of inode' */ + if (fini->minor >= 12) { + ret = gf_thread_create (&messenger, NULL, notify_kernel_loop, +@@ -5229,6 +5682,8 @@ fuse_priv_dump (xlator_t *this) + (int)private->init_recvd); + gf_proc_dump_write("strict_volfile_check", "%d", + (int)private->strict_volfile_check); ++ gf_proc_dump_write("timed_response_thread_started", "%d", ++ (int)private->timed_response_fuse_thread_started); + gf_proc_dump_write("reverse_thread_started", "%d", + (int)private->reverse_fuse_thread_started); + gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp); +@@ 
-5486,7 +5941,7 @@ static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = { + [FUSE_SETLKW] = fuse_setlk, + [FUSE_ACCESS] = fuse_access, + [FUSE_CREATE] = fuse_create, +- /* [FUSE_INTERRUPT] */ ++ [FUSE_INTERRUPT] = fuse_interrupt, + /* [FUSE_BMAP] */ + [FUSE_DESTROY] = fuse_destroy, + /* [FUSE_IOCTL] */ +@@ -5611,6 +6066,13 @@ init (xlator_t *this_xl) + pthread_cond_init (&priv->invalidate_cond, NULL); + pthread_mutex_init (&priv->invalidate_mutex, NULL); + ++ INIT_LIST_HEAD (&priv->timed_list); ++ pthread_cond_init (&priv->timed_cond, NULL); ++ pthread_mutex_init (&priv->timed_mutex, NULL); ++ ++ INIT_LIST_HEAD (&priv->interrupt_list); ++ pthread_mutex_init (&priv->interrupt_mutex, NULL); ++ + /* get options from option dictionary */ + ret = dict_get_str (options, ZR_MOUNTPOINT_OPT, &value_string); + if (ret == -1 || value_string == NULL) { +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 4e32a7f..ba3e000 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -147,6 +147,16 @@ struct fuse_private { + + /* LRU Limit, if not set, default is 128k for now */ + uint32_t lru_limit; ++ ++ /* Delayed fuse response */ ++ struct list_head timed_list; ++ pthread_cond_t timed_cond; ++ pthread_mutex_t timed_mutex; ++ gf_boolean_t timed_response_fuse_thread_started; ++ ++ /* Interrupt subscription */ ++ struct list_head interrupt_list; ++ pthread_mutex_t interrupt_mutex; + }; + typedef struct fuse_private fuse_private_t; + +@@ -162,6 +172,35 @@ struct fuse_invalidate_node { + }; + typedef struct fuse_invalidate_node fuse_invalidate_node_t; + ++struct fuse_timed_message { ++ struct fuse_out_header fuse_out_header; ++ void *fuse_message_body; ++ struct timespec scheduled_ts; ++ struct list_head next; ++}; ++typedef struct fuse_timed_message fuse_timed_message_t; ++ ++enum fuse_interrupt_state { ++ INTERRUPT_NONE, ++ INTERRUPT_SQUELCHED, ++ INTERRUPT_HANDLED, ++}; ++typedef enum 
fuse_interrupt_state fuse_interrupt_state_t; ++struct fuse_interrupt_record; ++typedef struct fuse_interrupt_record fuse_interrupt_record_t; ++typedef void (*fuse_interrupt_handler_t) (xlator_t *this, ++ fuse_interrupt_record_t *); ++struct fuse_interrupt_record { ++ struct fuse_in_header fuse_in_header; ++ void *data; ++ gf_boolean_t hit; ++ fuse_interrupt_state_t interrupt_state; ++ fuse_interrupt_handler_t interrupt_handler; ++ pthread_cond_t handler_cond; ++ pthread_mutex_t handler_mutex; ++ struct list_head next; ++}; ++ + struct fuse_graph_switch_args { + xlator_t *this; + xlator_t *old_subvol; +diff --git a/xlators/mount/fuse/src/fuse-mem-types.h b/xlators/mount/fuse/src/fuse-mem-types.h +index 721b9a3..4ded879 100644 +--- a/xlators/mount/fuse/src/fuse-mem-types.h ++++ b/xlators/mount/fuse/src/fuse-mem-types.h +@@ -24,6 +24,8 @@ enum gf_fuse_mem_types_ { + gf_fuse_mt_gids_t, + gf_fuse_mt_invalidate_node_t, + gf_fuse_mt_pthread_t, ++ gf_fuse_mt_timed_message_t, ++ gf_fuse_mt_interrupt_record_t, + gf_fuse_mt_end + }; + #endif +-- +1.8.3.1 + diff --git a/0527-fuse-diagnostic-FLUSH-interrupt.patch b/0527-fuse-diagnostic-FLUSH-interrupt.patch new file mode 100644 index 0000000..c15c9ce --- /dev/null +++ b/0527-fuse-diagnostic-FLUSH-interrupt.patch @@ -0,0 +1,568 @@ +From fc4bebd605b6a579a4d19c6640aca38057397c77 Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Tue, 21 Aug 2018 12:44:54 +0200 +Subject: [PATCH 527/529] fuse: diagnostic FLUSH interrupt + +We add dummy interrupt handling for the FLUSH +fuse message. It can be enabled by the +"--fuse-flush-handle-interrupt" hidden command line +option, or "-ofuse-flush-handle-interrupt=yes" +mount option. + +It serves no other than diagnostic & demonstational +purposes -- to exercise the interrupt handling framework +a bit and to give an usage example. + +Documentation is also provided that showcases interrupt +handling via FLUSH. 
+ +Upstream: https://review.gluster.org/20876 +> Change-Id: I522f1e798501d06b74ac3592a5f73c1ab0590c60 +> updates: #465 +> Signed-off-by: Csaba Henk + +Change-Id: I510aff8895a3fe5858ab313c47514de7087d08c1 +BUG: 1595246 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/162550 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi Suryanarayan +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + doc/developer-guide/Developers-Index.md | 5 ++ + doc/developer-guide/fuse-interrupt.md | 130 ++++++++++++++++++++++++++++ + glusterfsd/src/glusterfsd.c | 53 +++++++++++- + glusterfsd/src/glusterfsd.h | 1 + + libglusterfs/src/glusterfs.h | 2 + + tests/features/interrupt.t | 67 ++++++++++++++ + tests/features/open_and_sleep.c | 27 ++++++ + xlators/mount/fuse/src/fuse-bridge.c | 59 +++++++++++++ + xlators/mount/fuse/src/fuse-bridge.h | 4 +- + xlators/mount/fuse/utils/mount.glusterfs.in | 7 ++ + 10 files changed, 353 insertions(+), 2 deletions(-) + create mode 100644 doc/developer-guide/fuse-interrupt.md + create mode 100644 tests/features/interrupt.t + create mode 100644 tests/features/open_and_sleep.c + +diff --git a/doc/developer-guide/Developers-Index.md b/doc/developer-guide/Developers-Index.md +index 4c6346e..6c00a4a 100644 +--- a/doc/developer-guide/Developers-Index.md ++++ b/doc/developer-guide/Developers-Index.md +@@ -59,6 +59,11 @@ Translators + - [Storage/posix Translator](./posix.md) + - [Compression translator](./network_compression.md) + ++Fuse ++---- ++ ++- [Interrupt handling](./fuse-interrupt.md) ++ + Testing/Debugging + ----------------- + +diff --git a/doc/developer-guide/fuse-interrupt.md b/doc/developer-guide/fuse-interrupt.md +new file mode 100644 +index 0000000..f92b553 +--- /dev/null ++++ b/doc/developer-guide/fuse-interrupt.md +@@ -0,0 +1,130 @@ ++# Fuse interrupt handling ++ ++## Conventions followed ++ ++- *FUSE* refers to the "wire protocol" between kernel and userspace and ++ related specifications. 
++- *fuse* refers to the kernel subsystem and also to the GlusterFS translator. ++ ++## FUSE interrupt handling spec ++ ++The [Linux kernel FUSE documentation](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.18#n148) ++describes how interrupt handling happens in fuse. ++ ++## Interrupt handling in the fuse translator ++ ++### Declarations ++ ++This document describes the internal API in the fuse translator with which ++interrupts can be handled. ++ ++The API is internal (to be used only in fuse-bridge.c; the functions are ++not exported to a header file). ++ ++``` ++enum fuse_interrupt_state { ++ INTERRUPT_NONE, ++ INTERRUPT_SQUELCHED, ++ INTERRUPT_HANDLED, ++}; ++typedef enum fuse_interrupt_state fuse_interrupt_state_t; ++struct fuse_interrupt_record; ++typedef struct fuse_interrupt_record fuse_interrupt_record_t; ++typedef void (*fuse_interrupt_handler_t)(xlator_t *this, ++ fuse_interrupt_record_t *); ++struct fuse_interrupt_record { ++ fuse_in_header_t fuse_in_header; ++ void *data; ++ /* ++ ... ++ */ ++}; ++ ++fuse_interrupt_record_t * ++fuse_interrupt_record_new(fuse_in_header_t *finh, ++ fuse_interrupt_handler_t handler); ++ ++void ++fuse_interrupt_record_insert(xlator_t *this, fuse_interrupt_record_t *fir); ++ ++gf_boolean_t ++fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this, ++ gf_boolean_t sync, void **datap); ++ ++void ++fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir, ++ fuse_interrupt_state_t intstat, ++ gf_boolean_t sync, void **datap); ++``` ++ ++The code demonstrates the usage of the API through `fuse_flush()`. (It's a ++dummy implementation only for demonstration purposes.) Flush is chosen ++because a `FLUSH` interrupt is easy to trigger (see ++*tests/features/interrupt.t*). Interrupt handling for flush is switched on ++by `--fuse-flush-handle-interrupt` (a hidden glusterfs command line flag). 
++The flush interrupt handling code is guarded by the ++`flush_handle_interrupt` Boolean member of `fuse_private_t`. ++ ++### Usage ++ ++A given FUSE fop can be enabled to handle interrupts via the following ++steps: ++ ++- Define a handler function (of type `fuse_interrupt_handler_t`). ++ It should implement the interrupt handling logic and in the end ++ call (directly or as async callback) `fuse_interrupt_finish_interrupt()`. ++ The `intstat` argument to `fuse_interrupt_finish_interrupt` should be ++ either `INTERRUPT_SQUELCHED` or `INTERRUPT_HANDLED`. ++ - `INTERRUPT_SQUELCHED` means that we choose not to handle the interrupt ++ and the fop is going on uninterrupted. ++ - `INTERRUPT_HANDLED` means that the interrupt was actually handled. In ++ this case the fop will be answered from interrupt context with errno ++ `EINTR` (that is, the fop should not send a response to the kernel). ++ ++ We return to the `sync` and `datap` arguments later. ++- In the `fuse_<FOP>` function create an interrupt record using ++ `fuse_interrupt_record_new()`, passing the incoming `fuse_in_header` and ++ the above handler function to it. ++ - Arbitrary further data can be referred to via the `data` member of the ++ interrupt record that is to be passed on from fop context to ++ interrupt context. ++- When it's set up, pass the interrupt record to ++ `fuse_interrupt_record_insert()`. ++- In `fuse_<FOP>_cbk` call `fuse_interrupt_finish_fop()`. ++ - `fuse_interrupt_finish_fop()` returns a Boolean according to whether the ++ interrupt was handled. If it was, then the fuse request is already ++ answered and the stack gets destroyed in `fuse_interrupt_finish_fop` so ++ `fuse_<FOP>_cbk` can just return (zero). Otherwise follow the standard ++ cbk logic (answer the fuse request and destroy the stack -- these are ++ typically accomplished by `fuse_err_cbk()`). ++- The last two arguments of `fuse_interrupt_finish_fop()` and ++ `fuse_interrupt_finish_interrupt()` are `gf_boolean_t sync` and ++ `void **datap`. 
++ - `sync` represents the strategy for freeing the interrupt record. The ++ interrupt handler and the fop handler are in race to get at the interrupt ++ record first (interrupt handler for purposes of doing the interrupt ++ handling, fop handler for purposes of deactivating the interrupt record ++ upon completion of the fop handling). ++ - If `sync` is true, then the fop handler will wait for the interrupt ++ handler to finish and it takes care of freeing. ++ - If `sync` is false, the loser of the above race will perform freeing. ++ ++ Freeing is done within the respective interrupt finish routines, except ++ for the `data` field of the interrupt record; with respect to that, see ++ the discussion of the `datap` parameter below. The strategy has to be ++ consensual, that is, `fuse_interrupt_finish_fop()` and ++ `fuse_interrupt_finish_interrupt()` must pass the same value for `sync`. ++ If dismantling the resources associated with the interrupt record is ++ simple, `sync = _gf_false` is the suggested choice; `sync = _gf_true` can ++ be useful in the opposite case, when dismantling those resources would ++ be inconvenient to implement in two places or to enact in non-fop context. ++ - If `datap` is `NULL`, the `data` member of the interrupt record will be ++ freed within the interrupt finish routine. If it points to a valid ++ `void *` pointer, and if caller is doing the cleanup (see `sync` above), ++ then that pointer will be directed to the `data` member of the interrupt ++ record and it's up to the caller what it's doing with it. ++ - If `sync` is true, interrupt handler can use `datap = NULL`, and ++ fop handler will have `datap` set. ++ - If `sync` is false, and handlers pass a pointer to a pointer for ++ `datap`, they should check if the pointed pointer is NULL before ++ attempting to deal with the data. 
+diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 2e2cd77..9c536cd 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -243,6 +243,9 @@ static struct argp_option gf_options[] = { + OPTION_ARG_OPTIONAL, "disable/enable fuse event-history"}, + {"reader-thread-count", ARGP_READER_THREAD_COUNT_KEY, "INTEGER", + OPTION_ARG_OPTIONAL, "set fuse reader thread count"}, ++ {"fuse-flush-handle-interrupt", ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY, ++ "BOOL", OPTION_ARG_OPTIONAL | OPTION_HIDDEN, ++ "handle interrupt in fuse FLUSH handler"}, + {0, 0, 0, 0, "Miscellaneous Options:"}, + {0, } + }; +@@ -581,6 +584,38 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options) + goto err; + } + } ++ switch (cmd_args->fuse_flush_handle_interrupt) { ++ case GF_OPTION_ENABLE: ++ ret = dict_set_static_ptr (options, ++ "flush-handle-interrupt", ++ "on"); ++ if (ret < 0) { ++ gf_msg ("glusterfsd", GF_LOG_ERROR, 0, ++ glusterfsd_msg_4, ++ "failed to set dict value for key " ++ "flush-handle-interrupt"); ++ goto err; ++ } ++ break; ++ case GF_OPTION_DISABLE: ++ ret = dict_set_static_ptr (options, ++ "flush-handle-interrupt", ++ "off"); ++ if (ret < 0) { ++ gf_msg ("glusterfsd", GF_LOG_ERROR, 0, ++ glusterfsd_msg_4, ++ "failed to set dict value for key " ++ "flush-handle-interrupt"); ++ goto err; ++ } ++ break; ++ case GF_OPTION_DEFERRED: /* default */ ++ default: ++ gf_msg_debug ("glusterfsd", 0, ++ "fuse-flush-handle-interrupt mode %d", ++ cmd_args->fuse_flush_handle_interrupt); ++ break; ++ } + + ret = 0; + err: +@@ -1352,7 +1387,22 @@ no_oom_api: + } + + break; +- } ++ case ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY: ++ if (!arg) ++ arg = "yes"; ++ ++ if (gf_string2boolean(arg, &b) == 0) { ++ cmd_args->fuse_flush_handle_interrupt = b; ++ ++ break; ++ } ++ ++ argp_failure(state, -1, 0, ++ "unknown fuse flush handle interrupt " ++ "setting \"%s\"", ++ arg); ++ break; ++ } + + return 0; + } +@@ -1648,6 +1698,7 @@ 
glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx) + cmd_args->fuse_attribute_timeout = -1; + cmd_args->fuse_entry_timeout = -1; + cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED; ++ cmd_args->fuse_flush_handle_interrupt = GF_OPTION_DEFERRED; + + if (ctx->mem_acct_enable) + cmd_args->mem_acct = 1; +diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h +index 1550a30..28b514a 100644 +--- a/glusterfsd/src/glusterfsd.h ++++ b/glusterfsd/src/glusterfsd.h +@@ -101,6 +101,7 @@ enum argp_option_keys { + ARGP_FUSE_EVENT_HISTORY_KEY = 179, + ARGP_READER_THREAD_COUNT_KEY = 180, + ARGP_FUSE_LRU_LIMIT_KEY = 190, ++ ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 191, + }; + + struct _gfd_vol_top_priv { +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index 2690306..9fa066e 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -448,6 +448,8 @@ struct _cmd_args { + + char *event_history; + uint32_t reader_thread_count; ++ ++ int fuse_flush_handle_interrupt; + }; + typedef struct _cmd_args cmd_args_t; + +diff --git a/tests/features/interrupt.t b/tests/features/interrupt.t +new file mode 100644 +index 0000000..476d875 +--- /dev/null ++++ b/tests/features/interrupt.t +@@ -0,0 +1,67 @@ ++#!/bin/bash ++ ++##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/) ++ ++. $(dirname $0)/../include.rc ++. 
$(dirname $0)/../volume.rc ++ ++TESTS_EXPECTED_IN_LOOP=4 ++ ++cleanup; ++logdir=`gluster --print-logdir` ++ ++TEST build_tester $(dirname $0)/open_and_sleep.c ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8}; ++ ++## Verify volume is is created ++EXPECT "$V0" volinfo_field $V0 'Volume Name'; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++## Start volume and verify ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++function log-file-name() ++{ ++ logfilename=$M0".log" ++ echo ${logfilename:1} | tr / - ++} ++ ++log_file=$logdir"/"`log-file-name` ++ ++function test_interrupt { ++ local handlebool="$1" ++ local logpattern="$2" ++ ++ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --fuse-flush-handle-interrupt=$handlebool --log-level=DEBUG $M0 ++ ++ # If the test helper fails (which is considered a setup error, not failure of the test ++ # case itself), kill will be invoked without argument, and that will be the actual ++ # error which is caught. ++ TEST "./$(dirname $0)/open_and_sleep $M0/testfile | { sleep 0.1; xargs -n1 kill -INT; }" ++ ++ TEST "grep -E '$logpattern' $log_file" ++ # Basic sanity check, making sure filesystem has not crashed. ++ TEST test -f $M0/testfile ++} ++ ++# Theoretically FLUSH might finish before INTERRUPT is handled, ++# in which case we'd get the "no handler found" message (but it's unlikely). ++test_interrupt yes 'FLUSH.*interrupt handler triggered|INTERRUPT.*no handler found' ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++test_interrupt no 'INTERRUPT.*no handler found' ++ ++## Finish up ++TEST $CLI volume stop $V0; ++EXPECT 'Stopped' volinfo_field $V0 'Status'; ++ ++TEST $CLI volume delete $V0; ++TEST ! 
$CLI volume info $V0; ++ ++cleanup_tester $(dirname $0)/open_and_sleep; ++cleanup; +diff --git a/tests/features/open_and_sleep.c b/tests/features/open_and_sleep.c +new file mode 100644 +index 0000000..da089e9 +--- /dev/null ++++ b/tests/features/open_and_sleep.c +@@ -0,0 +1,27 @@ ++#include ++#include ++#include ++ ++int ++main (int argc, char **argv) ++{ ++ pid_t pid; ++ int fd; ++ ++ if (argc >= 2) { ++ fd = open (argv[1], O_RDWR | O_CREAT, 0644); ++ if (fd == -1) { ++ fprintf (stderr, "cannot open/create %s\n", argv[1]); ++ return 1; ++ } ++ } ++ ++ pid = getpid (); ++ printf ("%d\n", pid); ++ fflush (stdout); ++ ++ for (;;) ++ sleep (1); ++ ++ return 0; ++} +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 0d4b9db..44c39e4 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -1779,6 +1779,21 @@ fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + } + + static int ++fuse_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ fuse_private_t *priv = this->private; ++ ++ if (priv->flush_handle_interrupt) { ++ if (fuse_interrupt_finish_fop (frame, this, _gf_false, NULL)) { ++ return 0; ++ } ++ } ++ ++ return fuse_err_cbk (frame, cookie, this, op_ret, op_errno, xdata); ++} ++ ++static int + fuse_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +@@ -2961,6 +2976,19 @@ fuse_flush_resume (fuse_state_t *state) + { + FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH, + flush, state->fd, state->xdata); ++ FUSE_FOP (state, fuse_flush_cbk, GF_FOP_FLUSH, ++ flush, state->fd, state->xdata); ++} ++ ++static void ++fuse_flush_interrupt_handler (xlator_t *this, fuse_interrupt_record_t *fir) ++{ ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, ++ "FLUSH unique %" PRIu64 ": interrupt handler triggered", ++ 
fir->fuse_in_header.unique); ++ ++ fuse_interrupt_finish_interrupt (this, fir, INTERRUPT_HANDLED, ++ _gf_false, NULL); + } + + static void +@@ -2968,6 +2996,7 @@ fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) + { + struct fuse_flush_in *ffi = msg; ++ fuse_private_t *priv = NULL; + + fuse_state_t *state = NULL; + fd_t *fd = NULL; +@@ -2976,6 +3005,27 @@ fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg, + fd = FH_TO_FD (ffi->fh); + state->fd = fd; + ++ priv = this->private; ++ if (priv->flush_handle_interrupt) { ++ fuse_interrupt_record_t *fir = NULL; ++ ++ fir = fuse_interrupt_record_new (finh, ++ fuse_flush_interrupt_handler); ++ if (!fir) { ++ send_fuse_err (this, finh, ENOMEM); ++ ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ++ "FLUSH unique %" PRIu64 ++ ":" ++ " interrupt record allocation failed", ++ finh->unique); ++ free_fuse_state (state); ++ ++ return; ++ } ++ fuse_interrupt_record_insert (this, fir); ++ } ++ + fuse_resolve_fd_init (state, &state->resolve, fd); + + state->lk_owner = ffi->lock_owner; +@@ -6226,6 +6276,9 @@ init (xlator_t *this_xl) + GF_OPTION_INIT("event-history", priv->event_history, bool, + cleanup_exit); + ++ GF_OPTION_INIT ("flush-handle-interrupt", priv->flush_handle_interrupt, bool, ++ cleanup_exit); ++ + /* user has set only background-qlen, not congestion-threshold, + use the fuse kernel driver formula to set congestion. 
ie, 75% */ + if (dict_get (this_xl->options, "background-qlen") && +@@ -6552,5 +6605,11 @@ struct volume_options options[] = { + .description = "makes glusterfs invalidate kernel inodes after " + "reaching this limit (0 means 'unlimited')", + }, ++ { .key = {"flush-handle-interrupt"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .default_value = "false", ++ .description = "Handle iterrupts in FLUSH handler (for testing " ++ "purposes).", ++ }, + { .key = {NULL} }, + }; +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index ba3e000..e18469d 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -157,6 +157,8 @@ struct fuse_private { + /* Interrupt subscription */ + struct list_head interrupt_list; + pthread_mutex_t interrupt_mutex; ++ ++ gf_boolean_t flush_handle_interrupt; + }; + typedef struct fuse_private fuse_private_t; + +@@ -191,7 +193,7 @@ typedef struct fuse_interrupt_record fuse_interrupt_record_t; + typedef void (*fuse_interrupt_handler_t) (xlator_t *this, + fuse_interrupt_record_t *); + struct fuse_interrupt_record { +- struct fuse_in_header fuse_in_header; ++ fuse_in_header_t fuse_in_header; + void *data; + gf_boolean_t hit; + fuse_interrupt_state_t interrupt_state; +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index 9a0404f..a3a9fbd 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -273,6 +273,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line --dump-fuse=$dump_fuse"); + fi + ++ if [ -n "$fuse_flush_handle_interrupt" ]; then ++ cmd_line=$(echo "$cmd_line --fuse-flush-handle-interrupt=$fuse_flush_handle_interrupt"); ++ fi ++ + # if trasnport type is specified, we have to append it to + # volume name, so that it fetches the right client vol file + +@@ -524,6 +528,9 @@ with_options() + [ $value = "false" ] ; then + no_root_squash=1; + fi ;; ++ 
"fuse-flush-handle-interrupt") ++ fuse_flush_handle_interrupt=$value ++ ;; + "context"|"fscontext"|"defcontext"|"rootcontext") + # standard SElinux mount options to pass to the kernel + [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts," +-- +1.8.3.1 + diff --git a/0528-locks-handle-clear-locks-xattr-in-fgetxattr-too.patch b/0528-locks-handle-clear-locks-xattr-in-fgetxattr-too.patch new file mode 100644 index 0000000..370d8f6 --- /dev/null +++ b/0528-locks-handle-clear-locks-xattr-in-fgetxattr-too.patch @@ -0,0 +1,250 @@ +From 3c0f27fd697a8c977873d44fbdf3aa63c1065645 Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Thu, 6 Dec 2018 16:13:46 +0100 +Subject: [PATCH 528/529] locks: handle "clear locks" xattr in fgetxattr too + +The lock clearing procedure was kicked in only in +getxattr context. We need it to work the same way +if it's triggered via fgetxattr (as is the case +with interrupt handling). + +Also cleaned up the instrumentation a bit (more logs, +proper management of allocated data). 
+ +Upstream: https://review.gluster.org/21820 +> updates: #465 +> Change-Id: Icfca26ee181da3b8e15ca3fcf61cd5702e2730c8 +> Signed-off-by: Csaba Henk + +Change-Id: Ia15108fd6d92ea2bdb73cea5fb04126785b19663 +BUG: 1595246 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/162551 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi Suryanarayan +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/locks/src/clear.c | 6 +++ + xlators/features/locks/src/clear.h | 2 + + xlators/features/locks/src/posix.c | 107 ++++++++++++++++++++++++------------- + 3 files changed, 77 insertions(+), 38 deletions(-) + +diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c +index 22c03b5..c3d5dd2 100644 +--- a/xlators/features/locks/src/clear.c ++++ b/xlators/features/locks/src/clear.c +@@ -24,6 +24,12 @@ + #include "statedump.h" + #include "clear.h" + ++const char *clrlk_type_names[CLRLK_TYPE_MAX] = { ++ [CLRLK_INODE] = "inode", ++ [CLRLK_ENTRY] = "entry", ++ [CLRLK_POSIX] = "posix", ++}; ++ + int + clrlk_get_kind (char *kind) + { +diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h +index 78fc5ae..1542953 100644 +--- a/xlators/features/locks/src/clear.h ++++ b/xlators/features/locks/src/clear.h +@@ -22,6 +22,8 @@ typedef enum { + CLRLK_TYPE_MAX + } clrlk_type; + ++extern const char *clrlk_type_names[]; ++ + typedef enum { + CLRLK_BLOCKED = 1, + CLRLK_GRANTED, +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 2cc2837..142a5cc 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -1028,41 +1028,35 @@ pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + } + +-int32_t +-pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, +- const char *name, dict_t *xdata) ++static int32_t ++pl_getxattr_clrlk (xlator_t *this, const char *name, inode_t *inode, ++ dict_t **dict, 
int32_t *op_errno) + { +- int32_t op_errno = EINVAL; +- int op_ret = -1; + int32_t bcount = 0; + int32_t gcount = 0; +- char key[PATH_MAX] = {0, }; ++ char *key = NULL; + char *lk_summary = NULL; + pl_inode_t *pl_inode = NULL; +- dict_t *dict = NULL; + clrlk_args args = {0,}; + char *brickname = NULL; ++ int32_t op_ret = -1; + +- if (!name) +- goto usual; +- +- if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD))) +- goto usual; ++ *op_errno = EINVAL; + + if (clrlk_parse_args (name, &args)) { +- op_errno = EINVAL; ++ *op_errno = EINVAL; + goto out; + } + +- dict = dict_new (); +- if (!dict) { +- op_errno = ENOMEM; ++ *dict = dict_new (); ++ if (!*dict) { ++ *op_errno = ENOMEM; + goto out; + } + +- pl_inode = pl_inode_get (this, loc->inode); ++ pl_inode = pl_inode_get (this, inode); + if (!pl_inode) { +- op_errno = ENOMEM; ++ *op_errno = ENOMEM; + goto out; + } + +@@ -1072,23 +1066,31 @@ pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode, + &args, &bcount, + &gcount, +- &op_errno); +- if (op_ret) +- goto out; ++ op_errno); + break; + case CLRLK_POSIX: + op_ret = clrlk_clear_posixlk (this, pl_inode, &args, + &bcount, &gcount, +- &op_errno); +- if (op_ret) +- goto out; ++ op_errno); + break; +- case CLRLK_TYPE_MAX: +- op_errno = EINVAL; +- goto out; ++ default: ++ op_ret = -1; ++ *op_errno = EINVAL; ++ } ++ if (op_ret) { ++ if (args.type >= CLRLK_TYPE_MAX) { ++ gf_log (this->name, GF_LOG_ERROR, ++ "clear locks: invalid lock type %d", args.type); ++ } else { ++ gf_log (this->name, GF_LOG_ERROR, ++ "clear locks of type %s failed: %s", ++ clrlk_type_names[args.type], strerror (*op_errno)); ++ } ++ ++ goto out; + } + +- op_ret = fetch_pathinfo (this, loc->inode, &op_errno, &brickname); ++ op_ret = fetch_pathinfo (this, inode, op_errno, &brickname); + if (op_ret) { + gf_log (this->name, GF_LOG_WARNING, + "Couldn't get brickname"); +@@ -1105,39 +1107,62 @@ pl_getxattr (call_frame_t *frame, 
xlator_t *this, loc_t *loc, + if (!gcount && !bcount) { + if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) { + op_ret = -1; +- op_errno = ENOMEM; ++ *op_errno = ENOMEM; + goto out; + } + } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d " + "granted locks=%d", + (brickname == NULL)? this->name : brickname, +- (args.type == CLRLK_INODE)? "inode": +- (args.type == CLRLK_ENTRY)? "entry": +- (args.type == CLRLK_POSIX)? "posix": " ", ++ clrlk_type_names[args.type], + bcount, gcount) == -1) { + op_ret = -1; +- op_errno = ENOMEM; ++ *op_errno = ENOMEM; + goto out; + } ++ gf_log (this->name, GF_LOG_DEBUG, "%s", lk_summary); + +- if (snprintf(key, sizeof(key), "%s", name) >= sizeof(key)) { ++ key = gf_strdup (name); ++ if (!key) { + op_ret = -1; + goto out; + } +- if (dict_set_dynstr (dict, key, lk_summary)) { ++ if (dict_set_dynstr (*dict, key, lk_summary)) { + op_ret = -1; +- op_errno = ENOMEM; ++ *op_errno = ENOMEM; + goto out; + } + + op_ret = 0; + out: + GF_FREE(brickname); +- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); + + GF_FREE (args.opts); +- if (op_ret && lk_summary) ++ if (op_ret) { + GF_FREE (lk_summary); ++ GF_FREE (key); ++ } ++ ++ return op_ret; ++} ++ ++int32_t ++pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, ++ dict_t *xdata) ++{ ++ int32_t op_errno = EINVAL; ++ int32_t op_ret = -1; ++ dict_t *dict = NULL; ++ ++ if (!name) ++ goto usual; ++ ++ if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD))) ++ goto usual; ++ ++ op_ret = pl_getxattr_clrlk (this, name, loc->inode, &dict, &op_errno); ++ ++ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); ++ + if (dict) + dict_unref (dict); + return 0; +@@ -1415,6 +1440,12 @@ pl_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + } + + goto unwind; ++ } else if (strncmp (name, GF_XATTR_CLRLK_CMD, ++ strlen (GF_XATTR_CLRLK_CMD)) == 0) { ++ op_ret = pl_getxattr_clrlk (this, name, fd->inode, &dict, ++ 
&op_errno); ++ ++ goto unwind; + } else { + goto usual; + } +-- +1.8.3.1 + diff --git a/0529-fuse-SETLKW-interrupt.patch b/0529-fuse-SETLKW-interrupt.patch new file mode 100644 index 0000000..7af923f --- /dev/null +++ b/0529-fuse-SETLKW-interrupt.patch @@ -0,0 +1,222 @@ +From 5f2e017ce7875de1906eb319339f11c4ef321208 Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Mon, 22 Oct 2018 00:59:05 +0200 +Subject: [PATCH 529/529] fuse: SETLKW interrupt + +Use the (f)getxattr based clearlocks interface to +interrupt a pending lock request. + +Upstream: https://review.gluster.org/21472 +> updates: #465 +> Change-Id: I4e91a4d8791fc688fed400a02de4c53487e61be2 +> Signed-off-by: Csaba Henk + +Change-Id: Ib436f1524cda6ade24c6970caee3dbd7d5f452d4 +BUG: 1595246 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/162552 +Tested-by: RHGS Build Bot +Reviewed-by: Amar Tumballi Suryanarayan +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/features/flock_interrupt.t | 33 +++++++++ + xlators/mount/fuse/src/fuse-bridge.c | 132 +++++++++++++++++++++++++++++++++++ + 2 files changed, 165 insertions(+) + create mode 100644 tests/features/flock_interrupt.t + +diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t +new file mode 100644 +index 0000000..8603b65 +--- /dev/null ++++ b/tests/features/flock_interrupt.t +@@ -0,0 +1,33 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. 
$(dirname $0)/../volume.rc ++ ++cleanup; ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume create $V0 $H0:$B0/${V0}0; ++ ++## Verify volume is created ++EXPECT "$V0" volinfo_field $V0 'Volume Name'; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++## Start volume and verify ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST touch $M0/testfile; ++ ++function flock_interrupt { ++ flock $M0/testfile sleep 3 & flock -w 1 $M0/testfile true; ++ echo ok; ++} ++ ++EXPECT_WITHIN 2 ok flock_interrupt; ++ ++## Finish up ++cleanup; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 44c39e4..deaf533 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4346,6 +4346,18 @@ fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + { + uint32_t op = 0; + fuse_state_t *state = NULL; ++ int ret = 0; ++ ++ ret = fuse_interrupt_finish_fop (frame, this, _gf_false, ++ (void **)&state); ++ if (state) { ++ GF_FREE (state->name); ++ dict_unref (state->xdata); ++ GF_FREE (state); ++ } ++ if (ret) { ++ return 0; ++ } + + state = frame->root->state; + op = state->finh->opcode; +@@ -4392,10 +4404,130 @@ fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + return 0; + } + ++static int ++fuse_setlk_interrupt_handler_cbk (call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ fuse_interrupt_state_t intstat = INTERRUPT_NONE; ++ fuse_interrupt_record_t *fir; ++ fuse_state_t *state = NULL; ++ int ret = 0; ++ ++ ret = dict_get_bin (xdata, "fuse-interrupt-record", (void **)&fir); ++ if (ret < 0) { ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ++ "interrupt record not found"); ++ ++ goto out; ++ } ++ ++ intstat = op_ret >= 0 ? 
INTERRUPT_HANDLED : INTERRUPT_SQUELCHED; ++ ++ fuse_interrupt_finish_interrupt (this, fir, intstat, _gf_false, ++ (void **)&state); ++ if (state) { ++ GF_FREE (state->name); ++ dict_unref (state->xdata); ++ GF_FREE (state); ++ } ++ ++out: ++ STACK_DESTROY (frame->root); ++ ++ return 0; ++} ++ ++static void ++fuse_setlk_interrupt_handler (xlator_t *this, fuse_interrupt_record_t *fir) ++{ ++ fuse_state_t *state = NULL; ++ call_frame_t *frame = NULL; ++ char *xattr_name = NULL; ++ int ret = 0; ++ ++ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, ++ "SETLK%s unique %" PRIu64 ": interrupt handler triggered", ++ fir->fuse_in_header.opcode == FUSE_SETLK ? "" : "W", ++ fir->fuse_in_header.unique); ++ ++ state = fir->data; ++ ++ ret = gf_asprintf ( ++ &xattr_name, GF_XATTR_CLRLK_CMD ".tposix.kblocked.%hd,%jd-%jd", ++ state->lk_lock.l_whence, state->lk_lock.l_start, ++ state->lk_lock.l_len); ++ if (ret == -1) { ++ xattr_name = NULL; ++ goto err; ++ } ++ ++ frame = get_call_frame_for_req (state); ++ if (!frame) { ++ goto err; ++ } ++ frame->root->state = state; ++ frame->root->op = GF_FOP_GETXATTR; ++ frame->op = GF_FOP_GETXATTR; ++ state->name = xattr_name; ++ ++ STACK_WIND (frame, fuse_setlk_interrupt_handler_cbk, state->active_subvol, ++ state->active_subvol->fops->fgetxattr, state->fd, xattr_name, ++ state->xdata); ++ ++ return; ++ ++err: ++ GF_FREE (xattr_name); ++ fuse_interrupt_finish_interrupt (this, fir, INTERRUPT_SQUELCHED, ++ _gf_false, (void **)&state); ++ if (state) { ++ dict_unref (state->xdata); ++ GF_FREE (state); ++ } ++} + + void + fuse_setlk_resume (fuse_state_t *state) + { ++ fuse_interrupt_record_t *fir = NULL; ++ fuse_state_t *state_clone = NULL; ++ ++ fir = fuse_interrupt_record_new (state->finh, fuse_setlk_interrupt_handler); ++ state_clone = gf_memdup (state, sizeof (*state)); ++ if (state_clone) { ++ /* ++ * Calling this allocator with fir casted to (char *) seems like ++ * an abuse of this API, but in fact the API is stupid to assume ++ * a (char *) 
argument (in the funcion it's casted to (void *) ++ * anyway). ++ */ ++ state_clone->xdata = dict_for_key_value ( ++ "fuse-interrupt-record", (char *)fir, sizeof (*fir), _gf_true); ++ } ++ if (!fir || !state_clone || !state_clone->xdata) { ++ if (fir) { ++ GF_FREE (fir); ++ } ++ if (state_clone) { ++ GF_FREE (state_clone); ++ } ++ send_fuse_err (state->this, state->finh, ENOMEM); ++ ++ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ++ "SETLK%s unique %"PRIu64":" ++ " interrupt record allocation failed", ++ state->finh->opcode == FUSE_SETLK ? "" : "W", ++ state->finh->unique); ++ free_fuse_state (state); ++ ++ return; ++ } ++ state_clone->name = NULL; ++ fir->data = state_clone; ++ fuse_interrupt_record_insert (state->this, fir); ++ + gf_log ("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": SETLK%s %p", state->finh->unique, + state->finh->opcode == FUSE_SETLK ? "" : "W", state->fd); +-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index 498cfd6..9bf9c64 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 3.12.2 -Release: 40%{?dist} +Release: 41%{?dist} %endif License: GPLv2 or LGPLv3+ Group: System Environment/Base @@ -775,6 +775,25 @@ Patch0507: 0507-geo-rep-fix-rename-sync-on-hybrid-crawl.patch Patch0508: 0508-glusterd-Resolve-multiple-leaks-in-glusterd-code-pat.patch Patch0509: 0509-core-heketi-cli-is-throwing-error-target-is-busy.patch Patch0510: 0510-features-shard-Ref-shard-inode-while-adding-to-fsync.patch +Patch0511: 0511-Update-rfc.sh-to-rhgs-3.4.4.patch +Patch0512: 0512-Test-fixes-for-rhgs-3.4-downstream.patch +Patch0513: 0513-core-heketi-cli-is-throwing-error-target-is-busy.patch +Patch0514: 0514-glusterd-display-gluster-volume-status-when-quorum-t.patch +Patch0515: 0515-cli-change-the-warning-message.patch +Patch0516: 0516-geo-rep-Fix-permissions-with-non-root-setup.patch +Patch0517: 0517-geo-rep-validate-the-config-checkpoint-date-format.patch +Patch0518: 
0518-logging-create-parent-dir-if-not-available.patch +Patch0519: 0519-cluster-dht-Delete-invalid-linkto-files-in-rmdir.patch +Patch0520: 0520-spec-avoid-creation-of-temp-file-in-lua-script.patch +Patch0521: 0521-rpc-use-address-family-option-from-vol-file.patch +Patch0522: 0522-fuse-add-lru-limit-option.patch +Patch0523: 0523-libglusterfs-rename-macros-roof-and-floor-to-not-con.patch +Patch0524: 0524-program-GF-DUMP-Shield-ping-processing-from-traffic-.patch +Patch0525: 0525-rpcsvc-provide-each-request-handler-thread-its-own-q.patch +Patch0526: 0526-fuse-interrupt-handling-framework.patch +Patch0527: 0527-fuse-diagnostic-FLUSH-interrupt.patch +Patch0528: 0528-locks-handle-clear-locks-xattr-in-fgetxattr-too.patch +Patch0529: 0529-fuse-SETLKW-interrupt.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -1225,7 +1244,7 @@ while read p do # if the destination file exists, its most probably stale # so we must remove it - rename_to=( $(egrep '^rename to' $p | cut -f 3 -d ' ') ) + rename_to=( $(grep -i 'rename to' $p | cut -f 3 -d ' ') ) if [ ${#rename_to[*]} -gt 0 ]; then for f in ${rename_to[*]} do @@ -2205,12 +2224,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2247,12 +2261,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. 
-tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2289,12 +2298,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2331,12 +2335,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2373,12 +2372,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. 
"\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2415,12 +2409,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2457,12 +2446,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2500,12 +2484,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2543,12 +2522,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. 
tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2586,12 +2560,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2630,12 +2599,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. "\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2673,12 +2637,7 @@ fi -- Since we run pretrans scripts only for RPMs built for a server build, -- we can now use os.tmpname() since it is available on RHEL6 and later -- platforms which are server platforms. -tmpname = os.tmpname() -tmpfile = io.open(tmpname, "w") -tmpfile:write(script) -tmpfile:close() -ok, how, val = os.execute("/bin/bash " .. tmpname) -os.remove(tmpname) +ok, how, val = os.execute("/bin/bash -c \"" .. script .. 
"\"") if not (ok == 0) then error("Detected running glusterfs processes", ok) end @@ -2735,6 +2694,10 @@ fi %endif %changelog +* Tue Feb 12 2019 Milind Changire - 3.12.2-41 +- fixes bugs bz#1390151 bz#1410145 bz#1429190 bz#1510752 bz#1511779 + bz#1570958 bz#1574490 bz#1595246 bz#1618669 bz#1661393 bz#1668989 bz#1669020 + * Fri Jan 25 2019 Milind Changire - 3.12.2-40 - fixes bugs bz#1668304 bz#1669020