glusterfs/0430-features-shard-Perform-shards-deletion-in-the-backgr.patch
Milind Changire b7dd6f45c1 autobuild v3.12.2-26
Resolves: bz#1479446 bz#1520882 bz#1579758 bz#1598407 bz#1599808
Resolves: bz#1603118 bz#1619357 bz#1622001 bz#1622308 bz#1631166
Resolves: bz#1631418 bz#1632563 bz#1634649 bz#1635071 bz#1635100
Resolves: bz#1635136 bz#1636291 bz#1638069 bz#1640347 bz#1642854
Resolves: bz#1643035 bz#1644120 bz#1644279 bz#1645916 bz#1647675
Signed-off-by: Milind Changire <mchangir@redhat.com>
2018-11-08 22:30:35 -05:00

1791 lines
76 KiB
Diff

From 93ef66173442aaf4aeaeb161c6d6108eda54014a Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Thu, 12 Apr 2018 15:47:00 +0530
Subject: [PATCH 430/444] features/shard: Perform shards deletion in the
background
> Upstream: https://review.gluster.org/19970
> BUG: 1568521
> Change-Id: Ia83117230c9dd7d0d9cae05235644f8475e97bc3
A synctask is created that scans the indices under
.shard/.remove_me and deletes the shards associated with the
gfid named by each index's bname. The rate of deletion
is controlled by the option features.shard-deletion-rate, whose
default value is 100.
The task is launched in two cases:
1. when shard receives its first-ever lookup on the volume
2. when a rename or unlink deletes an inode
Change-Id: Ia83117230c9dd7d0d9cae05235644f8475e97bc3
BUG: 1520882
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/154864
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/globals.h | 1 +
tests/bugs/shard/bug-1568521-EEXIST.t | 30 +-
tests/bugs/shard/bug-1568521.t | 53 ++
tests/bugs/shard/bug-shard-discard.t | 19 +-
tests/bugs/shard/shard-inode-refcount-test.t | 5 +-
tests/bugs/shard/unlinks-and-renames.t | 123 ++--
xlators/features/shard/src/shard-messages.h | 18 +-
xlators/features/shard/src/shard.c | 816 +++++++++++++++++++-----
xlators/features/shard/src/shard.h | 19 +-
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 5 +
10 files changed, 829 insertions(+), 260 deletions(-)
create mode 100644 tests/bugs/shard/bug-1568521.t
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index 8e218cb..699e73e 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -109,6 +109,7 @@
#define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */
+#define GD_OP_VERSION_4_2_0 40200 /* Op-version for GlusterFS 4.2.0 */
/* Downstream only change */
#define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */
diff --git a/tests/bugs/shard/bug-1568521-EEXIST.t b/tests/bugs/shard/bug-1568521-EEXIST.t
index e4c3d41..7de400d 100644
--- a/tests/bugs/shard/bug-1568521-EEXIST.t
+++ b/tests/bugs/shard/bug-1568521-EEXIST.t
@@ -5,6 +5,12 @@
cleanup
+function get_file_count { # prints the number of lines `ls` emits for "$1*"; the checks below wait for 0
+ ls $1* | wc -l
+}
+
+FILE_COUNT_TIME=5
+
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
@@ -41,10 +47,14 @@ TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x000000000050000000000000
sleep 2
TEST unlink $M0/dir/file
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_file
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_file
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_file
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_file
+
+TEST ! stat $B0/${V0}0/dir/file
+TEST ! stat $B0/${V0}1/dir/file
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_file
##############################
### Repeat test for rename ###
@@ -71,9 +81,13 @@ TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x000000000050000000000000
sleep 2
TEST mv -f $M0/src $M0/dir/dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+TEST ! stat $B0/${V0}0/src
+TEST ! stat $B0/${V0}1/src
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
cleanup
diff --git a/tests/bugs/shard/bug-1568521.t b/tests/bugs/shard/bug-1568521.t
new file mode 100644
index 0000000..167fb63
--- /dev/null
+++ b/tests/bugs/shard/bug-1568521.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+
+function delete_files { # usage: delete_files <mountpoint> <tag>; tries to unlink file-1..file-500
+ local mountpoint=$1;
+ local success=0; # count of unlinks that succeeded through this mount
+ local value=$2 # tag appended to $B0/output.txt once per successful unlink
+ for i in {1..500}; do
+ if unlink $mountpoint/file-$i 2>/dev/null 1>/dev/null; then
+ echo $value >> $B0/output.txt
+ success=$((success + 1))
+ fi
+ done
+ echo $success
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
+
+for i in {1..500}; do
+ dd if=/dev/urandom of=$M0/file-$i bs=1M count=2
+done
+
+for i in {1..500}; do
+ stat $M1/file-$i > /dev/null
+done
+
+delete_files $M0 0 &
+delete_files $M1 1 &
+wait
+
+success1=$(grep 0 $B0/output.txt | wc -l);
+success2=$(grep 1 $B0/output.txt | wc -l);
+
+echo "Success1 is $success1";
+echo "Success2 is $success2";
+
+success_total=$((success1 + success2));
+
+EXPECT 500 echo $success_total
+
+cleanup
diff --git a/tests/bugs/shard/bug-shard-discard.t b/tests/bugs/shard/bug-shard-discard.t
index 884d9e7..910ade1 100644
--- a/tests/bugs/shard/bug-shard-discard.t
+++ b/tests/bugs/shard/bug-shard-discard.t
@@ -5,6 +5,12 @@
cleanup
+FILE_COUNT_TIME=5
+
+function get_shard_count { # $1 = directory, $2 = base name; prints the number of lines `ls` emits for "$1/$2.*"
+ ls $1/$2.* | wc -l
+}
+
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3}
@@ -42,14 +48,11 @@ EXPECT_NOT "1" file_all_zeroes `find $B0 -name $gfid_foo.1`
# Now unlink the file. And ensure that all shards associated with the file are cleaned up
TEST unlink $M0/foo
-#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1
-#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1
-#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.1
-#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.1
-#TEST ! stat $B0/${V0}0/.shard/$gfid_foo.2
-#TEST ! stat $B0/${V0}1/.shard/$gfid_foo.2
-#TEST ! stat $B0/${V0}2/.shard/$gfid_foo.2
-#TEST ! stat $B0/${V0}3/.shard/$gfid_foo.2
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}0/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}1/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}2/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}3/.shard $gfid_foo
TEST ! stat $M0/foo
#clean up everything
diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t
index c92dc07..087c8ba 100644
--- a/tests/bugs/shard/shard-inode-refcount-test.t
+++ b/tests/bugs/shard/shard-inode-refcount-test.t
@@ -5,6 +5,8 @@
cleanup
+SHARD_COUNT_TIME=5
+
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 $H0:$B0/${V0}0
@@ -18,7 +20,8 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23
ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0)
TEST rm -f $M0/one-plus-five-shards
-#EXPECT `expr $ACTIVE_INODES_BEFORE - 4` get_mount_active_size_value $V0
+# Expect 5 fewer inodes, offset by one additional inode because .remove_me would be created.
+EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $CLI volume stop $V0
diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t
index 997c397..6e5164f 100644
--- a/tests/bugs/shard/unlinks-and-renames.t
+++ b/tests/bugs/shard/unlinks-and-renames.t
@@ -9,6 +9,12 @@ cleanup
# and rename fops in sharding and make sure they work fine.
#
+FILE_COUNT_TIME=5
+
+function get_file_count { # prints the number of lines `ls` emits for "$1*"; the checks below wait for 0
+ ls $1* | wc -l
+}
+
#################################################
################### UNLINK ######################
#################################################
@@ -36,13 +42,8 @@ gfid_foo=$(get_gfid_string $M0/dir/foo)
TEST unlink $M0/dir/foo
TEST stat $B0/${V0}0/.shard/.remove_me
TEST stat $B0/${V0}1/.shard/.remove_me
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
##################################################
##### Unlink of a sharded file without holes #####
@@ -56,20 +57,14 @@ TEST stat $B0/${V0}1/.shard/$gfid_new.1
TEST stat $B0/${V0}0/.shard/$gfid_new.2
TEST stat $B0/${V0}1/.shard/$gfid_new.2
TEST unlink $M0/dir/new
-#TEST ! stat $B0/${V0}0/.shard/$gfid_new.1
-#TEST ! stat $B0/${V0}1/.shard/$gfid_new.1
-#TEST ! stat $B0/${V0}0/.shard/$gfid_new.2
-#TEST ! stat $B0/${V0}1/.shard/$gfid_new.2
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_new
TEST ! stat $M0/dir/new
TEST ! stat $B0/${V0}0/dir/new
TEST ! stat $B0/${V0}1/dir/new
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_new
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_new
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_new
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_new
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_new
#######################################
##### Unlink with /.shard present #####
#######################################
@@ -83,13 +78,8 @@ TEST unlink $M0/dir/foo
TEST ! stat $B0/${V0}0/dir/foo
TEST ! stat $B0/${V0}1/dir/foo
TEST ! stat $M0/dir/foo
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
#############################################################
##### Unlink of a file with only one block (the zeroth) #####
@@ -102,13 +92,9 @@ TEST unlink $M0/dir/foo
TEST ! stat $B0/${V0}0/dir/foo
TEST ! stat $B0/${V0}1/dir/foo
TEST ! stat $M0/dir/foo
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_foo
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_foo
-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_foo
####################################################
##### Unlink of a sharded file with hard-links #####
####################################################
@@ -137,22 +123,15 @@ TEST stat $B0/${V0}0/link
TEST stat $B0/${V0}1/link
# Now delete the last link.
TEST unlink $M0/link
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_original
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_original
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_original
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_original
# Ensure that the shards are all cleaned up.
-#TEST ! stat $B0/${V0}0/.shard/$gfid_original.1
-#TEST ! stat $B0/${V0}1/.shard/$gfid_original.1
-#TEST ! stat $B0/${V0}0/.shard/$gfid_original.2
-#TEST ! stat $B0/${V0}1/.shard/$gfid_original.2
-#TEST ! stat $M0/link
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_original
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_original
+TEST ! stat $M0/link
TEST ! stat $B0/${V0}0/link
TEST ! stat $B0/${V0}1/link
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_original
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_original
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_original
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_original
-
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $CLI volume stop $V0
TEST $CLI volume delete $V0
@@ -190,13 +169,8 @@ TEST ! stat $B0/${V0}0/dir/src
TEST ! stat $B0/${V0}1/dir/src
TEST stat $B0/${V0}0/dir/dst
TEST stat $B0/${V0}1/dir/dst
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
##################################################
##### Rename to a sharded file without holes #####
@@ -212,23 +186,16 @@ TEST stat $B0/${V0}1/.shard/$gfid_dst.1
TEST stat $B0/${V0}0/.shard/$gfid_dst.2
TEST stat $B0/${V0}1/.shard/$gfid_dst.2
TEST mv -f $M0/dir/src $M0/dir/dst
-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
TEST ! stat $M0/dir/src
TEST stat $M0/dir/dst
TEST ! stat $B0/${V0}0/dir/src
TEST ! stat $B0/${V0}1/dir/src
TEST stat $B0/${V0}0/dir/dst
TEST stat $B0/${V0}1/dir/dst
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
###################################################
##### Rename of dst file with /.shard present #####
@@ -245,13 +212,8 @@ TEST ! stat $B0/${V0}0/dir/src
TEST ! stat $B0/${V0}1/dir/src
TEST stat $B0/${V0}0/dir/dst
TEST stat $B0/${V0}1/dir/dst
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000500000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
###############################################################
##### Rename of dst file with only one block (the zeroth) #####
@@ -268,13 +230,8 @@ TEST ! stat $B0/${V0}0/dir/src
TEST ! stat $B0/${V0}1/dir/src
TEST stat $B0/${V0}0/dir/dst
TEST stat $B0/${V0}1/dir/dst
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
-
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000100000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
########################################################
##### Rename to a dst sharded file with hard-links #####
@@ -307,20 +264,18 @@ TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
TEST touch $M0/dir/src2
TEST mv -f $M0/dir/src2 $M0/link
# Ensure that the shards are all cleaned up.
-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
-#TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
-#TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
+TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
+TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
+TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
+TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
TEST ! stat $M0/dir/src2
TEST ! stat $B0/${V0}0/dir/src2
TEST ! stat $B0/${V0}1/dir/src2
-TEST stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
-TEST stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/.shard/.remove_me/$gfid_dst
-EXPECT "0000000000900000000000000000000000000000000000000000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/.shard/.remove_me/$gfid_dst
# Rename with non-existent dst and a sharded src
TEST touch $M0/dir/src
TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=9216
diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h
index 0267f8a..bc04e5e 100644
--- a/xlators/features/shard/src/shard-messages.h
+++ b/xlators/features/shard/src/shard-messages.h
@@ -40,7 +40,7 @@
*/
#define GLFS_COMP_BASE_SHARD GLFS_MSGID_COMP_SHARD
-#define GLFS_NUM_MESSAGES 20
+#define GLFS_NUM_MESSAGES 22
#define GLFS_MSGID_END (GLFS_COMP_BASE_SHARD + GLFS_NUM_MESSAGES + 1)
#define glfs_msg_start_x GLFS_COMP_BASE_SHARD, "Invalid: Start of messages"
@@ -58,7 +58,7 @@
* @diagnosis
* @recommendedaction
*/
-#define SHARD_MSG_DICT_SET_FAILED (GLFS_COMP_BASE_SHARD + 2)
+#define SHARD_MSG_DICT_OP_FAILED (GLFS_COMP_BASE_SHARD + 2)
/*!
@@ -194,5 +194,19 @@
*/
#define SHARD_MSG_FOP_FAILED (GLFS_COMP_BASE_SHARD + 20)
+/*!
+ * @messageid 133021
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_SHARDS_DELETION_FAILED (GLFS_COMP_BASE_SHARD + 21)
+
+/*!
+ * @messageid 133022
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_SHARDS_DELETION_COMPLETED (GLFS_COMP_BASE_SHARD + 22)
+
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* !_SHARD_MESSAGES_H_ */
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 492341c..2faf711 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -677,7 +677,8 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
* keep it alive by holding a ref on it.
*/
inode_ref (linked_inode);
- gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
+ if (base_inode)
+ gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
ctx->block_num = block_num;
list_add_tail (&ctx->ilist, &priv->ilist_head);
priv->inode_count++;
@@ -738,7 +739,8 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
* keep it alive by holding a ref on it.
*/
inode_ref (linked_inode);
- gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
+ if (base_inode)
+ gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
ctx->block_num = block_num;
ctx->base_inode = base_inode;
list_add_tail (&ctx->ilist, &priv->ilist_head);
@@ -977,6 +979,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
int i = -1;
uint32_t shard_idx_iter = 0;
char path[PATH_MAX] = {0,};
+ uuid_t gfid = {0,};
inode_t *inode = NULL;
inode_t *res_inode = NULL;
inode_t *fsync_inode = NULL;
@@ -988,6 +991,10 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
local->call_count = 0;
shard_idx_iter = local->first_block;
res_inode = local->resolver_base_inode;
+ if (res_inode)
+ gf_uuid_copy (gfid, res_inode->gfid);
+ else
+ gf_uuid_copy (gfid, local->base_gfid);
if ((local->op_ret < 0) || (local->resolve_not))
goto out;
@@ -1000,7 +1007,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
continue;
}
- shard_make_block_abspath (shard_idx_iter, res_inode->gfid, path,
+ shard_make_block_abspath (shard_idx_iter, gfid, path,
sizeof(path));
inode = NULL;
@@ -1147,7 +1154,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr,
8 * 4);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
"Failed to set key %s into dict. gfid=%s",
GF_XATTR_SHARD_FILE_SIZE, uuid_utoa (inode->gfid));
GF_FREE (size_attr);
@@ -1376,7 +1383,7 @@ shard_lookup_internal_dir (call_frame_t *frame, xlator_t *this,
ret = dict_set_bin (xattr_req, "gfid-req", *gfid, 16);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
"Failed to set gfid of %s into dict",
shard_internal_dir_string (type));
local->op_ret = -1;
@@ -1431,10 +1438,49 @@ shard_inode_ctx_update (inode_t *inode, xlator_t *this, dict_t *xdata,
}
int
+shard_delete_shards (void *opaque);
+
+int
+shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data);
+
+int
+shard_start_background_deletion (xlator_t *this)
+{ /* Create a frame and hand it to a new synctask that runs shard_delete_shards. */
+ int ret = 0;
+ call_frame_t *cleanup_frame = NULL;
+
+ cleanup_frame = create_frame (this, this->ctx->pool);
+ if (!cleanup_frame) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ SHARD_MSG_MEMALLOC_FAILED, "Failed to create "
+ "new frame to delete shards");
+ return -ENOMEM;
+ }
+
+ ret = synctask_new (this->ctx->env, shard_delete_shards,
+ shard_delete_shards_cbk, cleanup_frame,
+ cleanup_frame); /* the same frame is passed for both trailing arguments */
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, errno,
+ SHARD_MSG_SHARDS_DELETION_FAILED,
+ "failed to create task to do background "
+ "cleanup of shards");
+ STACK_DESTROY (cleanup_frame->root); /* task was not created: release the frame here */
+ }
+ return ret; /* result of synctask_new; -ENOMEM is returned earlier if no frame could be created */
+}
+
+int
shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
+ int ret = 0;
+ shard_priv_t *priv = NULL;
+ gf_boolean_t i_start_cleanup = _gf_false;
+
+ priv = this->private;
+
if (op_ret < 0)
goto unwind;
@@ -1460,6 +1506,25 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
(void) shard_inode_ctx_update (inode, this, xdata, buf);
+ LOCK (&priv->lock);
+ {
+ if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) {
+ priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS;
+ i_start_cleanup = _gf_true;
+ }
+ }
+ UNLOCK (&priv->lock);
+
+ if (i_start_cleanup) {
+ ret = shard_start_background_deletion (this);
+ if (ret) {
+ LOCK (&priv->lock);
+ {
+ priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
+ }
+ UNLOCK (&priv->lock);
+ }
+ }
unwind:
SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
xdata, postparent);
@@ -1475,6 +1540,7 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
uint64_t block_size = 0;
shard_local_t *local = NULL;
+ this->itable = loc->inode->table;
if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
SHARD_ENTRY_FOP_CHECK (loc, op_errno, err);
}
@@ -1496,7 +1562,7 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
GF_XATTR_SHARD_BLOCK_SIZE, 0);
if (ret) {
gf_msg (this->name, GF_LOG_WARNING, 0,
- SHARD_MSG_DICT_SET_FAILED, "Failed to set dict"
+ SHARD_MSG_DICT_OP_FAILED, "Failed to set dict"
" value: key:%s for path %s",
GF_XATTR_SHARD_BLOCK_SIZE, loc->path);
goto err;
@@ -1508,7 +1574,7 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
if (ret) {
gf_msg (this->name, GF_LOG_WARNING, 0,
- SHARD_MSG_DICT_SET_FAILED,
+ SHARD_MSG_DICT_OP_FAILED,
"Failed to set dict value: key:%s for path %s.",
GF_XATTR_SHARD_FILE_SIZE, loc->path);
goto err;
@@ -1901,12 +1967,6 @@ shard_truncate_last_shard (call_frame_t *frame, xlator_t *this, inode_t *inode)
return 0;
}
-int
-shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata);
-
void
shard_unlink_block_inode (shard_local_t *local, int shard_block_num);
@@ -1941,17 +2001,17 @@ done:
int
shard_truncate_htol (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- int i = 1;
- int ret = -1;
- int call_count = 0;
- uint32_t cur_block = 0;
- uint32_t last_block = 0;
- char path[PATH_MAX] = {0,};
- char *bname = NULL;
- loc_t loc = {0,};
- gf_boolean_t wind_failed = _gf_false;
- shard_local_t *local = NULL;
- shard_priv_t *priv = NULL;
+ int i = 1;
+ int ret = -1;
+ int call_count = 0;
+ uint32_t cur_block = 0;
+ uint32_t last_block = 0;
+ char path[PATH_MAX] = {0,};
+ char *bname = NULL;
+ loc_t loc = {0,};
+ gf_boolean_t wind_failed = _gf_false;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
local = frame->local;
priv = this->private;
@@ -2086,6 +2146,7 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode,
{
int list_index = 0;
char block_bname[256] = {0,};
+ uuid_t gfid = {0,};
inode_t *linked_inode = NULL;
xlator_t *this = NULL;
inode_t *fsync_inode = NULL;
@@ -2093,9 +2154,12 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode,
this = THIS;
priv = this->private;
+ if (local->loc.inode)
+ gf_uuid_copy (gfid, local->loc.inode->gfid);
+ else
+ gf_uuid_copy (gfid, local->base_gfid);
- shard_make_block_bname (block_num, (local->loc.inode)->gfid,
- block_bname, sizeof (block_bname));
+ shard_make_block_bname (block_num, gfid, block_bname, sizeof (block_bname));
shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK);
linked_inode = inode_link (inode, priv->dot_shard_inode, block_bname,
@@ -2125,9 +2189,14 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
{
int call_count = 0;
int shard_block_num = (long) cookie;
+ uuid_t gfid = {0,};
shard_local_t *local = NULL;
local = frame->local;
+ if (local->resolver_base_inode)
+ gf_uuid_copy (gfid, local->resolver_base_inode->gfid);
+ else
+ gf_uuid_copy (gfid, local->base_gfid);
if (op_ret < 0) {
/* Ignore absence of shards in the backend in truncate fop. */
@@ -2162,9 +2231,7 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
gf_msg (this->name, GF_LOG_ERROR, op_errno,
SHARD_MSG_LOOKUP_SHARD_FAILED, "Lookup on shard %d "
"failed. Base file gfid = %s", shard_block_num,
- (local->fop == GF_FOP_RENAME) ?
- uuid_utoa (local->loc2.inode->gfid)
- : uuid_utoa (local->loc.inode->gfid));
+ uuid_utoa (gfid));
local->op_ret = op_ret;
local->op_errno = op_errno;
goto done;
@@ -2173,25 +2240,18 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
shard_link_block_inode (local, shard_block_num, inode, buf);
done:
- call_count = shard_call_count_return (frame);
if (local->lookup_shards_barriered) {
syncbarrier_wake (&local->barrier);
return 0;
} else {
+ call_count = shard_call_count_return (frame);
if (call_count == 0) {
if (!local->first_lookup_done)
local->first_lookup_done = _gf_true;
- if (local->op_ret < 0)
- goto unwind;
- else
- local->pls_fop_handler (frame, this);
+ local->pls_fop_handler (frame, this);
}
}
return 0;
-
-unwind:
- local->pls_fop_handler (frame, this);
- return 0;
}
dict_t*
@@ -2237,6 +2297,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
int last_block = 0;
char path[PATH_MAX] = {0,};
char *bname = NULL;
+ uuid_t gfid = {0,};
loc_t loc = {0,};
shard_local_t *local = NULL;
shard_priv_t *priv = NULL;
@@ -2252,6 +2313,11 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
if (local->lookup_shards_barriered)
local->barrier.waitfor = local->call_count;
+ if (inode)
+ gf_uuid_copy (gfid, inode->gfid);
+ else
+ gf_uuid_copy (gfid, local->base_gfid);
+
while (shard_idx_iter <= last_block) {
if (local->inode_list[i]) {
i++;
@@ -2267,7 +2333,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
goto next;
}
- shard_make_block_abspath (shard_idx_iter, inode->gfid, path,
+ shard_make_block_abspath (shard_idx_iter, gfid, path,
sizeof(path));
bname = strrchr (path, '/') + 1;
@@ -2279,7 +2345,7 @@ shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode,
gf_msg (this->name, GF_LOG_ERROR, 0,
SHARD_MSG_INODE_PATH_FAILED, "Inode path failed"
" on %s, base file gfid = %s", bname,
- uuid_utoa (inode->gfid));
+ uuid_utoa (gfid));
local->op_ret = -1;
local->op_errno = ENOMEM;
loc_wipe (&loc);
@@ -2322,8 +2388,10 @@ next:
if (!--call_count)
break;
}
- if (local->lookup_shards_barriered)
+ if (local->lookup_shards_barriered) {
syncbarrier_wait (&local->barrier, count);
+ local->pls_fop_handler (frame, this);
+ }
return 0;
}
@@ -2779,8 +2847,9 @@ shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this)
local = frame->local;
if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
- shard_common_failure_unwind (local->fop, frame, local->op_ret,
- local->op_errno);
+ gf_msg (this->name, GF_LOG_ERROR, local->op_errno, /* resolver_base_inode is NULL when the base inode is not in the inode table; use base_gfid then */
+ SHARD_MSG_FOP_FAILED, "failed to delete shards of %s",
+ uuid_utoa (local->resolver_base_inode ? local->resolver_base_inode->gfid : local->base_gfid));
return 0;
}
local->op_ret = 0;
@@ -2791,41 +2860,12 @@ shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this)
}
int
-shard_rename_cbk (call_frame_t *frame, xlator_t *this);
-
-int32_t
-shard_unlink_cbk (call_frame_t *frame, xlator_t *this);
-
-int
shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this)
{
shard_local_t *local = NULL;
local = frame->local;
-
- if (local->op_ret < 0) {
- if (local->op_errno == ENOENT) {
- /* If lookup on /.shard fails with ENOENT, it probably
- * means that the file is being unlinked before it
- * could grow beyond its first block. In this case,
- * unlink boils down to unlinking the base file and
- * unwinding the call.
- */
- local->op_ret = 0;
- local->first_block = local->last_block = 0;
- local->num_blocks = 1;
- if (local->fop == GF_FOP_UNLINK)
- shard_unlink_cbk (frame, this);
- else
- shard_rename_cbk (frame, this);
- return 0;
- } else {
- shard_common_failure_unwind (local->fop, frame,
- local->op_ret,
- local->op_errno);
- return 0;
- }
- }
+ local->lookup_shards_barriered = _gf_true;
if (!local->call_count)
shard_unlink_shards_do (frame, this,
@@ -2841,6 +2881,7 @@ void
shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
{
char block_bname[256] = {0,};
+ uuid_t gfid = {0,};
inode_t *inode = NULL;
inode_t *base_inode = NULL;
xlator_t *this = NULL;
@@ -2854,12 +2895,17 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
inode = local->inode_list[shard_block_num - local->first_block];
base_inode = local->resolver_base_inode;
+ if (base_inode)
+ gf_uuid_copy (gfid, base_inode->gfid);
+ else
+ gf_uuid_copy (gfid, local->base_gfid);
- shard_make_block_bname (shard_block_num, (local->loc.inode)->gfid,
+ shard_make_block_bname (shard_block_num, gfid,
block_bname, sizeof (block_bname));
LOCK(&priv->lock);
- LOCK(&base_inode->lock);
+ if (base_inode)
+ LOCK(&base_inode->lock);
LOCK(&inode->lock);
{
__shard_inode_ctx_get (inode, this, &ctx);
@@ -2870,14 +2916,18 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
unlink_unref_forget = _gf_true;
}
if (ctx->fsync_needed) {
- inode_unref (base_inode);
+ if (base_inode)
+ inode_unref (base_inode);
list_del_init (&ctx->to_fsync_list);
- __shard_inode_ctx_get (base_inode, this, &base_ictx);
- base_ictx->fsync_count--;
+ if (base_inode) {
+ __shard_inode_ctx_get (base_inode, this, &base_ictx);
+ base_ictx->fsync_count--;
+ }
}
}
UNLOCK(&inode->lock);
- UNLOCK(&base_inode->lock);
+ if (base_inode)
+ UNLOCK(&base_inode->lock);
if (unlink_unref_forget) {
inode_unlink (inode, priv->dot_shard_inode, block_bname);
inode_unref (inode);
@@ -2887,7 +2937,18 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
}
int
-shard_rename_cbk (call_frame_t *frame, xlator_t *this);
+shard_rename_cbk (call_frame_t *frame, xlator_t *this)
+{ /* Unwinds the rename fop using the result and iatts stored in frame->local. */
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->preoldparent,
+ &local->postoldparent, &local->prenewparent,
+ &local->postnewparent, local->xattr_rsp);
+ return 0; /* always 0; the fop status travels in the unwind above */
+}
int32_t
shard_unlink_cbk (call_frame_t *frame, xlator_t *this)
@@ -2906,7 +2967,6 @@ shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
- int call_count = 0;
int shard_block_num = (long) cookie;
shard_local_t *local = NULL;
@@ -2919,22 +2979,8 @@ shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
shard_unlink_block_inode (local, shard_block_num);
-
done:
- call_count = shard_call_count_return (frame);
- if (local->unlink_shards_barriered) {
- syncbarrier_wake (&local->barrier);
- } else {
-
- if (call_count == 0) {
- SHARD_UNSET_ROOT_FS_ID (frame, local);
-
- if (local->fop == GF_FOP_UNLINK)
- shard_unlink_cbk (frame, this);
- else if (local->fop == GF_FOP_RENAME)
- shard_rename_cbk (frame, this);
- }
- }
+ syncbarrier_wake (&local->barrier);
return 0;
}
@@ -2944,11 +2990,11 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
int i = 0;
int ret = -1;
int count = 0;
- int call_count = 0;
- uint32_t last_block = 0;
uint32_t cur_block = 0;
+ uint32_t cur_block_idx = 0;/*this is idx into inode_list[] array */
char *bname = NULL;
char path[PATH_MAX] = {0,};
+ uuid_t gfid = {0,};
loc_t loc = {0,};
gf_boolean_t wind_failed = _gf_false;
shard_local_t *local = NULL;
@@ -2957,16 +3003,12 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
priv = this->private;
local = frame->local;
- /* local->num_blocks includes the base file block. This function only
- * deletes the shards under /.shard. So subtract num_blocks by 1.
- */
- local->call_count = call_count = local->num_blocks - 1;
- last_block = local->last_block;
+ if (inode)
+ gf_uuid_copy (gfid, inode->gfid);
+ else
+ gf_uuid_copy (gfid, local->base_gfid);
- /* Ignore the inode associated with the base file and start counting
- * from 1.
- */
- for (i = 1; i < local->num_blocks; i++) {
+ for (i = 0; i < local->num_blocks; i++) {
if (!local->inode_list[i])
continue;
count++;
@@ -2975,35 +3017,21 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
if (!count) {
/* callcount = 0 implies that all of the shards that need to be
* unlinked are non-existent (in other words the file is full of
- * holes). So shard xlator can simply return the fop to its
- * parent now.
+ * holes).
*/
gf_msg_debug (this->name, 0, "All shards that need to be "
"unlinked are non-existent: %s",
- uuid_utoa (inode->gfid));
- local->num_blocks = 1;
- if (local->fop == GF_FOP_UNLINK) {
- shard_unlink_cbk (frame, this);
- } else if (local->fop == GF_FOP_RENAME) {
- gf_msg_debug (this->name, 0, "Resuming rename()");
- shard_rename_cbk (frame, this);
- }
+ uuid_utoa (gfid));
return 0;
}
- local->call_count = call_count = count;
- cur_block = 1;
SHARD_SET_ROOT_FS_ID (frame, local);
- if (local->unlink_shards_barriered)
- local->barrier.waitfor = count;
+ local->barrier.waitfor = count;
+ cur_block = cur_block_idx + local->first_block;
- /* Ignore the base file and start iterating from the first block shard.
- */
- while (cur_block <= last_block) {
- if (!local->inode_list[cur_block]) {
- cur_block++;
- continue;
- }
+ while (cur_block_idx < local->num_blocks) {
+ if (!local->inode_list[cur_block_idx])
+ goto next;
if (wind_failed) {
shard_unlink_shards_do_cbk (frame,
@@ -3013,8 +3041,7 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
goto next;
}
- shard_make_block_abspath (cur_block, inode->gfid, path,
- sizeof (path));
+ shard_make_block_abspath (cur_block, gfid, path, sizeof (path));
bname = strrchr (path, '/') + 1;
loc.parent = inode_ref (priv->dot_shard_inode);
ret = inode_path (loc.parent, bname, (char **) &(loc.path));
@@ -3022,7 +3049,7 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
gf_msg (this->name, GF_LOG_ERROR, 0,
SHARD_MSG_INODE_PATH_FAILED, "Inode path failed"
" on %s, base file gfid = %s", bname,
- uuid_utoa (inode->gfid));
+ uuid_utoa (gfid));
local->op_ret = -1;
local->op_errno = ENOMEM;
loc_wipe (&loc);
@@ -3037,26 +3064,505 @@ shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode)
loc.name = strrchr (loc.path, '/');
if (loc.name)
loc.name++;
- loc.inode = inode_ref (local->inode_list[cur_block]);
+ loc.inode = inode_ref (local->inode_list[cur_block_idx]);
STACK_WIND_COOKIE (frame, shard_unlink_shards_do_cbk,
(void *) (long) cur_block, FIRST_CHILD(this),
FIRST_CHILD (this)->fops->unlink, &loc,
local->xflag, local->xattr_req);
loc_wipe (&loc);
-
next:
cur_block++;
- if (!--call_count)
- break;
+ cur_block_idx++;
}
- if (local->unlink_shards_barriered)
- syncbarrier_wait (&local->barrier, count);
+ syncbarrier_wait (&local->barrier, count);
+ SHARD_UNSET_ROOT_FS_ID (frame, local);
+ return 0;
+}
+
+int
+shard_regulated_shards_deletion (call_frame_t *cleanup_frame, xlator_t *this,
+ int now, int first_block, gf_dirent_t *entry)
+{ /* Resolves and unlinks one batch of @now shards, starting at block @first_block, of the gfid named by entry->d_name. Returns 0, -ENOMEM or -local->op_errno. */
+ int i = 0;
+ int ret = 0;
+ shard_local_t *local = NULL;
+ uuid_t gfid = {0,};
+
+ local = cleanup_frame->local;
+
+ local->inode_list = GF_CALLOC (now, sizeof (inode_t *),
+ gf_shard_mt_inode_list);
+ if (!local->inode_list)
+ return -ENOMEM;
+ local->op_ret = local->op_errno = 0; /* local is reused for every batch: do not inherit an earlier batch's failure */
+ local->first_block = first_block;
+ local->last_block = first_block + now - 1;
+ local->num_blocks = now;
+ gf_uuid_parse (entry->d_name, gfid); /* the marker's name is the base file's gfid in string form */
+ gf_uuid_copy (local->base_gfid, gfid);
+ local->resolver_base_inode = inode_find (this->itable, gfid); /* may be NULL; callees fall back to base_gfid */
+ local->call_count = 0;
+ syncbarrier_init (&local->barrier);
+
+ shard_common_resolve_shards (cleanup_frame, this,
+ shard_post_resolve_unlink_handler);
+
+ for (i = 0; i < local->num_blocks; i++) { /* drop the refs held in inode_list before freeing it */
+ if (local->inode_list[i])
+ inode_unref (local->inode_list[i]);
+ }
+ GF_FREE (local->inode_list);
+ local->inode_list = NULL;
+ if (local->op_ret)
+ ret = -local->op_errno;
+ syncbarrier_destroy (&local->barrier);
+ inode_unref (local->resolver_base_inode);
+ local->resolver_base_inode = NULL;
+ STACK_RESET (cleanup_frame->root); /* the same frame is reused for the next batch */
+ return ret;
+}
+
+
+int
+__shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this,
+ gf_dirent_t *entry, inode_t *inode)
+{ /* Deletes, in batches of local->deletion_rate, the shards of the gfid named by marker @entry, then unlinks the marker. Returns 0 or a negative errno. */
+ int ret = 0;
+ int shard_count = 0;
+ int first_block = 0;
+ int now = 0;
+ uint64_t size = 0;
+ uint64_t block_size = 0;
+ uint64_t size_array[4] = {0,};
+ void *bsize = NULL;
+ void *size_attr = NULL;
+ dict_t *xattr_rsp = NULL;
+ loc_t loc = {0,};
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+ priv = this->private;
+ local = cleanup_frame->local;
+ ret = dict_reset (local->xattr_req); /* xattr_req is shared across entries; start from an empty request */
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to reset dict");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_uint64 (local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); /* ask the lookup below to return the block-size xattr */
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set dict value: key:%s",
+ GF_XATTR_SHARD_BLOCK_SIZE);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_uint64 (local->xattr_req, GF_XATTR_SHARD_FILE_SIZE,
+ 8 * 4);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set dict value: key:%s",
+ GF_XATTR_SHARD_FILE_SIZE);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ loc.inode = inode_ref (inode);
+ loc.parent = inode_ref (priv->dot_shard_rm_inode);
+ ret = inode_path (loc.parent, entry->d_name, (char **)&(loc.path));
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed on %s", entry->d_name);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ loc.name = strrchr (loc.path, '/');
+ if (loc.name)
+ loc.name++;
+ ret = syncop_lookup (FIRST_CHILD(this), &loc, NULL, NULL,
+ local->xattr_req, &xattr_rsp); /* look up the marker file to fetch its size xattrs */
+ if (ret)
+ goto err;
+
+ ret = dict_get_ptr (xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to get dict value: key:%s",
+ GF_XATTR_SHARD_BLOCK_SIZE);
+ goto err;
+ }
+ block_size = ntoh64 (*((uint64_t *)bsize));
+ if (!block_size) { ret = -EINVAL; goto err; } /* a zero block size would divide by zero below */
+ ret = dict_get_ptr (xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to get dict value: key:%s",
+ GF_XATTR_SHARD_FILE_SIZE);
+ goto err;
+ }
+
+ memcpy (size_array, size_attr, sizeof (size_array));
+ size = ntoh64 (size_array[0]); /* first of the four 64-bit words is the file size */
+
+ shard_count = (size / block_size) - 1; /* block 0 is the base file, not a shard under .shard */
+ if (shard_count < 0) {
+ gf_msg_debug (this->name, 0, "Size of %s hasn't grown beyond "
+ "its shard-block-size. Nothing to delete. "
+ "Returning", entry->d_name);
+ /* File size < shard-block-size, so nothing to delete */
+ ret = 0;
+ goto delete_marker;
+ }
+ if ((size % block_size) > 0)
+ shard_count++; /* a partial trailing block is still a shard */
+
+ if (shard_count == 0) {
+ gf_msg_debug (this->name, 0, "Size of %s is exactly equal to "
+ "its shard-block-size. Nothing to delete. "
+ "Returning", entry->d_name);
+ ret = 0;
+ goto delete_marker;
+ }
+ gf_msg_debug (this->name, 0, "base file = %s, "
+ "shard-block-size=%"PRIu64", file-size=%"PRIu64", "
+ "shard_count=%d", entry->d_name, block_size, size,
+ shard_count);
+
+ /* Perform a gfid-based lookup to see if gfid corresponding to marker
+ * file's base name exists.
+ */
+ loc_wipe (&loc);
+ loc.inode = inode_new (this->itable);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ gf_uuid_parse (entry->d_name, loc.gfid);
+ ret = syncop_lookup (FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+ if (!ret) {
+ gf_msg_debug (this->name, 0, "Base shard corresponding to gfid "
+ "%s is present. Skipping shard deletion. "
+ "Returning", entry->d_name);
+ goto delete_marker;
+ } else if ((ret != -ENOENT) && (ret != -ESTALE))
+ goto err; /* only a definite "not found" proves the base file is gone; keep the marker and retry later otherwise */
+
+ first_block = 1;
+
+ while (shard_count) {
+ if (shard_count < local->deletion_rate) {
+ now = shard_count;
+ shard_count = 0;
+ } else {
+ now = local->deletion_rate;
+ shard_count -= local->deletion_rate;
+ }
+
+ gf_msg_debug (this->name, 0, "deleting %d shards starting from "
+ "block %d of gfid %s", now, first_block,
+ entry->d_name);
+ ret = shard_regulated_shards_deletion (cleanup_frame, this,
+ now, first_block,
+ entry);
+ if (ret)
+ goto err; /* marker is kept so the remaining shards can be retried */
+ first_block += now;
+ }
+
+delete_marker:
+ loc_wipe (&loc);
+ loc.inode = inode_ref (inode);
+ loc.parent = inode_ref (priv->dot_shard_rm_inode);
+ ret = inode_path (loc.parent, entry->d_name, (char **)&(loc.path));
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed on %s", entry->d_name);
+ ret = -ENOMEM;
+ goto err;
+ }
+ loc.name = strrchr (loc.path, '/');
+ if (loc.name)
+ loc.name++;
+ ret = syncop_unlink (FIRST_CHILD(this), &loc, NULL, NULL);
+ if (ret)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ SHARD_MSG_SHARDS_DELETION_FAILED, "Failed to delete %s "
+ "from /%s", entry->d_name, GF_SHARD_REMOVE_ME_DIR);
+err:
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+ loc_wipe (&loc);
+ return ret;
+}
+
+int
+shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this,
+ gf_dirent_t *entry, inode_t *inode)
+{ /* Runs __shard_delete_shards_of_entry for @entry while holding a write entrylk on entry->d_name under priv->dot_shard_rm_inode. */
+ int ret = -1;
+ loc_t loc = {0,};
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ loc.inode = inode_ref (priv->dot_shard_rm_inode);
+
+ ret = syncop_entrylk (FIRST_CHILD(this), this->name, &loc,
+ entry->d_name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL,
+ NULL);
+ if (ret)
+ goto out; /* lock not acquired: nothing to unlock, return the lock error */
+ {
+ ret = __shard_delete_shards_of_entry (cleanup_frame, this,
+ entry, inode);
+ }
+ syncop_entrylk (FIRST_CHILD(this), this->name, &loc, entry->d_name,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); /* unlock result is ignored; ret keeps the deletion status */
+out:
+ loc_wipe (&loc);
+ return ret;
+}
+
+int
+shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data)
+{ /* Records the outcome of the background deletion in priv->first_lookup and destroys @frame; @data is unused. */
+ xlator_t *this = NULL;
+ shard_priv_t *priv = NULL;
+
+ this = frame->this;
+ priv = this->private;
+
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, -ret,
+ SHARD_MSG_SHARDS_DELETION_FAILED,
+ "Background deletion of shards failed");
+ priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; /* presumably lets a later trigger start the cleanup again - confirm against shard_start_background_deletion */
+ } else {
+ priv->first_lookup = SHARD_FIRST_LOOKUP_DONE;
+ }
+ SHARD_STACK_DESTROY (frame);
return 0;
}
int
+shard_resolve_internal_dir (xlator_t *this, shard_local_t *local,
+ shard_internal_dir_type_t type)
+{ /* Makes sure the loc of the internal directory selected by @type has an inode, looking it up on the child if it is not in the inode table. Returns 0 or a negative errno. */
+ int ret = 0;
+ char *bname = NULL;
+ loc_t *loc = NULL;
+ shard_priv_t *priv = NULL;
+ unsigned char *gfid = NULL; /* points into priv: dict_set_static_bin stores the pointer, so it must outlive this call */
+ struct iatt stbuf = {0,};
+
+ priv = this->private;
+
+ switch (type) {
+ case SHARD_INTERNAL_DIR_DOT_SHARD:
+ loc = &local->dot_shard_loc;
+ gfid = priv->dot_shard_gfid;
+ bname = GF_SHARD_DIR;
+ break;
+ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+ loc = &local->dot_shard_rm_loc;
+ gfid = priv->dot_shard_rm_gfid;
+ bname = GF_SHARD_REMOVE_ME_DIR;
+ break;
+ default:
+ return -EINVAL; /* unknown type: loc would be NULL and dereferenced below */
+ }
+
+ loc->inode = inode_find (this->itable, gfid);
+ if (!loc->inode) {
+ ret = shard_init_internal_dir_loc (this, local, type);
+ if (ret)
+ goto err;
+ ret = dict_reset (local->xattr_req);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ SHARD_MSG_DICT_OP_FAILED, "Failed to reset "
+ "dict");
+ ret = -ENOMEM;
+ goto err;
+ }
+ ret = dict_set_static_bin (local->xattr_req, "gfid-req", gfid, 16);
+ if (ret) { ret = -ENOMEM; goto err; } /* do not let the lookup result mask a failed dict set */
+ ret = syncop_lookup (FIRST_CHILD(this), loc, &stbuf, NULL,
+ local->xattr_req, NULL);
+ if (ret < 0) {
+ if (ret != -ENOENT) /* -ENOENT is passed up silently; the caller treats it as nothing to delete */
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ SHARD_MSG_SHARDS_DELETION_FAILED,
+ "Lookup on %s failed, exiting", bname);
+ goto err;
+ } else {
+ shard_link_internal_dir_inode (local,
+ loc->inode, &stbuf,
+ type);
+ }
+ }
+ ret = 0;
+err:
+ return ret;
+}
+
+int
+shard_lookup_marker_entry (xlator_t *this, shard_local_t *local,
+ gf_dirent_t *entry)
+{ /* Looks up @entry by name under local->fd->inode and, on success, stores a ref to the new inode in entry->inode. Returns 0 or a negative errno. */
+ int ret = 0;
+ loc_t loc = {0,};
+
+ loc.inode = inode_new (this->itable);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ loc.parent = inode_ref (local->fd->inode);
+
+ ret = inode_path (loc.parent, entry->d_name, (char **)&(loc.path));
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed on %s", entry->d_name);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ loc.name = strrchr (loc.path, '/');
+ if (loc.name)
+ loc.name++;
+
+ ret = syncop_lookup (FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+ if (ret < 0) {
+ goto err;
+ }
+ entry->inode = inode_ref (loc.inode); /* extra ref for the entry; loc's own ref is dropped by loc_wipe below */
+ ret = 0;
+err:
+ loc_wipe (&loc);
+ return ret;
+}
+
+int
+shard_delete_shards (void *opaque)
+{ /* Reads the .remove_me directory and deletes the shards of every marker entry found there; @opaque is the cleanup frame. */
+ int ret = 0;
+ off_t offset = 0;
+ loc_t loc = {0,}; /* never populated here; only wiped at err */
+ inode_t *link_inode = NULL;
+ xlator_t *this = NULL;
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *cleanup_frame = NULL;
+
+ this = THIS;
+ priv = this->private;
+ INIT_LIST_HEAD (&entries.list);
+
+ cleanup_frame = opaque;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ SHARD_MSG_MEMALLOC_FAILED, "Failed to create local to "
+ "delete shards");
+ ret = -ENOMEM;
+ goto err;
+ }
+ cleanup_frame->local = local;
+
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ local->deletion_rate = priv->deletion_rate; /* snapshot of the option value for this run */
+
+ ret = shard_resolve_internal_dir (this, local,
+ SHARD_INTERNAL_DIR_DOT_SHARD);
+ if (ret == -ENOENT) {
+ gf_msg_debug (this->name, 0, ".shard absent. Nothing to"
+ " delete. Exiting");
+ ret = 0;
+ goto err;
+ } else if (ret < 0) {
+ goto err;
+ }
+
+ ret = shard_resolve_internal_dir (this, local,
+ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+ if (ret == -ENOENT) {
+ gf_msg_debug (this->name, 0, ".remove_me absent. "
+ "Nothing to delete. Exiting");
+ ret = 0;
+ goto err;
+ } else if (ret < 0) {
+ goto err;
+ }
+
+ local->fd = fd_anonymous (local->dot_shard_rm_loc.inode);
+ if (!local->fd) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, 131072,
+ offset, &entries, local->xattr_req,
+ NULL))) { /* loop runs for any non-zero return, including a negative error */
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry (entry, &entries.list, list) {
+ offset = entry->d_off; /* next readdirp resumes after this entry */
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (!entry->inode) {
+ ret = shard_lookup_marker_entry (this, local,
+ entry);
+ if (ret < 0)
+ continue;
+ }
+ link_inode = inode_link (entry->inode, local->fd->inode,
+ entry->d_name, &entry->d_stat); /* NOTE(review): result is used without a NULL check - confirm inode_link cannot fail here */
+
+ gf_msg_debug (this->name, 0, "Initiating deletion of "
+ "shards of gfid %s", entry->d_name);
+ ret = shard_delete_shards_of_entry (cleanup_frame, this,
+ entry, link_inode);
+ inode_unlink (link_inode, local->fd->inode,
+ entry->d_name);
+ inode_unref (link_inode);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ SHARD_MSG_SHARDS_DELETION_FAILED,
+ "Failed to clean up shards of gfid %s",
+ entry->d_name);
+ continue; /* move on to the next marker; ret is overwritten by the next entry */
+ }
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ SHARD_MSG_SHARDS_DELETION_COMPLETED, "Deleted "
+ "shards of gfid=%s from backend",
+ entry->d_name);
+ }
+ gf_dirent_free (&entries);
+ if (ret) /* non-zero here means readdirp failed or the last entry of this batch failed */
+ break;
+ }
+ ret = 0; /* NOTE(review): this also discards a readdirp error, so the callback sees success - confirm intended */
+err:
+ loc_wipe (&loc);
+ return ret;
+}
+
+int
shard_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -3394,7 +3900,10 @@ shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postoldparent = *postparent;
if (xdata)
local->xattr_rsp = dict_ref (xdata);
+ if (local->cleanup_required)
+ shard_start_background_deletion (this);
}
+
if (local->entrylk_frame) {
ret = shard_unlock_entrylk (frame, this);
if (ret < 0) {
@@ -3408,6 +3917,7 @@ shard_unlink_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = -1;
local->op_errno = -ret;
}
+
shard_unlink_cbk (frame, this);
return 0;
}
@@ -3576,6 +4086,7 @@ shard_post_lookup_base_shard_rm_handler (call_frame_t *frame, xlator_t *this)
} else {
gf_msg_debug (this->name, 0, "link count on %s = 1, creating "
"file under .remove_me", local->int_inodelk.loc.path);
+ local->cleanup_required = _gf_true;
shard_acquire_entrylk (frame, this, priv->dot_shard_rm_inode,
local->prebuf.ia_gfid);
}
@@ -3788,20 +4299,6 @@ err:
}
int
-shard_rename_cbk (call_frame_t *frame, xlator_t *this)
-{
- shard_local_t *local = NULL;
-
- local = frame->local;
-
- SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->preoldparent,
- &local->postoldparent, &local->prenewparent,
- &local->postnewparent, local->xattr_rsp);
- return 0;
-}
-
-int
shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this)
{
shard_rename_cbk (frame, this);
@@ -3854,6 +4351,8 @@ shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = -ret;
goto err;
}
+ if (local->cleanup_required)
+ shard_start_background_deletion (this);
}
/* Now the base file of src, if sharded, is looked up to gather ia_size
@@ -4822,7 +5321,7 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
if (dict_set_uint32 (local->xattr_req,
GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
- gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
"Failed to set "GLUSTERFS_WRITE_UPDATE_ATOMIC" into "
"dict: %s", uuid_utoa (fd->inode->gfid));
local->op_ret = -1;
@@ -5141,7 +5640,7 @@ shard_mkdir_internal_dir (call_frame_t *frame, xlator_t *this,
ret = dict_set_bin (xattr_req, "gfid-req", *gfid, 16);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED,
+ gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
"Failed to set gfid-req for %s",
shard_internal_dir_string (type));
goto err;
@@ -6186,6 +6685,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("shard-block-size", priv->block_size, size_uint64, out);
+ GF_OPTION_INIT ("shard-deletion-rate", priv->deletion_rate, uint32, out);
+
this->local_pool = mem_pool_new (shard_local_t, 128);
if (!this->local_pool) {
ret = -1;
@@ -6241,6 +6742,8 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("shard-block-size", priv->block_size, options, size,
out);
+ GF_OPTION_RECONF ("shard-deletion-rate", priv->deletion_rate, options,
+ uint32, out);
ret = 0;
out:
@@ -6364,5 +6867,12 @@ struct volume_options options[] = {
.description = "The size unit used to break a file into multiple "
"chunks",
},
+ { .key = {"shard-deletion-rate"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "100",
+ .min = 100,
+ .max = INT_MAX,
+ .description = "The number of shards to send deletes on at a time",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 1783ff6..5de098a 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -130,9 +130,9 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
sizeof (*__bs)); \
if (__ret) { \
gf_msg (this->name, GF_LOG_WARNING, 0, \
- SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \
+ SHARD_MSG_DICT_OP_FAILED, "Failed to set key: %s " \
"on path %s", GF_XATTR_SHARD_BLOCK_SIZE, (loc)->path);\
- GF_FREE (__bs); \
+ GF_FREE (__bs); \
goto label; \
} \
\
@@ -144,7 +144,7 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
__size_attr, 8 * 4); \
if (__ret) { \
gf_msg (this->name, GF_LOG_WARNING, 0, \
- SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \
+ SHARD_MSG_DICT_OP_FAILED, "Failed to set key: %s " \
"on path %s", GF_XATTR_SHARD_FILE_SIZE, (loc)->path); \
GF_FREE (__size_attr); \
goto label; \
@@ -160,7 +160,7 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
local->op_ret = -1; \
local->op_errno = ENOMEM; \
gf_msg (this->name, GF_LOG_WARNING, 0, \
- SHARD_MSG_DICT_SET_FAILED, "Failed to set dict value:"\
+ SHARD_MSG_DICT_OP_FAILED, "Failed to set dict value:"\
" key:%s for %s.", GF_XATTR_SHARD_FILE_SIZE, \
uuid_utoa (gfid)); \
goto label; \
@@ -197,6 +197,12 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
} \
} while (0)
+typedef enum { /* state kept in shard_priv_t.first_lookup */
+ SHARD_FIRST_LOOKUP_PENDING = 0, /* also restored by shard_delete_shards_cbk when background deletion fails */
+ SHARD_FIRST_LOOKUP_IN_PROGRESS, /* presumably set while the background deletion task runs - set site not shown here, confirm */
+ SHARD_FIRST_LOOKUP_DONE, /* set by shard_delete_shards_cbk when background deletion returns success */
+} shard_first_lookup_state_t;
+
/* rm = "remove me" */
typedef struct shard_priv {
@@ -208,6 +214,8 @@ typedef struct shard_priv {
gf_lock_t lock;
int inode_count;
struct list_head ilist_head;
+ uint32_t deletion_rate;
+ shard_first_lookup_state_t first_lookup;
} shard_priv_t;
typedef struct {
@@ -303,6 +311,9 @@ typedef struct shard_local {
call_frame_t *main_frame;
call_frame_t *inodelk_frame;
call_frame_t *entrylk_frame;
+ uint32_t deletion_rate;
+ gf_boolean_t cleanup_required;
+ uuid_t base_gfid;
} shard_local_t;
typedef struct shard_inode_ctx {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 5a697cf..4357562 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -3298,6 +3298,11 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_7_0,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .key = "features.shard-deletion-rate",
+ .voltype = "features/shard",
+ .op_version = GD_OP_VERSION_4_2_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
{ .key = "features.scrub-throttle",
.voltype = "features/bit-rot",
.value = "lazy",
--
1.8.3.1