From 6e04ab4906dc70cc4762b938c49ff4467989bcf8 Mon Sep 17 00:00:00 2001 From: Rinku Kothiya Date: Thu, 21 Nov 2019 01:11:39 -0500 Subject: [PATCH] autobuild v6.0-23 Resolves: bz#1344758 bz#1599802 bz#1685406 bz#1686800 bz#1724021 Resolves: bz#1726058 bz#1727755 bz#1731513 bz#1741193 bz#1758923 Resolves: bz#1761326 bz#1761486 bz#1762180 bz#1764095 bz#1766640 Signed-off-by: Rinku Kothiya --- ...p-Fix-py2-py3-compatibility-in-repce.patch | 52 + ...hon-prettytable-dependency-for-rhel6.patch | 51 + 0317-Update-rfc.sh-to-rhgs-3.5.1.patch | 43 + 0318-Update-rfc.sh-to-rhgs-3.5.1.patch | 114 + ...w-server-obtain-the-list-of-snapshot.patch | 48 + ...gf-event-Handle-unix-volfile-servers.patch | 58 + ...e-spaces-to-description-of-set-group.patch | 55 + ...-correct-rebalance-data-size-after-g.patch | 65 + ...-cli-display-detailed-rebalance-info.patch | 101 + ...-SELinux-label-on-new-bricks-during-.patch | 128 + ...tall-and-package-newly-added-post-ad.patch | 52 + ...nt.t-is-failing-for-brick_mux-regrss.patch | 51 + ...glusterfind-integrate-with-gfid2path.patch | 93 + ...ning-and-abort-in-case-of-failures-i.patch | 55 + ...-entries-when-there-is-a-source-no-h.patch | 165 + ...t.glusterfs-change-the-error-message.patch | 59 + ...o-special-handling-for-op-version-3..patch | 44 + ...-one-top-command-from-gluster-v-help.patch | 57 + ...rpc-Synchronize-slot-allocation-code.patch | 195 + ...xattr-failure-for-node-uuid-at-DEBUG.patch | 54 + ...ts-RHEL8-test-failure-fixes-for-RHGS.patch | 15991 ++++++++++++++++ glusterfs.spec | 37 +- 22 files changed, 17566 insertions(+), 2 deletions(-) create mode 100644 0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch create mode 100644 0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch create mode 100644 0317-Update-rfc.sh-to-rhgs-3.5.1.patch create mode 100644 0318-Update-rfc.sh-to-rhgs-3.5.1.patch create mode 100644 0319-features-snapview-server-obtain-the-list-of-snapshot.patch create mode 100644 
0320-gf-event-Handle-unix-volfile-servers.patch create mode 100644 0321-Adding-white-spaces-to-description-of-set-group.patch create mode 100644 0322-glusterd-display-correct-rebalance-data-size-after-g.patch create mode 100644 0323-cli-display-detailed-rebalance-info.patch create mode 100644 0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch create mode 100644 0325-extras-hooks-Install-and-package-newly-added-post-ad.patch create mode 100644 0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch create mode 100644 0327-glusterfind-integrate-with-gfid2path.patch create mode 100644 0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch create mode 100644 0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch create mode 100644 0330-mount.glusterfs-change-the-error-message.patch create mode 100644 0331-features-locks-Do-special-handling-for-op-version-3..patch create mode 100644 0332-Removing-one-top-command-from-gluster-v-help.patch create mode 100644 0333-rpc-Synchronize-slot-allocation-code.patch create mode 100644 0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch create mode 100644 0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch diff --git a/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch b/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch new file mode 100644 index 0000000..a0448cc --- /dev/null +++ b/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch @@ -0,0 +1,52 @@ +From 4a04e1b5540921db22f1894f71eb30342127192d Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 12 Nov 2019 21:53:20 +0530 +Subject: [PATCH 315/316] geo-rep: Fix py2/py3 compatibility in repce + +Geo-rep fails to start on python2 only machine like +centos6. It fails with "ImportError no module named _io". +This patch fixes the same. 
+ +Backport of: + > Patch: https://review.gluster.org/23702 + > fixes: bz#1771577 + > Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec + > Signed-off-by: Kotresh HR + +BUG: 1771524 +Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/185377 +Tested-by: RHGS Build Bot +Reviewed-by: Sunny Kumar +--- + geo-replication/syncdaemon/repce.py | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/geo-replication/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py +index 6065b82..c622afa 100644 +--- a/geo-replication/syncdaemon/repce.py ++++ b/geo-replication/syncdaemon/repce.py +@@ -8,7 +8,6 @@ + # cases as published by the Free Software Foundation. + # + +-import _io + import os + import sys + import time +@@ -58,9 +57,9 @@ def recv(inf): + """load an object from input stream + python2 and python3 compatibility, inf is sys.stdin + and is opened as text stream by default. Hence using the +- buffer attribute ++ buffer attribute in python3 + """ +- if isinstance(inf, _io.TextIOWrapper): ++ if hasattr(inf, "buffer"): + return pickle.load(inf.buffer) + else: + return pickle.load(inf) +-- +1.8.3.1 + diff --git a/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch b/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch new file mode 100644 index 0000000..c2045a0 --- /dev/null +++ b/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch @@ -0,0 +1,51 @@ +From b9a19aef5de94eb91162448ad687f2d2d194f82c Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Thu, 14 Nov 2019 09:55:15 +0000 +Subject: [PATCH 316/316] spec: fixed python-prettytable dependency for rhel6 + +Installing glusterfs on rhel6 was failing with python-prettytable +dependency as it required python2-prettytable for glusterfs-events. +This patch conditionally sets the python version for rhel7 and +fixes the problem. 
+ +Label: DOWNSTREAM ONLY + +BUG: 1771614 + +Change-Id: I6288daa5d8c2d82a6d73a0d9722786a2a99b9db5 +fixes: bz#1771614 +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185385 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 3c2e2dc..eeadb65 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -706,7 +706,7 @@ This package provides the translators needed on any GlusterFS client. + %package events + Summary: GlusterFS Events + Requires: %{name}-server%{?_isa} = %{version}-%{release} +-Requires: python%{_pythonver} python%{_pythonver}-prettytable ++Requires: python%{_pythonver} + Requires: python%{_pythonver}-gluster = %{version}-%{release} + %if ( 0%{?rhel} && 0%{?rhel} < 8 ) + Requires: python-requests +@@ -714,7 +714,10 @@ Requires: python-requests + Requires: python%{_pythonver}-requests + %endif + %if ( 0%{?rhel} && 0%{?rhel} < 7 ) ++Requires: python-prettytable + Requires: python-argparse ++%else ++Requires: python%{_pythonver}-prettytable + %endif + %if ( 0%{?_with_systemd:1} ) + %{?systemd_requires} +-- +1.8.3.1 + diff --git a/0317-Update-rfc.sh-to-rhgs-3.5.1.patch b/0317-Update-rfc.sh-to-rhgs-3.5.1.patch new file mode 100644 index 0000000..eccf2e3 --- /dev/null +++ b/0317-Update-rfc.sh-to-rhgs-3.5.1.patch @@ -0,0 +1,43 @@ +From 985ef94c63859907339c11b158e4540a5568d638 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Mon, 18 Nov 2019 02:25:25 -0500 +Subject: [PATCH 317/335] Update rfc.sh to rhgs-3.5.1 + +Signed-off-by: Rinku Kothiya +--- + README | 9 +++++++++ + rfc.sh | 2 +- + 2 files changed, 10 insertions(+), 1 deletion(-) + create mode 100644 README + +diff --git a/README b/README +new file mode 100644 +index 0000000..44a118b +--- /dev/null ++++ b/README +@@ -0,0 +1,9 @@ ++ ++'master' branch is just dummy branch in downstream. 
Any reference to 'upstream' ++will point to http://git.gluster.org. ++ ++You can checkout the release specific branch by running below command ++ bash$ git checkout -t -b rhs-x.y origin/rhs-x.y ++ ++Happy Hacking!! ++ +diff --git a/rfc.sh b/rfc.sh +index 94c92ef..69ddd2b 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.0"; ++branch="rhgs-3.5.1"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/0318-Update-rfc.sh-to-rhgs-3.5.1.patch b/0318-Update-rfc.sh-to-rhgs-3.5.1.patch new file mode 100644 index 0000000..e65ae38 --- /dev/null +++ b/0318-Update-rfc.sh-to-rhgs-3.5.1.patch @@ -0,0 +1,114 @@ +From 1f03327887645be2500cd29f69f7a77a4f5d0164 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Mon, 18 Nov 2019 14:25:12 -0500 +Subject: [PATCH 318/335] Update rfc.sh to rhgs-3.5.1 + +Removed the checks for updates and fixes from rfc.sh + +Label: DOWNSTREAM ONLY + +Change-Id: I436c959aa3b3366cd313b29f41c2466c4072efd7 +Signed-off-by: Rinku Kothiya +--- + rfc.sh | 47 ++++++++--------------------------------------- + 1 file changed, 8 insertions(+), 39 deletions(-) + +diff --git a/rfc.sh b/rfc.sh +index 69ddd2b..918fb11 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -129,13 +129,8 @@ editor_mode() + + if [ $(basename "$1") = "COMMIT_EDITMSG" ]; then + # see note above function warn_reference_missing for regex elaboration +- # Lets first check for github issues +- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?#[[:digit:]]+" | awk -F '#' '{print $2}'); +- if [ "x${ref}" = "x" ]; then +- # if not found, check for bugs +- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+bz#[[:digit:]]+" | awk -F '#' '{print $2}'); +- fi + ++ ref=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+") + if [ "x${ref}" != "x" ]; then + return; + fi +@@ -157,16 
+152,6 @@ editor_mode() + bz_string="" + fi + +- echo "Select yes '(y)' if this patch fixes the bug/feature completely," +- echo -n "or is the last of the patchset which brings feature (Y/n): " +- read fixes +- fixes_string="fixes" +- if [ "${fixes}" = 'N' ] || [ "${fixes}" = 'n' ]; then +- fixes_string="updates" +- fi +- +- sed "/^Change-Id:/{p; s/^.*$/${fixes_string}: ${bz_string}#${bug}/;}" $1 > $1.new && \ +- mv $1.new $1; + return; + done + fi +@@ -234,8 +219,8 @@ check_patches_for_coding_style() + # IOW, the above helps us find the pattern with leading or training spaces + # or non word consituents like , or ; + # +-# [fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS]) +-# Finds 'fixes' OR 'updates' in any case combination ++# [bB][uU][gG] ++# Finds 'bug' in any case + # + # (:)? + # Followed by an optional : (colon) +@@ -256,28 +241,11 @@ warn_reference_missing() + echo "" + echo "=== Missing a reference in commit! ===" + echo "" +- echo "Gluster commits are made with a reference to a bug or a github issue" +- echo "" +- echo "Submissions that are enhancements (IOW, not functional" +- echo "bug fixes, but improvements of any nature to the code) are tracked" +- echo "using github issues [1]." ++ echo "You must give BUG: " + echo "" +- echo "Submissions that are bug fixes are tracked using Bugzilla [2]." 
++ echo "for example:" + echo "" +- echo "A check on the commit message, reveals that there is no bug or" +- echo "github issue referenced in the commit message" +- echo "" +- echo "[1] https://github.com/gluster/glusterfs/issues/new" +- echo "[2] https://bugzilla.redhat.com/enter_bug.cgi?product=GlusterFS" +- echo "" +- echo "Please file an issue or a bug report and reference the same in the" +- echo "commit message using the following tags:" +- echo "GitHub Issues:" +- echo "\"Fixes: gluster/glusterfs#n\" OR \"Updates: gluster/glusterfs#n\"," +- echo "\"Fixes: #n\" OR \"Updates: #n\"," +- echo "Bugzilla ID:" +- echo "\"Fixes: bz#n\" OR \"Updates: bz#n\"," +- echo "where n is the issue or bug number" ++ echo "BUG: 1234567" + echo "" + echo "You may abort the submission choosing 'N' below and use" + echo "'git commit --amend' to add the issue reference before posting" +@@ -312,7 +280,7 @@ main() + assert_diverge; + + # see note above function warn_reference_missing for regex elaboration +- reference=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?(bz)?#[[:digit:]]+" | awk -F '#' '{print $2}'); ++ reference=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+" | awk '{print $2}') + + # If this is a commit against master and does not have a bug ID or a github + # issue reference. Warn the contributor that one of the 2 is required +@@ -320,6 +288,7 @@ main() + warn_reference_missing; + fi + ++ + # TODO: add clang-format command here. It will after the changes are done everywhere else + clang_format=$(clang-format --version) + if [ ! 
-z "${clang_format}" ]; then +-- +1.8.3.1 + diff --git a/0319-features-snapview-server-obtain-the-list-of-snapshot.patch b/0319-features-snapview-server-obtain-the-list-of-snapshot.patch new file mode 100644 index 0000000..d37efaf --- /dev/null +++ b/0319-features-snapview-server-obtain-the-list-of-snapshot.patch @@ -0,0 +1,48 @@ +From 659bd2a0fde9ba0cb8fc3905bcdb63d91e3dfa9d Mon Sep 17 00:00:00 2001 +From: Raghavendra Bhat +Date: Tue, 2 Jul 2019 16:50:23 -0400 +Subject: [PATCH 319/335] features/snapview-server: obtain the list of + snapshots inside the lock + +The current list of snapshots from priv->dirents is obtained outside +the lock. + +Upstream patch: +> Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177 +> Fixes: bz#1726783 +> Signed-off-by: Raghavendra Bhat +> patch: https://review.gluster.org/#/c/glusterfs/+/22990/ + +BUG: 1731513 +Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177 +Signed-off-by: Raghavendra Bhat +Reviewed-on: https://code.engineering.redhat.com/gerrit/185838 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/snapview-server/src/snapview-server-mgmt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c +index bc415ef..3d64383 100644 +--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c ++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c +@@ -256,7 +256,6 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, + this = frame->this; + ctx = frame->this->ctx; + priv = this->private; +- old_dirents = priv->dirents; + + if (!ctx) { + errno = EINVAL; +@@ -388,6 +387,7 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, + LOCK(&priv->snaplist_lock); + { + oldcount = priv->num_snaps; ++ old_dirents = priv->dirents; + for (i = 0; i < priv->num_snaps; i++) { + for (j = 0; j < snapcount; 
j++) { + if ((!strcmp(old_dirents[i].name, dirents[j].name)) && +-- +1.8.3.1 + diff --git a/0320-gf-event-Handle-unix-volfile-servers.patch b/0320-gf-event-Handle-unix-volfile-servers.patch new file mode 100644 index 0000000..48a9cad --- /dev/null +++ b/0320-gf-event-Handle-unix-volfile-servers.patch @@ -0,0 +1,58 @@ +From 7e5d8dcb4f557eaca259e8d81cf34d651907396c Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Thu, 24 Oct 2019 12:24:35 +0530 +Subject: [PATCH 320/335] gf-event: Handle unix volfile-servers + +Problem: +glfsheal program uses unix-socket-based volfile server. +volfile server will be the path to socket in this case. +gf_event expects this to be hostname in all cases. So getaddrinfo +will fail on the unix-socket path, events won't be sent in this case. + +Fix: +In case of unix sockets, default to localhost + +upstream-patch: https://review.gluster.org/c/glusterfs/+/23606 +BUG: 1758923 +Change-Id: I60d27608792c29d83fb82beb5fde5ef4754bece8 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/185851 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/events.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 9d33783..4e2f8f9 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -43,6 +43,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + struct addrinfo *result = NULL; + xlator_t *this = THIS; + int sin_family = AF_INET; ++ char *volfile_server_transport = NULL; + + /* Global context */ + ctx = THIS->ctx; +@@ -62,8 +63,16 @@ _gf_event(eventtypes_t event, const char *fmt, ...) 
+ memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + ++ if (ctx) { ++ volfile_server_transport = ctx->cmd_args.volfile_server_transport; ++ } ++ ++ if (!volfile_server_transport) { ++ volfile_server_transport = "tcp"; ++ } + /* Get Host name to send message */ +- if (ctx && ctx->cmd_args.volfile_server) { ++ if (ctx && ctx->cmd_args.volfile_server && ++ (strcmp(volfile_server_transport, "unix"))) { + /* If it is client code then volfile_server is set + use that information to push the events. */ + if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints, +-- +1.8.3.1 + diff --git a/0321-Adding-white-spaces-to-description-of-set-group.patch b/0321-Adding-white-spaces-to-description-of-set-group.patch new file mode 100644 index 0000000..8dec96f --- /dev/null +++ b/0321-Adding-white-spaces-to-description-of-set-group.patch @@ -0,0 +1,55 @@ +From 5e7a2ad35a174d6d0ee5ed58a3e27955e85aa47c Mon Sep 17 00:00:00 2001 +From: kshithijiyer +Date: Mon, 24 Jun 2019 20:08:48 +0530 +Subject: [PATCH 321/335] Adding white spaces to description of set group. 
+ +The description of set group is missing spaces which +leads to the description look like: +volume set group - This option can be used for +setting multiple pre-defined volume optionswhere group_name is a +file under /var/lib/glusterd/groups containing onekey, value pair +per line + +Instead of: +volume set group - This option can be used for +setting multiple pre-defined volume options where group_name is a +file under /var/lib/glusterd/groups containing one key value +pair per line + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/22934/ +> Fixes: bz#1723455 +> Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f +> Signed-off-by: kshithijiyer + +BUG: 1724021 +Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185756 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-volume.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 6b958bd..66beb1b 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3393,10 +3393,10 @@ struct cli_cmd volume_cmds[] = { + {"volume set ", cli_cmd_volume_set_cbk, + "set options for volume "}, + +- {"volume set group ", cli_cmd_volume_set_cbk, +- "This option can be used for setting multiple pre-defined volume options" +- "where group_name is a file under /var/lib/glusterd/groups containing one" +- "key, value pair per line"}, ++ {"volume set group ", cli_cmd_volume_set_cbk, ++ "This option can be used for setting multiple pre-defined volume options " ++ "where group_name is a file under /var/lib/glusterd/groups containing one " ++ "key value pair per line"}, + + {"volume log rotate [BRICK]", cli_cmd_log_rotate_cbk, + "rotate the log file for corresponding volume/brick"}, +-- +1.8.3.1 + diff --git a/0322-glusterd-display-correct-rebalance-data-size-after-g.patch 
b/0322-glusterd-display-correct-rebalance-data-size-after-g.patch new file mode 100644 index 0000000..35a234b --- /dev/null +++ b/0322-glusterd-display-correct-rebalance-data-size-after-g.patch @@ -0,0 +1,65 @@ +From 9be255f76c78fcbbda1e3a72eb2e99d3aface53e Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Wed, 16 Oct 2019 23:26:03 +0530 +Subject: [PATCH 322/335] glusterd: display correct rebalance data size after + glusterd restart + +Problem: After completion of rebalance, if glusterd is restarted, +rebalance status displays wrong rebalance data size in its output. + +Cause: While glusterd restoring the information from /var/lib/glusterd/ +into its memory, glusterd fetches rebalance_data from +/var/lib/glusterd/vols/volname/node_state.info. This value is +converted into an integer using atoi(), which is returning +incorrect value for larger values. + +Solution: use sscanf() instead of atoi() to convert string to +integer(in this case it is unsigned long) + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23560/ +> fixes: bz#1762438 +> Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e +> Signed-off-by: Sanju Rakonde + +BUG: 1761486 +Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185752 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 8a10eb8..b3b5ee9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -2974,19 +2974,19 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo) + volinfo->rebal.op = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES))) { +- 
volinfo->rebal.rebalance_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE))) { +- volinfo->rebal.rebalance_data = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_data); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED))) { +- volinfo->rebal.lookedup_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.lookedup_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES))) { +- volinfo->rebal.rebalance_failures = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_failures); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED))) { +- volinfo->rebal.skipped_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.skipped_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) { + volinfo->rebal.rebalance_time = atoi(value); +-- +1.8.3.1 + diff --git a/0323-cli-display-detailed-rebalance-info.patch b/0323-cli-display-detailed-rebalance-info.patch new file mode 100644 index 0000000..a00faf8 --- /dev/null +++ b/0323-cli-display-detailed-rebalance-info.patch @@ -0,0 +1,101 @@ +From 852c475040a599ed35798dbb388c6b59c1d0a820 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Tue, 22 Oct 2019 15:06:29 +0530 +Subject: [PATCH 323/335] cli: display detailed rebalance info + +Problem: When one of the node is down in cluster, +rebalance status is not displaying detailed +information. + +Cause: In glusterd_volume_rebalance_use_rsp_dict() +we are aggregating rsp from all the nodes into a +dictionary and sending it to cli for printing. 
While +assigning a index to keys we are considering all the +peers instead of considering only the peers which are +up. Because of which, index is not reaching till 1. +while parsing the rsp cli unable to find status-1 +key in dictionary and going out without printing +any information. + +Solution: The simplest fix for this without much +code change is to continue to look for other keys +when status-1 key is not found. + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23588 +> fixes: bz#1764119 +> Change-Id: I0062839933c9706119eb85416256eade97e976dc +> Signed-off-by: Sanju Rakonde + +BUG: 1761326 +Change-Id: I0062839933c9706119eb85416256eade97e976dc +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185749 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-rpc-ops.c | 21 ++++++++++++++------- + tests/bugs/glusterd/rebalance-in-cluster.t | 9 +++++++++ + 2 files changed, 23 insertions(+), 7 deletions(-) + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index b167e26..4e91265 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -1597,13 +1597,20 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type, + goto out; + } + +- snprintf(key, sizeof(key), "status-1"); +- +- ret = dict_get_int32(dict, key, (int32_t *)&status_rcd); +- if (ret) { +- gf_log("cli", GF_LOG_TRACE, "count %d %d", count, 1); +- gf_log("cli", GF_LOG_TRACE, "failed to get status"); +- goto out; ++ for (i = 1; i <= count; i++) { ++ snprintf(key, sizeof(key), "status-%d", i); ++ ret = dict_get_int32(dict, key, (int32_t *)&status_rcd); ++ /* If information from a node is missing we should skip ++ * the node and try to fetch information of other nodes. ++ * If information is not found for all nodes, we should ++ * error out. 
++ */ ++ if (!ret) ++ break; ++ if (ret && i == count) { ++ gf_log("cli", GF_LOG_TRACE, "failed to get status"); ++ goto out; ++ } + } + + /* Fix layout will be sent to all nodes for the volume +diff --git a/tests/bugs/glusterd/rebalance-in-cluster.t b/tests/bugs/glusterd/rebalance-in-cluster.t +index 9565fae..469ec6c 100644 +--- a/tests/bugs/glusterd/rebalance-in-cluster.t ++++ b/tests/bugs/glusterd/rebalance-in-cluster.t +@@ -4,6 +4,10 @@ + . $(dirname $0)/../../cluster.rc + . $(dirname $0)/../../volume.rc + ++function rebalance_status_field_1 { ++ $CLI_1 volume rebalance $1 status | awk '{print $7}' | sed -n 3p ++} ++ + cleanup; + TEST launch_cluster 2; + TEST $CLI_1 peer probe $H2; +@@ -29,6 +33,11 @@ TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 + TEST $CLI_1 volume rebalance $V0 start + EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0 + ++#bug - 1764119 - rebalance status should display detailed info when any of the node is dowm ++TEST kill_glusterd 2 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field_1 $V0 ++ ++TEST start_glusterd 2 + #bug-1245142 + + $CLI_1 volume rebalance $V0 start & +-- +1.8.3.1 + diff --git a/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch b/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch new file mode 100644 index 0000000..26e1577 --- /dev/null +++ b/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch @@ -0,0 +1,128 @@ +From dcf3f74fa7e812dfe89667bd6219f70a8457f755 Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Thu, 6 Jun 2019 18:33:19 +0530 +Subject: [PATCH 324/335] extras/hooks: Add SELinux label on new bricks during + add-brick + +Backport of https://review.gluster.org/c/glusterfs/+/22834 + +Change-Id: Ifd8ae5eeb91b968cc1a9a9b5d15844c5233d56db +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/185855 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya 
+--- + .../add-brick/post/S10selinux-label-brick.sh | 100 +++++++++++++++++++++ + 1 file changed, 100 insertions(+) + create mode 100755 extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh + +diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +new file mode 100755 +index 0000000..4a17c99 +--- /dev/null ++++ b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +@@ -0,0 +1,100 @@ ++#!/bin/bash ++# ++# Install to hooks//add-brick/post ++# ++# Add an SELinux file context for each brick using the glusterd_brick_t type. ++# This ensures that the brick is relabeled correctly on an SELinux restart or ++# restore. Subsequently, run a restore on the brick path to set the selinux ++# labels. ++# ++### ++ ++PROGNAME="Sselinux" ++OPTSPEC="volname:,version:,gd-workdir:,volume-op:" ++VOL= ++ ++parse_args () { ++ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") ++ eval set -- "${ARGS}" ++ ++ while true; do ++ case ${1} in ++ --volname) ++ shift ++ VOL=${1} ++ ;; ++ --gd-workdir) ++ shift ++ GLUSTERD_WORKDIR=$1 ++ ;; ++ --version) ++ shift ++ ;; ++ --volume-op) ++ shift ++ ;; ++ *) ++ shift ++ break ++ ;; ++ esac ++ shift ++ done ++} ++ ++set_brick_labels() ++{ ++ local volname="${1}" ++ local fctx ++ local list=() ++ ++ fctx="$(semanage fcontext --list -C)" ++ ++ # wait for new brick path to be updated under ++ # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/ ++ sleep 5 ++ ++ # grab the path for each local brick ++ brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/" ++ brickdirs=$( ++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \ ++ cut -d= -f 2 | \ ++ sort -u ++ ) ++ ++ # create a list of bricks for which custom SELinux ++ # label doesn't exist ++ for b in ${brickdirs}; do ++ pattern="${b}(/.*)?" ++ echo "${fctx}" | grep "^${pattern}\s" >/dev/null ++ if [[ $? 
-ne 0 ]]; then ++ list+=("${pattern}") ++ fi ++ done ++ ++ # Add a file context for each brick path in the list and associate with the ++ # glusterd_brick_t SELinux type. ++ for p in ${list[@]} ++ do ++ semanage fcontext --add -t glusterd_brick_t -r s0 "${p}" ++ done ++ ++ # Set the labels for which SELinux label was added above ++ for b in ${brickdirs} ++ do ++ echo "${list[@]}" | grep "${b}" >/dev/null ++ if [[ $? -eq 0 ]]; then ++ restorecon -R "${b}" ++ fi ++ done ++} ++ ++SELINUX_STATE=$(which getenforce && getenforce) ++[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 ++ ++parse_args "$@" ++[ -z "${VOL}" ] && exit 1 ++ ++set_brick_labels "${VOL}" ++ ++exit 0 +-- +1.8.3.1 + diff --git a/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch b/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch new file mode 100644 index 0000000..8e5a5fa --- /dev/null +++ b/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch @@ -0,0 +1,52 @@ +From 27d69d8927a946562aef08a6edfee38b9998f96d Mon Sep 17 00:00:00 2001 +From: Anoop C S +Date: Wed, 12 Jun 2019 15:41:27 +0530 +Subject: [PATCH 325/335] extras/hooks: Install and package newly added post + add-brick hook script + +Previously a new SELinux hook script was added as a post add-brick +operation to label new brick paths. But the change failed to install +and package new script. Therefore making necessary changes to Makefile +and spec file to get it installed and packaged. 
+ +Backport of https://review.gluster.org/c/glusterfs/+/22856 + +Change-Id: I67b8f4982c2783c34a4bc749fb4387c19a038225 +BUG: 1686800 +Signed-off-by: Anoop C S +Reviewed-on: https://code.engineering.redhat.com/gerrit/185856 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/hook-scripts/add-brick/post/Makefile.am | 4 ++-- + glusterfs.spec.in | 1 + + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am +index bfc0c1c..9b236df 100644 +--- a/extras/hook-scripts/add-brick/post/Makefile.am ++++ b/extras/hook-scripts/add-brick/post/Makefile.am +@@ -1,6 +1,6 @@ +-EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh ++EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh + + hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/ + if WITH_SERVER +-hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh ++hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh + endif +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index eeadb65..91180db 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1447,6 +1447,7 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh +-- +1.8.3.1 + diff --git 
a/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch b/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch new file mode 100644 index 0000000..b0afcc7 --- /dev/null +++ b/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch @@ -0,0 +1,51 @@ +From a4f01ad90a0c0dfd0655da509c5ed2a11a507cc3 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 17 Jun 2019 11:10:42 +0530 +Subject: [PATCH 326/335] tests: subdir-mount.t is failing for brick_mux + regrssion + +To avoid the failure wait to run hook script S13create-subdir-mounts.sh +after executed add-brick command by test case. + +This is required as a dependency for the bz referenced below. + +Backport of https://review.gluster.org/c/glusterfs/+/22877 + +Change-Id: I063b6d0f86a550ed0a0527255e4dfbe8f0a8c02e +BUG: 1686800 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/185857 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/features/subdir-mount.t | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t +index 8401946..a02bd6b 100644 +--- a/tests/features/subdir-mount.t ++++ b/tests/features/subdir-mount.t +@@ -85,12 +85,17 @@ TEST $CLI volume start $V0 + TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2 + TEST stat $M2 + ++initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` + # mount shouldn't fail even after add-brick + TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6}; + +-# Give time for client process to get notified and use the new +-# volfile after add-brick +-sleep 1 ++# Wait to execute create-subdir-mounts.sh script by glusterd ++newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` ++while [ $newcnt -eq $initcnt ] ++do ++ newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` ++ sleep 1 ++done + + 
# Existing mount should still be active + mount_inode=$(stat --format "%i" "$M2") +-- +1.8.3.1 + diff --git a/0327-glusterfind-integrate-with-gfid2path.patch b/0327-glusterfind-integrate-with-gfid2path.patch new file mode 100644 index 0000000..e3e42fa --- /dev/null +++ b/0327-glusterfind-integrate-with-gfid2path.patch @@ -0,0 +1,93 @@ +From f89242132dc4756c827113154cc6ad18ad6bde88 Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Tue, 19 Feb 2019 12:49:12 +0530 +Subject: [PATCH 327/335] glusterfind: integrate with gfid2path + +Integration with gfid2path helps avoid file-system crawl and saves +precious time. Extended attributes starting with "trusted.gfid2path." +are read and the / values are extracted and the is +iteratively resolved from the brick backend to arrive at the full path. + +>Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0 +>fixes: #529 +>Signed-off-by: Milind Changire +>Signed-off-by: Shwetha K Acharya + +backport of https://review.gluster.org/#/c/glusterfs/+/22225/ +BUG: 1599802 +Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0 +Signed-off-by: Milind Changire +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185706 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tools/glusterfind/src/changelog.py | 45 ++++++++++++++++++++++++++++++++++---- + 1 file changed, 41 insertions(+), 4 deletions(-) + +diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py +index ef982db..d8f97e0 100644 +--- a/tools/glusterfind/src/changelog.py ++++ b/tools/glusterfind/src/changelog.py +@@ -114,6 +114,43 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): + continue + + ++def enum_hard_links_using_gfid2path(brick, gfid, args): ++ hardlinks = [] ++ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid) ++ if not os.path.isdir(p): ++ # we have a symlink or a normal file ++ try: ++ file_xattrs = xattr.list(p) ++ for x in file_xattrs: ++ if 
x.startswith("trusted.gfid2path."): ++ # get the value for the xattr i.e. / ++ v = xattr.getxattr(p, x) ++ pgfid, bn = v.split(os.sep) ++ try: ++ path = symlink_gfid_to_path(brick, pgfid) ++ fullpath = os.path.join(path, bn) ++ fullpath = output_path_prepare(fullpath, args) ++ hardlinks.append(fullpath) ++ except (IOError, OSError) as e: ++ logger.warn("Error converting to path: %s" % e) ++ continue ++ except (IOError, OSError): ++ pass ++ return hardlinks ++ ++ ++def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args): ++ path = "" ++ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}): ++ gfid = row[3].strip() ++ logger.debug("Processing gfid %s" % gfid) ++ hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args) ++ ++ path = ",".join(hardlinks) ++ ++ changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid}) ++ ++ + def gfid_to_path_using_pgfid(brick, changelog_data, args): + """ + For all the pgfids collected, Converts to Path and +@@ -314,11 +351,11 @@ def get_changes(brick, hash_dir, log_file, start, end, args): + changelog_data.commit() + logger.info("[2/4] Finished 'pgfid to path' conversions.") + +- # Convert all GFIDs for which no other additional details available +- logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...") +- gfid_to_path_using_pgfid(brick, changelog_data, args) ++ # Convert all gfids recorded for data and metadata to all hardlink paths ++ logger.info("[3/4] Starting 'gfid2path' conversions ...") ++ gfid_to_all_paths_using_gfid2path(brick, changelog_data, args) + changelog_data.commit() +- logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.") ++ logger.info("[3/4] Finished 'gfid2path' conversions.") + + # If some GFIDs fail to get converted from previous step, + # convert using find +-- +1.8.3.1 + diff --git a/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch b/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch new file mode 100644 index 
0000000..0d12daa --- /dev/null +++ b/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch @@ -0,0 +1,55 @@ +From a8d8fc91af226fbf49e9dd1d7d91ad287707c4fe Mon Sep 17 00:00:00 2001 +From: Vishal Pandey +Date: Wed, 7 Aug 2019 12:53:06 +0530 +Subject: [PATCH 328/335] glusterd: Add warning and abort in case of failures + in migration during remove-brick commit + +Problem - +Currently remove-brick commit goes through even though there were files +that failed to migrate or were skipped. There is no warning raised to the user. +Solution- +Add a check in the remove brick staging phase to verify if the status of the +rebalance process is complete but there have been failures or some skipped files +during migration. In this case the user will be given a warning and the remove-brick +commit is aborted. User will need to use the force option to remove the bricks. + +> Upstream Path Link: https://review.gluster.org/#/c/glusterfs/+/23171/ +> Fixes: bz#1514683 +> Signed-offby- Vishal Pandey +> Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5 + +BUG: 1344758 +Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5 +Signed-off-by: Vishal Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/185831 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index ad9a572..c5141de 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -2191,6 +2191,17 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) + goto out; + } + ++ if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_COMPLETE) { ++ if (volinfo->rebal.rebalance_failures > 0 || ++ volinfo->rebal.skipped_files > 0) { ++ errstr = gf_strdup( ++ "use 'force' option as migration " ++ "of some files might have been skipped or " 
++ "has failed"); ++ goto out; ++ } ++ } ++ + ret = glusterd_remove_brick_validate_bricks( + cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE); + if (ret) +-- +1.8.3.1 + diff --git a/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch b/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch new file mode 100644 index 0000000..935824d --- /dev/null +++ b/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch @@ -0,0 +1,165 @@ +From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Wed, 20 Nov 2019 12:26:11 +0530 +Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no + healed_sinks + +Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/ + +Problem: +In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame +anything for entry heal, heal will not complete even though we have +clear source and sinks. This will happen because while doing +afr_selfheal_find_direction() only the bricks which are blamed by +non-accused bricks are considered as sinks. Later in +__afr_selfheal_entry_finalize_source() when it tries to mark all the +non-sources as sinks it fails to do so because there won't be any +healed_sinks marked, no witness present and there will be a source. + +Fix: +If there is a source and no healed_sinks, then reset all the locked +sources to 0 and healed sinks to 1 to do conservative merge. 
+ +Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57 +Signed-off-by: karthik-us +BUG: 1764095 +Reviewed-on: https://code.engineering.redhat.com/gerrit/185834 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bug-1749322-entry-heal-not-happening.t | 89 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-entry.c | 15 ++++ + 2 files changed, 104 insertions(+) + create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t + +diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +new file mode 100644 +index 0000000..9627908 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +@@ -0,0 +1,89 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup ++ ++function check_gfid_and_link_count ++{ ++ local file=$1 ++ ++ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file) ++ TEST [ ! 
-z $file_gfid_b0 ] ++ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file) ++ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file) ++ EXPECT $file_gfid_b0 echo $file_gfid_b1 ++ EXPECT $file_gfid_b0 echo $file_gfid_b2 ++ ++ EXPECT "2" stat -c %h $B0/${V0}0/$file ++ EXPECT "2" stat -c %h $B0/${V0}1/$file ++ EXPECT "2" stat -c %h $B0/${V0}2/$file ++} ++TESTS_EXPECTED_IN_LOOP=18 ++ ++################################################################################ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++TEST `echo "File 1 " > $M0/dir/file1` ++TEST touch $M0/dir/file{2..4} ++ ++# Remove file2 from 1st & 3rd bricks ++TEST rm -f $B0/$V0"0"/dir/file2 ++TEST rm -f $B0/$V0"2"/dir/file2 ++ ++# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks ++gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3) ++gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3) ++TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm -f $B0/$V0"0"/dir/file3 ++TEST rm -f $B0/$V0"1"/dir/file3 ++ ++# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick ++gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4) ++gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4) ++TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4 ++TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 ++ ++# B0 and B2 blame each other ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 
$B0/$V0"0"/dir ++ ++# Add entry to xattrop dir on first brick. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# All the files must be present on all the bricks after conservative merge and ++# should have the gfid xattr and the .glusterfs hardlink. ++check_gfid_and_link_count dir/file1 ++check_gfid_and_link_count dir/file2 ++check_gfid_and_link_count dir/file3 ++check_gfid_and_link_count dir/file4 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 35b600f..3ce882e 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, + afr_private_t *priv = NULL; + int source = -1; + int sources_count = 0; ++ int i = 0; + + priv = this->private; + +@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, + } + + source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION); ++ ++ /*If the selected source does not blame any other brick, then mark ++ * everything as sink to trigger conservative merge. 
++ */ ++ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) { ++ for (i = 0; i < priv->child_count; i++) { ++ if (locked_on[i]) { ++ sources[i] = 0; ++ healed_sinks[i] = 1; ++ } ++ } ++ return -1; ++ } ++ + return source; + } + +-- +1.8.3.1 + diff --git a/0330-mount.glusterfs-change-the-error-message.patch b/0330-mount.glusterfs-change-the-error-message.patch new file mode 100644 index 0000000..b64f0c6 --- /dev/null +++ b/0330-mount.glusterfs-change-the-error-message.patch @@ -0,0 +1,59 @@ +From 72168245761592a2cd0ebec05dd9bd9bc00745ca Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Wed, 13 Mar 2019 08:51:31 +0530 +Subject: [PATCH 330/335] mount.glusterfs: change the error message + +In scenarios where a mount fails before creating log file, doesn't +make sense to give message to 'check log file'. See below: + +``` +ERROR: failed to create logfile "/var/log/glusterfs/mnt.log" (No space left on device) +ERROR: failed to open logfile /var/log/glusterfs/mnt.log +Mount failed. Please check the log file for more details. +``` + +>upstream patch: https://review.gluster.org/#/c/glusterfs/+/22346/ +>Fixes: bz#1688068 +>Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f +>Signed-off-by: Amar Tumballi + +BUG: 1685406 +Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f +Signed-off-by: Sheetal Pamecha +Reviewed-on: https://code.engineering.redhat.com/gerrit/185828 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mount/fuse/utils/mount.glusterfs.in | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index 3f5d76d..cbde42d 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -361,7 +361,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line $mount_point"); + $cmd_line; + if [ $? -ne 0 ]; then +- warn "Mount failed. 
Please check the log file for more details." ++ # If this is true, then glusterfs process returned error without ++ # getting daemonized. We have made sure the logs are posted to ++ # 'stderr', so no need to point them to logfile. ++ warn "Mounting glusterfs on $mount_point failed." + exit 1; + fi + +@@ -369,7 +372,9 @@ start_glusterfs () + inode=$( ${getinode} $mount_point 2>/dev/null); + # this is required if the stat returns error + if [ $? -ne 0 ]; then +- warn "Mount failed. Please check the log file for more details." ++ # At this time, glusterfs got daemonized, and then later exited. ++ # These failures are only logged in log file. ++ warn "Mount failed. Check the log file ${log_file} for more details." + umount $mount_point > /dev/null 2>&1; + exit 1; + fi +-- +1.8.3.1 + diff --git a/0331-features-locks-Do-special-handling-for-op-version-3..patch b/0331-features-locks-Do-special-handling-for-op-version-3..patch new file mode 100644 index 0000000..6eb15b0 --- /dev/null +++ b/0331-features-locks-Do-special-handling-for-op-version-3..patch @@ -0,0 +1,44 @@ +From 147cff762b307bf60519bae4cdefc62f655119a7 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Wed, 30 Oct 2019 10:47:17 +0530 +Subject: [PATCH 331/335] features/locks: Do special handling for op-version < + 3.12.0 + +Problem: +Patch https://code.engineering.redhat.com/gerrit/#/c/140080/ diverges from +its upstream patch(https://review.gluster.org/c/glusterfs/+/20031) in op-version. +On upstream special-handling happens for version < 3.10.0 whereas for downstream +special-handling happens for version < 3.12.0. + When rebase happened for 3.5.0 from upstream, this downstream specific change +is missed as there was no special downstream-only patch tracking this difference. 
+This leads to I/O errors on upgrade from 3.3.1->3.5.0 + +Fix: +Do special handling for op-version < 3.12.0 as in 3.4.x + +Change-Id: I72fec058bdfb3cd30d017d205c90aa61aec86c5d +Label: DOWNSTREAM ONLY +BUG: 1766640 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/185835 +Reviewed-by: Xavi Hernandez Juan +--- + xlators/features/locks/src/posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 9db5ac6..4592240 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -57,7 +57,7 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + do { \ + pl_local_t *__local = NULL; \ + if (frame->root->client && \ +- (frame->root->client->opversion < GD_OP_VERSION_3_10_0)) { \ ++ (frame->root->client->opversion < GD_OP_VERSION_3_12_0)) { \ + __local = frame->local; \ + PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params); \ + } else { \ +-- +1.8.3.1 + diff --git a/0332-Removing-one-top-command-from-gluster-v-help.patch b/0332-Removing-one-top-command-from-gluster-v-help.patch new file mode 100644 index 0000000..c9b2b56 --- /dev/null +++ b/0332-Removing-one-top-command-from-gluster-v-help.patch @@ -0,0 +1,57 @@ +From 808f311bd4f38f06b8afc49fc8d2c65fc4797431 Mon Sep 17 00:00:00 2001 +From: kshithijiyer +Date: Fri, 28 Jun 2019 15:32:31 +0530 +Subject: [PATCH 332/335] Removing one top command from gluster v help + +The current help shows 2 different top commands +instead of one single top command which can be +easily observed when "# gluster v help" command +is issued. Removing one "volume top " +and clubbing them into a single command. 
+ +Current help: +volume top {open|read|write|opendir|readdir|clear} +[nfs|brick ] [list-cnt ] | +volume top {read-perf|write-perf} +[bs count ] [brick ] +[list-cnt ] - volume top operations + +Expected help: +volume top {open|read|write|opendir|readdir|clear} +[nfs|brick ] [list-cnt ] | {read-perf|write-perf} +[bs count ] [brick ] [list-cnt ] +- volume top operations + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/22972/ +> fixes: bz#1725034 +> Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d +> Signed-off-by: kshithijiyer + +BUG: 1726058 +Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/185757 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-cmd-volume.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 66beb1b..754d333 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3427,8 +3427,8 @@ struct cli_cmd volume_cmds[] = { + cli_cmd_volume_profile_cbk, "volume profile operations"}, + + {"volume top {open|read|write|opendir|readdir|clear} [nfs|brick " +- "] [list-cnt ] |\n" +- "volume top {read-perf|write-perf} [bs count ] " ++ "] [list-cnt ] | " ++ "{read-perf|write-perf} [bs count ] " + "[brick ] [list-cnt ]", + cli_cmd_volume_top_cbk, "volume top operations"}, + +-- +1.8.3.1 + diff --git a/0333-rpc-Synchronize-slot-allocation-code.patch b/0333-rpc-Synchronize-slot-allocation-code.patch new file mode 100644 index 0000000..b1d94b4 --- /dev/null +++ b/0333-rpc-Synchronize-slot-allocation-code.patch @@ -0,0 +1,195 @@ +From f199094cb61341a47c98a8ed91b293446182b5a9 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 3 Oct 2019 14:06:52 +0530 +Subject: [PATCH 333/335] rpc: Synchronize slot allocation code + +Problem: Current slot allocation/deallocation code path is not + synchronized.There are scenario 
when due to race condition + in slot allocation/deallocation code path brick is crashed. + +Solution: Synchronize slot allocation/deallocation code path to + avoid the issue + +> Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25 +> Fixes: bz#1763036 +> Signed-off-by: Mohit Agrawal +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23508/) +> (Cherry pick from commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2) + +Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25 +BUG: 1741193 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/185827 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/event-epoll.c | 74 +++++++++++++++++++++++------------------- + 1 file changed, 41 insertions(+), 33 deletions(-) + +diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c +index 0cec47e..65f5efd 100644 +--- a/libglusterfs/src/event-epoll.c ++++ b/libglusterfs/src/event-epoll.c +@@ -69,15 +69,27 @@ __event_newtable(struct event_pool *event_pool, int table_idx) + } + + static int ++event_slot_ref(struct event_slot_epoll *slot) ++{ ++ if (!slot) ++ return -1; ++ ++ return GF_ATOMIC_INC(slot->ref); ++} ++ ++static int + __event_slot_alloc(struct event_pool *event_pool, int fd, +- char notify_poller_death) ++ char notify_poller_death, struct event_slot_epoll **slot) + { + int i = 0; ++ int j = 0; + int table_idx = -1; + int gen = -1; + struct event_slot_epoll *table = NULL; + +- for (i = 0; i < EVENT_EPOLL_TABLES; i++) { ++retry: ++ ++ while (i < EVENT_EPOLL_TABLES) { + switch (event_pool->slots_used[i]) { + case EVENT_EPOLL_SLOTS: + continue; +@@ -98,6 +110,7 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + if (table) + /* break out of the loop */ + break; ++ i++; + } + + if (!table) +@@ -105,20 +118,20 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + + table_idx = i; + +- for (i = 0; i < EVENT_EPOLL_SLOTS; i++) { +- if (table[i].fd 
== -1) { ++ for (j = 0; j < EVENT_EPOLL_SLOTS; j++) { ++ if (table[j].fd == -1) { + /* wipe everything except bump the generation */ +- gen = table[i].gen; +- memset(&table[i], 0, sizeof(table[i])); +- table[i].gen = gen + 1; ++ gen = table[j].gen; ++ memset(&table[j], 0, sizeof(table[j])); ++ table[j].gen = gen + 1; + +- LOCK_INIT(&table[i].lock); +- INIT_LIST_HEAD(&table[i].poller_death); ++ LOCK_INIT(&table[j].lock); ++ INIT_LIST_HEAD(&table[j].poller_death); + +- table[i].fd = fd; ++ table[j].fd = fd; + if (notify_poller_death) { +- table[i].idx = table_idx * EVENT_EPOLL_SLOTS + i; +- list_add_tail(&table[i].poller_death, ++ table[j].idx = table_idx * EVENT_EPOLL_SLOTS + j; ++ list_add_tail(&table[j].poller_death, + &event_pool->poller_death); + } + +@@ -128,18 +141,26 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + } + } + +- return table_idx * EVENT_EPOLL_SLOTS + i; ++ if (j == EVENT_EPOLL_SLOTS) { ++ table = NULL; ++ i++; ++ goto retry; ++ } else { ++ (*slot) = &table[j]; ++ event_slot_ref(*slot); ++ return table_idx * EVENT_EPOLL_SLOTS + j; ++ } + } + + static int + event_slot_alloc(struct event_pool *event_pool, int fd, +- char notify_poller_death) ++ char notify_poller_death, struct event_slot_epoll **slot) + { + int idx = -1; + + pthread_mutex_lock(&event_pool->mutex); + { +- idx = __event_slot_alloc(event_pool, fd, notify_poller_death); ++ idx = __event_slot_alloc(event_pool, fd, notify_poller_death, slot); + } + pthread_mutex_unlock(&event_pool->mutex); + +@@ -153,6 +174,7 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx) + int offset = 0; + struct event_slot_epoll *table = NULL; + struct event_slot_epoll *slot = NULL; ++ int fd = -1; + + table_idx = idx / EVENT_EPOLL_SLOTS; + offset = idx % EVENT_EPOLL_SLOTS; +@@ -164,11 +186,13 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx) + slot = &table[offset]; + slot->gen++; + ++ fd = slot->fd; + slot->fd = -1; + slot->handled_error = 0; + slot->in_handler = 0; + 
list_del_init(&slot->poller_death); +- event_pool->slots_used[table_idx]--; ++ if (fd != -1) ++ event_pool->slots_used[table_idx]--; + + return; + } +@@ -185,15 +209,6 @@ event_slot_dealloc(struct event_pool *event_pool, int idx) + return; + } + +-static int +-event_slot_ref(struct event_slot_epoll *slot) +-{ +- if (!slot) +- return -1; +- +- return GF_ATOMIC_INC(slot->ref); +-} +- + static struct event_slot_epoll * + event_slot_get(struct event_pool *event_pool, int idx) + { +@@ -379,20 +394,13 @@ event_register_epoll(struct event_pool *event_pool, int fd, + if (destroy == 1) + goto out; + +- idx = event_slot_alloc(event_pool, fd, notify_poller_death); ++ idx = event_slot_alloc(event_pool, fd, notify_poller_death, &slot); + if (idx == -1) { + gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, + "could not find slot for fd=%d", fd); + return -1; + } + +- slot = event_slot_get(event_pool, idx); +- if (!slot) { +- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, +- "could not find slot for fd=%d idx=%d", fd, idx); +- return -1; +- } +- + assert(slot->fd == fd); + + LOCK(&slot->lock); +-- +1.8.3.1 + diff --git a/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch b/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch new file mode 100644 index 0000000..48f927f --- /dev/null +++ b/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch @@ -0,0 +1,54 @@ +From 17940583c4d991a568582581f68dcbf08463ccaf Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Tue, 16 Jul 2019 10:31:46 +0530 +Subject: [PATCH 334/335] dht: log getxattr failure for node-uuid at "DEBUG" + +There are two ways to fetch node-uuid information from dht. + +1 - #define GF_XATTR_LIST_NODE_UUIDS_KEY "trusted.glusterfs.list-node-uuids" +This key is used by AFR. + +2 - #define GF_REBAL_FIND_LOCAL_SUBVOL "glusterfs.find-local-subvol" +This key is used for non-afr volume type. + +We do two getxattr operations. First on the #1 key followed by on #2 if +getxattr on #1 key fails. 
+ +Since the parent function "dht_init_local_subvols_and_nodeuuids" logs failure, +moving the log-level to DEBUG in dht_find_local_subvol_cbk. + +>fixes: bz#1730175 +>Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14 +>Signed-off-by: Susant Palai +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23053/ + +BUG: 1727755 +Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14 +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185876 +Tested-by: RHGS Build Bot +--- + xlators/cluster/dht/src/dht-common.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 37952ba..d0b5287 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -4253,8 +4253,11 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + local->op_ret = -1; + local->op_errno = op_errno; + UNLOCK(&frame->lock); +- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED, +- "getxattr err for dir"); ++ if (op_errno == ENODATA) ++ gf_msg_debug(this->name, 0, "failed to get node-uuid"); ++ else ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid"); + goto post_unlock; + } + +-- +1.8.3.1 + diff --git a/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch b/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch new file mode 100644 index 0000000..c3341df --- /dev/null +++ b/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch @@ -0,0 +1,15991 @@ +From 39523fd6c1b4789b12c8db81f4e08a3eb0c6a65c Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya +Date: Thu, 17 Oct 2019 13:03:56 +0530 +Subject: [PATCH 335/335] tests: RHEL8 test failure fixes for RHGS + +- tests/bugs/shard/bug-1272986.t + https://review.gluster.org/#/c/glusterfs/+/23499/ + https://review.gluster.org/#/c/glusterfs/+/23551/ + +- tests/basic/posix/shared-statfs.t + 
https://review.gluster.org/c/glusterfs/+/23550 + +- tests/basic/fops-sanity.t + https://review.gluster.org/c/glusterfs/+/22210/ + +- tests/bugs/transport/bug-873367.t +- tests/features/ssl-authz.t +- tests/bugs/snapshot/bug-1399598-uss-with-ssl.t + https://review.gluster.org/#/c/glusterfs/+/23587/ + +- remove gnfs relatedtests + +- tests/bugs/shard/unlinks-and-renames.t + https://review.gluster.org/#/c/glusterfs/+/23585/ + +- tests/bugs/rpc/bug-954057.t +- tests/bugs/glusterfs-server/bug-887145.t + https://review.gluster.org/#/c/glusterfs/+/23710/ + +- tests/features/ssl-ciphers.t + https://review.gluster.org/#/c/glusterfs/+/23703/ + +- tests/bugs/fuse/bug-985074.t + https://review.gluster.org/#/c/glusterfs/+/23734/ + +BUG: 1762180 +Change-Id: I97b344a632b49ca9ca332a5a463756b160aee5bd +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/185716 +Tested-by: RHGS Build Bot +--- + tests/basic/fops-sanity.c | 1862 ++-- + tests/basic/posix/shared-statfs.t | 11 +- + tests/bugs/cli/bug-1320388.t | 2 +- + tests/bugs/fuse/bug-985074.t | 4 +- + tests/bugs/glusterd/quorum-value-check.t | 35 - + tests/bugs/glusterfs-server/bug-887145.t | 14 +- + tests/bugs/nfs/bug-1053579.t | 114 - + tests/bugs/nfs/bug-1116503.t | 47 - + tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t | 24 - + tests/bugs/nfs/bug-1157223-symlink-mounting.t | 126 - + tests/bugs/nfs/bug-1161092-nfs-acls.t | 39 - + tests/bugs/nfs/bug-1166862.t | 69 - + tests/bugs/nfs/bug-1210338.c | 31 - + tests/bugs/nfs/bug-1210338.t | 30 - + tests/bugs/nfs/bug-1302948.t | 13 - + tests/bugs/nfs/bug-847622.t | 39 - + tests/bugs/nfs/bug-877885.t | 39 - + tests/bugs/nfs/bug-904065.t | 100 - + tests/bugs/nfs/bug-915280.t | 54 - + tests/bugs/nfs/bug-970070.t | 13 - + tests/bugs/nfs/bug-974972.t | 41 - + tests/bugs/nfs/showmount-many-clients.t | 41 - + tests/bugs/nfs/socket-as-fifo.py | 33 - + tests/bugs/nfs/socket-as-fifo.t | 25 - + tests/bugs/nfs/subdir-trailing-slash.t | 32 - + 
tests/bugs/nfs/zero-atime.t | 33 - + tests/bugs/rpc/bug-954057.t | 10 +- + tests/bugs/shard/bug-1272986.t | 6 +- + tests/bugs/transport/bug-873367.t | 2 +- + tests/features/ssl-authz.t | 2 +- + tests/features/ssl-ciphers.t | 61 +- + tests/ssl.rc | 2 +- + xlators/features/shard/src/shard.c | 11754 ++++++++++---------- + 33 files changed, 6638 insertions(+), 8070 deletions(-) + delete mode 100755 tests/bugs/glusterd/quorum-value-check.t + delete mode 100755 tests/bugs/nfs/bug-1053579.t + delete mode 100644 tests/bugs/nfs/bug-1116503.t + delete mode 100644 tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t + delete mode 100644 tests/bugs/nfs/bug-1157223-symlink-mounting.t + delete mode 100644 tests/bugs/nfs/bug-1161092-nfs-acls.t + delete mode 100755 tests/bugs/nfs/bug-1166862.t + delete mode 100644 tests/bugs/nfs/bug-1210338.c + delete mode 100644 tests/bugs/nfs/bug-1210338.t + delete mode 100755 tests/bugs/nfs/bug-1302948.t + delete mode 100755 tests/bugs/nfs/bug-847622.t + delete mode 100755 tests/bugs/nfs/bug-877885.t + delete mode 100755 tests/bugs/nfs/bug-904065.t + delete mode 100755 tests/bugs/nfs/bug-915280.t + delete mode 100755 tests/bugs/nfs/bug-970070.t + delete mode 100755 tests/bugs/nfs/bug-974972.t + delete mode 100644 tests/bugs/nfs/showmount-many-clients.t + delete mode 100755 tests/bugs/nfs/socket-as-fifo.py + delete mode 100644 tests/bugs/nfs/socket-as-fifo.t + delete mode 100644 tests/bugs/nfs/subdir-trailing-slash.t + delete mode 100755 tests/bugs/nfs/zero-atime.t + +diff --git a/tests/basic/fops-sanity.c b/tests/basic/fops-sanity.c +index aff72d8..171d003 100644 +--- a/tests/basic/fops-sanity.c ++++ b/tests/basic/fops-sanity.c +@@ -17,15 +17,16 @@ + + /* Filesystem basic sanity check, tests all (almost) fops. 
*/ + +-#include ++#include ++#include + #include +-#include +-#include ++#include ++#include + #include ++#include ++#include + #include +-#include +-#include +-#include ++#include + + #ifndef linux + #include +@@ -34,904 +35,880 @@ + #endif + + /* for fd based fops after unlink */ +-int +-fd_based_fops_1(char *filename); ++int fd_based_fops_1(char *filename); + /* for fd based fops before unlink */ +-int +-fd_based_fops_2(char *filename); ++int fd_based_fops_2(char *filename); + /* fops based on fd after dup */ +-int +-dup_fd_based_fops(char *filename); ++int dup_fd_based_fops(char *filename); + /* for fops based on path */ +-int +-path_based_fops(char *filename); ++int path_based_fops(char *filename); + /* for fops which operate on directory */ +-int +-dir_based_fops(char *filename); ++int dir_based_fops(char *filename); + /* for fops which operate in link files (symlinks) */ +-int +-link_based_fops(char *filename); ++int link_based_fops(char *filename); + /* to test open syscall with open modes available. */ +-int +-test_open_modes(char *filename); ++int test_open_modes(char *filename); + /* generic function which does open write and read. 
*/ +-int +-generic_open_read_write(char *filename, int flag, mode_t mode); ++int generic_open_read_write(char *filename, int flag, mode_t mode); + + #define OPEN_MODE 0666 + +-int +-main(int argc, char *argv[]) +-{ +- int ret = -1; +- int result = 0; +- char filename[255] = { +- 0, +- }; +- +- if (argc > 1) +- strcpy(filename, argv[1]); +- else +- strcpy(filename, "temp-xattr-test-file"); +- +- ret = fd_based_fops_1(strcat(filename, "_1")); +- if (ret < 0) { +- fprintf(stderr, "fd based file operation 1 failed\n"); +- result |= ret; +- } else { +- fprintf(stdout, "fd based file operation 1 passed\n"); +- } +- +- ret = fd_based_fops_2(strcat(filename, "_2")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "fd based file operation 2 failed\n"); +- } else { +- fprintf(stdout, "fd based file operation 2 passed\n"); +- } +- +- ret = dup_fd_based_fops(strcat(filename, "_3")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "dup fd based file operation failed\n"); +- } else { +- fprintf(stdout, "dup fd based file operation passed\n"); +- } +- +- ret = path_based_fops(strcat(filename, "_4")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "path based file operation failed\n"); +- } else { +- fprintf(stdout, "path based file operation passed\n"); +- } +- +- ret = dir_based_fops(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "directory based file operation failed\n"); +- } else { +- fprintf(stdout, "directory based file operation passed\n"); +- } +- +- ret = link_based_fops(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "link based file operation failed\n"); +- } else { +- fprintf(stdout, "link based file operation passed\n"); +- } +- +- ret = test_open_modes(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "testing modes of `open' call failed\n"); +- } else { +- fprintf(stdout, "testing modes of `open' call passed\n"); +- } +- return result; ++int main(int 
argc, char *argv[]) { ++ int ret = -1; ++ int result = 0; ++ char filename[255] = { ++ 0, ++ }; ++ ++ if (argc > 1) ++ strcpy(filename, argv[1]); ++ else ++ strcpy(filename, "temp-xattr-test-file"); ++ ++ ret = fd_based_fops_1(strcat(filename, "_1")); ++ if (ret < 0) { ++ fprintf(stderr, "fd based file operation 1 failed\n"); ++ result |= ret; ++ } else { ++ fprintf(stdout, "fd based file operation 1 passed\n"); ++ } ++ ++ ret = fd_based_fops_2(strcat(filename, "_2")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "fd based file operation 2 failed\n"); ++ } else { ++ fprintf(stdout, "fd based file operation 2 passed\n"); ++ } ++ ++ ret = dup_fd_based_fops(strcat(filename, "_3")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "dup fd based file operation failed\n"); ++ } else { ++ fprintf(stdout, "dup fd based file operation passed\n"); ++ } ++ ++ ret = path_based_fops(strcat(filename, "_4")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "path based file operation failed\n"); ++ } else { ++ fprintf(stdout, "path based file operation passed\n"); ++ } ++ ++ ret = dir_based_fops(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "directory based file operation failed\n"); ++ } else { ++ fprintf(stdout, "directory based file operation passed\n"); ++ } ++ ++ ret = link_based_fops(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "link based file operation failed\n"); ++ } else { ++ fprintf(stdout, "link based file operation passed\n"); ++ } ++ ++ ret = test_open_modes(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "testing modes of `open' call failed\n"); ++ } else { ++ fprintf(stdout, "testing modes of `open' call passed\n"); ++ } ++ return result; + } + + /* Execute all possible fops on a fd which is unlinked */ +-int +-fd_based_fops_1(char *filename) +-{ +- int fd = 0; +- int ret = -1; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char 
wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(wstr, "This is my string\n"); +- ret = write(fd, wstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(fd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = ftruncate(fd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fstat(fd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(fd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(fd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- /* +- * These metadata operations fail at the moment because kernel doesn't +- * pass the client fd in the operation. +- * The following bug tracks this change. 
+- * https://bugzilla.redhat.com/show_bug.cgi?id=1084422 +- * ret = fchmod (fd, 0640); +- * if (ret < 0) { +- * fprintf (stderr, "fchmod failed : %s\n", strerror (errno)); +- * result |= ret; +- * } +- +- * ret = fchown (fd, 10001, 10001); +- * if (ret < 0) { +- * fprintf (stderr, "fchown failed : %s\n", strerror (errno)); +- * result |= ret; +- * } +- +- * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0); +- * if (ret < 0) { +- * fprintf (stderr, "fsetxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = flistxattr (fd, NULL, 0); +- * if (ret <= 0) { +- * fprintf (stderr, "flistxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0); +- * if (ret <= 0) { +- * fprintf (stderr, "fgetxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = fremovexattr (fd, "trusted.xattr-test"); +- * if (ret < 0) { +- * fprintf (stderr, "fremovexattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- */ +- +- if (fd) +- close(fd); +- return result; ++int fd_based_fops_1(char *filename) { ++ int fd = 0; ++ int ret = -1; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(wstr, "This is my string\n"); ++ ret = write(fd, wstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(fd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: 
%s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = ftruncate(fd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fstat(fd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ /* ++ * These metadata operations fail at the moment because kernel doesn't ++ * pass the client fd in the operation. ++ * The following bug tracks this change. ++ * https://bugzilla.redhat.com/show_bug.cgi?id=1084422 ++ * ret = fchmod (fd, 0640); ++ * if (ret < 0) { ++ * fprintf (stderr, "fchmod failed : %s\n", strerror (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fchown (fd, 10001, 10001); ++ * if (ret < 0) { ++ * fprintf (stderr, "fchown failed : %s\n", strerror (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0); ++ * if (ret < 0) { ++ * fprintf (stderr, "fsetxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = flistxattr (fd, NULL, 0); ++ * if (ret <= 0) { ++ * fprintf (stderr, "flistxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0); ++ * if (ret <= 0) { ++ * fprintf (stderr, "fgetxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fremovexattr (fd, "trusted.xattr-test"); ++ * if (ret < 0) { ++ * fprintf (stderr, "fremovexattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ */ ++ ++ if (fd) ++ close(fd); ++ return result; + } + +-int 
+-fd_based_fops_2(char *filename) +-{ +- int fd = 0; +- int ret = -1; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- ret = ftruncate(fd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(wstr, "This is my second string\n"); +- ret = write(fd, wstr, strlen(wstr)); +- if (ret < 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(fd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = fstat(fd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchmod(fd, 0640); +- if (ret < 0) { +- fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchown(fd, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "fchown failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(fd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(fd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = flistxattr(fd, NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "flistxattr failed 
: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fremovexattr(fd, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (fd) +- close(fd); +- unlink(filename); ++int fd_based_fops_2(char *filename) { ++ int fd = 0; ++ int ret = -1; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ ret = ftruncate(fd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(wstr, "This is my second string\n"); ++ ret = write(fd, wstr, strlen(wstr)); ++ if (ret < 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(fd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = fstat(fd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchmod(fd, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchown(fd, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "fchown failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", 
strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = flistxattr(fd, NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fremovexattr(fd, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (fd) ++ close(fd); ++ unlink(filename); + +- return result; ++ return result; + } + +-int +-path_based_fops(char *filename) +-{ +- int ret = -1; +- int fd = 0; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char newfilename[255] = { +- 0, +- }; +- char *hardlink = "linkfile-hard.txt"; +- char *symlnk = "linkfile-soft.txt"; +- char buf[1024] = { +- 0, +- }; +- +- fd = creat(filename, 0644); +- if (fd < 0) { +- fprintf(stderr, "creat failed: %s\n", strerror(errno)); +- return ret; +- } +- +- ret = truncate(filename, 0); +- if (ret < 0) { +- fprintf(stderr, "truncate failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = stat(filename, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "stat failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chmod(filename, 0640); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chown(filename, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "chown failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "setxattr failed: 
%s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = listxattr(filename, NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = getxattr(filename, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = removexattr(filename, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = access(filename, R_OK | W_OK); +- if (ret < 0) { +- fprintf(stderr, "access failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = link(filename, hardlink); +- if (ret < 0) { +- fprintf(stderr, "link failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink(hardlink); +- +- ret = symlink(filename, symlnk); +- if (ret < 0) { +- fprintf(stderr, "symlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = readlink(symlnk, buf, sizeof(buf)); +- if (ret < 0) { +- fprintf(stderr, "readlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink(symlnk); +- +- /* Create a character special file */ +- ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3)); +- if (ret < 0) { +- fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("cspecial"); +- +- ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5)); +- if (ret < 0) { +- fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("bspecial"); ++int path_based_fops(char *filename) { ++ int ret = -1; ++ int fd = 0; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char newfilename[255] = { ++ 0, ++ }; ++ char *hardlink = "linkfile-hard.txt"; ++ char *symlnk = "linkfile-soft.txt"; ++ char buf[1024] = { ++ 0, ++ }; ++ ++ fd = creat(filename, 0644); ++ if (fd < 0) { ++ fprintf(stderr, "creat failed: %s\n", strerror(errno)); 
++ return ret; ++ } ++ ++ ret = truncate(filename, 0); ++ if (ret < 0) { ++ fprintf(stderr, "truncate failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = stat(filename, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "stat failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chmod(filename, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chown(filename, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "chown failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = listxattr(filename, NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = getxattr(filename, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = removexattr(filename, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = access(filename, R_OK | W_OK); ++ if (ret < 0) { ++ fprintf(stderr, "access failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = link(filename, hardlink); ++ if (ret < 0) { ++ fprintf(stderr, "link failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink(hardlink); ++ ++ ret = symlink(filename, symlnk); ++ if (ret < 0) { ++ fprintf(stderr, "symlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = readlink(symlnk, buf, sizeof(buf)); ++ if (ret < 0) { ++ fprintf(stderr, "readlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink(symlnk); ++ ++ /* Create a character special file */ ++ ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3)); ++ if (ret < 0) { ++ 
fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink("cspecial"); ++ ++ ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5)); ++ if (ret < 0) { ++ fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink("bspecial"); + + #ifdef linux +- ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0); ++ ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0); + #else +- ret = mkfifo("fifo", 0); ++ ret = mkfifo("fifo", 0); + #endif +- if (ret < 0) { +- fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("fifo"); ++ if (ret < 0) { ++ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink("fifo"); + + #ifdef linux +- ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0); +- if (ret < 0) { +- fprintf(stderr, "sock mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } ++ ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0); ++ if (ret < 0) { ++ fprintf(stderr, "sock mknod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } + #else +- { +- int s; +- const char *pathname = "sock"; +- struct sockaddr_un addr; +- +- s = socket(PF_LOCAL, SOCK_STREAM, 0); +- memset(&addr, 0, sizeof(addr)); +- strncpy(addr.sun_path, pathname, sizeof(addr.sun_path)); +- ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr)); +- if (ret < 0) { +- fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- close(s); +- } +-#endif +- unlink("sock"); ++ { ++ int s; ++ const char *pathname = "sock"; ++ struct sockaddr_un addr; + +- strcpy(newfilename, filename); +- strcat(newfilename, "_new"); +- ret = rename(filename, newfilename); ++ s = socket(PF_LOCAL, SOCK_STREAM, 0); ++ memset(&addr, 0, sizeof(addr)); ++ strncpy(addr.sun_path, pathname, sizeof(addr.sun_path)); ++ ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr)); + if (ret < 0) { +- fprintf(stderr, "rename failed: %s\n", 
strerror(errno)); +- result |= ret; +- } +- unlink(newfilename); +- +- if (fd) +- close(fd); +- +- unlink(filename); +- return result; +-} +- +-int +-dup_fd_based_fops(char *filename) +-{ +- int fd = 0; +- int result = 0; +- int newfd = 0; +- int ret = -1; +- struct stat stbuf = { +- 0, +- }; +- char wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- newfd = dup(fd); +- if (newfd < 0) { +- fprintf(stderr, "dup failed: %s\n", strerror(errno)); +- result |= ret; ++ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); ++ result |= ret; + } +- ++ close(s); ++ } ++#endif ++ unlink("sock"); ++ ++ strcpy(newfilename, filename); ++ strcat(newfilename, "_new"); ++ ret = rename(filename, newfilename); ++ if (ret < 0) { ++ fprintf(stderr, "rename failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink(newfilename); ++ ++ if (fd) + close(fd); + +- strcpy(wstr, "This is my string\n"); +- ret = write(newfd, wstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lseek(newfd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(newfd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = ftruncate(newfd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fstat(newfd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchmod(newfd, 0640); +- if (ret < 0) { +- fprintf(stderr, "fchmod failed : %s\n", 
strerror(errno)); +- result |= ret; +- } +- +- ret = fchown(newfd, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "fchown failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(newfd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(newfd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = flistxattr(newfd, NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fremovexattr(newfd, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (newfd) +- close(newfd); +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed : %s\n", strerror(errno)); +- result |= ret; +- } +- return result; ++ unlink(filename); ++ return result; + } + +-int +-dir_based_fops(char *dirname) +-{ +- int ret = -1; +- int result = 0; +- DIR *dp = NULL; +- char buff[255] = { +- 0, +- }; +- struct dirent *dbuff = { +- 0, +- }; +- struct stat stbuff = { +- 0, +- }; +- char newdname[255] = { +- 0, +- }; +- char *cwd = NULL; +- +- ret = mkdir(dirname, 0755); +- if (ret < 0) { +- fprintf(stderr, "mkdir failed: %s\n", strerror(errno)); +- return ret; +- } +- +- dp = opendir(dirname); +- if (dp == NULL) { +- fprintf(stderr, "opendir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- dbuff = readdir(dp); +- if (NULL == dbuff) { +- fprintf(stderr, "readdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- 
ret = closedir(dp); +- if (ret < 0) { +- fprintf(stderr, "closedir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = stat(dirname, &stbuff); +- if (ret < 0) { +- fprintf(stderr, "stat failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chmod(dirname, 0744); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chown(dirname, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = listxattr(dirname, NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = getxattr(dirname, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = removexattr(dirname, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(newdname, dirname); +- strcat(newdname, "/../"); +- ret = chdir(newdname); +- if (ret < 0) { +- fprintf(stderr, "chdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- cwd = getcwd(buff, 255); +- if (NULL == cwd) { +- fprintf(stderr, "getcwd failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(newdname, dirname); +- strcat(newdname, "new"); +- ret = rename(dirname, newdname); +- if (ret < 0) { +- fprintf(stderr, "rename failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = rmdir(newdname); +- if (ret < 0) { +- fprintf(stderr, "rmdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- rmdir(dirname); +- return result; ++int dup_fd_based_fops(char *filename) { ++ int fd = 0; ++ int result = 0; ++ int newfd = 0; ++ int 
ret = -1; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ newfd = dup(fd); ++ if (newfd < 0) { ++ fprintf(stderr, "dup failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ close(fd); ++ ++ strcpy(wstr, "This is my string\n"); ++ ret = write(newfd, wstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lseek(newfd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(newfd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = ftruncate(newfd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fstat(newfd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchmod(newfd, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchown(newfd, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "fchown failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(newfd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(newfd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ 
++ ret = flistxattr(newfd, NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fremovexattr(newfd, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (newfd) ++ close(newfd); ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ return result; + } + +-int +-link_based_fops(char *filename) +-{ +- int ret = -1; +- int result = 0; +- int fd = 0; +- char newname[255] = { +- 0, +- }; +- char linkname[255] = { +- 0, +- }; +- struct stat lstbuf = { +- 0, +- }; +- +- fd = creat(filename, 0644); +- if (fd < 0) { +- fd = 0; +- fprintf(stderr, "creat failed: %s\n", strerror(errno)); +- return ret; +- } +- +- strcpy(newname, filename); +- strcat(newname, "_hlink"); +- ret = link(filename, newname); +- if (ret < 0) { +- fprintf(stderr, "link failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(linkname, filename); +- strcat(linkname, "_slink"); +- ret = symlink(newname, linkname); +- if (ret < 0) { +- fprintf(stderr, "symlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lstat(linkname, &lstbuf); +- if (ret < 0) { +- fprintf(stderr, "lstbuf failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lchown(linkname, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "lchown failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno)); +- result |= ret; +- 
} +- +- ret = llistxattr(linkname, NULL, 0); +- if (ret < 0) { +- ret = -1; +- fprintf(stderr, "llistxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0); +- if (ret < 0) { +- ret = -1; +- fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lremovexattr(linkname, "trusted.lxattr-test"); +- if (ret < 0) { +- fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (fd) +- close(fd); +- unlink(linkname); +- unlink(newname); +- return result; ++int dir_based_fops(char *dirname) { ++ int ret = -1; ++ int result = 0; ++ DIR *dp = NULL; ++ char buff[255] = { ++ 0, ++ }; ++ struct dirent *dbuff = { ++ 0, ++ }; ++ struct stat stbuff = { ++ 0, ++ }; ++ char newdname[255] = { ++ 0, ++ }; ++ char *cwd = NULL; ++ ++ ret = mkdir(dirname, 0755); ++ if (ret < 0) { ++ fprintf(stderr, "mkdir failed: %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ dp = opendir(dirname); ++ if (dp == NULL) { ++ fprintf(stderr, "opendir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ dbuff = readdir(dp); ++ if (NULL == dbuff) { ++ fprintf(stderr, "readdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = closedir(dp); ++ if (ret < 0) { ++ fprintf(stderr, "closedir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = stat(dirname, &stbuff); ++ if (ret < 0) { ++ fprintf(stderr, "stat failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chmod(dirname, 0744); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chown(dirname, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = 
listxattr(dirname, NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = getxattr(dirname, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = removexattr(dirname, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(newdname, dirname); ++ strcat(newdname, "/../"); ++ ret = chdir(newdname); ++ if (ret < 0) { ++ fprintf(stderr, "chdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ cwd = getcwd(buff, 255); ++ if (NULL == cwd) { ++ fprintf(stderr, "getcwd failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(newdname, dirname); ++ strcat(newdname, "new"); ++ ret = rename(dirname, newdname); ++ if (ret < 0) { ++ fprintf(stderr, "rename failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = rmdir(newdname); ++ if (ret < 0) { ++ fprintf(stderr, "rmdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ rmdir(dirname); ++ return result; + } + +-int +-test_open_modes(char *filename) +-{ +- int ret = -1; +- int result = 0; +- +- ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n"); +- result |= ret; +- } +- +- ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_RDWR failed\n"); +- result |= ret; +- } +- +- ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_WRONLY, 0); +- if (ret != 0) { +- fprintf(stderr, "flag O_WRONLY failed\n"); +- result |= ret; +- } +- +- ret = 
creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_RDWR, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_RDWR failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_RDONLY, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_RDONLY failed\n"); +- result |= ret; +- } ++int link_based_fops(char *filename) { ++ int ret = -1; ++ int result = 0; ++ int fd = 0; ++ char newname[255] = { ++ 0, ++ }; ++ char linkname[255] = { ++ 0, ++ }; ++ struct stat lstbuf = { ++ 0, ++ }; ++ ++ fd = creat(filename, 0644); ++ if (fd < 0) { ++ fd = 0; ++ fprintf(stderr, "creat failed: %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ strcpy(newname, filename); ++ strcat(newname, "_hlink"); ++ ret = link(filename, newname); ++ if (ret < 0) { ++ fprintf(stderr, "link failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(linkname, filename); ++ strcat(linkname, "_slink"); ++ ret = symlink(newname, linkname); ++ if (ret < 0) { ++ fprintf(stderr, "symlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lstat(linkname, &lstbuf); ++ if (ret < 0) { ++ fprintf(stderr, "lstbuf failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lchown(linkname, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "lchown failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = llistxattr(linkname, NULL, 0); ++ if (ret < 0) { ++ ret = -1; ++ fprintf(stderr, "llistxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0); ++ if (ret < 0) { ++ ret = -1; ++ fprintf(stderr, "lgetxattr 
failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lremovexattr(linkname, "trusted.lxattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (fd) ++ close(fd); ++ unlink(linkname); ++ unlink(newname); ++ return result; ++} + +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n"); +- result |= ret; +- } ++int test_open_modes(char *filename) { ++ int ret = -1; ++ int result = 0; ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n"); ++ result |= ret; ++ } ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_RDWR failed\n"); ++ result |= ret; ++ } ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_WRONLY, 0); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_WRONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_RDWR, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_RDWR failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_RDONLY, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_RDONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n"); ++ result |= ret; ++ } + + #if 0 /* undefined behaviour, unable to reliably test */ + ret = creat 
(filename, 0644); +@@ -943,90 +920,87 @@ test_open_modes(char *filename) + } + #endif + +- ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, +- OPEN_MODE); +- if (0 != ret) { +- fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE); +- if (0 != ret) { +- fprintf(stderr, "flag O_CREAT|O_EXCL failed\n"); +- result |= ret; +- } +- +- return result; ++ ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, OPEN_MODE); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_CREAT|O_EXCL failed\n"); ++ result |= ret; ++ } ++ ++ return result; + } + +-int +-generic_open_read_write(char *filename, int flag, mode_t mode) +-{ +- int fd = 0; +- int ret = -1; +- char wstring[50] = { +- 0, +- }; +- char rstring[50] = { +- 0, +- }; +- +- fd = open(filename, flag, mode); +- if (fd < 0) { +- if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) { +- unlink(filename); +- return 0; +- } else { +- fprintf(stderr, "open failed: %s\n", strerror(errno)); +- return -1; +- } +- } +- +- strcpy(wstring, "My string to write\n"); +- ret = write(fd, wstring, strlen(wstring)); +- if (ret <= 0) { +- if (errno != EBADF) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- close(fd); +- unlink(filename); +- return ret; +- } +- } +- +- ret = lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- close(fd); +- unlink(filename); +- return ret; ++int generic_open_read_write(char *filename, int flag, mode_t mode) { ++ int fd = 0; ++ int ret = -1; ++ char wstring[50] = { ++ 0, ++ }; ++ char rstring[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, flag, mode); ++ if (fd < 0) { ++ if (flag == (O_CREAT | O_EXCL) && errno == 
EEXIST) { ++ unlink(filename); ++ return 0; ++ } else { ++ fprintf(stderr, "open failed: %s\n", strerror(errno)); ++ return -1; + } ++ } + +- ret = read(fd, rstring, strlen(wstring)); +- if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY && +- flag != (O_TRUNC | O_WRONLY)) { +- close(fd); +- unlink(filename); +- return ret; ++ strcpy(wstring, "My string to write\n"); ++ ret = write(fd, wstring, strlen(wstring)); ++ if (ret <= 0) { ++ if (errno != EBADF) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ close(fd); ++ unlink(filename); ++ return ret; + } ++ } + +- /* Compare the rstring with wstring. But we do not want to return +- * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or +- * O_TRUNC|O_RDONLY. Because in that case we are not writing +- * anything to the file.*/ +- +- ret = memcmp(wstring, rstring, strlen(wstring)); +- if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY && +- flag != (O_CREAT | O_WRONLY) && +- !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY || +- flag == (O_TRUNC | O_RDONLY))) { +- fprintf(stderr, "read is returning junk\n"); +- close(fd); +- unlink(filename); +- return ret; +- } ++ ret = lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ close(fd); ++ unlink(filename); ++ return ret; ++ } + ++ ret = read(fd, rstring, strlen(wstring)); ++ if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY && ++ flag != (O_TRUNC | O_WRONLY)) { ++ close(fd); ++ unlink(filename); ++ return ret; ++ } ++ ++ /* Compare the rstring with wstring. But we do not want to return ++ * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or ++ * O_TRUNC|O_RDONLY. 
Because in that case we are not writing ++ * anything to the file.*/ ++ ++ ret = memcmp(wstring, rstring, strlen(wstring)); ++ if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY && ++ flag != (O_CREAT | O_WRONLY) && ++ !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY || ++ flag == (O_TRUNC | O_RDONLY))) { ++ fprintf(stderr, "read is returning junk\n"); + close(fd); + unlink(filename); +- return 0; ++ return ret; ++ } ++ ++ close(fd); ++ unlink(filename); ++ return 0; + } +diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t +index 3343956..0e4a1bb 100644 +--- a/tests/basic/posix/shared-statfs.t ++++ b/tests/basic/posix/shared-statfs.t +@@ -20,15 +20,18 @@ TEST mkdir -p $B0/${V0}1 $B0/${V0}2 + TEST MOUNT_LOOP $LO1 $B0/${V0}1 + TEST MOUNT_LOOP $LO2 $B0/${V0}2 + ++total_brick_blocks=$(df -P $B0/${V0}1 $B0/${V0}2 | tail -2 | awk '{sum = sum+$2}END{print sum}') ++#Account for rounding error ++brick_blocks_two_percent_less=$((total_brick_blocks*98/100)) + # Create a subdir in mountpoint and use that for volume. + TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1; + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') ++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}') + # Keeping the size less than 200M mainly because XFS will use + # some storage in brick to keep its own metadata. 
+-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] ++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ] + + + TEST force_umount $M0 +@@ -41,8 +44,8 @@ TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/ + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') +-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] ++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}') ++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ] + + TEST force_umount $M0 + TEST $CLI volume stop $V0 +diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t +index 8e5d77b..e719fc5 100755 +--- a/tests/bugs/cli/bug-1320388.t ++++ b/tests/bugs/cli/bug-1320388.t +@@ -21,7 +21,7 @@ cleanup; + rm -f $SSL_BASE/glusterfs.* + touch "$GLUSTERD_WORKDIR"/secure-access + +-TEST openssl genrsa -out $SSL_KEY 3072 ++TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +diff --git a/tests/bugs/fuse/bug-985074.t b/tests/bugs/fuse/bug-985074.t +index d10fd9f..26d196e 100644 +--- a/tests/bugs/fuse/bug-985074.t ++++ b/tests/bugs/fuse/bug-985074.t +@@ -30,7 +30,7 @@ TEST glusterd + + TEST $CLI volume create $V0 $H0:$B0/$V0 + TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 md-cache-timeout 3 ++TEST $CLI volume set $V0 performance.stat-prefetch off + + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0 + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0 +@@ -40,8 +40,6 @@ TEST ln $M0/file $M0/file.link + TEST ls -ali $M0 $M1 + TEST rm -f $M1/file.link + TEST ls -ali $M0 $M1 +-# expire the md-cache timeout +-sleep 3 + TEST mv 
$M0/file $M0/file.link + TEST stat $M0/file.link + TEST ! stat $M0/file +diff --git a/tests/bugs/glusterd/quorum-value-check.t b/tests/bugs/glusterd/quorum-value-check.t +deleted file mode 100755 +index aaf6362..0000000 +--- a/tests/bugs/glusterd/quorum-value-check.t ++++ /dev/null +@@ -1,35 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +- +-function check_quorum_nfs() { +- local qnfs="$(less /var/lib/glusterd/nfs/nfs-server.vol | grep "quorum-count"| awk '{print $3}')" +- local qinfo="$($CLI volume info $V0| grep "cluster.quorum-count"| awk '{print $2}')" +- +- if [ $qnfs = $qinfo ]; then +- echo "Y" +- else +- echo "N" +- fi +-} +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +-TEST $CLI volume set $V0 nfs.disable off +-TEST $CLI volume set $V0 performance.write-behind off +-TEST $CLI volume set $V0 cluster.self-heal-daemon off +-TEST $CLI volume set $V0 cluster.quorum-type fixed +-TEST $CLI volume start $V0 +- +-TEST $CLI volume set $V0 cluster.quorum-count 1 +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs +-TEST $CLI volume set $V0 cluster.quorum-count 2 +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs +-TEST $CLI volume set $V0 cluster.quorum-count 3 +-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs +- +-cleanup; +diff --git a/tests/bugs/glusterfs-server/bug-887145.t b/tests/bugs/glusterfs-server/bug-887145.t +index 82f7cca..f65b1bd 100755 +--- a/tests/bugs/glusterfs-server/bug-887145.t ++++ b/tests/bugs/glusterfs-server/bug-887145.t +@@ -29,7 +29,15 @@ chmod 600 $M0/file; + + TEST mount_nfs $H0:/$V0 $N0 nolock; + +-chown -R nfsnobody:nfsnobody $M0/dir; ++grep nfsnobody /etc/passwd > /dev/nul ++if [ $? 
-eq 1 ]; then ++usr=nobody ++grp=nobody ++else ++usr=nfsnobody ++grp=nfsnobody ++fi ++chown -R $usr:$grp $M0/dir; + chown -R tmp_user:tmp_user $M0/other; + + TEST $CLI volume set $V0 server.root-squash on; +@@ -38,7 +46,7 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; + + # create files and directories in the root of the glusterfs and nfs mount + # which is owned by root and hence the right behavior is getting EACCESS +-# as the fops are executed as nfsnobody. ++# as the fops are executed as nfsnobody/nobody. + touch $M0/foo 2>/dev/null; + TEST [ $? -ne 0 ] + touch $N0/foo 2>/dev/null; +@@ -61,7 +69,7 @@ cat $N0/passwd 1>/dev/null; + TEST [ $? -eq 0 ] + + # create files and directories should succeed as the fops are being executed +-# inside the directory owned by nfsnobody ++# inside the directory owned by nfsnobody/nobody + TEST touch $M0/dir/file; + TEST touch $N0/dir/foo; + TEST mkdir $M0/dir/new; +diff --git a/tests/bugs/nfs/bug-1053579.t b/tests/bugs/nfs/bug-1053579.t +deleted file mode 100755 +index 2f53172..0000000 +--- a/tests/bugs/nfs/bug-1053579.t ++++ /dev/null +@@ -1,114 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup +- +-# prepare the users and groups +-NEW_USER=bug1053579 +-NEW_UID=1053579 +-NEW_GID=1053579 +-LAST_GID=1053779 +-NEW_GIDS=${NEW_GID} +- +-# OS-specific overrides +-case $OSTYPE in +-NetBSD|Darwin) +- # only NGROUPS_MAX=16 secondary groups are supported +- LAST_GID=1053593 +- ;; +-FreeBSD) +- # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups +- ;; +-Linux) +- # NGROUPS_MAX=65536, we can afford 200 groups +- ;; +-*) +- ;; +-esac +- +-# create a user that belongs to many groups +-for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID}) +-do +- groupadd -o -g ${GID} ${NEW_USER}-${GID} +- NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}" +-done +-TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER} +- +-# preparation done, start the tests +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 $H0:$B0/${V0}1 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 nfs.server-aux-gids on +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available +- +-# mount the volume +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +- +-# the actual test, this used to crash +-su -m ${NEW_USER} -c "stat $N0/. > /dev/null" +-TEST [ $? -eq 0 ] +- +-# create a file that only a user in a high-group can access +-echo 'Hello World!' > $N0/README +-chgrp ${LAST_GID} $N0/README +-chmod 0640 $N0/README +- +-#su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null" +-su -m ${NEW_USER} -c "cat $N0/README" +-ret=$? +- +-case $OSTYPE in +-Linux) # Linux NFS fails with big GID +- if [ $ret -ne 0 ] ; then +- res="Y" +- else +- res="N" +- fi +- ;; +-*) # Other systems should cope better +- if [ $ret -eq 0 ] ; then +- res="Y" +- else +- res="N" +- fi +- ;; +-esac +-TEST [ "x$res" = "xY" ] +- +-# This passes only on build.gluster.org, not reproducible on other machines?! 
+-#su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null" +-#TEST [ $? -ne 0 ] +- +-# enable server.manage-gids and things should work +-TEST $CLI volume set $V0 server.manage-gids on +- +-su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null" +-TEST [ $? -eq 0 ] +-su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null" +-TEST [ $? -eq 0 ] +- +-# cleanup +-userdel --force ${NEW_USER} +-for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID}) +-do +- groupdel ${NEW_USER}-${GID} +-done +- +-rm -f $N0/README +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +- +-TEST $CLI volume stop $V0 +-TEST $CLI volume delete $V0 +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1116503.t b/tests/bugs/nfs/bug-1116503.t +deleted file mode 100644 +index dd3998d..0000000 +--- a/tests/bugs/nfs/bug-1116503.t ++++ /dev/null +@@ -1,47 +0,0 @@ +-#!/bin/bash +-# +-# Verify that mounting NFS over UDP (MOUNT service only) works. +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 nfs.mount-udp on +- +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-TEST mount_nfs $H0:/$V0 $N0 nolock,mountproto=udp,proto=tcp; +-TEST mkdir -p $N0/foo/bar +-TEST ls $N0/foo +-TEST ls $N0/foo/bar +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST $CLI volume set $V0 nfs.addr-namelookup on +-TEST $CLI volume set $V0 nfs.rpc-auth-allow $H0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST $CLI volume set $V0 nfs.rpc-auth-reject $H0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST ! mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +- +-cleanup; +diff --git a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t +deleted file mode 100644 +index c360db4..0000000 +--- a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t ++++ /dev/null +@@ -1,24 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 performance.open-behind off +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST mkdir -p $N0/foo +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +-TEST mount_nfs $H0:/$V0/foo $N0 nolock +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +-cleanup +diff --git a/tests/bugs/nfs/bug-1157223-symlink-mounting.t b/tests/bugs/nfs/bug-1157223-symlink-mounting.t +deleted file mode 100644 +index dea609e..0000000 +--- a/tests/bugs/nfs/bug-1157223-symlink-mounting.t ++++ /dev/null +@@ -1,126 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-## Start and create a volume +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume info; +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0; +- +-## Wait for volume to register with rpc.mountd +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-## Mount NFS +-TEST mount_nfs $H0:/$V0 $N0 nolock; +- +-mkdir $N0/dir1; +-mkdir $N0/dir2; +-pushd $N0/ ; +- +-##link created using relative path +-ln -s dir1 symlink1; +- +-##relative path contains ".." +-ln -s ../dir1 dir2/symlink2; +- +-##link created using absolute path +-ln -s $N0/dir1 symlink3; +- +-##link pointing to another symlinks +-ln -s symlink1 symlink4 +-ln -s symlink3 symlink5 +- +-##dead links +-ln -s does/not/exist symlink6 +- +-##link which contains ".." 
points out of glusterfs +-ln -s ../../ symlink7 +- +-##links pointing to unauthorized area +-ln -s .glusterfs symlink8 +- +-popd ; +- +-##Umount the volume +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via directory +-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink1 +-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink2 +-TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink3 should fail +-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock; +- +-## Mount and umount NFS via symlink4 +-TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink5 should fail +-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock; +- +-## Mount NFS via symlink6 should fail +-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock; +- +-## Mount NFS via symlink7 should fail +-TEST ! mount_nfs $H0:/$V0/symlink7 $N0 nolock; +- +-## Mount NFS via symlink8 should fail +-TEST ! 
mount_nfs $H0:/$V0/symlink8 $N0 nolock; +- +-##Similar check for udp mount +-$CLI volume stop $V0 +-TEST $CLI volume set $V0 nfs.mount-udp on +-$CLI volume start $V0 +- +-## Wait for volume to register with rpc.mountd +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-## Mount and umount NFS via directory +-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink1 +-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink2 +-TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink3 should fail +-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock,mountproto=udp,proto=tcp; +- +-## Mount and umount NFS via symlink4 +-TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink5 should fail +-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock,mountproto=udp,proto=tcp; +- +-## Mount NFS via symlink6 should fail +-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock,mountproto=udp,proto=tcp; +- +-##symlink7 is not check here, because in udp mount ../../ resolves into root '/' +- +-## Mount NFS via symlink8 should fail +-TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock,mountproto=udp,proto=tcp; +- +-rm -rf $H0:$B0/ +-cleanup; +diff --git a/tests/bugs/nfs/bug-1161092-nfs-acls.t b/tests/bugs/nfs/bug-1161092-nfs-acls.t +deleted file mode 100644 +index 45a22e7..0000000 +--- a/tests/bugs/nfs/bug-1161092-nfs-acls.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume info +- +-TEST $CLI volume create $V0 $H0:$B0/brick1; +-EXPECT 'Created' volinfo_field $V0 'Status'; +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +-TEST mount_nfs $H0:/$V0 $N0 +- +-TEST touch $N0/file1 +-TEST chmod 700 $N0/file1 +-TEST getfacl $N0/file1 +- +-TEST $CLI volume set $V0 root-squash on +-TEST getfacl $N0/file1 +- +-TEST umount_nfs $H0:/$V0 $N0 +-TEST mount_nfs $H0:/$V0 $N0 +-TEST getfacl $N0/file1 +- +-## Before killing daemon to avoid deadlocks +-umount_nfs $N0 +- +-cleanup; +- +diff --git a/tests/bugs/nfs/bug-1166862.t b/tests/bugs/nfs/bug-1166862.t +deleted file mode 100755 +index c4f51a2..0000000 +--- a/tests/bugs/nfs/bug-1166862.t ++++ /dev/null +@@ -1,69 +0,0 @@ +-#!/bin/bash +-# +-# When nfs.mount-rmtab is disabled, it should not get updated. +-# +-# Based on: bug-904065.t +-# +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-# count the lines of a file, return 0 if the file does not exist +-function count_lines() +-{ +- if [ -n "$1" ] +- then +- $@ 2>/dev/null | wc -l +- else +- echo 0 +- fi +-} +- +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. 
$(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# disable the rmtab by settting it to the magic "/-" value +-TEST $CLI volume set $V0 nfs.mount-rmtab /- +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-# showmount should list one client +-EXPECT '1' count_lines showmount --no-headers $H0 +- +-# unmount +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-# after resetting the option, the rmtab should get updated again +-TEST $CLI volume reset $V0 nfs.mount-rmtab +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT '2' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-# removing a mount +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1210338.c b/tests/bugs/nfs/bug-1210338.c +deleted file mode 100644 +index d409924..0000000 +--- a/tests/bugs/nfs/bug-1210338.c ++++ /dev/null +@@ -1,31 +0,0 @@ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-int +-main(int argc, char *argv[]) +-{ +- int ret = -1; +- int fd = -1; +- +- fd = open(argv[1], O_CREAT | O_EXCL, 0644); +- +- if (fd == -1) { +- fprintf(stderr, "creation of the file %s failed (%s)\n", argv[1], +- strerror(errno)); +- goto out; +- } +- +- ret = 0; +- +-out: +- if (fd > 0) +- close(fd); +- +- return ret; +-} +diff --git 
a/tests/bugs/nfs/bug-1210338.t b/tests/bugs/nfs/bug-1210338.t +deleted file mode 100644 +index b5c9245..0000000 +--- a/tests/bugs/nfs/bug-1210338.t ++++ /dev/null +@@ -1,30 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-NFS_SOURCE=$(dirname $0)/bug-1210338.c +-NFS_EXEC=$(dirname $0)/excl_create +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-build_tester $NFS_SOURCE -o $NFS_EXEC +-TEST [ -e $NFS_EXEC ] +- +-TEST $NFS_EXEC $N0/my_file +- +-rm -f $NFS_EXEC; +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1302948.t b/tests/bugs/nfs/bug-1302948.t +deleted file mode 100755 +index a2fb0e6..0000000 +--- a/tests/bugs/nfs/bug-1302948.t ++++ /dev/null +@@ -1,13 +0,0 @@ +-#!/bin/bash +-# TEST the nfs.rdirplus option +-. $(dirname $0)/../../include.rc +- +-cleanup +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 nfs.rdirplus off +-TEST $CLI volume set $V0 nfs.rdirplus on +-cleanup +diff --git a/tests/bugs/nfs/bug-847622.t b/tests/bugs/nfs/bug-847622.t +deleted file mode 100755 +index 5ccee72..0000000 +--- a/tests/bugs/nfs/bug-847622.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. 
$(dirname $0)/../../volume.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-case $OSTYPE in +-NetBSD) +- echo "Skip test on ACL which are not available on NetBSD" >&2 +- SKIP_TESTS +- exit 0 +- ;; +-*) +- ;; +-esac +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 $H0:$B0/brick0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +-cd $N0 +- +-# simple getfacl setfacl commands +-TEST touch testfile +-TEST setfacl -m u:14:r testfile +-TEST getfacl testfile +- +-cd +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-cleanup +- +diff --git a/tests/bugs/nfs/bug-877885.t b/tests/bugs/nfs/bug-877885.t +deleted file mode 100755 +index dca315a..0000000 +--- a/tests/bugs/nfs/bug-877885.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +- +-## Mount FUSE with caching disabled +-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \ +-$M0; +- +-TEST touch $M0/file +-TEST mkdir $M0/dir +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +-cd $N0 +- +-rm -rf * & +- +-TEST mount_nfs $H0:/$V0 $N1 retry=0,nolock; +- +-cd; +- +-kill %1; +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1 +- +-cleanup +diff --git a/tests/bugs/nfs/bug-904065.t b/tests/bugs/nfs/bug-904065.t +deleted file mode 100755 +index 0eba86e..0000000 +--- a/tests/bugs/nfs/bug-904065.t ++++ /dev/null +@@ -1,100 +0,0 @@ +-#!/bin/bash +-# +-# This test does not use 'showmount' 
from the nfs-utils package, it would +-# require setting up a portmapper (either rpcbind or portmap, depending on the +-# Linux distribution used for testing). The persistancy of the rmtab should not +-# affect the current showmount outputs, so existing regression tests should be +-# sufficient. +-# +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-# count the lines of a file, return 0 if the file does not exist +-function count_lines() +-{ +- if [ -e "$1" ] +- then +- wc -l < $1 +- else +- echo 0 +- fi +-} +- +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-# the output would looks similar to: +-# +-# hostname-0=172.31.122.104 +-# mountpoint-0=/ufo +-# +-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# duplicate mounts should not be recorded (client could have crashed) +-TEST mount_nfs $H0:/$V0 $N1 nolock +-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# removing a mount should (even if there are two) should remove the entry +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1 +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# unmounting the other mount should work flawlessly +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $M0 +- +-# we'll create a fake rmtab here, similar to how an other storage 
server would do +-# using an invalid IP address to prevent (unlikely) collisions on the test-machine +-cat << EOF > $M0/rmtab +-hostname-0=127.0.0.256 +-mountpoint-0=/ufo +-EOF +-EXPECT '2' count_lines $M0/rmtab +- +-# reconfigure merges the rmtab with the one on the volume +-TEST gluster volume set $V0 nfs.mount-rmtab $M0/rmtab +- +-# glusterfs/nfs needs some time to restart +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# Apparently "is_nfs_export_available" might return even if the export is +-# not, in fact, available. (eyeroll) Give it a bit of extra time. +-# +-# TBD: fix the broken shell function instead of working around it here +-sleep 5 +- +-# a new mount should be added to the rmtab, not overwrite exiting ones +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT '4' count_lines $M0/rmtab +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '2' count_lines $M0/rmtab +- +-# TODO: nfs/reconfigure() is never called and is therefor disabled. When the +-# NFS-server supports reloading and does not get restarted anymore, we should +-# add a test that includes the merging of entries in the old rmtab with the new +-# rmtab. +- +-cleanup +diff --git a/tests/bugs/nfs/bug-915280.t b/tests/bugs/nfs/bug-915280.t +deleted file mode 100755 +index bd27915..0000000 +--- a/tests/bugs/nfs/bug-915280.t ++++ /dev/null +@@ -1,54 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +- +-function volinfo_field() +-{ +- local vol=$1; +- local field=$2; +- +- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; +-} +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2; +-EXPECT 'Created' volinfo_field $V0 'Status'; +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-MOUNTDIR=$N0; +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock,timeo=30,retrans=1 +-TEST touch $N0/testfile +- +-TEST $CLI volume set $V0 debug.error-gen client +-TEST $CLI volume set $V0 debug.error-fops stat +-TEST $CLI volume set $V0 debug.error-failure 100 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-pid_file=$(read_nfs_pidfile); +- +-getfacl $N0/testfile 2>/dev/null +- +-nfs_pid=$(get_nfs_pid); +-if [ ! $nfs_pid ] +-then +- nfs_pid=0; +-fi +- +-TEST [ $nfs_pid -eq $pid_file ] +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR +- +-cleanup; +diff --git a/tests/bugs/nfs/bug-970070.t b/tests/bugs/nfs/bug-970070.t +deleted file mode 100755 +index 61be484..0000000 +--- a/tests/bugs/nfs/bug-970070.t ++++ /dev/null +@@ -1,13 +0,0 @@ +-#!/bin/bash +-# TEST the nfs.acl option +-. $(dirname $0)/../../include.rc +- +-cleanup +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 nfs.acl off +-TEST $CLI volume set $V0 nfs.acl on +-cleanup +diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t +deleted file mode 100755 +index 975c46f..0000000 +--- a/tests/bugs/nfs/bug-974972.t ++++ /dev/null +@@ -1,41 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-#This script checks that nfs mount does not fail lookup on files with split-brain +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +-TEST $CLI volume set $V0 self-heal-daemon off +-TEST $CLI volume set $V0 cluster.eager-lock off +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 +-TEST touch $N0/1 +-TEST kill_brick ${V0} ${H0} ${B0}/${V0}1 +-echo abc > $N0/1 +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1 +- +-TEST kill_brick ${V0} ${H0} ${B0}/${V0}0 +-echo def > $N0/1 +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1 +- +-#Lookup should not fail +-TEST ls $N0/1 +-TEST ! cat $N0/1 +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-cleanup +diff --git a/tests/bugs/nfs/showmount-many-clients.t b/tests/bugs/nfs/showmount-many-clients.t +deleted file mode 100644 +index f1b6859..0000000 +--- a/tests/bugs/nfs/showmount-many-clients.t ++++ /dev/null +@@ -1,41 +0,0 @@ +-#!/bin/bash +-# +-# The nfs.rpc-auth-allow volume option is used to generate the list of clients +-# that are displayed as able to mount the export. The "group" in the export +-# should be a list of all clients, identified by "name". In previous versions, +-# the "name" was the copied string from nfs.rpc-auth-allow. This is not +-# correct, as the volume option should be parsed and split into different +-# groups. 
+-# +-# When the single string is passed, this testcase fails when the +-# nfs.rpc-auth-allow volume option is longer than 256 characters. By splitting +-# the groups into their own structures, this testcase passes. +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-CLIENTS=$(echo 127.0.0.{1..128} | tr ' ' ,) +-TEST $CLI volume set $V0 nfs.rpc-auth-allow ${CLIENTS} +-TEST $CLI volume set $V0 nfs.rpc-auth-reject all +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# showmount should not timeout (no reply is sent on error) +-TEST showmount -e $H0 +- +-cleanup +diff --git a/tests/bugs/nfs/socket-as-fifo.py b/tests/bugs/nfs/socket-as-fifo.py +deleted file mode 100755 +index eb507e1..0000000 +--- a/tests/bugs/nfs/socket-as-fifo.py ++++ /dev/null +@@ -1,33 +0,0 @@ +-# +-# Create a unix domain socket and test if it is a socket (and not a fifo/pipe). +-# +-# Author: Niels de Vos +-# +- +-from __future__ import print_function +-import os +-import stat +-import sys +-import socket +- +-ret = 1 +- +-if len(sys.argv) != 2: +- print('Usage: %s ' % (sys.argv[0])) +- sys.exit(ret) +- +-path = sys.argv[1] +- +-sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +-sock.bind(path) +- +-stbuf = os.stat(path) +-mode = stbuf.st_mode +- +-if stat.S_ISSOCK(mode): +- ret = 0 +- +-sock.close() +-os.unlink(path) +- +-sys.exit(ret) +diff --git a/tests/bugs/nfs/socket-as-fifo.t b/tests/bugs/nfs/socket-as-fifo.t +deleted file mode 100644 +index d9b9e95..0000000 +--- a/tests/bugs/nfs/socket-as-fifo.t ++++ /dev/null +@@ -1,25 +0,0 @@ +-#!/bin/bash +- +-. 
$(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-# this is the actual test +-TEST $PYTHON $(dirname $0)/socket-as-fifo.py $N0/not-a-fifo.socket +- +-TEST umount_nfs $N0 +- +-cleanup +diff --git a/tests/bugs/nfs/subdir-trailing-slash.t b/tests/bugs/nfs/subdir-trailing-slash.t +deleted file mode 100644 +index 6a11487..0000000 +--- a/tests/bugs/nfs/subdir-trailing-slash.t ++++ /dev/null +@@ -1,32 +0,0 @@ +-#!/bin/bash +-# +-# Verify that mounting a subdir over NFS works, even with a trailing / +-# +-# For example: +-# mount -t nfs server.example.com:/volume/subdir/ +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST mkdir -p $N0/subdir +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST mount_nfs $H0:/$V0/subdir/ $N0 nolock +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-cleanup +diff --git a/tests/bugs/nfs/zero-atime.t b/tests/bugs/nfs/zero-atime.t +deleted file mode 100755 +index 2a94009..0000000 +--- a/tests/bugs/nfs/zero-atime.t ++++ /dev/null +@@ -1,33 +0,0 @@ +-#!/bin/bash +-# +-# posix_do_utimes() sets atime and mtime to the values in the passed IATT. If +-# not set, these values are 0 and cause a atime/mtime set to the Epoch. +-# +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-# create a file for testing +-TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k +- +-# timezone in UTC results in atime=0 if not set correctly +-TEST TZ=UTC dd if=/dev/urandom of=$M0/small bs=64k count=1 conv=nocreat +-TEST [ "$(stat --format=%X $M0/small)" != "0" ] +- +-TEST rm $M0/small +- +-cleanup +diff --git a/tests/bugs/rpc/bug-954057.t b/tests/bugs/rpc/bug-954057.t +index 65af274..9ad0ab2 100755 +--- a/tests/bugs/rpc/bug-954057.t ++++ b/tests/bugs/rpc/bug-954057.t +@@ -25,7 +25,15 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + + TEST mkdir $M0/dir + TEST mkdir $M0/nobody +-TEST chown nfsnobody:nfsnobody $M0/nobody ++grep nfsnobody /etc/passwd > /dev/nul ++if [ $? -eq 1 ]; then ++usr=nobody ++grp=nobody ++else ++usr=nfsnobody ++grp=nfsnobody ++fi ++TEST chown $usr:$grp $M0/nobody + TEST `echo "file" >> $M0/file` + TEST cp $M0/file $M0/new + TEST chmod 700 $M0/new +diff --git a/tests/bugs/shard/bug-1272986.t b/tests/bugs/shard/bug-1272986.t +index 7628870..66e896a 100644 +--- a/tests/bugs/shard/bug-1272986.t ++++ b/tests/bugs/shard/bug-1272986.t +@@ -16,16 +16,16 @@ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 + + # Write some data into a file, such that its size crosses the shard block size. +-TEST dd if=/dev/zero of=$M1/file bs=1M count=5 conv=notrunc ++TEST dd if=/dev/urandom of=$M1/file bs=1M count=5 conv=notrunc oflag=direct + + md5sum1_reader=$(md5sum $M0/file | awk '{print $1}') + + EXPECT "$md5sum1_reader" echo `md5sum $M1/file | awk '{print $1}'` + + # Append some more data into the file. 
+-TEST `echo "abcdefg" >> $M1/file` ++TEST dd if=/dev/urandom of=$M1/file bs=256k count=1 conv=notrunc oflag=direct + +-md5sum2_reader=$(md5sum $M0/file | awk '{print $1}') ++md5sum2_reader=$(dd if=$M0/file iflag=direct bs=256k| md5sum | awk '{print $1}') + + # Test to see if the reader refreshes its cache correctly as part of the reads + # triggered through md5sum. If it does, then the md5sum on the reader and writer +diff --git a/tests/bugs/transport/bug-873367.t b/tests/bugs/transport/bug-873367.t +index d4c0702..8070bc1 100755 +--- a/tests/bugs/transport/bug-873367.t ++++ b/tests/bugs/transport/bug-873367.t +@@ -13,7 +13,7 @@ rm -f $SSL_BASE/glusterfs.* + mkdir -p $B0/1 + mkdir -p $M0 + +-TEST openssl genrsa -out $SSL_KEY 1024 ++TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index 3cb45b5..cae010c 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -41,7 +41,7 @@ function valid_ciphers { + -e '/:$/s///' + } + +-TEST openssl genrsa -out $SSL_KEY 1024 ++TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t +index 7e1e199..e4bcdf5 100644 +--- a/tests/features/ssl-ciphers.t ++++ b/tests/features/ssl-ciphers.t +@@ -33,18 +33,26 @@ wait_mount() { + openssl_connect() { + ssl_opt="-verify 3 -verify_return_error -CAfile $SSL_CA" + ssl_opt="$ssl_opt -crl_check_all -CApath $TMPDIR" +- #echo openssl s_client $ssl_opt $@ > /dev/tty +- #read -p "Continue? 
" nothing +- CIPHER=`echo "" | +- openssl s_client $ssl_opt $@ 2>/dev/null | +- awk '/^ Cipher/{print $3}'` +- if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" ] ; then ++ cmd="echo "" | openssl s_client $ssl_opt $@ 2>/dev/null" ++ CIPHER=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}') ++ if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" -o "x${CIPHER}" = "x(NONE)" ] ; then + echo "N" + else + echo "Y" + fi + } + ++#Validate the cipher to pass EXPECT test case before call openssl_connect ++check_cipher() { ++ cmd="echo "" | openssl s_client $@ 2> /dev/null" ++ cipher=$(eval $cmd |awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}') ++ if [ "x${cipher}" = "x" -o "x${cipher}" = "x0000" -o "x${cipher}" = "x(NONE)" ] ; then ++ echo "N" ++ else ++ echo "Y" ++ fi ++} ++ + cleanup; + mkdir -p $B0 + mkdir -p $M0 +@@ -65,7 +73,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume info; + +-TEST openssl genrsa -out $SSL_KEY 1024 2>/dev/null ++TEST openssl genrsa -out $SSL_KEY 2048 2>/dev/null + TEST openssl req -config $SSL_CFG -new -key $SSL_KEY -x509 \ + -subj /CN=CA -out $SSL_CA + TEST openssl req -config $SSL_CFG -new -key $SSL_KEY \ +@@ -106,28 +114,36 @@ EXPECT "N" openssl_connect -ssl3 -connect $H0:$BRICK_PORT + EXPECT "N" openssl_connect -tls1 -connect $H0:$BRICK_PORT + + # Test a HIGH CBC cipher +-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT + + # Test EECDH +-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + # test MD5 fails +-EXPECT "N" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT` ++EXPECT 
"$cph" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT + + # test RC4 fails +-EXPECT "N" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher RC4-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT + + # test eNULL fails +-EXPECT "N" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher NULL-SHA256 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT + + # test SHA2 +-EXPECT "Y" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA256 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT + + # test GCM +-EXPECT "Y" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT + + # Test DH fails without DH params +-EXPECT "N" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT + + # Test DH with DH params + TEST $CLI volume set $V0 ssl.dh-param `pwd`/`dirname $0`/dh1024.pem +@@ -145,8 +161,10 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +-EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES128-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT + + # Test the ec-curve option + TEST $CLI volume set 
$V0 ssl.cipher-list EECDH:EDH:!TLSv1 +@@ -155,8 +173,10 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + TEST $CLI volume set $V0 ssl.ec-curve invalid + EXPECT invalid volume_option $V0 ssl.ec-curve +@@ -164,7 +184,8 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + TEST $CLI volume set $V0 ssl.ec-curve secp521r1 + EXPECT secp521r1 volume_option $V0 ssl.ec-curve +diff --git a/tests/ssl.rc b/tests/ssl.rc +index 127f83f..b1ccc4c 100644 +--- a/tests/ssl.rc ++++ b/tests/ssl.rc +@@ -20,7 +20,7 @@ SSL_CA=$SSL_BASE/glusterfs.ca + + # Create self-signed certificates + function create_self_signed_certs (){ +- openssl genrsa -out $SSL_KEY 1024 ++ openssl genrsa -out $SSL_KEY 2048 + openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + return $? 
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index b248767..b224abd 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -10,6883 +10,6417 @@ + + #include + +-#include "shard.h" + #include "shard-mem-types.h" ++#include "shard.h" + #include + #include + #include + +-static gf_boolean_t +-__is_shard_dir(uuid_t gfid) +-{ +- shard_priv_t *priv = THIS->private; ++static gf_boolean_t __is_shard_dir(uuid_t gfid) { ++ shard_priv_t *priv = THIS->private; + +- if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) +- return _gf_true; ++ if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) ++ return _gf_true; + +- return _gf_false; ++ return _gf_false; + } + +-static gf_boolean_t +-__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) +-{ +- if (frame->root->pid == GF_CLIENT_PID_GSYNCD && +- (__is_shard_dir(loc->pargfid) || +- (loc->parent && __is_shard_dir(loc->parent->gfid)))) +- return _gf_true; ++static gf_boolean_t __is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) { ++ if (frame->root->pid == GF_CLIENT_PID_GSYNCD && ++ (__is_shard_dir(loc->pargfid) || ++ (loc->parent && __is_shard_dir(loc->parent->gfid)))) ++ return _gf_true; + +- return _gf_false; ++ return _gf_false; + } + +-void +-shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; ++void shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) { ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(buf, len, "%s.%d", gfid_str, block_num); ++ gf_uuid_unparse(gfid, gfid_str); ++ snprintf(buf, len, "%s.%d", gfid_str, block_num); + } + +-void +-shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; ++void shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, ++ size_t len) { ++ char 
gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); ++ gf_uuid_unparse(gfid, gfid_str); ++ snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); + } + +-int +-__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx_p = NULL; ++int __shard_inode_ctx_get(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t **ctx) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx_p = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret == 0) { +- *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; +- return ret; +- } ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret == 0) { ++ *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ return ret; ++ } + +- ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); +- if (!ctx_p) +- return ret; ++ ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); ++ if (!ctx_p) ++ return ret; + +- INIT_LIST_HEAD(&ctx_p->ilist); +- INIT_LIST_HEAD(&ctx_p->to_fsync_list); ++ INIT_LIST_HEAD(&ctx_p->ilist); ++ INIT_LIST_HEAD(&ctx_p->to_fsync_list); + +- ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); +- if (ret < 0) { +- GF_FREE(ctx_p); +- return ret; +- } ++ ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); ++ if (ret < 0) { ++ GF_FREE(ctx_p); ++ return ret; ++ } + +- *ctx = ctx_p; ++ *ctx = ctx_p; + +- return ret; ++ return ret; + } + +-int +-shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +-{ +- int ret = 0; ++int shard_inode_ctx_get(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t **ctx) { ++ int ret = 0; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get(inode, this, ctx); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get(inode, this, ctx); } ++ UNLOCK(&inode->lock); + +- return ret; ++ 
return ret; + } + +-int +-__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, +- uint64_t block_size, int32_t valid) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, ++ uint64_t block_size, int32_t valid) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- if (valid & SHARD_MASK_BLOCK_SIZE) +- ctx->block_size = block_size; ++ if (valid & SHARD_MASK_BLOCK_SIZE) ++ ctx->block_size = block_size; + +- if (valid & SHARD_MASK_PROT) +- ctx->stat.ia_prot = stbuf->ia_prot; ++ if (valid & SHARD_MASK_PROT) ++ ctx->stat.ia_prot = stbuf->ia_prot; + +- if (valid & SHARD_MASK_NLINK) +- ctx->stat.ia_nlink = stbuf->ia_nlink; ++ if (valid & SHARD_MASK_NLINK) ++ ctx->stat.ia_nlink = stbuf->ia_nlink; + +- if (valid & SHARD_MASK_UID) +- ctx->stat.ia_uid = stbuf->ia_uid; ++ if (valid & SHARD_MASK_UID) ++ ctx->stat.ia_uid = stbuf->ia_uid; + +- if (valid & SHARD_MASK_GID) +- ctx->stat.ia_gid = stbuf->ia_gid; ++ if (valid & SHARD_MASK_GID) ++ ctx->stat.ia_gid = stbuf->ia_gid; + +- if (valid & SHARD_MASK_SIZE) +- ctx->stat.ia_size = stbuf->ia_size; ++ if (valid & SHARD_MASK_SIZE) ++ ctx->stat.ia_size = stbuf->ia_size; + +- if (valid & SHARD_MASK_BLOCKS) +- ctx->stat.ia_blocks = stbuf->ia_blocks; ++ if (valid & SHARD_MASK_BLOCKS) ++ ctx->stat.ia_blocks = stbuf->ia_blocks; + +- if (valid & SHARD_MASK_TIMES) { +- SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, +- stbuf->ia_mtime, stbuf->ia_mtime_nsec); +- SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, +- stbuf->ia_ctime, stbuf->ia_ctime_nsec); +- SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, +- stbuf->ia_atime, stbuf->ia_atime_nsec); +- } ++ if (valid & SHARD_MASK_TIMES) { ++ SHARD_TIME_UPDATE(ctx->stat.ia_mtime, 
ctx->stat.ia_mtime_nsec, ++ stbuf->ia_mtime, stbuf->ia_mtime_nsec); ++ SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, ++ stbuf->ia_ctime, stbuf->ia_ctime_nsec); ++ SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, ++ stbuf->ia_atime, stbuf->ia_atime_nsec); ++ } + +- if (valid & SHARD_MASK_OTHERS) { +- ctx->stat.ia_ino = stbuf->ia_ino; +- gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); +- ctx->stat.ia_dev = stbuf->ia_dev; +- ctx->stat.ia_type = stbuf->ia_type; +- ctx->stat.ia_rdev = stbuf->ia_rdev; +- ctx->stat.ia_blksize = stbuf->ia_blksize; +- } ++ if (valid & SHARD_MASK_OTHERS) { ++ ctx->stat.ia_ino = stbuf->ia_ino; ++ gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); ++ ctx->stat.ia_dev = stbuf->ia_dev; ++ ctx->stat.ia_type = stbuf->ia_type; ++ ctx->stat.ia_rdev = stbuf->ia_rdev; ++ ctx->stat.ia_blksize = stbuf->ia_blksize; ++ } + +- if (valid & SHARD_MASK_REFRESH_RESET) +- ctx->refresh = _gf_false; ++ if (valid & SHARD_MASK_REFRESH_RESET) ++ ctx->refresh = _gf_false; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, +- uint64_t block_size, int32_t valid) +-{ +- int ret = -1; ++int shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, ++ uint64_t block_size, int32_t valid) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, 
&ctx); ++ if (ret) ++ return ret; + +- ctx->refresh = _gf_true; ++ ctx->refresh = _gf_true; + +- return 0; ++ return 0; + } +-int +-shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; ++int shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_set_refresh_flag(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_set_refresh_flag(inode, this); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- ctx->refreshed = _gf_true; +- return 0; ++ ctx->refreshed = _gf_true; ++ return 0; + } + +-int +-shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; ++int shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, +- inode_t *shard_inode) +-{ +- int ret = -1; +- shard_inode_ctx_t *base_ictx = NULL; +- shard_inode_ctx_t *shard_ictx = NULL; +- +- ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); +- if (ret) +- return ret; ++int __shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, ++ inode_t *shard_inode) { ++ int ret = -1; ++ shard_inode_ctx_t *base_ictx 
= NULL; ++ shard_inode_ctx_t *shard_ictx = NULL; + +- ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ if (ret) ++ return ret; + +- if (shard_ictx->fsync_needed) { +- shard_ictx->fsync_needed++; +- return 1; +- } ++ ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); ++ if (ret) ++ return ret; + +- list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); +- shard_ictx->inode = shard_inode; ++ if (shard_ictx->fsync_needed) { + shard_ictx->fsync_needed++; +- base_ictx->fsync_count++; +- shard_ictx->base_inode = base_inode; ++ return 1; ++ } + +- return 0; ++ list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); ++ shard_ictx->inode = shard_inode; ++ shard_ictx->fsync_needed++; ++ base_ictx->fsync_count++; ++ shard_ictx->base_inode = base_inode; ++ ++ return 0; + } + +-int +-shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, +- inode_t *shard_inode) +-{ +- int ret = -1; ++int shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, ++ inode_t *shard_inode) { ++ int ret = -1; + +- /* This ref acts as a refkeepr on the base inode. We +- * need to keep this inode alive as it holds the head +- * of the to_fsync_list. +- */ +- inode_ref(base_inode); +- inode_ref(shard_inode); ++ /* This ref acts as a refkeepr on the base inode. We ++ * need to keep this inode alive as it holds the head ++ * of the to_fsync_list. 
++ */ ++ inode_ref(base_inode); ++ inode_ref(shard_inode); + +- LOCK(&base_inode->lock); +- LOCK(&shard_inode->lock); +- { +- ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, +- shard_inode); +- } +- UNLOCK(&shard_inode->lock); +- UNLOCK(&base_inode->lock); ++ LOCK(&base_inode->lock); ++ LOCK(&shard_inode->lock); ++ { ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, shard_inode); } ++ UNLOCK(&shard_inode->lock); ++ UNLOCK(&base_inode->lock); + +- /* Unref the base inode corresponding to the ref above, if the shard is +- * found to be already part of the fsync list. +- */ +- if (ret != 0) { +- inode_unref(base_inode); +- inode_unref(shard_inode); +- } +- return ret; ++ /* Unref the base inode corresponding to the ref above, if the shard is ++ * found to be already part of the fsync list. ++ */ ++ if (ret != 0) { ++ inode_unref(base_inode); ++ inode_unref(shard_inode); ++ } ++ return ret; + } + +-gf_boolean_t +-__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++gf_boolean_t __shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- /* If inode ctx get fails, better to err on the side of caution and +- * try again? Unless the failure is due to mem-allocation. +- */ +- if (ret) +- return _gf_true; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ /* If inode ctx get fails, better to err on the side of caution and ++ * try again? Unless the failure is due to mem-allocation. 
++ */ ++ if (ret) ++ return _gf_true; + +- return !ctx->refreshed; ++ return !ctx->refreshed; + } + +-gf_boolean_t +-shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) +-{ +- gf_boolean_t flag = _gf_false; ++gf_boolean_t shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { ++ gf_boolean_t flag = _gf_false; + +- LOCK(&inode->lock); +- { +- flag = __shard_inode_ctx_needs_lookup(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { flag = __shard_inode_ctx_needs_lookup(inode, this); } ++ UNLOCK(&inode->lock); + +- return flag; ++ return flag; + } +-int +-__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, ++ struct iatt *stbuf) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- if ((stbuf->ia_size != ctx->stat.ia_size) || +- (stbuf->ia_blocks != ctx->stat.ia_blocks)) +- ctx->refresh = _gf_true; ++ if ((stbuf->ia_size != ctx->stat.ia_size) || ++ (stbuf->ia_blocks != ctx->stat.ia_blocks)) ++ ctx->refresh = _gf_true; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +-{ +- int ret = -1; ++int shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, ++ struct iatt *stbuf) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_invalidate(inode, this, stbuf); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_invalidate(inode, this, stbuf); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, +- uint64_t *block_size) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int 
__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, ++ uint64_t *block_size) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- *block_size = ctx->block_size; ++ *block_size = ctx->block_size; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, +- uint64_t *block_size) +-{ +- int ret = -1; ++int shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, ++ uint64_t *block_size) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_block_size(inode, this, block_size); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_block_size(inode, this, block_size); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, +- int *fsync_count) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, ++ int *fsync_count) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- *fsync_count = ctx->fsync_needed; ++ *fsync_count = ctx->fsync_needed; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, +- int *fsync_count) +-{ +- int ret = -1; ++int shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, ++ int *fsync_count) { ++ int 
ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } +-int +-__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t *ctx_out) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t *ctx_out) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); +- return 0; ++ memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); ++ return 0; + } + +-int +-shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t *ctx_out) +-{ +- int ret = -1; ++int shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t *ctx_out) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_all(inode, this, ctx_out); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_all(inode, this, ctx_out); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, +- struct iatt *buf, +- gf_boolean_t *need_refresh) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, ++ struct iatt *buf, ++ gf_boolean_t *need_refresh) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = 
__inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- if (ctx->refresh == _gf_false) +- *buf = ctx->stat; +- else +- *need_refresh = _gf_true; ++ if (ctx->refresh == _gf_false) ++ *buf = ctx->stat; ++ else ++ *need_refresh = _gf_true; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, +- struct iatt *buf, +- gf_boolean_t *need_refresh) +-{ +- int ret = -1; ++int shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, ++ struct iatt *buf, ++ gf_boolean_t *need_refresh) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, +- need_refresh); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = ++ __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, need_refresh); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-void +-shard_local_wipe(shard_local_t *local) +-{ +- int i = 0; +- int count = 0; +- +- count = local->num_blocks; +- +- syncbarrier_destroy(&local->barrier); +- loc_wipe(&local->loc); +- loc_wipe(&local->dot_shard_loc); +- loc_wipe(&local->dot_shard_rm_loc); +- loc_wipe(&local->loc2); +- loc_wipe(&local->tmp_loc); +- loc_wipe(&local->int_inodelk.loc); +- loc_wipe(&local->int_entrylk.loc); +- loc_wipe(&local->newloc); +- +- if (local->int_entrylk.basename) +- GF_FREE(local->int_entrylk.basename); +- if (local->fd) +- fd_unref(local->fd); +- +- if (local->xattr_req) +- dict_unref(local->xattr_req); +- if (local->xattr_rsp) +- dict_unref(local->xattr_rsp); +- +- for (i = 0; i < count; i++) { +- if (!local->inode_list) +- break; +- +- if (local->inode_list[i]) +- inode_unref(local->inode_list[i]); +- } +- +- GF_FREE(local->inode_list); +- +- GF_FREE(local->vector); +- if (local->iobref) +- 
iobref_unref(local->iobref); +- if (local->list_inited) +- gf_dirent_free(&local->entries_head); +- if (local->inodelk_frame) +- SHARD_STACK_DESTROY(local->inodelk_frame); +- if (local->entrylk_frame) +- SHARD_STACK_DESTROY(local->entrylk_frame); +-} +- +-int +-shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) +-{ +- int ret = -1; +- void *size_attr = NULL; +- uint64_t size_array[4]; +- +- ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); +- if (ret) { +- gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, +- SHARD_MSG_INTERNAL_XATTR_MISSING, +- "Failed to " +- "get " GF_XATTR_SHARD_FILE_SIZE " for %s", +- uuid_utoa(stbuf->ia_gfid)); +- return ret; +- } ++void shard_local_wipe(shard_local_t *local) { ++ int i = 0; ++ int count = 0; + +- memcpy(size_array, size_attr, sizeof(size_array)); ++ count = local->num_blocks; + +- stbuf->ia_size = ntoh64(size_array[0]); +- stbuf->ia_blocks = ntoh64(size_array[2]); ++ syncbarrier_destroy(&local->barrier); ++ loc_wipe(&local->loc); ++ loc_wipe(&local->dot_shard_loc); ++ loc_wipe(&local->dot_shard_rm_loc); ++ loc_wipe(&local->loc2); ++ loc_wipe(&local->tmp_loc); ++ loc_wipe(&local->int_inodelk.loc); ++ loc_wipe(&local->int_entrylk.loc); ++ loc_wipe(&local->newloc); + +- return 0; +-} ++ if (local->int_entrylk.basename) ++ GF_FREE(local->int_entrylk.basename); ++ if (local->fd) ++ fd_unref(local->fd); + +-int +-shard_call_count_return(call_frame_t *frame) +-{ +- int call_count = 0; +- shard_local_t *local = NULL; ++ if (local->xattr_req) ++ dict_unref(local->xattr_req); ++ if (local->xattr_rsp) ++ dict_unref(local->xattr_rsp); + +- local = frame->local; ++ for (i = 0; i < count; i++) { ++ if (!local->inode_list) ++ break; ++ ++ if (local->inode_list[i]) ++ inode_unref(local->inode_list[i]); ++ } ++ ++ GF_FREE(local->inode_list); ++ ++ GF_FREE(local->vector); ++ if (local->iobref) ++ iobref_unref(local->iobref); ++ if (local->list_inited) ++ gf_dirent_free(&local->entries_head); ++ if 
(local->inodelk_frame) ++ SHARD_STACK_DESTROY(local->inodelk_frame); ++ if (local->entrylk_frame) ++ SHARD_STACK_DESTROY(local->entrylk_frame); ++} ++ ++int shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) { ++ int ret = -1; ++ void *size_attr = NULL; ++ uint64_t size_array[4]; ++ ++ ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, ++ SHARD_MSG_INTERNAL_XATTR_MISSING, ++ "Failed to " ++ "get " GF_XATTR_SHARD_FILE_SIZE " for %s", ++ uuid_utoa(stbuf->ia_gfid)); ++ return ret; ++ } ++ ++ memcpy(size_array, size_attr, sizeof(size_array)); ++ ++ stbuf->ia_size = ntoh64(size_array[0]); ++ stbuf->ia_blocks = ntoh64(size_array[2]); ++ ++ return 0; ++} ++ ++int shard_call_count_return(call_frame_t *frame) { ++ int call_count = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ LOCK(&frame->lock); ++ { call_count = --local->call_count; } ++ UNLOCK(&frame->lock); ++ ++ return call_count; ++} ++ ++static char *shard_internal_dir_string(shard_internal_dir_type_t type) { ++ char *str = NULL; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ str = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ str = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ return str; ++} ++ ++static int shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ char *bname = NULL; ++ inode_t *parent = NULL; ++ loc_t *internal_dir_loc = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ if (!local) ++ return -1; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ internal_dir_loc = &local->dot_shard_loc; ++ bname = GF_SHARD_DIR; ++ parent = inode_ref(this->itable->root); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ internal_dir_loc = &local->dot_shard_rm_loc; ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ parent = 
inode_ref(priv->dot_shard_inode); ++ break; ++ default: ++ break; ++ } ++ ++ internal_dir_loc->inode = inode_new(this->itable); ++ internal_dir_loc->parent = parent; ++ ret = inode_path(internal_dir_loc->parent, bname, ++ (char **)&internal_dir_loc->path); ++ if (ret < 0 || !(internal_dir_loc->inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", bname); ++ goto out; ++ } ++ ++ internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); ++ if (internal_dir_loc->name) ++ internal_dir_loc->name++; ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++inode_t *__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, ++ inode_t *base_inode, int block_num, ++ uuid_t gfid) { ++ char block_bname[256] = { ++ 0, ++ }; ++ inode_t *lru_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *lru_inode_ctx = NULL; ++ shard_inode_ctx_t *lru_base_inode_ctx = NULL; ++ inode_t *fsync_inode = NULL; ++ inode_t *lru_base_inode = NULL; ++ gf_boolean_t do_fsync = _gf_false; ++ ++ priv = this->private; ++ ++ shard_inode_ctx_get(linked_inode, this, &ctx); ++ ++ if (list_empty(&ctx->ilist)) { ++ if (priv->inode_count + 1 <= priv->lru_limit) { ++ /* If this inode was linked here for the first time (indicated ++ * by empty list), and if there is still space in the priv list, ++ * add this ctx to the tail of the list. ++ */ ++ /* For as long as an inode is in lru list, we try to ++ * keep it alive by holding a ref on it. ++ */ ++ inode_ref(linked_inode); ++ if (base_inode) ++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); ++ ctx->block_num = block_num; ++ list_add_tail(&ctx->ilist, &priv->ilist_head); ++ priv->inode_count++; ++ ctx->base_inode = inode_ref(base_inode); ++ } else { ++ /*If on the other hand there is no available slot for this inode ++ * in the list, delete the lru inode from the head of the list, ++ * unlink it. 
And in its place add this new inode into the list. ++ */ ++ lru_inode_ctx = ++ list_first_entry(&priv->ilist_head, shard_inode_ctx_t, ilist); ++ GF_ASSERT(lru_inode_ctx->block_num > 0); ++ lru_base_inode = lru_inode_ctx->base_inode; ++ list_del_init(&lru_inode_ctx->ilist); ++ lru_inode = inode_find(linked_inode->table, lru_inode_ctx->stat.ia_gfid); ++ /* If the lru inode was part of the pending-fsync list, ++ * the base inode needs to be unref'd, the lru inode ++ * deleted from fsync list and fsync'd in a new frame, ++ * and then unlinked in memory and forgotten. ++ */ ++ if (!lru_base_inode) ++ goto after_fsync_check; ++ LOCK(&lru_base_inode->lock); ++ LOCK(&lru_inode->lock); ++ { ++ if (!list_empty(&lru_inode_ctx->to_fsync_list)) { ++ list_del_init(&lru_inode_ctx->to_fsync_list); ++ lru_inode_ctx->fsync_needed = 0; ++ do_fsync = _gf_true; ++ __shard_inode_ctx_get(lru_base_inode, this, &lru_base_inode_ctx); ++ lru_base_inode_ctx->fsync_count--; ++ } ++ } ++ UNLOCK(&lru_inode->lock); ++ UNLOCK(&lru_base_inode->lock); ++ ++ after_fsync_check: ++ if (!do_fsync) { ++ shard_make_block_bname(lru_inode_ctx->block_num, ++ lru_inode_ctx->base_gfid, block_bname, ++ sizeof(block_bname)); ++ /* The following unref corresponds to the ref held at ++ * the time the shard was added to the lru list. ++ */ ++ inode_unref(lru_inode); ++ inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); ++ inode_forget(lru_inode, 0); ++ } else { ++ /* The following unref corresponds to the ref ++ * held when the shard was added to fsync list. ++ */ ++ inode_unref(lru_inode); ++ fsync_inode = lru_inode; ++ if (lru_base_inode) ++ inode_unref(lru_base_inode); ++ } ++ /* The following unref corresponds to the ref ++ * held by inode_find() above. 
++ */ ++ inode_unref(lru_inode); ++ ++ /* The following unref corresponds to the ref held on the base shard ++ * at the time of adding shard inode to lru list ++ */ ++ if (lru_base_inode) ++ inode_unref(lru_base_inode); ++ ++ /* For as long as an inode is in lru list, we try to ++ * keep it alive by holding a ref on it. ++ */ ++ inode_ref(linked_inode); ++ if (base_inode) ++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); ++ ctx->block_num = block_num; ++ ctx->base_inode = inode_ref(base_inode); ++ list_add_tail(&ctx->ilist, &priv->ilist_head); ++ } ++ } else { ++ /* If this is not the first time this inode is being operated on, move ++ * it to the most recently used end of the list. ++ */ ++ list_move_tail(&ctx->ilist, &priv->ilist_head); ++ } ++ return fsync_inode; ++} ++ ++int shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, ++ int32_t op_ret, int32_t op_errno) { ++ switch (fop) { ++ case GF_FOP_LOOKUP: ++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, NULL); ++ break; ++ case GF_FOP_STAT: ++ SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSTAT: ++ SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_TRUNCATE: ++ SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FTRUNCATE: ++ SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_MKNOD: ++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_LINK: ++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_CREATE: ++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_UNLINK: ++ SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_RENAME: ++ 
SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_WRITE: ++ SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FALLOCATE: ++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_ZEROFILL: ++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_DISCARD: ++ SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_READ: ++ SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_FSYNC: ++ SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_REMOVEXATTR: ++ SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FREMOVEXATTR: ++ SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FGETXATTR: ++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_GETXATTR: ++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSETXATTR: ++ SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETXATTR: ++ SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETATTR: ++ SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FSETATTR: ++ SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_SEEK: ++ SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); ++ break; ++ default: ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_common_inode_write_success_unwind(glusterfs_fop_t fop, ++ call_frame_t *frame, ++ int32_t op_ret) { ++ shard_local_t *local = NULL; ++ ++ local 
= frame->local; ++ ++ switch (fop) { ++ case GF_FOP_WRITE: ++ SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_FALLOCATE: ++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_ZEROFILL: ++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_DISCARD: ++ SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ default: ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) { ++ char block_bname[256] = { ++ 0, ++ }; ++ fd_t *anon_fd = cookie; ++ inode_t *shard_inode = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ ++ if (anon_fd == NULL || op_ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, ++ "fsync failed on shard"); ++ goto out; ++ } ++ shard_inode = anon_fd->inode; ++ ++ LOCK(&priv->lock); ++ LOCK(&shard_inode->lock); ++ { ++ __shard_inode_ctx_get(shard_inode, this, &ctx); ++ if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { ++ shard_make_block_bname(ctx->block_num, shard_inode->gfid, block_bname, ++ sizeof(block_bname)); ++ inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); ++ /* The following unref corresponds to the ref held by ++ * inode_link() at the time the shard was created or ++ * looked up ++ */ ++ inode_unref(shard_inode); ++ inode_forget(shard_inode, 0); ++ } ++ } ++ UNLOCK(&shard_inode->lock); ++ UNLOCK(&priv->lock); + +- LOCK(&frame->lock); +- { +- call_count = --local->call_count; ++out: ++ 
if (anon_fd) ++ fd_unref(anon_fd); ++ STACK_DESTROY(frame->root); ++ return 0; ++} ++ ++int shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) { ++ fd_t *anon_fd = NULL; ++ call_frame_t *fsync_frame = NULL; ++ ++ fsync_frame = create_frame(this, this->ctx->pool); ++ if (!fsync_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to fsync shard"); ++ return -1; ++ } ++ ++ anon_fd = fd_anonymous(inode); ++ if (!anon_fd) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create anon fd to" ++ " fsync shard"); ++ STACK_DESTROY(fsync_frame->root); ++ return -1; ++ } ++ ++ STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd, ++ 1, NULL); ++ return 0; ++} ++ ++int shard_common_resolve_shards( ++ call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler) { ++ int i = -1; ++ uint32_t shard_idx_iter = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ inode_t *res_inode = NULL; ++ inode_t *fsync_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ local->call_count = 0; ++ shard_idx_iter = local->first_block; ++ res_inode = local->resolver_base_inode; ++ if (res_inode) ++ gf_uuid_copy(gfid, res_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ if ((local->op_ret < 0) || (local->resolve_not)) ++ goto out; ++ ++ while (shard_idx_iter <= local->last_block) { ++ i++; ++ if (shard_idx_iter == 0) { ++ local->inode_list[i] = inode_ref(res_inode); ++ shard_idx_iter++; ++ continue; ++ } ++ ++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); ++ ++ inode = NULL; ++ inode = inode_resolve(this->itable, path); ++ if (inode) { ++ gf_msg_debug(this->name, 0, "Shard %d already " ++ "present. 
gfid=%s. Saving inode for future.", ++ shard_idx_iter, uuid_utoa(inode->gfid)); ++ local->inode_list[i] = inode; ++ /* Let the ref on the inodes that are already present ++ * in inode table still be held so that they don't get ++ * forgotten by the time the fop reaches the actual ++ * write stage. ++ */ ++ LOCK(&priv->lock); ++ { ++ fsync_inode = __shard_update_shards_inode_list(inode, this, res_inode, ++ shard_idx_iter, gfid); ++ } ++ UNLOCK(&priv->lock); ++ shard_idx_iter++; ++ if (fsync_inode) ++ shard_initiate_evicted_inode_fsync(this, fsync_inode); ++ continue; ++ } else { ++ local->call_count++; ++ shard_idx_iter++; + } +- UNLOCK(&frame->lock); ++ } ++out: ++ post_res_handler(frame, this); ++ return 0; ++} ++ ++int shard_update_file_size_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ dict_t *dict, dict_t *xdata) { ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if ((local->fd) && (local->fd->inode)) ++ inode = local->fd->inode; ++ else if (local->loc.inode) ++ inode = local->loc.inode; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size" ++ " xattr failed on %s", ++ uuid_utoa(inode->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } + +- return call_count; ++ if (shard_modify_size_and_block_count(&local->postbuf, dict)) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++err: ++ local->post_update_size_handler(frame, this); ++ return 0; + } + +-static char * +-shard_internal_dir_string(shard_internal_dir_type_t type) +-{ +- char *str = NULL; ++int shard_set_size_attrs(int64_t size, int64_t block_count, ++ int64_t **size_attr_p) { ++ int ret = -1; ++ int64_t *size_attr = NULL; + +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- str = GF_SHARD_DIR; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- str = 
GF_SHARD_REMOVE_ME_DIR; +- break; +- default: +- break; +- } +- return str; +-} +- +-static int +-shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- char *bname = NULL; +- inode_t *parent = NULL; +- loc_t *internal_dir_loc = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- if (!local) +- return -1; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- internal_dir_loc = &local->dot_shard_loc; +- bname = GF_SHARD_DIR; +- parent = inode_ref(this->itable->root); +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- internal_dir_loc = &local->dot_shard_rm_loc; +- bname = GF_SHARD_REMOVE_ME_DIR; +- parent = inode_ref(priv->dot_shard_inode); +- break; +- default: +- break; +- } ++ if (!size_attr_p) ++ goto out; + +- internal_dir_loc->inode = inode_new(this->itable); +- internal_dir_loc->parent = parent; +- ret = inode_path(internal_dir_loc->parent, bname, +- (char **)&internal_dir_loc->path); +- if (ret < 0 || !(internal_dir_loc->inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", bname); +- goto out; +- } ++ size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); ++ if (!size_attr) ++ goto out; + +- internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); +- if (internal_dir_loc->name) +- internal_dir_loc->name++; ++ size_attr[0] = hton64(size); ++ /* As sharding evolves, it _may_ be necessary to embed more pieces of ++ * information within the same xattr. So allocating slots for them in ++ * advance. For now, only bytes 0-63 and 128-191 which would make up the ++ * current size and block count respectively of the file are valid. 
++ */ ++ size_attr[2] = hton64(block_count); + +- ret = 0; ++ *size_attr_p = size_attr; ++ ++ ret = 0; + out: +- return ret; ++ return ret; + } + +-inode_t * +-__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, +- inode_t *base_inode, int block_num, +- uuid_t gfid) +-{ +- char block_bname[256] = { +- 0, +- }; +- inode_t *lru_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *lru_inode_ctx = NULL; +- shard_inode_ctx_t *lru_base_inode_ctx = NULL; +- inode_t *fsync_inode = NULL; +- inode_t *lru_base_inode = NULL; +- gf_boolean_t do_fsync = _gf_false; +- +- priv = this->private; +- +- shard_inode_ctx_get(linked_inode, this, &ctx); +- +- if (list_empty(&ctx->ilist)) { +- if (priv->inode_count + 1 <= priv->lru_limit) { +- /* If this inode was linked here for the first time (indicated +- * by empty list), and if there is still space in the priv list, +- * add this ctx to the tail of the list. +- */ +- /* For as long as an inode is in lru list, we try to +- * keep it alive by holding a ref on it. +- */ +- inode_ref(linked_inode); +- if (base_inode) +- gf_uuid_copy(ctx->base_gfid, base_inode->gfid); +- else +- gf_uuid_copy(ctx->base_gfid, gfid); +- ctx->block_num = block_num; +- list_add_tail(&ctx->ilist, &priv->ilist_head); +- priv->inode_count++; +- ctx->base_inode = inode_ref(base_inode); +- } else { +- /*If on the other hand there is no available slot for this inode +- * in the list, delete the lru inode from the head of the list, +- * unlink it. And in its place add this new inode into the list. 
+- */ +- lru_inode_ctx = list_first_entry(&priv->ilist_head, +- shard_inode_ctx_t, ilist); +- GF_ASSERT(lru_inode_ctx->block_num > 0); +- lru_base_inode = lru_inode_ctx->base_inode; +- list_del_init(&lru_inode_ctx->ilist); +- lru_inode = inode_find(linked_inode->table, +- lru_inode_ctx->stat.ia_gfid); +- /* If the lru inode was part of the pending-fsync list, +- * the base inode needs to be unref'd, the lru inode +- * deleted from fsync list and fsync'd in a new frame, +- * and then unlinked in memory and forgotten. +- */ +- if (!lru_base_inode) +- goto after_fsync_check; +- LOCK(&lru_base_inode->lock); +- LOCK(&lru_inode->lock); +- { +- if (!list_empty(&lru_inode_ctx->to_fsync_list)) { +- list_del_init(&lru_inode_ctx->to_fsync_list); +- lru_inode_ctx->fsync_needed = 0; +- do_fsync = _gf_true; +- __shard_inode_ctx_get(lru_base_inode, this, +- &lru_base_inode_ctx); +- lru_base_inode_ctx->fsync_count--; +- } +- } +- UNLOCK(&lru_inode->lock); +- UNLOCK(&lru_base_inode->lock); +- +- after_fsync_check: +- if (!do_fsync) { +- shard_make_block_bname(lru_inode_ctx->block_num, +- lru_inode_ctx->base_gfid, block_bname, +- sizeof(block_bname)); +- /* The following unref corresponds to the ref held at +- * the time the shard was added to the lru list. +- */ +- inode_unref(lru_inode); +- inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); +- inode_forget(lru_inode, 0); +- } else { +- /* The following unref corresponds to the ref +- * held when the shard was added to fsync list. +- */ +- inode_unref(lru_inode); +- fsync_inode = lru_inode; +- if (lru_base_inode) +- inode_unref(lru_base_inode); +- } +- /* The following unref corresponds to the ref +- * held by inode_find() above. 
+- */ +- inode_unref(lru_inode); +- +- /* The following unref corresponds to the ref held on the base shard +- * at the time of adding shard inode to lru list +- */ +- if (lru_base_inode) +- inode_unref(lru_base_inode); +- +- /* For as long as an inode is in lru list, we try to +- * keep it alive by holding a ref on it. +- */ +- inode_ref(linked_inode); +- if (base_inode) +- gf_uuid_copy(ctx->base_gfid, base_inode->gfid); +- else +- gf_uuid_copy(ctx->base_gfid, gfid); +- ctx->block_num = block_num; +- ctx->base_inode = inode_ref(base_inode); +- list_add_tail(&ctx->ilist, &priv->ilist_head); +- } +- } else { +- /* If this is not the first time this inode is being operated on, move +- * it to the most recently used end of the list. +- */ +- list_move_tail(&ctx->ilist, &priv->ilist_head); +- } +- return fsync_inode; +-} ++int shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ loc_t *loc, ++ shard_post_update_size_fop_handler_t handler) { ++ int ret = -1; ++ int64_t *size_attr = NULL; ++ int64_t delta_blocks = 0; ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; ++ dict_t *xattr_req = NULL; + +-int +-shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, +- int32_t op_ret, int32_t op_errno) +-{ +- switch (fop) { +- case GF_FOP_LOOKUP: +- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_STAT: +- SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_FSTAT: +- SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_TRUNCATE: +- SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FTRUNCATE: +- SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_MKNOD: +- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_LINK: +- SHARD_STACK_UNWIND(link, frame, op_ret, 
op_errno, NULL, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_CREATE: +- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL, NULL, NULL); +- break; +- case GF_FOP_UNLINK: +- SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_RENAME: +- SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL, NULL, NULL); +- break; +- case GF_FOP_WRITE: +- SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FALLOCATE: +- SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_ZEROFILL: +- SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_DISCARD: +- SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_READ: +- SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, +- NULL, NULL); +- break; +- case GF_FOP_FSYNC: +- SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_REMOVEXATTR: +- SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_FREMOVEXATTR: +- SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_FGETXATTR: +- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_GETXATTR: +- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_FSETXATTR: +- SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_SETXATTR: +- SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_SETATTR: +- SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FSETATTR: +- SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_SEEK: +- SHARD_STACK_UNWIND(seek, 
frame, op_ret, op_errno, 0, NULL); +- break; +- default: +- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +-} ++ local = frame->local; ++ local->post_update_size_handler = handler; + +-int +-shard_common_inode_write_success_unwind(glusterfs_fop_t fop, +- call_frame_t *frame, int32_t op_ret) +-{ +- shard_local_t *local = NULL; ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ if (fd) ++ inode = fd->inode; ++ else ++ inode = loc->inode; ++ ++ /* If both size and block count have not changed, then skip the xattrop. ++ */ ++ delta_blocks = GF_ATOMIC_GET(local->delta_blocks); ++ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { ++ goto out; ++ } ++ ++ ret = shard_set_size_attrs(local->delta_size + local->hole_size, delta_blocks, ++ &size_attr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, ++ "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict. 
gfid=%s", GF_XATTR_SHARD_FILE_SIZE, ++ uuid_utoa(inode->gfid)); ++ GF_FREE(size_attr); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } + +- local = frame->local; ++ if (fd) ++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fxattrop, fd, GF_XATTROP_ADD_ARRAY64, ++ xattr_req, NULL); ++ else ++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->xattrop, loc, GF_XATTROP_ADD_ARRAY64, ++ xattr_req, NULL); + +- switch (fop) { +- case GF_FOP_WRITE: +- SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_FALLOCATE: +- SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_ZEROFILL: +- SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_DISCARD: +- SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- default: +- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +-} ++ dict_unref(xattr_req); ++ return 0; + +-int +-shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *prebuf, struct iatt *postbuf, +- dict_t *xdata) +-{ +- char block_bname[256] = { +- 0, +- }; +- fd_t *anon_fd = cookie; +- inode_t *shard_inode = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_priv_t *priv = NULL; ++out: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ handler(frame, this); ++ return 0; ++} ++ ++static inode_t *shard_link_internal_dir_inode(shard_local_t *local, ++ inode_t *inode, struct iatt *buf, ++ shard_internal_dir_type_t type) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ char *bname = NULL; ++ inode_t **priv_inode = NULL; ++ inode_t 
*parent = NULL; ++ ++ priv = THIS->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ bname = GF_SHARD_DIR; ++ priv_inode = &priv->dot_shard_inode; ++ parent = inode->table->root; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ priv_inode = &priv->dot_shard_rm_inode; ++ parent = priv->dot_shard_inode; ++ break; ++ default: ++ break; ++ } ++ ++ linked_inode = inode_link(inode, parent, bname, buf); ++ inode_lookup(linked_inode); ++ *priv_inode = linked_inode; ++ return linked_inode; ++} ++ ++int shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ shard_local_t *local = NULL; ++ inode_t *linked_inode = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++ ++ local = frame->local; ++ ++ if (op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto out; ++ } ++ ++ /* To-Do: Fix refcount increment per call to ++ * shard_link_internal_dir_inode(). 
++ */ ++ linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ shard_inode_ctx_mark_dir_refreshed(linked_inode, this); ++out: ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; ++} ++ ++int shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_internal_dir_type_t type) { ++ loc_t loc = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(gfid, priv->dot_shard_gfid); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); ++ break; ++ default: ++ break; ++ } ++ ++ inode = inode_find(this->itable, gfid); ++ ++ if (!shard_inode_ctx_needs_lookup(inode, this)) { ++ local->op_ret = 0; ++ goto out; ++ } + +- priv = this->private; ++ /* Plain assignment because the ref is already taken above through ++ * call to inode_find() ++ */ ++ loc.inode = inode; ++ gf_uuid_copy(loc.gfid, gfid); + +- if (anon_fd == NULL || op_ret < 0) { +- gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, +- "fsync failed on shard"); +- goto out; +- } +- shard_inode = anon_fd->inode; ++ STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, ++ NULL); ++ loc_wipe(&loc); + +- LOCK(&priv->lock); +- LOCK(&shard_inode->lock); +- { +- __shard_inode_ctx_get(shard_inode, this, &ctx); +- if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { +- shard_make_block_bname(ctx->block_num, shard_inode->gfid, +- block_bname, sizeof(block_bname)); +- inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); +- /* The following unref corresponds to the ref held by +- * inode_link() at the time the shard was created or +- * looked up +- */ +- inode_unref(shard_inode); +- 
inode_forget(shard_inode, 0); +- } +- } +- UNLOCK(&shard_inode->lock); +- UNLOCK(&priv->lock); ++ return 0; + + out: +- if (anon_fd) +- fd_unref(anon_fd); +- STACK_DESTROY(frame->root); +- return 0; ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; + } + +-int +-shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) +-{ +- fd_t *anon_fd = NULL; +- call_frame_t *fsync_frame = NULL; ++int shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; + +- fsync_frame = create_frame(this, this->ctx->pool); +- if (!fsync_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to fsync shard"); +- return -1; +- } ++ local = frame->local; + +- anon_fd = fd_anonymous(inode); +- if (!anon_fd) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create anon fd to" +- " fsync shard"); +- STACK_DESTROY(fsync_frame->root); +- return -1; +- } ++ if (op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } ++ ++ if (!IA_ISDIR(buf->ia_type)) { ++ gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, ++ "%s already exists and " ++ "is not a directory. 
Please remove it from all bricks " ++ "and try again", ++ shard_internal_dir_string(type)); ++ local->op_ret = -1; ++ local->op_errno = EIO; ++ goto unwind; ++ } ++ ++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ if (link_inode != inode) { ++ shard_refresh_internal_dir(frame, this, type); ++ } else { ++ shard_inode_ctx_mark_dir_refreshed(link_inode, this); ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ } ++ return 0; + +- STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, +- anon_fd, 1, NULL); +- return 0; +-} ++unwind: ++ local->post_res_handler(frame, this); ++ return 0; ++} ++ ++int shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; ++ ++ local = frame->local; ++ priv = this->private; ++ local->post_res_handler = post_res_handler; ++ ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; ++ default: ++ bzero(*gfid, sizeof(uuid_t)); ++ break; ++ } ++ ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set gfid of %s into dict", ++ shard_internal_dir_string(type)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; 
++ goto err; ++ } else { ++ free_gfid = _gf_false; ++ } + +-int +-shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler) +-{ +- int i = -1; +- uint32_t shard_idx_iter = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *inode = NULL; +- inode_t *res_inode = NULL; +- inode_t *fsync_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- local->call_count = 0; +- shard_idx_iter = local->first_block; +- res_inode = local->resolver_base_inode; +- if (res_inode) +- gf_uuid_copy(gfid, res_inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); ++ STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, ++ xattr_req); + +- if ((local->op_ret < 0) || (local->resolve_not)) +- goto out; ++ dict_unref(xattr_req); ++ return 0; + +- while (shard_idx_iter <= local->last_block) { +- i++; +- if (shard_idx_iter == 0) { +- local->inode_list[i] = inode_ref(res_inode); +- shard_idx_iter++; +- continue; +- } ++err: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ if (free_gfid) ++ GF_FREE(gfid); ++ post_res_handler(frame, this); ++ return 0; ++} ++ ++static void shard_inode_ctx_update(inode_t *inode, xlator_t *this, ++ dict_t *xdata, struct iatt *buf) { ++ int ret = 0; ++ uint64_t size = 0; ++ void *bsize = NULL; ++ ++ if (shard_inode_ctx_get_block_size(inode, this, &size)) { ++ /* Fresh lookup */ ++ ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (!ret) ++ size = ntoh64(*((uint64_t *)bsize)); ++ /* If the file is sharded, set its block size, otherwise just ++ * set 0. ++ */ + +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- inode = NULL; +- inode = inode_resolve(this->itable, path); +- if (inode) { +- gf_msg_debug(this->name, 0, +- "Shard %d already " +- "present. gfid=%s. 
Saving inode for future.", +- shard_idx_iter, uuid_utoa(inode->gfid)); +- local->inode_list[i] = inode; +- /* Let the ref on the inodes that are already present +- * in inode table still be held so that they don't get +- * forgotten by the time the fop reaches the actual +- * write stage. +- */ +- LOCK(&priv->lock); +- { +- fsync_inode = __shard_update_shards_inode_list( +- inode, this, res_inode, shard_idx_iter, gfid); +- } +- UNLOCK(&priv->lock); +- shard_idx_iter++; +- if (fsync_inode) +- shard_initiate_evicted_inode_fsync(this, fsync_inode); +- continue; +- } else { +- local->call_count++; +- shard_idx_iter++; +- } +- } +-out: +- post_res_handler(frame, this); +- return 0; ++ shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); ++ } ++ /* If the file is sharded, also set the remaining attributes, ++ * except for ia_size and ia_blocks. ++ */ ++ if (size) { ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); ++ (void)shard_inode_ctx_invalidate(inode, this, buf); ++ } ++} ++ ++int shard_delete_shards(void *opaque); ++ ++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); ++ ++int shard_start_background_deletion(xlator_t *this) { ++ int ret = 0; ++ gf_boolean_t i_cleanup = _gf_true; ++ shard_priv_t *priv = NULL; ++ call_frame_t *cleanup_frame = NULL; ++ ++ priv = this->private; ++ ++ LOCK(&priv->lock); ++ { ++ switch (priv->bg_del_state) { ++ case SHARD_BG_DELETION_NONE: ++ i_cleanup = _gf_true; ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ break; ++ case SHARD_BG_DELETION_LAUNCHING: ++ i_cleanup = _gf_false; ++ break; ++ case SHARD_BG_DELETION_IN_PROGRESS: ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ i_cleanup = _gf_false; ++ break; ++ default: ++ break; ++ } ++ } ++ UNLOCK(&priv->lock); ++ if (!i_cleanup) ++ return 0; ++ ++ cleanup_frame = create_frame(this, this->ctx->pool); ++ if (!cleanup_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create " ++ 
"new frame to delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); ++ ++ ret = synctask_new(this->ctx->env, shard_delete_shards, ++ shard_delete_shards_cbk, cleanup_frame, cleanup_frame); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "failed to create task to do background " ++ "cleanup of shards"); ++ STACK_DESTROY(cleanup_frame->root); ++ goto err; ++ } ++ return 0; ++ ++err: ++ LOCK(&priv->lock); ++ { priv->bg_del_state = SHARD_BG_DELETION_NONE; } ++ UNLOCK(&priv->lock); ++ return ret; + } + +-int +-shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- shard_local_t *local = NULL; ++int shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, struct iatt *postparent) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t i_start_cleanup = _gf_false; + +- local = frame->local; ++ priv = this->private; + +- if ((local->fd) && (local->fd->inode)) +- inode = local->fd->inode; +- else if (local->loc.inode) +- inode = local->loc.inode; ++ if (op_ret < 0) ++ goto unwind; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_UPDATE_FILE_SIZE_FAILED, +- "Update to file size" +- " xattr failed on %s", +- uuid_utoa(inode->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } ++ if (IA_ISDIR(buf->ia_type)) ++ goto unwind; + +- if (shard_modify_size_and_block_count(&local->postbuf, dict)) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +-err: +- local->post_update_size_handler(frame, this); +- return 0; +-} ++ /* Also, if the file is sharded, get the file size and block cnt xattr, ++ * and store them in the stbuf appropriately. 
++ */ + +-int +-shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p) +-{ +- int ret = -1; +- int64_t *size_attr = NULL; ++ if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && ++ frame->root->pid != GF_CLIENT_PID_GSYNCD) ++ shard_modify_size_and_block_count(buf, xdata); + +- if (!size_attr_p) +- goto out; ++ /* If this was a fresh lookup, there are two possibilities: ++ * 1) If the file is sharded (indicated by the presence of block size ++ * xattr), store this block size, along with rdev and mode in its ++ * inode ctx. ++ * 2) If the file is not sharded, store size along with rdev and mode ++ * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is ++ * already initialised to all zeroes, nothing more needs to be done. ++ */ + +- size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); +- if (!size_attr) +- goto out; ++ (void)shard_inode_ctx_update(inode, this, xdata, buf); + +- size_attr[0] = hton64(size); +- /* As sharding evolves, it _may_ be necessary to embed more pieces of +- * information within the same xattr. So allocating slots for them in +- * advance. For now, only bytes 0-63 and 128-191 which would make up the +- * current size and block count respectively of the file are valid. 
+- */ +- size_attr[2] = hton64(block_count); ++ LOCK(&priv->lock); ++ { ++ if (priv->first_lookup_done == _gf_false) { ++ priv->first_lookup_done = _gf_true; ++ i_start_cleanup = _gf_true; ++ } ++ } ++ UNLOCK(&priv->lock); + +- *size_attr_p = size_attr; ++ if (!i_start_cleanup) ++ goto unwind; + +- ret = 0; +-out: +- return ret; ++ ret = shard_start_background_deletion(this); ++ if (ret < 0) { ++ LOCK(&priv->lock); ++ { priv->first_lookup_done = _gf_false; } ++ UNLOCK(&priv->lock); ++ } ++ ++unwind: ++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, ++ postparent); ++ return 0; + } + +-int +-shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, +- loc_t *loc, shard_post_update_size_fop_handler_t handler) +-{ +- int ret = -1; +- int64_t *size_attr = NULL; +- int64_t delta_blocks = 0; +- inode_t *inode = NULL; +- shard_local_t *local = NULL; +- dict_t *xattr_req = NULL; ++int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ dict_t *xattr_req) { ++ int ret = -1; ++ int32_t op_errno = ENOMEM; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- local = frame->local; +- local->post_update_size_handler = handler; ++ this->itable = loc->inode->table; ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); ++ } + +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; +- } ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (fd) +- inode = fd->inode; +- else +- inode = loc->inode; ++ frame->local = local; + +- /* If both size and block count have not changed, then skip the xattrop. 
+- */ +- delta_blocks = GF_ATOMIC_GET(local->delta_blocks); +- if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { +- goto out; +- } ++ loc_copy(&local->loc, loc); + +- ret = shard_set_size_attrs(local->delta_size + local->hole_size, +- delta_blocks, &size_attr); ++ local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, +- "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict" ++ " value: key:%s for path %s", ++ GF_XATTR_SHARD_BLOCK_SIZE, loc->path); ++ goto err; + } ++ } + +- ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); + if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set key %s into dict. 
gfid=%s", +- GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid)); +- GF_FREE(size_attr); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s for path %s.", ++ GF_XATTR_SHARD_FILE_SIZE, loc->path); ++ goto err; + } ++ } + +- if (fd) +- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fxattrop, fd, +- GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); +- else +- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->xattrop, loc, +- GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); +- +- dict_unref(xattr_req); +- return 0; +- +-out: +- if (xattr_req) +- dict_unref(xattr_req); +- handler(frame, this); +- return 0; +-} +- +-static inode_t * +-shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode, +- struct iatt *buf, shard_internal_dir_type_t type) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- char *bname = NULL; +- inode_t **priv_inode = NULL; +- inode_t *parent = NULL; +- +- priv = THIS->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- bname = GF_SHARD_DIR; +- priv_inode = &priv->dot_shard_inode; +- parent = inode->table->root; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- bname = GF_SHARD_REMOVE_ME_DIR; +- priv_inode = &priv->dot_shard_rm_inode; +- parent = priv->dot_shard_inode; +- break; +- default: +- break; +- } ++ if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) ++ dict_del(xattr_req, GF_CONTENT_KEY); + +- linked_inode = inode_link(inode, parent, bname, buf); +- inode_lookup(linked_inode); +- *priv_inode = linked_inode; +- return linked_inode; ++ STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); ++ return 0; + } + +-int 
+-shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, ++int shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- shard_local_t *local = NULL; +- inode_t *linked_inode = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; +- +- local = frame->local; +- +- if (op_ret) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto out; +- } ++ struct iatt *postparent) { ++ int ret = -1; ++ int32_t mask = SHARD_INODE_WRITE_MASK; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t ctx = { ++ 0, ++ }; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" ++ " failed : %s", ++ loc_gfid_utoa(&(local->loc))); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } ++ ++ local->prebuf = *buf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ ++ if (shard_inode_ctx_get_all(inode, this, &ctx)) ++ mask = SHARD_ALL_MASK; ++ ++ ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, ++ (mask | SHARD_MASK_REFRESH_RESET)); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, ++ "Failed to set inode" ++ " write params into inode ctx for %s", ++ uuid_utoa(buf->ia_gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto unwind; ++ } ++ ++unwind: ++ local->handler(frame, this); ++ return 0; ++} ++ ++int shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ shard_post_fop_handler_t handler) { ++ int ret = -1; ++ shard_local_t *local = NULL; ++ dict_t *xattr_req = NULL; ++ gf_boolean_t need_refresh = _gf_false; ++ ++ local = frame->local; ++ local->handler = handler; ++ ++ ret = 
shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, ++ &need_refresh); ++ /* By this time, inode ctx should have been created either in create, ++ * mknod, readdirp or lookup. If not it is a bug! ++ */ ++ if ((ret == 0) && (need_refresh == _gf_false)) { ++ gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" ++ "Serving prebuf off the inode ctx cache", ++ uuid_utoa(loc->gfid)); ++ goto out; ++ } ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); ++ ++ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xattr_req); ++ ++ dict_unref(xattr_req); ++ return 0; + +- /* To-Do: Fix refcount increment per call to +- * shard_link_internal_dir_inode(). +- */ +- linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- shard_inode_ctx_mark_dir_refreshed(linked_inode, this); + out: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; ++ if (xattr_req) ++ dict_unref(xattr_req); ++ handler(frame, this); ++ return 0; + } + +-int +-shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_internal_dir_type_t type) +-{ +- loc_t loc = { +- 0, +- }; +- inode_t *inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; ++int shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; +- priv = this->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(gfid, priv->dot_shard_gfid); +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); +- break; +- default: +- break; +- } ++ local = frame->local; + +- inode = inode_find(this->itable, gfid); ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->fd->inode, 
this, &local->prebuf, 0, ++ SHARD_LOOKUP_MASK); + +- if (!shard_inode_ctx_needs_lookup(inode, this)) { +- local->op_ret = 0; +- goto out; +- } ++ SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, ++ &local->prebuf, local->xattr_rsp); ++ return 0; ++} + +- /* Plain assignment because the ref is already taken above through +- * call to inode_find() +- */ +- loc.inode = inode; +- gf_uuid_copy(loc.gfid, gfid); ++int shard_post_stat_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, +- NULL); +- loc_wipe(&loc); ++ local = frame->local; + +- return 0; ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, ++ SHARD_LOOKUP_MASK); + +-out: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; ++ SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, ++ &local->prebuf, local->xattr_rsp); ++ return 0; + } + +-int +-shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++int shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ dict_t *xdata) { ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, ++ "stat failed: %s", local->fd ? 
uuid_utoa(local->fd->inode->gfid) ++ : uuid_utoa((local->loc.inode)->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } + +- if (!IA_ISDIR(buf->ia_type)) { +- gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, +- "%s already exists and " +- "is not a directory. Please remove it from all bricks " +- "and try again", +- shard_internal_dir_string(type)); +- local->op_ret = -1; +- local->op_errno = EIO; +- goto unwind; +- } ++ local->prebuf = *buf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ local->xattr_rsp = dict_ref(xdata); + +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- if (link_inode != inode) { +- shard_refresh_internal_dir(frame, this, type); +- } else { +- shard_inode_ctx_mark_dir_refreshed(link_inode, this); +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- } +- return 0; ++ if (local->loc.inode) ++ inode = local->loc.inode; ++ else ++ inode = local->fd->inode; ++ ++ shard_inode_ctx_invalidate(inode, this, &local->prebuf); + + unwind: +- local->post_res_handler(frame, this); +- return 0; ++ local->handler(frame, this); ++ return 0; + } + +-int +-shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- dict_t *xattr_req = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- uuid_t *gfid = NULL; +- loc_t *loc = NULL; +- gf_boolean_t free_gfid = _gf_true; +- +- local = frame->local; +- priv = this->private; +- local->post_res_handler = post_res_handler; +- +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); +- if (!gfid) +- goto err; +- +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- 
gf_uuid_copy(*gfid, priv->dot_shard_gfid); +- loc = &local->dot_shard_loc; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); +- loc = &local->dot_shard_rm_loc; +- break; +- default: +- bzero(*gfid, sizeof(uuid_t)); +- break; +- } ++int shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set gfid of %s into dict", +- shard_internal_dir_string(type)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } else { +- free_gfid = _gf_false; +- } ++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { ++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, xdata); ++ return 0; ++ } + +- STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, +- xattr_req); ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- dict_unref(xattr_req); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + ++ frame->local = local; ++ ++ local->handler = shard_post_stat_handler; ++ loc_copy(&local->loc, loc); ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, ++ local, err); ++ ++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); ++ return 0; + err: +- if (xattr_req) +- dict_unref(xattr_req); +- if (free_gfid) +- GF_FREE(gfid); +- post_res_handler(frame, this); +- return 0; ++ shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); ++ return 0; + } + +-static void +-shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata, +- struct iatt *buf) +-{ +- int ret = 0; +- uint64_t size = 0; +- void *bsize = NULL; +- +- if (shard_inode_ctx_get_block_size(inode, this, &size)) { +- /* Fresh lookup */ +- ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); +- if (!ret) +- size = ntoh64(*((uint64_t *)bsize)); +- /* If the file is sharded, set its block size, otherwise just +- * set 0. +- */ ++int shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); +- } +- /* If the file is sharded, also set the remaining attributes, +- * except for ia_size and ia_blocks. 
+- */ +- if (size) { +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); +- (void)shard_inode_ctx_invalidate(inode, this, buf); +- } +-} ++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { ++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xdata); ++ return 0; ++ } + +-int +-shard_delete_shards(void *opaque); ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +-int +-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xdata); ++ return 0; ++ } + +-int +-shard_start_background_deletion(xlator_t *this) +-{ +- int ret = 0; +- gf_boolean_t i_cleanup = _gf_true; +- shard_priv_t *priv = NULL; +- call_frame_t *cleanup_frame = NULL; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- LOCK(&priv->lock); +- { +- switch (priv->bg_del_state) { +- case SHARD_BG_DELETION_NONE: +- i_cleanup = _gf_true; +- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; +- break; +- case SHARD_BG_DELETION_LAUNCHING: +- i_cleanup = _gf_false; +- break; +- case SHARD_BG_DELETION_IN_PROGRESS: +- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; +- i_cleanup = _gf_false; +- break; +- default: +- break; +- } +- } +- UNLOCK(&priv->lock); +- if (!i_cleanup) +- return 0; +- +- cleanup_frame = create_frame(this, this->ctx->pool); +- if (!cleanup_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create " +- "new frame to delete shards"); +- ret = -ENOMEM; +- goto err; +- } ++ frame->local = 
local; + +- set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); ++ local->handler = shard_post_fstat_handler; ++ local->fd = fd_ref(fd); ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- ret = synctask_new(this->ctx->env, shard_delete_shards, +- shard_delete_shards_cbk, cleanup_frame, cleanup_frame); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_WARNING, errno, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "failed to create task to do background " +- "cleanup of shards"); +- STACK_DESTROY(cleanup_frame->root); +- goto err; +- } +- return 0; ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); + ++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); ++ return 0; + err: +- LOCK(&priv->lock); +- { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- } +- UNLOCK(&priv->lock); +- return ret; ++ shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, struct iatt *postparent) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; +- gf_boolean_t i_start_cleanup = _gf_false; ++int shard_post_update_size_truncate_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- priv = this->private; ++ local = frame->local; + +- if (op_ret < 0) +- goto unwind; ++ if (local->fop == GF_FOP_TRUNCATE) ++ SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, NULL); ++ else ++ SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, NULL); ++ return 0; ++} + +- if (IA_ISDIR(buf->ia_type)) +- goto unwind; ++int shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t 
op_ret, ++ int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) { ++ inode_t *inode = NULL; ++ int64_t delta_blocks = 0; ++ shard_local_t *local = NULL; + +- /* Also, if the file is sharded, get the file size and block cnt xattr, +- * and store them in the stbuf appropriately. +- */ ++ local = frame->local; + +- if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && +- frame->root->pid != GF_CLIENT_PID_GSYNCD) +- shard_modify_size_and_block_count(buf, xdata); +- +- /* If this was a fresh lookup, there are two possibilities: +- * 1) If the file is sharded (indicated by the presence of block size +- * xattr), store this block size, along with rdev and mode in its +- * inode ctx. +- * 2) If the file is not sharded, store size along with rdev and mode +- * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is +- * already initialised to all zeroes, nothing more needs to be done. +- */ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); + +- (void)shard_inode_ctx_update(inode, this, xdata, buf); ++ inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last" ++ " shard failed : %s", ++ uuid_utoa(inode->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ ++ local->postbuf.ia_size = local->offset; ++ /* Let the delta be negative. 
We want xattrop to do subtraction */ ++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; ++ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, ++ postbuf->ia_blocks - prebuf->ia_blocks); ++ GF_ASSERT(delta_blocks <= 0); ++ local->postbuf.ia_blocks += delta_blocks; ++ local->hole_size = 0; ++ ++ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, ++ inode_t *inode) { ++ size_t last_shard_size_after = 0; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ /* A NULL inode could be due to the fact that the last shard which ++ * needs to be truncated does not exist due to it lying in a hole ++ * region. So the only thing left to do in that case would be an ++ * update to file size xattr. ++ */ ++ if (!inode) { ++ gf_msg_debug(this->name, 0, ++ "Last shard to be truncated absent" ++ " in backend: %s. 
Directly proceeding to update " ++ "file size", ++ uuid_utoa(inode->gfid)); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } + +- LOCK(&priv->lock); +- { +- if (priv->first_lookup_done == _gf_false) { +- priv->first_lookup_done = _gf_true; +- i_start_cleanup = _gf_true; +- } +- } +- UNLOCK(&priv->lock); ++ SHARD_SET_ROOT_FS_ID(frame, local); + +- if (!i_start_cleanup) +- goto unwind; ++ loc.inode = inode_ref(inode); ++ gf_uuid_copy(loc.gfid, inode->gfid); + +- ret = shard_start_background_deletion(this); +- if (ret < 0) { +- LOCK(&priv->lock); +- { +- priv->first_lookup_done = _gf_false; +- } +- UNLOCK(&priv->lock); +- } ++ last_shard_size_after = (local->offset % local->block_size); + +-unwind: +- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, +- postparent); +- return 0; ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, ++ NULL); ++ loc_wipe(&loc); ++ return 0; + } + +-int +-shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +-{ +- int ret = -1; +- int32_t op_errno = ENOMEM; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- this->itable = loc->inode->table; +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); +- } ++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num); + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++int shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int ret = 0; ++ int call_count = 0; ++ int shard_block_num = (long)cookie; ++ uint64_t block_count = 0; ++ shard_local_t *local = NULL; + +- frame->local = local; ++ local = frame->local; + +- loc_copy(&local->loc, loc); ++ if (op_ret < 0) { ++ 
local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } ++ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); ++ if (!ret) { ++ GF_ATOMIC_SUB(local->delta_blocks, block_count); ++ } else { ++ /* dict_get failed possibly due to a heterogeneous cluster? */ ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get key %s from dict during truncate of gfid %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ } ++ ++ shard_unlink_block_inode(local, shard_block_num); ++done: ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ shard_truncate_last_shard(frame, this, local->inode_list[0]); ++ } ++ return 0; ++} ++ ++int shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) { ++ int i = 1; ++ int ret = -1; ++ int call_count = 0; ++ uint32_t cur_block = 0; ++ uint32_t last_block = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ char *bname = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ dict_t *xdata_req = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ cur_block = local->first_block + 1; ++ last_block = local->last_block; ++ ++ /* Determine call count */ ++ for (i = 1; i < local->num_blocks; i++) { ++ if (!local->inode_list[i]) ++ continue; ++ call_count++; ++ } ++ ++ if (!call_count) { ++ /* Call count = 0 implies that all of the shards that need to be ++ * unlinked do not exist. So shard xlator would now proceed to ++ * do the final truncate + size updates. ++ */ ++ gf_msg_debug(this->name, 0, "Shards to be unlinked as part of " ++ "truncate absent in backend: %s. 
Directly " ++ "proceeding to update file size", ++ uuid_utoa(inode->gfid)); ++ local->postbuf.ia_size = local->offset; ++ local->postbuf.ia_blocks = local->prebuf.ia_blocks; ++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->hole_size = 0; ++ shard_update_file_size(frame, this, local->fd, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } + +- local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local->call_count = call_count; ++ i = 1; ++ xdata_req = dict_new(); ++ if (!xdata_req) { ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } ++ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict during truncate of %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ dict_unref(xdata_req); ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } + +- if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict" +- " value: key:%s for path %s", +- GF_XATTR_SHARD_BLOCK_SIZE, loc->path); +- goto err; +- } ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ while (cur_block <= last_block) { ++ if (!local->inode_list[i]) { ++ cur_block++; ++ i++; ++ continue; ++ } ++ if (wind_failed) { ++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, +- 8 * 4); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: 
key:%s for path %s.", +- GF_XATTR_SHARD_FILE_SIZE, loc->path); +- goto err; +- } ++ shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); ++ bname = strrchr(path, '/') + 1; ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s. Base file gfid = %s", ++ bname, uuid_utoa(inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ loc.inode = inode_ref(local->inode_list[i]); + +- if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) +- dict_del(xattr_req, GF_CONTENT_KEY); ++ STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, &loc, ++ 0, xdata_req); ++ loc_wipe(&loc); ++ next: ++ i++; ++ cur_block++; ++ if (!--call_count) ++ break; ++ } ++ dict_unref(xdata_req); ++ return 0; ++} + +- STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); ++int shard_truncate_do(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->num_blocks == 1) { ++ /* This means that there are no shards to be unlinked. ++ * The fop boils down to truncating the last shard, updating ++ * the size and unwinding. 
++ */ ++ shard_truncate_last_shard(frame, this, local->inode_list[0]); + return 0; ++ } else { ++ shard_truncate_htol(frame, this, local->loc.inode); ++ } ++ return 0; + } + +-int +-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- int ret = -1; +- int32_t mask = SHARD_INODE_WRITE_MASK; +- shard_local_t *local = NULL; +- shard_inode_ctx_t ctx = { +- 0, +- }; +- +- local = frame->local; ++int shard_post_lookup_shards_truncate_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_BASE_FILE_LOOKUP_FAILED, +- "Lookup on base file" +- " failed : %s", +- loc_gfid_utoa(&(local->loc))); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ local = frame->local; + +- local->prebuf = *buf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ shard_truncate_do(frame, this); ++ return 0; ++} ++ ++void shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, ++ struct iatt *buf) { ++ int list_index = 0; ++ char block_bname[256] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *linked_inode = NULL; ++ xlator_t *this = NULL; ++ inode_t *fsync_inode = NULL; ++ shard_priv_t *priv = NULL; ++ inode_t *base_inode = NULL; ++ ++ this = THIS; ++ priv = this->private; ++ if (local->loc.inode) { ++ gf_uuid_copy(gfid, local->loc.inode->gfid); ++ base_inode = local->loc.inode; ++ } else if (local->resolver_base_inode) { ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ base_inode = local->resolver_base_inode; ++ } else { ++ gf_uuid_copy(gfid, 
local->base_gfid); ++ } ++ ++ shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); ++ ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); ++ linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); ++ inode_lookup(linked_inode); ++ list_index = block_num - local->first_block; ++ local->inode_list[list_index] = linked_inode; ++ ++ LOCK(&priv->lock); ++ { ++ fsync_inode = __shard_update_shards_inode_list(linked_inode, this, ++ base_inode, block_num, gfid); ++ } ++ UNLOCK(&priv->lock); ++ if (fsync_inode) ++ shard_initiate_evicted_inode_fsync(this, fsync_inode); ++} ++ ++int shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ int call_count = 0; ++ int shard_block_num = (long)cookie; ++ uuid_t gfid = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ if (local->resolver_base_inode) ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ if (op_ret < 0) { ++ /* Ignore absence of shards in the backend in truncate fop. */ ++ switch (local->fop) { ++ case GF_FOP_TRUNCATE: ++ case GF_FOP_FTRUNCATE: ++ case GF_FOP_RENAME: ++ case GF_FOP_UNLINK: ++ if (op_errno == ENOENT) ++ goto done; ++ break; ++ case GF_FOP_WRITE: ++ case GF_FOP_READ: ++ case GF_FOP_ZEROFILL: ++ case GF_FOP_DISCARD: ++ case GF_FOP_FALLOCATE: ++ if ((!local->first_lookup_done) && (op_errno == ENOENT)) { ++ LOCK(&frame->lock); ++ { local->create_count++; } ++ UNLOCK(&frame->lock); ++ goto done; ++ } ++ break; ++ default: ++ break; + } + +- if (shard_inode_ctx_get_all(inode, this, &ctx)) +- mask = SHARD_ALL_MASK; ++ /* else */ ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_LOOKUP_SHARD_FAILED, ++ "Lookup on shard %d " ++ "failed. 
Base file gfid = %s", ++ shard_block_num, uuid_utoa(gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } + +- ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, +- (mask | SHARD_MASK_REFRESH_RESET)); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, +- "Failed to set inode" +- " write params into inode ctx for %s", +- uuid_utoa(buf->ia_gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unwind; +- } ++ shard_link_block_inode(local, shard_block_num, inode, buf); + +-unwind: +- local->handler(frame, this); ++done: ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wake(&local->barrier); + return 0; ++ } else { ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ if (!local->first_lookup_done) ++ local->first_lookup_done = _gf_true; ++ local->pls_fop_handler(frame, this); ++ } ++ } ++ return 0; + } + +-int +-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, +- shard_post_fop_handler_t handler) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- dict_t *xattr_req = NULL; +- gf_boolean_t need_refresh = _gf_false; ++dict_t *shard_create_gfid_dict(dict_t *dict) { ++ int ret = 0; ++ dict_t *new = NULL; ++ unsigned char *gfid = NULL; + +- local = frame->local; +- local->handler = handler; ++ new = dict_copy_with_ref(dict, NULL); ++ if (!new) ++ return NULL; + +- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, +- &need_refresh); +- /* By this time, inode ctx should have been created either in create, +- * mknod, readdirp or lookup. If not it is a bug! 
+- */ +- if ((ret == 0) && (need_refresh == _gf_false)) { +- gf_msg_debug(this->name, 0, +- "Skipping lookup on base file: %s" +- "Serving prebuf off the inode ctx cache", +- uuid_utoa(loc->gfid)); +- goto out; ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); ++ if (!gfid) { ++ ret = -1; ++ goto out; ++ } ++ ++ gf_uuid_generate(gfid); ++ ++ ret = dict_set_gfuuid(new, "gfid-req", gfid, false); ++ ++out: ++ if (ret) { ++ dict_unref(new); ++ new = NULL; ++ GF_FREE(gfid); ++ } ++ ++ return new; ++} ++ ++int shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, ++ inode_t *inode, ++ shard_post_lookup_shards_fop_handler_t handler) { ++ int i = 0; ++ int ret = 0; ++ int count = 0; ++ int call_count = 0; ++ int32_t shard_idx_iter = 0; ++ int last_block = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ char *bname = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ dict_t *xattr_req = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ count = call_count = local->call_count; ++ shard_idx_iter = local->first_block; ++ last_block = local->last_block; ++ local->pls_fop_handler = handler; ++ if (local->lookup_shards_barriered) ++ local->barrier.waitfor = local->call_count; ++ ++ if (inode) ++ gf_uuid_copy(gfid, inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ while (shard_idx_iter <= last_block) { ++ if (local->inode_list[i]) { ++ i++; ++ shard_idx_iter++; ++ continue; ++ } ++ ++ if (wind_failed) { ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); ++ ++ bname = strrchr(path, '/') + 1; ++ loc.inode = inode_new(this->itable); ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); ++ ret = inode_path(loc.parent, 
bname, (char **)&(loc.path)); ++ if (ret < 0 || !(loc.inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s, base file gfid = %s", ++ bname, uuid_utoa(gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; + } + +- xattr_req = dict_new(); ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ loc_wipe(&loc); ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, ++ (void *)(long)shard_idx_iter, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ next: ++ shard_idx_iter++; ++ i++; ++ ++ if (!--call_count) ++ break; ++ } ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wait(&local->barrier, count); ++ local->pls_fop_handler(frame, this); ++ } ++ return 0; ++} ++ ++int shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ if (local->op_errno == ENOENT) { ++ /* If lookup on /.shard fails with ENOENT, it means that ++ * the file was 0-byte in size but truncated sometime in ++ * the past to a higher size which is reflected in the ++ * size xattr, and now being truncated to a lower size. ++ * In this case, the only thing that needs to be done is ++ * to update the size xattr of the file and unwind. 
++ */ ++ local->first_block = local->last_block = 0; ++ local->num_blocks = 1; ++ local->call_count = 0; ++ local->op_ret = 0; ++ local->postbuf.ia_size = local->offset; ++ shard_update_file_size(frame, this, local->fd, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } else { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } ++ } + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); ++ if (!local->call_count) ++ shard_truncate_do(frame, this); ++ else ++ shard_common_lookup_shards(frame, this, local->loc.inode, ++ shard_post_lookup_shards_truncate_handler); ++ ++ return 0; ++} ++ ++int shard_truncate_begin(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ /* First participant block here is the lowest numbered block that would ++ * hold the last byte of the file post successful truncation. ++ * Last participant block is the block that contains the last byte in ++ * the current state of the file. ++ * If (first block == last_block): ++ * then that means that the file only needs truncation of the ++ * first (or last since both are same) block. ++ * Else ++ * if (new_size % block_size == 0) ++ * then that means there is no truncate to be done with ++ * only shards from first_block + 1 through the last ++ * block needing to be unlinked. ++ * else ++ * both truncate of the first block and unlink of the ++ * remaining shards until end of file is required. ++ */ ++ local->first_block = ++ (local->offset == 0) ? 0 : get_lowest_block(local->offset - 1, ++ local->block_size); ++ local->last_block = ++ get_highest_block(0, local->prebuf.ia_size, local->block_size); ++ ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->resolver_base_inode = ++ (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode : local->fd->inode; ++ ++ if ((local->first_block == 0) && (local->num_blocks == 1)) { ++ if (local->fop == GF_FOP_TRUNCATE) ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, &local->loc, local->offset, ++ local->xattr_req); ++ else ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, local->fd, local->offset, ++ local->xattr_req); ++ return 0; ++ } + +- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, xattr_req); ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ goto err; + +- dict_unref(xattr_req); +- return 0; ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ ret = ++ shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret) ++ goto err; ++ shard_lookup_internal_dir(frame, this, shard_post_resolve_truncate_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_post_resolve_truncate_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; + +-out: +- if (xattr_req) +- dict_unref(xattr_req); +- handler(frame, this); +- return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ struct iatt tmp_stbuf = { ++ 0, ++ }; + +- local = frame->local; +- +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, +- SHARD_LOOKUP_MASK); ++ local = frame->local; + +- SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, +- &local->prebuf, 
local->xattr_rsp); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ local->postbuf = tmp_stbuf = local->prebuf; ++ ++ if (local->prebuf.ia_size == local->offset) { ++ /* If the file size is same as requested size, unwind the call ++ * immediately. ++ */ ++ if (local->fop == GF_FOP_TRUNCATE) ++ SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, &local->postbuf, ++ NULL); ++ else ++ SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, ++ &local->postbuf, NULL); ++ } else if (local->offset > local->prebuf.ia_size) { ++ /* If the truncate is from a lower to a higher size, set the ++ * new size xattr and unwind. ++ */ ++ local->hole_size = local->offset - local->prebuf.ia_size; ++ local->delta_size = 0; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->postbuf.ia_size = local->offset; ++ tmp_stbuf.ia_size = local->offset; ++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, ++ SHARD_INODE_WRITE_MASK); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ } else { ++ /* ... else ++ * i. unlink all shards that need to be unlinked. ++ * ii. truncate the last of the shards. ++ * iii. update the new size using setxattr. ++ * and unwind the fop. ++ */ ++ local->hole_size = 0; ++ local->delta_size = (local->offset - local->prebuf.ia_size); ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ tmp_stbuf.ia_size = local->offset; ++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, ++ SHARD_INODE_WRITE_MASK); ++ shard_truncate_begin(frame, this); ++ } ++ return 0; + } + +-int +-shard_post_stat_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++/* TO-DO: ++ * Fix updates to size and block count with racing write(s) and truncate(s). 
++ */ + +- local = frame->local; ++int shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ off_t offset, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, +- SHARD_LOOKUP_MASK); ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, +- &local->prebuf, local->xattr_rsp); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; +-} ++ } + +-int +-shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- shard_local_t *local = NULL; ++ if (!this->itable) ++ this->itable = loc->inode->table; + +- local = frame->local; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ loc_copy(&local->loc, loc); ++ local->offset = offset; ++ local->block_size = block_size; ++ local->fop = GF_FOP_TRUNCATE; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->resolver_base_inode = loc->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_truncate_handler); ++ return 0; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, +- "stat failed: %s", +- local->fd ? 
uuid_utoa(local->fd->inode->gfid) +- : uuid_utoa((local->loc.inode)->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++err: ++ shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ local->fd = fd_ref(fd); ++ local->offset = offset; ++ local->block_size = block_size; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_FTRUNCATE; ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local->resolver_base_inode = fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_truncate_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); ++ return 0; ++} + +- local->prebuf = *buf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; +- } +- local->xattr_rsp = dict_ref(xdata); ++int shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int ret = -1; ++ shard_local_t *local = NULL; + +- if (local->loc.inode) +- inode = local->loc.inode; +- else +- inode = local->fd->inode; ++ local = frame->local; + +- shard_inode_ctx_invalidate(inode, this, &local->prebuf); ++ if (op_ret == -1) ++ goto unwind; ++ ++ ret = ++ shard_inode_ctx_set(inode, this, buf, local->block_size, SHARD_ALL_MASK); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, ++ "Failed to set inode " ++ "ctx for %s", ++ uuid_utoa(inode->gfid)); + + unwind: +- local->handler(frame, this); +- return 0; +-} ++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); + +-int +-shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++ return 0; ++} + +- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { +- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, xdata); +- return 0; +- } ++int shard_mknod(call_frame_t 
*frame, xlator_t *this, loc_t *loc, mode_t mode, ++ dev_t rdev, mode_t umask, dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } ++ priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, xdata); +- return 0; +- } ++ frame->local = local; ++ local->block_size = priv->block_size; ++ if (!__is_gsyncd_on_shard_dir(frame, loc)) { ++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); ++ return 0; ++} + +- frame->local = local; ++int32_t shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ shard_local_t *local = NULL; + +- local->handler = shard_post_stat_handler; +- loc_copy(&local->loc, loc); +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local = frame->local; ++ if (op_ret < 0) ++ goto err; + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, +- local, err); ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_MASK_NLINK | SHARD_MASK_TIMES); ++ buf->ia_size = local->prebuf.ia_size; ++ buf->ia_blocks = local->prebuf.ia_blocks; + +- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); +- return 0; ++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); ++ return 0; + } + +-int +-shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { +- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, xdata); +- return 0; +- } ++ local = frame->local; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ if (local->op_ret < 0) { ++ SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, NULL, ++ NULL, NULL, NULL); ++ return 0; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, xdata); +- return 0; +- } ++ STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), ++ 
FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, ++ local->xattr_req); ++ return 0; ++} + +- if (!this->itable) +- this->itable = fd->inode->table; ++int32_t shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(oldloc->inode->gfid)); ++ goto err; ++ } + +- frame->local = local; ++ if (!block_size) { ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, ++ oldloc, newloc, xdata); ++ return 0; ++ } + +- local->handler = shard_post_fstat_handler; +- local->fd = fd_ref(fd); +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ if (!this->itable) ++ this->itable = oldloc->inode->table; + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); +- return 0; ++ frame->local = local; ++ ++ loc_copy(&local->loc, oldloc); ++ loc_copy(&local->loc2, newloc); ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_link_handler); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); + +- local = frame->local; ++int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, NULL); +- else +- SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, NULL); ++ local = frame->local; ++ ++ if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { ++ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, ++ "failed to delete shards of %s", ++ uuid_utoa(local->resolver_base_inode->gfid)); + return 0; +-} ++ } ++ local->op_ret = 0; ++ local->op_errno = 0; + +-int +-shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *prebuf, struct iatt *postbuf, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- int64_t delta_blocks = 0; +- shard_local_t *local = NULL; ++ shard_unlink_shards_do(frame, this, local->resolver_base_inode); ++ return 0; ++} + +- local = frame->local; ++int shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- SHARD_UNSET_ROOT_FS_ID(frame, local); ++ local = frame->local; ++ local->lookup_shards_barriered = _gf_true; + +- inode = (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode +- : local->fd->inode; +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, +- "truncate on last" +- " shard failed : %s", +- uuid_utoa(inode->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } ++ if (!local->call_count) ++ shard_unlink_shards_do(frame, this, local->resolver_base_inode); ++ else ++ shard_common_lookup_shards(frame, this, local->resolver_base_inode, ++ shard_post_lookup_shards_unlink_handler); ++ return 0; ++} ++ ++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num) { ++ char block_bname[256] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ inode_t *base_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *base_ictx = NULL; ++ int unref_base_inode = 0; ++ int unref_shard_inode = 0; ++ ++ this = THIS; ++ priv = this->private; ++ ++ inode = local->inode_list[shard_block_num - local->first_block]; ++ shard_inode_ctx_get(inode, this, &ctx); ++ base_inode = ctx->base_inode; ++ if (base_inode) ++ gf_uuid_copy(gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, ctx->base_gfid); ++ shard_make_block_bname(shard_block_num, gfid, block_bname, ++ sizeof(block_bname)); ++ ++ LOCK(&priv->lock); ++ if (base_inode) ++ LOCK(&base_inode->lock); ++ LOCK(&inode->lock); ++ { ++ __shard_inode_ctx_get(inode, this, &ctx); ++ if (!list_empty(&ctx->ilist)) { ++ list_del_init(&ctx->ilist); ++ priv->inode_count--; ++ unref_base_inode++; ++ unref_shard_inode++; ++ GF_ASSERT(priv->inode_count >= 0); ++ } ++ if (ctx->fsync_needed) { ++ unref_base_inode++; ++ unref_shard_inode++; ++ list_del_init(&ctx->to_fsync_list); ++ if (base_inode) { ++ __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ base_ictx->fsync_count--; ++ } ++ } ++ } ++ UNLOCK(&inode->lock); ++ if (base_inode) ++ UNLOCK(&base_inode->lock); + +- local->postbuf.ia_size = local->offset; +- 
/* Let the delta be negative. We want xattrop to do subtraction */ +- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, +- postbuf->ia_blocks - prebuf->ia_blocks); +- GF_ASSERT(delta_blocks <= 0); +- local->postbuf.ia_blocks += delta_blocks; +- local->hole_size = 0; ++ inode_unlink(inode, priv->dot_shard_inode, block_bname); ++ inode_ref_reduce_by_n(inode, unref_shard_inode); ++ inode_forget(inode, 0); + +- shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ if (base_inode && unref_base_inode) ++ inode_ref_reduce_by_n(base_inode, unref_base_inode); ++ UNLOCK(&priv->lock); + } + +-int +-shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- size_t last_shard_size_after = 0; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; ++int shard_rename_cbk(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- /* A NULL inode could be due to the fact that the last shard which +- * needs to be truncated does not exist due to it lying in a hole +- * region. So the only thing left to do in that case would be an +- * update to file size xattr. +- */ +- if (!inode) { +- gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent" +- " in backend: %s. 
Directly proceeding to update " +- "file size", +- uuid_utoa(inode->gfid)); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } ++ SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->preoldparent, ++ &local->postoldparent, &local->prenewparent, ++ &local->postnewparent, local->xattr_rsp); ++ return 0; ++} + +- SHARD_SET_ROOT_FS_ID(frame, local); ++int32_t shard_unlink_cbk(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = frame->local; + +- loc.inode = inode_ref(inode); +- gf_uuid_copy(loc.gfid, inode->gfid); ++ SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preoldparent, &local->postoldparent, ++ local->xattr_rsp); ++ return 0; ++} + +- last_shard_size_after = (local->offset % local->block_size); ++int shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int shard_block_num = (long)cookie; ++ shard_local_t *local = NULL; + +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, +- NULL); +- loc_wipe(&loc); +- return 0; +-} ++ local = frame->local; + +-void +-shard_unlink_block_inode(shard_local_t *local, int shard_block_num); ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } + +-int +-shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int ret = 0; +- int call_count = 0; +- int shard_block_num = (long)cookie; +- uint64_t block_count = 0; +- shard_local_t *local = NULL; ++ shard_unlink_block_inode(local, shard_block_num); ++done: ++ syncbarrier_wake(&local->barrier); ++ return 0; ++} ++ ++int 
shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, ++ inode_t *inode) { ++ int i = 0; ++ int ret = -1; ++ int count = 0; ++ uint32_t cur_block = 0; ++ uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ ++ char *bname = NULL; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ if (inode) ++ gf_uuid_copy(gfid, inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (!local->inode_list[i]) ++ continue; ++ count++; ++ } ++ ++ if (!count) { ++ /* callcount = 0 implies that all of the shards that need to be ++ * unlinked are non-existent (in other words the file is full of ++ * holes). ++ */ ++ gf_msg_debug(this->name, 0, "All shards that need to be " ++ "unlinked are non-existent: %s", ++ uuid_utoa(gfid)); ++ return 0; ++ } + +- local = frame->local; ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ local->barrier.waitfor = count; ++ cur_block = cur_block_idx + local->first_block; + +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); +- if (!ret) { +- GF_ATOMIC_SUB(local->delta_blocks, block_count); +- } else { +- /* dict_get failed possibly due to a heterogeneous cluster? 
*/ +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get key %s from dict during truncate of gfid %s", +- GF_GET_FILE_BLOCK_COUNT, +- uuid_utoa(local->resolver_base_inode->gfid)); +- } +- +- shard_unlink_block_inode(local, shard_block_num); +-done: +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- shard_truncate_last_shard(frame, this, local->inode_list[0]); +- } +- return 0; +-} +- +-int +-shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- int i = 1; +- int ret = -1; +- int call_count = 0; +- uint32_t cur_block = 0; +- uint32_t last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- char *bname = NULL; +- loc_t loc = { +- 0, +- }; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- dict_t *xdata_req = NULL; +- +- local = frame->local; +- priv = this->private; +- +- cur_block = local->first_block + 1; +- last_block = local->last_block; +- +- /* Determine call count */ +- for (i = 1; i < local->num_blocks; i++) { +- if (!local->inode_list[i]) +- continue; +- call_count++; +- } ++ while (cur_block_idx < local->num_blocks) { ++ if (!local->inode_list[cur_block_idx]) ++ goto next; + +- if (!call_count) { +- /* Call count = 0 implies that all of the shards that need to be +- * unlinked do not exist. So shard xlator would now proceed to +- * do the final truncate + size updates. +- */ +- gf_msg_debug(this->name, 0, +- "Shards to be unlinked as part of " +- "truncate absent in backend: %s. 
Directly " +- "proceeding to update file size", +- uuid_utoa(inode->gfid)); +- local->postbuf.ia_size = local->offset; +- local->postbuf.ia_blocks = local->prebuf.ia_blocks; +- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- local->hole_size = 0; +- shard_update_file_size(frame, this, local->fd, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; ++ if (wind_failed) { ++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; + } + +- local->call_count = call_count; +- i = 1; +- xdata_req = dict_new(); +- if (!xdata_req) { +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +- } +- ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set key %s into dict during truncate of %s", +- GF_GET_FILE_BLOCK_COUNT, +- uuid_utoa(local->resolver_base_inode->gfid)); +- dict_unref(xdata_req); +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; ++ shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); ++ bname = strrchr(path, '/') + 1; ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s, base file gfid = %s", ++ bname, uuid_utoa(gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; + } + +- SHARD_SET_ROOT_FS_ID(frame, local); +- while (cur_block <= last_block) { +- if (!local->inode_list[i]) { +- cur_block++; +- i++; +- continue; +- } +- if (wind_failed) { +- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, 
NULL, NULL, NULL); +- goto next; +- } +- +- shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s. Base file gfid = %s", +- bname, uuid_utoa(inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- loc.inode = inode_ref(local->inode_list[i]); +- +- STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, +- (void *)(long)cur_block, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); +- loc_wipe(&loc); +- next: +- i++; +- cur_block++; +- if (!--call_count) +- break; +- } +- dict_unref(xdata_req); +- return 0; +-} ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ loc.inode = inode_ref(local->inode_list[cur_block_idx]); + +-int +-shard_truncate_do(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++ STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, ++ (void *)(long)cur_block, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, ++ local->xattr_req); ++ loc_wipe(&loc); ++ next: ++ cur_block++; ++ cur_block_idx++; ++ } ++ syncbarrier_wait(&local->barrier, count); ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ return 0; ++} ++ ++int shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, ++ int now, int first_block, ++ gf_dirent_t *entry) { ++ int i = 0; ++ int ret = 0; ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ ++ local = cleanup_frame->local; ++ ++ local->inode_list = GF_CALLOC(now, sizeof(inode_t *), 
gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ return -ENOMEM; ++ ++ local->first_block = first_block; ++ local->last_block = first_block + now - 1; ++ local->num_blocks = now; ++ gf_uuid_parse(entry->d_name, gfid); ++ gf_uuid_copy(local->base_gfid, gfid); ++ local->resolver_base_inode = inode_find(this->itable, gfid); ++ local->call_count = 0; ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) { ++ GF_FREE(local->inode_list); ++ local->inode_list = NULL; ++ inode_unref(local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ return -errno; ++ } ++ shard_common_resolve_shards(cleanup_frame, this, ++ shard_post_resolve_unlink_handler); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (local->inode_list[i]) ++ inode_unref(local->inode_list[i]); ++ } ++ GF_FREE(local->inode_list); ++ local->inode_list = NULL; ++ if (local->op_ret) ++ ret = -local->op_errno; ++ syncbarrier_destroy(&local->barrier); ++ inode_unref(local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ STACK_RESET(cleanup_frame->root); ++ return ret; ++} ++ ++int __shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) { ++ int ret = 0; ++ int shard_count = 0; ++ int first_block = 0; ++ int now = 0; ++ uint64_t size = 0; ++ uint64_t block_size = 0; ++ uint64_t size_array[4] = { ++ 0, ++ }; ++ void *bsize = NULL; ++ void *size_attr = NULL; ++ dict_t *xattr_rsp = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = cleanup_frame->local; ++ ret = dict_reset(local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", 
GF_XATTR_SHARD_BLOCK_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.inode = inode_ref(inode); ++ loc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, ++ &xattr_rsp); ++ if (ret) ++ goto err; ++ ++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); ++ goto err; ++ } ++ block_size = ntoh64(*((uint64_t *)bsize)); ++ ++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); ++ goto err; ++ } ++ ++ memcpy(size_array, size_attr, sizeof(size_array)); ++ size = ntoh64(size_array[0]); ++ ++ shard_count = (size / block_size) - 1; ++ if (shard_count < 0) { ++ gf_msg_debug(this->name, 0, "Size of %s hasn't grown beyond " ++ "its shard-block-size. Nothing to delete. " ++ "Returning", ++ entry->d_name); ++ /* File size < shard-block-size, so nothing to delete */ ++ ret = 0; ++ goto delete_marker; ++ } ++ if ((size % block_size) > 0) ++ shard_count++; ++ ++ if (shard_count == 0) { ++ gf_msg_debug(this->name, 0, "Size of %s is exactly equal to " ++ "its shard-block-size. Nothing to delete. 
" ++ "Returning", ++ entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } ++ gf_msg_debug(this->name, 0, ++ "base file = %s, " ++ "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ", " ++ "shard_count=%d", ++ entry->d_name, block_size, size, shard_count); ++ ++ /* Perform a gfid-based lookup to see if gfid corresponding to marker ++ * file's base name exists. ++ */ ++ loc_wipe(&loc); ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ gf_uuid_parse(entry->d_name, loc.gfid); ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (!ret) { ++ gf_msg_debug(this->name, 0, "Base shard corresponding to gfid " ++ "%s is present. Skipping shard deletion. " ++ "Returning", ++ entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } + +- local = frame->local; ++ first_block = 1; + +- if (local->num_blocks == 1) { +- /* This means that there are no shards to be unlinked. +- * The fop boils down to truncating the last shard, updating +- * the size and unwinding. 
+- */ +- shard_truncate_last_shard(frame, this, local->inode_list[0]); +- return 0; ++ while (shard_count) { ++ if (shard_count < local->deletion_rate) { ++ now = shard_count; ++ shard_count = 0; + } else { +- shard_truncate_htol(frame, this, local->loc.inode); +- } +- return 0; +-} +- +-int +-shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ now = local->deletion_rate; ++ shard_count -= local->deletion_rate; + } + +- shard_truncate_do(frame, this); +- return 0; +-} ++ gf_msg_debug(this->name, 0, "deleting %d shards starting from " ++ "block %d of gfid %s", ++ now, first_block, entry->d_name); ++ ret = shard_regulated_shards_deletion(cleanup_frame, this, now, first_block, ++ entry); ++ if (ret) ++ goto err; ++ first_block += now; ++ } + +-void +-shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, +- struct iatt *buf) +-{ +- int list_index = 0; +- char block_bname[256] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *linked_inode = NULL; +- xlator_t *this = NULL; +- inode_t *fsync_inode = NULL; +- shard_priv_t *priv = NULL; +- inode_t *base_inode = NULL; +- +- this = THIS; +- priv = this->private; +- if (local->loc.inode) { +- gf_uuid_copy(gfid, local->loc.inode->gfid); +- base_inode = local->loc.inode; +- } else if (local->resolver_base_inode) { +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); +- base_inode = local->resolver_base_inode; ++delete_marker: ++ loc_wipe(&loc); ++ loc.inode = inode_ref(inode); ++ loc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.name 
= strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); ++ if (ret) ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to delete %s " ++ "from /%s", ++ entry->d_name, GF_SHARD_REMOVE_ME_DIR); ++err: ++ if (xattr_rsp) ++ dict_unref(xattr_rsp); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) { ++ int ret = -1; ++ loc_t loc = { ++ 0, ++ }; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ loc.inode = inode_ref(priv->dot_shard_rm_inode); ++ ++ ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); ++ if (ret < 0) { ++ if (ret == -EAGAIN) { ++ ret = 0; ++ } ++ goto out; ++ } ++ { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); } ++ syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); ++out: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) { ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) { ++ int ret = 0; ++ char *bname = NULL; ++ loc_t *loc = NULL; ++ shard_priv_t *priv = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ struct iatt stbuf = { ++ 0, ++ }; ++ ++ priv = this->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ loc = &local->dot_shard_loc; ++ gf_uuid_copy(gfid, priv->dot_shard_gfid); ++ bname = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ loc = &local->dot_shard_rm_loc; ++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ ++ loc->inode = inode_find(this->itable, gfid); ++ if (!loc->inode) { ++ ret = 
shard_init_internal_dir_loc(this, local, type); ++ if (ret) ++ goto err; ++ ret = dict_reset(local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset " ++ "dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); ++ ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, local->xattr_req, ++ NULL); ++ if (ret < 0) { ++ if (ret != -ENOENT) ++ gf_msg(this->name, GF_LOG_ERROR, -ret, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Lookup on %s failed, exiting", bname); ++ goto err; + } else { +- gf_uuid_copy(gfid, local->base_gfid); ++ shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); + } ++ } ++ ret = 0; ++err: ++ return ret; ++} ++ ++int shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, ++ gf_dirent_t *entry) { ++ int ret = 0; ++ loc_t loc = { ++ 0, ++ }; ++ ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.parent = inode_ref(local->fd->inode); ++ ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (ret < 0) { ++ goto err; ++ } ++ entry->inode = inode_ref(loc.inode); ++ ret = 0; ++err: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards(void *opaque) { ++ int ret = 0; ++ off_t offset = 0; ++ loc_t loc = { ++ 0, ++ }; ++ inode_t *link_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ gf_dirent_t entries; ++ gf_dirent_t *entry = NULL; ++ call_frame_t *cleanup_frame = NULL; ++ gf_boolean_t done = _gf_false; ++ ++ this = THIS; ++ priv = this->private; ++ 
INIT_LIST_HEAD(&entries.list); ++ ++ cleanup_frame = opaque; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create local to " ++ "delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ cleanup_frame->local = local; ++ local->fop = GF_FOP_UNLINK; ++ ++ local->xattr_req = dict_new(); ++ if (!local->xattr_req) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ local->deletion_rate = priv->deletion_rate; ++ ++ ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret == -ENOENT) { ++ gf_msg_debug(this->name, 0, ".shard absent. Nothing to" ++ " delete. Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } + +- shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); +- +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); +- linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); +- inode_lookup(linked_inode); +- list_index = block_num - local->first_block; +- local->inode_list[list_index] = linked_inode; +- ++ ret = shard_resolve_internal_dir(this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ if (ret == -ENOENT) { ++ gf_msg_debug(this->name, 0, ".remove_me absent. " ++ "Nothing to delete. 
Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } ++ ++ local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); ++ if (!local->fd) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ for (;;) { ++ offset = 0; + LOCK(&priv->lock); + { +- fsync_inode = __shard_update_shards_inode_list( +- linked_inode, this, base_inode, block_num, gfid); ++ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { ++ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; ++ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ done = _gf_true; ++ } + } + UNLOCK(&priv->lock); +- if (fsync_inode) +- shard_initiate_evicted_inode_fsync(this, fsync_inode); +-} +- +-int +-shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, int32_t op_errno, +- inode_t *inode, struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- int call_count = 0; +- int shard_block_num = (long)cookie; +- uuid_t gfid = { +- 0, +- }; +- shard_local_t *local = NULL; +- +- local = frame->local; +- if (local->resolver_base_inode) +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- if (op_ret < 0) { +- /* Ignore absence of shards in the backend in truncate fop. */ +- switch (local->fop) { +- case GF_FOP_TRUNCATE: +- case GF_FOP_FTRUNCATE: +- case GF_FOP_RENAME: +- case GF_FOP_UNLINK: +- if (op_errno == ENOENT) +- goto done; +- break; +- case GF_FOP_WRITE: +- case GF_FOP_READ: +- case GF_FOP_ZEROFILL: +- case GF_FOP_DISCARD: +- case GF_FOP_FALLOCATE: +- if ((!local->first_lookup_done) && (op_errno == ENOENT)) { +- LOCK(&frame->lock); +- { +- local->create_count++; +- } +- UNLOCK(&frame->lock); +- goto done; +- } +- break; +- default: +- break; +- } +- +- /* else */ +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_LOOKUP_SHARD_FAILED, +- "Lookup on shard %d " +- "failed. 
Base file gfid = %s", +- shard_block_num, uuid_utoa(gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- +- shard_link_block_inode(local, shard_block_num, inode, buf); +- +-done: +- if (local->lookup_shards_barriered) { +- syncbarrier_wake(&local->barrier); +- return 0; +- } else { +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- if (!local->first_lookup_done) +- local->first_lookup_done = _gf_true; +- local->pls_fop_handler(frame, this); +- } +- } +- return 0; +-} +- +-dict_t * +-shard_create_gfid_dict(dict_t *dict) +-{ +- int ret = 0; +- dict_t *new = NULL; +- unsigned char *gfid = NULL; +- +- new = dict_copy_with_ref(dict, NULL); +- if (!new) +- return NULL; +- +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); +- if (!gfid) { +- ret = -1; +- goto out; +- } +- +- gf_uuid_generate(gfid); +- +- ret = dict_set_gfuuid(new, "gfid-req", gfid, false); +- +-out: +- if (ret) { +- dict_unref(new); +- new = NULL; +- GF_FREE(gfid); +- } +- +- return new; +-} ++ if (done) ++ break; ++ while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, ++ &entries, local->xattr_req, NULL))) { ++ if (ret > 0) ++ ret = 0; ++ list_for_each_entry(entry, &entries.list, list) { ++ offset = entry->d_off; + +-int +-shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, +- shard_post_lookup_shards_fop_handler_t handler) +-{ +- int i = 0; +- int ret = 0; +- int count = 0; +- int call_count = 0; +- int32_t shard_idx_iter = 0; +- int last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- char *bname = NULL; +- uuid_t gfid = { +- 0, +- }; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- gf_boolean_t wind_failed = _gf_false; +- dict_t *xattr_req = NULL; +- +- priv = this->private; +- local = frame->local; +- count = call_count = local->call_count; +- shard_idx_iter = local->first_block; +- last_block = local->last_block; +- 
local->pls_fop_handler = handler; +- if (local->lookup_shards_barriered) +- local->barrier.waitfor = local->call_count; +- +- if (inode) +- gf_uuid_copy(gfid, inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); ++ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) ++ continue; + +- while (shard_idx_iter <= last_block) { +- if (local->inode_list[i]) { +- i++; +- shard_idx_iter++; ++ if (!entry->inode) { ++ ret = shard_lookup_marker_entry(this, local, entry); ++ if (ret < 0) + continue; + } ++ link_inode = inode_link(entry->inode, local->fd->inode, entry->d_name, ++ &entry->d_stat); + +- if (wind_failed) { +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; +- } +- +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- bname = strrchr(path, '/') + 1; +- loc.inode = inode_new(this->itable); +- loc.parent = inode_ref(priv->dot_shard_inode); +- gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0 || !(loc.inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- loc_wipe(&loc); +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; ++ gf_msg_debug(this->name, 0, "Initiating deletion of " ++ "shards of gfid %s", ++ 
entry->d_name); ++ ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, ++ link_inode); ++ inode_unlink(link_inode, local->fd->inode, entry->d_name); ++ inode_unref(link_inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to clean up shards of gfid %s", entry->d_name); ++ continue; + } ++ gf_msg(this->name, GF_LOG_INFO, 0, SHARD_MSG_SHARD_DELETION_COMPLETED, ++ "Deleted " ++ "shards of gfid=%s from backend", ++ entry->d_name); ++ } ++ gf_dirent_free(&entries); ++ if (ret) ++ break; ++ } ++ } ++ ret = 0; ++ loc_wipe(&loc); ++ return ret; + +- STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, +- (void *)(long)shard_idx_iter, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); +- loc_wipe(&loc); +- dict_unref(xattr_req); +- next: +- shard_idx_iter++; +- i++; +- +- if (!--call_count) +- break; +- } +- if (local->lookup_shards_barriered) { +- syncbarrier_wait(&local->barrier, count); +- local->pls_fop_handler(frame, this); +- } +- return 0; ++err: ++ LOCK(&priv->lock); ++ { priv->bg_del_state = SHARD_BG_DELETION_NONE; } ++ UNLOCK(&priv->lock); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ if (op_ret) ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->inodelk_frame; ++ lk_local = lk_frame->local; ++ local->inodelk_frame = NULL; ++ loc = &local->int_inodelk.loc; ++ lock = &lk_local->int_inodelk; ++ lock->flock.l_type = F_UNLCK; ++ ++ STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, ++ &lock->flock, NULL); ++ local->int_inodelk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata); ++int shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ loc_t *dst_loc = NULL; ++ loc_t tmp_loc = { ++ 0, ++ }; ++ shard_local_t *local = frame->local; ++ ++ if (local->dst_block_size) { ++ tmp_loc.parent = inode_ref(local->loc2.parent); ++ ret = inode_path(tmp_loc.parent, local->loc2.name, (char **)&tmp_loc.path); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on pargfid=%s bname=%s", ++ uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ tmp_loc.name = strrchr(tmp_loc.path, '/'); ++ if (tmp_loc.name) ++ tmp_loc.name++; ++ dst_loc = &tmp_loc; ++ } else { ++ dst_loc = &local->loc2; ++ } ++ ++ /* To-Do: Request open-fd count on dst base file */ ++ STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, 
++ local->xattr_req); ++ loc_wipe(&tmp_loc); ++ return 0; ++err: ++ loc_wipe(&tmp_loc); ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this); ++ ++int shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, ++ dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Xattrop on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } ++ ++ inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, ++ local->newloc.name); ++ ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); ++ return 0; ++} ++ ++int shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) { ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ dict_t *xdata = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ xdata = dict_new(); ++ if (!xdata) ++ goto err; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->xattrop, &local->newloc, ++ GF_XATTROP_GET_AND_SET, xdata, NULL); ++ dict_unref(xdata); ++ return 0; ++err: ++ if (xdata) ++ dict_unref(xdata); ++ shard_common_failure_unwind(local->fop, frame, -1, op_errno); ++ return 0; + } + +-int 
+-shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- if (local->op_errno == ENOENT) { +- /* If lookup on /.shard fails with ENOENT, it means that +- * the file was 0-byte in size but truncated sometime in +- * the past to a higher size which is reflected in the +- * size xattr, and now being truncated to a lower size. +- * In this case, the only thing that needs to be done is +- * to update the size xattr of the file and unwind. +- */ +- local->first_block = local->last_block = 0; +- local->num_blocks = 1; +- local->call_count = 0; +- local->op_ret = 0; +- local->postbuf.ia_size = local->offset; +- shard_update_file_size(frame, this, local->fd, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } else { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- } +- +- if (!local->call_count) +- shard_truncate_do(frame, this); +- else +- shard_common_lookup_shards(frame, this, local->loc.inode, +- shard_post_lookup_shards_truncate_handler); +- +- return 0; ++int shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Lookup on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } ++ ++ linked_inode = ++ inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); ++ inode_unref(local->newloc.inode); ++ local->newloc.inode = linked_inode; ++ shard_set_size_attrs_on_marker_file(frame, this); ++ return 0; ++err: 
++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); ++ return 0; + } + +-int +-shard_truncate_begin(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- /* First participant block here is the lowest numbered block that would +- * hold the last byte of the file post successful truncation. +- * Last participant block is the block that contains the last byte in +- * the current state of the file. +- * If (first block == last_block): +- * then that means that the file only needs truncation of the +- * first (or last since both are same) block. +- * Else +- * if (new_size % block_size == 0) +- * then that means there is no truncate to be done with +- * only shards from first_block + 1 through the last +- * block needing to be unlinked. +- * else +- * both truncate of the first block and unlink of the +- * remaining shards until end of file is required. +- */ +- local->first_block = (local->offset == 0) +- ? 0 +- : get_lowest_block(local->offset - 1, +- local->block_size); +- local->last_block = get_highest_block(0, local->prebuf.ia_size, +- local->block_size); +- +- local->num_blocks = local->last_block - local->first_block + 1; +- local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) +- ? 
local->loc.inode +- : local->fd->inode; +- +- if ((local->first_block == 0) && (local->num_blocks == 1)) { +- if (local->fop == GF_FOP_TRUNCATE) +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, &local->loc, +- local->offset, local->xattr_req); +- else +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->ftruncate, local->fd, +- local->offset, local->xattr_req); +- return 0; +- } ++int shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) { ++ int op_errno = ENOMEM; ++ dict_t *xattr_req = NULL; ++ shard_local_t *local = NULL; + +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto err; ++ local = frame->local; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = shard_init_internal_dir_loc(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto err; +- shard_lookup_internal_dir(frame, this, +- shard_post_resolve_truncate_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_truncate_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) ++ goto err; + ++ STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, -1, op_errno); ++ return 0; + } + +-int +-shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- struct iatt tmp_stbuf = { +- 0, +- }; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- 
shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- local->postbuf = tmp_stbuf = local->prebuf; +- +- if (local->prebuf.ia_size == local->offset) { +- /* If the file size is same as requested size, unwind the call +- * immediately. +- */ +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, +- &local->postbuf, NULL); +- else +- SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, +- &local->postbuf, NULL); +- } else if (local->offset > local->prebuf.ia_size) { +- /* If the truncate is from a lower to a higher size, set the +- * new size xattr and unwind. +- */ +- local->hole_size = local->offset - local->prebuf.ia_size; +- local->delta_size = 0; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- local->postbuf.ia_size = local->offset; +- tmp_stbuf.ia_size = local->offset; +- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +- SHARD_INODE_WRITE_MASK); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); ++int shard_create_marker_file_under_remove_me_cbk( ++ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (op_ret < 0) { ++ if ((op_errno != EEXIST) && (op_errno != ENODATA)) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Marker file creation " ++ "failed while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; + } else { +- /* ... else +- * i. unlink all shards that need to be unlinked. +- * ii. truncate the last of the shards. +- * iii. 
update the new size using setxattr. +- * and unwind the fop. +- */ +- local->hole_size = 0; +- local->delta_size = (local->offset - local->prebuf.ia_size); +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- tmp_stbuf.ia_size = local->offset; +- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +- SHARD_INODE_WRITE_MASK); +- shard_truncate_begin(frame, this); +- } +- return 0; +-} +- +-/* TO-DO: +- * Fix updates to size and block count with racing write(s) and truncate(s). +- */ +- +-int +-shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; ++ shard_lookup_marker_file(frame, this); ++ return 0; + } ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = loc->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- loc_copy(&local->loc, loc); +- local->offset = offset; +- local->block_size = block_size; +- local->fop = GF_FOP_TRUNCATE; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->resolver_base_inode = loc->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); +- return 0; ++ linked_inode = ++ inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); ++ inode_unref(local->newloc.inode); ++ local->newloc.inode = linked_inode; + ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = fd->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- local->fd = fd_ref(fd); +- local->offset = offset; +- local->block_size = block_size; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_FTRUNCATE; ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++} ++ ++int shard_create_marker_file_under_remove_me(call_frame_t *frame, ++ xlator_t *this, loc_t *loc) { ++ int ret = 0; ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ char g1[64] = { ++ 0, ++ }; ++ char g2[64] = { ++ 0, ++ }; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) ++ goto err; ++ ++ local->newloc.inode = inode_new(this->itable); ++ local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), ++ (char **)&local->newloc.path); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on " ++ "pargfid=%s bname=%s", ++ uuid_utoa_r(priv->dot_shard_rm_gfid, g1), ++ uuid_utoa_r(loc->inode->gfid, g2)); ++ goto err; ++ } ++ local->newloc.name = strrchr(local->newloc.path, '/'); ++ if (local->newloc.name) ++ local->newloc.name++; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ ++ SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ ++ STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &local->newloc, ++ 0, 0, 0644, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); +- local->resolver_base_inode = fd->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); +- 
return 0; + err: +- shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); +- return 0; ++ if (xattr_req) ++ dict_unref(xattr_req); ++ shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, ++ NULL, NULL, NULL, NULL, NULL); ++ return 0; + } + +-int +-shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret == -1) +- goto unwind; +- +- ret = shard_inode_ctx_set(inode, this, buf, local->block_size, +- SHARD_ALL_MASK); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, +- "Failed to set inode " +- "ctx for %s", +- uuid_utoa(inode->gfid)); +- +-unwind: +- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, +- postparent, xdata); ++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); + +- return 0; +-} ++int shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; + +-int +-shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, +- dev_t rdev, mode_t umask, dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; ++ local = frame->local; + +- priv = this->private; +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } else { ++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); ++ local->preoldparent = *preparent; ++ local->postoldparent = *postparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ if (local->cleanup_required) ++ shard_start_background_deletion(this); ++ } + +- frame->local = 
local; +- local->block_size = priv->block_size; +- if (!__is_gsyncd_on_shard_dir(frame, loc)) { +- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; + } ++ } + +- STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); +- return 0; +-} +- +-int32_t +-shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- if (op_ret < 0) +- goto err; +- +- shard_inode_ctx_set(inode, this, buf, 0, +- SHARD_MASK_NLINK | SHARD_MASK_TIMES); +- buf->ia_size = local->prebuf.ia_size; +- buf->ia_blocks = local->prebuf.ia_blocks; +- +- SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, +- postparent, xdata); +- return 0; ++ ret = shard_unlock_inodelk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } ++ ++ shard_unlink_cbk(frame, this); ++ return 0; ++} ++ ++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = frame->local; ++ ++ /* To-Do: Request open-fd count on base file */ ++ STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, ++ local->xattr_req); ++ return 0; ++} ++ ++int shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ if (op_ret) ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_entrylk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->entrylk_frame; ++ lk_local = lk_frame->local; ++ local->entrylk_frame = NULL; ++ lock = &lk_local->int_entrylk; ++ loc = &lock->loc; ++ ++ STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->entrylk, this->name, loc, ++ lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, ++ NULL); ++ local->int_entrylk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_create_marker_file_under_remove_me(frame, this, ++ &local->int_inodelk.loc); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-entrylk handler not defined. 
This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); ++ return 0; ++ } ++ main_local->int_entrylk.acquired_lock = _gf_true; ++ shard_post_entrylk_fop_handler(main_frame, this); ++ return 0; ++} ++ ++int shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, ++ uuid_t gfid) { ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_local_t *entrylk_local = NULL; ++ shard_entrylk_t *int_entrylk = NULL; ++ call_frame_t *entrylk_frame = NULL; ++ ++ local = frame->local; ++ entrylk_frame = create_frame(this, this->ctx->pool); ++ if (!entrylk_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to lock marker file"); ++ goto err; ++ } ++ ++ entrylk_local = mem_get0(this->local_pool); ++ if (!entrylk_local) { ++ STACK_DESTROY(entrylk_frame->root); ++ goto err; ++ } ++ ++ entrylk_frame->local = entrylk_local; ++ entrylk_local->main_frame = frame; ++ int_entrylk = &entrylk_local->int_entrylk; ++ ++ int_entrylk->loc.inode = inode_ref(inode); ++ set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); ++ local->entrylk_frame = entrylk_frame; ++ gf_uuid_unparse(gfid, gfid_str); ++ int_entrylk->basename = gf_strdup(gfid_str); ++ ++ STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, ++ int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); ++ return 0; + err: +- 
shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); +- return 0; +-} +- +-int +-shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, +- NULL, NULL, NULL, NULL); +- return 0; +- } +- +- STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, +- local->xattr_req); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int32_t +-shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(oldloc->inode->gfid)); +- goto err; +- } +- +- if (!block_size) { +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, +- oldloc, newloc, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = oldloc->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- loc_copy(&local->loc, oldloc); +- loc_copy(&local->loc2, newloc); +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_link_handler); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); +- +-int +-shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { +- gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, +- "failed to delete shards of %s", +- uuid_utoa(local->resolver_base_inode->gfid)); +- return 0; +- } +- local->op_ret = 0; +- local->op_errno = 0; +- +- shard_unlink_shards_do(frame, this, local->resolver_base_inode); +- return 0; +-} +- +-int +-shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- local->lookup_shards_barriered = _gf_true; +- +- if (!local->call_count) +- shard_unlink_shards_do(frame, this, local->resolver_base_inode); +- else +- shard_common_lookup_shards(frame, this, local->resolver_base_inode, +- shard_post_lookup_shards_unlink_handler); +- return 0; +-} +- +-void +-shard_unlink_block_inode(shard_local_t *local, int shard_block_num) +-{ +- char block_bname[256] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *inode = NULL; +- inode_t *base_inode = NULL; +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *base_ictx = NULL; +- int unref_base_inode = 0; +- int unref_shard_inode = 0; +- +- this = THIS; +- priv = this->private; +- +- inode = local->inode_list[shard_block_num - local->first_block]; +- shard_inode_ctx_get(inode, this, &ctx); +- base_inode = ctx->base_inode; +- if (base_inode) +- gf_uuid_copy(gfid, base_inode->gfid); +- else +- gf_uuid_copy(gfid, 
ctx->base_gfid); +- shard_make_block_bname(shard_block_num, gfid, block_bname, +- sizeof(block_bname)); +- +- LOCK(&priv->lock); +- if (base_inode) +- LOCK(&base_inode->lock); +- LOCK(&inode->lock); +- { +- __shard_inode_ctx_get(inode, this, &ctx); +- if (!list_empty(&ctx->ilist)) { +- list_del_init(&ctx->ilist); +- priv->inode_count--; +- unref_base_inode++; +- unref_shard_inode++; +- GF_ASSERT(priv->inode_count >= 0); +- } +- if (ctx->fsync_needed) { +- unref_base_inode++; +- unref_shard_inode++; +- list_del_init(&ctx->to_fsync_list); +- if (base_inode) { +- __shard_inode_ctx_get(base_inode, this, &base_ictx); +- base_ictx->fsync_count--; +- } +- } +- } +- UNLOCK(&inode->lock); +- if (base_inode) +- UNLOCK(&base_inode->lock); +- +- inode_unlink(inode, priv->dot_shard_inode, block_bname); +- inode_ref_reduce_by_n(inode, unref_shard_inode); +- inode_forget(inode, 0); +- +- if (base_inode && unref_base_inode) +- inode_ref_reduce_by_n(base_inode, unref_base_inode); +- UNLOCK(&priv->lock); +-} +- +-int +-shard_rename_cbk(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->preoldparent, +- &local->postoldparent, &local->prenewparent, +- &local->postnewparent, local->xattr_rsp); +- return 0; +-} +- +-int32_t +-shard_unlink_cbk(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- +- SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, +- &local->preoldparent, &local->postoldparent, +- local->xattr_rsp); +- return 0; +-} +- +-int +-shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int shard_block_num = (long)cookie; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno 
= op_errno; +- goto done; +- } +- +- shard_unlink_block_inode(local, shard_block_num); +-done: +- syncbarrier_wake(&local->barrier); +- return 0; +-} +- +-int +-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- int i = 0; +- int ret = -1; +- int count = 0; +- uint32_t cur_block = 0; +- uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ +- char *bname = NULL; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- loc_t loc = { +- 0, +- }; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- if (inode) +- gf_uuid_copy(gfid, inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- for (i = 0; i < local->num_blocks; i++) { +- if (!local->inode_list[i]) +- continue; +- count++; +- } +- +- if (!count) { +- /* callcount = 0 implies that all of the shards that need to be +- * unlinked are non-existent (in other words the file is full of +- * holes). 
+- */ +- gf_msg_debug(this->name, 0, +- "All shards that need to be " +- "unlinked are non-existent: %s", +- uuid_utoa(gfid)); +- return 0; +- } +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- local->barrier.waitfor = count; +- cur_block = cur_block_idx + local->first_block; +- +- while (cur_block_idx < local->num_blocks) { +- if (!local->inode_list[cur_block_idx]) +- goto next; +- +- if (wind_failed) { +- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- +- shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- loc.inode = inode_ref(local->inode_list[cur_block_idx]); +- +- STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, +- (void *)(long)cur_block, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, +- local->xattr_req); +- loc_wipe(&loc); +- next: +- cur_block++; +- cur_block_idx++; +- } +- syncbarrier_wait(&local->barrier, count); +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- return 0; +-} +- +-int +-shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, +- int now, int first_block, gf_dirent_t *entry) +-{ +- int i = 0; +- int ret = 0; +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; +- +- local = cleanup_frame->local; +- +- local->inode_list = GF_CALLOC(now, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if 
(!local->inode_list) +- return -ENOMEM; +- +- local->first_block = first_block; +- local->last_block = first_block + now - 1; +- local->num_blocks = now; +- gf_uuid_parse(entry->d_name, gfid); +- gf_uuid_copy(local->base_gfid, gfid); +- local->resolver_base_inode = inode_find(this->itable, gfid); +- local->call_count = 0; +- ret = syncbarrier_init(&local->barrier); +- if (ret) { +- GF_FREE(local->inode_list); +- local->inode_list = NULL; +- inode_unref(local->resolver_base_inode); +- local->resolver_base_inode = NULL; +- return -errno; +- } +- shard_common_resolve_shards(cleanup_frame, this, +- shard_post_resolve_unlink_handler); +- +- for (i = 0; i < local->num_blocks; i++) { +- if (local->inode_list[i]) +- inode_unref(local->inode_list[i]); +- } +- GF_FREE(local->inode_list); +- local->inode_list = NULL; +- if (local->op_ret) +- ret = -local->op_errno; +- syncbarrier_destroy(&local->barrier); +- inode_unref(local->resolver_base_inode); +- local->resolver_base_inode = NULL; +- STACK_RESET(cleanup_frame->root); +- return ret; +-} +- +-int +-__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, +- gf_dirent_t *entry, inode_t *inode) +-{ +- int ret = 0; +- int shard_count = 0; +- int first_block = 0; +- int now = 0; +- uint64_t size = 0; +- uint64_t block_size = 0; +- uint64_t size_array[4] = { +- 0, +- }; +- void *bsize = NULL; +- void *size_attr = NULL; +- dict_t *xattr_rsp = NULL; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = cleanup_frame->local; +- ret = dict_reset(local->xattr_req); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to reset dict"); +- ret = -ENOMEM; +- goto err; +- } +- +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); +- ret = 
-ENOMEM; +- goto err; +- } +- +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.inode = inode_ref(inode); +- loc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, +- &xattr_rsp); +- if (ret) +- goto err; +- +- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); +- goto err; +- } +- block_size = ntoh64(*((uint64_t *)bsize)); +- +- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); +- goto err; +- } +- +- memcpy(size_array, size_attr, sizeof(size_array)); +- size = ntoh64(size_array[0]); +- +- shard_count = (size / block_size) - 1; +- if (shard_count < 0) { +- gf_msg_debug(this->name, 0, +- "Size of %s hasn't grown beyond " +- "its shard-block-size. Nothing to delete. " +- "Returning", +- entry->d_name); +- /* File size < shard-block-size, so nothing to delete */ +- ret = 0; +- goto delete_marker; +- } +- if ((size % block_size) > 0) +- shard_count++; +- +- if (shard_count == 0) { +- gf_msg_debug(this->name, 0, +- "Size of %s is exactly equal to " +- "its shard-block-size. Nothing to delete. 
" +- "Returning", +- entry->d_name); +- ret = 0; +- goto delete_marker; +- } +- gf_msg_debug(this->name, 0, +- "base file = %s, " +- "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 +- ", " +- "shard_count=%d", +- entry->d_name, block_size, size, shard_count); +- +- /* Perform a gfid-based lookup to see if gfid corresponding to marker +- * file's base name exists. +- */ +- loc_wipe(&loc); +- loc.inode = inode_new(this->itable); +- if (!loc.inode) { +- ret = -ENOMEM; +- goto err; +- } +- gf_uuid_parse(entry->d_name, loc.gfid); +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); +- if (!ret) { +- gf_msg_debug(this->name, 0, +- "Base shard corresponding to gfid " +- "%s is present. Skipping shard deletion. " +- "Returning", +- entry->d_name); +- ret = 0; +- goto delete_marker; +- } +- +- first_block = 1; +- +- while (shard_count) { +- if (shard_count < local->deletion_rate) { +- now = shard_count; +- shard_count = 0; +- } else { +- now = local->deletion_rate; +- shard_count -= local->deletion_rate; +- } +- +- gf_msg_debug(this->name, 0, +- "deleting %d shards starting from " +- "block %d of gfid %s", +- now, first_block, entry->d_name); +- ret = shard_regulated_shards_deletion(cleanup_frame, this, now, +- first_block, entry); +- if (ret) +- goto err; +- first_block += now; +- } +- +-delete_marker: +- loc_wipe(&loc); +- loc.inode = inode_ref(inode); +- loc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to delete %s " +- "from /%s", +- entry->d_name, GF_SHARD_REMOVE_ME_DIR); +-err: +- if 
(xattr_rsp) +- dict_unref(xattr_rsp); +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, +- gf_dirent_t *entry, inode_t *inode) +-{ +- int ret = -1; +- loc_t loc = { +- 0, +- }; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- loc.inode = inode_ref(priv->dot_shard_rm_inode); +- +- ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, +- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); +- if (ret < 0) { +- if (ret == -EAGAIN) { +- ret = 0; +- } +- goto out; +- } +- { +- ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); +- } +- syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, +- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); +-out: +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) +-{ +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, +- shard_internal_dir_type_t type) +-{ +- int ret = 0; +- char *bname = NULL; +- loc_t *loc = NULL; +- shard_priv_t *priv = NULL; +- uuid_t gfid = { +- 0, +- }; +- struct iatt stbuf = { +- 0, +- }; +- +- priv = this->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- loc = &local->dot_shard_loc; +- gf_uuid_copy(gfid, priv->dot_shard_gfid); +- bname = GF_SHARD_DIR; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- loc = &local->dot_shard_rm_loc; +- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); +- bname = GF_SHARD_REMOVE_ME_DIR; +- break; +- default: +- break; +- } +- +- loc->inode = inode_find(this->itable, gfid); +- if (!loc->inode) { +- ret = shard_init_internal_dir_loc(this, local, type); +- if (ret) +- goto err; +- ret = dict_reset(local->xattr_req); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to reset " +- "dict"); +- ret = -ENOMEM; +- goto err; +- } +- ret = 
dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); +- ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, +- local->xattr_req, NULL); +- if (ret < 0) { +- if (ret != -ENOENT) +- gf_msg(this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Lookup on %s failed, exiting", bname); +- goto err; +- } else { +- shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); +- } +- } +- ret = 0; +-err: +- return ret; +-} +- +-int +-shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, +- gf_dirent_t *entry) +-{ +- int ret = 0; +- loc_t loc = { +- 0, +- }; +- +- loc.inode = inode_new(this->itable); +- if (!loc.inode) { +- ret = -ENOMEM; +- goto err; +- } +- loc.parent = inode_ref(local->fd->inode); +- +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); +- if (ret < 0) { +- goto err; +- } +- entry->inode = inode_ref(loc.inode); +- ret = 0; +-err: +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards(void *opaque) +-{ +- int ret = 0; +- off_t offset = 0; +- loc_t loc = { +- 0, +- }; +- inode_t *link_inode = NULL; +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- gf_dirent_t entries; +- gf_dirent_t *entry = NULL; +- call_frame_t *cleanup_frame = NULL; +- gf_boolean_t done = _gf_false; +- +- this = THIS; +- priv = this->private; +- INIT_LIST_HEAD(&entries.list); +- +- cleanup_frame = opaque; +- +- local = mem_get0(this->local_pool); +- if (!local) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create local to " +- "delete shards"); +- ret = -ENOMEM; +- goto err; +- } +- cleanup_frame->local = local; +- 
local->fop = GF_FOP_UNLINK; +- +- local->xattr_req = dict_new(); +- if (!local->xattr_req) { +- ret = -ENOMEM; +- goto err; +- } +- local->deletion_rate = priv->deletion_rate; +- +- ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret == -ENOENT) { +- gf_msg_debug(this->name, 0, +- ".shard absent. Nothing to" +- " delete. Exiting"); +- ret = 0; +- goto err; +- } else if (ret < 0) { +- goto err; +- } +- +- ret = shard_resolve_internal_dir(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- if (ret == -ENOENT) { +- gf_msg_debug(this->name, 0, +- ".remove_me absent. " +- "Nothing to delete. Exiting"); +- ret = 0; +- goto err; +- } else if (ret < 0) { +- goto err; +- } +- +- local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); +- if (!local->fd) { +- ret = -ENOMEM; +- goto err; +- } +- +- for (;;) { +- offset = 0; +- LOCK(&priv->lock); +- { +- if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { +- priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; +- } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- done = _gf_true; +- } +- } +- UNLOCK(&priv->lock); +- if (done) +- break; +- while ( +- (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, +- &entries, local->xattr_req, NULL))) { +- if (ret > 0) +- ret = 0; +- list_for_each_entry(entry, &entries.list, list) +- { +- offset = entry->d_off; +- +- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) +- continue; +- +- if (!entry->inode) { +- ret = shard_lookup_marker_entry(this, local, entry); +- if (ret < 0) +- continue; +- } +- link_inode = inode_link(entry->inode, local->fd->inode, +- entry->d_name, &entry->d_stat); +- +- gf_msg_debug(this->name, 0, +- "Initiating deletion of " +- "shards of gfid %s", +- entry->d_name); +- ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, +- link_inode); +- inode_unlink(link_inode, local->fd->inode, entry->d_name); +- 
inode_unref(link_inode); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to clean up shards of gfid %s", +- entry->d_name); +- continue; +- } +- gf_msg(this->name, GF_LOG_INFO, 0, +- SHARD_MSG_SHARD_DELETION_COMPLETED, +- "Deleted " +- "shards of gfid=%s from backend", +- entry->d_name); +- } +- gf_dirent_free(&entries); +- if (ret) +- break; +- } +- } +- ret = 0; +- loc_wipe(&loc); +- return ret; +- +-err: +- LOCK(&priv->lock); +- { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- } +- UNLOCK(&priv->lock); +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- if (op_ret) +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Unlock failed. Please check brick logs for " +- "more details"); +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_inodelk_t *lock = NULL; +- +- local = frame->local; +- lk_frame = local->inodelk_frame; +- lk_local = lk_frame->local; +- local->inodelk_frame = NULL; +- loc = &local->int_inodelk.loc; +- lock = &lk_local->int_inodelk; +- lock->flock.l_type = F_UNLCK; +- +- STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, +- &lock->flock, NULL); +- local->int_inodelk.acquired_lock = _gf_false; +- return 0; +-} +- +-int +-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- struct iatt *preoldparent, struct iatt *postoldparent, +- struct iatt *prenewparent, struct iatt *postnewparent, +- dict_t *xdata); +-int +-shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 
0; +- loc_t *dst_loc = NULL; +- loc_t tmp_loc = { +- 0, +- }; +- shard_local_t *local = frame->local; +- +- if (local->dst_block_size) { +- tmp_loc.parent = inode_ref(local->loc2.parent); +- ret = inode_path(tmp_loc.parent, local->loc2.name, +- (char **)&tmp_loc.path); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on pargfid=%s bname=%s", +- uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- +- tmp_loc.name = strrchr(tmp_loc.path, '/'); +- if (tmp_loc.name) +- tmp_loc.name++; +- dst_loc = &tmp_loc; +- } else { +- dst_loc = &local->loc2; +- } +- +- /* To-Do: Request open-fd count on dst base file */ +- STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, +- local->xattr_req); +- loc_wipe(&tmp_loc); +- return 0; +-err: +- loc_wipe(&tmp_loc); +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int +-shard_unlink_base_file(call_frame_t *frame, xlator_t *this); +- +-int +-shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Xattrop on marker file failed " +- "while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } +- +- inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, +- local->newloc.name); +- +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, op_ret, 
op_errno); +- return 0; +-} +- +-int +-shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) +-{ +- int op_errno = ENOMEM; +- uint64_t bs = 0; +- dict_t *xdata = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- xdata = dict_new(); +- if (!xdata) +- goto err; +- +- if (local->fop == GF_FOP_UNLINK) +- bs = local->block_size; +- else if (local->fop == GF_FOP_RENAME) +- bs = local->dst_block_size; +- SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, +- local->prebuf.ia_size, 0, err); +- STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, +- &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL); +- dict_unref(xdata); +- return 0; +-err: +- if (xdata) +- dict_unref(xdata); +- shard_common_failure_unwind(local->fop, frame, -1, op_errno); +- return 0; +-} +- +-int +-shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- priv = this->private; +- +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Lookup on marker file failed " +- "while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } +- +- linked_inode = inode_link(inode, priv->dot_shard_rm_inode, +- local->newloc.name, buf); +- inode_unref(local->newloc.inode); +- local->newloc.inode = linked_inode; +- shard_set_size_attrs_on_marker_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); +- return 0; +-} +- +-int +-shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) +-{ +- int op_errno = ENOMEM; +- dict_t *xattr_req = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- xattr_req 
= shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) +- goto err; +- +- STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); +- dict_unref(xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, op_errno); +- return 0; +-} +- +-int +-shard_create_marker_file_under_remove_me_cbk( +- call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- priv = this->private; +- +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (op_ret < 0) { +- if ((op_errno != EEXIST) && (op_errno != ENODATA)) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Marker file creation " +- "failed while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } else { +- shard_lookup_marker_file(frame, this); +- return 0; +- } +- } +- +- linked_inode = inode_link(inode, priv->dot_shard_rm_inode, +- local->newloc.name, buf); +- inode_unref(local->newloc.inode); +- local->newloc.inode = linked_inode; +- +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +-} +- +-int +-shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this, +- loc_t *loc) +-{ +- int ret = 0; +- int op_errno = ENOMEM; +- uint64_t bs = 0; +- char g1[64] = { +- 0, +- }; +- char g2[64] = { +- 0, +- }; +- dict_t *xattr_req = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; 
+- local = frame->local; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) +- goto err; +- +- local->newloc.inode = inode_new(this->itable); +- local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), +- (char **)&local->newloc.path); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on " +- "pargfid=%s bname=%s", +- uuid_utoa_r(priv->dot_shard_rm_gfid, g1), +- uuid_utoa_r(loc->inode->gfid, g2)); +- goto err; +- } +- local->newloc.name = strrchr(local->newloc.path, '/'); +- if (local->newloc.name) +- local->newloc.name++; +- +- if (local->fop == GF_FOP_UNLINK) +- bs = local->block_size; +- else if (local->fop == GF_FOP_RENAME) +- bs = local->dst_block_size; +- +- SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, +- local->prebuf.ia_size, 0, err); +- +- STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, +- &local->newloc, 0, 0, 0644, xattr_req); +- dict_unref(xattr_req); +- return 0; +- +-err: +- if (xattr_req) +- dict_unref(xattr_req); +- shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, +- NULL, NULL, NULL, NULL, NULL); +- return 0; +-} +- +-int +-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); +- +-int +-shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } else { +- local->preoldparent = *preparent; +- local->postoldparent = *postparent; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- if (local->cleanup_required) +- 
shard_start_background_deletion(this); +- } +- +- if (local->entrylk_frame) { +- ret = shard_unlock_entrylk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- } +- +- ret = shard_unlock_inodelk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- +- shard_unlink_cbk(frame, this); +- return 0; +-} +- +-int +-shard_unlink_base_file(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- +- /* To-Do: Request open-fd count on base file */ +- STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, +- local->xattr_req); +- return 0; +-} +- +-int +-shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- if (op_ret) +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Unlock failed. Please check brick logs for " +- "more details"); +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_entrylk_t *lock = NULL; +- +- local = frame->local; +- lk_frame = local->entrylk_frame; +- lk_local = lk_frame->local; +- local->entrylk_frame = NULL; +- lock = &lk_local->int_entrylk; +- loc = &lock->loc; +- +- STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->entrylk, this->name, loc, +- lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, +- NULL); +- local->int_entrylk.acquired_lock = _gf_false; +- return 0; +-} +- +-int +-shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (local->fop) { +- case GF_FOP_UNLINK: +- case GF_FOP_RENAME: +- 
shard_create_marker_file_under_remove_me(frame, this, +- &local->int_inodelk.loc); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "post-entrylk handler not defined. This case should not" +- " be hit"); +- break; +- } +- return 0; +-} +- +-int +-shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- call_frame_t *main_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *main_local = NULL; +- +- local = frame->local; +- main_frame = local->main_frame; +- main_local = main_frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(main_local->fop, main_frame, op_ret, +- op_errno); +- return 0; +- } +- main_local->int_entrylk.acquired_lock = _gf_true; +- shard_post_entrylk_fop_handler(main_frame, this); +- return 0; +-} +- +-int +-shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, +- uuid_t gfid) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_local_t *entrylk_local = NULL; +- shard_entrylk_t *int_entrylk = NULL; +- call_frame_t *entrylk_frame = NULL; +- +- local = frame->local; +- entrylk_frame = create_frame(this, this->ctx->pool); +- if (!entrylk_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to lock marker file"); +- goto err; +- } +- +- entrylk_local = mem_get0(this->local_pool); +- if (!entrylk_local) { +- STACK_DESTROY(entrylk_frame->root); +- goto err; +- } +- +- entrylk_frame->local = entrylk_local; +- entrylk_local->main_frame = frame; +- int_entrylk = &entrylk_local->int_entrylk; +- +- int_entrylk->loc.inode = inode_ref(inode); +- set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); +- local->entrylk_frame = entrylk_frame; +- gf_uuid_unparse(gfid, gfid_str); +- int_entrylk->basename = gf_strdup(gfid_str); +- +- STACK_WIND(entrylk_frame, 
shard_acquire_entrylk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, +- int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- +- if (local->prebuf.ia_nlink > 1) { +- gf_msg_debug(this->name, 0, +- "link count on %s > 1:%d, " +- "performing rename()/unlink()", +- local->int_inodelk.loc.path, local->prebuf.ia_nlink); +- if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- else if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- } else { +- gf_msg_debug(this->name, 0, +- "link count on %s = 1, creating " +- "file under .remove_me", +- local->int_inodelk.loc.path); +- local->cleanup_required = _gf_true; +- shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, +- local->prebuf.ia_gfid); +- } +- return 0; +-} +- +-int +-shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (local->fop) { +- case GF_FOP_UNLINK: +- case GF_FOP_RENAME: +- shard_lookup_base_file(frame, this, &local->int_inodelk.loc, +- shard_post_lookup_base_shard_rm_handler); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "post-inodelk handler not defined. 
This case should not" +- " be hit"); +- break; +- } +- return 0; +-} +- +-int +-shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- call_frame_t *main_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *main_local = NULL; +- +- local = frame->local; +- main_frame = local->main_frame; +- main_local = main_frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(main_local->fop, main_frame, op_ret, +- op_errno); +- return 0; +- } +- main_local->int_inodelk.acquired_lock = _gf_true; +- shard_post_inodelk_fop_handler(main_frame, this); +- return 0; +-} +- +-int +-shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) +-{ +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_inodelk_t *int_inodelk = NULL; +- +- local = frame->local; +- lk_frame = create_frame(this, this->ctx->pool); +- if (!lk_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to lock base shard"); +- goto err; +- } +- lk_local = mem_get0(this->local_pool); +- if (!lk_local) { +- STACK_DESTROY(lk_frame->root); +- goto err; +- } +- +- lk_frame->local = lk_local; +- lk_local->main_frame = frame; +- int_inodelk = &lk_local->int_inodelk; +- +- int_inodelk->flock.l_len = 0; +- int_inodelk->flock.l_start = 0; +- int_inodelk->domain = this->name; +- int_inodelk->flock.l_type = F_WRLCK; +- loc_copy(&local->int_inodelk.loc, loc); +- set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); +- local->inodelk_frame = lk_frame; +- +- STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, +- &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +-} +- +-int 
+-shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- if (local->fop == GF_FOP_UNLINK) +- loc = &local->loc; +- else if (local->fop == GF_FOP_RENAME) +- loc = &local->loc2; +- shard_acquire_inodelk(frame, this, loc); +- return 0; +-} +- +-int +-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler, +- shard_internal_dir_type_t type); +-int +-shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- return 0; +-} +- +-void +-shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- +- local->dot_shard_rm_loc.inode = inode_find(this->itable, +- priv->dot_shard_rm_gfid); +- if (!local->dot_shard_rm_loc.inode) { +- local->dot_shard_loc.inode = inode_find(this->itable, +- priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_pre_mkdir_rm_handler; +- shard_refresh_internal_dir(frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- } else { +- local->post_res_handler = shard_post_mkdir_rm_handler; +- shard_refresh_internal_dir(frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- } +-} +- +-int +-shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, +- dict_t *xdata) +-{ +- int ret = -1; +- 
uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- loc_copy(&local->loc, loc); +- local->xflag = xflag; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- local->block_size = block_size; +- local->resolver_base_inode = loc->inode; +- local->fop = GF_FOP_UNLINK; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- +- local->resolve_not = _gf_true; +- shard_begin_rm_resolution(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_rename_cbk(frame, this); +- return 0; +-} +- +-int +-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- struct iatt *preoldparent, struct iatt *postoldparent, +- struct iatt *prenewparent, struct iatt *postnewparent, +- dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } +- /* Set ctx->refresh to TRUE to force a lookup on disk when +- * shard_lookup_base_file() is called next to refresh the hard link +- * count in ctx. Note that this is applicable only to the case where +- * the rename dst is already existent and sharded. 
+- */ +- if ((local->dst_block_size) && (!local->cleanup_required)) +- shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); +- +- local->prebuf = *buf; +- local->preoldparent = *preoldparent; +- local->postoldparent = *postoldparent; +- local->prenewparent = *prenewparent; +- local->postnewparent = *postnewparent; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- +- if (local->dst_block_size) { +- if (local->entrylk_frame) { +- ret = shard_unlock_entrylk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- } +- +- ret = shard_unlock_inodelk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- goto err; +- } +- if (local->cleanup_required) +- shard_start_background_deletion(this); +- } +- +- /* Now the base file of src, if sharded, is looked up to gather ia_size +- * and ia_blocks.*/ +- if (local->block_size) { +- local->tmp_loc.inode = inode_new(this->itable); +- gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); +- shard_lookup_base_file(frame, this, &local->tmp_loc, +- shard_post_rename_lookup_handler); +- } else { +- shard_rename_cbk(frame, this); +- } +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int +-shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- /* Save dst base file attributes into postbuf so the information is not +- * lost when it is overwritten after lookup on base file of src in +- * shard_lookup_base_file_cbk(). 
+- */ +- local->postbuf = local->prebuf; +- shard_rename_src_base_file(frame, this); +- return 0; +-} +- +-int +-shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- uint64_t dst_block_size = 0; +- shard_local_t *local = NULL; +- +- if (IA_ISDIR(oldloc->inode->ia_type)) { +- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +- return 0; +- } +- +- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(oldloc->inode->gfid)); +- goto err; +- } +- +- if (newloc->inode) +- ret = shard_inode_ctx_get_block_size(newloc->inode, this, +- &dst_block_size); +- +- /* The following stack_wind covers the case where: +- * a. the src file is not sharded and dst doesn't exist, OR +- * b. the src and dst both exist but are not sharded. +- */ +- if (((!block_size) && (!dst_block_size)) || +- frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- loc_copy(&local->loc, oldloc); +- loc_copy(&local->loc2, newloc); +- local->resolver_base_inode = newloc->inode; +- local->fop = GF_FOP_RENAME; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- +- local->block_size = block_size; +- local->dst_block_size = dst_block_size; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- local->resolve_not = _gf_true; +- +- /* The following if-block covers the case where the dst file exists +- * and is sharded. 
+- */ +- if (local->dst_block_size) { +- shard_begin_rm_resolution(frame, this); +- } else { +- /* The following block covers the case where the dst either doesn't +- * exist or is NOT sharded but the src is sharded. In this case, shard +- * xlator would go ahead and rename src to dst. Once done, it would also +- * lookup the base shard of src to get the ia_size and ia_blocks xattr +- * values. +- */ +- shard_rename_src_base_file(frame, this); +- } +- return 0; +- +-err: +- shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, +- struct iatt *stbuf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret == -1) +- goto unwind; +- +- ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, +- SHARD_ALL_MASK); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, +- "Failed to set inode " +- "ctx for %s", +- uuid_utoa(inode->gfid)); +- +-unwind: +- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, +- preparent, postparent, xdata); +- return 0; +-} +- +-int +-shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, +- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- local->block_size = priv->block_size; +- +- if (!__is_gsyncd_on_shard_dir(frame, loc)) { +- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); +- } +- +- STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, +- xdata); +- return 0; +-err: +- 
shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +-{ +- /* To-Do: Handle open with O_TRUNC under locks */ +- SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); +- return 0; +-} +- +-int +-shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, +- fd_t *fd, dict_t *xdata) +-{ +- STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- return 0; +-} +- +-int +-shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iovec *vector, +- int32_t count, struct iatt *stbuf, struct iobref *iobref, +- dict_t *xdata) +-{ +- int i = 0; +- int call_count = 0; +- void *address = NULL; +- uint64_t block_num = 0; +- off_t off = 0; +- struct iovec vec = { +- 0, +- }; +- shard_local_t *local = NULL; +- fd_t *anon_fd = cookie; +- shard_inode_ctx_t *ctx = NULL; +- +- local = frame->local; +- +- /* If shard has already seen a failure here before, there is no point +- * in aggregating subsequent reads, so just go to out. 
+- */ +- if (local->op_ret < 0) +- goto out; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto out; +- } +- +- if (local->op_ret >= 0) +- local->op_ret += op_ret; +- +- shard_inode_ctx_get(anon_fd->inode, this, &ctx); +- block_num = ctx->block_num; +- +- if (block_num == local->first_block) { +- address = local->iobuf->ptr; +- } else { +- /* else +- * address to start writing to = beginning of buffer + +- * number of bytes until end of first block + +- * + block_size times number of blocks +- * between the current block and the first +- */ +- address = (char *)local->iobuf->ptr + +- (local->block_size - (local->offset % local->block_size)) + +- ((block_num - local->first_block - 1) * local->block_size); +- } +- +- for (i = 0; i < count; i++) { +- address = (char *)address + off; +- memcpy(address, vector[i].iov_base, vector[i].iov_len); +- off += vector[i].iov_len; +- } +- +-out: +- if (anon_fd) +- fd_unref(anon_fd); +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- } else { +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- vec.iov_base = local->iobuf->ptr; +- vec.iov_len = local->total_size; +- local->op_ret = local->total_size; +- SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, +- &vec, 1, &local->prebuf, local->iobref, +- local->xattr_rsp); +- return 0; +- } +- } +- +- return 0; +-} +- +-int +-shard_readv_do(call_frame_t *frame, xlator_t *this) +-{ +- int i = 0; +- int call_count = 0; +- int last_block = 0; +- int cur_block = 0; +- off_t orig_offset = 0; +- off_t shard_offset = 0; +- size_t read_size = 0; +- size_t remaining_size = 0; +- fd_t *fd = NULL; +- fd_t *anon_fd = NULL; +- shard_local_t *local = NULL; +- gf_boolean_t wind_failed = _gf_false; +- +- local = frame->local; +- fd = local->fd; +- +- orig_offset = 
local->offset; +- cur_block = local->first_block; +- last_block = local->last_block; +- remaining_size = local->total_size; +- local->call_count = call_count = local->num_blocks; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- if (fd->flags & O_DIRECT) +- local->flags = O_DIRECT; +- +- while (cur_block <= last_block) { +- if (wind_failed) { +- shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, +- 0, NULL, NULL, NULL); +- goto next; +- } +- +- shard_offset = orig_offset % local->block_size; +- read_size = local->block_size - shard_offset; +- if (read_size > remaining_size) +- read_size = remaining_size; +- +- remaining_size -= read_size; +- +- if (cur_block == 0) { +- anon_fd = fd_ref(fd); +- } else { +- anon_fd = fd_anonymous(local->inode_list[i]); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, 0, NULL, NULL, NULL); +- goto next; +- } +- } ++int shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; + +- STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readv, anon_fd, read_size, +- shard_offset, local->flags, local->xattr_req); ++ priv = this->private; ++ local = frame->local; + +- orig_offset += read_size; +- next: +- cur_block++; +- i++; +- call_count--; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; ++ } ++ ++ if (local->prebuf.ia_nlink > 1) { ++ gf_msg_debug(this->name, 0, "link count on %s > 1:%d, " ++ "performing rename()/unlink()", ++ local->int_inodelk.loc.path, local->prebuf.ia_nlink); ++ if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ else if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ } else { ++ gf_msg_debug(this->name, 0, "link count on %s = 
1, creating " ++ "file under .remove_me", ++ local->int_inodelk.loc.path); ++ local->cleanup_required = _gf_true; ++ shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, ++ local->prebuf.ia_gfid); ++ } ++ return 0; ++} ++ ++int shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_lookup_base_file(frame, this, &local->int_inodelk.loc, ++ shard_post_lookup_base_shard_rm_handler); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-inodelk handler not defined. This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); ++ return 0; ++ } ++ main_local->int_inodelk.acquired_lock = _gf_true; ++ shard_post_inodelk_fop_handler(main_frame, this); ++ return 0; ++} ++ ++int shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) { ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *int_inodelk = NULL; ++ ++ local = frame->local; ++ lk_frame = create_frame(this, this->ctx->pool); ++ if (!lk_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to lock base shard"); ++ goto err; ++ } ++ lk_local = mem_get0(this->local_pool); ++ if (!lk_local) { ++ STACK_DESTROY(lk_frame->root); ++ goto err; ++ } ++ ++ lk_frame->local = lk_local; ++ lk_local->main_frame = frame; ++ 
int_inodelk = &lk_local->int_inodelk; ++ ++ int_inodelk->flock.l_len = 0; ++ int_inodelk->flock.l_start = 0; ++ int_inodelk->domain = this->name; ++ int_inodelk->flock.l_type = F_WRLCK; ++ loc_copy(&local->int_inodelk.loc, loc); ++ set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); ++ local->inodelk_frame = lk_frame; ++ ++ STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, ++ &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int shard_block_num = (long)cookie; +- int call_count = 0; +- shard_local_t *local = NULL; ++int shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret < 0) { +- if (op_errno == EEXIST) { +- LOCK(&frame->lock); +- { +- local->eexist_count++; +- } +- UNLOCK(&frame->lock); +- } else { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } +- gf_msg_debug(this->name, 0, +- "mknod of shard %d " +- "failed: %s", +- shard_block_num, strerror(op_errno)); +- goto done; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++ } ++ if (local->fop == GF_FOP_UNLINK) ++ loc = &local->loc; ++ else if (local->fop == GF_FOP_RENAME) ++ loc = &local->loc2; ++ shard_acquire_inodelk(frame, this, loc); ++ return 0; ++} + +- shard_link_block_inode(local, shard_block_num, inode, buf); ++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type); ++int 
shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +-done: +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- local->create_count = 0; +- local->post_mknod_handler(frame, this); +- } ++ local = frame->local; + ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; ++ } ++ shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ return 0; + } + +-int +-shard_common_resume_mknod(call_frame_t *frame, xlator_t *this, +- shard_post_mknod_fop_handler_t post_mknod_handler) +-{ +- int i = 0; +- int shard_idx_iter = 0; +- int last_block = 0; +- int ret = 0; +- int call_count = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- mode_t mode = 0; +- char *bname = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t ctx_tmp = { +- 0, +- }; +- shard_local_t *local = NULL; +- gf_boolean_t wind_failed = _gf_false; +- fd_t *fd = NULL; +- loc_t loc = { +- 0, +- }; +- dict_t *xattr_req = NULL; +- +- local = frame->local; +- priv = this->private; +- fd = local->fd; +- shard_idx_iter = local->first_block; +- last_block = local->last_block; +- call_count = local->call_count = local->create_count; +- local->post_mknod_handler = post_mknod_handler; ++void shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- SHARD_SET_ROOT_FS_ID(frame, local); ++ priv = this->private; ++ local = frame->local; + +- ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get inode " +- "ctx for %s", +- uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); ++ local->dot_shard_rm_loc.inode = ++ 
inode_find(this->itable, priv->dot_shard_rm_gfid); ++ if (!local->dot_shard_rm_loc.inode) { ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_pre_mkdir_rm_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ } else { ++ local->post_res_handler = shard_post_mkdir_rm_handler; ++ shard_refresh_internal_dir(frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ } ++} ++ ++int shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ loc_copy(&local->loc, loc); ++ local->xflag = xflag; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ local->block_size = block_size; ++ local->resolver_base_inode = loc->inode; ++ local->fop = GF_FOP_UNLINK; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ ++ local->resolve_not = _gf_true; ++ shard_begin_rm_resolution(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); ++ return 0; ++} + +- while (shard_idx_iter <= last_block) { +- if (local->inode_list[i]) { +- shard_idx_iter++; +- i++; +- continue; +- } ++int shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) { ++ shard_rename_cbk(frame, this); ++ return 0; ++} + +- if (wind_failed) { +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; + +- shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, +- sizeof(path)); +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++ local = frame->local; + +- bname = strrchr(path, '/') + 1; +- loc.inode = inode_new(this->itable); +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0 || !(loc.inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- "on %s, base file gfid = %s", +- bname, uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- 
loc_wipe(&loc); +- dict_unref(xattr_req); +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ /* Set ctx->refresh to TRUE to force a lookup on disk when ++ * shard_lookup_base_file() is called next to refresh the hard link ++ * count in ctx. Note that this is applicable only to the case where ++ * the rename dst is already existent and sharded. ++ */ ++ if ((local->dst_block_size) && (!local->cleanup_required)) ++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); ++ ++ local->prebuf = *buf; ++ local->preoldparent = *preoldparent; ++ local->postoldparent = *postoldparent; ++ local->prenewparent = *prenewparent; ++ local->postnewparent = *postnewparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); + +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, +- (void *)(long)shard_idx_iter, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->mknod, &loc, mode, +- ctx_tmp.stat.ia_rdev, 0, xattr_req); +- loc_wipe(&loc); +- dict_unref(xattr_req); +- +- next: +- shard_idx_iter++; +- i++; +- if (!--call_count) +- break; ++ if (local->dst_block_size) { ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } + } + +- return 0; ++ ret = shard_unlock_inodelk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ goto err; ++ } ++ if (local->cleanup_required) ++ shard_start_background_deletion(this); ++ } ++ ++ /* Now the base file of src, if sharded, is looked up to gather ia_size ++ * and ia_blocks.*/ ++ if (local->block_size) { ++ local->tmp_loc.inode = inode_new(this->itable); ++ gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); ++ shard_lookup_base_file(frame, this, 
&local->tmp_loc, ++ shard_post_rename_lookup_handler); ++ } else { ++ shard_rename_cbk(frame, this); ++ } ++ return 0; + err: +- /* +- * This block is for handling failure in shard_inode_ctx_get_all(). +- * Failures in the while-loop are handled within the loop. +- */ +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- post_mknod_handler(frame, this); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } + +-int +-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); +- +-int +-shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++int shard_post_lookup_dst_base_file_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (local->create_count) { +- shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); +- } else { +- shard_readv_do(frame, this); +- } ++ local = frame->local; + ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ /* Save dst base file attributes into postbuf so the information is not ++ * lost when it is overwritten after lookup on base file of src in ++ * shard_lookup_base_file_cbk(). 
++ */ ++ local->postbuf = local->prebuf; ++ shard_rename_src_base_file(frame, this); ++ return 0; ++} ++ ++int shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ uint64_t dst_block_size = 0; ++ shard_local_t *local = NULL; ++ ++ if (IA_ISDIR(oldloc->inode->ia_type)) { ++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(oldloc->inode->gfid)); ++ goto err; ++ } ++ ++ if (newloc->inode) ++ ret = shard_inode_ctx_get_block_size(newloc->inode, this, &dst_block_size); ++ ++ /* The following stack_wind covers the case where: ++ * a. the src file is not sharded and dst doesn't exist, OR ++ * b. the src and dst both exist but are not sharded. ++ */ ++ if (((!block_size) && (!dst_block_size)) || ++ frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ loc_copy(&local->loc, oldloc); ++ loc_copy(&local->loc2, newloc); ++ local->resolver_base_inode = newloc->inode; ++ local->fop = GF_FOP_RENAME; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ local->block_size = block_size; ++ local->dst_block_size = dst_block_size; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ local->resolve_not = _gf_true; ++ ++ /* The following if-block covers the case where the dst file exists ++ * and is sharded. 
++ */ ++ if (local->dst_block_size) { ++ shard_begin_rm_resolution(frame, this); ++ } else { ++ /* The following block covers the case where the dst either doesn't ++ * exist or is NOT sharded but the src is sharded. In this case, shard ++ * xlator would go ahead and rename src to dst. Once done, it would also ++ * lookup the base shard of src to get the ia_size and ia_blocks xattr ++ * values. ++ */ ++ shard_rename_src_base_file(frame, this); ++ } ++ return 0; ++ ++err: ++ shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, ++ struct iatt *stbuf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int ret = -1; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ if (op_ret == -1) ++ goto unwind; + +- if (!local->eexist_count) { +- shard_readv_do(frame, this); +- } else { +- local->call_count = local->eexist_count; +- shard_common_lookup_shards(frame, this, local->loc.inode, +- shard_post_lookup_shards_readv_handler); +- } +- return 0; ++ ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, ++ SHARD_ALL_MASK); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, ++ "Failed to set inode " ++ "ctx for %s", ++ uuid_utoa(inode->gfid)); ++ ++unwind: ++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, ++ preparent, postparent, xdata); ++ return 0; + } + +-int +-shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, 
++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (local->op_ret < 0) { +- if (local->op_errno != ENOENT) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } else { +- struct iovec vec = { +- 0, +- }; +- +- vec.iov_base = local->iobuf->ptr; +- vec.iov_len = local->total_size; +- local->op_ret = local->total_size; +- SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, +- &local->prebuf, local->iobref, NULL); +- return 0; +- } +- } ++ frame->local = local; ++ local->block_size = priv->block_size; + +- if (local->call_count) { +- shard_common_lookup_shards(frame, this, local->resolver_base_inode, +- shard_post_lookup_shards_readv_handler); +- } else { +- shard_readv_do(frame, this); +- } ++ if (!__is_gsyncd_on_shard_dir(frame, loc)) { ++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ } + +- return 0; +-} ++ STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, ++ xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { ++ /* To-Do: Handle open with O_TRUNC under locks */ ++ SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); ++ return 0; ++} ++ ++int shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ fd_t *fd, dict_t *xdata) { ++ STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); ++ return 0; ++} ++ ++int shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iovec *vector, ++ 
int32_t count, struct iatt *stbuf, struct iobref *iobref, ++ dict_t *xdata) { ++ int i = 0; ++ int call_count = 0; ++ void *address = NULL; ++ uint64_t block_num = 0; ++ off_t off = 0; ++ struct iovec vec = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ fd_t *anon_fd = cookie; ++ shard_inode_ctx_t *ctx = NULL; ++ ++ local = frame->local; ++ ++ /* If shard has already seen a failure here before, there is no point ++ * in aggregating subsequent reads, so just go to out. ++ */ ++ if (local->op_ret < 0) ++ goto out; ++ ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto out; ++ } ++ ++ if (local->op_ret >= 0) ++ local->op_ret += op_ret; + +-int +-shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- struct iobuf *iobuf = NULL; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; ++ shard_inode_ctx_get(anon_fd->inode, this, &ctx); ++ block_num = ctx->block_num; ++ ++ if (block_num == local->first_block) { ++ address = local->iobuf->ptr; ++ } else { ++ /* else ++ * address to start writing to = beginning of buffer + ++ * number of bytes until end of first block + ++ * + block_size times number of blocks ++ * between the current block and the first ++ */ ++ address = (char *)local->iobuf->ptr + ++ (local->block_size - (local->offset % local->block_size)) + ++ ((block_num - local->first_block - 1) * local->block_size); ++ } + +- priv = this->private; +- local = frame->local; ++ for (i = 0; i < count; i++) { ++ address = (char *)address + off; ++ memcpy(address, vector[i].iov_base, vector[i].iov_len); ++ off += vector[i].iov_len; ++ } + ++out: ++ if (anon_fd) ++ fd_unref(anon_fd); ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); + if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ 
local->op_errno); ++ } else { ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ vec.iov_base = local->iobuf->ptr; ++ vec.iov_len = local->total_size; ++ local->op_ret = local->total_size; ++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1, ++ &local->prebuf, local->iobref, local->xattr_rsp); ++ return 0; ++ } ++ } ++ ++ return 0; ++} ++ ++int shard_readv_do(call_frame_t *frame, xlator_t *this) { ++ int i = 0; ++ int call_count = 0; ++ int last_block = 0; ++ int cur_block = 0; ++ off_t orig_offset = 0; ++ off_t shard_offset = 0; ++ size_t read_size = 0; ++ size_t remaining_size = 0; ++ fd_t *fd = NULL; ++ fd_t *anon_fd = NULL; ++ shard_local_t *local = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ ++ local = frame->local; ++ fd = local->fd; ++ ++ orig_offset = local->offset; ++ cur_block = local->first_block; ++ last_block = local->last_block; ++ remaining_size = local->total_size; ++ local->call_count = call_count = local->num_blocks; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ if (fd->flags & O_DIRECT) ++ local->flags = O_DIRECT; ++ ++ while (cur_block <= last_block) { ++ if (wind_failed) { ++ shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, 0, ++ NULL, NULL, NULL); ++ goto next; ++ } ++ ++ shard_offset = orig_offset % local->block_size; ++ read_size = local->block_size - shard_offset; ++ if (read_size > remaining_size) ++ read_size = remaining_size; ++ ++ remaining_size -= read_size; ++ ++ if (cur_block == 0) { ++ anon_fd = fd_ref(fd); ++ } else { ++ anon_fd = fd_anonymous(local->inode_list[i]); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, NULL, ++ 0, NULL, NULL, NULL); ++ goto next; ++ } + } + +- if (local->offset >= local->prebuf.ia_size) { +- /* If the read is being performed past the end of the file, +- * unwind the FOP with 0 bytes read as status. 
+- */ +- struct iovec vec = { +- 0, +- }; +- +- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); +- if (!iobuf) +- goto err; +- +- vec.iov_base = iobuf->ptr; +- vec.iov_len = 0; +- local->iobref = iobref_new(); +- iobref_add(local->iobref, iobuf); +- iobuf_unref(iobuf); +- +- SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, +- local->iobref, NULL); +- return 0; +- } ++ STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, anon_fd, read_size, ++ shard_offset, local->flags, local->xattr_req); ++ ++ orig_offset += read_size; ++ next: ++ cur_block++; ++ i++; ++ call_count--; ++ } ++ return 0; ++} + +- local->first_block = get_lowest_block(local->offset, local->block_size); ++int shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int shard_block_num = (long)cookie; ++ int call_count = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ if (op_errno == EEXIST) { ++ LOCK(&frame->lock); ++ { local->eexist_count++; } ++ UNLOCK(&frame->lock); ++ } else { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } ++ gf_msg_debug(this->name, 0, "mknod of shard %d " ++ "failed: %s", ++ shard_block_num, strerror(op_errno)); ++ goto done; ++ } + +- local->total_size = local->req_size; ++ shard_link_block_inode(local, shard_block_num, inode, buf); + +- local->last_block = get_highest_block(local->offset, local->total_size, +- local->block_size); ++done: ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ local->create_count = 0; ++ local->post_mknod_handler(frame, this); ++ } ++ ++ return 0; ++} ++ ++int shard_common_resume_mknod( ++ call_frame_t *frame, xlator_t *this, ++ shard_post_mknod_fop_handler_t post_mknod_handler) { ++ 
int i = 0; ++ int shard_idx_iter = 0; ++ int last_block = 0; ++ int ret = 0; ++ int call_count = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ mode_t mode = 0; ++ char *bname = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t ctx_tmp = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ fd_t *fd = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ dict_t *xattr_req = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ fd = local->fd; ++ shard_idx_iter = local->first_block; ++ last_block = local->last_block; ++ call_count = local->call_count = local->create_count; ++ local->post_mknod_handler = post_mknod_handler; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get inode " ++ "ctx for %s", ++ uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); + +- local->num_blocks = local->last_block - local->first_block + 1; +- local->resolver_base_inode = local->loc.inode; ++ while (shard_idx_iter <= last_block) { ++ if (local->inode_list[i]) { ++ shard_idx_iter++; ++ i++; ++ continue; ++ } + +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto err; ++ if (wind_failed) { ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; ++ } + +- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); +- if (!iobuf) +- goto err; ++ shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, ++ sizeof(path)); + +- local->iobref = iobref_new(); +- if (!local->iobref) { +- iobuf_unref(iobuf); +- goto err; ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ 
local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; + } + +- if (iobref_add(local->iobref, iobuf) != 0) { +- iobuf_unref(iobuf); +- goto err; ++ bname = strrchr(path, '/') + 1; ++ loc.inode = inode_new(this->itable); ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0 || !(loc.inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ "on %s, base file gfid = %s", ++ bname, uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; + } + +- memset(iobuf->ptr, 0, local->total_size); +- iobuf_unref(iobuf); +- local->iobuf = iobuf; ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = shard_init_internal_dir_loc(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto err; +- shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_readv_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; ++ STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, ++ (void *)(long)shard_idx_iter, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, &loc, mode, ++ ctx_tmp.stat.ia_rdev, 0, xattr_req); ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ ++ next: ++ shard_idx_iter++; ++ i++; ++ if (!--call_count) ++ break; ++ } ++ ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); +- return 0; 
++ /* ++ * This block is for handling failure in shard_inode_ctx_get_all(). ++ * Failures in the while-loop are handled within the loop. ++ */ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ post_mknod_handler(frame, this); ++ return 0; + } + +-int +-shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, uint32_t flags, dict_t *xdata) +-{ +- int ret = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++int shard_post_lookup_shards_readv_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- /* block_size = 0 means that the file was created before +- * sharding was enabled on the volume. +- */ +- STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, +- xdata); +- return 0; +- } ++ local = frame->local; + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ if (local->create_count) { ++ shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); ++ } else { ++ shard_readv_do(frame, this); ++ } + +- frame->local = local; ++ return 0; ++} + +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- local->fd = fd_ref(fd); +- local->block_size = block_size; +- local->offset = offset; +- local->req_size = size; +- local->flags = flags; +- local->fop = GF_FOP_READ; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local = frame->local; + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_readv_handler); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ if (!local->eexist_count) { ++ shard_readv_do(frame, this); ++ } else { ++ local->call_count = local->eexist_count; ++ shard_common_lookup_shards(frame, this, local->loc.inode, ++ shard_post_lookup_shards_readv_handler); ++ } ++ return 0; + } + +-int +-shard_common_inode_write_post_update_size_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); ++ if (local->op_ret < 0) { ++ if (local->op_errno != ENOENT) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } else { +- shard_common_inode_write_success_unwind(local->fop, frame, +- local->written_size); ++ struct iovec vec = { ++ 0, ++ }; ++ ++ vec.iov_base = local->iobuf->ptr; ++ vec.iov_len = local->total_size; ++ local->op_ret = local->total_size; ++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, ++ &local->prebuf, local->iobref, NULL); ++ return 0; + } +- return 0; +-} ++ } + +-static gf_boolean_t +-shard_is_appending_write(shard_local_t *local) +-{ +- if (local->fop != GF_FOP_WRITE) +- return _gf_false; +- if (local->flags & O_APPEND) +- return 
_gf_true; +- if (local->fd->flags & O_APPEND) +- return _gf_true; +- return _gf_false; ++ if (local->call_count) { ++ shard_common_lookup_shards(frame, this, local->resolver_base_inode, ++ shard_post_lookup_shards_readv_handler); ++ } else { ++ shard_readv_do(frame, this); ++ } ++ ++ return 0; + } + +-int +-__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ struct iobuf *iobuf = NULL; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ if (local->offset >= local->prebuf.ia_size) { ++ /* If the read is being performed past the end of the file, ++ * unwind the FOP with 0 bytes read as status. 
++ */ ++ struct iovec vec = { ++ 0, ++ }; + +- if (shard_is_appending_write(local)) { +- local->delta_size = local->total_size; +- } else if (local->offset + local->total_size > ctx->stat.ia_size) { +- local->delta_size = (local->offset + local->total_size) - +- ctx->stat.ia_size; +- } else { +- local->delta_size = 0; +- } +- ctx->stat.ia_size += (local->delta_size); +- local->postbuf = ctx->stat; ++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); ++ if (!iobuf) ++ goto err; ++ ++ vec.iov_base = iobuf->ptr; ++ vec.iov_len = 0; ++ local->iobref = iobref_new(); ++ iobref_add(local->iobref, iobuf); ++ iobuf_unref(iobuf); + ++ SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, ++ local->iobref, NULL); + return 0; +-} ++ } + +-int +-shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; ++ local->first_block = get_lowest_block(local->offset, local->block_size); + +- LOCK(&inode->lock); +- { +- ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); +- } +- UNLOCK(&inode->lock); ++ local->total_size = local->req_size; + +- return ret; +-} ++ local->last_block = ++ get_highest_block(local->offset, local->total_size, local->block_size); + +-int +-shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, struct iatt *pre, +- struct iatt *post, dict_t *xdata) +-{ +- int call_count = 0; +- fd_t *anon_fd = cookie; +- shard_local_t *local = NULL; +- glusterfs_fop_t fop = 0; ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->resolver_base_inode = local->loc.inode; + +- local = frame->local; +- fop = local->fop; ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ goto err; + +- LOCK(&frame->lock); +- { +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } else { +- local->written_size += 
op_ret; +- GF_ATOMIC_ADD(local->delta_blocks, +- post->ia_blocks - pre->ia_blocks); +- local->delta_size += (post->ia_size - pre->ia_size); +- shard_inode_ctx_set(local->fd->inode, this, post, 0, +- SHARD_MASK_TIMES); +- if (local->fd->inode != anon_fd->inode) +- shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, +- anon_fd->inode); +- } +- } +- UNLOCK(&frame->lock); ++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); ++ if (!iobuf) ++ goto err; + +- if (anon_fd) +- fd_unref(anon_fd); ++ local->iobref = iobref_new(); ++ if (!local->iobref) { ++ iobuf_unref(iobuf); ++ goto err; ++ } + +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (local->op_ret < 0) { +- shard_common_failure_unwind(fop, frame, local->op_ret, +- local->op_errno); +- } else { +- shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); +- local->hole_size = 0; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- shard_update_file_size( +- frame, this, local->fd, NULL, +- shard_common_inode_write_post_update_size_handler); +- } +- } ++ if (iobref_add(local->iobref, iobuf) != 0) { ++ iobuf_unref(iobuf); ++ goto err; ++ } + +- return 0; ++ memset(iobuf->ptr, 0, local->total_size); ++ iobuf_unref(iobuf); ++ local->iobuf = iobuf; ++ ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ ret = ++ shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret) ++ goto err; ++ shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_post_resolve_readv_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ 
off_t offset, uint32_t flags, dict_t *xdata) { ++ int ret = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ /* block_size = 0 means that the file was created before ++ * sharding was enabled on the volume. ++ */ ++ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ local->fd = fd_ref(fd); ++ local->block_size = block_size; ++ local->offset = offset; ++ local->req_size = size; ++ local->flags = flags; ++ local->fop = GF_FOP_READ; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_readv_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iovec *vec, int count, off_t shard_offset, +- size_t size) +-{ +- shard_local_t *local = NULL; ++int shard_common_inode_write_post_update_size_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- switch (local->fop) { +- case GF_FOP_WRITE: +- STACK_WIND_COOKIE( +- frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset, +- local->flags, local->iobref, local->xattr_req); +- break; +- case GF_FOP_FALLOCATE: +- STACK_WIND_COOKIE( +- frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fallocate, fd, local->flags, +- shard_offset, size, local->xattr_req); +- break; +- case GF_FOP_ZEROFILL: +- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, +- FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->zerofill, fd, +- shard_offset, size, local->xattr_req); +- break; +- case GF_FOP_DISCARD: +- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, +- FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->discard, fd, +- shard_offset, size, local->xattr_req); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", local->fop); +- break; +- } +- return 0; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ shard_common_inode_write_success_unwind(local->fop, frame, ++ local->written_size); ++ } ++ 
return 0; + } + +-int +-shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) +-{ +- int i = 0; +- int count = 0; +- int call_count = 0; +- int last_block = 0; +- uint32_t cur_block = 0; +- fd_t *fd = NULL; +- fd_t *anon_fd = NULL; +- shard_local_t *local = NULL; +- struct iovec *vec = NULL; +- gf_boolean_t wind_failed = _gf_false; +- gf_boolean_t odirect = _gf_false; +- off_t orig_offset = 0; +- off_t shard_offset = 0; +- off_t vec_offset = 0; +- size_t remaining_size = 0; +- size_t shard_write_size = 0; +- +- local = frame->local; +- fd = local->fd; +- +- orig_offset = local->offset; +- remaining_size = local->total_size; +- cur_block = local->first_block; +- local->call_count = call_count = local->num_blocks; +- last_block = local->last_block; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC +- " into " +- "dict: %s", +- uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- local->call_count = 1; +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, +- ENOMEM, NULL, NULL, NULL); +- return 0; +- } ++static gf_boolean_t shard_is_appending_write(shard_local_t *local) { ++ if (local->fop != GF_FOP_WRITE) ++ return _gf_false; ++ if (local->flags & O_APPEND) ++ return _gf_true; ++ if (local->fd->flags & O_APPEND) ++ return _gf_true; ++ return _gf_false; ++} + +- if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) +- odirect = _gf_true; ++int __shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- while (cur_block <= last_block) { +- if (wind_failed) { +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } ++ ret = __inode_ctx_get(inode, 
this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- shard_offset = orig_offset % local->block_size; +- shard_write_size = local->block_size - shard_offset; +- if (shard_write_size > remaining_size) +- shard_write_size = remaining_size; +- +- remaining_size -= shard_write_size; +- +- if (local->fop == GF_FOP_WRITE) { +- count = iov_subset(local->vector, local->count, vec_offset, +- vec_offset + shard_write_size, NULL); +- +- vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); +- if (!vec) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- GF_FREE(vec); +- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, +- -1, ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- count = iov_subset(local->vector, local->count, vec_offset, +- vec_offset + shard_write_size, vec); +- } ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- if (cur_block == 0) { +- anon_fd = fd_ref(fd); +- } else { +- anon_fd = fd_anonymous(local->inode_list[i]); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- GF_FREE(vec); +- shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, +- this, -1, ENOMEM, NULL, NULL, +- NULL); +- goto next; +- } +- +- if (local->fop == GF_FOP_WRITE) { +- if (odirect) +- local->flags = O_DIRECT; +- else +- local->flags = GF_ANON_FD_FLAGS; +- } +- } ++ if (shard_is_appending_write(local)) { ++ local->delta_size = local->total_size; ++ } else if (local->offset + local->total_size > ctx->stat.ia_size) { ++ local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size; ++ } else { ++ local->delta_size = 0; ++ } ++ ctx->stat.ia_size += (local->delta_size); ++ local->postbuf = ctx->stat; + +- shard_common_inode_write_wind(frame, this, anon_fd, vec, count, +- shard_offset, shard_write_size); +- if (vec) +- vec_offset += shard_write_size; +- orig_offset += shard_write_size; +- GF_FREE(vec); +- vec = NULL; +- next: +- cur_block++; +- i++; +- call_count--; 
+- } +- return 0; ++ return 0; + } + +-int +-shard_common_inode_write_post_mknod_handler(call_frame_t *frame, +- xlator_t *this); ++int shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = -1; ++ ++ LOCK(&inode->lock); ++ { ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); } ++ UNLOCK(&inode->lock); + +-int +-shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = NULL; ++ return ret; ++} + +- local = frame->local; ++int shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *pre, ++ struct iatt *post, dict_t *xdata) { ++ int call_count = 0; ++ fd_t *anon_fd = cookie; ++ shard_local_t *local = NULL; ++ glusterfs_fop_t fop = 0; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ local = frame->local; ++ fop = local->fop; + +- if (local->create_count) { +- shard_common_resume_mknod(frame, this, +- shard_common_inode_write_post_mknod_handler); ++ LOCK(&frame->lock); ++ { ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; + } else { +- shard_common_inode_write_do(frame, this); ++ local->written_size += op_ret; ++ GF_ATOMIC_ADD(local->delta_blocks, post->ia_blocks - pre->ia_blocks); ++ local->delta_size += (post->ia_size - pre->ia_size); ++ shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES); ++ if (local->fd->inode != anon_fd->inode) ++ shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, ++ anon_fd->inode); ++ } ++ } ++ UNLOCK(&frame->lock); ++ ++ if (anon_fd) ++ fd_unref(anon_fd); ++ ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(fop, frame, local->op_ret, local->op_errno); ++ } else { ++ 
shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); ++ local->hole_size = 0; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ shard_update_file_size(frame, this, local->fd, NULL, ++ shard_common_inode_write_post_update_size_handler); + } ++ } + +- return 0; ++ return 0; + } + +-int +-shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iovec *vec, int count, ++ off_t shard_offset, size_t size) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ switch (local->fop) { ++ case GF_FOP_WRITE: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, ++ vec, count, shard_offset, local->flags, local->iobref, ++ local->xattr_req); ++ break; ++ case GF_FOP_FALLOCATE: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd, ++ local->flags, shard_offset, size, local->xattr_req); ++ break; ++ case GF_FOP_ZEROFILL: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd, ++ shard_offset, size, local->xattr_req); ++ break; ++ case GF_FOP_DISCARD: ++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd, ++ shard_offset, size, local->xattr_req); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", local->fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) { ++ int i = 0; ++ int count = 0; ++ int call_count = 0; ++ int last_block = 0; ++ uint32_t 
cur_block = 0; ++ fd_t *fd = NULL; ++ fd_t *anon_fd = NULL; ++ shard_local_t *local = NULL; ++ struct iovec *vec = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ gf_boolean_t odirect = _gf_false; ++ off_t orig_offset = 0; ++ off_t shard_offset = 0; ++ off_t vec_offset = 0; ++ size_t remaining_size = 0; ++ size_t shard_write_size = 0; ++ ++ local = frame->local; ++ fd = local->fd; ++ ++ orig_offset = local->offset; ++ remaining_size = local->total_size; ++ cur_block = local->first_block; ++ local->call_count = call_count = local->num_blocks; ++ last_block = local->last_block; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC " into " ++ "dict: %s", ++ uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ local->call_count = 1; ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ return 0; ++ } + +- if (!local->eexist_count) { +- shard_common_inode_write_do(frame, this); +- } else { +- local->call_count = local->eexist_count; +- shard_common_lookup_shards( +- frame, this, local->loc.inode, +- shard_common_inode_write_post_lookup_shards_handler); ++ if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) ++ odirect = _gf_true; ++ ++ while (cur_block <= last_block) { ++ if (wind_failed) { ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } + +- return 0; +-} ++ shard_offset = orig_offset % local->block_size; ++ shard_write_size = local->block_size - shard_offset; ++ if (shard_write_size > remaining_size) ++ shard_write_size = remaining_size; + +-int +-shard_common_inode_write_post_resolve_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = NULL; ++ remaining_size -= shard_write_size; + +- local = frame->local; 
++ if (local->fop == GF_FOP_WRITE) { ++ count = iov_subset(local->vector, local->count, vec_offset, ++ vec_offset + shard_write_size, NULL); + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); ++ if (!vec) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ GF_FREE(vec); ++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } ++ count = iov_subset(local->vector, local->count, vec_offset, ++ vec_offset + shard_write_size, vec); + } + +- if (local->call_count) { +- shard_common_lookup_shards( +- frame, this, local->resolver_base_inode, +- shard_common_inode_write_post_lookup_shards_handler); ++ if (cur_block == 0) { ++ anon_fd = fd_ref(fd); + } else { +- shard_common_inode_write_do(frame, this); +- } ++ anon_fd = fd_anonymous(local->inode_list[i]); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ GF_FREE(vec); ++ shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; ++ } + +- return 0; ++ if (local->fop == GF_FOP_WRITE) { ++ if (odirect) ++ local->flags = O_DIRECT; ++ else ++ local->flags = GF_ANON_FD_FLAGS; ++ } ++ } ++ ++ shard_common_inode_write_wind(frame, this, anon_fd, vec, count, ++ shard_offset, shard_write_size); ++ if (vec) ++ vec_offset += shard_write_size; ++ orig_offset += shard_write_size; ++ GF_FREE(vec); ++ vec = NULL; ++ next: ++ cur_block++; ++ i++; ++ call_count--; ++ } ++ return 0; + } + +-int +-shard_common_inode_write_post_lookup_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- shard_priv_t *priv = this->private; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- 
local->postbuf = local->prebuf; +- +- /*Adjust offset to EOF so that correct shard is chosen for append*/ +- if (shard_is_appending_write(local)) +- local->offset = local->prebuf.ia_size; +- +- local->first_block = get_lowest_block(local->offset, local->block_size); +- local->last_block = get_highest_block(local->offset, local->total_size, +- local->block_size); +- local->num_blocks = local->last_block - local->first_block + 1; +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) { +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +- } ++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, ++ xlator_t *this); + +- gf_msg_trace(this->name, 0, +- "%s: gfid=%s first_block=%" PRIu32 +- " " +- "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64 +- " total_size=%zu flags=%" PRId32 "", +- gf_fop_list[local->fop], +- uuid_utoa(local->resolver_base_inode->gfid), +- local->first_block, local->last_block, local->num_blocks, +- local->offset, local->total_size, local->flags); ++int shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ local = frame->local; + +- if (!local->dot_shard_loc.inode) { +- /*change handler*/ +- shard_mkdir_internal_dir(frame, this, +- shard_common_inode_write_post_resolve_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- /*change handler*/ +- local->post_res_handler = shard_common_inode_write_post_resolve_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; +-} +- +-int +-shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- 
struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++ } + +- local = frame->local; ++ if (local->create_count) { ++ shard_common_resume_mknod(frame, this, ++ shard_common_inode_write_post_mknod_handler); ++ } else { ++ shard_common_inode_write_do(frame, this); ++ } + +- SHARD_UNSET_ROOT_FS_ID(frame, local); ++ return 0; ++} + +- if (op_ret == -1) { +- if (op_errno != EEXIST) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } else { +- gf_msg_debug(this->name, 0, +- "mkdir on %s failed " +- "with EEXIST. Attempting lookup now", +- shard_internal_dir_string(type)); +- shard_lookup_internal_dir(frame, this, local->post_res_handler, +- type); +- return 0; +- } +- } ++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- if (link_inode != inode) { +- shard_refresh_internal_dir(frame, this, type); +- } else { +- shard_inode_ctx_mark_dir_refreshed(link_inode, this); +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- } +- return 0; +-unwind: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; +-} ++ local = frame->local; + +-int +-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- dict_t *xattr_req = NULL; +- uuid_t *gfid = NULL; +- loc_t *loc = NULL; +- gf_boolean_t free_gfid = _gf_true; +- +- local = frame->local; +- priv = this->private; +- +- local->post_res_handler = handler; +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); +- if (!gfid) +- goto err; +- +- switch (type) { +- case 
SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(*gfid, priv->dot_shard_gfid); +- loc = &local->dot_shard_loc; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); +- loc = &local->dot_shard_rm_loc; +- break; +- default: +- bzero(*gfid, sizeof(uuid_t)); +- break; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- xattr_req = dict_new(); +- if (!xattr_req) +- goto err; ++ if (!local->eexist_count) { ++ shard_common_inode_write_do(frame, this); ++ } else { ++ local->call_count = local->eexist_count; ++ shard_common_lookup_shards( ++ frame, this, local->loc.inode, ++ shard_common_inode_write_post_lookup_shards_handler); ++ } + +- ret = shard_init_internal_dir_loc(this, local, type); +- if (ret) +- goto err; ++ return 0; ++} + +- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set gfid-req for %s", +- shard_internal_dir_string(type)); +- goto err; +- } else { +- free_gfid = _gf_false; +- } ++int shard_common_inode_write_post_resolve_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- SHARD_SET_ROOT_FS_ID(frame, local); ++ local = frame->local; + +- STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, +- 0755, 0, xattr_req); +- dict_unref(xattr_req); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } + +-err: +- if (xattr_req) +- dict_unref(xattr_req); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- if (free_gfid) +- GF_FREE(gfid); +- handler(frame, this); +- return 0; +-} ++ if (local->call_count) { ++ shard_common_lookup_shards( ++ frame, this, local->resolver_base_inode, ++ 
shard_common_inode_write_post_lookup_shards_handler); ++ } else { ++ shard_common_inode_write_do(frame, this); ++ } + +-int +-shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- /* To-Do: Wind flush on all shards of the file */ +- SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); +- return 0; ++ return 0; + } + +-int +-shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +-{ +- STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->flush, fd, xdata); ++int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = frame->local; ++ shard_priv_t *priv = this->private; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; +-} ++ } + +-int +-__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++ local->postbuf = local->prebuf; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ /*Adjust offset to EOF so that correct shard is chosen for append*/ ++ if (shard_is_appending_write(local)) ++ local->offset = local->prebuf.ia_size; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ local->first_block = get_lowest_block(local->offset, local->block_size); ++ local->last_block = ++ get_highest_block(local->offset, local->total_size, local->block_size); ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) { ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } + +- local->postbuf.ia_ctime = ctx->stat.ia_ctime; +- local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; +- local->postbuf.ia_atime = 
ctx->stat.ia_atime; +- local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; +- local->postbuf.ia_mtime = ctx->stat.ia_mtime; +- local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; ++ gf_msg_trace( ++ this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " " ++ "last_block=%" PRIu32 " num_blocks=%" PRIu32 ++ " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", ++ gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), ++ local->first_block, local->last_block, local->num_blocks, local->offset, ++ local->total_size, local->flags); + +- return 0; +-} ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + +-int +-shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = 0; ++ if (!local->dot_shard_loc.inode) { ++ /*change handler*/ ++ shard_mkdir_internal_dir(frame, this, ++ shard_common_inode_write_post_resolve_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ /*change handler*/ ++ local->post_res_handler = shard_common_inode_write_post_resolve_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; ++} + +- LOCK(&inode->lock); +- { +- ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); +- } +- UNLOCK(&inode->lock); ++int shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++ ++ local = frame->local; ++ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ ++ if (op_ret == -1) { ++ if (op_errno != EEXIST) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } else { ++ gf_msg_debug(this->name, 0, "mkdir on %s failed " ++ "with EEXIST. 
Attempting lookup now", ++ shard_internal_dir_string(type)); ++ shard_lookup_internal_dir(frame, this, local->post_res_handler, type); ++ return 0; ++ } ++ } ++ ++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ if (link_inode != inode) { ++ shard_refresh_internal_dir(frame, this, type); ++ } else { ++ shard_inode_ctx_mark_dir_refreshed(link_inode, this); ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ } ++ return 0; ++unwind: ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; ++} ++ ++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ dict_t *xattr_req = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ local->post_res_handler = handler; ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; ++ default: ++ bzero(*gfid, sizeof(uuid_t)); ++ break; ++ } ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) ++ goto err; ++ ++ ret = shard_init_internal_dir_loc(this, local, type); ++ if (ret) ++ goto err; ++ ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set gfid-req for %s", shard_internal_dir_string(type)); ++ goto err; ++ } else { ++ free_gfid = _gf_false; ++ } ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, ++ 
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, ++ 0755, 0, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + +- return ret; ++err: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ if (free_gfid) ++ GF_FREE(gfid); ++ handler(frame, this); ++ return 0; + } + +-int +-shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +- struct iatt *postbuf, dict_t *xdata) +-{ +- int call_count = 0; +- uint64_t fsync_count = 0; +- fd_t *anon_fd = cookie; +- shard_local_t *local = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *base_ictx = NULL; +- inode_t *base_inode = NULL; +- gf_boolean_t unref_shard_inode = _gf_false; ++int shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ /* To-Do: Wind flush on all shards of the file */ ++ SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); ++ return 0; ++} + +- local = frame->local; +- base_inode = local->fd->inode; ++int shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ++ STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->flush, fd, xdata); ++ return 0; ++} + +- if (local->op_ret < 0) +- goto out; ++int __shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- LOCK(&frame->lock); +- { +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- UNLOCK(&frame->lock); +- goto out; +- } +- shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, +- SHARD_MASK_TIMES); +- } +- UNLOCK(&frame->lock); +- fd_ctx_get(anon_fd, this, &fsync_count); +-out: +- if (anon_fd && (base_inode != anon_fd->inode)) { +- LOCK(&base_inode->lock); +- LOCK(&anon_fd->inode->lock); +- { +- __shard_inode_ctx_get(anon_fd->inode, this, &ctx); +- 
__shard_inode_ctx_get(base_inode, this, &base_ictx); +- if (op_ret == 0) +- ctx->fsync_needed -= fsync_count; +- GF_ASSERT(ctx->fsync_needed >= 0); +- if (ctx->fsync_needed != 0) { +- list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); +- base_ictx->fsync_count++; +- } else { +- unref_shard_inode = _gf_true; +- } +- } +- UNLOCK(&anon_fd->inode->lock); +- UNLOCK(&base_inode->lock); +- } ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- if (unref_shard_inode) +- inode_unref(anon_fd->inode); +- if (anon_fd) +- fd_unref(anon_fd); ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- call_count = shard_call_count_return(frame); +- if (call_count != 0) +- return 0; ++ local->postbuf.ia_ctime = ctx->stat.ia_ctime; ++ local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; ++ local->postbuf.ia_atime = ctx->stat.ia_atime; ++ local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; ++ local->postbuf.ia_mtime = ctx->stat.ia_mtime; ++ local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, +- local->op_errno); +- } else { +- shard_get_timestamps_from_inode_ctx(local, base_inode, this); +- SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } +- return 0; ++ return 0; + } + +-int +-shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- int call_count = 0; +- int fsync_count = 0; +- fd_t *anon_fd = NULL; +- inode_t *base_inode = NULL; +- shard_local_t *local = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *iter = NULL; +- struct list_head copy = { +- 0, +- }; +- shard_inode_ctx_t *tmp = NULL; ++int shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, ++ xlator_t *this) { ++ int ret = 0; + +- local = frame->local; +- base_inode = local->fd->inode; +- local->postbuf = local->prebuf; +- 
INIT_LIST_HEAD(&copy); ++ LOCK(&inode->lock); ++ { ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); } ++ UNLOCK(&inode->lock); + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ return ret; ++} + ++int shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *prebuf, struct iatt *postbuf, ++ dict_t *xdata) { ++ int call_count = 0; ++ uint64_t fsync_count = 0; ++ fd_t *anon_fd = cookie; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *base_ictx = NULL; ++ inode_t *base_inode = NULL; ++ gf_boolean_t unref_shard_inode = _gf_false; ++ ++ local = frame->local; ++ base_inode = local->fd->inode; ++ ++ if (local->op_ret < 0) ++ goto out; ++ ++ LOCK(&frame->lock); ++ { ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ UNLOCK(&frame->lock); ++ goto out; ++ } ++ shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, SHARD_MASK_TIMES); ++ } ++ UNLOCK(&frame->lock); ++ fd_ctx_get(anon_fd, this, &fsync_count); ++out: ++ if (anon_fd && (base_inode != anon_fd->inode)) { + LOCK(&base_inode->lock); ++ LOCK(&anon_fd->inode->lock); + { +- __shard_inode_ctx_get(base_inode, this, &ctx); +- list_splice_init(&ctx->to_fsync_list, &copy); +- call_count = ctx->fsync_count; +- ctx->fsync_count = 0; +- } ++ __shard_inode_ctx_get(anon_fd->inode, this, &ctx); ++ __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ if (op_ret == 0) ++ ctx->fsync_needed -= fsync_count; ++ GF_ASSERT(ctx->fsync_needed >= 0); ++ if (ctx->fsync_needed != 0) { ++ list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); ++ base_ictx->fsync_count++; ++ } else { ++ unref_shard_inode = _gf_true; ++ } ++ } ++ UNLOCK(&anon_fd->inode->lock); + UNLOCK(&base_inode->lock); ++ } ++ ++ if (unref_shard_inode) ++ inode_unref(anon_fd->inode); ++ if (anon_fd) ++ fd_unref(anon_fd); ++ ++
call_count = shard_call_count_return(frame); ++ if (call_count != 0) ++ return 0; + +- local->call_count = ++call_count; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ shard_get_timestamps_from_inode_ctx(local, base_inode, this); ++ SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } ++ return 0; ++} ++ ++int shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ int call_count = 0; ++ int fsync_count = 0; ++ fd_t *anon_fd = NULL; ++ inode_t *base_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *iter = NULL; ++ struct list_head copy = { ++ 0, ++ }; ++ shard_inode_ctx_t *tmp = NULL; ++ ++ local = frame->local; ++ base_inode = local->fd->inode; ++ local->postbuf = local->prebuf; ++ INIT_LIST_HEAD(&copy); ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ LOCK(&base_inode->lock); ++ { ++ __shard_inode_ctx_get(base_inode, this, &ctx); ++ list_splice_init(&ctx->to_fsync_list, &copy); ++ call_count = ctx->fsync_count; ++ ctx->fsync_count = 0; ++ } ++ UNLOCK(&base_inode->lock); ++ ++ local->call_count = ++call_count; ++ ++ /* Send fsync() on the base shard first */ ++ anon_fd = fd_ref(local->fd); ++ STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, ++ local->xattr_req); ++ call_count--; ++ anon_fd = NULL; ++ ++ list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list) { ++ list_del_init(&iter->to_fsync_list); ++ fsync_count = 0; ++ shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); ++ GF_ASSERT(fsync_count > 0); ++ anon_fd = fd_anonymous(iter->inode); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ gf_msg(this->name,
GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create " ++ "anon fd to fsync shard"); ++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ continue; ++ } + +- /* Send fsync() on the base shard first */ +- anon_fd = fd_ref(local->fd); ++ ret = fd_ctx_set(anon_fd, this, fsync_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, ++ "Failed to set fd " ++ "ctx for shard inode gfid=%s", ++ uuid_utoa(iter->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ continue; ++ } + STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, + local->xattr_req); + call_count--; +- anon_fd = NULL; +- +- list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list) +- { +- list_del_init(&iter->to_fsync_list); +- fsync_count = 0; +- shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); +- GF_ASSERT(fsync_count > 0); +- anon_fd = fd_anonymous(iter->inode); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, +- SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create " +- "anon fd to fsync shard"); +- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, NULL, NULL); +- continue; +- } +- +- ret = fd_ctx_set(anon_fd, this, fsync_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, +- "Failed to set fd " +- "ctx for shard inode gfid=%s", +- uuid_utoa(iter->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, NULL, NULL); +- continue; +- } +- STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, +- anon_fd, local->datasync,
local->xattr_req); +- call_count--; +- } ++ } + +- return 0; ++ return 0; + } + +-int +-shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, +- dict_t *xdata) +-{ +- int ret = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, ++ dict_t *xdata) { ++ int ret = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); +- return 0; +- } ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); ++ return 0; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- frame->local = local; ++ frame->local = local; + +- local->fd = fd_ref(fd); +- local->fop = GF_FOP_FSYNC; +- local->datasync = datasync; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local->fd = fd_ref(fd); ++ local->fop = GF_FOP_FSYNC; ++ local->datasync = datasync; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_fsync_handler); +- return 0; ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_fsync_handler); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, gf_dirent_t *orig_entries, +- dict_t *xdata) +-{ +- gf_dirent_t *entry = NULL; +- gf_dirent_t *tmp = NULL; +- shard_local_t *local = NULL; ++int shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, ++ gf_dirent_t *orig_entries, dict_t *xdata) { ++ gf_dirent_t *entry = NULL; ++ gf_dirent_t *tmp = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret < 0) +- goto unwind; ++ if (op_ret < 0) ++ goto unwind; + +- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) +- { +- list_del_init(&entry->list); +- list_add_tail(&entry->list, &local->entries_head.list); ++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { ++ list_del_init(&entry->list); ++ list_add_tail(&entry->list, &local->entries_head.list); + +- if (!entry->dict) +- continue; ++ if (!entry->dict) ++ continue; + +- if (IA_ISDIR(entry->d_stat.ia_type)) +- continue; ++ if (IA_ISDIR(entry->d_stat.ia_type)) ++ continue; + +- 
if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) +- shard_modify_size_and_block_count(&entry->d_stat, entry->dict); +- if (!entry->inode) +- continue; ++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) ++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict); ++ if (!entry->inode) ++ continue; + +- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); +- } +- local->op_ret += op_ret; ++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); ++ } ++ local->op_ret += op_ret; + + unwind: +- if (local->fop == GF_FOP_READDIR) +- SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, +- &local->entries_head, xdata); +- else +- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, +- &local->entries_head, xdata); +- return 0; ++ if (local->fop == GF_FOP_READDIR) ++ SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, ++ &local->entries_head, xdata); ++ else ++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, ++ xdata); ++ return 0; + } + +-int32_t +-shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries, +- dict_t *xdata) +-{ +- fd_t *fd = NULL; +- gf_dirent_t *entry = NULL; +- gf_dirent_t *tmp = NULL; +- shard_local_t *local = NULL; +- gf_boolean_t last_entry = _gf_false; ++int32_t shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ gf_dirent_t *orig_entries, dict_t *xdata) { ++ fd_t *fd = NULL; ++ gf_dirent_t *entry = NULL; ++ gf_dirent_t *tmp = NULL; ++ shard_local_t *local = NULL; ++ gf_boolean_t last_entry = _gf_false; + +- local = frame->local; +- fd = local->fd; ++ local = frame->local; ++ fd = local->fd; + +- if (op_ret < 0) +- goto unwind; ++ if (op_ret < 0) ++ goto unwind; + +- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) +- { +- if (last_entry) +- last_entry = _gf_false; +- +- if 
(__is_root_gfid(fd->inode->gfid) && +- !(strcmp(entry->d_name, GF_SHARD_DIR))) { +- local->offset = entry->d_off; +- op_ret--; +- last_entry = _gf_true; +- continue; +- } ++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { ++ if (last_entry) ++ last_entry = _gf_false; + +- list_del_init(&entry->list); +- list_add_tail(&entry->list, &local->entries_head.list); ++ if (__is_root_gfid(fd->inode->gfid) && ++ !(strcmp(entry->d_name, GF_SHARD_DIR))) { ++ local->offset = entry->d_off; ++ op_ret--; ++ last_entry = _gf_true; ++ continue; ++ } + +- if (!entry->dict) +- continue; ++ list_del_init(&entry->list); ++ list_add_tail(&entry->list, &local->entries_head.list); + +- if (IA_ISDIR(entry->d_stat.ia_type)) +- continue; ++ if (!entry->dict) ++ continue; + +- if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && +- frame->root->pid != GF_CLIENT_PID_GSYNCD) +- shard_modify_size_and_block_count(&entry->d_stat, entry->dict); ++ if (IA_ISDIR(entry->d_stat.ia_type)) ++ continue; + +- if (!entry->inode) +- continue; ++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && ++ frame->root->pid != GF_CLIENT_PID_GSYNCD) ++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict); + +- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); +- } ++ if (!entry->inode) ++ continue; + +- local->op_ret = op_ret; ++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); ++ } + +- if (last_entry) { +- if (local->fop == GF_FOP_READDIR) +- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, +- local->fd, local->readdir_size, local->offset, +- local->xattr_req); +- else +- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, +- local->fd, local->readdir_size, local->offset, +- local->xattr_req); +- return 0; +- } ++ local->op_ret = op_ret; + +-unwind: ++ if (last_entry) { + if (local->fop == GF_FOP_READDIR) +- 
SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, +- &local->entries_head, xdata); ++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdir, local->fd, ++ local->readdir_size, local->offset, local->xattr_req); + else +- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, +- &local->entries_head, xdata); ++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdirp, local->fd, ++ local->readdir_size, local->offset, local->xattr_req); + return 0; +-} ++ } + +-int +-shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, int whichop, dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = mem_get0(this->local_pool); +- if (!local) { +- goto err; ++unwind: ++ if (local->fop == GF_FOP_READDIR) ++ SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, &local->entries_head, ++ xdata); ++ else ++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, ++ xdata); ++ return 0; ++} ++ ++int shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, int whichop, dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) { ++ goto err; ++ } ++ ++ frame->local = local; ++ ++ local->fd = fd_ref(fd); ++ local->fop = whichop; ++ local->readdir_size = size; ++ INIT_LIST_HEAD(&local->entries_head.list); ++ local->list_inited = _gf_true; ++ ++ if (whichop == GF_FOP_READDIR) { ++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); ++ } else { ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_log(this->name, GF_LOG_WARNING, ++ "Failed to set " ++ "dict value: key:%s, directory gfid=%s", ++ GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); ++ goto err; + } + +- frame->local = local; +- +- local->fd = fd_ref(fd); +- local->fop = whichop; +- local->readdir_size = size; +- INIT_LIST_HEAD(&local->entries_head.list); +- local->list_inited = _gf_true; +- +- if (whichop == GF_FOP_READDIR) { +- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); +- } else { +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_log(this->name, GF_LOG_WARNING, +- "Failed to set " +- "dict value: key:%s, directory gfid=%s", +- GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); +- goto err; +- } +- +- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdirp, fd, size, offset, +- local->xattr_req); +- } ++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, ++ local->xattr_req); ++ } + +- return 0; ++ return 0; + + err: +- STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); +- return 0; ++ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); ++ return 0; + } + +-int32_t +-shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, dict_t *xdata) +-{ +- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); +- return 0; ++int32_t shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ size_t size, off_t offset, dict_t *xdata) { ++ 
shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); ++ return 0; + } + +-int32_t +-shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, dict_t *xdata) +-{ +- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); +- return 0; ++int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ size_t size, off_t offset, dict_t *xdata) { ++ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); ++ return 0; + } + +-int32_t +-shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- const char *name, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); ++ } + +- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- const char *name, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_fremovexattr(call_frame_t 
*frame, xlator_t *this, fd_t *fd, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); ++ } + +- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- if (op_ret < 0) +- goto unwind; ++int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) { ++ if (op_ret < 0) ++ goto unwind; + +- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); ++ } + + unwind: +- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); +- return 0; ++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); ++ return 0; + } + +-int32_t 
+-shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, +- dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && +- (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { +- op_errno = ENODATA; +- goto out; +- } ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && ++ (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { ++ op_errno = ENODATA; ++ goto out; ++ } + +- STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); +- return 0; ++ STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- if (op_ret < 0) +- goto unwind; ++int32_t shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) { ++ if (op_ret < 0) ++ goto unwind; + +- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); ++ } + + unwind: +- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); +- return 0; ++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); ++ return 0; + } + +-int32_t +-shard_getxattr(call_frame_t *frame, xlator_t 
*this, loc_t *loc, +- const char *name, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && +- (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { +- op_errno = ENODATA; +- goto out; +- } ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && ++ (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { ++ op_errno = ENODATA; ++ goto out; ++ } + +- STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); +- return 0; ++ STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); ++ return 0; + } + +-int32_t +-shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, +- int32_t flags, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, ++ fd, dict, flags, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); ++ 
return 0; + } + +-int32_t +-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, +- int32_t flags, dict_t *xdata) +-{ +- int op_errno = EINVAL; ++int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ int op_errno = EINVAL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); +- } ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, +- loc, dict, flags, xdata); +- return 0; ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, ++ loc, dict, flags, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); ++ return 0; + } + +-int +-shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->fop == GF_FOP_SETATTR) { +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, +- SHARD_LOOKUP_MASK); +- SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } else if (local->fop == GF_FOP_FSETATTR) { +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, +- SHARD_LOOKUP_MASK); +- SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, local->xattr_rsp); +- } +- +- return 0; +-} ++int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +-int +-shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +- struct 
iatt *postbuf, dict_t *xdata) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ local = frame->local; + +- local->prebuf = *prebuf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; +- } +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- local->postbuf = *postbuf; +- local->postbuf.ia_size = local->prebuf.ia_size; +- local->postbuf.ia_blocks = local->prebuf.ia_blocks; ++ if (local->fop == GF_FOP_SETATTR) { ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, ++ SHARD_LOOKUP_MASK); ++ SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } else if (local->fop == GF_FOP_FSETATTR) { ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, ++ SHARD_LOOKUP_MASK); ++ SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, local->xattr_rsp); ++ } + +-unwind: +- local->handler(frame, this); +- return 0; ++ return 0; + } + +-int +-shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- struct iatt *stbuf, int32_t valid, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { +- STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); +- return 0; +- } +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- 
STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++int shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *prebuf, struct iatt *postbuf, ++ dict_t *xdata) { ++ shard_local_t *local = NULL; + +- frame->local = local; ++ local = frame->local; + +- local->handler = shard_post_setattr_handler; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_SETATTR; +- loc_copy(&local->loc, loc); ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, +- local, err); ++ local->prebuf = *prebuf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ local->postbuf = *postbuf; ++ local->postbuf.ia_size = local->prebuf.ia_size; ++ local->postbuf.ia_blocks = local->prebuf.ia_blocks; + +- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, +- local->xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); +- return 0; ++unwind: ++ local->handler(frame, this); ++ return 0; + } + +-int +-shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iatt *stbuf, int32_t valid, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { +- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); +- return 0; +- } ++int 
shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ struct iatt *stbuf, int32_t valid, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { ++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); ++ return 0; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); +- return 0; +- } ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- frame->local = local; ++ frame->local = local; + +- local->handler = shard_post_setattr_handler; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_FSETATTR; +- local->fd = fd_ref(fd); ++ local->handler = shard_post_setattr_handler; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_SETATTR; ++ loc_copy(&local->loc, loc); + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, ++ local, err); + +- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, +- local->xattr_req); +- return 0; ++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, ++ local->xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, +- glusterfs_fop_t fop, fd_t *fd, +- struct iovec *vector, int32_t count, +- off_t offset, uint32_t flags, size_t len, +- struct iobref *iobref, dict_t *xdata) +-{ +- int ret = 0; +- int i = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iatt *stbuf, int32_t valid, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto out; +- } ++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { ++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); ++ return 0; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- /* block_size = 0 means that the file was created before +- * sharding was enabled on the volume. 
+- */ +- switch (fop) { +- case GF_FOP_WRITE: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->writev, fd, vector, +- count, offset, flags, iobref, xdata); +- break; +- case GF_FOP_FALLOCATE: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fallocate, fd, flags, +- offset, len, xdata); +- break; +- case GF_FOP_ZEROFILL: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->zerofill, fd, offset, +- len, xdata); +- break; +- case GF_FOP_DISCARD: +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->discard, fd, offset, +- len, xdata); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +- } ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto out; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- frame->local = local; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto out; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto out; +- +- if (vector) { +- local->vector = iov_dup(vector, count); +- if (!local->vector) +- goto out; +- for (i = 0; i < count; i++) +- local->total_size += vector[i].iov_len; +- local->count = count; +- } else { +- local->total_size = len; +- } ++ frame->local = local; + +- local->fop = fop; +- local->offset = offset; +- local->flags = flags; +- if (iobref) +- local->iobref = iobref_ref(iobref); +- local->fd = fd_ref(fd); +- local->block_size = block_size; +- local->resolver_base_inode = local->fd->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->handler = shard_post_setattr_handler; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_FSETATTR; ++ local->fd = fd_ref(fd); + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_common_inode_write_post_lookup_handler); +- return 0; ++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, ++ local->xattr_req); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, ++ glusterfs_fop_t fop, fd_t *fd, ++ struct iovec *vector, int32_t count, ++ off_t offset, uint32_t flags, size_t len, ++ struct iobref *iobref, dict_t *xdata) { ++ int ret = 0; ++ int i = 0; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size for %s from its inode ctx", ++ uuid_utoa(fd->inode->gfid)); ++ goto out; ++ } ++ ++ if 
(!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ /* block_size = 0 means that the file was created before ++ * sharding was enabled on the volume. ++ */ ++ switch (fop) { ++ case GF_FOP_WRITE: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, ++ fd, vector, count, offset, flags, iobref, xdata); ++ break; ++ case GF_FOP_FALLOCATE: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fallocate, fd, flags, offset, ++ len, xdata); ++ break; ++ case GF_FOP_ZEROFILL: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, ++ xdata); ++ break; ++ case GF_FOP_DISCARD: ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto out; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto out; ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto out; ++ ++ if (vector) { ++ local->vector = iov_dup(vector, count); ++ if (!local->vector) ++ goto out; ++ for (i = 0; i < count; i++) ++ local->total_size += vector[i].iov_len; ++ local->count = count; ++ } else { ++ local->total_size = len; ++ } ++ ++ local->fop = fop; ++ local->offset = offset; ++ local->flags = flags; ++ if (iobref) ++ local->iobref = iobref_ref(iobref); ++ local->fd = fd_ref(fd); ++ local->block_size = block_size; ++ local->resolver_base_inode = local->fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_common_inode_write_post_lookup_handler); ++ return 0; + out: +- shard_common_failure_unwind(fop, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iovec *vector, int32_t count, off_t offset, uint32_t flags, +- struct iobref *iobref, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, +- offset, flags, 0, iobref, xdata); +- return 0; ++int shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iovec *vector, int32_t count, off_t offset, ++ uint32_t flags, struct iobref *iobref, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, ++ offset, flags, 0, iobref, xdata); ++ return 0; + } + +-int +-shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, +- int32_t keep_size, off_t offset, size_t len, dict_t *xdata) +-{ +- if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && +- (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) +- goto out; ++int shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ int32_t keep_size, off_t offset, size_t len, ++ dict_t 
*xdata) { ++ if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && ++ (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) ++ goto out; + +- shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, +- offset, keep_size, len, NULL, xdata); +- return 0; ++ shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, ++ offset, keep_size, len, NULL, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); ++ return 0; + } + +-int +-shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- off_t len, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, +- offset, 0, len, NULL, xdata); +- return 0; ++int shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ off_t len, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, ++ offset, 0, len, NULL, xdata); ++ return 0; + } + +-int +-shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- size_t len, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, +- offset, 0, len, NULL, xdata); +- return 0; ++int shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ size_t len, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, ++ offset, 0, len, NULL, xdata); ++ return 0; + } + +-int32_t +-shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- gf_seek_what_t what, dict_t *xdata) +-{ +- /* TBD */ +- gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, +- "seek called on %s.", uuid_utoa(fd->inode->gfid)); +- shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); +- return 0; ++int32_t shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ 
gf_seek_what_t what, dict_t *xdata) { ++ /* TBD */ ++ gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, ++ "seek called on %s.", uuid_utoa(fd->inode->gfid)); ++ shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); ++ return 0; + } + +-int32_t +-mem_acct_init(xlator_t *this) +-{ +- int ret = -1; +- +- if (!this) +- return ret; ++int32_t mem_acct_init(xlator_t *this) { ++ int ret = -1; + +- ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); ++ if (!this) ++ return ret; + +- if (ret != 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, +- "Memory accounting init" +- "failed"); +- return ret; +- } ++ ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); + ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, ++ "Memory accounting init" ++ "failed"); + return ret; ++ } ++ ++ return ret; + } + +-int +-init(xlator_t *this) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; ++int init(xlator_t *this) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; + +- if (!this) { +- gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, +- "this is NULL. init() failed"); +- return -1; +- } ++ if (!this) { ++ gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, ++ "this is NULL. init() failed"); ++ return -1; ++ } + +- if (!this->parents) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, +- "Dangling volume. Check volfile"); +- goto out; +- } ++ if (!this->parents) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, ++ "Dangling volume. Check volfile"); ++ goto out; ++ } + +- if (!this->children || this->children->next) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, +- "shard not configured with exactly one sub-volume. " +- "Check volfile"); +- goto out; +- } ++ if (!this->children || this->children->next) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, ++ "shard not configured with exactly one sub-volume. 
" ++ "Check volfile"); ++ goto out; ++ } + +- priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); +- if (!priv) +- goto out; ++ priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); ++ if (!priv) ++ goto out; + +- GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); ++ GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); + +- GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); ++ GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); + +- GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); ++ GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); + +- this->local_pool = mem_pool_new(shard_local_t, 128); +- if (!this->local_pool) { +- ret = -1; +- goto out; +- } +- gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); +- gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); ++ this->local_pool = mem_pool_new(shard_local_t, 128); ++ if (!this->local_pool) { ++ ret = -1; ++ goto out; ++ } ++ gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); ++ gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); + +- this->private = priv; +- LOCK_INIT(&priv->lock); +- INIT_LIST_HEAD(&priv->ilist_head); +- ret = 0; ++ this->private = priv; ++ LOCK_INIT(&priv->lock); ++ INIT_LIST_HEAD(&priv->ilist_head); ++ ret = 0; + out: +- if (ret) { +- GF_FREE(priv); +- mem_pool_destroy(this->local_pool); +- } ++ if (ret) { ++ GF_FREE(priv); ++ mem_pool_destroy(this->local_pool); ++ } + +- return ret; ++ return ret; + } + +-void +-fini(xlator_t *this) +-{ +- shard_priv_t *priv = NULL; ++void fini(xlator_t *this) { ++ shard_priv_t *priv = NULL; + +- GF_VALIDATE_OR_GOTO("shard", this, out); ++ GF_VALIDATE_OR_GOTO("shard", this, out); + +- mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; ++ mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + +- priv = this->private; +- if (!priv) +- goto out; ++ priv = this->private; ++ if 
(!priv) ++ goto out; + +- this->private = NULL; +- LOCK_DESTROY(&priv->lock); +- GF_FREE(priv); ++ this->private = NULL; ++ LOCK_DESTROY(&priv->lock); ++ GF_FREE(priv); + + out: +- return; ++ return; + } + +-int +-reconfigure(xlator_t *this, dict_t *options) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; ++int reconfigure(xlator_t *this, dict_t *options) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; + +- priv = this->private; ++ priv = this->private; + +- GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); ++ GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); + +- GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, +- uint32, out); +- ret = 0; ++ GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, uint32, ++ out); ++ ret = 0; + + out: +- return ret; ++ return ret; + } + +-int +-shard_forget(xlator_t *this, inode_t *inode) +-{ +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; +- shard_priv_t *priv = NULL; ++int shard_forget(xlator_t *this, inode_t *inode) { ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_priv_t *priv = NULL; + +- priv = this->private; +- if (!priv) +- return 0; ++ priv = this->private; ++ if (!priv) ++ return 0; + +- inode_ctx_del(inode, this, &ctx_uint); +- if (!ctx_uint) +- return 0; ++ inode_ctx_del(inode, this, &ctx_uint); ++ if (!ctx_uint) ++ return 0; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- /* When LRU limit reaches inode will be forcefully removed from the +- * table, inode needs to be removed from LRU of shard as well. +- */ +- if (!list_empty(&ctx->ilist)) { +- LOCK(&priv->lock); +- { +- list_del_init(&ctx->ilist); +- priv->inode_count--; +- } +- UNLOCK(&priv->lock); ++ /* When LRU limit reaches inode will be forcefully removed from the ++ * table, inode needs to be removed from LRU of shard as well. 
++ */ ++ if (!list_empty(&ctx->ilist)) { ++ LOCK(&priv->lock); ++ { ++ list_del_init(&ctx->ilist); ++ priv->inode_count--; + } +- GF_FREE(ctx); ++ UNLOCK(&priv->lock); ++ } ++ GF_FREE(ctx); + +- return 0; ++ return 0; + } + +-int +-shard_release(xlator_t *this, fd_t *fd) +-{ +- /* TBD */ +- return 0; ++int shard_release(xlator_t *this, fd_t *fd) { ++ /* TBD */ ++ return 0; + } + +-int +-shard_priv_dump(xlator_t *this) +-{ +- shard_priv_t *priv = NULL; +- char key_prefix[GF_DUMP_MAX_BUF_LEN] = { +- 0, +- }; +- char *str = NULL; ++int shard_priv_dump(xlator_t *this) { ++ shard_priv_t *priv = NULL; ++ char key_prefix[GF_DUMP_MAX_BUF_LEN] = { ++ 0, ++ }; ++ char *str = NULL; + +- priv = this->private; ++ priv = this->private; + +- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); +- gf_proc_dump_add_section("%s", key_prefix); +- str = gf_uint64_2human_readable(priv->block_size); +- gf_proc_dump_write("shard-block-size", "%s", str); +- gf_proc_dump_write("inode-count", "%d", priv->inode_count); +- gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); +- gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); ++ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); ++ gf_proc_dump_add_section("%s", key_prefix); ++ str = gf_uint64_2human_readable(priv->block_size); ++ gf_proc_dump_write("shard-block-size", "%s", str); ++ gf_proc_dump_write("inode-count", "%d", priv->inode_count); ++ gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); ++ gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); + +- GF_FREE(str); ++ GF_FREE(str); + +- return 0; ++ return 0; + } + +-int +-shard_releasedir(xlator_t *this, fd_t *fd) +-{ +- return 0; +-} ++int shard_releasedir(xlator_t *this, fd_t *fd) { return 0; } + + struct xlator_fops fops = { + .lookup = shard_lookup, +-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index 94c4d76..57699ad 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -231,7 
+231,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 21%{?dist} +Release: 23%{?dist} ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ @@ -623,6 +623,27 @@ Patch0311: 0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch Patch0312: 0312-Scripts-quota_fsck-script-KeyError-contri_size.patch Patch0313: 0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch Patch0314: 0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch +Patch0315: 0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch +Patch0316: 0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch +Patch0317: 0317-Update-rfc.sh-to-rhgs-3.5.1.patch +Patch0318: 0318-Update-rfc.sh-to-rhgs-3.5.1.patch +Patch0319: 0319-features-snapview-server-obtain-the-list-of-snapshot.patch +Patch0320: 0320-gf-event-Handle-unix-volfile-servers.patch +Patch0321: 0321-Adding-white-spaces-to-description-of-set-group.patch +Patch0322: 0322-glusterd-display-correct-rebalance-data-size-after-g.patch +Patch0323: 0323-cli-display-detailed-rebalance-info.patch +Patch0324: 0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch +Patch0325: 0325-extras-hooks-Install-and-package-newly-added-post-ad.patch +Patch0326: 0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch +Patch0327: 0327-glusterfind-integrate-with-gfid2path.patch +Patch0328: 0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch +Patch0329: 0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch +Patch0330: 0330-mount.glusterfs-change-the-error-message.patch +Patch0331: 0331-features-locks-Do-special-handling-for-op-version-3..patch +Patch0332: 0332-Removing-one-top-command-from-gluster-v-help.patch +Patch0333: 0333-rpc-Synchronize-slot-allocation-code.patch +Patch0334: 0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch +Patch0335: 0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch %description GlusterFS is a distributed file-system capable of scaling 
to several @@ -1022,7 +1043,7 @@ This package provides the translators needed on any GlusterFS client. %package events Summary: GlusterFS Events Requires: %{name}-server%{?_isa} = %{version}-%{release} -Requires: python%{_pythonver} python%{_pythonver}-prettytable +Requires: python%{_pythonver} Requires: python%{_pythonver}-gluster = %{version}-%{release} %if ( 0%{?rhel} && 0%{?rhel} < 8 ) Requires: python-requests @@ -1030,7 +1051,10 @@ Requires: python-requests Requires: python%{_pythonver}-requests %endif %if ( 0%{?rhel} && 0%{?rhel} < 7 ) +Requires: python-prettytable Requires: python-argparse +%else +Requires: python%{_pythonver}-prettytable %endif %if ( 0%{?_with_systemd:1} ) %{?systemd_requires} @@ -1833,6 +1857,7 @@ exit 0 %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh @@ -2336,6 +2361,14 @@ fi %endif %changelog +* Thu Nov 21 2019 Rinku Kothiya - 6.0-23 +- fixes bugs bz#1344758 bz#1599802 bz#1685406 bz#1686800 bz#1724021 + bz#1726058 bz#1727755 bz#1731513 bz#1741193 bz#1758923 bz#1761326 bz#1761486 + bz#1762180 bz#1764095 bz#1766640 + +* Thu Nov 14 2019 Rinku Kothiya - 6.0-22 +- fixes bugs bz#1771524 bz#1771614 + * Fri Oct 25 2019 Rinku Kothiya - 6.0-21 - fixes bugs bz#1765555