diff --git a/0202-glusterd-TLS-verification-fails-while-using-intermed.patch b/0202-glusterd-TLS-verification-fails-while-using-intermed.patch
new file mode 100644
index 0000000..982e9f5
--- /dev/null
+++ b/0202-glusterd-TLS-verification-fails-while-using-intermed.patch
@@ -0,0 +1,235 @@
+From 355e366ff59dfc2ecd4fdf1e5653664b9ac0c45f Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal
+Date: Wed, 14 Mar 2018 09:37:52 +0530
+Subject: [PATCH 202/212] glusterd: TLS verification fails while using
+ intermediate CA
+
+Problem: TLS verification fails while using an intermediate CA
+         if mgmt SSL is enabled.
+
+Solution: There are two main issues behind the TLS verification failure:
+          1) the SSL API is not called to set cert_depth
+          2) the current code does not allow the certificate depth to be
+          set while mgmt SSL is enabled.
+          After applying this patch, to set the certificate depth the user
+          needs to set the option transport.socket.ssl-cert-depth in
+          /var/lib/glusterd/secure-access instead of setting it in
+          /etc/glusterfs/glusterd.vol. At the time secure_mgmt is set in ctx
+          we check the value of cert-depth and save it in ctx. If the user
+          does not provide any value for cert-depth, the default value of 1
+          is used.
+
+> BUG: 1555154
+> Change-Id: I89e9a9e1026e37efb5c20f9ec62b1989ef644f35
+> Reviewed on https://review.gluster.org/#/c/19708/
+> (cherry pick from commit cf06dd544004701ef43fa81c5b7a95353d5c1d65)
+
+BUG: 1446046
+Change-Id: I94000bc8741ceb5659ec9f376eac447ae84792ad
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/133849
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ api/src/glfs-mgmt.c                          |  1 +
+ cli/src/cli.c                                |  1 +
+ glusterfsd/src/glusterfsd-mgmt.c             |  2 ++
+ glusterfsd/src/glusterfsd.c                  |  1 +
+ heal/src/glfs-heal.c                         |  1 +
+ libglusterfs/src/glusterfs.h                 |  6 ++++
+ libglusterfs/src/graph.c                     | 42 +++++++++++++++++++++++++++-
+ rpc/rpc-transport/socket/src/socket.c        | 12 +++++---
+ xlators/mgmt/glusterd/src/glusterd-handler.c |  3 ++
+ 9 files changed, 64 insertions(+), 5 deletions(-)
+
+diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
+index 32b9dbd..b70dc35 100644
+--- a/api/src/glfs-mgmt.c
++++ b/api/src/glfs-mgmt.c
+@@ -996,6 +996,7 @@ glfs_mgmt_init (struct glfs *fs)
+ 
+         if (sys_access (SECURE_ACCESS_FILE, F_OK) == 0) {
+                 ctx->secure_mgmt = 1;
++                ctx->ssl_cert_depth = glusterfs_read_secure_access_file ();
+         }
+ 
+         rpc = rpc_clnt_new (options, THIS, THIS->name, 8);
+diff --git a/cli/src/cli.c b/cli/src/cli.c
+index 52c1b67..b64d4ef 100644
+--- a/cli/src/cli.c
++++ b/cli/src/cli.c
+@@ -432,6 +432,7 @@ parse_cmdline (int argc, char *argv[], struct cli_state *state)
+         /* Do this first so that an option can override. 
*/ + if (sys_access (SECURE_ACCESS_FILE, F_OK) == 0) { + state->ctx->secure_mgmt = 1; ++ state->ctx->ssl_cert_depth = glusterfs_read_secure_access_file (); + } + + if (state->argc > GEO_REP_CMD_CONFIG_INDEX && +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index 69d93f5..ef53d09 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -2467,6 +2467,8 @@ glusterfs_mgmt_init (glusterfs_ctx_t *ctx) + goto out; + + } ++ ++ ctx->ssl_cert_depth = glusterfs_read_secure_access_file (); + } + + rpc = rpc_clnt_new (options, THIS, THIS->name, 8); +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 38b863c..3ae89a6 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -1917,6 +1917,7 @@ parse_cmdline (int argc, char *argv[], glusterfs_ctx_t *ctx) + /* Do this before argp_parse so it can be overridden. */ + if (sys_access (SECURE_ACCESS_FILE, F_OK) == 0) { + cmd_args->secure_mgmt = 1; ++ ctx->ssl_cert_depth = glusterfs_read_secure_access_file (); + } + + argp_parse (&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args); +diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c +index 532b6f9..153cd29 100644 +--- a/heal/src/glfs-heal.c ++++ b/heal/src/glfs-heal.c +@@ -1617,6 +1617,7 @@ main (int argc, char **argv) + + if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) { + fs->ctx->secure_mgmt = 1; ++ fs->ctx->ssl_cert_depth = glusterfs_read_secure_access_file (); + } + + ret = glfs_set_volfile_server (fs, "unix", DEFAULT_GLUSTERD_SOCKFILE, 0); +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index 5abfafa..5d5f5c8 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -536,6 +536,11 @@ struct _glusterfs_ctx { + */ + int secure_mgmt; + ++ /* The option is use to set cert_depth while management connection ++ use SSL ++ */ ++ int ssl_cert_depth; ++ + /* + * Should *our* server/inbound connections use SSL? 
This is only true + * if we're glusterd and secure_mgmt is set, or if we're glusterfsd +@@ -638,4 +643,5 @@ int glusterfs_graph_parent_up (glusterfs_graph_t *graph); + void + gf_free_mig_locks (lock_migration_info_t *locks); + ++int glusterfs_read_secure_access_file (void); + #endif /* _GLUSTERFS_H */ +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 738cd96..cdd7123 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -16,7 +16,7 @@ + #include "defaults.h" + #include + #include "syscall.h" +- ++#include + #include "libglusterfs-messages.h" + + #if 0 +@@ -68,7 +68,47 @@ _gf_dump_details (int argc, char **argv) + } + #endif + ++int ++glusterfs_read_secure_access_file (void) ++{ ++ FILE *fp = NULL; ++ char line[100] = {0,}; ++ int cert_depth = 1; /* Default SSL CERT DEPTH */ ++ regex_t regcmpl; ++ char *key = {"^option transport.socket.ssl-cert-depth"}; ++ char keyval[50] = {0,}; ++ int start = 0, end = 0, copy_len = 0; ++ regmatch_t result[1] = {{0} }; ++ ++ fp = fopen (SECURE_ACCESS_FILE, "r"); ++ if (!fp) ++ goto out; + ++ /* Check if any line matches with key */ ++ while (fgets(line, sizeof(line), fp) != NULL) { ++ if (regcomp (®cmpl, key, REG_EXTENDED)) { ++ goto out; ++ } ++ if (!regexec (®cmpl, line, 1, result, 0)) { ++ start = result[0].rm_so; ++ end = result[0].rm_eo; ++ copy_len = end - start; ++ strcpy (keyval, line+copy_len); ++ if (keyval[0]) { ++ cert_depth = atoi(keyval); ++ if (cert_depth == 0) ++ cert_depth = 1; /* Default SSL CERT DEPTH */ ++ break; ++ } ++ } ++ regfree(®cmpl); ++ } ++ ++out: ++ if (fp) ++ fclose (fp); ++ return cert_depth; ++} + + int + glusterfs_xlator_link (xlator_t *pxl, xlator_t *cxl) +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 590d465..157b5b7 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -4324,7 +4324,13 @@ socket_init (rpc_transport_t *this) + "using %s polling thread", + priv->own_thread ? 
"private" : "system"); + +- if (!dict_get_int32 (this->options, SSL_CERT_DEPTH_OPT, &cert_depth)) { ++ if (!priv->mgmt_ssl) { ++ if (!dict_get_int32 (this->options, SSL_CERT_DEPTH_OPT, &cert_depth)) { ++ gf_log (this->name, GF_LOG_INFO, ++ "using certificate depth %d", cert_depth); ++ } ++ } else { ++ cert_depth = this->ctx->ssl_cert_depth; + gf_log (this->name, GF_LOG_INFO, + "using certificate depth %d", cert_depth); + } +@@ -4463,9 +4469,7 @@ socket_init (rpc_transport_t *this) + goto err; + } + +-#if (OPENSSL_VERSION_NUMBER < 0x00905100L) +- SSL_CTX_set_verify_depth(ctx,cert_depth); +-#endif ++ SSL_CTX_set_verify_depth(priv->ssl_ctx, cert_depth); + + if (crl_path) { + #ifdef X509_V_FLAG_CRL_CHECK_ALL +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 16a3773..ddab159 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3544,6 +3544,9 @@ glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + "failed to set ssl-enabled in dict"); + goto out; + } ++ ++ this->ctx->ssl_cert_depth = glusterfs_read_secure_access_file (); ++ + } + + ret = glusterd_rpc_create (&peerinfo->rpc, options, +-- +1.8.3.1 + diff --git a/0203-mgmt-glusterd-Adding-validation-for-setting-quorum-c.patch b/0203-mgmt-glusterd-Adding-validation-for-setting-quorum-c.patch new file mode 100644 index 0000000..efb4ef2 --- /dev/null +++ b/0203-mgmt-glusterd-Adding-validation-for-setting-quorum-c.patch @@ -0,0 +1,107 @@ +From 45481e3e7ca074eb405b0db5521d4ca08bb20641 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Fri, 9 Mar 2018 14:45:07 +0530 +Subject: [PATCH 203/212] mgmt/glusterd: Adding validation for setting + quorum-count + +In a replicated volume it was allowing to set the quorum-count value +between the range [1 - 2147483647]. This patch adds validation for +allowing only maximum of replica_count number of quorum-count value +to be set on a volume. + +Upstream patch: https://review.gluster.org/#/c/19104/ + +> Change-Id: I13952f3c6cf498c9f2b91161503fc0fba9d94898 +> BUG: 1529515 +> Signed-off-by: karthik-us + +Change-Id: Ie4a74184ae640703524f371f4a0de6d70a6e9abb +BUG: 1186664 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/132255 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + xlators/cluster/afr/src/afr.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 45 ++++++++++++++++++++++--- + 2 files changed, 41 insertions(+), 6 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index dec6e60..0122b7f 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -959,7 +959,7 @@ struct volume_options options[] = { + .max = INT_MAX, + .default_value = 0, + .description = "If quorum-type is \"fixed\" only allow writes if " +- "this many bricks or present. Other quorum types " ++ "this many bricks are present. 
Other quorum types " + "will OVERWRITE this value.", + }, + { .key = {"quorum-reads"}, +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 8d3407d..d01e282 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -847,6 +847,40 @@ out: + } + + static int ++validate_quorum_count (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, ++ char *value, char **op_errstr) ++{ ++ int ret = 0; ++ xlator_t *this = NULL; ++ int q_count = 0; ++ ++ this = THIS; ++ GF_ASSERT (this); ++ ++ ret = gf_string2int (value, &q_count); ++ if (ret) { ++ gf_asprintf (op_errstr, "%s is not an integer. %s expects a " ++ "valid integer value.", value, key); ++ goto out; ++ } ++ ++ if (q_count < 1 || q_count > volinfo->replica_count) { ++ gf_asprintf (op_errstr, "%d in %s %d is out of range [1 - %d]", ++ q_count, key, q_count, volinfo->replica_count); ++ ret = -1; ++ } ++ ++out: ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", ++ *op_errstr); ++ } ++ gf_msg_debug (this->name, 0, "Returning %d", ret); ++ ++ return ret; ++} ++ ++static int + validate_subvols_per_directory (glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) + { +@@ -1456,11 +1490,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, +- { .key = "cluster.quorum-count", +- .voltype = "cluster/replicate", +- .option = "quorum-count", +- .op_version = 1, +- .flags = OPT_FLAG_CLIENT_OPT ++ { .key = "cluster.quorum-count", ++ .voltype = "cluster/replicate", ++ .option = "quorum-count", ++ .op_version = 1, ++ .validate_fn = validate_quorum_count, ++ .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.choose-local", + .voltype = "cluster/replicate", +-- +1.8.3.1 + diff --git a/0204-glusterd-memory-leak-in-mgmt_v3-lock-functionality.patch b/0204-glusterd-memory-leak-in-mgmt_v3-lock-functionality.patch new file mode 100644 index 0000000..033d25e --- /dev/null +++ b/0204-glusterd-memory-leak-in-mgmt_v3-lock-functionality.patch @@ -0,0 +1,73 @@ +From 9b001e38b21d433580d55e68225f2cd5af058dbf Mon Sep 17 00:00:00 2001 +From: Gaurav Yadav +Date: Thu, 1 Mar 2018 14:44:34 +0530 +Subject: [PATCH 204/212] glusterd : memory leak in mgmt_v3 lock functionality + +In order to take care of stale lock issue, a timer was intrduced +in mgmt_v3 lock. 
This timer was not freeing the memory,
+which is how this leak got introduced.
+
+With this fix, memory cleanup in locking is handled properly.
+
+>upstream patch: https://review.gluster.org/#/c/19651/
+>                https://review.gluster.org/#/c/19723/
+
+>Change-Id: I2e1ce3ebba3520f7660321f3d97554080e4e22f4
+>BUG: 1550339
+>Signed-off-by: Gaurav Yadav
+
+Change-Id: I2e1ce3ebba3520f7660321f3d97554080e4e22f4
+BUG: 1529451
+Signed-off-by: Gaurav Yadav
+Reviewed-on: https://code.engineering.redhat.com/gerrit/134218
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ xlators/mgmt/glusterd/src/glusterd-locks.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c
+index bd73b37..a19d688 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-locks.c
++++ b/xlators/mgmt/glusterd/src/glusterd-locks.c
+@@ -719,6 +719,7 @@ gd_mgmt_v3_unlock_timer_cbk (void *data)
+         int32_t ret = -1;
+         glusterfs_ctx_t *mgmt_lock_timer_ctx = NULL;
+         xlator_t *mgmt_lock_timer_xl = NULL;
++        gf_timer_t *timer = NULL;
+ 
+         this = THIS;
+         GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+@@ -766,9 +767,10 @@ out:
+                 GF_VALIDATE_OR_GOTO (this->name, mgmt_lock_timer_ctx,
+                                      ret_function);
+ 
++                timer = mgmt_lock_timer->timer;
++                GF_FREE (timer->data);
+                 gf_timer_call_cancel (mgmt_lock_timer_ctx,
+                                       mgmt_lock_timer->timer);
+-                GF_FREE(key);
+                 dict_del (conf->mgmt_v3_lock_timer, bt_key);
+                 mgmt_lock_timer->timer = NULL;
+         }
+@@ -791,6 +793,7 @@ glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type)
+         xlator_t *this = NULL;
+         glusterfs_ctx_t *mgmt_lock_timer_ctx = NULL;
+         xlator_t *mgmt_lock_timer_xl = NULL;
++        gf_timer_t *timer = NULL;
+ 
+         this = THIS;
+         GF_ASSERT (this);
+@@ -893,6 +896,9 @@ glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type)
+                 mgmt_lock_timer_ctx = mgmt_lock_timer_xl->ctx;
+                 GF_VALIDATE_OR_GOTO (this->name, mgmt_lock_timer_ctx, out);
+                 ret = 0;
++
++                timer = mgmt_lock_timer->timer;
++                GF_FREE (timer->data);
+                 gf_timer_call_cancel (mgmt_lock_timer_ctx,
+                                       mgmt_lock_timer->timer);
+                 dict_del (priv->mgmt_v3_lock_timer, key_dup);
+-- 
+1.8.3.1
+
diff --git a/0205-cluster-dht-User-xattrs-are-not-healed-after-brick-s.patch b/0205-cluster-dht-User-xattrs-are-not-healed-after-brick-s.patch
new file mode 100644
index 0000000..a11ba87
--- /dev/null
+++ b/0205-cluster-dht-User-xattrs-are-not-healed-after-brick-s.patch
@@ -0,0 +1,3615 @@
+From 27081fda822921e7f452304bea170d2d13cba257 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal
+Date: Fri, 12 May 2017 21:12:47 +0530
+Subject: [PATCH 205/212] cluster/dht : User xattrs are not healed after brick
+ stop/start
+
+Problem: In a distributed volume the custom extended attribute value for a
+         directory does not show the correct value after a brick is stopped
+         and started, or is newly added. If any extended attribute
+         (user|acl|quota) value is set for a directory while a brick is
+         stopped or newly added, the value is not updated on that brick
+         after it is started.
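+
+A condensed illustration of the failure mode (the volume name, brick path
+and mount point below are hypothetical; the detailed reproducer is in the
+Test section further down):
+
+    mkdir /mnt/testvol/dir1                   # testvol is a plain distribute volume
+    kill -9 $BRICK_PID                        # take one brick down
+    setfattr -n user.foo -v "abc" /mnt/testvol/dir1
+    gluster volume start testvol force        # bring the brick back
+    getfattr -n user.foo /bricks/brick1/dir1  # xattr is still missing here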
+
+Solution: First store the hashed subvol (or the subvol that already has the
+          internal xattr) in the inode ctx and consider it the MDS subvol.
+          When a custom xattr (user, quota, acl, selinux) is updated on a
+          directory, first check the MDS in the inode ctx; if no MDS is
+          present in the inode ctx, return an EINVAL error to the
+          application, otherwise set the xattr on the MDS subvol with an
+          internal xattr value of -1 and then try to update the attribute
+          on the other, non-MDS subvols as well. If the MDS subvol is down,
+          return the error "Transport endpoint is not connected". In
+          dht_dir_lookup_cbk|dht_revalidate_cbk|dht_discover_complete call
+          dht_call_dir_xattr_heal to heal the custom extended attributes.
+          In the case of the gnfs server, if no hashed subvol can be found
+          based on the loc, wind a call on all subvols to update the xattr.
+
+Fix: 1) Save the MDS subvol in the inode ctx
+     2) Check if the MDS subvol is present in the inode ctx
+     3) If the MDS subvol is down, unwind with the error ENOTCONN; if it is
+        up, set the new xattr "GF_DHT_XATTR_MDS" to -1 and wind a call on
+        the other subvols.
+     4) If the setxattr fop is successful on a non-MDS subvol, increment
+        the value of the internal xattr by +1
+     5) At the time of directory lookup, check the value of the new xattr
+        GF_DHT_XATTR_MDS
+     6) If the value is not 0, in dht_lookup_dir_cbk (and the other cbk
+        functions) call the heal function to heal the user xattrs
+     7) syncop_setxattr on the hashed_subvol to reset the value of the
+        xattr to 0 if the heal is successful on all subvols.
+
+Test: To reproduce the issue follow the steps below
+      1) Create a distributed volume and create a mount point
+      2) Create some directories from the mount point: mkdir tmp{1..5}
+      3) Kill any one brick of the volume
+      4) Set an extended attribute on the directories from the mount point
+         setfattr -n user.foo -v "abc" ./tmp{1..5}
+         It will throw the error "Transport endpoint is not connected"
+         for those directories whose hashed subvol is down
+      5) Start the volume with the force option to restart the brick process
+      6) Execute the getfattr command on the mount point for the directories
+      7) Check the extended attribute on the brick
+         getfattr -n user.foo /tmp{1..5}
+         It shows the correct value for those directories on which the
+         xattr fop was executed successfully.
+
+Note: The patch resolves the xattr healing problem only for fuse mounts,
+      not for nfs mounts. 
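+
+For illustration only (not part of the change itself): the internal xattr is
+stored on disk as trusted.glusterfs.dht.mds (the same name the regression
+tests below strip with setfattr -x), so the heal state of a directory can be
+checked straight on a brick; the brick path here is hypothetical:
+
+    getfattr -n trusted.glusterfs.dht.mds -e hex /bricks/brick1/dir1
+    # 0x00000000 => custom xattrs are in sync on all subvols
+    # non-zero   => xattr heal towards the non-MDS subvols is still pending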
+ +> BUG: 1371806 +> Signed-off-by: Mohit Agrawal +> (Cherry pick from commit 9b4de61a136b8e5ba7bf0e48690cdb1292d0dee8) +> (Upstream patch link https://review.gluster.org/#/c/15468/) + +BUG: 1550315 +Change-Id: I4eb137eace24a8cb796712b742f1d177a65343d5 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/132383 +Tested-by: RHGS Build Bot +Reviewed-by: Raghavendra Gowdappa +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/bug-1368312.t | 30 +- + tests/bugs/bug-1371806.t | 80 ++ + tests/bugs/bug-1371806_1.t | 49 + + tests/bugs/bug-1371806_2.t | 52 ++ + tests/bugs/bug-1371806_3.t | 63 ++ + tests/bugs/bug-1371806_acl.t | 90 ++ + tests/bugs/distribute/bug-862967.t | 7 +- + xlators/cluster/dht/src/dht-common.c | 1389 ++++++++++++++++++++++++++--- + xlators/cluster/dht/src/dht-common.h | 72 +- + xlators/cluster/dht/src/dht-helper.c | 65 ++ + xlators/cluster/dht/src/dht-inode-write.c | 163 +++- + xlators/cluster/dht/src/dht-messages.h | 28 +- + xlators/cluster/dht/src/dht-selfheal.c | 519 ++++++++++- + xlators/cluster/dht/src/dht-shared.c | 2 + + 14 files changed, 2436 insertions(+), 173 deletions(-) + create mode 100644 tests/bugs/bug-1371806.t + create mode 100644 tests/bugs/bug-1371806_1.t + create mode 100644 tests/bugs/bug-1371806_2.t + create mode 100644 tests/bugs/bug-1371806_3.t + create mode 100644 tests/bugs/bug-1371806_acl.t + +diff --git a/tests/bugs/bug-1368312.t b/tests/bugs/bug-1368312.t +index 135048f..61e5606 100644 +--- a/tests/bugs/bug-1368312.t ++++ b/tests/bugs/bug-1368312.t +@@ -29,46 +29,46 @@ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; + TEST mkdir $M0/tmp1 + + #Create metadata split-brain +-TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST kill_brick $V0 $H0 $B0/${V0}2 + TEST chmod 666 $M0/tmp1 + TEST $CLI volume start $V0 force +-TEST kill_brick $V0 $H0 $B0/${V0}1 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++TEST kill_brick $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + + TEST chmod 757 $M0/tmp1 + + TEST $CLI volume start $V0 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3 + + EXPECT 2 get_pending_heal_count $V0 + + +-TEST kill_brick $V0 $H0 $B0/${V0}2 ++TEST kill_brick $V0 $H0 $B0/${V0}4 + TEST chmod 755 $M0/tmp1 + TEST $CLI volume start $V0 force +-TEST kill_brick $V0 $H0 $B0/${V0}3 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++TEST kill_brick $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4 + + TEST chmod 766 $M0/tmp1 + + TEST $CLI volume start $V0 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 5 + + EXPECT 4 get_pending_heal_count $V0 + +-TEST kill_brick $V0 $H0 $B0/${V0}4 ++TEST kill_brick $V0 $H0 $B0/${V0}0 + TEST chmod 765 $M0/tmp1 + TEST $CLI volume start $V0 force +-TEST kill_brick $V0 $H0 $B0/${V0}5 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4 ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + + TEST chmod 756 $M0/tmp1 + + TEST $CLI volume start $V0 force 
+-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + + EXPECT 6 get_pending_heal_count $V0 + +diff --git a/tests/bugs/bug-1371806.t b/tests/bugs/bug-1371806.t +new file mode 100644 +index 0000000..7dc1613 +--- /dev/null ++++ b/tests/bugs/bug-1371806.t +@@ -0,0 +1,80 @@ ++#!/bin/bash ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../dht.rc ++cleanup; ++ ++function get_getfattr { ++ local path=$1 ++ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//' ++} ++ ++function set_fattr { ++ for i in `seq 1 10` ++ do ++ setfattr -n user.foo -v "newabc" ./tmp${i} ++ if [ "$?" = "0" ] ++ then ++ succ=$((succ+1)) ++ else ++ fail=$((fail+1)) ++ fi ++ done ++} ++ ++ ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3,4,5} ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++cd $M0 ++TEST mkdir tmp{1..10} ++ ++##First set user.foo xattr with value abc on all dirs ++ ++TEST setfattr -n user.foo -v "abc" ./tmp{1..10} ++EXPECT "abc" get_getfattr ./tmp{1..10} ++EXPECT "abc" get_getfattr $B0/${V0}5/tmp{1..10} ++ ++TEST kill_brick $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "5" online_brick_count ++ ++succ=fail=0 ++## set user.foo xattr with value newabc after kill one brick ++set_fattr ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count ++ ++cd - ++TEST umount $M0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++cd $M0 ++## At this point dht code will heal xattr on down brick only for those dirs ++## hashed subvol was up at the time of update xattr ++TEST stat ./tmp{1..10} ++ ++## Count the user.foo xattr value with abc on mount point and compare with fail value ++count=`getfattr -n user.foo ./tmp{1..10} | grep "user.foo" | grep -iw "abc" | wc -l` ++EXPECT "$fail" echo $count ++ ++## Count the user.foo xattr value with newabc on mount point and compare with succ value ++count=`getfattr -n user.foo ./tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l` ++EXPECT "$succ" echo $count ++ ++## Count the user.foo xattr value with abc on brick and compare with succ value ++count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "abc" | wc -l` ++EXPECT "$fail" echo $count ++ ++## Count the user.foo xattr value with newabc on brick and compare with succ value ++count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l` ++EXPECT "$succ" echo $count ++ ++ ++cd - ++cleanup ++exit +diff --git a/tests/bugs/bug-1371806_1.t b/tests/bugs/bug-1371806_1.t +new file mode 100644 +index 0000000..44a57a9 +--- /dev/null ++++ b/tests/bugs/bug-1371806_1.t +@@ -0,0 +1,49 @@ ++#!/bin/bash ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. 
$(dirname $0)/../dht.rc ++cleanup; ++ ++function get_getfattr { ++ local path=$1 ++ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//' ++} ++ ++function remove_mds_xattr { ++ ++ for i in `seq 1 10` ++ do ++ setfattr -x trusted.glusterfs.dht.mds $1/tmp${i} 2> /dev/null ++ done ++} ++ ++ ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3} ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++cd $M0 ++TEST mkdir tmp{1..10} ++ ++##Remove internal mds xattr from all directory ++remove_mds_xattr $B0/${V0}0 ++remove_mds_xattr $B0/${V0}1 ++remove_mds_xattr $B0/${V0}2 ++remove_mds_xattr $B0/${V0}3 ++ ++cd - ++umount $M0 ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; ++cd $M0 ++ ++TEST setfattr -n user.foo -v "abc" ./tmp{1..10} ++EXPECT "abc" get_getfattr ./tmp{1..10} ++ ++cd - ++cleanup ++exit +diff --git a/tests/bugs/bug-1371806_2.t b/tests/bugs/bug-1371806_2.t +new file mode 100644 +index 0000000..e6aa8e7 +--- /dev/null ++++ b/tests/bugs/bug-1371806_2.t +@@ -0,0 +1,52 @@ ++#!/bin/bash ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../dht.rc ++cleanup; ++ ++function get_getfattr { ++ local path=$1 ++ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//' ++} ++ ++function remove_mds_xattr { ++ ++ for i in `seq 1 10` ++ do ++ setfattr -x trusted.glusterfs.dht.mds $1/tmp${i} 2> /dev/null ++ done ++} ++ ++ ++ ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3} ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0; ++cd $M0 ++TEST mkdir tmp{1..10} ++ ++##Remove internal mds xattr from all directory ++remove_mds_xattr $B0/${V0}0 ++remove_mds_xattr $B0/${V0}1 ++remove_mds_xattr $B0/${V0}2 ++remove_mds_xattr $B0/${V0}3 ++ ++##First set user.foo xattr with value abc on all dirs ++ ++TEST setfattr -n user.foo -v "abc" ./tmp{1..10} ++EXPECT "abc" get_getfattr ./tmp{1..10} ++EXPECT "abc" get_getfattr $B0/${V0}0/tmp{1..10} ++EXPECT "abc" get_getfattr $B0/${V0}1/tmp{1..10} ++EXPECT "abc" get_getfattr $B0/${V0}2/tmp{1..10} ++EXPECT "abc" get_getfattr $B0/${V0}3/tmp{1..10} ++ ++cd - ++TEST umount $M0 ++ ++cd - ++cleanup ++exit +diff --git a/tests/bugs/bug-1371806_3.t b/tests/bugs/bug-1371806_3.t +new file mode 100644 +index 0000000..cb13f37 +--- /dev/null ++++ b/tests/bugs/bug-1371806_3.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../dht.rc ++cleanup; ++ ++function get_getfattr { ++ local path=$1 ++ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//' ++} ++ ++function set_fattr { ++ for i in `seq 1 10` ++ do ++ setfattr -n user.foo -v "newabc" ./tmp${i} ++ if [ "$?" 
= "0" ] ++ then ++ succ=$((succ+1)) ++ else ++ fail=$((fail+1)) ++ fi ++ done ++} ++ ++ ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3} ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0; ++ ++cd $M0 ++TEST mkdir tmp{1..10} ++ ++TEST kill_brick $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "3" online_brick_count ++ ++succ=fail=0 ++## set user.foo xattr with value newabc after kill one brick ++set_fattr ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" online_brick_count ++ ++cd - ++TEST umount $M0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0; ++ ++cd $M0 ++## At this point dht code will heal xattr on down brick only for those dirs ++## hashed subvol was up at the time of update xattr ++TEST stat ./tmp{1..10} ++ ++ ++## Count the user.foo xattr value with newabc on brick and compare with succ value ++count=`getfattr -n user.foo $B0/${V0}3/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l` ++EXPECT "$succ" echo $count ++ ++ ++cd - ++cleanup ++exit +diff --git a/tests/bugs/bug-1371806_acl.t b/tests/bugs/bug-1371806_acl.t +new file mode 100644 +index 0000000..aa41e04 +--- /dev/null ++++ b/tests/bugs/bug-1371806_acl.t +@@ -0,0 +1,90 @@ ++#!/bin/bash ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++ ++cleanup; ++TEST useradd tmpuser ++ ++function set_facl_user { ++ for i in `seq 1 10` ++ do ++ setfacl -m u:tmpuser:rw ./tmp${i} ++ if [ "$?" = "0" ] ++ then ++ succ=$((succ+1)) ++ else ++ fail=$((fail+1)) ++ fi ++ done ++} ++ ++function set_facl_default { ++ for i in `seq 1 10` ++ do ++ setfacl -m d:o:rw ./tmp${i} ++ if [ "$?" 
= "0" ] ++ then ++ succ1=$((succ1+1)) ++ else ++ fail1=$((fail1+1)) ++ fi ++ done ++} ++ ++ ++ ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3,4,5} ++TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=$V0 --acl --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++cd $M0 ++TEST mkdir tmp{1..10} ++TEST setfacl -m u:tmpuser:rwx ./tmp{1..10} ++count=`getfacl -p $M0/tmp{1..10} | grep -c "user:tmpuser:rwx"` ++EXPECT "10" echo $count ++TEST setfacl -m d:o:rwx ./tmp{1..10} ++count=`getfacl -p $M0/tmp{1..10} | grep -c "default:other::rwx"` ++EXPECT "10" echo $count ++count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "user:tmpuser:rwx"` ++EXPECT "10" echo $count ++count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "default:other::rwx"` ++EXPECT "10" echo $count ++ ++ ++TEST kill_brick $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "5" online_brick_count ++ ++succ=fail=0 ++## Update acl attributes on dir after kill one brick ++set_facl_user ++succ1=fail1=0 ++set_facl_default ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count ++ ++cd - ++TEST umount $M0 ++TEST glusterfs --volfile-id=$V0 --acl --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++cd $M0 ++## At this point dht will heal xatts on down brick only for those hashed_subvol ++## was up at the time of updated xattrs ++TEST stat ./tmp{1..10} ++ ++## Compare succ value with updated acl attributes ++count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "user:tmpuser:rw-"` ++EXPECT "$succ" echo $count ++ ++ ++count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "default:other::rw-"` ++EXPECT "$succ1" echo $count ++ ++cd - ++userdel --force tmpuser ++cleanup +diff --git a/tests/bugs/distribute/bug-862967.t b/tests/bugs/distribute/bug-862967.t +index 09dac37..2fb0848 100644 +--- a/tests/bugs/distribute/bug-862967.t ++++ b/tests/bugs/distribute/bug-862967.t +@@ -37,7 +37,7 @@ chown 1:1 $M0/dir; + + # Kill a brick process + +-kill_brick $V0 $H0 $B0/${V0}1 ++kill_brick $V0 $H0 $B0/${V0}2 + # change dir ownership + NEW_UID=36; + NEW_GID=36; +@@ -51,9 +51,8 @@ sleep 10; + ls -l $M0/dir; + + # check if uid/gid is healed on backend brick which was taken down +-BACKEND_UID=`stat -c %u $B0/${V0}1/dir`; +-BACKEND_GID=`stat -c %g $B0/${V0}1/dir`; +- ++BACKEND_UID=`stat -c %u $B0/${V0}2/dir`; ++BACKEND_GID=`stat -c %g $B0/${V0}2/dir`; + + EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 5641330..f1e6a92 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -110,6 +110,24 @@ int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata, + int + dht_rmdir_unlock (call_frame_t *frame, xlator_t *this); + ++char *xattrs_to_heal[] = { ++ "user.", ++ POSIX_ACL_ACCESS_XATTR, ++ POSIX_ACL_DEFAULT_XATTR, ++ QUOTA_LIMIT_KEY, ++ QUOTA_LIMIT_OBJECTS_KEY, ++ GF_SELINUX_XATTR_KEY, ++ NULL ++}; ++ ++/* Return true if key exists in array ++*/ ++static gf_boolean_t ++dht_match_xattr (const char *key) ++{ ++ return gf_get_index_by_elem (xattrs_to_heal, (char *)key) >= 0; ++} ++ + int + dht_aggregate_quota_xattr (dict_t *dst, char *key, data_t *value) + { +@@ -205,7 +223,7 @@ int add_opt(char **optsp, const char *opt) + } + + /* Return Choice list from Split brain status */ +-char * ++static char * + getChoices (const char *value) + { + int i = 0; +@@ -428,6 +446,74 
@@ out: + return; + } + ++/* Code to save hashed subvol on inode ctx as a mds subvol ++*/ ++int ++dht_inode_ctx_mdsvol_set (inode_t *inode, xlator_t *this, xlator_t *mds_subvol) ++{ ++ dht_inode_ctx_t *ctx = NULL; ++ int ret = -1; ++ uint64_t ctx_int = 0; ++ gf_boolean_t ctx_free = _gf_false; ++ ++ ++ LOCK (&inode->lock); ++ { ++ ret = __inode_ctx_get (inode, this , &ctx_int); ++ if (ctx_int) { ++ ctx = (dht_inode_ctx_t *)ctx_int; ++ ctx->mds_subvol = mds_subvol; ++ } else { ++ ctx = GF_CALLOC (1, sizeof(*ctx), gf_dht_mt_inode_ctx_t); ++ if (!ctx) ++ goto unlock; ++ ctx->mds_subvol = mds_subvol; ++ ctx_free = _gf_true; ++ ctx_int = (long) ctx; ++ ret = __inode_ctx_set (inode, this, &ctx_int); ++ } ++ } ++unlock: ++ UNLOCK (&inode->lock); ++ if (ret && ctx_free) ++ GF_FREE (ctx); ++ return ret; ++} ++ ++/*Code to get mds subvol from inode ctx */ ++ ++int ++dht_inode_ctx_mdsvol_get (inode_t *inode, xlator_t *this, xlator_t **mdsvol) ++{ ++ dht_inode_ctx_t *ctx = NULL; ++ int ret = -1; ++ ++ if (!mdsvol) ++ return ret; ++ ++ if (__is_root_gfid(inode->gfid)) { ++ (*mdsvol) = FIRST_CHILD (this); ++ return 0; ++ } ++ ++ ret = dht_inode_ctx_get (inode, this, &ctx); ++ ++ if (!ret && ctx) { ++ if (ctx->mds_subvol) { ++ *mdsvol = ctx->mds_subvol; ++ ret = 0; ++ } else { ++ ret = -1; ++ } ++ } ++ ++ return ret; ++} ++ ++ ++ ++ ++ + /* TODO: + - use volumename in xattr instead of "dht" + - use NS locks +@@ -443,6 +529,7 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, + { + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; ++ dht_conf_t *conf = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO ("dht", frame, out); +@@ -450,6 +537,7 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + + local = frame->local; ++ conf = this->private; + ret = op_ret; + + FRAME_SU_UNDO (frame, dht_local_t); +@@ -467,6 +555,8 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, + + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + dht_set_fixed_dir_stat (&local->postparent); ++ /* Delete mds xattr at the time of STACK UNWIND */ ++ GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); + + DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode, + &local->stbuf, local->xattr, &local->postparent); +@@ -492,10 +582,12 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) + int i = 0; + loc_t loc = {0 }; + int8_t is_read_only = 0, layout_anomalies = 0; ++ char gfid_local[GF_UUID_BUF_SIZE] = {0}; + + local = discover_frame->local; + layout = local->layout; + conf = this->private; ++ gf_uuid_unparse(local->gfid, gfid_local); + + LOCK(&discover_frame->lock); + { +@@ -507,6 +599,18 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) + if (!main_frame) + return 0; + ++ /* Code to update all extended attributed from ++ subvol to local->xattr on that internal xattr has found ++ */ ++ if (conf->subvolume_cnt == 1) ++ local->need_xattr_heal = 0; ++ if (local->need_xattr_heal && (local->mds_xattr)) { ++ dht_dir_set_heal_xattr (this, local, local->xattr, ++ local->mds_xattr, NULL, NULL); ++ dict_unref (local->mds_xattr); ++ local->mds_xattr = NULL; ++ } ++ + ret = dict_get_int8 (local->xattr_req, QUOTA_READ_ONLY_KEY, + &is_read_only); + if (ret < 0) +@@ -575,6 +679,26 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) + } + } + ++ if (IA_ISDIR (local->stbuf.ia_type)) { ++ /* Call function to save hashed subvol on inode ctx if ++ internal mds xattr is not present and all subvols 
are up ++ */ ++ if (!local->op_ret && !__is_root_gfid (local->stbuf.ia_gfid)) ++ (void) dht_mark_mds_subvolume (discover_frame, this); ++ ++ if (local->need_xattr_heal && !heal_path) { ++ local->need_xattr_heal = 0; ++ ret = dht_dir_xattr_heal (this, local); ++ if (ret) ++ gf_msg (this->name, GF_LOG_ERROR, ++ ret, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "xattr heal failed for " ++ "directory gfid is %s ", ++ gfid_local); ++ } ++ } ++ + if (source && (heal_path || layout_anomalies)) { + gf_uuid_copy (loc.gfid, local->gfid); + if (gf_uuid_is_null (loc.gfid)) { +@@ -621,10 +745,14 @@ cleanup: + } + done: + dht_set_fixed_dir_stat (&local->postparent); ++ /* Delete mds xattr at the time of STACK UNWIND */ ++ GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); ++ + DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); + return 0; ++ + out: + DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL, + NULL); +@@ -633,6 +761,170 @@ out: + } + + int ++dht_mds_internal_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *hashed_subvol = NULL; ++ dht_conf_t *conf = NULL; ++ int ret = 0; ++ ++ GF_VALIDATE_OR_GOTO (this->name, frame, out); ++ GF_VALIDATE_OR_GOTO (this->name, frame->local, out); ++ ++ local = frame->local; ++ hashed_subvol = cookie; ++ conf = this->private; ++ ++ if (op_ret) { ++ gf_msg_debug (this->name, op_ret, ++ "Failed to set %s on the MDS for path %s. ", ++ conf->mds_xattr_key, local->loc.path); ++ } else { ++ /* Save mds subvol on inode ctx */ ++ ret = dht_inode_ctx_mdsvol_set (local->inode, this, ++ hashed_subvol); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_SET_INODE_CTX_FAILED, ++ "Failed to set mds subvol on inode ctx" ++ " %s for %s", hashed_subvol->name, ++ local->loc.path); ++ } ++ } ++out: ++ DHT_STACK_DESTROY (frame); ++ return 0; ++} ++ ++ ++ ++/* Code to save hashed subvol on inode ctx only while no ++ mds xattr is availble and all subvols are up for fresh ++*/ ++int ++dht_mark_mds_subvolume (call_frame_t *frame, xlator_t *this) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *hashed_subvol = NULL; ++ int i = 0; ++ gf_boolean_t vol_down = _gf_false; ++ dht_conf_t *conf = 0; ++ int ret = -1; ++ char gfid_local[GF_UUID_BUF_SIZE] = {0}; ++ dict_t *xattrs = NULL; ++ dht_local_t *copy_local = NULL; ++ call_frame_t *xattr_frame = NULL; ++ int32_t zero[1] = {0}; ++ ++ ++ GF_VALIDATE_OR_GOTO ("dht", frame, out); ++ GF_VALIDATE_OR_GOTO ("dht", this, out); ++ GF_VALIDATE_OR_GOTO (this->name, frame->local, out); ++ GF_VALIDATE_OR_GOTO (this->name, this->private, out); ++ ++ local = frame->local; ++ conf = this->private; ++ gf_uuid_unparse(local->gfid, gfid_local); ++ ++ ++ /* Code to update hashed subvol consider as a mds subvol ++ and save on inode ctx if all subvols are up and no internal ++ xattr has been set yet ++ */ ++ if (!dict_get (local->xattr, conf->mds_xattr_key)) { ++ /* It means no internal MDS xattr has been set yet ++ */ ++ /* Check the status of all subvol are up ++ */ ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (!conf->subvolume_status[i]) { ++ vol_down = _gf_true; ++ break; ++ } ++ } ++ if (vol_down) { ++ ret = 0; ++ gf_msg_debug (this->name, 0, ++ "subvol %s is down. 
Unable to " ++ " save mds subvol on inode for " ++ " path %s gfid is %s " , ++ conf->subvolumes[i]->name, local->loc.path, ++ gfid_local); ++ goto out; ++ } ++ /* Calculate hashed subvol based on inode and ++ parent inode ++ */ ++ hashed_subvol = dht_inode_get_hashed_subvol (local->inode, ++ this, &local->loc); ++ if (!hashed_subvol) { ++ gf_msg (this->name, GF_LOG_DEBUG, 0, ++ DHT_MSG_HASHED_SUBVOL_GET_FAILED, ++ "Failed to get hashed subvol for path %s" ++ " gfid is %s ", ++ local->loc.path, gfid_local); ++ } else { ++ xattrs = dict_new (); ++ if (!xattrs) { ++ gf_msg (this->name, GF_LOG_ERROR, ENOMEM, ++ DHT_MSG_NO_MEMORY, "dict_new failed"); ++ ret = -1; ++ goto out; ++ } ++ /* Add internal MDS xattr on disk for hashed subvol ++ */ ++ ret = dht_dict_set_array (xattrs, conf->mds_xattr_key, zero, 1); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, ++ DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary" ++ " value:key = %s for " ++ "path %s", conf->mds_xattr_key, ++ local->loc.path); ++ ret = -1; ++ goto out; ++ } ++ xattr_frame = create_frame (this, this->ctx->pool); ++ if (!xattr_frame) { ++ ret = -1; ++ goto out; ++ } ++ copy_local = dht_local_init (xattr_frame, &(local->loc), ++ NULL, 0); ++ if (!copy_local) { ++ ret = -1; ++ DHT_STACK_DESTROY (xattr_frame); ++ goto out; ++ } ++ copy_local->stbuf = local->stbuf; ++ if (!copy_local->inode) ++ copy_local->inode = inode_ref (local->inode); ++ gf_uuid_copy (copy_local->loc.gfid, local->gfid); ++ STACK_WIND_COOKIE (xattr_frame, dht_mds_internal_setxattr_cbk, ++ hashed_subvol, hashed_subvol, ++ hashed_subvol->fops->setxattr, ++ &local->loc, xattrs, 0, NULL); ++ ret = 0; ++ } ++ } else { ++ ret = 0; ++ gf_msg_debug (this->name, 0, ++ "internal xattr %s is present on subvol" ++ "on path %s gfid is %s " , conf->mds_xattr_key, ++ local->loc.path, gfid_local); ++ } ++ ++ ++out: ++ if (xattrs) ++ dict_unref (xattrs); ++ return ret; ++} ++ ++ ++ ++int + dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, dict_t *xattr, +@@ -644,11 +936,15 @@ dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + dht_layout_t *layout = NULL; + int ret = -1; + int is_dir = 0; ++ int32_t check_mds = 0; + int is_linkfile = 0; + int attempt_unwind = 0; + dht_conf_t *conf = 0; +- char gfid_local[GF_UUID_BUF_SIZE] = {0}; +- char gfid_node[GF_UUID_BUF_SIZE] = {0}; ++ char gfid_local[GF_UUID_BUF_SIZE] = {0}; ++ char gfid_node[GF_UUID_BUF_SIZE] = {0}; ++ int32_t mds_xattr_val[1] = {0}; ++ int errst = 0; ++ + + GF_VALIDATE_OR_GOTO ("dht", frame, out); + GF_VALIDATE_OR_GOTO ("dht", this, out); +@@ -743,6 +1039,41 @@ dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + dht_iatt_merge (this, &local->stbuf, stbuf, prev); + dht_iatt_merge (this, &local->postparent, postparent, + prev); ++ if (!dict_get (xattr, conf->mds_xattr_key)) { ++ goto unlock; ++ } else { ++ gf_msg_debug (this->name, 0, ++ "internal xattr %s is present on subvol" ++ "on path %s gfid is %s " , ++ conf->mds_xattr_key, ++ local->loc.path, gfid_local); ++ } ++ check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key, ++ mds_xattr_val, 1, &errst); ++ /* save mds subvol on inode ctx */ ++ ret = dht_inode_ctx_mdsvol_set (local->inode, this, ++ prev); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_SET_INODE_CTX_FAILED, ++ "Failed to set hashed subvol for %s vol is %s", ++ local->loc.path, prev->name); ++ } ++ ++ if ((check_mds < 0) && !errst) { ++ local->mds_xattr = 
dict_ref (xattr); ++ gf_msg_debug (this->name, 0, ++ "Value of %s is not zero on mds subvol" ++ "so xattr needs to be healed on non mds" ++ " path is %s and vol name is %s " ++ " gfid is %s" , ++ conf->mds_xattr_key, ++ local->loc.path, ++ prev->name, gfid_local); ++ local->need_xattr_heal = 1; ++ local->mds_subvol = prev; ++ } ++ + } + unlock: + UNLOCK (&frame->lock); +@@ -841,6 +1172,99 @@ err: + return 0; + } + ++/* Get the value of key from dict in the bytewise and save in array after ++ convert from network byte order to host byte order ++*/ ++int32_t ++dht_dict_get_array (dict_t *dict, char *key, int32_t value[], int32_t size, int *errst) ++{ ++ void *ptr = NULL; ++ int32_t len = -1; ++ int32_t vindex = -1; ++ int32_t err = -1; ++ int ret = 0; ++ ++ if (dict == NULL) { ++ (*errst) = -1; ++ return -EINVAL; ++ } ++ err = dict_get_ptr_and_len(dict, key, &ptr, &len); ++ if (err != 0) { ++ (*errst) = -1; ++ return err; ++ } ++ ++ if (len != (size * sizeof (int32_t))) { ++ (*errst) = -1; ++ return -EINVAL; ++ } ++ ++ memset (value, 0, size * sizeof(int32_t)); ++ for (vindex = 0; vindex < size; vindex++) { ++ value[vindex] = ntoh32(*((int32_t *)ptr + vindex)); ++ if (value[vindex] < 0) ++ ret = -1; ++ } ++ ++ return ret; ++} ++ ++ ++/* Code to call syntask to heal custom xattr from hashed subvol ++ to non hashed subvol ++*/ ++int ++dht_dir_xattr_heal (xlator_t *this, dht_local_t *local) ++{ ++ dht_local_t *copy_local = NULL; ++ call_frame_t *copy = NULL; ++ int ret = -1; ++ char gfid_local[GF_UUID_BUF_SIZE] = {0}; ++ ++ if (local->gfid) { ++ gf_uuid_unparse(local->gfid, gfid_local); ++ } else { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "No gfid exists for path %s " ++ "so healing xattr is not possible", ++ local->loc.path); ++ goto out; ++ } ++ ++ copy = create_frame (this, this->ctx->pool); ++ if (copy) { ++ copy_local = dht_local_init (copy, &(local->loc), NULL, 0); ++ if (!copy_local) { ++ gf_msg (this->name, GF_LOG_ERROR, ENOMEM, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "Memory allocation failed " ++ "for path %s gfid %s ", ++ local->loc.path, gfid_local); ++ DHT_STACK_DESTROY (copy); ++ } else { ++ copy_local->stbuf = local->stbuf; ++ gf_uuid_copy (copy_local->loc.gfid, local->gfid); ++ copy_local->mds_subvol = local->mds_subvol; ++ FRAME_SU_DO (copy, dht_local_t); ++ ret = synctask_new (this->ctx->env, dht_dir_heal_xattrs, ++ dht_dir_heal_xattrs_done, ++ copy, copy); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, ENOMEM, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "Synctask creation failed to heal xattr " ++ "for path %s gfid %s ", ++ local->loc.path, gfid_local); ++ DHT_STACK_DESTROY (copy); ++ } ++ } ++ } ++out: ++ return ret; ++} ++ ++ + + int + dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +@@ -849,13 +1273,17 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *postparent) + { + dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + int is_dir = 0; +- char gfid_local[GF_UUID_BUF_SIZE] = {0}; +- char gfid_node[GF_UUID_BUF_SIZE] = {0}; ++ int32_t check_mds = 0; ++ int errst = 0; ++ char gfid_local[GF_UUID_BUF_SIZE] = {0}; ++ char gfid_node[GF_UUID_BUF_SIZE] = {0}; ++ int32_t mds_xattr_val[1] = {0}; + + GF_VALIDATE_OR_GOTO ("dht", frame, out); + GF_VALIDATE_OR_GOTO ("dht", this, out); +@@ -865,17 +1293,20 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + local = 
frame->local; + prev = cookie; ++ conf = this->private; + + layout = local->layout; + +- if (!op_ret && gf_uuid_is_null (local->gfid)) ++ if (!op_ret && gf_uuid_is_null (local->gfid)) { + memcpy (local->gfid, stbuf->ia_gfid, 16); ++ } ++ if (local->gfid) ++ gf_uuid_unparse(local->gfid, gfid_local); + + /* Check if the gfid is different for file from other node */ + if (!op_ret && gf_uuid_compare (local->gfid, stbuf->ia_gfid)) { + + gf_uuid_unparse(stbuf->ia_gfid, gfid_node); +- gf_uuid_unparse(local->gfid, gfid_local); + + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_GFID_MISMATCH, +@@ -930,6 +1361,41 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + dht_iatt_merge (this, &local->stbuf, stbuf, prev); + dht_iatt_merge (this, &local->postparent, postparent, prev); ++ ++ if (!dict_get (xattr, conf->mds_xattr_key)) { ++ gf_msg_debug (this->name, 0, ++ "Internal xattr %s is not present " ++ " on path %s gfid is %s " , ++ conf->mds_xattr_key, ++ local->loc.path, gfid_local); ++ goto unlock; ++ } else { ++ /* Save mds subvol on inode ctx */ ++ ret = dht_inode_ctx_mdsvol_set (local->inode, this, ++ prev); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_SET_INODE_CTX_FAILED, ++ "Failed to set hashed subvol for %s vol is %s", ++ local->loc.path, prev->name); ++ } ++ } ++ check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key, ++ mds_xattr_val, 1, &errst); ++ if ((check_mds < 0) && !errst) { ++ local->mds_xattr = dict_ref (xattr); ++ gf_msg_debug (this->name, 0, ++ "Value of %s is not zero on hashed subvol " ++ "so xattr needs to be heal on non hashed" ++ " path is %s and vol name is %s " ++ " gfid is %s" , ++ conf->mds_xattr_key, ++ local->loc.path, ++ prev->name, gfid_local); ++ local->need_xattr_heal = 1; ++ local->mds_subvol = prev; ++ } ++ + } + unlock: + UNLOCK (&frame->lock); +@@ -938,7 +1404,20 @@ unlock: + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { +- gf_uuid_copy (local->loc.gfid, local->gfid); ++ /* No need to call xattr heal code if volume count is 1 ++ */ ++ if (conf->subvolume_cnt == 1) ++ local->need_xattr_heal = 0; ++ ++ /* Code to update all extended attributed from hashed subvol ++ to local->xattr ++ */ ++ if (local->need_xattr_heal && (local->mds_xattr)) { ++ dht_dir_set_heal_xattr (this, local, local->xattr, ++ local->mds_xattr, NULL, NULL); ++ dict_unref (local->mds_xattr); ++ local->mds_xattr = NULL; ++ } + + if (local->need_selfheal) { + local->need_selfheal = 0; +@@ -957,6 +1436,9 @@ unlock: + } + + dht_layout_set (this, local->inode, layout); ++ if (!dict_get (local->xattr, conf->mds_xattr_key) || ++ local->need_xattr_heal) ++ goto selfheal; + } + + if (local->inode) { +@@ -971,6 +1453,8 @@ unlock: + + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + dht_set_fixed_dir_stat (&local->postparent); ++ /* Delete mds xattr at the time of STACK UNWIND */ ++ GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); + DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); +@@ -1027,6 +1511,9 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + char gfid[GF_UUID_BUF_SIZE] = {0}; + uint32_t vol_commit_hash = 0; + xlator_t *subvol = NULL; ++ int32_t check_mds = 0; ++ int errst = 0; ++ int32_t mds_xattr_val[1] = {0}; + + GF_VALIDATE_OR_GOTO ("dht", frame, err); + GF_VALIDATE_OR_GOTO ("dht", this, err); +@@ -1051,6 +1538,9 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + 
LOCK (&frame->lock); + { ++ if (gf_uuid_is_null (local->gfid)) { ++ memcpy (local->gfid, local->loc.gfid, 16); ++ } + + gf_msg_debug (this->name, op_errno, + "revalidate lookup of %s " +@@ -1136,6 +1626,7 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->prebuf.ia_prot = stbuf->ia_prot; + } + } ++ + if (local->stbuf.ia_type != IA_INVAL) + { + if ((local->stbuf.ia_gid != stbuf->ia_gid) || +@@ -1146,6 +1637,44 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + local->need_selfheal = 1; + } + } ++ if (!dict_get (xattr, conf->mds_xattr_key)) { ++ gf_msg_debug (this->name, 0, ++ "internal xattr %s is not present" ++ " on path %s gfid is %s " , ++ conf->mds_xattr_key, ++ local->loc.path, gfid); ++ } else { ++ check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key, ++ mds_xattr_val, 1, &errst); ++ if (local->mds_subvol == prev) { ++ local->mds_stbuf.ia_gid = stbuf->ia_gid; ++ local->mds_stbuf.ia_uid = stbuf->ia_uid; ++ local->mds_stbuf.ia_prot = stbuf->ia_prot; ++ } ++ /* save mds subvol on inode ctx */ ++ ret = dht_inode_ctx_mdsvol_set (local->inode, this, ++ prev); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_SET_INODE_CTX_FAILED, ++ "Failed to set MDS subvol for %s vol is %s", ++ local->loc.path, prev->name); ++ } ++ if ((check_mds < 0) && !errst) { ++ local->mds_xattr = dict_ref (xattr); ++ gf_msg_debug (this->name, 0, ++ "Value of %s is not zero on " ++ "hashed subvol so xattr needs to" ++ " be healed on non hashed" ++ " path is %s and vol name is %s " ++ " gfid is %s" , ++ conf->mds_xattr_key, ++ local->loc.path, ++ prev->name, gfid); ++ local->need_xattr_heal = 1; ++ local->mds_subvol = prev; ++ } ++ } + ret = dht_layout_dir_mismatch (this, layout, + prev, &local->loc, + xattr); +@@ -1215,13 +1744,52 @@ out: + && (conf && conf->unhashed_sticky_bit)) { + local->stbuf.ia_prot.sticky = 1; + } ++ /* No need to call heal code if volume count is 1 ++ */ ++ if (conf->subvolume_cnt == 1) ++ local->need_xattr_heal = 0; ++ ++ /* Code to update all extended attributed from hashed subvol ++ to local->xattr ++ */ ++ if (local->need_xattr_heal && (local->mds_xattr)) { ++ dht_dir_set_heal_xattr (this, local, local->xattr, ++ local->mds_xattr, NULL, NULL); ++ dict_unref (local->mds_xattr); ++ local->mds_xattr = NULL; ++ } ++ /* Call function to save hashed subvol on inode ctx if ++ internal mds xattr is not present and all subvols are up ++ */ ++ if (inode && !__is_root_gfid (inode->gfid) && ++ (!local->op_ret) && (IA_ISDIR (local->stbuf.ia_type))) ++ (void) dht_mark_mds_subvolume (frame, this); ++ ++ if (local->need_xattr_heal) { ++ local->need_xattr_heal = 0; ++ ret = dht_dir_xattr_heal (this, local); ++ if (ret) ++ gf_msg (this->name, GF_LOG_ERROR, ++ ret, DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "xattr heal failed for directory %s " ++ " gfid %s ", local->loc.path, ++ gfid); ++ } + if (local->need_selfheal) { + local->need_selfheal = 0; +- gf_uuid_copy (local->gfid, local->stbuf.ia_gfid); +- local->stbuf.ia_gid = local->prebuf.ia_gid; +- local->stbuf.ia_uid = local->prebuf.ia_uid; +- if (__is_root_gfid(local->stbuf.ia_gfid)) ++ if (!__is_root_gfid (inode->gfid)) { ++ gf_uuid_copy (local->gfid, local->mds_stbuf.ia_gfid); ++ if (local->mds_stbuf.ia_gid || local->mds_stbuf.ia_uid) { ++ local->stbuf.ia_gid = local->mds_stbuf.ia_gid; ++ local->stbuf.ia_uid = local->mds_stbuf.ia_uid; ++ } ++ } else { ++ gf_uuid_copy (local->gfid, local->stbuf.ia_gfid); ++ local->stbuf.ia_gid = local->prebuf.ia_gid; ++ local->stbuf.ia_uid = 
local->prebuf.ia_uid; + local->stbuf.ia_prot = local->prebuf.ia_prot; ++ } ++ + copy = create_frame (this, this->ctx->pool); + if (copy) { + copy_local = dht_local_init (copy, &local->loc, +@@ -1229,6 +1797,8 @@ out: + if (!copy_local) + goto cont; + copy_local->stbuf = local->stbuf; ++ copy_local->mds_stbuf = local->mds_stbuf; ++ copy_local->mds_subvol = local->mds_subvol; + copy->local = copy_local; + FRAME_SU_DO (copy, dht_local_t); + ret = synctask_new (this->ctx->env, +@@ -1283,6 +1853,8 @@ cont: + local->op_ret = -1; + local->op_errno = ESTALE; + } ++ /* Delete mds xattr at the time of STACK UNWIND */ ++ GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); + + DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, +@@ -2303,6 +2875,62 @@ out: + + } + ++/* Code to get hashed subvol based on inode and loc ++ First it check if loc->parent and loc->path exist then it get ++ hashed subvol based on loc. ++*/ ++ ++xlator_t * ++dht_inode_get_hashed_subvol (inode_t *inode, xlator_t *this, loc_t *loc) ++{ ++ char *path = NULL; ++ loc_t populate_loc = {0, }; ++ char *name = NULL; ++ xlator_t *hash_subvol = NULL; ++ ++ if (!inode) ++ return hash_subvol; ++ ++ if (loc && loc->parent && loc->path) { ++ if (!loc->name) { ++ name = strrchr (loc->path, '/'); ++ if (name) { ++ loc->name = name + 1; ++ } else { ++ goto out; ++ } ++ } ++ hash_subvol = dht_subvol_get_hashed (this, loc); ++ goto out; ++ } ++ ++ if (!gf_uuid_is_null (inode->gfid)) { ++ populate_loc.inode = inode_ref (inode); ++ populate_loc.parent = inode_parent (populate_loc.inode, ++ NULL, NULL); ++ inode_path (populate_loc.inode, NULL, &path); ++ ++ if (!path) ++ goto out; ++ ++ populate_loc.path = path; ++ if (!populate_loc.name && populate_loc.path) { ++ name = strrchr (populate_loc.path, '/'); ++ if (name) { ++ populate_loc.name = name + 1; ++ ++ } else { ++ goto out; ++ } ++ } ++ hash_subvol = dht_subvol_get_hashed (this, &populate_loc); ++ } ++out: ++ if (populate_loc.inode) ++ loc_wipe (&populate_loc); ++ return hash_subvol; ++} ++ + + int + dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +@@ -2537,6 +3165,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this, + { + xlator_t *subvol = NULL; + xlator_t *hashed_subvol = NULL; ++ xlator_t *mds_subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; +@@ -2587,6 +3216,15 @@ dht_lookup (call_frame_t *frame, xlator_t *this, + local->xattr_req = dict_new (); + } + ++ ret = dict_set_uint32 (local->xattr_req, conf->mds_xattr_key, 4); ++ ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, ++ DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value:key = %s for " ++ "path %s", conf->mds_xattr_key, loc->path); ++ } ++ + /* Nameless lookup */ + + if (gf_uuid_is_null (loc->pargfid) && !gf_uuid_is_null (loc->gfid) && +@@ -2663,6 +3301,14 @@ dht_lookup (call_frame_t *frame, xlator_t *this, + goto err; + } + if (IA_ISDIR (local->inode->ia_type)) { ++ ret = dht_inode_ctx_mdsvol_get (local->inode, this, ++ &mds_subvol); ++ if (ret || !mds_subvol) { ++ gf_msg_debug (this->name, 0, ++ "Failed to get mds subvol for path %s", ++ local->loc.path); ++ } ++ local->mds_subvol = mds_subvol; + local->call_cnt = call_cnt = conf->subvolume_cnt; + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE (frame, dht_revalidate_cbk, +@@ -2851,76 +3497,300 @@ dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + + local->op_ret = 0; + +- local->postparent = *postparent; +- 
local->preparent = *preparent; ++ local->postparent = *postparent; ++ local->preparent = *preparent; ++ ++ if (local->loc.parent) { ++ dht_inode_ctx_time_update (local->loc.parent, this, ++ &local->preparent, 0); ++ dht_inode_ctx_time_update (local->loc.parent, this, ++ &local->postparent, 1); ++ } ++ } ++unlock: ++ UNLOCK (&frame->lock); ++ ++ if (!local->op_ret) { ++ hashed_subvol = dht_subvol_get_hashed (this, &local->loc); ++ if (hashed_subvol && hashed_subvol != local->cached_subvol) { ++ /* ++ * If hashed and cached are different, then we need ++ * to unlink linkfile from hashed subvol if data ++ * file is deleted successfully ++ */ ++ STACK_WIND_COOKIE (frame, dht_unlink_linkfile_cbk, ++ hashed_subvol, hashed_subvol, ++ hashed_subvol->fops->unlink, ++ &local->loc, local->flags, xdata); ++ return 0; ++ } ++ } ++ ++ dht_set_fixed_dir_stat (&local->preparent); ++ dht_set_fixed_dir_stat (&local->postparent); ++ DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno, ++ &local->preparent, &local->postparent, xdata); ++ ++ return 0; ++} ++ ++static int ++dht_common_setxattr_cbk (call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ dict_t *xdata) ++{ ++ DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); ++ return 0; ++} ++ ++ ++ ++int ++dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ int this_call_cnt = 0; ++ xlator_t *prev = NULL; ++ ++ local = frame->local; ++ prev = cookie; ++ ++ LOCK (&frame->lock); ++ { ++ if (op_ret == -1) { ++ local->op_errno = op_errno; ++ gf_msg_debug (this->name, op_errno, ++ "subvolume %s returned -1", ++ prev->name); ++ goto unlock; ++ } ++ ++ local->op_ret = 0; ++ } ++unlock: ++ UNLOCK (&frame->lock); ++ ++ this_call_cnt = dht_frame_return (frame); ++ if (is_last_call (this_call_cnt)) { ++ DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ++ local->op_errno, NULL); ++ } ++ ++ return 0; ++} ++ ++/* Set the value[] of key into dict after convert from ++ host byte order to network byte order ++*/ ++int32_t dht_dict_set_array (dict_t *dict, char *key, int32_t value[], ++ int32_t size) ++{ ++ int ret = -1; ++ int32_t *ptr = NULL; ++ int32_t vindex; ++ ++ if (value == NULL) { ++ return -EINVAL; ++ } ++ ++ ptr = GF_MALLOC(sizeof(int32_t) * size, gf_common_mt_char); ++ if (ptr == NULL) { ++ return -ENOMEM; ++ } ++ for (vindex = 0; vindex < size; vindex++) { ++ ptr[vindex] = hton32(value[vindex]); ++ } ++ ret = dict_set_bin(dict, key, ptr, sizeof(int32_t) * size); ++ if (ret) ++ GF_FREE (ptr); ++ return ret; ++} ++ ++/* Code to wind a xattrop call to add 1 on current mds internal xattr ++ value ++*/ ++int ++dht_setxattr_non_mds_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ int this_call_cnt = 0; ++ int ret = 0; ++ dict_t *xattrop = NULL; ++ int32_t addone[1] = {1}; ++ call_frame_t *prev = NULL; ++ dht_conf_t *conf = NULL; ++ ++ local = frame->local; ++ prev = cookie; ++ conf = this->private; ++ ++ LOCK (&frame->lock); ++ { ++ if (op_ret && !local->op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ gf_msg_debug (this->name, op_errno, ++ "subvolume %s returned -1", ++ prev->this->name); ++ } ++ } ++ UNLOCK (&frame->lock); ++ this_call_cnt = dht_frame_return (frame); ++ ++ if (is_last_call (this_call_cnt)) { ++ if (!local->op_ret) { ++ xattrop = dict_new (); ++ if (!xattrop) { ++ gf_msg (this->name, 
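Note: dht_dict_set_array() above converts each element to network byte order before storing the blob with dict_set_bin(), so the on-disk counter compares equal across bricks of different endianness. A standalone model of the encode and of the matching decode done by dht_dict_get_array() (declared later in dht-common.h), using portable htonl()/ntohl() in place of GlusterFS's hton32():

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

static void
encode_array (int32_t *dst, const int32_t *src, int n)
{
        for (int i = 0; i < n; i++)
                dst[i] = (int32_t) htonl ((uint32_t) src[i]);  /* host -> wire */
}

static void
decode_array (int32_t *dst, const int32_t *src, int n)
{
        for (int i = 0; i < n; i++)
                dst[i] = (int32_t) ntohl ((uint32_t) src[i]);  /* wire -> host */
}

int
main (void)
{
        int32_t addone[1] = {1}, wire[1], back[1];

        encode_array (wire, addone, 1);
        decode_array (back, wire, 1);
        printf ("%d\n", back[0]);   /* 1 */
        return 0;
}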
GF_LOG_ERROR, ++ DHT_MSG_NO_MEMORY, 0, ++ "dictionary creation failed"); ++ ret = -1; ++ goto out; ++ } ++ ret = dht_dict_set_array (xattrop, ++ conf->mds_xattr_key, ++ addone, 1); ++ if (ret != 0) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_DICT_SET_FAILED, ++ "dictionary set array failed "); ++ ret = -1; ++ goto out; ++ } ++ if (local->fop == GF_FOP_SETXATTR) { ++ STACK_WIND (frame, dht_common_xattrop_cbk, ++ local->mds_subvol, ++ local->mds_subvol->fops->xattrop, ++ &local->loc, GF_XATTROP_ADD_ARRAY, ++ xattrop, NULL); ++ } else { ++ STACK_WIND (frame, dht_common_xattrop_cbk, ++ local->mds_subvol, ++ local->mds_subvol->fops->fxattrop, ++ local->fd, GF_XATTROP_ADD_ARRAY, ++ xattrop, NULL); ++ } ++ } else { ++ if (local->fop == GF_FOP_SETXATTR) ++ DHT_STACK_UNWIND (setxattr, frame, 0, 0, local->xdata); ++ else ++ DHT_STACK_UNWIND (fsetxattr, frame, 0, 0, local->xdata); ++ } ++ } ++out: ++ if (xattrop) ++ dict_unref (xattrop); ++ if (ret) { ++ if (local->fop == GF_FOP_SETXATTR) ++ DHT_STACK_UNWIND (setxattr, frame, 0, 0, local->xdata); ++ else ++ DHT_STACK_UNWIND (fsetxattr, frame, 0, 0, local->xdata); ++ } ++ return 0; ++} ++ ++ ++int ++dht_setxattr_mds_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; ++ call_frame_t *prev = NULL; ++ xlator_t *mds_subvol = NULL; ++ int i = 0; ++ ++ local = frame->local; ++ prev = cookie; ++ conf = this->private; ++ mds_subvol = local->mds_subvol; ++ ++ if (op_ret == -1) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ gf_msg_debug (this->name, op_errno, ++ "subvolume %s returned -1", ++ prev->this->name); ++ goto out; ++ } ++ ++ local->op_ret = 0; ++ local->call_cnt = conf->subvolume_cnt - 1; ++ local->xdata = dict_ref (xdata); + +- if (local->loc.parent) { +- dht_inode_ctx_time_update (local->loc.parent, this, +- &local->preparent, 0); +- dht_inode_ctx_time_update (local->loc.parent, this, +- &local->postparent, 1); ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (mds_subvol && (mds_subvol == conf->subvolumes[i])) ++ continue; ++ if (local->fop == GF_FOP_SETXATTR) { ++ STACK_WIND (frame, dht_setxattr_non_mds_cbk, ++ conf->subvolumes[i], ++ conf->subvolumes[i]->fops->setxattr, ++ &local->loc, local->xattr, ++ local->flags, local->xattr_req); ++ } else { ++ STACK_WIND (frame, dht_setxattr_non_mds_cbk, ++ conf->subvolumes[i], ++ conf->subvolumes[i]->fops->fsetxattr, ++ local->fd, local->xattr, ++ local->flags, local->xattr_req); + } + } +-unlock: +- UNLOCK (&frame->lock); + +- if (!local->op_ret) { +- hashed_subvol = dht_subvol_get_hashed (this, &local->loc); +- if (hashed_subvol && +- hashed_subvol != local->cached_subvol) { +- /* +- * If hashed and cached are different, then we need +- * to unlink linkfile from hashed subvol if data +- * file is deleted successfully +- */ +- STACK_WIND_COOKIE (frame, dht_unlink_linkfile_cbk, +- hashed_subvol, hashed_subvol, +- hashed_subvol->fops->unlink, &local->loc, +- local->flags, xdata); +- return 0; +- } ++ return 0; ++out: ++ if (local->fop == GF_FOP_SETXATTR) { ++ DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ++ local->op_errno, xdata); ++ } else { ++ DHT_STACK_UNWIND (fsetxattr, frame, local->op_ret, ++ local->op_errno, xdata); + } + +- dht_set_fixed_dir_stat (&local->preparent); +- dht_set_fixed_dir_stat (&local->postparent); +- DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno, +- &local->preparent, &local->postparent, xdata); +- + return 0; + } + + int 
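Note: both winds above use GF_XATTROP_ADD_ARRAY, under which the brick adds the supplied array element-wise to the stored xattr value while holding its own lock, so concurrent +1 and -1 deltas from different clients cannot overwrite each other. A toy single-process model of that read-modify-write (the names are hypothetical; on a real brick the serialization happens server side):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int32_t stored[1];                /* stands in for the on-disk xattr */

static void
xattrop_add_array (const int32_t *delta, int n)
{
        pthread_mutex_lock (&lock);      /* brick-side serialization */
        for (int i = 0; i < n; i++)
                stored[i] += delta[i];
        pthread_mutex_unlock (&lock);
}

int
main (void)
{
        int32_t subone[1] = {-1}, addone[1] = {1};

        xattrop_add_array (subone, 1);
        xattrop_add_array (addone, 1);
        printf ("%d\n", stored[0]);      /* 0: deltas balanced out */
        return 0;
}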
+-dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +- int op_ret, int op_errno, dict_t *xdata) ++dht_xattrop_mds_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, dict_t *dict, dict_t *xdata) + { + dht_local_t *local = NULL; +- int this_call_cnt = 0; +- xlator_t *prev = NULL; ++ call_frame_t *prev = NULL; + + local = frame->local; + prev = cookie; + +- LOCK (&frame->lock); +- { +- if (op_ret == -1) { +- local->op_errno = op_errno; +- gf_msg_debug (this->name, op_errno, +- "subvolume %s returned -1", +- prev->name); +- goto unlock; +- } +- +- local->op_ret = 0; ++ if (op_ret == -1) { ++ local->op_errno = op_errno; ++ local->op_ret = op_ret; ++ gf_msg_debug (this->name, op_errno, ++ "subvolume %s returned -1", ++ prev->this->name); ++ goto out; + } +-unlock: +- UNLOCK (&frame->lock); + +- this_call_cnt = dht_frame_return (frame); +- if (is_last_call (this_call_cnt)) { +- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, +- local->op_errno, NULL); ++ if (local->fop == GF_FOP_SETXATTR) { ++ STACK_WIND (frame, dht_setxattr_mds_cbk, ++ local->mds_subvol, ++ local->mds_subvol->fops->setxattr, ++ &local->loc, local->xattr, ++ local->flags, local->xattr_req); ++ } else { ++ STACK_WIND (frame, dht_setxattr_mds_cbk, ++ local->mds_subvol, ++ local->mds_subvol->fops->fsetxattr, ++ local->fd, local->xattr, ++ local->flags, local->xattr_req); + } +- ++ return 0; ++out: ++ if (local->fop == GF_FOP_SETXATTR) ++ DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ++ local->op_errno, xdata); ++ else ++ DHT_STACK_UNWIND (fsetxattr, frame, local->op_ret, ++ local->op_errno, xdata); + return 0; + } + +@@ -3371,6 +4241,41 @@ dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + return 0; + } + ++ ++int ++dht_mds_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; ++ ++ VALIDATE_OR_GOTO (frame, out); ++ VALIDATE_OR_GOTO (frame->local, out); ++ VALIDATE_OR_GOTO (this->private, out); ++ ++ conf = this->private; ++ local = frame->local; ++ ++ if (!xattr || (op_ret == -1)) { ++ local->op_ret = op_ret; ++ goto out; ++ } ++ if (dict_get (xattr, conf->xattr_name)) { ++ dict_del (xattr, conf->xattr_name); ++ } ++ local->op_ret = 0; ++ ++ if (!local->xattr) { ++ local->xattr = dict_copy_with_ref (xattr, NULL); ++ } ++ ++out: ++ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, ++ local->xattr, xdata); ++ return 0; ++} ++ ++ + int + dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +@@ -3600,6 +4505,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, + + xlator_t *subvol = NULL; + xlator_t *hashed_subvol = NULL; ++ xlator_t *mds_subvol = NULL; + xlator_t *cached_subvol = NULL; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; +@@ -3642,6 +4548,12 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, + } + + if (key && ++ (strncmp (key, conf->mds_xattr_key, strlen(key)) == 0)) { ++ op_errno = ENOTSUP; ++ goto err; ++ } ++ ++ if (key && + (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) + && DHT_IS_DIR(layout)) { +@@ -3771,26 +4683,53 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, + return 0; + } + +- if (key && (!strcmp (QUOTA_LIMIT_KEY, key) || +- !strcmp (QUOTA_LIMIT_OBJECTS_KEY, key))) { +- /* quota hardlimit and aggregated size of a directory is stored +- * in inode 
contexts of each brick. Hence its good enough that +- * we send getxattr for this key to any brick. +- */ +- local->call_cnt = 1; +- subvol = dht_first_up_subvol (this); +- STACK_WIND (frame, dht_getxattr_cbk, subvol, +- subvol->fops->getxattr, loc, key, xdata); +- return 0; +- } +- + if (cluster_handle_marker_getxattr (frame, loc, key, conf->vol_uuid, + dht_getxattr_unwind, + dht_marker_populate_args) == 0) + return 0; + + if (DHT_IS_DIR(layout)) { +- cnt = local->call_cnt = layout->cnt; ++ local->call_cnt = conf->subvolume_cnt; ++ cnt = conf->subvolume_cnt; ++ ret = dht_inode_ctx_mdsvol_get (loc->inode, this, &mds_subvol); ++ if (!mds_subvol) { ++ gf_msg (this->name, GF_LOG_INFO, 0, ++ DHT_MSG_HASHED_SUBVOL_GET_FAILED, ++ "Cannot determine MDS, fetching xattr %s randomly" ++ " from a subvol for path %s ", key, loc->path); ++ } else { ++ /* TODO need to handle it, As of now we are ++ choosing availability instead of chossing ++ consistencty, in case of mds_subvol is ++ down winding a getxattr call on other subvol ++ and return xattr ++ */ ++ local->mds_subvol = mds_subvol; ++ for (i = 0; i < cnt; i++) { ++ if (conf->subvolumes[i] == mds_subvol) { ++ if (!conf->subvolume_status[i]) { ++ gf_msg (this->name, ++ GF_LOG_INFO, 0, ++ DHT_MSG_HASHED_SUBVOL_DOWN, ++ "MDS %s is down for path" ++ " path %s so fetching xattr " ++ "%s randomly from a subvol ", ++ local->mds_subvol->name, ++ loc->path, key); ++ ret = 1; ++ } ++ } ++ } ++ } ++ ++ if (!ret && key && local->mds_subvol && dht_match_xattr (key)) { ++ STACK_WIND (frame, dht_mds_getxattr_cbk, ++ local->mds_subvol, ++ local->mds_subvol->fops->getxattr, ++ loc, key, xdata); ++ ++ return 0; ++ } + } else { + cnt = local->call_cnt = 1; + } +@@ -3821,6 +4760,10 @@ dht_fgetxattr (call_frame_t *frame, xlator_t *this, + int op_errno = -1; + int i = 0; + int cnt = 0; ++ xlator_t *mds_subvol = NULL; ++ int ret = -1; ++ dht_conf_t *conf = NULL; ++ char gfid[GF_UUID_BUF_SIZE] = {0}; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); +@@ -3828,6 +4771,8 @@ dht_fgetxattr (call_frame_t *frame, xlator_t *this, + VALIDATE_OR_GOTO (fd->inode, err); + VALIDATE_OR_GOTO (this->private, err); + ++ conf = this->private; ++ + local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR); + if (!local) { + op_errno = ENOMEM; +@@ -3852,15 +4797,63 @@ dht_fgetxattr (call_frame_t *frame, xlator_t *this, + } + } + ++ if (fd->inode) ++ gf_uuid_unparse(fd->inode->gfid, gfid); ++ + if ((fd->inode->ia_type == IA_IFDIR) + && key + && (strncmp (key, GF_XATTR_LOCKINFO_KEY, + strlen (GF_XATTR_LOCKINFO_KEY)) != 0)) { +- cnt = local->call_cnt = layout->cnt; ++ local->call_cnt = conf->subvolume_cnt; ++ cnt = conf->subvolume_cnt; ++ ret = dht_inode_ctx_mdsvol_get (fd->inode, this, &mds_subvol); ++ ++ if (!mds_subvol) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_HASHED_SUBVOL_GET_FAILED, ++ "cannot determine MDS, fetching xattr %s " ++ " randomly from a subvol for gfid %s ", ++ key, gfid); ++ } else { ++ /* TODO need to handle it, As of now we are ++ choosing availability instead of chossing ++ consistencty, in case of hashed_subvol is ++ down winding a getxattr call on other subvol ++ and return xattr ++ */ ++ local->mds_subvol = mds_subvol; ++ for (i = 0; i < cnt; i++) { ++ if (conf->subvolumes[i] == mds_subvol) { ++ if (!conf->subvolume_status[i]) { ++ gf_msg (this->name, ++ GF_LOG_WARNING, 0, ++ DHT_MSG_HASHED_SUBVOL_DOWN, ++ "MDS subvolume %s is down" ++ " for gfid %s so fetching xattr " ++ " %s randomly from a subvol ", ++ local->mds_subvol->name, ++ gfid, 
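Note: as the TODO comments above concede, the directory getxattr paths choose availability over consistency: the MDS copy is preferred, but when the status scan shows the MDS subvolume down, the xattr is fetched from whichever subvolume is live instead of failing the fop. A standalone model of that routing (conf->subvolume_status[] becomes the up[] array):

#include <stdio.h>

static int
pick_subvol (int mds, const int up[], int nsubvols)
{
        if (mds >= 0 && up[mds])
                return mds;              /* consistent: serve from the MDS */
        for (int i = 0; i < nsubvols; i++)
                if (up[i])
                        return i;        /* degraded: any live subvolume */
        return -1;                       /* nothing up: fop must fail */
}

int
main (void)
{
        int up[3] = {1, 0, 1};

        printf ("%d\n", pick_subvol (1, up, 3));   /* MDS down: falls to 0 */
        return 0;
}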
key); ++ ret = 1; ++ } ++ } ++ } ++ } ++ ++ if (!ret && key && local->mds_subvol && ++ dht_match_xattr (key)) { ++ STACK_WIND (frame, dht_mds_getxattr_cbk, ++ local->mds_subvol, ++ local->mds_subvol->fops->fgetxattr, ++ fd, key, NULL); ++ ++ return 0; ++ } ++ + } else { + cnt = local->call_cnt = 1; + } + ++ + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_getxattr_cbk, +@@ -3956,6 +4949,169 @@ out: + return 0; + } + ++/* Function is call by dict_foreach_fnmatch if key is match with ++ user.* and set boolean flag to true ++*/ ++static int ++dht_is_user_xattr (dict_t *this, char *key, data_t *value, void *data) ++{ ++ gf_boolean_t *user_xattr_found = data; ++ *user_xattr_found = _gf_true; ++ return 0; ++} ++ ++ ++/* Common code to wind a (f)setxattr call to set xattr on directory ++*/ ++int ++dht_dir_common_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, ++ fd_t *fd, dict_t *xattr, int flags, dict_t *xdata, ++ int *op_errno) ++ ++{ ++ dict_t *xattrop = NULL; ++ int32_t subone[1] = {-1}; ++ gf_boolean_t uxattr_key_found = _gf_false; ++ xlator_t *mds_subvol = NULL; ++ xlator_t *travvol = NULL; ++ dht_conf_t *conf = NULL; ++ int ret = -1; ++ int i = 0; ++ int call_cnt = 0; ++ dht_local_t *local = NULL; ++ char gfid_local[GF_UUID_BUF_SIZE] = {0}; ++ ++ conf = this->private; ++ local = frame->local; ++ call_cnt = conf->subvolume_cnt; ++ local->flags = flags; ++ ++ if (local->gfid) ++ gf_uuid_unparse(local->gfid, gfid_local); ++ ++ /* Check if any user xattr present in xattr ++ */ ++ dict_foreach_fnmatch (xattr, "user*", dht_is_user_xattr, ++ &uxattr_key_found); ++ ++ /* Check if any custom key xattr present in dict xattr ++ and start index from 1 because user xattr already ++ checked in previous line ++ */ ++ for (i = 1; xattrs_to_heal[i]; i++) ++ if (dict_get (xattr, xattrs_to_heal[i])) ++ uxattr_key_found = _gf_true; ++ ++ /* If there is no custom key xattr present or gfid is root ++ or call_cnt is 1 then wind a (f)setxattr call on all subvols ++ */ ++ if (!uxattr_key_found || __is_root_gfid (local->gfid) || call_cnt == 1) { ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ travvol = conf->subvolumes[i]; ++ if (fd) { ++ STACK_WIND_COOKIE (frame, dht_err_cbk, ++ travvol, travvol, ++ travvol->fops->fsetxattr, ++ fd, xattr, flags, xdata); ++ } else { ++ STACK_WIND_COOKIE (frame, dht_err_cbk, ++ travvol, travvol, ++ travvol->fops->setxattr, ++ loc, xattr, flags, xdata); ++ } ++ } ++ ++ return 0; ++ } ++ ++ /* Calculate hash subvol based on inode and parent inode ++ */ ++ if (fd) { ++ ret = dht_inode_ctx_mdsvol_get (fd->inode, this, &mds_subvol); ++ } else { ++ ret = dht_inode_ctx_mdsvol_get (loc->inode, this, &mds_subvol); ++ } ++ if (ret || !mds_subvol) { ++ if (fd) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_HASHED_SUBVOL_GET_FAILED, ++ "Failed to get mds subvol for fd %p" ++ "gfid is %s ", fd, gfid_local); ++ } else { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_HASHED_SUBVOL_GET_FAILED, ++ "Failed to get mds subvol for path %s" ++ "gfid is %s ", loc->path, gfid_local); ++ } ++ (*op_errno) = ENOENT; ++ goto err; ++ } ++ ++ local->mds_subvol = mds_subvol; ++ ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (conf->subvolumes[i] == mds_subvol) { ++ if (!conf->subvolume_status[i]) { ++ gf_msg (this->name, GF_LOG_WARNING, ++ 0, DHT_MSG_HASHED_SUBVOL_DOWN, ++ "MDS subvol is down for path " ++ " %s gfid is %s Unable to set xattr " , ++ local->loc.path, gfid_local); ++ (*op_errno) = ENOTCONN; ++ goto err; ++ } ++ } ++ } ++ ++ if 
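Note: dht_dir_common_setxattr() above detects user-namespace keys with dict_foreach_fnmatch(); this hunk matches "user*" while the copy helper added to dht-helper.c further down matches "user.*". A standalone check using the stricter of the two patterns:

#include <fnmatch.h>
#include <stdio.h>

static int
is_user_xattr (const char *key)
{
        return fnmatch ("user.*", key, 0) == 0;   /* fnmatch returns 0 on match */
}

int
main (void)
{
        printf ("%d\n", is_user_xattr ("user.mime-type"));   /* 1 */
        printf ("%d\n", is_user_xattr ("trusted.gfid"));     /* 0 */
        return 0;
}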
(uxattr_key_found) { ++ xattrop = dict_new (); ++ if (!xattrop) { ++ gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ++ 0, "dictionary creation failed for path %s " ++ "for gfid is %s ", local->loc.path, gfid_local); ++ (*op_errno) = ENOMEM; ++ goto err; ++ } ++ local->xattr = dict_ref (xattr); ++ /* Subtract current MDS xattr value to -1 , value of MDS ++ xattr represents no. of times xattr modification failed ++ on non MDS subvols. ++ */ ++ ret = dht_dict_set_array (xattrop, conf->mds_xattr_key, subone, 1); ++ if (ret != 0) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, ++ "dictionary set array failed for path %s " ++ "for gfid is %s ", local->loc.path, gfid_local); ++ if (xattrop) ++ dict_unref (xattrop); ++ (*op_errno) = ret; ++ goto err; ++ } ++ /* Wind a xattrop call to use ref counting approach ++ update mds xattr to -1 before update xattr on ++ hashed subvol and update mds xattr to +1 after update ++ xattr on all non hashed subvol ++ */ ++ if (fd) { ++ STACK_WIND (frame, dht_xattrop_mds_cbk, ++ local->mds_subvol, ++ local->mds_subvol->fops->fxattrop, ++ fd, GF_XATTROP_ADD_ARRAY, xattrop, NULL); ++ } else { ++ STACK_WIND (frame, dht_xattrop_mds_cbk, ++ local->mds_subvol, ++ local->mds_subvol->fops->xattrop, ++ loc, GF_XATTROP_ADD_ARRAY, ++ xattrop, NULL); ++ } ++ if (xattrop) ++ dict_unref (xattrop); ++ } ++ ++ return 0; ++err: ++ return -1; ++} + + + int +@@ -3969,7 +5125,6 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, + dht_layout_t *layout = NULL; + int ret = -1; + int call_cnt = 0; +- int i = 0; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); +@@ -4009,14 +5164,11 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, + local->call_cnt = call_cnt = layout->cnt; + + if (IA_ISDIR (fd->inode->ia_type)) { +- for (i = 0; i < call_cnt; i++) { +- STACK_WIND_COOKIE (frame, dht_err_cbk, +- layout->list[i].xlator, +- layout->list[i].xlator, +- layout->list[i].xlator->fops->fsetxattr, +- fd, xattr, flags, xdata); +- } +- ++ local->hashed_subvol = NULL; ++ ret = dht_dir_common_setxattr (frame, this, NULL, fd, ++ xattr, flags, xdata, &op_errno); ++ if (ret) ++ goto err; + } else { + + local->call_cnt = 1; +@@ -4043,16 +5195,6 @@ err: + return 0; + } + +-static int +-dht_common_setxattr_cbk (call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, int32_t op_errno, +- dict_t *xdata) +-{ +- DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); +- +- return 0; +-} +- + + int + dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +@@ -4190,6 +5332,7 @@ dht_nuke_dir (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp) + return 0; + } + ++ + int + dht_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr, int flags, dict_t *xdata) +@@ -4209,6 +5352,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, + int call_cnt = 0; + uint32_t new_hash = 0; + ++ + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); +@@ -4248,6 +5392,11 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, + } + + local->call_cnt = call_cnt = layout->cnt; ++ tmp = dict_get (xattr, conf->mds_xattr_key); ++ if (tmp) { ++ op_errno = ENOTSUP; ++ goto err; ++ } + + tmp = dict_get (xattr, GF_XATTR_FILE_MIGRATE_KEY); + if (tmp) { +@@ -4423,15 +5572,11 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, + local->xattr_req = xdata ? 
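Note: the block above implements the ref-counting scheme its comment describes: add -1 to the MDS counter, apply the xattr on the MDS, fan out to every other subvolume, and add +1 back only if all of that succeeded. A crash or brick failure part-way through leaves a non-zero residue, which is exactly what the revalidate path earlier in this patch treats as "heal needed". A toy simulation of the invariant (every name here is hypothetical):

#include <stdio.h>

static int mds_counter;                     /* internal MDS xattr value */

static int
dir_setxattr (int nonmds_subvols, int fail_at)
{
        int ok = 1;

        mds_counter += -1;                  /* step 1: xattrop, subone */
        /* step 2: setxattr on the MDS itself, assumed to succeed here */
        for (int i = 0; i < nonmds_subvols; i++)
                if (i == fail_at)
                        ok = 0;             /* step 3: fan-out failure */
        if (ok)
                mds_counter += 1;           /* step 4: xattrop, addone */
        return ok ? 0 : -1;
}

int
main (void)
{
        dir_setxattr (2, -1);
        printf ("clean run:  %d\n", mds_counter);   /* 0 */
        dir_setxattr (2, 1);
        printf ("failed run: %d\n", mds_counter);   /* -1: flags a heal */
        return 0;
}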
dict_ref (xdata) : dict_new (); + + if (IA_ISDIR (loc->inode->ia_type)) { +- +- for (i = 0; i < call_cnt; i++) { +- STACK_WIND_COOKIE (frame, dht_err_cbk, +- layout->list[i].xlator, +- layout->list[i].xlator, +- layout->list[i].xlator->fops->setxattr, +- loc, xattr, flags, xdata); +- } +- ++ local->hashed_subvol = NULL; ++ ret = dht_dir_common_setxattr (frame, this, loc, NULL, ++ xattr, flags, xdata, &op_errno); ++ if (ret) ++ goto err; + } else { + + local->rebalance.xattr = dict_ref (xattr); +@@ -4670,6 +5815,12 @@ dht_removexattr (call_frame_t *frame, xlator_t *this, + local->call_cnt = call_cnt = layout->cnt; + local->key = gf_strdup (key); + ++ if (key && ++ (strncmp (key, conf->mds_xattr_key, strlen(key)) == 0)) { ++ op_errno = ENOTSUP; ++ goto err; ++ } ++ + if (IA_ISDIR (loc->inode->ia_type)) { + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE (frame, dht_removexattr_cbk, +@@ -7641,6 +8792,10 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, + dht_iatt_merge (this, &local->postparent, postparent, prev); + + local->call_cnt = conf->subvolume_cnt - 1; ++ /* Delete internal mds xattr from params dict to avoid store ++ internal mds xattr on other subvols ++ */ ++ dict_del (local->params, conf->mds_xattr_key); + + if (gf_uuid_is_null (local->loc.gfid)) + gf_uuid_copy (local->loc.gfid, stbuf->ia_gfid); +@@ -7652,6 +8807,14 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, + &local->loc, layout); + } + ++ /* Set hashed subvol as a mds subvol on inode ctx */ ++ ret = dht_inode_ctx_mdsvol_set (local->inode, this, hashed_subvol); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, ++ "Failed to set hashed subvol for %s on inode vol is %s", ++ local->loc.path, hashed_subvol->name); ++ } ++ + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == hashed_subvol) + continue; +@@ -7661,6 +8824,7 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, + &local->loc, local->mode, local->umask, + local->params); + } ++ + return 0; + err: + if (local->op_ret != 0) { +@@ -7682,9 +8846,13 @@ dht_mkdir_guard_parent_layout_cbk (call_frame_t *frame, xlator_t *this, + dict_t *params) + { + dht_local_t *local = NULL; ++ dht_conf_t *conf = 0; + char pgfid[GF_UUID_BUF_SIZE] = {0}; ++ int ret = -1; ++ int32_t zero[1] = {0}; + + local = frame->local; ++ conf = this->private; + + gf_uuid_unparse (loc->parent->gfid, pgfid); + +@@ -7698,6 +8866,15 @@ dht_mkdir_guard_parent_layout_cbk (call_frame_t *frame, xlator_t *this, + } + + local->op_ret = -1; ++ /* Add internal MDS xattr on disk for hashed subvol ++ */ ++ ret = dht_dict_set_array (params, conf->mds_xattr_key, zero, 1); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, ++ DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value:key = %s for " ++ "path %s", conf->mds_xattr_key, loc->path); ++ } + + STACK_WIND_COOKIE (frame, dht_mkdir_hashed_cbk, local->hashed_subvol, + local->hashed_subvol, +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 47a2e23..2aa7251 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -28,6 +28,7 @@ + #define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout" + #define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete" + #define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data" ++#define DHT_MDS_STR "mds" + #define GF_DHT_LOOKUP_UNHASHED_ON 1 + #define GF_DHT_LOOKUP_UNHASHED_AUTO 2 + #define DHT_PATHINFO_HEADER "DISTRIBUTE:" +@@ 
-43,6 +44,12 @@ + #define DHT_DIR_STAT_BLOCKS 8 + #define DHT_DIR_STAT_SIZE 4096 + ++/* Array to hold custom xattr keys ++*/ ++extern char *xattrs_to_heal[]; ++ ++ ++ + #include + + /* Array to hold custom xattr keys +@@ -116,6 +123,7 @@ struct dht_inode_ctx { + dht_layout_t *layout; + dht_stat_time_t time; + xlator_t *lock_subvol; ++ xlator_t *mds_subvol; /* This is only used for directories */ + }; + + typedef struct dht_inode_ctx dht_inode_ctx_t; +@@ -262,6 +270,7 @@ struct dht_local { + /* Use stbuf as the postbuf, when we require both + * pre and post attrs */ + struct iatt stbuf; ++ struct iatt mds_stbuf; + struct iatt prebuf; + struct iatt preoldparent; + struct iatt postoldparent; +@@ -273,6 +282,8 @@ struct dht_local { + inode_t *inode; + dict_t *params; + dict_t *xattr; ++ dict_t *mds_xattr; ++ dict_t *xdata; /* dict used to save xdata response by xattr fop */ + dict_t *xattr_req; + dht_layout_t *layout; + size_t size; +@@ -281,7 +292,9 @@ struct dht_local { + xlator_t *dst_hashed, *dst_cached; + xlator_t *cached_subvol; + xlator_t *hashed_subvol; ++ xlator_t *mds_subvol; /* This is use for dir only */ + char need_selfheal; ++ char need_xattr_heal; + int file_count; + int dir_count; + call_frame_t *main_frame; +@@ -365,6 +378,9 @@ struct dht_local { + + /* fd open check */ + gf_boolean_t fd_checked; ++ /* This is use only for directory operation */ ++ int32_t valid; ++ gf_boolean_t heal_layout; + }; + typedef struct dht_local dht_local_t; + +@@ -651,6 +667,7 @@ struct dht_conf { + + /* Support variable xattr names. */ + char *xattr_name; ++ char *mds_xattr_key; + char *link_xattr_name; + char *commithash_xattr_name; + char *wild_xattr_name; +@@ -1333,9 +1350,6 @@ dht_normalize_stats (struct statvfs *buf, unsigned long bsize, + int + add_opt(char **optsp, const char *opt); + +-char * +-getChoices (const char *value); +- + int + dht_aggregate_split_brain_xattr (dict_t *dst, char *key, data_t *value); + +@@ -1345,18 +1359,12 @@ dht_remove_stale_linkto (void *data); + int + dht_remove_stale_linkto_cbk (int ret, call_frame_t *sync_frame, void *data); + +- + int + dht_fd_ctx_set (xlator_t *this, fd_t *fd, xlator_t *subvol); + + int + dht_check_and_open_fd_on_subvol (xlator_t *this, call_frame_t *frame); + +- +- +- +- +- + /* FD fop callbacks */ + + int +@@ -1409,12 +1417,10 @@ int + dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata); + +- + int + dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata); + +- + int + dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata); +@@ -1426,4 +1432,48 @@ int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req); + + int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata, + struct iatt *stbuf); ++ ++/* All custom xattr heal functions */ ++int ++dht_dir_heal_xattrs (void *data); ++ ++int ++dht_dir_heal_xattrs_done (int ret, call_frame_t *sync_frame, void *data); ++ ++void ++dht_aggregate_xattr (dict_t *dst, dict_t *src); ++ ++int32_t ++dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size); ++ ++int ++dht_set_user_xattr (dict_t *dict, char *k, data_t *v, void *data); ++ ++void ++dht_dir_set_heal_xattr (xlator_t *this, dht_local_t *local, dict_t *dst, ++ dict_t *src, int *uret, int *uflag); ++ ++int ++dht_dir_xattr_heal (xlator_t *this, dht_local_t *local); ++ ++int32_t ++dht_dict_get_array (dict_t *dict, char 
*key, int32_t value[], int32_t size, int *errst); ++ ++xlator_t * ++dht_inode_get_hashed_subvol (inode_t *inode, xlator_t *this, loc_t *loc); ++ ++int ++dht_mark_mds_subvolume (call_frame_t *frame, xlator_t *this); ++ ++int ++dht_mds_internal_setxattr_cbk (call_frame_t *frame, void *cookie, ++ xlator_t *this, int op_ret, int op_errno, ++ dict_t *xdata); ++int ++dht_inode_ctx_mdsvol_set (inode_t *inode, xlator_t *this, ++ xlator_t *mds_subvol); ++int ++dht_inode_ctx_mdsvol_get (inode_t *inode, xlator_t *this, ++ xlator_t **mdsvol); ++ + #endif/* _DHT_H */ +diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c +index e56a085..6e20aea 100644 +--- a/xlators/cluster/dht/src/dht-helper.c ++++ b/xlators/cluster/dht/src/dht-helper.c +@@ -767,6 +767,10 @@ dht_local_wipe (xlator_t *this, dht_local_t *local) + + if (local->xattr_req) + dict_unref (local->xattr_req); ++ if (local->mds_xattr) ++ dict_unref (local->mds_xattr); ++ if (local->xdata) ++ dict_unref (local->xdata); + + if (local->selfheal.layout) { + dht_layout_unref (this, local->selfheal.layout); +@@ -2085,12 +2089,24 @@ dht_heal_full_path_done (int op_ret, call_frame_t *heal_frame, void *data) + + call_frame_t *main_frame = NULL; + dht_local_t *local = NULL; ++ xlator_t *this = NULL; ++ int ret = -1; + + local = heal_frame->local; + main_frame = local->main_frame; + local->main_frame = NULL; ++ this = heal_frame->this; + + dht_set_fixed_dir_stat (&local->postparent); ++ if (local->need_xattr_heal) { ++ local->need_xattr_heal = 0; ++ ret = dht_dir_xattr_heal (this, local); ++ if (ret) ++ gf_msg (this->name, GF_LOG_ERROR, ret, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "xattr heal failed for directory %s ", ++ local->loc.path); ++ } + + DHT_STACK_UNWIND (lookup, main_frame, 0, 0, + local->inode, &local->stbuf, local->xattr, +@@ -2254,3 +2270,52 @@ dht_lk_inode_unref (call_frame_t *frame, int32_t op_ret) + out: + return ret; + } ++ ++/* Code to update custom extended attributes from src dict to dst dict ++*/ ++void ++dht_dir_set_heal_xattr (xlator_t *this, dht_local_t *local, dict_t *dst, ++ dict_t *src, int *uret, int *uflag) ++{ ++ int ret = -1; ++ data_t *keyval = NULL; ++ int luret = -1; ++ int luflag = -1; ++ int i = 0; ++ ++ if (!src || !dst) { ++ gf_msg (this->name, GF_LOG_WARNING, EINVAL, ++ DHT_MSG_DICT_SET_FAILED, ++ "src or dst is NULL. 
Failed to set " ++ " dictionary value for path %s", ++ local->loc.path); ++ return; ++ } ++ /* Check if any user xattr present in src dict and set ++ it to dst dict ++ */ ++ luret = dict_foreach_fnmatch (src, "user.*", ++ dht_set_user_xattr, dst); ++ /* Check if any other custom xattr present in src dict ++ and set it to dst dict, here index start from 1 because ++ user xattr already checked in previous statement ++ */ ++ for (i = 1; xattrs_to_heal[i]; i++) { ++ keyval = dict_get (src, xattrs_to_heal[i]); ++ if (keyval) { ++ luflag = 1; ++ ret = dict_set (dst, xattrs_to_heal[i], keyval); ++ if (ret) ++ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, ++ DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value:key = %s for " ++ "path %s", xattrs_to_heal[i], ++ local->loc.path); ++ keyval = NULL; ++ } ++ } ++ if (uret) ++ (*uret) = luret; ++ if (uflag) ++ (*uflag) = luflag; ++} +diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c +index 9709acf..7c596b1 100644 +--- a/xlators/cluster/dht/src/dht-inode-write.c ++++ b/xlators/cluster/dht/src/dht-inode-write.c +@@ -1161,6 +1161,7 @@ dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + dht_iatt_merge (this, &local->stbuf, statpost, prev); + + local->op_ret = 0; ++ local->op_errno = 0; + } + unlock: + UNLOCK (&frame->lock); +@@ -1178,16 +1179,117 @@ unlock: + } + + ++/* Keep the existing code same for all the cases other than regular file */ ++int ++dht_non_mds_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, struct iatt *statpre, ++ struct iatt *statpost, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ int this_call_cnt = 0; ++ xlator_t *prev = NULL; ++ ++ ++ local = frame->local; ++ prev = cookie; ++ ++ LOCK (&frame->lock); ++ { ++ if (op_ret == -1) { ++ gf_msg (this->name, op_errno, 0, ++ 0, "subvolume %s returned -1", ++ prev->name); ++ ++ goto unlock; ++ } ++ ++ dht_iatt_merge (this, &local->prebuf, statpre, prev); ++ dht_iatt_merge (this, &local->stbuf, statpost, prev); ++ ++ local->op_ret = 0; ++ local->op_errno = 0; ++ } ++unlock: ++ UNLOCK (&frame->lock); ++ ++ this_call_cnt = dht_frame_return (frame); ++ if (is_last_call (this_call_cnt)) { ++ dht_inode_ctx_time_set (local->loc.inode, this, &local->stbuf); ++ DHT_STACK_UNWIND (setattr, frame, 0, 0, ++ &local->prebuf, &local->stbuf, xdata); ++ } ++ ++ return 0; ++} ++ ++ ++ ++ ++ ++int ++dht_mds_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, struct iatt *statpre, ++ struct iatt *statpost, dict_t *xdata) ++ ++{ ++ dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; ++ xlator_t *prev = NULL; ++ xlator_t *mds_subvol = NULL; ++ struct iatt loc_stbuf = {0,}; ++ int i = 0; ++ ++ local = frame->local; ++ prev = cookie; ++ conf = this->private; ++ mds_subvol = local->mds_subvol; ++ ++ if (op_ret == -1) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ gf_msg_debug (this->name, op_errno, ++ "subvolume %s returned -1", ++ prev->name); ++ goto out; ++ } ++ ++ local->op_ret = 0; ++ loc_stbuf = local->stbuf; ++ dht_iatt_merge (this, &local->prebuf, statpre, prev); ++ dht_iatt_merge (this, &local->stbuf, statpost, prev); ++ ++ local->call_cnt = conf->subvolume_cnt - 1; ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (mds_subvol == conf->subvolumes[i]) ++ continue; ++ STACK_WIND_COOKIE (frame, dht_non_mds_setattr_cbk, ++ conf->subvolumes[i], conf->subvolumes[i], ++ conf->subvolumes[i]->fops->setattr, ++ &local->loc, &loc_stbuf, ++ 
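Note: dht_dir_set_heal_xattr() above copies two classes of keys from the MDS copy into the destination dict: anything in the user.* namespace, plus the entries of the xattrs_to_heal[] whitelist (declared extern in dht-common.h; its actual contents live outside this patch). A standalone predicate over placeholder whitelist entries, assuming the patch's convention that index 0 covers the user namespace:

#include <fnmatch.h>
#include <stdio.h>
#include <string.h>

static const char *xattrs_to_heal[] = {   /* placeholder entries only */
        "user.*",
        "security.selinux",
        "trusted.glusterfs.quota.limit-set",
        NULL
};

static int
should_heal_key (const char *key)
{
        if (fnmatch (xattrs_to_heal[0], key, 0) == 0)
                return 1;                             /* user namespace */
        for (int i = 1; xattrs_to_heal[i]; i++)
                if (strcmp (key, xattrs_to_heal[i]) == 0)
                        return 1;                     /* whitelisted key */
        return 0;
}

int
main (void)
{
        printf ("%d\n", should_heal_key ("user.mime-type"));   /* 1 */
        printf ("%d\n", should_heal_key ("trusted.gfid"));     /* 0 */
        return 0;
}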
local->valid, local->xattr_req); ++ } ++ ++ return 0; ++out: ++ DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->stbuf, xdata); ++ ++ return 0; ++} ++ + int + dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) + { +- xlator_t *subvol = NULL; +- dht_layout_t *layout = NULL; +- dht_local_t *local = NULL; +- int op_errno = -1; +- int i = -1; +- int call_cnt = 0; ++ xlator_t *subvol = NULL; ++ xlator_t *mds_subvol = NULL; ++ dht_layout_t *layout = NULL; ++ dht_local_t *local = NULL; ++ int op_errno = -1; ++ int i = -1; ++ int ret = -1; ++ int call_cnt = 0; ++ dht_conf_t *conf = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); +@@ -1195,6 +1297,7 @@ dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + ++ conf = this->private; + local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR); + if (!local) { + op_errno = ENOMEM; +@@ -1235,12 +1338,50 @@ dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + + local->call_cnt = call_cnt = layout->cnt; + +- for (i = 0; i < call_cnt; i++) { +- STACK_WIND_COOKIE (frame, dht_setattr_cbk, +- layout->list[i].xlator, +- layout->list[i].xlator, +- layout->list[i].xlator->fops->setattr, ++ if (IA_ISDIR (loc->inode->ia_type) && ++ !__is_root_gfid (loc->inode->gfid) && call_cnt != 1) { ++ ret = dht_inode_ctx_mdsvol_get (loc->inode, this, &mds_subvol); ++ if (ret || !mds_subvol) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_HASHED_SUBVOL_GET_FAILED, ++ "Failed to get mds subvol for path %s", ++ local->loc.path); ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ local->mds_subvol = mds_subvol; ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (conf->subvolumes[i] == mds_subvol) { ++ if (!conf->subvolume_status[i]) { ++ gf_msg (this->name, GF_LOG_WARNING, ++ layout->list[i].err, ++ DHT_MSG_HASHED_SUBVOL_DOWN, ++ "MDS subvol is down for path " ++ " %s Unable to set attr " , ++ local->loc.path); ++ op_errno = ENOTCONN; ++ goto err; ++ } ++ } ++ } ++ local->valid = valid; ++ local->stbuf = *stbuf; ++ ++ STACK_WIND_COOKIE (frame, dht_mds_setattr_cbk, ++ local->mds_subvol, ++ local->mds_subvol, ++ local->mds_subvol->fops->setattr, + loc, stbuf, valid, xdata); ++ return 0; ++ } else { ++ for (i = 0; i < call_cnt; i++) { ++ STACK_WIND_COOKIE (frame, dht_setattr_cbk, ++ layout->list[i].xlator, ++ layout->list[i].xlator, ++ layout->list[i].xlator->fops->setattr, ++ loc, stbuf, valid, xdata); ++ } + } + + return 0; +diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h +index dcfd747..ade32e4 100644 +--- a/xlators/cluster/dht/src/dht-messages.h ++++ b/xlators/cluster/dht/src/dht-messages.h +@@ -40,7 +40,7 @@ + */ + + #define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT +-#define GLFS_DHT_NUM_MESSAGES 126 ++#define GLFS_DHT_NUM_MESSAGES 129 + #define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1) + + /* Messages with message IDs */ +@@ -1083,6 +1083,7 @@ + * @diagnosis + * @recommendedaction None + */ ++ + #define DHT_MSG_DIR_LOOKUP_FAILED (GLFS_DHT_BASE + 118) + + /* +@@ -1111,6 +1112,7 @@ + * @diagnosis + * @recommendedaction None + */ ++ + #define DHT_MSG_ENTRYLK_ERROR (GLFS_DHT_BASE + 122) + + /* +@@ -1132,7 +1134,7 @@ + * @diagnosis + * @recommendedaction None + */ +-#define DHT_MSG_UNKNOWN_FOP (GLFS_DHT_BASE + 125) ++#define DHT_MSG_UNKNOWN_FOP (GLFS_DHT_BASE + 125) + + /* + * @messageid 109126 +@@ 
-1141,5 +1143,27 @@ + */ + #define DHT_MSG_MIGRATE_FILE_SKIPPED (GLFS_DHT_BASE + 126) + ++/* ++ * @messageid 109127 ++ * @diagnosis ++ * @recommendedaction None ++ */ ++#define DHT_MSG_DIR_XATTR_HEAL_FAILED (GLFS_DHT_BASE + 127) ++ ++/* ++ * @messageid 109128 ++ * @diagnosis ++ * @recommendedaction None ++ */ ++#define DHT_MSG_HASHED_SUBVOL_DOWN (GLFS_DHT_BASE + 128) ++ ++/* ++ * @messageid 109129 ++ * @diagnosis ++ * @recommendedaction None ++ */ ++#define DHT_MSG_NON_HASHED_SUBVOL_DOWN (GLFS_DHT_BASE + 129) ++ ++ + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" + #endif /* _DHT_MESSAGES_H_ */ +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index 3b9fcf1..328251d 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -703,6 +703,18 @@ dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + return 0; + } + ++/* Code is required to set user xattr to local->xattr ++*/ ++int ++dht_set_user_xattr (dict_t *dict, char *k, data_t *v, void *data) ++{ ++ dict_t *set_xattr = data; ++ int ret = -1; ++ ++ ret = dict_set (set_xattr, k, v); ++ return ret; ++} ++ + + int + dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc, +@@ -830,7 +842,6 @@ dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc, + err: + if (xattr) + dict_unref (xattr); +- + if (xdata) + dict_unref (xdata); + +@@ -1128,6 +1139,14 @@ dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { ++ if (!local->heal_layout) { ++ gf_msg_trace (this->name, 0, ++ "Skip heal layout for %s gfid = %s ", ++ local->loc.path, uuid_utoa(local->gfid)); ++ ++ dht_selfheal_dir_finish (frame, this, 0, 1); ++ return 0; ++ } + ret = dht_selfheal_layout_lock (frame, layout, _gf_false, + dht_selfheal_dir_xattr, + dht_should_heal_layout); +@@ -1140,6 +1159,141 @@ dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + return 0; + } + ++int ++dht_selfheal_dir_check_set_mdsxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *prev = cookie; ++ int ret = -1; ++ dht_conf_t *conf = 0; ++ ++ GF_VALIDATE_OR_GOTO (this->name, frame, out); ++ GF_VALIDATE_OR_GOTO (this->name, frame->local, out); ++ ++ local = frame->local; ++ conf = this->private; ++ ++ if (op_ret) { ++ gf_msg_debug (this->name, op_ret, ++ "internal mds setxattr %s is failed on mds subvol " ++ "at the time of heal on path %s " , ++ conf->mds_xattr_key, local->loc.path); ++ } else { ++ /* Save mds subvol on inode ctx */ ++ ret = dht_inode_ctx_mdsvol_set (local->inode, this, prev); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_SET_INODE_CTX_FAILED, ++ "Failed to set hashed subvol " ++ " %s for %s ", prev->name, ++ local->loc.path); ++ } ++ } ++ ++out: ++ DHT_STACK_DESTROY (frame); ++ return 0; ++} ++ ++/* Code to set internal mds xattr if it is not present ++*/ ++int ++dht_selfheal_dir_check_set_mdsxattr (call_frame_t *frame, loc_t *loc) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *this = NULL; ++ xlator_t *hashed_subvol = NULL; ++ int ret = -1; ++ dict_t *xattrs = NULL; ++ char gfid_local[GF_UUID_BUF_SIZE] = {0,}; ++ int32_t zero[1] = {0}; ++ call_frame_t *xattr_frame = NULL; ++ dht_local_t *copy_local = NULL; ++ dht_conf_t *conf = 0; ++ ++ local = frame->local; ++ this = frame->this; ++ conf 
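Note on the dht-messages.h additions above: every DHT message ID is GLFS_DHT_BASE plus a stable offset, and GLFS_DHT_NUM_MESSAGES was bumped from 126 to 129 to cover the three new IDs. Since their @messageid annotations read 109127-109129, the DHT base works out to 109000; a quick check of the arithmetic (the base value is inferred from those annotations, not quoted from the headers):

#include <stdio.h>

#define GLFS_MSGID_COMP_DHT            109000   /* inferred from the 1091xx IDs */
#define GLFS_DHT_BASE                  GLFS_MSGID_COMP_DHT
#define DHT_MSG_DIR_XATTR_HEAL_FAILED  (GLFS_DHT_BASE + 127)
#define DHT_MSG_NON_HASHED_SUBVOL_DOWN (GLFS_DHT_BASE + 129)

int
main (void)
{
        printf ("%d %d\n", DHT_MSG_DIR_XATTR_HEAL_FAILED,
                DHT_MSG_NON_HASHED_SUBVOL_DOWN);    /* 109127 109129 */
        return 0;
}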
= this->private; ++ gf_uuid_unparse(local->gfid, gfid_local); ++ ++ if (!dict_get (local->xattr, conf->mds_xattr_key)) { ++ /* It means no internal MDS xattr has been set yet ++ */ ++ /* Calculate hashed subvol based on inode and ++ parent inode ++ */ ++ hashed_subvol = dht_inode_get_hashed_subvol (local->inode, this, ++ loc); ++ if (!hashed_subvol) { ++ gf_msg (this->name, GF_LOG_DEBUG, 0, ++ DHT_MSG_HASHED_SUBVOL_GET_FAILED, ++ "Failed to get hashed subvol for path %s" ++ "gfid is %s ", ++ local->loc.path, gfid_local); ++ ret = -1; ++ goto out; ++ } else { ++ /* Set internal mds xattr on disk */ ++ xattrs = dict_new (); ++ if (!xattrs) { ++ gf_msg (this->name, GF_LOG_ERROR, ENOMEM, ++ DHT_MSG_NO_MEMORY, "dict_new failed"); ++ ret = -1; ++ goto out; ++ } ++ /* Add internal MDS xattr on disk for hashed subvol ++ */ ++ ret = dht_dict_set_array (xattrs, conf->mds_xattr_key, zero, 1); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, ++ DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary" ++ " value:key = %s for " ++ "path %s", conf->mds_xattr_key, ++ local->loc.path); ++ ret = -1; ++ goto out; ++ } ++ ++ xattr_frame = create_frame (this, this->ctx->pool); ++ if (!xattr_frame) { ++ ret = -1; ++ goto out; ++ } ++ copy_local = dht_local_init (xattr_frame, &(local->loc), ++ NULL, 0); ++ if (!copy_local) { ++ ret = -1; ++ DHT_STACK_DESTROY (xattr_frame); ++ goto out; ++ } ++ ++ copy_local->stbuf = local->stbuf; ++ copy_local->inode = inode_ref (local->inode); ++ gf_uuid_copy (copy_local->loc.gfid, local->gfid); ++ ++ STACK_WIND_COOKIE (xattr_frame, ++ dht_selfheal_dir_check_set_mdsxattr_cbk, ++ (void *)hashed_subvol, hashed_subvol, ++ hashed_subvol->fops->setxattr, ++ loc, xattrs, 0, NULL); ++ ret = 0; ++ } ++ } else { ++ ret = 0; ++ gf_msg_debug (this->name, 0, ++ "internal xattr %s is present on subvol" ++ "on path %s gfid is %s " , conf->mds_xattr_key, ++ local->loc.path, gfid_local); ++ } ++ ++out: ++ if (xattrs) ++ dict_unref (xattrs); ++ return ret; ++} ++ + + int + dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf, +@@ -1159,7 +1313,40 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf, + missing_attr++; + } + ++ if (!__is_root_gfid (local->stbuf.ia_gfid)) { ++ if (local->need_xattr_heal) { ++ local->need_xattr_heal = 0; ++ ret = dht_dir_xattr_heal (this, local); ++ if (ret) ++ gf_msg (this->name, GF_LOG_ERROR, ++ ret, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "xattr heal failed for " ++ "directory %s gfid %s ", ++ local->loc.path, ++ local->gfid); ++ } else { ++ ret = dht_selfheal_dir_check_set_mdsxattr (frame, loc); ++ if (ret) ++ gf_msg (this->name, GF_LOG_INFO, ret, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "set mds internal xattr failed for " ++ "directory %s gfid %s ", local->loc.path, ++ local->gfid); ++ } ++ } ++ ++ if (!gf_uuid_is_null (local->gfid)) ++ gf_uuid_copy (loc->gfid, local->gfid); ++ + if (missing_attr == 0) { ++ if (!local->heal_layout) { ++ gf_msg_trace (this->name, 0, ++ "Skip heal layout for %s gfid = %s ", ++ loc->path, uuid_utoa(loc->gfid)); ++ dht_selfheal_dir_finish (frame, this, 0, 1); ++ return 0; ++ } + ret = dht_selfheal_layout_lock (frame, layout, _gf_false, + dht_selfheal_dir_xattr, + dht_should_heal_layout); +@@ -1171,11 +1358,9 @@ dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf, + return 0; + } + +- if (!gf_uuid_is_null (local->gfid)) +- gf_uuid_copy (loc->gfid, local->gfid); +- + local->call_cnt = missing_attr; + cnt = layout->cnt; ++ + for (i = 0; i < cnt; i++) { 
+ if (layout->list[i].err == -1) { + gf_msg_trace (this->name, 0, +@@ -1291,16 +1476,66 @@ out: + return; + } + ++ ++void ++dht_selfheal_dir_mkdir_setquota (dict_t *src, dict_t *dst) ++{ ++ data_t *quota_limit_key = NULL; ++ data_t *quota_limit_obj_key = NULL; ++ xlator_t *this = NULL; ++ int ret = -1; ++ ++ GF_ASSERT (src); ++ GF_ASSERT (dst); ++ ++ this = THIS; ++ GF_ASSERT (this); ++ ++ quota_limit_key = dict_get (src, QUOTA_LIMIT_KEY); ++ if (!quota_limit_key) { ++ gf_msg_debug (this->name, 0, ++ "QUOTA_LIMIT_KEY xattr not present"); ++ goto cont; ++ } ++ ret = dict_set(dst, QUOTA_LIMIT_KEY, quota_limit_key); ++ if (ret) ++ gf_msg (this->name, GF_LOG_WARNING, 0, ++ DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value.key = %s", ++ QUOTA_LIMIT_KEY); ++ ++cont: ++ quota_limit_obj_key = dict_get (src, QUOTA_LIMIT_OBJECTS_KEY); ++ if (!quota_limit_obj_key) { ++ gf_msg_debug (this->name, 0, ++ "QUOTA_LIMIT_OBJECTS_KEY xattr not present"); ++ goto out; ++ } ++ ret = dict_set (dst, QUOTA_LIMIT_OBJECTS_KEY, quota_limit_obj_key); ++ if (ret) ++ gf_msg (this->name, GF_LOG_WARNING, 0, ++ DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value.key = %s", ++ QUOTA_LIMIT_OBJECTS_KEY); ++ ++out: ++ return; ++} ++ ++ ++ ++ ++ + int + dht_selfheal_dir_mkdir_lookup_done (call_frame_t *frame, xlator_t *this) + { + dht_local_t *local = NULL; + int i = 0; +- int ret = -1; + dict_t *dict = NULL; + dht_layout_t *layout = NULL; + loc_t *loc = NULL; + int cnt = 0; ++ int ret = -1; + + VALIDATE_OR_GOTO (this->private, err); + +@@ -1324,9 +1559,11 @@ dht_selfheal_dir_mkdir_lookup_done (call_frame_t *frame, xlator_t *this) + + dict = dict_ref (local->params); + } +- /* Set acls */ +- if (local->xattr && dict) +- dht_selfheal_dir_mkdir_setacl (local->xattr, dict); ++ /* Code to update all extended attributed from local->xattr ++ to dict ++ */ ++ dht_dir_set_heal_xattr (this, local, dict, local->xattr, NULL, ++ NULL); + + if (!dict) + gf_msg (this->name, GF_LOG_WARNING, 0, +@@ -1374,8 +1611,13 @@ dht_selfheal_dir_mkdir_lookup_cbk (call_frame_t *frame, void *cookie, + int this_call_cnt = 0; + int missing_dirs = 0; + dht_layout_t *layout = NULL; ++ dht_conf_t *conf = 0; + loc_t *loc = NULL; + xlator_t *prev = NULL; ++ int check_mds = 0; ++ int errst = 0; ++ int32_t mds_xattr_val[1] = {0}; ++ char gfid_local[GF_UUID_BUF_SIZE] = {0}; + + VALIDATE_OR_GOTO (this->private, err); + +@@ -1383,6 +1625,10 @@ dht_selfheal_dir_mkdir_lookup_cbk (call_frame_t *frame, void *cookie, + layout = local->layout; + loc = &local->loc; + prev = cookie; ++ conf = this->private; ++ ++ if (local->gfid) ++ gf_uuid_unparse(local->gfid, gfid_local); + + this_call_cnt = dht_frame_return (frame); + +@@ -1397,6 +1643,12 @@ dht_selfheal_dir_mkdir_lookup_cbk (call_frame_t *frame, void *cookie, + if (!op_ret) { + dht_iatt_merge (this, &local->stbuf, stbuf, prev); + } ++ check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key, ++ mds_xattr_val, 1, &errst); ++ if (dict_get (xattr, conf->mds_xattr_key) && check_mds && !errst) { ++ dict_unref (local->xattr); ++ local->xattr = dict_ref (xattr); ++ } + + } + UNLOCK (&frame->lock); +@@ -1445,13 +1697,16 @@ dht_selfheal_dir_mkdir_lock_cbk (call_frame_t *frame, void *cookie, + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int i = 0; ++ int ret = -1; ++ xlator_t *mds_subvol = NULL; + + VALIDATE_OR_GOTO (this->private, err); + + conf = this->private; + local = frame->local; ++ mds_subvol = local->mds_subvol; + +- local->call_cnt = conf->subvolume_cnt; ++ local->call_cnt = 
conf->subvolume_cnt; + + if (op_ret < 0) { + +@@ -1477,12 +1732,32 @@ dht_selfheal_dir_mkdir_lock_cbk (call_frame_t *frame, void *cookie, + /* After getting locks, perform lookup again to ensure that the + directory was not deleted by a racing rmdir + */ ++ if (!local->xattr_req) ++ local->xattr_req = dict_new (); ++ ++ ret = dict_set_int32 (local->xattr_req, "list-xattr", 1); ++ if (ret) ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary key list-xattr value " ++ " for path %s ", local->loc.path); + + for (i = 0; i < conf->subvolume_cnt; i++) { +- STACK_WIND_COOKIE (frame, dht_selfheal_dir_mkdir_lookup_cbk, +- conf->subvolumes[i], conf->subvolumes[i], +- conf->subvolumes[i]->fops->lookup, +- &local->loc, NULL); ++ if (mds_subvol && conf->subvolumes[i] == mds_subvol) { ++ STACK_WIND_COOKIE (frame, ++ dht_selfheal_dir_mkdir_lookup_cbk, ++ conf->subvolumes[i], ++ conf->subvolumes[i], ++ conf->subvolumes[i]->fops->lookup, ++ &local->loc, local->xattr_req); ++ } else { ++ STACK_WIND_COOKIE (frame, ++ dht_selfheal_dir_mkdir_lookup_cbk, ++ conf->subvolumes[i], ++ conf->subvolumes[i], ++ conf->subvolumes[i]->fops->lookup, ++ &local->loc, NULL); ++ } + } + + return 0; +@@ -2171,15 +2446,16 @@ dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + } + + dht_layout_sort_volname (layout); ++ local->heal_layout = _gf_true; + ret = dht_selfheal_dir_getafix (frame, loc, layout); + + if (ret == -1) { +- gf_msg (this->name, GF_LOG_WARNING, 0, ++ gf_msg (this->name, GF_LOG_INFO, 0, + DHT_MSG_DIR_SELFHEAL_FAILED, + "Directory selfheal failed: " + "Unable to form layout for directory %s", + loc->path); +- goto sorry_no_fix; ++ local->heal_layout = _gf_false; + } + + dht_selfheal_dir_mkdir (frame, loc, layout, 0); +@@ -2281,23 +2557,196 @@ dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + } + + int ++dht_dir_heal_xattrs (void *data) ++{ ++ call_frame_t *frame = NULL; ++ dht_local_t *local = NULL; ++ xlator_t *subvol = NULL; ++ xlator_t *mds_subvol = NULL; ++ xlator_t *this = NULL; ++ dht_conf_t *conf = NULL; ++ dict_t *user_xattr = NULL; ++ dict_t *internal_xattr = NULL; ++ dict_t *mds_xattr = NULL; ++ dict_t *xdata = NULL; ++ int call_cnt = 0; ++ int ret = -1; ++ int uret = 0; ++ int uflag = 0; ++ int i = 0; ++ int xattr_hashed = 0; ++ char gfid[GF_UUID_BUF_SIZE] = {0}; ++ int32_t allzero[1] = {0}; ++ ++ GF_VALIDATE_OR_GOTO ("dht", data, out); ++ ++ frame = data; ++ local = frame->local; ++ this = frame->this; ++ GF_VALIDATE_OR_GOTO ("dht", this, out); ++ GF_VALIDATE_OR_GOTO (this->name, local, out); ++ mds_subvol = local->mds_subvol; ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO (this->name, conf, out); ++ gf_uuid_unparse(local->loc.gfid, gfid); ++ ++ if (!mds_subvol) { ++ gf_msg (this->name, GF_LOG_WARNING, 0, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "No mds subvol for %s gfid = %s", ++ local->loc.path, gfid); ++ goto out; ++ } ++ ++ if ((local->loc.inode && gf_uuid_is_null (local->loc.inode->gfid)) || ++ gf_uuid_is_null (local->loc.gfid)) { ++ gf_msg (this->name, GF_LOG_WARNING, 0, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "No gfid present so skip heal for path %s gfid = %s", ++ local->loc.path, gfid); ++ goto out; ++ } ++ ++ internal_xattr = dict_new (); ++ if (!internal_xattr) { ++ gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, ++ "dictionary creation failed"); ++ goto out; ++ } ++ xdata = dict_new (); ++ if (!xdata) { ++ gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, ++ "dictionary creation 
failed"); ++ goto out; ++ } ++ ++ call_cnt = conf->subvolume_cnt; ++ ++ user_xattr = dict_new (); ++ if (!user_xattr) { ++ gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0, ++ "dictionary creation failed"); ++ goto out; ++ } ++ ++ ret = syncop_listxattr (local->mds_subvol, &local->loc, ++ &mds_xattr, NULL, NULL); ++ if (ret < 0) { ++ gf_msg (this->name, GF_LOG_ERROR, -ret, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "failed to list xattrs for " ++ "%s: on %s ", ++ local->loc.path, local->mds_subvol->name); ++ } ++ ++ if (!mds_xattr) ++ goto out; ++ ++ dht_dir_set_heal_xattr (this, local, user_xattr, mds_xattr, ++ &uret, &uflag); ++ ++ /* To set quota related xattr need to set GLUSTERFS_INTERNAL_FOP_KEY ++ * key value to 1 ++ */ ++ if (dict_get (user_xattr, QUOTA_LIMIT_KEY) || ++ dict_get (user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) { ++ ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value: key = %s," ++ " path = %s", GLUSTERFS_INTERNAL_FOP_KEY, ++ local->loc.path); ++ goto out; ++ } ++ } ++ if (uret <= 0 && !uflag) ++ goto out; ++ ++ for (i = 0; i < call_cnt; i++) { ++ subvol = conf->subvolumes[i]; ++ if (subvol == mds_subvol) ++ continue; ++ if (uret || uflag) { ++ ret = syncop_setxattr (subvol, &local->loc, user_xattr, ++ 0, xdata, NULL); ++ if (ret) { ++ xattr_hashed = 1; ++ gf_msg (this->name, GF_LOG_ERROR, -ret, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "Directory xattr heal failed. Failed to set" ++ "user xattr on path %s on " ++ "subvol %s, gfid = %s ", ++ local->loc.path, subvol->name, gfid); ++ } ++ } ++ } ++ /* After heal all custom xattr reset internal MDS xattr to 0 */ ++ if (!xattr_hashed) { ++ ret = dht_dict_set_array (internal_xattr, ++ conf->mds_xattr_key, ++ allzero, 1); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, ENOMEM, ++ DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value:key = %s for " ++ "path %s", conf->mds_xattr_key, ++ local->loc.path); ++ goto out; ++ } ++ ret = syncop_setxattr (mds_subvol, &local->loc, internal_xattr, ++ 0, NULL, NULL); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, -ret, ++ DHT_MSG_DIR_XATTR_HEAL_FAILED, ++ "Failed to reset internal xattr " ++ "on path %s on subvol %s" ++ "gfid = %s ", local->loc.path, ++ mds_subvol->name, gfid); ++ } ++ } ++ ++out: ++ if (user_xattr) ++ dict_unref (user_xattr); ++ if (mds_xattr) ++ dict_unref (mds_xattr); ++ if (internal_xattr) ++ dict_unref (internal_xattr); ++ if (xdata) ++ dict_unref (xdata); ++ return 0; ++} ++ ++ ++int ++dht_dir_heal_xattrs_done (int ret, call_frame_t *sync_frame, void *data) ++{ ++ DHT_STACK_DESTROY (sync_frame); ++ return 0; ++} ++ ++ ++int + dht_dir_attr_heal (void *data) + { +- call_frame_t *frame = NULL; +- dht_local_t *local = NULL; +- xlator_t *subvol = NULL; +- xlator_t *this = NULL; ++ call_frame_t *frame = NULL; ++ dht_local_t *local = NULL; ++ xlator_t *subvol = NULL; ++ xlator_t *mds_subvol = NULL; ++ xlator_t *this = NULL; + dht_conf_t *conf = NULL; + int call_cnt = 0; + int ret = -1; + int i = 0; +- char gfid[GF_UUID_BUF_SIZE] = {0}; ++ char gfid[GF_UUID_BUF_SIZE] = {0}; + + + GF_VALIDATE_OR_GOTO ("dht", data, out); + + frame = data; + local = frame->local; ++ mds_subvol = local->mds_subvol; + this = frame->this; + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", local, out); +@@ -2306,17 +2755,39 @@ dht_dir_attr_heal (void *data) + + call_cnt = conf->subvolume_cnt; + ++ if (!__is_root_gfid (local->stbuf.ia_gfid) && 
(!mds_subvol)) { ++ gf_msg (this->name, GF_LOG_WARNING, 0, ++ DHT_MSG_DIR_ATTR_HEAL_FAILED, ++ "No mds subvol for %s gfid = %s", ++ local->loc.path, gfid); ++ goto out; ++ } ++ ++ if (!__is_root_gfid (local->stbuf.ia_gfid)) { ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (conf->subvolumes[i] == mds_subvol) { ++ if (!conf->subvolume_status[i]) { ++ gf_msg (this->name, GF_LOG_ERROR, ++ 0, DHT_MSG_HASHED_SUBVOL_DOWN, ++ "mds subvol is down for path " ++ " %s gfid is %s Unable to set xattr " , ++ local->loc.path, gfid); ++ goto out; ++ } ++ } ++ } ++ } ++ + for (i = 0; i < call_cnt; i++) { + subvol = conf->subvolumes[i]; +- if (!subvol) ++ if (!subvol || subvol == mds_subvol) + continue; +- + if (__is_root_gfid (local->stbuf.ia_gfid)) { + ret = syncop_setattr (subvol, &local->loc, &local->stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), + NULL, NULL, NULL, NULL); + } else { +- ret = syncop_setattr (subvol, &local->loc, &local->stbuf, ++ ret = syncop_setattr (subvol, &local->loc, &local->mds_stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID), + NULL, NULL, NULL, NULL); + } +@@ -2324,7 +2795,7 @@ dht_dir_attr_heal (void *data) + if (ret) { + gf_uuid_unparse(local->loc.gfid, gfid); + +- gf_msg ("dht", GF_LOG_ERROR, -ret, ++ gf_msg (this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_ATTR_HEAL_FAILED, + "Directory attr heal failed. Failed to set" + " uid/gid on path %s on subvol %s, gfid = %s ", +diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c +index 0373ebf..42daff0 100644 +--- a/xlators/cluster/dht/src/dht-shared.c ++++ b/xlators/cluster/dht/src/dht-shared.c +@@ -868,6 +868,7 @@ dht_init (xlator_t *this) + } + + GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err); ++ gf_asprintf (&conf->mds_xattr_key, "%s."DHT_MDS_STR, conf->xattr_name); + gf_asprintf (&conf->link_xattr_name, "%s."DHT_LINKFILE_STR, + conf->xattr_name); + gf_asprintf (&conf->commithash_xattr_name, "%s."DHT_COMMITHASH_STR, +@@ -917,6 +918,7 @@ err: + GF_FREE (conf->xattr_name); + GF_FREE (conf->link_xattr_name); + GF_FREE (conf->wild_xattr_name); ++ GF_FREE (conf->mds_xattr_key); + + if (conf->lock_pool) + mem_pool_destroy (conf->lock_pool); +-- +1.8.3.1 + diff --git a/0206-glusterd-honour-localtime-logging-for-all-the-daemon.patch b/0206-glusterd-honour-localtime-logging-for-all-the-daemon.patch new file mode 100644 index 0000000..e46ec1d --- /dev/null +++ b/0206-glusterd-honour-localtime-logging-for-all-the-daemon.patch @@ -0,0 +1,123 @@ +From ed84bfec039d2f3d63902dfe3bade2fe6eb6c31e Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Tue, 3 Apr 2018 21:28:37 +0530 +Subject: [PATCH 206/212] glusterd: honour localtime-logging for all the + daemons + +>upstream patch : https://review.gluster.org/#/c/19814/ + +>Change-Id: I97a70d29365b0a454241ac5f5cae56d93eefd73a +>Fixes: bz#1563334 +>Signed-off-by: Atin Mukherjee + +Change-Id: I97a70d29365b0a454241ac5f5cae56d93eefd73a +BUG: 958062 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/134447 +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 6 ++++++ + xlators/mgmt/glusterd/src/glusterd-snapd-svc.c | 6 ++++++ + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 6 ++++++ + xlators/mgmt/glusterd/src/glusterd-tierd-svc.c | 6 ++++++ + 4 files changed, 24 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index 76191c4..848e689 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c 
++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -196,6 +196,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, + char volname[PATH_MAX] = {0,}; + char valgrind_logfile[PATH_MAX] = {0,}; + char *volfileserver = NULL; ++ char *localtime_logging = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); +@@ -312,6 +313,11 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, + runner_argprintf (&runner, logfile); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); ++ if (dict_get_str (priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, ++ &localtime_logging) == 0) { ++ if (strcmp (localtime_logging, "enable") == 0) ++ runner_add_arg (&runner, "--localtime-logging"); ++ } + + ret = runner_run_nowait (&runner); + if (ret) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +index 5621852..bd1c02e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +@@ -226,6 +226,7 @@ glusterd_snapdsvc_start (glusterd_svc_t *svc, int flags) + char snapd_id[PATH_MAX] = {0,}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snapdsvc_t *snapd = NULL; ++ char *localtime_logging = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -298,6 +299,11 @@ glusterd_snapdsvc_start (glusterd_svc_t *svc, int flags) + "-l", svc->proc.logfile, + "--brick-name", snapd_id, + "-S", svc->conn.sockpath, NULL); ++ if (dict_get_str (priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, ++ &localtime_logging) == 0) { ++ if (strcmp (localtime_logging, "enable") == 0) ++ runner_add_arg (&runner, "--localtime-logging"); ++ } + + snapd_port = pmap_assign_port (THIS, volinfo->snapd.port, snapd_id); + if (!snapd_port) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +index f229865..ba948b4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +@@ -150,6 +150,7 @@ glusterd_svc_start (glusterd_svc_t *svc, int flags, dict_t *cmdline) + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char valgrind_logfile[PATH_MAX] = {0}; ++ char *localtime_logging = NULL; + + this = THIS; + GF_ASSERT (this); +@@ -190,6 +191,11 @@ glusterd_svc_start (glusterd_svc_t *svc, int flags, dict_t *cmdline) + "-S", svc->conn.sockpath, + NULL); + ++ if (dict_get_str (priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, ++ &localtime_logging) == 0) { ++ if (strcmp (localtime_logging, "enable") == 0) ++ runner_add_arg (&runner, "--localtime-logging"); ++ } + if (cmdline) + dict_foreach (cmdline, svc_add_args, (void *) &runner); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c +index a2876ae..2c556fc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c +@@ -259,6 +259,7 @@ glusterd_tierdsvc_start (glusterd_svc_t *svc, int flags) + glusterd_volinfo_t *volinfo = NULL; + glusterd_tierdsvc_t *tierd = NULL; + int cmd = GF_DEFRAG_CMD_START_TIER; ++ char *localtime_logging = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); +@@ -353,6 +354,11 @@ glusterd_tierdsvc_start (glusterd_svc_t *svc, int flags) + volinfo->rebal.commit_hash); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); ++ if (dict_get_str (priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, ++ &localtime_logging) == 0) { ++ if 
(strcmp (localtime_logging, "enable") == 0) ++ runner_add_arg (&runner, "--localtime-logging"); ++ } + + snprintf (msg, sizeof (msg), + "Starting the tierd service for volume %s", volinfo->volname); +-- +1.8.3.1 + diff --git a/0207-glusterd-fix-txn_opinfo-memory-leak.patch b/0207-glusterd-fix-txn_opinfo-memory-leak.patch new file mode 100644 index 0000000..44aa60b --- /dev/null +++ b/0207-glusterd-fix-txn_opinfo-memory-leak.patch @@ -0,0 +1,113 @@ +From 47678bde5c2f8e674289d2b0893865ab3fa43940 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Sun, 1 Apr 2018 10:10:41 +0530 +Subject: [PATCH 207/212] glusterd: fix txn_opinfo memory leak + +For transactions where there's no volname involved (eg : gluster v +status), the originator node initiates with staging phase and what that +means in op-sm there's no unlock event triggered which resulted into a +txn_opinfo dictionary leak. + +Credits : cynthia.zhou@nokia-sbell.com + +> upstream patch : https://review.gluster.org/#/c/19801/ + +>Change-Id: I92fffbc2e8e1b010f489060f461be78aa2b86615 +>Fixes: bz#1550339 +>Signed-off-by: Atin Mukherjee + +Change-Id: I92fffbc2e8e1b010f489060f461be78aa2b86615 +BUG: 1529451 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/134448 +Tested-by: RHGS Build Bot +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 1 + + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 32 ++++++++++++++++++++-------- + xlators/mgmt/glusterd/src/glusterd-op-sm.h | 1 + + 3 files changed, 25 insertions(+), 9 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index ddab159..dbf80a1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -1073,6 +1073,7 @@ __glusterd_handle_stage_op (rpcsvc_request_t *req) + glusterd_txn_opinfo_init (&txn_op_info, &state, &op_req.op, + req_ctx->dict, req); + ++ txn_op_info.skip_locking = _gf_true; + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index a02a0b3..72d349b 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -5919,14 +5919,15 @@ glusterd_op_init_commit_rsp_dict (glusterd_op_t op) + static int + glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) + { +- int ret = 0; +- glusterd_req_ctx_t *req_ctx = NULL; +- int32_t status = 0; +- char *op_errstr = NULL; +- dict_t *dict = NULL; +- dict_t *rsp_dict = NULL; +- xlator_t *this = NULL; +- uuid_t *txn_id = NULL; ++ int ret = 0; ++ glusterd_req_ctx_t *req_ctx = NULL; ++ int32_t status = 0; ++ char *op_errstr = NULL; ++ dict_t *dict = NULL; ++ dict_t *rsp_dict = NULL; ++ xlator_t *this = NULL; ++ uuid_t *txn_id = NULL; ++ glusterd_op_info_t txn_op_info = {{0},}; + + this = THIS; + GF_ASSERT (this); +@@ -5965,6 +5966,15 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) + ret = -1; + goto out; + } ++ ret = glusterd_get_txn_opinfo (&event->txn_id, &txn_op_info); ++ if (ret) { ++ gf_msg_callingfn (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_TRANS_OPINFO_GET_FAIL, ++ "Unable to get transaction opinfo " ++ "for transaction ID : %s", ++ uuid_utoa (event->txn_id)); ++ goto out; ++ } + + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(*txn_id)); +@@ -5985,7 +5995,11 @@ out: + + if (rsp_dict) + dict_unref (rsp_dict); +- ++ /* for no 
volname transactions, the txn_opinfo needs to be cleaned up ++ * as there's no unlock event triggered ++ */ ++ if (txn_op_info.skip_locking) ++ ret = glusterd_clear_txn_opinfo (txn_id); + gf_msg_debug (this->name, 0, "Returning with %d", ret); + + return ret; +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h +index 24b1944..f2aee9c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h +@@ -101,6 +101,7 @@ struct glusterd_op_info_ { + char *op_errstr; + struct cds_list_head pending_bricks; + uint32_t txn_generation; ++ gf_boolean_t skip_locking; + }; + + typedef struct glusterd_op_info_ glusterd_op_info_t; +-- +1.8.3.1 + diff --git a/0208-cluster-dht-enable-lookup-optimize-by-default.patch b/0208-cluster-dht-enable-lookup-optimize-by-default.patch new file mode 100644 index 0000000..5915b4e --- /dev/null +++ b/0208-cluster-dht-enable-lookup-optimize-by-default.patch @@ -0,0 +1,75 @@ +From 699f3c720d340b95177c521037c1cb8799930b9e Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Fri, 16 Mar 2018 20:54:15 +0530 +Subject: [PATCH 208/212] cluster/dht: enable lookup-optimize by default + +Lookup-optimize has been shown to improve create +performance. The code has been in the project for several +years and is considered stable. + +Enabling this by default in order to test this in the +upstream regression runs. + +upstream master:https://review.gluster.org/#/c/19731/ + +> Change-Id: Iab792979ee34f0af4713931e0b5b399c23f65313 +> updates: bz#1557435 +> BUG: 1557435 +> Signed-off-by: N Balachandran + +Change-Id: I076b4e4beb9db390f619f38bf4598589b95685c7 +BUG: 1557365 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/134450 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + tests/features/unhashed-auto.t | 2 +- + xlators/cluster/dht/src/dht-rebalance.c | 4 +++- + xlators/cluster/dht/src/dht-shared.c | 2 +- + 3 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/tests/features/unhashed-auto.t b/tests/features/unhashed-auto.t +index ddebd03..0a6bbfb 100755 +--- a/tests/features/unhashed-auto.t ++++ b/tests/features/unhashed-auto.t +@@ -114,7 +114,7 @@ TEST [ x"$new_hash" = x"00000001" ] + + # Unset the option and check that newly created directories get 1 in the + # disk layout +-TEST $CLI volume reset $V0 cluster.lookup-optimize ++TEST $CLI volume set $V0 cluster.lookup-optimize off + TEST mkdir $M0/dir1 + new_hash=$(get_xattr_hash $B0/${V0}1/dir1) + TEST [ x"$new_hash" = x"00000001" ] +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 9770359..9e31ff8 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -4422,7 +4422,9 @@ gf_defrag_start_crawl (void *data) + + ret = syncop_setxattr (this, &loc, fix_layout, 0, NULL, NULL); + if (ret) { +- gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed", ++ gf_log (this->name, GF_LOG_ERROR, ++ "Failed to set commit hash on %s. 
" ++ "Rebalance cannot proceed.", + loc.path); + defrag->total_failures++; + ret = -1; +diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c +index 42daff0..2f0d8ce 100644 +--- a/xlators/cluster/dht/src/dht-shared.c ++++ b/xlators/cluster/dht/src/dht-shared.c +@@ -943,7 +943,7 @@ struct volume_options options[] = { + }, + { .key = {"lookup-optimize"}, + .type = GF_OPTION_TYPE_BOOL, +- .default_value = "off", ++ .default_value = "on", + .description = "This option if set to ON enables the optimization " + "of -ve lookups, by not doing a lookup on non-hashed subvolumes for " + "files, in case the hashed subvolume does not return any result. " +-- +1.8.3.1 + diff --git a/0209-cluster-dht-Update-layout-in-inode-only-on-success.patch b/0209-cluster-dht-Update-layout-in-inode-only-on-success.patch new file mode 100644 index 0000000..894ec48 --- /dev/null +++ b/0209-cluster-dht-Update-layout-in-inode-only-on-success.patch @@ -0,0 +1,97 @@ +From b3c216d77ae7a054d4f5f28a93239afe00771cd8 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Thu, 29 Mar 2018 18:23:13 +0530 +Subject: [PATCH 209/212] cluster/dht: Update layout in inode only on success + +With lookup-optimize enabled, gf_defrag_settle_hash in rebalance +sometimes flips the on-disk layout on volume root post the +migration of all files in the directory. + +This is sometimes seen when attempting to fix the layout of a +directory multiple times before calling gf_defrag_settle_hash. +dht_fix_layout_of_directory generates a new layout in memory but +updates it in the inode ctx before it is set on disk. The layout +may be different the second time around due to +dht_selfheal_layout_maximize_overlap. If the layout is then not +written to the disk, the inode now contains the wrong layout. +gf_defrag_settle_hash does not check the correctness of the layout +in the inode before updating the commit-hash and writing it to the +disk thus changing the layout of the directory. 
+ +upstream master:https://review.gluster.org/#/c/19797/ + +> Change-Id: Ie1407d92982518f2a0c40ec70ad370b34a87b4d4 +> updates: bz#1557435 +> Signed-off-by: N Balachandran + +Change-Id: I4222b7c985226ca175e0581c103bad62084339a2 +BUG: 1557365 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/134451 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/cluster/dht/src/dht-common.c | 25 ++++++++++++++++++++++++- + xlators/cluster/dht/src/dht-selfheal.c | 3 --- + 2 files changed, 24 insertions(+), 4 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index f1e6a92..6319a87 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -3545,6 +3545,28 @@ dht_common_setxattr_cbk (call_frame_t *frame, void *cookie, + + + ++static int ++dht_fix_layout_setxattr_cbk (call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ dht_layout_t *layout = NULL; ++ ++ if (op_ret == 0) { ++ ++ /* update the layout in the inode ctx */ ++ local = frame->local; ++ layout = local->selfheal.layout; ++ ++ dht_layout_set (this, local->loc.inode, layout); ++ } ++ ++ DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); ++ return 0; ++} ++ ++ + int + dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) +@@ -5531,7 +5553,8 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, + DHT_MSG_FIX_LAYOUT_INFO, + "fixing the layout of %s", loc->path); + +- ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk, ++ ret = dht_fix_directory_layout (frame, ++ dht_fix_layout_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index 328251d..1707e08 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -2112,9 +2112,6 @@ dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc, + } + done: + if (new_layout) { +- /* Now that the new layout has all the proper layout, change the +- inode context */ +- dht_layout_set (this, loc->inode, new_layout); + + /* Make sure the extra 'ref' for existing layout is removed */ + dht_layout_unref (this, local->layout); +-- +1.8.3.1 + diff --git a/0210-cluster-ec-send-list-node-uuids-request-to-all-subvo.patch b/0210-cluster-ec-send-list-node-uuids-request-to-all-subvo.patch new file mode 100644 index 0000000..c704594 --- /dev/null +++ b/0210-cluster-ec-send-list-node-uuids-request-to-all-subvo.patch @@ -0,0 +1,53 @@ +From 53649d22deea97c1604f5688ecab303eb46104d9 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Wed, 28 Mar 2018 11:34:49 +0200 +Subject: [PATCH 210/212] cluster/ec: send list-node-uuids request to all + subvolumes + +The xattr trusted.glusterfs.list-node-uuids was only sent to a single +subvolume. This was returning null uuids from the other subvolumes as +if they were down. + +This fix forces that xattr to be requested from all subvolumes. 
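+
+The idea behind the fix, as a minimal self-contained C sketch
+(hypothetical helper, not the actual EC code): an xattr that reports
+per-brick state needs an answer from every subvolume, while an
+ordinary read only needs enough answers to decode the data:
+
+    #include <string.h>
+
+    /* how many subvolumes a getxattr must be dispatched to */
+    static int
+    required_answers (const char *name, int fragments, int bricks)
+    {
+            if (name &&
+                strcmp (name, "trusted.glusterfs.list-node-uuids") == 0)
+                    return bricks;      /* one uuid expected per brick */
+            return fragments;           /* any decodable subset is enough */
+    }
+
+In ec_gf_getxattr() this corresponds to using EC_MINIMUM_ALL for the
+node-uuid keys instead of the default minimum.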
+ +> Upstream patch: https://review.gluster.org/19784 + +Change-Id: If62eb39a6857258923ba625e153d4ad79018ea2f +BUG: 1561733 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/134066 +Tested-by: RHGS Build Bot +Reviewed-by: Ashish Pandey +--- + tests/basic/ec/ec-rebalance.t | 1 + + xlators/cluster/ec/src/ec.c | 2 +- + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tests/basic/ec/ec-rebalance.t b/tests/basic/ec/ec-rebalance.t +index b5c3072..6cda3a3 100644 +--- a/tests/basic/ec/ec-rebalance.t ++++ b/tests/basic/ec/ec-rebalance.t +@@ -14,6 +14,7 @@ cleanup + TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2} ++TEST $CLI volume set $V0 lookup-optimize on + TEST $CLI volume start $V0 + + #Mount the volume +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 956b45b..eb91c4a 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -881,7 +881,7 @@ ec_gf_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + + if (name && + ((fnmatch (GF_XATTR_STIME_PATTERN, name, 0) == 0) || +- (XATTR_IS_NODE_UUID(name)))) { ++ XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) { + minimum = EC_MINIMUM_ALL; + } + +-- +1.8.3.1 + diff --git a/0211-common-ha-scripts-pass-the-list-of-servers-properly-.patch b/0211-common-ha-scripts-pass-the-list-of-servers-properly-.patch new file mode 100644 index 0000000..a861c31 --- /dev/null +++ b/0211-common-ha-scripts-pass-the-list-of-servers-properly-.patch @@ -0,0 +1,40 @@ +From cc7483e65a0b165112446d4598fe4215a4e8109f Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan +Date: Wed, 4 Apr 2018 09:29:43 +0530 +Subject: [PATCH 211/212] common-ha/scripts : pass the list of servers properly + to stop_ganesha_all() + +Label: BACKPORT FROM UPSTREAM 3.10 + +Upstream Reference : +>Change-Id: I6d92623cd9fb450d7a27f5acc61eca0b3cbc9b08 +>BUG: 1563500 +>Signed-off-by: Jiffin Tony Thottan +>Patch link : https://review.gluster.org/#/c/19816/ + +Change-Id: I6d92623cd9fb450d7a27f5acc61eca0b3cbc9b08 +BUG: 1226874 +Signed-off-by: Jiffin Tony Thottan +Reviewed-on: https://code.engineering.redhat.com/gerrit/134453 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 623fb64..4459068 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -199,7 +199,7 @@ setup_cluster() + if [ $? 
-ne 0 ]; then + logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" + #set up failed stop all ganesha process and clean up symlinks in cluster +- stop_ganesha_all ${servers} ++ stop_ganesha_all "${servers}" + exit 1; + fi + pcs cluster start --all +-- +1.8.3.1 + diff --git a/0212-readdir-ahead-Cleanup-the-xattr-request-code.patch b/0212-readdir-ahead-Cleanup-the-xattr-request-code.patch new file mode 100644 index 0000000..41f94fc --- /dev/null +++ b/0212-readdir-ahead-Cleanup-the-xattr-request-code.patch @@ -0,0 +1,94 @@ +From a81170eca4848c6bd2d0fa9e8a2c9fc9803b868e Mon Sep 17 00:00:00 2001 +From: Poornima G +Date: Thu, 4 Jan 2018 19:39:05 +0530 +Subject: [PATCH 212/212] readdir-ahead: Cleanup the xattr request code + +upstream master: https://review.gluster.org/#/c/19172/ + +> Change-Id: Ia0c697583751290a455da3cd1894e0c5685d1bd8 +> updates: #297 +> Signed-off-by: Poornima G + +BUG: 1559884 +Change-Id: Ia0c697583751290a455da3cd1894e0c5685d1bd8 +Signed-off-by: Poornima G +Reviewed-on: https://code.engineering.redhat.com/gerrit/134500 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../performance/readdir-ahead/src/readdir-ahead.c | 42 ++-------------------- + 1 file changed, 2 insertions(+), 40 deletions(-) + +diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c +index c2ceda4..0d3bdbd 100644 +--- a/xlators/performance/readdir-ahead/src/readdir-ahead.c ++++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c +@@ -474,31 +474,6 @@ err: + return -1; + } + +- +-static int +-rda_unpack_mdc_loaded_keys_to_dict(char *payload, dict_t *dict) +-{ +- int ret = -1; +- char *mdc_key = NULL; +- +- if (!payload || !dict) { +- goto out; +- } +- +- mdc_key = strtok(payload, " "); +- while (mdc_key != NULL) { +- ret = dict_set_int8 (dict, mdc_key, 0); +- if (ret) { +- goto out; +- } +- mdc_key = strtok(NULL, " "); +- } +- +-out: +- return ret; +-} +- +- + static int32_t + rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +@@ -528,9 +503,7 @@ static int32_t + rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) + { +- int ret = -1; + int op_errno = 0; +- char *payload = NULL; + struct rda_local *local = NULL; + dict_t *xdata_from_req = NULL; + +@@ -552,21 +525,10 @@ rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + * Retrieve list of keys set by md-cache xlator and store it + * in local to be consumed in rda_opendir_cbk + */ +- ret = dict_get_str (xdata, GF_MDC_LOADED_KEY_NAMES, &payload); +- if (ret) +- goto wind; +- ret = rda_unpack_mdc_loaded_keys_to_dict((char *) payload, +- xdata_from_req); +- if (ret) +- goto wind; +- +- dict_copy (xdata, xdata_from_req); +- dict_del (xdata_from_req, GF_MDC_LOADED_KEY_NAMES); +- +- local->xattrs = xdata_from_req; ++ local->xattrs = dict_ref (xdata); + frame->local = local; + } +-wind: ++ + STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; +-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index 69179e2..a9e1d1e 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 3.12.2 -Release: 6%{?dist} +Release: 7%{?dist} %endif License: GPLv2 or LGPLv3+ Group: System Environment/Base @@ -466,6 +466,17 @@ Patch0198: 
0198-cluster-ec-Change-default-read-policy-to-gfid-hash.patch
 Patch0199: 0199-cluster-ec-avoid-delays-in-self-heal.patch
 Patch0200: 0200-quick-read-Discard-cache-for-fallocate-zerofill-and-.patch
 Patch0201: 0201-posix-After-set-storage.reserve-limit-df-does-not-sh.patch
+Patch0202: 0202-glusterd-TLS-verification-fails-while-using-intermed.patch
+Patch0203: 0203-mgmt-glusterd-Adding-validation-for-setting-quorum-c.patch
+Patch0204: 0204-glusterd-memory-leak-in-mgmt_v3-lock-functionality.patch
+Patch0205: 0205-cluster-dht-User-xattrs-are-not-healed-after-brick-s.patch
+Patch0206: 0206-glusterd-honour-localtime-logging-for-all-the-daemon.patch
+Patch0207: 0207-glusterd-fix-txn_opinfo-memory-leak.patch
+Patch0208: 0208-cluster-dht-enable-lookup-optimize-by-default.patch
+Patch0209: 0209-cluster-dht-Update-layout-in-inode-only-on-success.patch
+Patch0210: 0210-cluster-ec-send-list-node-uuids-request-to-all-subvo.patch
+Patch0211: 0211-common-ha-scripts-pass-the-list-of-servers-properly-.patch
+Patch0212: 0212-readdir-ahead-Cleanup-the-xattr-request-code.patch
 
 %description
 GlusterFS is a distributed file-system capable of scaling to several
@@ -2409,6 +2420,10 @@ fi
 %endif
 
 %changelog
+* Wed Apr 04 2018 Milind Changire - 3.12.2-7
+- fixes bugs bz#958062 bz#1186664 bz#1226874 bz#1446046 bz#1529451 bz#1550315
+  bz#1557365 bz#1559884 bz#1561733
+
 * Mon Mar 26 2018 Milind Changire - 3.12.2-6
 - fixes bugs bz#1491785 bz#1518710 bz#1523599 bz#1528733 bz#1550474 bz#1550982
   bz#1551186 bz#1552360 bz#1552414 bz#1552425 bz#1554255 bz#1554905