diff --git a/0140-md-cache-Add-additional-samba-and-macOS-specific-EAs.patch b/0140-md-cache-Add-additional-samba-and-macOS-specific-EAs.patch new file mode 100644 index 0000000..24617ee --- /dev/null +++ b/0140-md-cache-Add-additional-samba-and-macOS-specific-EAs.patch @@ -0,0 +1,272 @@ +From 14a8e47839a955cd693616c7497b93ddc9584478 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?G=C3=BCnther=20Deschner?= +Date: Tue, 25 Jul 2017 11:54:09 +0200 +Subject: [PATCH 140/148] md-cache: Add additional samba and macOS specific EAs + to mdcache + +Samba ships with a server implementation of the Apple Create Context +extension (AAPL) as negotiated by all modern Apple clients. With the +support of the AAPL extension, Apple clients will integrate better with +Samba servers. The AAPL implementation itself is contained in the Samba +vfs_fruit(8) module which has to be activated in Samba. This vfs_fruit +module also provides support for macOS alternate data streams which will +be represented in EAs. Two standard data streams ("AFP_AfpInfo" and +"AFP_Resource") will be stored in the following EAs: + +* user.org.netatalk.Metadata +* user.org.netatalk.ResourceFork + +For all other data streams, vfs_fruit relies on another Samba vfs +module, vfs_streams_xattr(8), to handle these. Although configurable, by +default the vfs_streams_xattr module will build EA keynames with a +"user.DosStream." prefix. Please note that we have to deal with only one +known prefix key, as macOS will happily compose EA keynames like: + +* user.DosStream.com.apple.diskimages.fsck:$DATA +* user.DosStream.com.apple.diskimages.recentcksum:$DATA +* user.DosStream.com.apple.metadata:kMDItemWhereFroms:$DATA +* user.DosStream.com.apple.quarantine:$DATA +* etc. + +Caching of vfs_fruit specific EAs is crucial for SMB performance and is +controlled with the same configuration option +"performance.cache-samba-metadata". + +> Signed-off-by: Guenther Deschner +> Change-Id: Ia7aa341234dc13e1c0057f3d658b7ef711b5d31e +> BUG: 1499933 +> Reviewed-on: https://review.gluster.org/#/c/18455/ +> Reviewed-by: Jeff Darcy +> Reviewed-by: Niels de Vos +> Smoke: Gluster Build System +> CentOS-regression: Gluster Build System +> (cherry picked from commit ae9b006f23b1408ff548348440369d056becdc1d) + +Change-Id: Ia7aa341234dc13e1c0057f3d658b7ef711b5d31e +BUG: 1446125 +Signed-off-by: Guenther Deschner +Signed-off-by: Poornima G +Reviewed-on: https://code.engineering.redhat.com/gerrit/128479 +Tested-by: RHGS Build Bot +Reviewed-by: Gunther Deschner +Reviewed-by: Atin Mukherjee +--- + tests/bugs/md-cache/bug-1211863.t | 16 +++---- + xlators/performance/md-cache/src/md-cache.c | 66 ++++++++++++++++++++++++++--- + 2 files changed, 68 insertions(+), 14 deletions(-) + +diff --git a/tests/bugs/md-cache/bug-1211863.t b/tests/bugs/md-cache/bug-1211863.t +index ece42fe..b89d7f5 100755 +--- a/tests/bugs/md-cache/bug-1211863.t ++++ b/tests/bugs/md-cache/bug-1211863.t +@@ -26,15 +26,15 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1 + TEST touch $M0/file1 + + ## 9. Setxattr from mount-0 +-TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1" ++TEST "setfattr -n user.DosStream.abc -v "abc" $M0/file1" + ## 10. Getxattr from mount-1, this should return the correct value as it is a fresh getxattr +-TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc" ++TEST "getfattr -n user.DosStream.abc $M1/file1 | grep -q abc" + + ## 11. 
Now modify the same xattr from mount-0 again +-TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1" ++TEST "setfattr -n user.DosStream.abc -v "xyz" $M0/file1" + ## 12. Since the xattr is already cached in mount-1 it returns the old xattr + #value, until the timeout (600) +-TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc" ++TEST "getfattr -n user.DosStream.abc $M1/file1 | grep -q abc" + + ## 13. Unmount to clean all the cache + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +@@ -54,14 +54,14 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1 + + ## 23. Repeat the tests 11-14, but this time since cache invalidation is on, + #the getxattr will reflect the new value +-TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1" +-TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc" +-TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1" ++TEST "setfattr -n user.DosStream.abc -v "abc" $M0/file1" ++TEST "getfattr -n user.DosStream.abc $M1/file1 | grep -q abc" ++TEST "setfattr -n user.DosStream.abc -v "xyz" $M0/file1" + sleep 2; #There can be a very very small window where the next getxattr + #reaches md-cache, before the cache-invalidation caused by previous + #setxattr, reaches md-cache. Hence sleeping for 2 sec. + #Also it should not be > 600. +-TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q xyz" ++TEST "getfattr -n user.DosStream.abc $M1/file1 | grep -q xyz" + + TEST $CLI volume set $V0 cache-samba-metadata off + EXPECT 'off' volinfo_field $V0 'performance.cache-samba-metadata' +diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c +index 1ca7727..feab357 100644 +--- a/xlators/performance/md-cache/src/md-cache.c ++++ b/xlators/performance/md-cache/src/md-cache.c +@@ -72,56 +72,85 @@ static struct mdc_key { + const char *name; + int load; + int check; ++ int prefix_match; + } mdc_keys[] = { + { + .name = POSIX_ACL_ACCESS_XATTR, + .load = 0, + .check = 1, ++ .prefix_match = 0, + }, + { + .name = POSIX_ACL_DEFAULT_XATTR, + .load = 0, + .check = 1, ++ .prefix_match = 0, + }, + { + .name = GF_POSIX_ACL_ACCESS, + .load = 0, + .check = 1, ++ .prefix_match = 0, + }, + { + .name = GF_POSIX_ACL_DEFAULT, + .load = 0, + .check = 1, ++ .prefix_match = 0, + }, + { + .name = GF_SELINUX_XATTR_KEY, + .load = 0, + .check = 1, ++ .prefix_match = 0, + }, + { + .name = "user.swift.metadata", + .load = 0, + .check = 1, ++ .prefix_match = 0, + }, + { + .name = "user.DOSATTRIB", + .load = 0, + .check = 1, ++ .prefix_match = 0, ++ }, ++ { ++ .name = "user.DosStream.*", ++ .load = 0, ++ .check = 1, ++ .prefix_match = 1, ++ }, ++ { ++ .name = "user.org.netatalk.Metadata", ++ .load = 0, ++ .check = 1, ++ .prefix_match = 0, ++ }, ++ { ++ .name = "user.org.netatalk.ResourceFork", ++ .load = 0, ++ .check = 1, ++ .prefix_match = 0, + }, + { + .name = "security.NTACL", + .load = 0, + .check = 1, ++ .prefix_match = 0, + }, + { + .name = "security.capability", + .load = 0, + .check = 1, ++ .prefix_match = 0, + }, + { + .name = "gfid-req", + .load = 0, + .check = 1, ++ .prefix_match = 0, + }, + { + .name = "security.ima", +@@ -132,6 +161,7 @@ static struct mdc_key { + .name = NULL, + .load = 0, + .check = 0, ++ .prefix_match = 0, + } + }; + +@@ -606,8 +636,14 @@ updatefn(dict_t *dict, char *key, data_t *value, void *data) + for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) { + if (!mdc_keys[i].check) + continue; +- if (strcmp(mdc_key, key)) +- continue; ++ ++ if (mdc_keys[i].prefix_match) { ++ if (strncmp (mdc_key, key, (strlen(mdc_key) 
- 1))) ++ continue; ++ } else { ++ if (strcmp(mdc_key, key)) ++ continue; ++ } + + if (!u->dict) { + u->dict = dict_new(); +@@ -986,8 +1022,13 @@ is_mdc_key_satisfied (const char *key) + for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) { + if (!mdc_keys[i].load) + continue; +- if (strcmp (mdc_key, key) == 0) +- return 1; ++ if (mdc_keys[i].prefix_match) { ++ if (strncmp (mdc_key, key, (strlen(mdc_key) - 1)) == 0) ++ return 1; ++ } else { ++ if (strcmp (mdc_key, key) == 0) ++ return 1; ++ } + } + + gf_msg_trace ("md-cache", 0, "xattr key %s doesn't satisfy " +@@ -2902,6 +2943,12 @@ reconfigure (xlator_t *this, dict_t *options) + options, bool, out); + mdc_key_load_set (mdc_keys, "user.DOSATTRIB", + conf->cache_samba_metadata); ++ mdc_key_load_set (mdc_keys, "user.DosStream.", ++ conf->cache_samba_metadata); ++ mdc_key_load_set (mdc_keys, "user.org.netatalk.Metadata", ++ conf->cache_samba_metadata); ++ mdc_key_load_set (mdc_keys, "user.org.netatalk.ResourceFork", ++ conf->cache_samba_metadata); + mdc_key_load_set (mdc_keys, "security.NTACL", + conf->cache_samba_metadata); + +@@ -2973,6 +3020,12 @@ init (xlator_t *this) + bool, out); + mdc_key_load_set (mdc_keys, "user.DOSATTRIB", + conf->cache_samba_metadata); ++ mdc_key_load_set (mdc_keys, "user.DosStream.", ++ conf->cache_samba_metadata); ++ mdc_key_load_set (mdc_keys, "user.org.netatalk.Metadata", ++ conf->cache_samba_metadata); ++ mdc_key_load_set (mdc_keys, "user.org.netatalk.ResourceFork", ++ conf->cache_samba_metadata); + mdc_key_load_set (mdc_keys, "security.NTACL", + conf->cache_samba_metadata); + +@@ -3131,8 +3184,9 @@ struct volume_options options[] = { + { .key = {"cache-samba-metadata"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", +- .description = "Cache samba metadata (user.DOSATTRIB, security.NTACL" +- " xattrs)", ++ .description = "Cache samba metadata (user.DOSATTRIB, security.NTACL," ++ " org.netatalk.Metadata, org.netatalk.ResourceFork, " ++ "and user.DosStream. 
xattrs)", + }, + { .key = {"cache-posix-acl"}, + .type = GF_OPTION_TYPE_BOOL, +-- +1.8.3.1 + diff --git a/0141-rpc-Showing-some-unusual-timer-error-logs-during-bri.patch b/0141-rpc-Showing-some-unusual-timer-error-logs-during-bri.patch new file mode 100644 index 0000000..8c05c8b --- /dev/null +++ b/0141-rpc-Showing-some-unusual-timer-error-logs-during-bri.patch @@ -0,0 +1,66 @@ +From ef08094304905a40339fba306e64508082432ab3 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Thu, 25 Jan 2018 10:03:09 +0530 +Subject: [PATCH 141/148] rpc: Showing some unusual timer error logs during + brick stop + +Solution: Update msg condition in gf_timer_call_after function + to avoid the message + +> BUG: 1538427 +> Change-Id: I849e8e052a8259cf977fd5e7ff3aeba52f9b5f27 +> Signed-off-by: Mohit Agrawal +> (Upstream patch link https://review.gluster.org/#/c/19320/) +> (cherry picked from commit c142d26e44436d805e476f2d13ac8726052a59c4) + +BUG: 1467536 +Change-Id: I849e8e052a8259cf977fd5e7ff3aeba52f9b5f27 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/129722 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +Tested-by: RHGS Build Bot +--- + libglusterfs/src/timer.c | 18 +++--------------- + 1 file changed, 3 insertions(+), 15 deletions(-) + +diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c +index 3d69a9f..34dfd35 100644 +--- a/libglusterfs/src/timer.c ++++ b/libglusterfs/src/timer.c +@@ -30,10 +30,11 @@ gf_timer_call_after (glusterfs_ctx_t *ctx, + gf_timer_t *trav = NULL; + uint64_t at = 0; + +- if (ctx == NULL) ++ if ((ctx == NULL) || (ctx->cleanup_started)) + { + gf_msg_callingfn ("timer", GF_LOG_ERROR, EINVAL, +- LG_MSG_INVALID_ARG, "invalid argument"); ++ LG_MSG_INVALID_ARG, "Either ctx is NULL or" ++ " ctx cleanup started"); + return NULL; + } + +@@ -186,19 +187,6 @@ gf_timer_registry_init (glusterfs_ctx_t *ctx) + { + gf_timer_registry_t *reg = NULL; + +- if (ctx == NULL) { +- gf_msg_callingfn ("timer", GF_LOG_ERROR, EINVAL, +- LG_MSG_INVALID_ARG, "invalid argument"); +- return NULL; +- } +- +- if (ctx->cleanup_started) { +- gf_msg_callingfn ("timer", GF_LOG_INFO, 0, +- LG_MSG_CTX_CLEANUP_STARTED, +- "ctx cleanup started"); +- return NULL; +- } +- + LOCK (&ctx->lock); + { + reg = ctx->timer; +-- +1.8.3.1 + diff --git a/0142-cluster-dht-Add-migration-checks-to-dht_-f-xattrop.patch b/0142-cluster-dht-Add-migration-checks-to-dht_-f-xattrop.patch new file mode 100644 index 0000000..2f75451 --- /dev/null +++ b/0142-cluster-dht-Add-migration-checks-to-dht_-f-xattrop.patch @@ -0,0 +1,701 @@ +From 4f5197f585ce4117e29e6b6af0e6d91c19eb34ea Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Wed, 3 Jan 2018 10:36:58 +0530 +Subject: [PATCH 142/148] cluster/dht: Add migration checks to dht_(f)xattrop + +The dht_(f)xattrop implementation did not implement +migration phase1/phase2 checks which could cause issues +with rebalance on sharded volumes. +This does not solve the issue where fops may reach the target +out of order. 
+ +upstream : https://review.gluster.org/#/c/17776 + +> Change-Id: I2416fc35115e60659e35b4b717fd51f20746586c +> BUG: 1471031 +> Signed-off-by: N Balachandran + +Change-Id: I95b453421809c543ba8e4febd9a12c84e9439a29 +BUG: 1530146 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/126959 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/glusterfs.h | 1 + + xlators/cluster/dht/src/dht-common.c | 48 +++++- + xlators/cluster/dht/src/dht-common.h | 10 ++ + xlators/cluster/dht/src/dht-helper.c | 3 + + xlators/cluster/dht/src/dht-inode-read.c | 241 +++++++++++++++++++++++++++--- + xlators/cluster/dht/src/dht-rebalance.c | 86 +++++------ + xlators/cluster/dht/src/dht-selfheal.c | 1 - + xlators/storage/posix/src/posix-helpers.c | 31 ++++ + xlators/storage/posix/src/posix.c | 2 + + xlators/storage/posix/src/posix.h | 4 + + 10 files changed, 366 insertions(+), 61 deletions(-) + +diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h +index 18256aa..c8835d9 100644 +--- a/libglusterfs/src/glusterfs.h ++++ b/libglusterfs/src/glusterfs.h +@@ -272,6 +272,7 @@ + #define TIER_LINKFILE_GFID "tier-linkfile-gfid" + #define DHT_SKIP_OPEN_FD_UNLINK "dont-unlink-for-open-fd" + #define DHT_IATT_IN_XDATA_KEY "dht-get-iatt-in-xattr" ++#define DHT_MODE_IN_XDATA_KEY "dht-get-mode-in-xattr" + #define GET_LINK_COUNT "get-link-count" + #define GF_GET_SIZE "get-size" + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index b55cb36..c2d0827 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -18,7 +18,6 @@ + #include "dht-lock.h" + #include "defaults.h" + #include "byte-order.h" +-#include "glusterfs-acl.h" + #include "quota-common-utils.h" + #include "upcall-utils.h" + +@@ -46,6 +45,11 @@ int + dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this); + + ++int ++dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata); ++ + + /* Sets the blocks and size values to fixed values. This is to be called + * only for dirs. The caller is responsible for checking the type +@@ -61,6 +65,48 @@ int32_t dht_set_fixed_dir_stat (struct iatt *stat) + } + + ++/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY ++ * Use DHT_MODE_IN_XDATA_KEY if available. 
Else fall back to ++ * DHT_IATT_IN_XDATA_KEY ++ */ ++int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req) ++{ ++ int ret = -1; ++ ++ ret = dict_set_int8 (xattr_req, DHT_MODE_IN_XDATA_KEY, 1); ++ ret = dict_set_int8 (xattr_req, DHT_IATT_IN_XDATA_KEY, 1); ++ ++ /* At least one call succeeded */ ++ return ret; ++} ++ ++ ++/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY ++ * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to ++ * DHT_IATT_IN_XDATA_KEY ++ * This will return a dummy iatt with only the mode and type set ++ */ ++int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata, ++ struct iatt *stbuf) ++{ ++ int ret = -1; ++ int32_t mode = 0; ++ ++ ret = dict_get_int32 (xdata, DHT_MODE_IN_XDATA_KEY, &mode); ++ ++ if (ret) { ++ ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY, ++ (void **)&stbuf); ++ } else { ++ stbuf->ia_prot = ia_prot_from_st_mode (mode); ++ stbuf->ia_type = ia_type_from_st_mode (mode); ++ } ++ ++ return ret; ++} ++ ++ ++ + int + dht_rmdir_unlock (call_frame_t *frame, xlator_t *this); + +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index e2afd6c..47a2e23 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -20,6 +20,7 @@ + #include "refcount.h" + #include "timer.h" + #include "protocol-common.h" ++#include "glusterfs-acl.h" + + #ifndef _DHT_H + #define _DHT_H +@@ -146,6 +147,7 @@ struct dht_rebalance_ { + dht_defrag_cbk_fn_t target_op_fn; + dict_t *xdata; + dict_t *xattr; ++ dict_t *dict; + int32_t set; + struct gf_flock flock; + int lock_cmd; +@@ -1416,4 +1418,12 @@ dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int + dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata); ++ ++/* Abstract out the DHT-IATT-IN-DICT */ ++ ++ ++int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req); ++ ++int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata, ++ struct iatt *stbuf); + #endif/* _DHT_H */ +diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c +index cca2bfe..e56a085 100644 +--- a/xlators/cluster/dht/src/dht-helper.c ++++ b/xlators/cluster/dht/src/dht-helper.c +@@ -797,6 +797,9 @@ dht_local_wipe (xlator_t *this, dht_local_t *local) + if (local->rebalance.xattr) + dict_unref (local->rebalance.xattr); + ++ if (local->rebalance.dict) ++ dict_unref (local->rebalance.dict); ++ + GF_FREE (local->rebalance.vector); + + if (local->rebalance.iobref) +diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c +index a9e4766..fa63fef 100644 +--- a/xlators/cluster/dht/src/dht-inode-read.c ++++ b/xlators/cluster/dht/src/dht-inode-read.c +@@ -24,8 +24,9 @@ int dht_lk2 (xlator_t *this, xlator_t *dst_node, + call_frame_t *frame, int ret); + int dht_fsync2 (xlator_t *this, xlator_t *dst_node, + call_frame_t *frame, int ret); +- +- ++int ++dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, ++ int ret); + + int + dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +@@ -1246,13 +1247,163 @@ err: + return 0; + } + +-/* Currently no translators on top of 'distribute' will be using +- * below fops, hence not implementing 'migration' related checks +- */ ++ ++int ++dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ call_frame_t 
*call_frame = NULL; ++ xlator_t *prev = NULL; ++ xlator_t *src_subvol = NULL; ++ xlator_t *dst_subvol = NULL; ++ struct iatt stbuf = {0,}; ++ int ret = -1; ++ inode_t *inode = NULL; ++ ++ local = frame->local; ++ call_frame = cookie; ++ prev = call_frame->this; ++ ++ local->op_errno = op_errno; ++ ++ if ((op_ret == -1) && !dht_inode_missing (op_errno)) { ++ gf_msg_debug (this->name, op_errno, ++ "subvolume %s returned -1.", ++ prev->name); ++ goto out; ++ } ++ ++ if (local->call_cnt != 1) ++ goto out; ++ ++ ret = dht_read_iatt_from_xdata (this, xdata, &stbuf); ++ ++ if ((!op_ret) && (ret)) { ++ /* This is a potential problem and can cause corruption ++ * with sharding. ++ * Oh well. We tried. ++ */ ++ goto out; ++ } ++ ++ local->op_ret = op_ret; ++ local->rebalance.target_op_fn = dht_common_xattrop2; ++ if (xdata) ++ local->rebalance.xdata = dict_ref (xdata); ++ ++ if (dict) ++ local->rebalance.dict = dict_ref (dict); ++ ++ /* Phase 2 of migration */ ++ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (&stbuf)) { ++ ret = dht_rebalance_complete_check (this, frame); ++ if (!ret) ++ return 0; ++ } ++ ++ /* Check if the rebalance phase1 is true */ ++ if (IS_DHT_MIGRATION_PHASE1 (&stbuf)) { ++ ++ inode = local->loc.inode ? local->loc.inode : local->fd->inode; ++ dht_inode_ctx_get_mig_info (this, inode, &src_subvol, ++ &dst_subvol); ++ ++ if (dht_mig_info_is_invalid (local->cached_subvol, src_subvol, ++ dst_subvol) || ++ !dht_fd_open_on_dst (this, local->fd, dst_subvol)) { ++ ++ ret = dht_rebalance_in_progress_check (this, frame); ++ if (!ret) ++ return 0; ++ } else { ++ dht_common_xattrop2 (this, dst_subvol, frame, 0); ++ return 0; ++ } ++ } ++ ++ ++out: ++ if (local->fop == GF_FOP_XATTROP) { ++ DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, ++ dict, xdata); ++ } else { ++ DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, ++ dict, xdata); ++ } ++ ++ return 0; ++} ++ ++ ++int ++dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, ++ int ret) ++{ ++ dht_local_t *local = NULL; ++ int32_t op_errno = EINVAL; ++ ++ if ((frame == NULL) || (frame->local == NULL)) ++ goto out; ++ ++ local = frame->local; ++ op_errno = local->op_errno; ++ ++ if (we_are_not_migrating (ret)) { ++ /* This dht xlator is not migrating the file. Unwind and ++ * pass on the original mode bits so the higher DHT layer ++ * can handle this. 
++ */ ++ if (local->fop == GF_FOP_XATTROP) { ++ DHT_STACK_UNWIND (xattrop, frame, local->op_ret, ++ op_errno, local->rebalance.dict, ++ local->rebalance.xdata); ++ } else { ++ DHT_STACK_UNWIND (fxattrop, frame, local->op_ret, ++ op_errno, local->rebalance.dict, ++ local->rebalance.xdata); ++ } ++ ++ return 0; ++ } ++ ++ if (subvol == NULL) ++ goto out; ++ ++ local->call_cnt = 2; /* This is the second attempt */ ++ ++ if (local->fop == GF_FOP_XATTROP) { ++ STACK_WIND (frame, dht_common_xattrop_cbk, subvol, ++ subvol->fops->xattrop, &local->loc, ++ local->rebalance.flags, local->rebalance.xattr, ++ local->xattr_req); ++ } else { ++ STACK_WIND (frame, dht_common_xattrop_cbk, subvol, ++ subvol->fops->fxattrop, local->fd, ++ local->rebalance.flags, local->rebalance.xattr, ++ local->xattr_req); ++ } ++ ++ return 0; ++ ++out: ++ ++ /* If local is unavailable we could be unwinding the wrong ++ * function here */ ++ ++ if (local && (local->fop == GF_FOP_XATTROP)) { ++ DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL); ++ } else { ++ DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL); ++ } ++ return 0; ++} ++ + + int + dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) ++ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) + { + DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata); + return 0; +@@ -1263,9 +1414,10 @@ int + dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) + { +- xlator_t *subvol = NULL; ++ xlator_t *subvol = NULL; + int op_errno = -1; +- dht_local_t *local = NULL; ++ dht_local_t *local = NULL; ++ int ret = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); +@@ -1287,11 +1439,33 @@ dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, + goto err; + } + +- local->call_cnt = 1; ++ /* Todo : Handle dirs as well. At the moment the only xlator above dht ++ * that uses xattrop is sharding and that is only for files */ ++ ++ if (IA_ISDIR (loc->inode->ia_type)) { ++ STACK_WIND (frame, dht_xattrop_cbk, subvol, ++ subvol->fops->xattrop, loc, flags, dict, xdata); ++ ++ } else { ++ local->xattr_req = xdata ? dict_ref(xdata) : dict_new (); ++ local->call_cnt = 1; + +- STACK_WIND (frame, dht_xattrop_cbk, +- subvol, subvol->fops->xattrop, +- loc, flags, dict, xdata); ++ local->rebalance.xattr = dict_ref (dict); ++ local->rebalance.flags = flags; ++ ++ ret = dht_request_iatt_in_xdata (this, local->xattr_req); ++ ++ if (ret) { ++ gf_msg_debug (this->name, 0, ++ "Failed to set dictionary key %s file=%s", ++ DHT_IATT_IN_XDATA_KEY, loc->path); ++ } ++ ++ STACK_WIND (frame, dht_common_xattrop_cbk, subvol, ++ subvol->fops->xattrop, loc, ++ local->rebalance.flags, local->rebalance.xattr, ++ local->xattr_req); ++ } + + return 0; + +@@ -1318,6 +1492,8 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this, + { + xlator_t *subvol = NULL; + int op_errno = -1; ++ dht_local_t *local = NULL; ++ int ret = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); +@@ -1331,10 +1507,39 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this, + goto err; + } + +- STACK_WIND (frame, +- dht_fxattrop_cbk, +- subvol, subvol->fops->fxattrop, +- fd, flags, dict, xdata); ++ local = dht_local_init (frame, NULL, fd, GF_FOP_FXATTROP); ++ if (!local) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ /* Todo : Handle dirs as well. 
At the moment the only xlator above dht ++ * that uses xattrop is sharding and that is only for files */ ++ ++ if (IA_ISDIR (fd->inode->ia_type)) { ++ STACK_WIND (frame, dht_fxattrop_cbk, subvol, ++ subvol->fops->fxattrop, fd, flags, dict, xdata); ++ ++ } else { ++ local->xattr_req = xdata ? dict_ref(xdata) : dict_new (); ++ local->call_cnt = 1; ++ ++ local->rebalance.xattr = dict_ref (dict); ++ local->rebalance.flags = flags; ++ ++ ret = dht_request_iatt_in_xdata (this, local->xattr_req); ++ ++ if (ret) { ++ gf_msg_debug (this->name, 0, ++ "Failed to set dictionary key %s fd=%p", ++ DHT_IATT_IN_XDATA_KEY, fd); ++ } ++ ++ STACK_WIND (frame, dht_common_xattrop_cbk, subvol, ++ subvol->fops->fxattrop, fd, ++ local->rebalance.flags, local->rebalance.xattr, ++ local->xattr_req); ++ } + + return 0; + +@@ -1345,6 +1550,9 @@ err: + return 0; + } + ++/* Currently no translators on top of 'distribute' will be using ++ * below fops, hence not implementing 'migration' related checks ++ */ + + int + dht_inodelk_cbk (call_frame_t *frame, void *cookie, +@@ -1406,7 +1614,6 @@ dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + + { +- + dht_lk_inode_unref (frame, op_ret); + DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata); + return 0; +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index ae367d7..3343a2b 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -168,7 +168,7 @@ dht_strip_out_acls (dict_t *dict) + { + if (dict) { + dict_del (dict, "trusted.SGI_ACL_FILE"); +- dict_del (dict, "POSIX_ACL_ACCESS_XATTR"); ++ dict_del (dict, POSIX_ACL_ACCESS_XATTR); + } + } + +@@ -665,7 +665,7 @@ out: + static int + __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, + loc_t *loc, struct iatt *stbuf, fd_t **dst_fd, +- dict_t *xattr, int *fop_errno) ++ int *fop_errno) + { + int ret = -1; + fd_t *fd = NULL; +@@ -810,28 +810,6 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, + goto out; + } + +- ret = syncop_fsetxattr (to, fd, xattr, 0, NULL, NULL); +- if (ret < 0) { +- *fop_errno = -ret; +- gf_msg (this->name, GF_LOG_WARNING, -ret, +- DHT_MSG_MIGRATE_FILE_FAILED, +- "%s: failed to set xattr on %s", +- loc->path, to->name); +- +- } +- +- /* TODO: Need to add a detailed comment about why we moved away from +- ftruncate. 
+- +- ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, NULL); +- if (ret < 0) { +- *fop_errno = -ret; +- gf_msg (this->name, GF_LOG_ERROR, -ret, +- DHT_MSG_MIGRATE_FILE_FAILED, +- "ftruncate failed for %s on %s", +- loc->path, to->name); +- */ +- + ret = syncop_fsetattr (to, fd, stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID), + NULL, NULL, NULL, NULL); +@@ -1620,24 +1598,10 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + } + + +- /* TODO: move all xattr related operations to fd based operations */ +- ret = syncop_listxattr (from, loc, &xattr, NULL, NULL); +- if (ret < 0) { +- *fop_errno = -ret; +- ret = -1; +- gf_msg (this->name, GF_LOG_WARNING, *fop_errno, +- DHT_MSG_MIGRATE_FILE_FAILED, +- "Migrate file failed:" +- "%s: failed to get xattr from %s", +- loc->path, from->name); +- } +- +- /* Copying posix acls to the linkto file messes up the permissions*/ +- dht_strip_out_acls (xattr); + + /* create the destination, with required modes/xattr */ + ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf, +- &dst_fd, xattr, fop_errno); ++ &dst_fd, fop_errno); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Create dst failed" + " on - %s for file - %s", to->name, loc->path); +@@ -1683,7 +1647,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + * as in case of failure the linkto needs to point to the source + * subvol */ + ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf, +- &dst_fd, xattr, fop_errno); ++ &dst_fd, fop_errno); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Create dst failed" + " on - %s for file - %s", to->name, loc->path); +@@ -1709,8 +1673,44 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + loc->path, from->name); + goto out; + } ++ ++ /* TODO: move all xattr related operations to fd based operations */ ++ ret = syncop_listxattr (from, loc, &xattr, NULL, NULL); ++ if (ret < 0) { ++ *fop_errno = -ret; ++ gf_msg (this->name, GF_LOG_WARNING, *fop_errno, ++ DHT_MSG_MIGRATE_FILE_FAILED, ++ "Migrate file failed:" ++ "%s: failed to get xattr from %s", ++ loc->path, from->name); ++ ret = -1; ++ goto out; ++ } ++ ++ /* Copying posix acls to the linkto file messes up the permissions*/ ++ dht_strip_out_acls (xattr); ++ ++ /* Remove the linkto xattr as we don't want to overwrite the value ++ * set on the dst. 
++ */ ++ dict_del (xattr, conf->link_xattr_name); ++ ++ /* We need to error out if this fails as having the wrong shard xattrs ++ * set on the dst could cause data corruption ++ */ ++ ret = syncop_fsetxattr (to, dst_fd, xattr, 0, NULL, NULL); ++ if (ret < 0) { ++ *fop_errno = -ret; ++ gf_msg (this->name, GF_LOG_WARNING, -ret, ++ DHT_MSG_MIGRATE_FILE_FAILED, ++ "%s: failed to set xattr on %s", ++ loc->path, to->name); ++ ret = -1; ++ goto out; ++ } ++ + if (xattr_rsp) { +- /* we no more require this key */ ++ /* we no longer require this key */ + dict_del (dict, conf->link_xattr_name); + dict_unref (xattr_rsp); + } +@@ -2011,7 +2011,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + xattr = NULL; + } + +- ret = syncop_listxattr (from, loc, &xattr, NULL, NULL); ++ /* Set only the Posix ACLs this time */ ++ ret = syncop_getxattr (from, loc, &xattr, POSIX_ACL_ACCESS_XATTR, ++ NULL, NULL); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index 1577d03..3b9fcf1 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -14,7 +14,6 @@ + #include "dht-common.h" + #include "dht-messages.h" + #include "dht-lock.h" +-#include "glusterfs-acl.h" + + #define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,path) do { \ + layout->list[i].start = srt; \ +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index f8d8fed..bc97206 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -150,6 +150,37 @@ out: + return ret; + } + ++int32_t ++posix_set_mode_in_dict (dict_t *in_dict, dict_t *out_dict, ++ struct iatt *in_stbuf) ++{ ++ int ret = -1; ++ mode_t mode = 0; ++ ++ if ((!in_dict) || (!in_stbuf) || (!out_dict)) { ++ goto out; ++ } ++ ++ /* We need this only for files */ ++ if (!(IA_ISREG (in_stbuf->ia_type))) { ++ ret = 0; ++ goto out; ++ } ++ ++ /* Nobody asked for this */ ++ if (!dict_get (in_dict, DHT_MODE_IN_XDATA_KEY)) { ++ ret = 0; ++ goto out; ++ } ++ mode = st_mode_from_ia (in_stbuf->ia_prot, in_stbuf->ia_type); ++ ++ ret = dict_set_int32 (out_dict, DHT_MODE_IN_XDATA_KEY, mode); ++ ++out: ++ return ret; ++} ++ ++ + static gf_boolean_t + posix_xattr_ignorable (char *key) + { +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index 8aeca3b..a412e6d 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -6146,7 +6146,9 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + if (!xdata_rsp) { + op_ret = -1; + op_errno = ENOMEM; ++ goto out; + } ++ posix_set_mode_in_dict (xdata, xdata_rsp, &stbuf); + out: + + STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr_rsp, +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index ae9fb08..8e40e6f 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -353,4 +353,8 @@ posix_fdget_objectsignature (int, dict_t *); + + gf_boolean_t + posix_is_bulk_removexattr (char *name, dict_t *dict); ++ ++int32_t ++posix_set_mode_in_dict (dict_t *in_dict, dict_t *out_dict, ++ struct iatt *in_stbuf); + #endif /* _POSIX_H */ +-- +1.8.3.1 + diff --git a/0143-glusterd-store-handle-the-case-of-fsid-being-set-to-.patch b/0143-glusterd-store-handle-the-case-of-fsid-being-set-to-.patch new file mode 100644 index 
0000000..418e657 --- /dev/null +++ b/0143-glusterd-store-handle-the-case-of-fsid-being-set-to-.patch @@ -0,0 +1,58 @@ +From 7dd1fe266d8c908e8196c93fc371cf205a3d57cb Mon Sep 17 00:00:00 2001 +From: Amar Tumballi +Date: Sun, 4 Feb 2018 10:04:29 +0530 +Subject: [PATCH 143/148] glusterd/store: handle the case of fsid being set to + 0 + +Generally this would happen when a system gets upgraded from an +version which doesn't have fsid details, to a version with fsid +values. Without this change, after upgrade, people would see reduced +'df ' output, causing lot of confusions. + +Debugging Credits: Nithya B + +>upstream mainline patch : https://review.gluster.org/#/c/19484/ + +Change-Id: Id718127ddfb69553b32770b25021290bd0e7c49a +BUG: 1541830 +Signed-off-by: Amar Tumballi +Reviewed-on: https://code.engineering.redhat.com/gerrit/129739 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index e35fcde..6bb7372 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -2632,6 +2632,25 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) + strlen(abspath)); + } + } ++ ++ /* Handle upgrade case of shared_brick_count 'fsid' */ ++ /* Ideally statfs_fsid should never be 0 if done right */ ++ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID) && ++ brickinfo->statfs_fsid == 0) { ++ struct statvfs brickstat = {0,}; ++ ret = sys_statvfs (brickinfo->path, &brickstat); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_WARNING, ++ errno, ++ GD_MSG_BRICKINFO_CREATE_FAIL, ++ "failed to get statfs() call on brick %s", ++ brickinfo->path); ++ /* No need for treating it as an error, lets continue ++ with just a message */ ++ } ++ brickinfo->statfs_fsid = brickstat.f_fsid; ++ } ++ + cds_list_add_tail (&brickinfo->brick_list, &volinfo->bricks); + brick_count++; + } +-- +1.8.3.1 + diff --git a/0144-cluster-dht-Unlink-linkto-files-as-root.patch b/0144-cluster-dht-Unlink-linkto-files-as-root.patch new file mode 100644 index 0000000..bbb06cd --- /dev/null +++ b/0144-cluster-dht-Unlink-linkto-files-as-root.patch @@ -0,0 +1,77 @@ +From 8f2fd4ac7ab74d5687fea8c1f6737f81e052953a Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Tue, 6 Feb 2018 15:20:16 +0530 +Subject: [PATCH 144/148] cluster/dht: Unlink linkto files as root + +Non-privileged users cannot delete linkto +files. However the failure to unlink a stale linkto +causes DHT to fail the lookup with EIO and hence +prevent access to the file. 
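+
+As a rough analogy for the fix (plain POSIX seteuid(), not gluster's
+FRAME_SU_DO/FRAME_SU_UNDO, which instead swap the uid/gid carried in the
+call frame as the hunks below show), the internal cleanup runs with
+elevated credentials and the caller's identity is always restored:
+
+    #include <stdio.h>
+    #include <unistd.h>
+
+    static int unlink_stale_linkto (const char *path)
+    {
+            uid_t caller = geteuid ();
+            int   ret;
+
+            if (seteuid (0) != 0)        /* elevate; needs saved uid 0    */
+                    return -1;
+            ret = unlink (path);         /* cleanup the caller may lack   */
+                                         /* permission to perform         */
+            if (seteuid (caller) != 0)   /* always restore the caller     */
+                    return -1;
+            return ret;
+    }
+
+    int main (void)
+    {
+            if (unlink_stale_linkto ("/tmp/stale-linkto") != 0)
+                    perror ("unlink_stale_linkto");
+            return 0;
+    }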
+ +upstream patch: https://review.gluster.org/#/c/19508/ + +> Change-Id: Id295362d41e52263790694602f36f1219f0646a2 +> BUG: 1542318 +> Signed-off-by: N Balachandran + +Change-Id: Ic7c4e4f94c03d18d43cfcf7b2de77ceea2b9733c +BUG: 1540664 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/129742 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index c2d0827..727a47b 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1404,6 +1404,8 @@ dht_lookup_unlink_of_false_linkto_cbk (call_frame_t *frame, void *cookie, + local = (dht_local_t*)frame->local; + path = local->loc.path; + ++ FRAME_SU_UNDO (frame, dht_local_t); ++ + gf_msg (this->name, GF_LOG_INFO, 0, + DHT_MSG_UNLINK_LOOKUP_INFO, "lookup_unlink returned with " + "op_ret -> %d and op-errno -> %d for %s", op_ret, op_errno, +@@ -1473,6 +1475,7 @@ dht_lookup_unlink_stale_linkto_cbk (call_frame_t *frame, void *cookie, + "op_errno %d for %s", op_ret, op_errno, + ((path==NULL)?"null":path)); + ++ FRAME_SU_UNDO (frame, dht_local_t); + DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL, + NULL); + +@@ -1629,15 +1632,15 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) + DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, + NULL, NULL, NULL, NULL); + } else { +- local->skip_unlink.handle_valid_link = _gf_false; ++ local->skip_unlink.handle_valid_link = _gf_false; + + gf_msg_debug (this->name, 0, + "No Cached was found and " + "unlink on hashed was skipped" + " so performing now: %s", + local->loc.path); +- +- STACK_WIND (frame, ++ FRAME_SU_DO (frame, dht_local_t); ++ STACK_WIND (frame, + dht_lookup_unlink_stale_linkto_cbk, + hashed_subvol, + hashed_subvol->fops->unlink, +@@ -1762,6 +1765,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) + NULL, NULL); + } else { + local->call_cnt = 1; ++ FRAME_SU_DO (frame, dht_local_t); + STACK_WIND (frame, + dht_lookup_unlink_of_false_linkto_cbk, + hashed_subvol, +-- +1.8.3.1 + diff --git a/0145-glusterd-optimize-glusterd-import-volumes-code-path.patch b/0145-glusterd-optimize-glusterd-import-volumes-code-path.patch new file mode 100644 index 0000000..4807480 --- /dev/null +++ b/0145-glusterd-optimize-glusterd-import-volumes-code-path.patch @@ -0,0 +1,52 @@ +From bab10010daf3dc93492e7770e4084f0f422e3b0c Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Mon, 29 Jan 2018 10:23:52 +0530 +Subject: [PATCH 145/148] glusterd: optimize glusterd import volumes code path + +In case there's a version mismatch detected for one of the volumes +glusterd was ending up with updating all the volumes which is a +overkill. 
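+
+The idea, as a small self-contained sketch (the structures below are
+illustrative, not glusterd's actual peer dictionaries), is to make the
+import decision per volume instead of re-importing everything whenever
+any one volume mismatches:
+
+    #include <stdio.h>
+
+    struct vol { const char *name; int local_ver; int peer_ver; };
+
+    static void import_volume (struct vol *v)
+    {
+            v->local_ver = v->peer_ver;
+            printf ("imported %s at version %d\n", v->name, v->local_ver);
+    }
+
+    int main (void)
+    {
+            struct vol vols[] = {
+                    { "vol0", 3, 3 },
+                    { "vol1", 2, 5 },   /* only this one actually changed */
+                    { "vol2", 7, 7 },
+            };
+            unsigned i;
+
+            for (i = 0; i < sizeof (vols) / sizeof (vols[0]); i++)
+                    if (vols[i].peer_ver > vols[i].local_ver)
+                            import_volume (&vols[i]);  /* per-volume only */
+            return 0;
+    }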
+ +>upstream mainline patch : https://review.gluster.org/#/c/19358/ + +Change-Id: I6df792db391ce3a1697cfa9260f7dbc3f59aa62d +BUG: 1540600 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/129935 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 4bb54db..d991a9f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -4819,16 +4819,18 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status, + ret = 0; + goto out; + } +- if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) ++ if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) { ++ ret = glusterd_import_friend_volume (peer_data, i); ++ if (ret) { ++ goto out; ++ } + update = _gf_true; +- ++ *status = GLUSTERD_VOL_COMP_NONE; ++ } + i++; + } + + if (update) { +- ret = glusterd_import_friend_volumes (peer_data); +- if (ret) +- goto out; + glusterd_svcs_manager (NULL); + } + +-- +1.8.3.1 + diff --git a/0146-cluster-dht-Cleanup-on-fallocate-failure.patch b/0146-cluster-dht-Cleanup-on-fallocate-failure.patch new file mode 100644 index 0000000..28170e4 --- /dev/null +++ b/0146-cluster-dht-Cleanup-on-fallocate-failure.patch @@ -0,0 +1,71 @@ +From 92d3d162567828c15f9e3aa7bd2cb70c7d058efb Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Fri, 9 Feb 2018 13:49:44 +0530 +Subject: [PATCH 146/148] cluster/dht: Cleanup on fallocate failure + +It looks like fallocate leaves a non-empty +file behind in case of some failures. We now +truncate the file to 0 bytes on failure in +__dht_rebalance_create_dst_file. 
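+
+A standalone sketch of the cleanup pattern (using the plain Linux
+fallocate(2)/ftruncate(2) calls rather than gluster's syncop_* wrappers,
+so it can be compiled and run as-is):
+
+    #define _GNU_SOURCE
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    static int preallocate (int fd, off_t size)
+    {
+            if (fallocate (fd, 0, 0, size) == 0)
+                    return 0;
+            perror ("fallocate");
+            /* Some failure modes can leave blocks allocated; release
+             * them so a failed migration does not leave a non-empty
+             * destination file behind. */
+            if (ftruncate (fd, 0) != 0)
+                    perror ("ftruncate");
+            return -1;
+    }
+
+    int main (void)
+    {
+            int fd = open ("/tmp/dst-file", O_CREAT | O_RDWR, 0600);
+
+            if (fd < 0) {
+                    perror ("open");
+                    return 1;
+            }
+            preallocate (fd, 1 << 20);   /* try to reserve 1 MiB */
+            close (fd);
+            return 0;
+    }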
+ +upstream patch: https://review.gluster.org/#/c/19514/ + +> Change-Id: Ia4ad7b94bb3624a301fcc87d9e36c4dc751edb59 +> BUG: 1541916 +> Signed-off-by: N Balachandran + +Change-Id: I0c67f62a638300d98b93710dedde438e69edb7f4 +BUG: 1540961 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/129923 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +Reviewed-by: Susant Palai +--- + xlators/cluster/dht/src/dht-rebalance.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 3343a2b..f9a25fb 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -668,6 +668,7 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, + int *fop_errno) + { + int ret = -1; ++ int ret2 = -1; + fd_t *fd = NULL; + struct iatt new_stbuf = {0,}; + struct iatt check_stbuf= {0,}; +@@ -803,7 +804,7 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, + + if (-ret == ENOENT) { + gf_msg (this->name, GF_LOG_ERROR, -ret, +- DHT_MSG_MIGRATE_FILE_FAILED, "%s: file does not exists" ++ DHT_MSG_MIGRATE_FILE_FAILED, "%s: file does not exist " + "on %s", loc->path, to->name); + *fop_errno = -ret; + ret = -1; +@@ -831,6 +832,19 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, + DHT_MSG_MIGRATE_FILE_FAILED, + "fallocate failed for %s on %s", + loc->path, to->name); ++ ++ /* fallocate does not release the space in some cases ++ */ ++ ++ ret2 = syncop_ftruncate (to, fd, 0, ++ NULL, NULL); ++ if (ret2 < 0) { ++ gf_msg (this->name, GF_LOG_WARNING, -ret2, ++ DHT_MSG_MIGRATE_FILE_FAILED, ++ "ftruncate failed for %s on %s", ++ loc->path, to->name); ++ } ++ *fop_errno = -ret; + ret = -1; + goto out; + } +-- +1.8.3.1 + diff --git a/0147-glusterd-import-volumes-in-separate-synctask.patch b/0147-glusterd-import-volumes-in-separate-synctask.patch new file mode 100644 index 0000000..e647d45 --- /dev/null +++ b/0147-glusterd-import-volumes-in-separate-synctask.patch @@ -0,0 +1,797 @@ +From 2f5d6b2923a7f9fe74cf820e5a4cdf894eb0a2bd Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee +Date: Thu, 8 Feb 2018 09:09:00 +0530 +Subject: [PATCH 147/148] glusterd: import volumes in separate synctask + +With brick multiplexing, to attach a brick to an existing brick process +the prerequisite is to have the compatible brick to finish it's +initialization and portmap sign in and hence the thread might have to go +to a sleep and context switch the synctask to allow the brick process to +communicate with glusterd. In normal code path, this works fine as +glusterd_restart_bricks () is launched through a separate synctask. + +In case there's a mismatch of the volume when glusterd restarts, +glusterd_import_friend_volume is invoked and then it tries to call +glusterd_start_bricks () from the main thread which eventually may land +into the similar situation. Now since this is not done through a +separate synctask, the 1st brick will never be able to get its turn to +finish all of its handshaking and as a consequence to it, all the bricks +will fail to get attached to it. + +Solution : Execute import volume and glusterd restart bricks in separate +synctask. Importing snaps had to be also done through synctask as +there's a dependency of the parent volume need to be available for the +importing snap functionality to work. 
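+
+As a plain-pthreads analogy for why this work moved off the main path
+(gluster's synctask API differs; all names below are illustrative), the
+worker can drop the big lock and sleep until brick restarts finish while
+the spawning thread stays responsive:
+
+    #include <pthread.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
+    static int restart_in_progress = 1;
+
+    static void *import_volumes_task (void *arg)
+    {
+            (void)arg;
+            pthread_mutex_lock (&big_lock);
+            while (restart_in_progress) {          /* mirrors the patch's */
+                    pthread_mutex_unlock (&big_lock);  /* wait loop: drop */
+                    sleep (1);                     /* the lock and let    */
+                    pthread_mutex_lock (&big_lock);    /* bricks proceed  */
+            }
+            printf ("importing volumes now\n");    /* safe to import      */
+            pthread_mutex_unlock (&big_lock);
+            return NULL;
+    }
+
+    int main (void)
+    {
+            pthread_t t;
+
+            pthread_create (&t, NULL, import_volumes_task, NULL);
+            sleep (2);                             /* main stays live     */
+            pthread_mutex_lock (&big_lock);
+            restart_in_progress = 0;               /* restart completes   */
+            pthread_mutex_unlock (&big_lock);
+            pthread_join (t, NULL);
+            return 0;
+    }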
+ +>upstream mainline patch : https://review.gluster.org/#/c/19357 + https://review.gluster.org/#/c/19536/ + https://review.gluster.org/#/c/19539/ + +Change-Id: I290b244d456afcc9b913ab30be4af040d340428c +BUG: 1540600 +Signed-off-by: Atin Mukherjee +Reviewed-on: https://code.engineering.redhat.com/gerrit/129937 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + ...e-with-other-processes-accessing-mounted-path.t | 13 ++ + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 9 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.h | 2 + + .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 229 +++++++++++++++++---- + xlators/mgmt/glusterd/src/glusterd-utils.c | 166 ++++++++++++--- + xlators/mgmt/glusterd/src/glusterd-utils.h | 4 + + xlators/mgmt/glusterd/src/glusterd.h | 3 +- + 7 files changed, 356 insertions(+), 70 deletions(-) + +diff --git a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t +index c5a0088..22f98d2 100644 +--- a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t ++++ b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t +@@ -92,20 +92,33 @@ EXPECT "0" mounted_snaps ${V1} + # handled during handshake. + + activate_snapshots ++ ++EXPECT 'Started' snapshot_status ${V0}_snap; ++EXPECT 'Started' snapshot_status ${V1}_snap; ++ + kill_glusterd 2 ++ + deactivate_snapshots ++EXPECT 'Stopped' snapshot_status ${V0}_snap; ++EXPECT 'Stopped' snapshot_status ${V1}_snap; ++ + TEST start_glusterd 2 + + # Updates form friend should reflect as snap was deactivated while glusterd + # process was inactive and mount point should also not exist. + ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count; + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V0} + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V1} + + kill_glusterd 2 + activate_snapshots ++EXPECT 'Started' snapshot_status ${V0}_snap; ++EXPECT 'Started' snapshot_status ${V1}_snap; + TEST start_glusterd 2 + ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count; ++ + # Updates form friend should reflect as snap was activated while glusterd + # process was inactive and mount point should exist. 
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" mounted_snaps ${V0} +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 2fc2e3b..81cde21 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2426,6 +2426,7 @@ glusterd_stop_bricks (glusterd_volinfo_t *volinfo) + + int + glusterd_start_bricks (glusterd_volinfo_t *volinfo) ++ + { + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; +@@ -2454,14 +2455,6 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo) + goto out; + } + } +- +- } +- ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); +- if (ret) { +- gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, +- "Failed to write volinfo for volume %s", +- volinfo->volname); +- goto out; + } + ret = 0; + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h +index 48275c5..24b1944 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h +@@ -275,8 +275,10 @@ glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size, + int32_t blk_count, double *throughput, double *time); + gf_boolean_t + glusterd_is_volume_started (glusterd_volinfo_t *volinfo); ++ + int + glusterd_start_bricks (glusterd_volinfo_t *volinfo); ++ + gf_boolean_t + glusterd_are_all_volumes_stopped (); + int +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index 3f03d2b..ad206f6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -1758,8 +1758,11 @@ out: + * state, i.e either both would be hosting bricks or both would not be hosting + * bricks, then a decision can't be taken and a peer-reject will happen. + * +- * glusterd_compare_and_update_snap() implements the following algorithm to +- * perform the above task: ++ * glusterd_compare_snap() & glusterd_update_snaps () implement the following ++ * algorithm to perform the above task. Please note the former function tries to ++ * iterate over the snaps one at a time and updating the relevant fields in the ++ * dictionary and then glusterd_update_snaps () go over all the snaps and update ++ * them at one go as part of a synctask. + * Step 1: Start. + * Step 2: Check if the peer is missing a delete or restore on the said snap. + * If yes, goto step 6. 
+@@ -1784,21 +1787,18 @@ out: + * + */ + int32_t +-glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, +- char *peername, uuid_t peerid) ++glusterd_compare_snap (dict_t *peer_data, int32_t snap_count, ++ char *peername, uuid_t peerid) + { + char buf[NAME_MAX] = ""; + char prefix[NAME_MAX] = ""; + char *peer_snap_name = NULL; + char *peer_snap_id = NULL; +- dict_t *dict = NULL; + glusterd_snap_t *snap = NULL; + gf_boolean_t conflict = _gf_false; + gf_boolean_t is_local = _gf_false; + gf_boolean_t is_hosted = _gf_false; + gf_boolean_t missed_delete = _gf_false; +- gf_boolean_t remove_lvm = _gf_true; +- + int32_t ret = -1; + int32_t volcount = 0; + xlator_t *this = NULL; +@@ -1810,6 +1810,14 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, + + snprintf (prefix, sizeof(prefix), "snap%d", snap_count); + ++ ret = dict_set_uint32 (peer_data, buf, 0); ++ snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix); ++ ret = dict_set_uint32 (peer_data, buf, 0); ++ snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix); ++ ret = dict_set_uint32 (peer_data, buf, 0); ++ snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix); ++ ret = dict_set_uint32 (peer_data, buf, 0); ++ + /* Fetch the peer's snapname */ + snprintf (buf, sizeof(buf), "%s.snapname", prefix); + ret = dict_get_str (peer_data, buf, &peer_snap_name); +@@ -1866,7 +1874,10 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, + /* Peer has snap with the same snapname + * and snap_id, which local node doesn't have. + */ +- goto accept_peer_data; ++ snprintf (buf, sizeof(buf), "%s.accept_peer_data", ++ prefix); ++ ret = dict_set_uint32 (peer_data, buf, 1); ++ goto out; + } + /* Peer has snap with the same snapname + * and snap_id. Now check if peer has a +@@ -1893,12 +1904,18 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, + * When removing data from local node, make sure + * we are not removing backend lvm of the snap. + */ +- remove_lvm = _gf_false; +- goto remove_my_data; ++ snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix); ++ ret = dict_set_uint32 (peer_data, buf, 0); ++ snprintf (buf, sizeof(buf), "%s.remove_my_data", ++ prefix); ++ ret = dict_set_uint32 (peer_data, buf, 1); ++ snprintf (buf, sizeof(buf), "%s.accept_peer_data", ++ prefix); ++ ret = dict_set_uint32 (peer_data, buf, 1); + } else { + ret = 0; +- goto out; + } ++ goto out; + } + + /* There is a conflict. Check if the current node is +@@ -1950,50 +1967,176 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, + * And local node isn't. Hence remove local node's + * data and accept peer data + */ +- + gf_msg_debug (this->name, 0, "Peer hosts bricks for conflicting " + "snap(%s). Removing local data. 
Accepting peer data.",
+                        peer_snap_name);
+-                remove_lvm = _gf_true;
++                snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
++                ret = dict_set_uint32 (peer_data, buf, 1);
++                snprintf (buf, sizeof(buf), "%s.remove_my_data",
++                          prefix);
++                ret = dict_set_uint32 (peer_data, buf, 1);
++                snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
++                ret = dict_set_uint32 (peer_data, buf, 1);
+
+-remove_my_data:
++out:
++        gf_msg_trace (this->name, 0, "Returning %d", ret);
++        return ret;
++}
+
+-        dict = dict_new();
+-        if (!dict) {
+-                gf_msg (this->name, GF_LOG_ERROR, 0,
+-                        GD_MSG_DICT_CREATE_FAIL,
+-                        "Unable to create dict");
+-                ret = -1;
+-                goto out;
++int32_t
++glusterd_update_snaps_synctask (void *opaque)
++{
++        int32_t ret = -1;
++        int32_t snap_count = 0;
++        int i = 1;
++        xlator_t *this = NULL;
++        dict_t *peer_data = NULL;
++        char buf[NAME_MAX] = "";
++        char prefix[NAME_MAX] = "";
++        char *peer_snap_name = NULL;
++        char *peer_snap_id = NULL;
++        char *peername = NULL;
++        gf_boolean_t remove_lvm = _gf_false;
++        gf_boolean_t remove_my_data = _gf_false;
++        gf_boolean_t accept_peer_data = _gf_false;
++        int32_t val = 0;
++        glusterd_snap_t *snap = NULL;
++        dict_t *dict = NULL;
++        glusterd_conf_t *conf = NULL;
++
++        this = THIS;
++        GF_ASSERT (this);
++
++        conf = this->private;
++        GF_ASSERT (conf);
++
++        peer_data = (dict_t *)opaque;
++        GF_ASSERT (peer_data);
++
++        synclock_lock (&conf->big_lock);
++
++        while (conf->restart_bricks) {
++                synclock_unlock (&conf->big_lock);
++                sleep (2);
++                synclock_lock (&conf->big_lock);
+        }
++        conf->restart_bricks = _gf_true;
+
+-        ret = glusterd_snap_remove (dict, snap, remove_lvm, _gf_false,
+-                                    _gf_false);
++        ret = dict_get_int32 (peer_data, "snap_count", &snap_count);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+-                        GD_MSG_SNAP_REMOVE_FAIL,
+-                        "Failed to remove snap %s", snap->snapname);
++                        GD_MSG_DICT_GET_FAILED, "Failed to fetch snap_count");
+                goto out;
+        }
+-
+-accept_peer_data:
+-
+-        /* Accept Peer Data */
+-        ret = glusterd_import_friend_snap (peer_data, snap_count,
+-                                           peer_snap_name, peer_snap_id);
++        ret = dict_get_str (peer_data, "peername", &peername);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+-                        GD_MSG_SNAP_IMPORT_FAIL,
+-                        "Failed to import snap %s from peer %s",
+-                        peer_snap_name, peername);
++                        GD_MSG_DICT_GET_FAILED, "Failed to fetch peername");
+                goto out;
+        }
+
++        for (i = 1; i <= snap_count; i++) {
++                snprintf (prefix, sizeof(prefix), "snap%d", i);
++
++                /* Fetch the peer's snapname */
++                snprintf (buf, sizeof(buf), "%s.snapname", prefix);
++                ret = dict_get_str (peer_data, buf, &peer_snap_name);
++                if (ret) {
++                        gf_msg (this->name, GF_LOG_ERROR, 0,
++                                GD_MSG_DICT_GET_FAILED,
++                                "Unable to fetch snapname from peer: %s",
++                                peername);
++                        goto out;
++                }
++
++                /* Fetch the peer's snap_id */
++                snprintf (buf, sizeof(buf), "%s.snap_id", prefix);
++                ret = dict_get_str (peer_data, buf, &peer_snap_id);
++                if (ret) {
++                        gf_msg (this->name, GF_LOG_ERROR, 0,
++                                GD_MSG_DICT_GET_FAILED,
++                                "Unable to fetch snap_id from peer: %s",
++                                peername);
++                        goto out;
++                }
++
++                /* remove_my_data */
++                snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
++                ret = dict_get_int32 (peer_data, buf, &val);
++                if (val)
++                        remove_my_data = _gf_true;
++                else
++                        remove_my_data = _gf_false;
++
++                if (remove_my_data) {
++                        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
++                        ret = dict_get_int32 (peer_data, buf, &val);
++                        if (val)
++                                remove_lvm = _gf_true;
++                        else
++                                remove_lvm = _gf_false;
++
++                        dict = dict_new();
++                        if (!dict) {
++                                gf_msg (this->name, GF_LOG_ERROR, 0,
++                                        GD_MSG_DICT_CREATE_FAIL,
++                                        "Unable to create dict");
++                                ret = -1;
++                                goto out;
++                        }
++                        snap = glusterd_find_snap_by_name (peer_snap_name);
++                        if (!snap) {
++                                gf_msg (this->name, GF_LOG_ERROR, 0,
++                                        GD_MSG_MISSED_SNAP_PRESENT,
++                                        "Snapshot %s from peer %s missing on "
++                                        "localhost", peer_snap_name,
++                                        peername);
++                                ret = -1;
++                                goto out;
++                        }
++
++                        ret = glusterd_snap_remove (dict, snap, remove_lvm,
++                                                    _gf_false, _gf_false);
++                        if (ret) {
++                                gf_msg (this->name, GF_LOG_ERROR, 0,
++                                        GD_MSG_SNAP_REMOVE_FAIL,
++                                        "Failed to remove snap %s",
++                                        snap->snapname);
++                                goto out;
++                        }
++                        if (dict)
++                                dict_unref (dict);
++                }
++                snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
++                ret = dict_get_int32 (peer_data, buf, &val);
++                if (val)
++                        accept_peer_data = _gf_true;
++                else
++                        accept_peer_data = _gf_false;
++
++                if (accept_peer_data) {
++                        /* Accept Peer Data */
++                        ret = glusterd_import_friend_snap (peer_data,
++                                                           i,
++                                                           peer_snap_name,
++                                                           peer_snap_id);
++                        if (ret) {
++                                gf_msg (this->name, GF_LOG_ERROR, 0,
++                                        GD_MSG_SNAP_IMPORT_FAIL,
++                                        "Failed to import snap %s from peer %s",
++                                        peer_snap_name, peername);
++                                goto out;
++                        }
++                }
++        }
++
+ out:
++        if (peer_data)
++                dict_unref (peer_data);
+        if (dict)
+                dict_unref (dict);
++        conf->restart_bricks = _gf_false;
+
+-        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+ }
+
+@@ -2008,6 +2151,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
+        int32_t snap_count = 0;
+        int i = 1;
+        xlator_t *this = NULL;
++        dict_t *peer_data_copy = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+@@ -2023,8 +2167,7 @@
+
+        for (i = 1; i <= snap_count; i++) {
+                /* Compare one snapshot from peer_data at a time */
+-                ret = glusterd_compare_and_update_snap (peer_data, i, peername,
+-                                                        peerid);
++                ret = glusterd_compare_snap (peer_data, i, peername, peerid);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_SNAPSHOT_OP_FAILED,
+@@ -2033,6 +2176,18 @@
+                        goto out;
+                }
+        }
++        /* Update the snaps at one go */
++        peer_data_copy = dict_copy_with_ref (peer_data, NULL);
++        ret = dict_set_str (peer_data_copy, "peername", peername);
++        if (ret) {
++                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
++                        "Failed to set peername into the dict");
++                if (peer_data_copy)
++                        dict_unref (peer_data_copy);
++                goto out;
++        }
++        glusterd_launch_synctask (glusterd_update_snaps_synctask,
++                                  peer_data_copy);
+
+ out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index d991a9f..5deacde 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -3448,6 +3448,14 @@ glusterd_compare_friend_volume (dict_t *peer_data, int32_t count,
+        *status = GLUSTERD_VOL_COMP_SCS;
+
+ out:
++        memset (key, 0, sizeof (key));
++        snprintf (key, sizeof (key), "volume%d.update", count);
++
++        if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) {
++                ret = dict_set_int32 (peer_data, key, 1);
++        } else {
++                ret = dict_set_int32 (peer_data, key, 0);
++        }
+        if (*status == GLUSTERD_VOL_COMP_RJT) {
+                gf_event (EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s",
+                          volinfo->volname);
+@@ -3520,12 +3528,11 @@ glusterd_spawn_daemons (void *opaque)
+        int ret = -1;
+
+        synclock_lock (&conf->big_lock);
+-        glusterd_restart_bricks (conf);
++        glusterd_restart_bricks ();
+        glusterd_restart_gsyncds (conf);
+        glusterd_restart_rebalance (conf);
+        ret = glusterd_snapdsvc_restart ();
+        ret = glusterd_tierdsvc_restart ();
+-
+        return ret;
+ }
+
+@@ -4291,20 +4298,35 @@ out:
+ int32_t
+ glusterd_volume_disconnect_all_bricks (glusterd_volinfo_t *volinfo)
+ {
+-        int ret = 0;
+-        glusterd_brickinfo_t *brickinfo = NULL;
++        int ret = 0;
++        glusterd_brickinfo_t *brickinfo = NULL;
++        glusterd_brick_proc_t *brick_proc = NULL;
++        int brick_count = 0;
++
+        GF_ASSERT (volinfo);
+
+        cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (glusterd_is_brick_started (brickinfo)) {
+-                        ret = glusterd_brick_disconnect (brickinfo);
+-                        if (ret) {
+-                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
+-                                        GD_MSD_BRICK_DISCONNECT_FAIL,
+-                                        "Failed to "
+-                                        "disconnect %s:%s", brickinfo->hostname,
+-                                        brickinfo->path);
+-                                break;
++                        /* If brick multiplexing is enabled then we can't
++                         * blindly set brickinfo->rpc to NULL as it might impact
++                         * the other attached bricks.
++                         */
++                        ret = glusterd_brick_proc_for_port (brickinfo->port,
++                                                            &brick_proc);
++                        if (!ret) {
++                                brick_count = brick_proc->brick_count;
++                        }
++                        if (!is_brick_mx_enabled () || brick_count == 0) {
++                                ret = glusterd_brick_disconnect (brickinfo);
++                                if (ret) {
++                                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
++                                                GD_MSD_BRICK_DISCONNECT_FAIL,
++                                                "Failed to "
++                                                "disconnect %s:%s",
++                                                brickinfo->hostname,
++                                                brickinfo->path);
++                                        break;
++                                }
+                        }
+                }
+        }
+@@ -4543,7 +4565,7 @@ out:
+ }
+
+ int32_t
+-glusterd_import_friend_volume (dict_t *peer_data, size_t count)
++glusterd_import_friend_volume (dict_t *peer_data, int count)
+ {
+
+        int32_t ret = -1;
+@@ -4552,6 +4574,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
+        glusterd_volinfo_t *old_volinfo = NULL;
+        glusterd_volinfo_t *new_volinfo = NULL;
+        glusterd_svc_t *svc = NULL;
++        int32_t update = 0;
++        char key[512] = {0,};
+
+        GF_ASSERT (peer_data);
+
+@@ -4559,6 +4583,15 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
++
++        memset (key, 0, sizeof (key));
++        snprintf (key, sizeof (key), "volume%d.update", count);
++        ret = dict_get_int32 (peer_data, key, &update);
++        if (ret || !update) {
++                /* if update is 0 that means the volume is not imported */
++                goto out;
++        }
++
+        ret = glusterd_import_volinfo (peer_data, count,
+                                       &new_volinfo, "volume");
+        if (ret)
+@@ -4572,6 +4605,14 @@
+
+        ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo);
+        if (0 == ret) {
++                if (new_volinfo->version <= old_volinfo->version) {
++                        /* When this condition is true, it already means that
++                         * the other synctask thread of import volume has
++                         * already up to date volume, so just ignore this volume
++                         * now
++                         */
++                        goto out;
++                }
+                /* Ref count the old_volinfo such that deleting it doesn't crash
+                 * if its been already in use by other thread
+                 */
+@@ -4602,7 +4643,8 @@
+                }
+        }
+
+-        ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
++        ret = glusterd_store_volinfo (new_volinfo,
++                                      GLUSTERD_VOLINFO_VER_AC_NONE);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        GD_MSG_VOLINFO_STORE_FAIL, "Failed to store "
+@@ -4630,6 +4672,60 @@ out:
+ }
+
+ int32_t
++glusterd_import_friend_volumes_synctask (void *opaque)
++{
++        int32_t ret = -1;
++        int32_t count = 0;
++        int i = 1;
++        xlator_t *this = NULL;
++        glusterd_conf_t *conf = NULL;
++        dict_t *peer_data = NULL;
++
++        this = THIS;
++        GF_ASSERT (this);
++
++        conf = this->private;
++        GF_ASSERT (conf);
++
++        peer_data = (dict_t *)opaque;
++        GF_ASSERT (peer_data);
++
++        ret = dict_get_int32 (peer_data, "count", &count);
++        if (ret)
++                goto out;
++
++        synclock_lock (&conf->big_lock);
++
++        /* We need to ensure that importing a volume shouldn't race with an
++         * other thread where as part of restarting glusterd, bricks are
++         * restarted (refer glusterd_restart_bricks ())
++         */
++        while (conf->restart_bricks) {
++                synclock_unlock (&conf->big_lock);
++                sleep (2);
++                synclock_lock (&conf->big_lock);
++        }
++        conf->restart_bricks = _gf_true;
++
++        while (i <= count) {
++                ret = glusterd_import_friend_volume (peer_data, i);
++                if (ret) {
++                        conf->restart_bricks = _gf_false;
++                        goto out;
++                }
++                i++;
++        }
++        glusterd_svcs_manager (NULL);
++        conf->restart_bricks = _gf_false;
++out:
++        if (peer_data)
++                dict_unref (peer_data);
++
++        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
++        return ret;
++}
++
++int32_t
+ glusterd_import_friend_volumes (dict_t *peer_data)
+ {
+        int32_t ret = -1;
+@@ -4768,8 +4864,10 @@ glusterd_import_global_opts (dict_t *friend_data)
+                 * recompute if quorum is met. If quorum is not met bricks are
+                 * not started and those already running are stopped
+                 */
+-                if (old_quorum != new_quorum)
+-                        glusterd_restart_bricks (conf);
++                if (old_quorum != new_quorum) {
++                        glusterd_launch_synctask (glusterd_restart_bricks,
++                                                  NULL);
++                }
+        }
+
+        ret = 0;
+@@ -4789,6 +4887,7 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
+        gf_boolean_t update = _gf_false;
+        xlator_t *this = NULL;
+        glusterd_conf_t *priv = NULL;
++        dict_t *peer_data_copy = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+@@ -4820,18 +4919,23 @@
+                        goto out;
+                }
+                if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) {
+-                        ret = glusterd_import_friend_volume (peer_data, i);
+-                        if (ret) {
+-                                goto out;
+-                        }
+                        update = _gf_true;
+-                        *status = GLUSTERD_VOL_COMP_NONE;
+                }
+                i++;
+        }
+
+        if (update) {
+-                glusterd_svcs_manager (NULL);
++                /* Launch the import friend volume as a separate synctask as it
++                 * has to trigger start bricks where we may need to wait for the
++                 * first brick to come up before attaching the subsequent bricks
++                 * in case brick multiplexing is enabled
++                 */
++                peer_data_copy = dict_copy_with_ref (peer_data, NULL);
++                glusterd_launch_synctask
++                                (glusterd_import_friend_volumes_synctask,
++                                 peer_data_copy);
++                if (ret)
++                        goto out;
+        }
+
+ out:
+@@ -5975,7 +6079,7 @@ out:
+ }
+
+ int
+-glusterd_restart_bricks (glusterd_conf_t *conf)
++glusterd_restart_bricks (void *opaque)
+ {
+        int ret = 0;
+        glusterd_volinfo_t *volinfo = NULL;
+@@ -5983,6 +6087,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
+        glusterd_snap_t *snap = NULL;
+        gf_boolean_t start_svcs = _gf_false;
+        xlator_t *this = NULL;
++        glusterd_conf_t *conf = NULL;
+        int active_count = 0;
+        int quorum_count = 0;
+        gf_boolean_t node_quorum = _gf_false;
+@@ -5993,6 +6098,17 @@
+        conf = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, conf, return_block);
+
++        /* We need to ensure that restarting the bricks during glusterd restart
++         * shouldn't race with the import volume thread (refer
++         * glusterd_compare_friend_data ())
++         */
++        while (conf->restart_bricks) {
++                synclock_unlock (&conf->big_lock);
++                sleep (2);
++                synclock_lock (&conf->big_lock);
++        }
++        conf->restart_bricks = _gf_true;
++
+        ++(conf->blockers);
+        ret = glusterd_get_quorum_cluster_counts (this, &active_count,
+                                                  &quorum_count);
+@@ -6003,8 +6119,9 @@
+                node_quorum = _gf_true;
+
+        cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+-                if (volinfo->status != GLUSTERD_STATUS_STARTED)
++                if (volinfo->status != GLUSTERD_STATUS_STARTED) {
+                        continue;
++                }
+                gf_msg_debug (this->name, 0, "starting the volume %s",
+                              volinfo->volname);
+
+@@ -6111,6 +6228,7 @@
+ out:
+        --(conf->blockers);
+        conf->restart_done = _gf_true;
++        conf->restart_bricks = _gf_false;
+
+ return_block:
+        return ret;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
+index 9194da0..3b82b1e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
+@@ -245,6 +245,10 @@ glusterd_pending_node_put_rpc (glusterd_pending_node_t *pending_node);
+ int
+ glusterd_remote_hostname_get (rpcsvc_request_t *req,
+                               char *remote_host, int len);
++
++int32_t
++glusterd_import_friend_volumes_synctask (void *opaque);
++
+ int32_t
+ glusterd_import_friend_volumes (dict_t *peer_data);
+ void
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index 3ad5ed6..b0656e6 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -199,6 +199,7 @@ typedef struct {
+        int32_t workers;
+        uint32_t blockers;
+        uint32_t mgmt_v3_lock_timeout;
++        gf_boolean_t restart_bricks;
+ } glusterd_conf_t;
+
+
+@@ -1077,7 +1078,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
+                                     dict_t *volumes, int count);
+
+ int
+-glusterd_restart_bricks (glusterd_conf_t *conf);
++glusterd_restart_bricks ();
+
+ int32_t
+ glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags,
+--
+1.8.3.1
+
diff --git a/0148-glusterd-tier-is_tier_enabled-inserted-causing-check.patch b/0148-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
new file mode 100644
index 0000000..55c622b
--- /dev/null
+++ b/0148-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
@@ -0,0 +1,39 @@
+From 6d173a903dccc8fc885f8b15994176558f277ad1 Mon Sep 17 00:00:00 2001
+From: hari gowtham
+Date: Thu, 8 Feb 2018 15:45:03 +0530
+Subject: [PATCH 148/148] glusterd/tier: is_tier_enabled inserted causing
+ checksum mismatch
+
+The volfile entry is_tier_enabled is checked for version 3.7.6
+while it was supposed to check for 3.10.
+This fixes it downstream only, by changing the version check
+to 3.13.1.
+
+Label: DOWNSTREAM ONLY
+Change-Id: I6ae9a0346d67fcc47a4762db5089d8010070a735
+BUG: 1543296
+Signed-off-by: hari gowtham
+Reviewed-on: https://code.engineering.redhat.com/gerrit/129822
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+Tested-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 6bb7372..387e7e5 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -1043,7 +1043,9 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
+                                    buf);
+                if (ret)
+                        goto out;
++        }
+
++        if (conf->op_version >= GD_OP_VERSION_3_13_1) {
+                snprintf (buf, sizeof (buf), "%d", volinfo->is_tier_enabled);
+                ret = gf_store_save_value (fd, GF_TIER_ENABLED, buf);
+                if (ret)
+--
+1.8.3.1
+
diff --git a/glusterfs.spec b/glusterfs.spec
index 999f68c..45305c2 100644
--- a/glusterfs.spec
+++ b/glusterfs.spec
@@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
 %else
 Name: glusterfs
 Version: 3.12.2
-Release: 3%{?dist}
+Release: 4%{?dist}
 %endif
 License: GPLv2 or LGPLv3+
 Group: System Environment/Base
@@ -404,6 +404,15 @@ Patch0136: 0136-glusterd-Nullify-pmap-entry-for-bricks-belonging-to-.patch
 Patch0137: 0137-bitrot-improved-cli-report-after-bitrot-operatoin.patch
 Patch0138: 0138-glusterd-enable-brick-multiplexing-by-default.patch
 Patch0139: 0139-libglusterfs-Reset-errno-before-call.patch
+Patch0140: 0140-md-cache-Add-additional-samba-and-macOS-specific-EAs.patch
+Patch0141: 0141-rpc-Showing-some-unusual-timer-error-logs-during-bri.patch
+Patch0142: 0142-cluster-dht-Add-migration-checks-to-dht_-f-xattrop.patch
+Patch0143: 0143-glusterd-store-handle-the-case-of-fsid-being-set-to-.patch
+Patch0144: 0144-cluster-dht-Unlink-linkto-files-as-root.patch
+Patch0145: 0145-glusterd-optimize-glusterd-import-volumes-code-path.patch
+Patch0146: 0146-cluster-dht-Cleanup-on-fallocate-failure.patch
+Patch0147: 0147-glusterd-import-volumes-in-separate-synctask.patch
+Patch0148: 0148-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
 %description
 GlusterFS is a distributed file-system capable of scaling to several
@@ -2331,6 +2340,10 @@ fi
 %endif
 
 %changelog
+* Mon Feb 12 2018 Milind Changire - 3.12.2-4
+- fixes bugs bz#1446125 bz#1467536 bz#1530146 bz#1540600 bz#1540664
+  bz#1540961 bz#1541830 bz#1543296
+
 * Mon Feb 05 2018 Milind Changire - 3.12.2-3
 - fixes bugs bz#1446125 bz#1463592 bz#1516249 bz#1517463 bz#1527309
   bz#1530325 bz#1531041 bz#1539699 bz#1540011
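
Note on the serialization introduced by patch 0147 above: glusterd_import_friend_volumes_synctask(), glusterd_update_snaps_synctask() and glusterd_restart_bricks() all take turns through the new conf->restart_bricks flag, polled under conf->big_lock with a 2-second back-off. The following minimal standalone sketch models that hand-off with plain pthreads standing in for glusterd's synctask/synclock machinery; the conf structure, task bodies and timings are illustrative assumptions, not glusterd code (in particular, the real tasks keep big_lock held across the work and rely on synctask yields, whereas this sketch drops the mutex around the work):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    static struct {
            pthread_mutex_t big_lock;       /* stands in for conf->big_lock */
            bool            restart_bricks; /* stands in for conf->restart_bricks */
    } conf = { PTHREAD_MUTEX_INITIALIZER, false };

    /* Enter the critical phase: poll the flag under the lock, backing off
     * for 2 seconds whenever the other task owns it -- the same loop the
     * patch adds to both the import and the restart paths. */
    static void critical_enter (void)
    {
            pthread_mutex_lock (&conf.big_lock);
            while (conf.restart_bricks) {
                    pthread_mutex_unlock (&conf.big_lock);
                    sleep (2);
                    pthread_mutex_lock (&conf.big_lock);
            }
            conf.restart_bricks = true;
            pthread_mutex_unlock (&conf.big_lock);
    }

    static void critical_exit (void)
    {
            pthread_mutex_lock (&conf.big_lock);
            conf.restart_bricks = false;    /* let the other task proceed */
            pthread_mutex_unlock (&conf.big_lock);
    }

    static void *import_task (void *arg)
    {
            critical_enter ();
            puts ("import: importing volumes; restarter is held off");
            sleep (1);                      /* stands in for the import work */
            critical_exit ();
            return NULL;
    }

    static void *restart_task (void *arg)
    {
            critical_enter ();
            puts ("restart: restarting bricks; importer is held off");
            sleep (1);                      /* stands in for brick restarts */
            critical_exit ();
            return NULL;
    }

    int main (void)
    {
            pthread_t t1, t2;

            pthread_create (&t1, NULL, import_task, NULL);
            pthread_create (&t2, NULL, restart_task, NULL);
            pthread_join (t1, NULL);
            pthread_join (t2, NULL);
            return 0;
    }

Whichever task enters first completes its whole phase before the other proceeds, which is exactly the guarantee the patch needs so that brick restarts never interleave with a half-finished volume import.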
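
The volume half of patch 0147 splits friend handling into a compare pass, which records a per-volume "volume%d.update" flag in peer_data, and a later synctask that imports only the flagged volumes, re-checking the peer's volinfo version in case a parallel task already caught up. A minimal sketch of that handshake, using a hypothetical vol_entry array in place of the real dict_t peer_data and volinfo lists:

    #include <stdbool.h>
    #include <stdio.h>

    struct vol_entry {
            const char *name;
            int  peer_version;   /* version advertised by the peer */
            int  local_version;  /* version of the copy we already hold */
            bool update;         /* models the "volume%d.update" flag */
    };

    /* Compare pass: flag only the volumes where the peer is ahead, as
     * glusterd_compare_friend_volume() now records into peer_data. */
    static void compare_volumes (struct vol_entry *vols, int count)
    {
            for (int i = 0; i < count; i++)
                    vols[i].update = vols[i].peer_version > vols[i].local_version;
    }

    /* Import pass: honour the flag and re-check the version, mirroring
     * the early "goto out" paths added to glusterd_import_friend_volume(). */
    static void import_volumes (struct vol_entry *vols, int count)
    {
            for (int i = 0; i < count; i++) {
                    if (!vols[i].update)
                            continue;  /* update == 0: volume is not imported */
                    if (vols[i].peer_version <= vols[i].local_version)
                            continue;  /* another task already brought us up to date */
                    printf ("importing %s (v%d -> v%d)\n", vols[i].name,
                            vols[i].local_version, vols[i].peer_version);
                    vols[i].local_version = vols[i].peer_version;
            }
    }

    int main (void)
    {
            struct vol_entry vols[] = {
                    { "vol0", 4, 3, false },  /* peer is ahead: imported */
                    { "vol1", 2, 2, false },  /* in sync: skipped */
            };

            compare_volumes (vols, 2);
            import_volumes (vols, 2);
            return 0;
    }

Separating the two passes is what lets the import run asynchronously on a dict_copy_with_ref() snapshot while the compare path returns to the peer immediately.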
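
Patch 0148 works because glusterd only writes the new is_tier_enabled key into the checksummed volinfo store once the cluster op-version guarantees that every peer writes it too; otherwise old and new peers compute different checksums for the same volume. A small sketch of that gating, where the store format, the key name and the 31301 encoding of GD_OP_VERSION_3_13_1 are assumptions for illustration, and fprintf stands in for the real gf_store_save_value():

    #include <stdio.h>

    #define GD_OP_VERSION_3_13_1 31301  /* assumed encoding of 3.13.1 */

    struct volinfo { int is_tier_enabled; };

    /* Append one key=value line to the store file; a stand-in for the
     * real store helpers, which also feed the volume checksum. */
    static int store_save (FILE *fd, const char *key, int value)
    {
            return fprintf (fd, "%s=%d\n", key, value) < 0 ? -1 : 0;
    }

    static int write_exclude_options (FILE *fd, struct volinfo *v,
                                      int op_version)
    {
            /* Only emit the new key once the cluster op-version guarantees
             * that all peers emit it too -- keeping the per-volume checksum
             * identical across old and new peers. */
            if (op_version >= GD_OP_VERSION_3_13_1)
                    return store_save (fd, "is-tier-enabled",
                                       v->is_tier_enabled);
            return 0;
    }

    int main (void)
    {
            struct volinfo v = { 1 };

            write_exclude_options (stdout, &v, 31301); /* new cluster: written */
            write_exclude_options (stdout, &v, 31000); /* mixed cluster: skipped */
            return 0;
    }

The original bug was that the key was gated on the 3.7.6 op-version, so upgraded peers emitted it while older peers did not, and the resulting checksum mismatch broke peer handshakes.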