diff --git a/0283-cluster-ec-Fix-pre-op-xattrop-management.patch b/0283-cluster-ec-Fix-pre-op-xattrop-management.patch new file mode 100644 index 0000000..b232eae --- /dev/null +++ b/0283-cluster-ec-Fix-pre-op-xattrop-management.patch @@ -0,0 +1,263 @@ +From fb62cbef3fcaa3e2a23a98182edaae332803b3bb Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Tue, 15 May 2018 11:37:16 +0200 +Subject: [PATCH 283/305] cluster/ec: Fix pre-op xattrop management + +Multiple pre-op xattrop can be simultaneously being processed. On the cbk +it was checked if the fop was waiting for some specific data (like size and +version) and, if so, it was assumed that this answer should contain that +data. + +This is not true, since a fop can be waiting for some data, but it may come +from the xattrop of another fop. + +This patch differentiates between needing some information and providing it. + +This is related to parallel writes. Disabling them fixed the problem, but +also prevented concurrent reads. A change has been made so that disabling +parallel writes still allows parallel reads. 
+ +Upstream patch: https://review.gluster.org/20024 + +BUG: 1567001 +Change-Id: I74772ad6b80b7b37805da93d5ec3ae099e96b041 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/139707 +Tested-by: RHGS Build Bot +Reviewed-by: Pranith Kumar Karampuri +--- + xlators/cluster/ec/src/ec-common.c | 70 ++++++++++++++++++++++---------------- + xlators/cluster/ec/src/ec-common.h | 28 +++++++++++++-- + xlators/cluster/ec/src/ec.c | 1 + + 3 files changed, 66 insertions(+), 33 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index bd2ae50..b74bce0 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -21,10 +21,6 @@ + #include "ec.h" + #include "ec-messages.h" + +-#define EC_XATTROP_ALL_WAITING_FLAGS (EC_FLAG_WAITING_XATTROP |\ +- EC_FLAG_WAITING_DATA_DIRTY |\ +- EC_FLAG_WAITING_METADATA_DIRTY) +- + void + ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx, + int32_t ret_status) +@@ -160,10 +156,16 @@ ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2) + static gf_boolean_t + ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2) + { ++ ec_t *ec = l1->fop->xl->private; ++ + if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) && + (l2->fop->flags & EC_FLAG_LOCK_SHARED)) + return _gf_false; + ++ if (!ec->parallel_writes) { ++ return _gf_true; ++ } ++ + return ec_is_range_conflict (l1, l2); + } + +@@ -1118,7 +1120,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, + ec_lock_t *lock = NULL; + ec_inode_t *ctx; + gf_boolean_t release = _gf_false; +- uint64_t waiting_flags = 0; ++ uint64_t provided_flags = 0; + uint64_t dirty[EC_VERSION_SIZE] = {0, 0}; + + lock = parent_link->lock; +@@ -1126,14 +1128,14 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, + ctx = lock->ctx; + + INIT_LIST_HEAD(&list); +- waiting_flags = parent_link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS; ++ provided_flags = 
EC_PROVIDED_FLAGS(parent_link->waiting_flags); + + LOCK(&lock->loc.inode->lock); + + list_for_each_entry(link, &lock->owners, owner_list) { +- if ((link->waiting_flags & waiting_flags) != 0) { +- link->waiting_flags ^= (link->waiting_flags & waiting_flags); +- if ((link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS) == 0) ++ if ((link->waiting_flags & provided_flags) != 0) { ++ link->waiting_flags ^= (link->waiting_flags & provided_flags); ++ if (EC_NEEDED_FLAGS(link->waiting_flags) == 0) + list_add_tail(&link->fop->cbk_list, &list); + } + } +@@ -1146,7 +1148,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, + goto unlock; + } + +- if (waiting_flags & EC_FLAG_WAITING_XATTROP) { ++ if (EC_FLAGS_HAVE(provided_flags, EC_FLAG_XATTROP)) { + op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION, + ctx->pre_version, + EC_VERSION_SIZE); +@@ -1207,20 +1209,20 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, + + ec_set_dirty_flag (fop->data, ctx, dirty); + if (dirty[EC_METADATA_TXN] && +- (waiting_flags & EC_FLAG_WAITING_METADATA_DIRTY)) { ++ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_METADATA_DIRTY))) { + GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]); + ctx->dirty[EC_METADATA_TXN] = 1; + } + + if (dirty[EC_DATA_TXN] && +- (waiting_flags & EC_FLAG_WAITING_DATA_DIRTY)) { ++ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_DATA_DIRTY))) { + GF_ASSERT (!ctx->dirty[EC_DATA_TXN]); + ctx->dirty[EC_DATA_TXN] = 1; + } + op_errno = 0; + unlock: + +- lock->waiting_flags ^= waiting_flags; ++ lock->waiting_flags ^= provided_flags; + + if (op_errno == 0) { + /* If the fop fails on any of the good bricks, it is important to mark +@@ -1267,6 +1269,24 @@ unlock: + return 0; + } + ++static gf_boolean_t ++ec_set_needed_flag(ec_lock_t *lock, ec_lock_link_t *link, uint64_t flag) ++{ ++ uint64_t current; ++ ++ link->waiting_flags |= EC_FLAG_NEEDS(flag); ++ ++ current = EC_NEEDED_FLAGS(lock->waiting_flags); ++ if (!EC_FLAGS_HAVE(current, flag)) { ++ lock->waiting_flags |= 
EC_FLAG_NEEDS(flag); ++ link->waiting_flags |= EC_FLAG_PROVIDES(flag); ++ ++ return _gf_true; ++ } ++ ++ return _gf_false; ++} ++ + static uint64_t + ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link, + uint64_t *dirty) +@@ -1275,31 +1295,25 @@ ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link, + uint64_t newflags = 0; + ec_inode_t *ctx = lock->ctx; + +- oldflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS; ++ oldflags = EC_NEEDED_FLAGS(lock->waiting_flags); + + if (lock->query && !ctx->have_info) { +- lock->waiting_flags |= EC_FLAG_WAITING_XATTROP; +- link->waiting_flags |= EC_FLAG_WAITING_XATTROP; ++ ec_set_needed_flag(lock, link, EC_FLAG_XATTROP); + } + + if (dirty[EC_DATA_TXN]) { +- if (oldflags & EC_FLAG_WAITING_DATA_DIRTY) { ++ if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) { + dirty[EC_DATA_TXN] = 0; +- } else { +- lock->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY; + } +- link->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY; + } + + if (dirty[EC_METADATA_TXN]) { +- if (oldflags & EC_FLAG_WAITING_METADATA_DIRTY) { ++ if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) { + dirty[EC_METADATA_TXN] = 0; +- } else { +- lock->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY; + } +- link->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY; + } +- newflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS; ++ newflags = EC_NEEDED_FLAGS(lock->waiting_flags); ++ + return oldflags ^ newflags; + } + +@@ -1369,7 +1383,7 @@ void ec_get_size_version(ec_lock_link_t *link) + goto out; + } + +- if (changed_flags & EC_FLAG_WAITING_XATTROP) { ++ if (EC_FLAGS_HAVE(changed_flags, EC_FLAG_XATTROP)) { + /* Once we know that an xattrop will be needed, + * we try to get all available information in a + * single call. 
*/ +@@ -1646,10 +1660,6 @@ static gf_boolean_t + ec_link_has_lock_conflict (ec_lock_link_t *link, gf_boolean_t waitlist_check) + { + ec_lock_link_t *trav_link = NULL; +- ec_t *ec = link->fop->xl->private; +- +- if (!ec->parallel_writes) +- return _gf_true; + + list_for_each_entry (trav_link, &link->lock->owners, owner_list) { + if (ec_lock_conflict (trav_link, link)) +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index c0ad604..85bf819 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -29,9 +29,31 @@ typedef enum { + + #define EC_FLAG_LOCK_SHARED 0x0001 + +-#define EC_FLAG_WAITING_XATTROP 0x0001 +-#define EC_FLAG_WAITING_DATA_DIRTY 0x0002 +-#define EC_FLAG_WAITING_METADATA_DIRTY 0x0004 ++enum _ec_xattrop_flags { ++ EC_FLAG_XATTROP, ++ EC_FLAG_DATA_DIRTY, ++ EC_FLAG_METADATA_DIRTY, ++ ++ /* Add any new flag here, before EC_FLAG_MAX. The maximum number of ++ * flags that can be defined is 16. */ ++ ++ EC_FLAG_MAX ++}; ++ ++/* We keep two sets of flags. One to determine what's really providing the ++ * currect xattrop and the other to know what the parent fop of the xattrop ++ * needs to proceed. It might happen that a fop needs some information that ++ * is being already requested by a previous fop. The two sets are stored ++ * contiguously. 
*/ ++ ++#define EC_FLAG_NEEDS(_flag) (1 << (_flag)) ++#define EC_FLAG_PROVIDES(_flag) (1 << ((_flag) + EC_FLAG_MAX)) ++ ++#define EC_NEEDED_FLAGS(_flags) ((_flags) & ((1 << EC_FLAG_MAX) - 1)) ++ ++#define EC_PROVIDED_FLAGS(_flags) EC_NEEDED_FLAGS((_flags) >> EC_FLAG_MAX) ++ ++#define EC_FLAGS_HAVE(_flags, _flag) (((_flags) & (1 << (_flag))) != 0) + + #define EC_SELFHEAL_BIT 62 + +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index eb91c4a..0d59efd 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -1331,6 +1331,7 @@ int32_t ec_dump_private(xlator_t *this) + gf_proc_dump_write("healers", "%d", ec->healers); + gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters); + gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]); ++ gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes); + + return 0; + } +-- +1.8.3.1 + diff --git a/0284-glusterd-glusterd-is-releasing-the-locks-before-time.patch b/0284-glusterd-glusterd-is-releasing-the-locks-before-time.patch new file mode 100644 index 0000000..d385a4f --- /dev/null +++ b/0284-glusterd-glusterd-is-releasing-the-locks-before-time.patch @@ -0,0 +1,264 @@ +From 123f052f7925074087e4393b22f68b9eb510936d Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Tue, 17 Apr 2018 18:10:01 +0530 +Subject: [PATCH 284/305] glusterd: glusterd is releasing the locks before + timeout + +Problem: We introduced lock timer in mgmt v3, which will release +the lock after 3 minutes from command execution. Some commands related +to heal/profile will take more time to execute. For these commands +timeout is set to 10 minutes. As the lock timer is set to 3 minutes +glusterd is releasing the lock after 3 minutes. That means locks are +released before the command has completed its execution. 
+ +Solution: Pass a timeout parameter from cli to glusterd, when there +is a change in default timeout value(i.e, default timeout value can +be changed through command line or for the commands related to profile/heal +we will change the default timeout value to 10 minutes.) glusterd will +set the lock timer timeout according to the timeout value passed. + +>Change-Id: I7b7a9a4f95ed44aca39ef9d9907f546bca99c69d +>fixes: bz#1577731 +>Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/19890/ + +Change-Id: I7b7a9a4f95ed44aca39ef9d9907f546bca99c69d +BUG: 1575557 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/140025 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + cli/src/cli-rpc-ops.c | 22 ++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-handler.c | 10 ++++++++++ + xlators/mgmt/glusterd/src/glusterd-locks.c | 2 ++ + xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c | 13 +++++++++++++ + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 10 ++++++++++ + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 14 ++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-syncop.c | 9 +++++++++ + 7 files changed, 80 insertions(+) + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index 1bb01e8..54b61ee65 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -91,6 +91,9 @@ cli_to_glusterd (gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn, + xdrproc_t xdrproc, dict_t *dict, int procnum, xlator_t *this, + rpc_clnt_prog_t *prog, struct iobref *iobref); + ++int ++add_cli_cmd_timeout_to_dict (dict_t *dict); ++ + rpc_clnt_prog_t cli_handshake_prog = { + .progname = "cli handshake", + .prognum = GLUSTER_HNDSK_PROGRAM, +@@ -4131,6 +4134,8 @@ cli_quotad_getlimit (call_frame_t *frame, xlator_t *this, void *data) + } + + dict = data; ++ ret = add_cli_cmd_timeout_to_dict (dict); ++ + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + 
if (ret < 0) { +@@ -11650,6 +11655,21 @@ out: + } + + int ++add_cli_cmd_timeout_to_dict (dict_t *dict) ++{ ++ int ret = 0; ++ ++ if (cli_default_conn_timeout > 120) { ++ ret = dict_set_uint32 (dict, "timeout", cli_default_conn_timeout); ++ if (ret) { ++ gf_log ("cli", GF_LOG_INFO, "Failed to save" ++ "timeout to dict"); ++ } ++ } ++ return ret; ++} ++ ++int + cli_to_glusterd (gf_cli_req *req, call_frame_t *frame, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc, dict_t *dict, + int procnum, xlator_t *this, rpc_clnt_prog_t *prog, +@@ -11703,6 +11723,8 @@ cli_to_glusterd (gf_cli_req *req, call_frame_t *frame, + if (ret) + goto out; + ++ ret = add_cli_cmd_timeout_to_dict (dict); ++ + ret = dict_allocate_and_serialize (dict, &(req->dict).dict_val, + &(req->dict).dict_len); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 962c87e..c072b05 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -718,6 +718,7 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + uint32_t op_errno = 0; ++ uint32_t timeout = 0; + + GF_ASSERT (req); + GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX)); +@@ -785,6 +786,15 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + goto out; + } + ++ /* Cli will add timeout key to dict if the default timeout is ++ * other than 2 minutes. Here we use this value to check whether ++ * mgmt_v3_lock_timeout should be set to default value or we ++ * need to change the value according to timeout value ++ * i.e, timeout + 120 seconds. 
*/ ++ ret = dict_get_uint32 (dict, "timeout", &timeout); ++ if (!ret) ++ priv->mgmt_v3_lock_timeout = timeout + 120; ++ + ret = glusterd_mgmt_v3_lock (volname, MY_UUID, &op_errno, + "vol"); + if (ret) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c +index a19d688..831be20 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-locks.c ++++ b/xlators/mgmt/glusterd/src/glusterd-locks.c +@@ -656,6 +656,8 @@ glusterd_mgmt_v3_lock (const char *name, uuid_t uuid, uint32_t *op_errno, + key_dup = gf_strdup (key); + delay.tv_sec = priv->mgmt_v3_lock_timeout; + delay.tv_nsec = 0; ++ /*changing to default timeout value*/ ++ priv->mgmt_v3_lock_timeout = GF_LOCK_TIMER; + + ret = -1; + mgmt_lock_timer_xl = mgmt_lock_timer->xl; +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c +index 5b7f0fa..993f12a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c +@@ -132,8 +132,12 @@ glusterd_handle_mgmt_v3_lock_fn (rpcsvc_request_t *req) + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + gf_boolean_t free_ctx = _gf_false; ++ glusterd_conf_t *conf = NULL; ++ uint32_t timeout = 0; + + this = THIS; ++ conf = this->private; ++ GF_ASSERT (conf); + GF_ASSERT (this); + GF_ASSERT (req); + +@@ -183,6 +187,15 @@ glusterd_handle_mgmt_v3_lock_fn (rpcsvc_request_t *req) + goto out; + } + ++ /* Cli will add timeout key to dict if the default timeout is ++ * other than 2 minutes. Here we use this value to check whether ++ * mgmt_v3_lock_timeout should be set to default value or we ++ * need to change the value according to timeout value ++ * i.e, timeout + 120 seconds. 
*/ ++ ret = dict_get_uint32 (ctx->dict, "timeout", &timeout); ++ if (!ret) ++ conf->mgmt_v3_lock_timeout = timeout + 120; ++ + is_synctasked = dict_get_str_boolean (ctx->dict, + "is_synctasked", _gf_false); + if (is_synctasked) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index 8bc1f1b..d7da3c1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -693,6 +693,7 @@ glusterd_mgmt_v3_initiate_lockdown (glusterd_op_t op, dict_t *dict, + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; ++ uint32_t timeout = 0; + + this = THIS; + GF_ASSERT (this); +@@ -703,6 +704,15 @@ glusterd_mgmt_v3_initiate_lockdown (glusterd_op_t op, dict_t *dict, + GF_ASSERT (op_errstr); + GF_ASSERT (is_acquired); + ++ /* Cli will add timeout key to dict if the default timeout is ++ * other than 2 minutes. Here we use this value to check whether ++ * mgmt_v3_lock_timeout should be set to default value or we ++ * need to change the value according to timeout value ++ * i.e, timeout + 120 seconds. 
*/ ++ ret = dict_get_uint32 (dict, "timeout", &timeout); ++ if (!ret) ++ conf->mgmt_v3_lock_timeout = timeout + 120; ++ + /* Trying to acquire multiple mgmt_v3 locks on local node */ + ret = glusterd_multiple_mgmt_v3_lock (dict, MY_UUID, op_errno); + if (ret) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 7107a46..7e959a0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -4159,11 +4159,16 @@ glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) + glusterd_op_lock_ctx_t *lock_ctx = NULL; + xlator_t *this = NULL; + uint32_t op_errno = 0; ++ glusterd_conf_t *conf = NULL; ++ uint32_t timeout = 0; + + GF_ASSERT (event); + GF_ASSERT (ctx); + + this = THIS; ++ GF_ASSERT (this); ++ conf = this->private; ++ GF_ASSERT (conf); + + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; + +@@ -4174,6 +4179,15 @@ glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) + ret = glusterd_lock (lock_ctx->uuid); + glusterd_op_lock_send_resp (lock_ctx->req, ret); + } else { ++ /* Cli will add timeout key to dict if the default timeout is ++ * other than 2 minutes. Here we use this value to check whether ++ * mgmt_v3_lock_timeout should be set to default value or we ++ * need to change the value according to timeout value ++ * i.e, timeout + 120 seconds. 
*/ ++ ret = dict_get_uint32 (lock_ctx->dict, "timeout", &timeout); ++ if (!ret) ++ conf->mgmt_v3_lock_timeout = timeout + 120; ++ + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 066c7f9..5aaa7f8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1818,6 +1818,7 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) + glusterd_op_info_t txn_opinfo = {{0},}; + uint32_t op_errno = 0; + gf_boolean_t cluster_lock = _gf_false; ++ uint32_t timeout = 0; + + this = THIS; + GF_ASSERT (this); +@@ -1879,6 +1880,14 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) + goto out; + } + } else { ++ /* Cli will add timeout key to dict if the default timeout is ++ * other than 2 minutes. Here we use this value to check whether ++ * mgmt_v3_lock_timeout should be set to default value or we ++ * need to change the value according to timeout value ++ * i.e, timeout + 120 seconds. 
*/ ++ ret = dict_get_uint32 (op_ctx, "timeout", &timeout); ++ if (!ret) ++ conf->mgmt_v3_lock_timeout = timeout + 120; + + ret = dict_get_str (op_ctx, "globalname", &global); + if (!ret) { +-- +1.8.3.1 + diff --git a/0285-gluster-Allow-only-read-only-CLI-commands-via-remote.patch b/0285-gluster-Allow-only-read-only-CLI-commands-via-remote.patch new file mode 100644 index 0000000..98a37a8 --- /dev/null +++ b/0285-gluster-Allow-only-read-only-CLI-commands-via-remote.patch @@ -0,0 +1,41 @@ +From 98a7692aebbe05a50f39d9c8d3e44fcfb42a580b Mon Sep 17 00:00:00 2001 +From: moagrawa +Date: Mon, 28 May 2018 19:20:54 +0530 +Subject: [PATCH 285/305] gluster: Allow only read-only CLI commands via + remote-host + +Problem: Current CLI code allows to run all commands via remote-host + while SSL is enabled even node is not added in trusted storage + pool + +Solution: Change condition in init function in glusterd.c to allow only read-only + CLI commands via remote-host while SSL is enabled. + +BUG: 1582129 +Change-Id: Ibf427c417437cd051822e30dea11a6c21d0dca6b +Signed-off-by: moagrawa +Reviewed-on: https://code.engineering.redhat.com/gerrit/140024 +Reviewed-by: Atin Mukherjee +--- + xlators/mgmt/glusterd/src/glusterd.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index ed01b93..78a37eb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1750,11 +1750,6 @@ init (xlator_t *this) + goto out; + } + /* +- * With strong authentication, we can afford to allow +- * privileged operations over TCP. +- */ +- gd_inet_programs[1] = &gd_svc_cli_prog; +- /* + * This is the only place where we want secure_srvr to reflect + * the management-plane setting. 
+ */ +-- +1.8.3.1 + diff --git a/0286-glusterd-memory-leak-in-geo-rep-status.patch b/0286-glusterd-memory-leak-in-geo-rep-status.patch new file mode 100644 index 0000000..7ac4b8c --- /dev/null +++ b/0286-glusterd-memory-leak-in-geo-rep-status.patch @@ -0,0 +1,58 @@ +From 6ba6401d26486920ed7c30025c1c3a941fb7bb00 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Mon, 21 May 2018 16:18:53 +0530 +Subject: [PATCH 286/305] glusterd: memory leak in geo-rep status + +>Fixes: bz#1580352 +>Change-Id: I9648e73090f5a2edbac663a6fb49acdb702cdc49 +>Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/20046/ + +Change-Id: I9648e73090f5a2edbac663a6fb49acdb702cdc49 +BUG: 1575539 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/139986 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +index a1251ae..eef347c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c ++++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +@@ -512,8 +512,10 @@ glusterd_urltransform (runner_t *runner, char ***linearrp) + } + + if (fgets (line, 1024, runner_chio (runner, STDOUT_FILENO)) == +- NULL) ++ NULL) { ++ GF_FREE (line); + break; ++ } + + len = strlen (line); + if (len == 0 || line[len - 1] != '\n') { +@@ -638,7 +640,7 @@ glusterd_get_slave (glusterd_volinfo_t *vol, const char *slaveurl, char **slavek + if (strcmp (linearr[i], linearr[n - 1]) == 0) + break; + } +- glusterd_urltransform_free (linearr, i); ++ glusterd_urltransform_free (linearr, n); + + if (i < n - 1) + *slavekey = dict_get_by_index (vol->gsync_slaves, i); +@@ -5870,6 +5872,8 @@ glusterd_get_slave_info (char *slave, + } + + out: ++ if (linearr) ++ glusterd_urltransform_free (linearr, 1); + gf_msg_debug (this->name, 0, "Returning %d", ret); + 
return ret; + } +-- +1.8.3.1 + diff --git a/0287-Revert-performance-write-behind-fix-flush-stuck-by-f.patch b/0287-Revert-performance-write-behind-fix-flush-stuck-by-f.patch new file mode 100644 index 0000000..54684c4 --- /dev/null +++ b/0287-Revert-performance-write-behind-fix-flush-stuck-by-f.patch @@ -0,0 +1,78 @@ +From 3e7c6b338deb7a8b95b208b1aa087f97fb58549f Mon Sep 17 00:00:00 2001 +From: Raghavendra G +Date: Fri, 25 May 2018 08:16:41 +0530 +Subject: [PATCH 287/305] Revert "performance/write-behind: fix flush stuck by + former failed writes" + +This reverts commit 9340b3c7a6c8556d6f1d4046de0dbd1946a64963. + +operations/writes across different fds of the same file cannot be +considered as independent. For eg., man 2 fsync states, + + + +fsync() transfers ("flushes") all modified in-core data of +(i.e., modified buffer cache pages for) the file referred to by the +file descriptor fd to the disk device + + + +This means fsync is an operation on file and fd is just a way to reach +file. So, it has to sync writes done on other fds too. Patch +9340b3c7a6c, prevents this. + +The problem fixed by patch 9340b3c7a6c - a flush on an fd is hung on a +failed write (held in cache for retrying) on a different fd - is +solved in this patch by making sure __wb_request_waiting_on considers +failed writes on any fd as dependent on flush/fsync on any fd (not +just the fd on which writes happened) opened on the same file. This +means failed writes on any fd are either synced or thrown away on +witnessing flush/fsync on any fd of the same file. 
+ +>Change-Id: Iee748cebb6d2a5b32f9328aff2b5b7cbf6c52c05 +>Signed-off-by: Raghavendra G +>Updates: bz#1512691 + +upstream patch: https://review.gluster.org/20082 +BUG: 1518710 +Change-Id: Ie9df1cb2fcd698db3d186485fd61ea6dc1c1fcb7 +Signed-off-by: Raghavendra G +Reviewed-on: https://code.engineering.redhat.com/gerrit/140063 +Tested-by: RHGS Build Bot +Reviewed-by: Csaba Henk +Tested-by: Csaba Henk +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/performance/write-behind/src/write-behind.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c +index 7104eb9..ca1cb63 100644 +--- a/xlators/performance/write-behind/src/write-behind.c ++++ b/xlators/performance/write-behind/src/write-behind.c +@@ -287,10 +287,6 @@ wb_requests_conflict (wb_request_t *lie, wb_request_t *req) + us in the todo list */ + return _gf_false; + +- /* requests from different fd do not conflict with each other. 
*/ +- if (req->fd && (req->fd != lie->fd)) +- return _gf_false; +- + if (lie->ordering.append) + /* all modifications wait for the completion + of outstanding append */ +@@ -743,9 +739,8 @@ __wb_request_waiting_on (wb_request_t *req) + wb_inode = req->wb_inode; + + list_for_each_entry (trav, &wb_inode->todo, todo) { +- if ((trav->fd == req->fd) +- && ((trav->stub->fop == GF_FOP_FLUSH) +- || (trav->stub->fop == GF_FOP_FSYNC)) ++ if (((trav->stub->fop == GF_FOP_FLUSH) || (trav->stub->fop ++ == GF_FOP_FSYNC)) + && (trav->gen >= req->gen)) + return trav; + } +-- +1.8.3.1 + diff --git a/0288-feature-locks-Unwind-response-based-on-clinet-versio.patch b/0288-feature-locks-Unwind-response-based-on-clinet-versio.patch new file mode 100644 index 0000000..b9196d0 --- /dev/null +++ b/0288-feature-locks-Unwind-response-based-on-clinet-versio.patch @@ -0,0 +1,359 @@ +From 1a5680e36331d10be2b677f7a5a085e706297c96 Mon Sep 17 00:00:00 2001 +From: Ashish Pandey +Date: Thu, 17 May 2018 15:55:44 +0530 +Subject: [PATCH 288/305] feature/locks: Unwind response based on clinet + version + +upstream patch: +https://review.gluster.org/#/c/20031/ + +>Change-Id: I6fc7755cca0d6f61cb775363618036228925842c + +Change-Id: I6fc7755cca0d6f61cb775363618036228925842c +BUG: 1558948 +Signed-off-by: Ashish Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/140080 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/locks/src/posix.c | 142 +++++++++++++++++++++++-------------- + 1 file changed, 88 insertions(+), 54 deletions(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index ef4bebf..63bcf31 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -39,6 +39,43 @@ static int format_brickname(char *); + int pl_lockinfo_get_brickname (xlator_t *, inode_t *, int32_t *); + static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + ++#define 
PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params ...) \ ++ do { \ ++ frame->local = NULL; \ ++ STACK_UNWIND_STRICT (fop, frame, op_ret, params); \ ++ if (__local) { \ ++ if (__local->inodelk_dom_count_req) \ ++ data_unref (__local->inodelk_dom_count_req);\ ++ loc_wipe (&__local->loc[0]); \ ++ loc_wipe (&__local->loc[1]); \ ++ if (__local->fd) \ ++ fd_unref (__local->fd); \ ++ mem_put (__local); \ ++ } \ ++ } while (0) ++ ++/* ++ * The client is always requesting data, but older ++ * servers were not returning it. Newer ones are, so ++ * the client is receiving a mix of NULL and non-NULL ++ * xdata in the answers when bricks are of different ++ * versions. This triggers a bug in older clients. ++ * To prevent that, we avoid returning extra xdata to ++ * older clients (making the newer brick to behave as ++ * an old brick). ++ */ ++#define PL_STACK_UNWIND_FOR_CLIENT(fop, xdata, frame, op_ret, params ...) \ ++ do { \ ++ pl_local_t *__local = NULL; \ ++ if (frame->root->client && \ ++ (frame->root->client->opversion < GD_OP_VERSION_3_12_0)) {\ ++ __local = frame->local; \ ++ PL_STACK_UNWIND_AND_FREE (__local, fop, frame, op_ret, params);\ ++ } else { \ ++ PL_STACK_UNWIND (fop, xdata, frame, op_ret, params); \ ++ } \ ++ } while (0) ++ + #define PL_STACK_UNWIND(fop, xdata, frame, op_ret, params ...) 
\ + do { \ + pl_local_t *__local = NULL; \ +@@ -68,17 +105,7 @@ static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + } \ + } \ + } \ +- frame->local = NULL; \ +- STACK_UNWIND_STRICT (fop, frame, op_ret, params); \ +- if (__local) { \ +- if (__local->inodelk_dom_count_req) \ +- data_unref (__local->inodelk_dom_count_req);\ +- loc_wipe (&__local->loc[0]); \ +- loc_wipe (&__local->loc[1]); \ +- if (__local->fd) \ +- fd_unref (__local->fd); \ +- mem_put (__local); \ +- } \ ++ PL_STACK_UNWIND_AND_FREE (__local, fop, frame, op_ret, params);\ + if (__unref) \ + dict_unref (__unref); \ + } while (0) +@@ -1492,7 +1519,8 @@ int32_t + pl_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + { +- PL_STACK_UNWIND (fsetxattr, xdata, frame, op_ret, op_errno, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (fsetxattr, xdata, frame, ++ op_ret, op_errno, xdata); + return 0; + } + +@@ -1564,12 +1592,8 @@ pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + { + +- if (frame->root->client && +- (frame->root->client->opversion < GD_OP_VERSION_3_12_0)) { +- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata); +- } else { +- PL_STACK_UNWIND (flush, xdata, frame, op_ret, op_errno, xdata); +- } ++ PL_STACK_UNWIND_FOR_CLIENT (flush, xdata, frame, ++ op_ret, op_errno, xdata); + + return 0; + } +@@ -3081,7 +3105,8 @@ int32_t + pl_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + { +- PL_STACK_UNWIND (setxattr, xdata, frame, op_ret, op_errno, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (setxattr, xdata, frame, ++ op_ret, op_errno, xdata); + return 0; + } + +@@ -3107,8 +3132,8 @@ pl_setxattr (call_frame_t *frame, xlator_t *this, + goto usual; + } + +- PL_STACK_UNWIND (setxattr, xdata_rsp, frame, op_ret, op_errno, +- xdata_rsp); ++ PL_STACK_UNWIND_FOR_CLIENT (setxattr, xdata_rsp, frame, ++ 
op_ret, op_errno, xdata_rsp); + return 0; + + usual: +@@ -3937,8 +3962,8 @@ pl_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) + { +- PL_STACK_UNWIND (mkdir, xdata, frame, op_ret, op_errno, +- inode, buf, preparent, postparent, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (mkdir, xdata, frame, op_ret, op_errno, ++ inode, buf, preparent, postparent, xdata); + return 0; + } + +@@ -3958,7 +3983,8 @@ pl_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) + { +- PL_STACK_UNWIND (stat, xdata, frame, op_ret, op_errno, buf, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (stat, xdata, frame, ++ op_ret, op_errno, buf, xdata); + return 0; + } + +@@ -3978,8 +4004,8 @@ pl_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) + { +- PL_STACK_UNWIND (mknod, xdata, frame, op_ret, op_errno, +- inode, buf, preparent, postparent, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (mknod, xdata, frame, op_ret, op_errno, ++ inode, buf, preparent, postparent, xdata); + return 0; + } + +@@ -4000,8 +4026,8 @@ pl_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) + { +- PL_STACK_UNWIND (rmdir, xdata, frame, op_ret, op_errno, +- preparent, postparent, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (rmdir, xdata, frame, op_ret, op_errno, ++ preparent, postparent, xdata); + return 0; + } + +@@ -4022,8 +4048,8 @@ pl_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) + { +- PL_STACK_UNWIND (symlink, xdata, frame, op_ret, op_errno, +- inode, buf, preparent, postparent, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (symlink, xdata, frame, op_ret, op_errno, ++ 
inode, buf, preparent, postparent, xdata); + return 0; + } + +@@ -4045,8 +4071,8 @@ pl_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) + { +- PL_STACK_UNWIND (link, xdata, frame, op_ret, op_errno, +- inode, buf, preparent, postparent, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (link, xdata, frame, op_ret, op_errno, ++ inode, buf, preparent, postparent, xdata); + return 0; + } + +@@ -4066,8 +4092,8 @@ pl_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *postbuf, + dict_t *xdata) + { +- PL_STACK_UNWIND (fsync, xdata, frame, op_ret, op_errno, +- prebuf, postbuf, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (fsync, xdata, frame, op_ret, op_errno, ++ prebuf, postbuf, xdata); + return 0; + } + +@@ -4086,8 +4112,8 @@ pl_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) + { +- PL_STACK_UNWIND (readdir, xdata, frame, op_ret, op_errno, +- entries, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (readdir, xdata, frame, op_ret, op_errno, ++ entries, xdata); + return 0; + } + +@@ -4108,7 +4134,8 @@ int32_t + pl_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + { +- PL_STACK_UNWIND (fsyncdir, xdata, frame, op_ret, op_errno, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (fsyncdir, xdata, frame, ++ op_ret, op_errno, xdata); + return 0; + } + +@@ -4128,7 +4155,8 @@ pl_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *buf, + dict_t *xdata) + { +- PL_STACK_UNWIND (statfs, xdata, frame, op_ret, op_errno, buf, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (statfs, xdata, frame, ++ op_ret, op_errno, buf, xdata); + return 0; + } + +@@ -4146,7 +4174,8 @@ int32_t + pl_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) 
+ { +- PL_STACK_UNWIND (removexattr, xdata, frame, op_ret, op_errno, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (removexattr, xdata, frame, ++ op_ret, op_errno, xdata); + return 0; + } + +@@ -4164,7 +4193,8 @@ int32_t + pl_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + { +- PL_STACK_UNWIND (fremovexattr, xdata, frame, op_ret, op_errno, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (fremovexattr, xdata, frame, ++ op_ret, op_errno, xdata); + return 0; + } + +@@ -4183,8 +4213,8 @@ pl_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uint32_t weak_cksum, + uint8_t *strong_cksum, dict_t *xdata) + { +- PL_STACK_UNWIND (rchecksum, xdata, frame, op_ret, op_errno, +- weak_cksum, strong_cksum, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (rchecksum, xdata, frame, op_ret, op_errno, ++ weak_cksum, strong_cksum, xdata); + return 0; + } + +@@ -4204,7 +4234,8 @@ pl_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) + { +- PL_STACK_UNWIND (xattrop, xdata, frame, op_ret, op_errno, dict, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (xattrop, xdata, frame, ++ op_ret, op_errno, dict, xdata); + return 0; + } + +@@ -4225,7 +4256,8 @@ pl_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) + { +- PL_STACK_UNWIND (fxattrop, xdata, frame, op_ret, op_errno, dict, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (fxattrop, xdata, frame, ++ op_ret, op_errno, dict, xdata); + return 0; + } + +@@ -4247,8 +4279,8 @@ pl_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *statpost, + dict_t *xdata) + { +- PL_STACK_UNWIND (setattr, xdata, frame, op_ret, op_errno, +- statpre, statpost, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (setattr, xdata, frame, op_ret, op_errno, ++ statpre, statpost, xdata); + return 0; + } + +@@ -4267,8 +4299,8 
@@ pl_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) + { +- PL_STACK_UNWIND (fsetattr, xdata, frame, op_ret, op_errno, +- statpre, statpost, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (fsetattr, xdata, frame, op_ret, op_errno, ++ statpre, statpost, xdata); + return 0; + } + +@@ -4287,8 +4319,8 @@ pl_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) + { +- PL_STACK_UNWIND (fallocate, xdata, frame, op_ret, op_errno, +- pre, post, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (fallocate, xdata, frame, op_ret, op_errno, ++ pre, post, xdata); + return 0; + } + +@@ -4309,8 +4341,8 @@ pl_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *path, + struct iatt *buf, dict_t *xdata) + { +- PL_STACK_UNWIND (readlink, xdata, frame, op_ret, op_errno, +- path, buf, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (readlink, xdata, frame, op_ret, op_errno, ++ path, buf, xdata); + return 0; + } + +@@ -4328,7 +4360,8 @@ int32_t + pl_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + { +- PL_STACK_UNWIND (access, xdata, frame, op_ret, op_errno, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (access, xdata, frame, ++ op_ret, op_errno, xdata); + return 0; + } + +@@ -4347,7 +4380,8 @@ pl_seek_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, off_t offset, + dict_t *xdata) + { +- PL_STACK_UNWIND (seek, xdata, frame, op_ret, op_errno, offset, xdata); ++ PL_STACK_UNWIND_FOR_CLIENT (seek, xdata, frame, ++ op_ret, op_errno, offset, xdata); + return 0; + } + +-- +1.8.3.1 + diff --git a/0289-changelog-fix-br-state-check.t-failure-for-brick_mux.patch b/0289-changelog-fix-br-state-check.t-failure-for-brick_mux.patch new file mode 100644 index 
0000000..55d8665 --- /dev/null +++ b/0289-changelog-fix-br-state-check.t-failure-for-brick_mux.patch @@ -0,0 +1,117 @@ +From a5584d85d5cd8d8c7252ef9e03207d64e87df83c Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Fri, 18 May 2018 20:03:32 +0530 +Subject: [PATCH 289/305] changelog: fix br-state-check.t failure for brick_mux + +Problem: Sometime br-state-check.t crash while runnning + for brick multiplex and command in test case is + taking 2 minutes for detach a brick + +Solution: Update code in changelog xlator specific to wait + on all connection before cleanup rpc threads and + cleanup rpc object only in non brick mux scenario + +> BUG: 1577672 +> Change-Id: I16e257c1e127744a815000b87bd8b7b8d9c51e1b +> fixes: bz#1577672 +> (cherry picked from commit 4ae7f0714b809cfebb64f6e5b5a70664e17a7a56) +> (Upstream review link https://review.gluster.org/#/c/20037/) + +BUG: 1581647 +Change-Id: I0b25e032d90a57cdd612a38b356248b4b47a7b60 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/139977 +Reviewed-by: Kotresh Hiremath Ravishankar +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + rpc/rpc-lib/src/rpcsvc.c | 2 +- + .../features/changelog/src/changelog-rpc-common.c | 7 ++++- + xlators/features/changelog/src/changelog-rpc.c | 33 ++++++++++++++++++++++ + 3 files changed, 40 insertions(+), 2 deletions(-) + +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 9938b8f..3acaa8b 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -1482,7 +1482,7 @@ rpcsvc_get_listener (rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans) + pthread_mutex_lock (&svc->rpclock); + { + list_for_each_entry (listener, &svc->listeners, list) { +- if (trans != NULL) { ++ if (listener && trans) { + if (listener->trans == trans) { + found = 1; + break; +diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c +index 
21bef76..9ba5444 100644 +--- a/xlators/features/changelog/src/changelog-rpc-common.c ++++ b/xlators/features/changelog/src/changelog-rpc-common.c +@@ -280,7 +280,12 @@ changelog_rpc_server_destroy (xlator_t *this, rpcsvc_t *rpc, char *sockfile, + rpc->rxpool = NULL; + } + +- GF_FREE (rpc); ++ /* TODO Avoid freeing rpc object in case of brick multiplex ++ after freeing rpc object svc->rpclock corrupted and it takes ++ more time to detach a brick ++ */ ++ if (!this->cleanup_starting) ++ GF_FREE (rpc); + } + + rpcsvc_t * +diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c +index ccb22b5..1443bd0 100644 +--- a/xlators/features/changelog/src/changelog-rpc.c ++++ b/xlators/features/changelog/src/changelog-rpc.c +@@ -154,6 +154,9 @@ void + changelog_destroy_rpc_listner (xlator_t *this, changelog_priv_t *priv) + { + char sockfile[UNIX_PATH_MAX] = {0,}; ++ changelog_clnt_t *c_clnt = &priv->connections; ++ changelog_rpc_clnt_t *crpc = NULL; ++ int nofconn = 0; + + /* sockfile path could have been saved to avoid this */ + CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick, +@@ -162,6 +165,36 @@ changelog_destroy_rpc_listner (xlator_t *this, changelog_priv_t *priv) + priv->rpc, sockfile, + changelog_rpcsvc_notify, + changelog_programs); ++ ++ /* TODO Below approach is not perfect to wait for cleanup ++ all active connections without this code brick process ++ can be crash in case of brick multiplexing if any in-progress ++ request process on rpc by changelog xlator after ++ cleanup resources ++ */ ++ ++ if (c_clnt) { ++ do { ++ nofconn = 0; ++ LOCK (&c_clnt->active_lock); ++ list_for_each_entry (crpc, &c_clnt->active, list) { ++ nofconn++; ++ } ++ UNLOCK (&c_clnt->active_lock); ++ LOCK (&c_clnt->wait_lock); ++ list_for_each_entry (crpc, &c_clnt->waitq, list) { ++ nofconn++; ++ } ++ UNLOCK (&c_clnt->wait_lock); ++ pthread_mutex_lock (&c_clnt->pending_lock); ++ list_for_each_entry (crpc, &c_clnt->pending, list) { ++ 
nofconn++; ++ } ++ pthread_mutex_unlock (&c_clnt->pending_lock); ++ ++ } while (nofconn); /* Wait for all connection cleanup */ ++ } ++ + (void) changelog_cleanup_rpc_threads (this, priv); + } + +-- +1.8.3.1 + diff --git a/0290-performance-open-behind-open-pending-fds-before-perm.patch b/0290-performance-open-behind-open-pending-fds-before-perm.patch new file mode 100644 index 0000000..6ee44ac --- /dev/null +++ b/0290-performance-open-behind-open-pending-fds-before-perm.patch @@ -0,0 +1,128 @@ +From 7fa60ece3fda28ca03298196374617893861a09b Mon Sep 17 00:00:00 2001 +From: Raghavendra G +Date: Fri, 25 May 2018 12:27:43 +0530 +Subject: [PATCH 290/305] performance/open-behind: open pending fds before + permission change + +setattr, posix-acl and selinux changes on a file can revoke permission +to open the file after permission changes. To prevent that, make sure +the pending fd is opened before winding down setattr or setxattr (for +posix-acl and selinux) calls. + +>Change-Id: Ib0b91795d286072e445190f9a1b3b1e9cd363282 +>Signed-off-by: Raghavendra G +>fixes: bz#1405147 + +upstream patch: https://review.gluster.org/#/c/20084/ +BUG: 1580120 +Change-Id: Id6a2beef00a9adb0dcb2169d3966cd2a1c3c8456 +Signed-off-by: Raghavendra G +Reviewed-on: https://code.engineering.redhat.com/gerrit/140095 +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez +--- + xlators/performance/open-behind/src/open-behind.c | 61 ++++++++++++++++++++++- + 1 file changed, 60 insertions(+), 1 deletion(-) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index d6dcf6f..3be35bc 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -14,6 +14,7 @@ + #include "call-stub.h" + #include "defaults.h" + #include "open-behind-messages.h" ++#include "glusterfs-acl.h" + + typedef struct ob_conf { + gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe +@@ -811,6 
+812,63 @@ err: + return 0; + } + ++int32_t ++ob_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, ++ struct iatt *stbuf, int32_t valid, dict_t *xdata) ++{ ++ fd_t *fd = NULL; ++ call_stub_t *stub = NULL; ++ ++ stub = fop_setattr_stub (frame, default_setattr_resume, loc, stbuf, ++ valid, xdata); ++ if (!stub) ++ goto err; ++ ++ fd = fd_lookup (loc->inode, 0); ++ ++ open_and_resume (this, fd, stub); ++ if (fd) ++ fd_unref (fd); ++ ++ return 0; ++err: ++ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); ++ return 0; ++} ++ ++ ++int32_t ++ob_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, ++ int32_t flags, dict_t *xdata) ++{ ++ fd_t *fd = NULL; ++ call_stub_t *stub = NULL; ++ gf_boolean_t access_xattr = _gf_false; ++ ++ if (dict_get (dict, POSIX_ACL_DEFAULT_XATTR) ++ || dict_get (dict, POSIX_ACL_ACCESS_XATTR) ++ || dict_get (dict, GF_SELINUX_XATTR_KEY)) ++ access_xattr = _gf_true; ++ ++ if (!access_xattr) ++ return default_setxattr (frame, this, loc, dict, flags, xdata); ++ ++ stub = fop_setxattr_stub (frame, default_setxattr_resume, loc, dict, ++ flags, xdata); ++ if (!stub) ++ goto err; ++ ++ fd = fd_lookup (loc->inode, 0); ++ ++ open_and_resume (this, fd, stub); ++ if (fd) ++ fd_unref (fd); ++ ++ return 0; ++err: ++ STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL); ++ return 0; ++} + + int + ob_release (xlator_t *this, fd_t *fd) +@@ -976,7 +1034,6 @@ fini (xlator_t *this) + return; + } + +- + struct xlator_fops fops = { + .open = ob_open, + .readv = ob_readv, +@@ -986,12 +1043,14 @@ struct xlator_fops fops = { + .fstat = ob_fstat, + .ftruncate = ob_ftruncate, + .fsetxattr = ob_fsetxattr, ++ .setxattr = ob_setxattr, + .fgetxattr = ob_fgetxattr, + .fremovexattr = ob_fremovexattr, + .finodelk = ob_finodelk, + .fentrylk = ob_fentrylk, + .fxattrop = ob_fxattrop, + .fsetattr = ob_fsetattr, ++ .setattr = ob_setattr, + .fallocate = ob_fallocate, + .discard = ob_discard, + .zerofill = ob_zerofill, +-- +1.8.3.1 + diff 
--git a/0291-Core-The-lock-contention-on-gf_client_dump_inodes_to.patch b/0291-Core-The-lock-contention-on-gf_client_dump_inodes_to.patch new file mode 100644 index 0000000..2923a2b --- /dev/null +++ b/0291-Core-The-lock-contention-on-gf_client_dump_inodes_to.patch @@ -0,0 +1,47 @@ +From 149a69ffa4aadca1d4d495957e5e7fd068756f64 Mon Sep 17 00:00:00 2001 +From: hari gowtham +Date: Fri, 18 May 2018 15:05:37 +0530 +Subject: [PATCH 291/305] Core: The lock contention on + gf_client_dump_inodes_to_dict + + backport of:https://review.gluster.org/#/c/20035/ + +Problem: For a distributed replicated volume, in the inode status +command the lock on gf_client_dump_inodes_to_dict is held by the +first brickop. while this is being processed, if the second brickop +comes. It fails to get the lock and the whole brick op fails. + +Fix: Instead of using a TRY_LOCK which errors out if the lock is busy, +Use LOCK which will wait till the lock is acquired. + +>Change-Id: I768a0a1b60f28c4f7f94549e18ee3765b69cc528 +>Signed-off-by: hari gowtham +>BUG: 1559452 + +fixes: bz#1579769 +Change-Id: I768a0a1b60f28c4f7f94549e18ee3765b69cc528 +BUG: 1559452 +Signed-off-by: hari gowtham +Reviewed-on: https://code.engineering.redhat.com/gerrit/140334 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/client_t.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c +index dc153cc..17e3026 100644 +--- a/libglusterfs/src/client_t.c ++++ b/libglusterfs/src/client_t.c +@@ -849,7 +849,7 @@ gf_client_dump_inodes_to_dict (xlator_t *this, dict_t *dict) + if (!clienttable) + return -1; + +- ret = TRY_LOCK (&clienttable->lock); ++ ret = LOCK (&clienttable->lock); + { + if (ret) { + gf_msg ("client_t", GF_LOG_WARNING, 0, +-- +1.8.3.1 + diff --git a/0292-geo-rep-Fix-rename-of-directory-in-hybrid-crawl.patch b/0292-geo-rep-Fix-rename-of-directory-in-hybrid-crawl.patch new file mode 100644 index 
0000000..c97716f --- /dev/null +++ b/0292-geo-rep-Fix-rename-of-directory-in-hybrid-crawl.patch @@ -0,0 +1,680 @@ +From d354eb1abb2160495e205c87e1b2ecd8778c70ed Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Thu, 21 Sep 2017 18:11:15 -0400 +Subject: [PATCH 292/305] geo-rep: Fix rename of directory in hybrid crawl + +In hybrid crawl, renames and unlink can't be +synced but directory renames can be detected. +While syncing the directory on slave, if the +gfid already exists, it should be rename. +Hence if directory gfid already exists, rename +it. + +Backport of: + > Patch: https://review.gluster.org/18448 + > Change-Id: Ibf9f99e76a3e02795a3c2befd8cac48a5c365bb6 + > BUG: 1499566 + > Signed-off-by: Kotresh HR + +Change-Id: Ibf9f99e76a3e02795a3c2befd8cac48a5c365bb6 +BUG: 1582417 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/140285 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/gsyncd.py | 4 +- + geo-replication/syncdaemon/monitor.py | 85 +---------- + geo-replication/syncdaemon/resource.py | 191 +++++-------------------- + geo-replication/syncdaemon/syncdutils.py | 237 +++++++++++++++++++++++++++++++ + 4 files changed, 276 insertions(+), 241 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index 629e8b7..b0ed0ae 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -39,7 +39,7 @@ from changelogagent import agent, Changelog + from gsyncdstatus import set_monitor_status, GeorepStatus, human_time_utc + from libcxattr import Xattr + import struct +-from syncdutils import get_master_and_slave_data_from_args, lf ++from syncdutils import get_master_and_slave_data_from_args, lf, Popen + + ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError + +@@ -778,7 +778,7 @@ def main_i(): + else: + gconf.label = 'slave' + startup(go_daemon=go_daemon, 
log_file=log_file, label=gconf.label) +- resource.Popen.init_errhandler() ++ Popen.init_errhandler() + + if be_agent: + os.setsid() +diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py +index 0f43c4f..55f8330 100644 +--- a/geo-replication/syncdaemon/monitor.py ++++ b/geo-replication/syncdaemon/monitor.py +@@ -16,7 +16,7 @@ import logging + import uuid + import xml.etree.ElementTree as XET + from subprocess import PIPE +-from resource import Popen, FILE, GLUSTER, SSH ++from resource import FILE, GLUSTER, SSH + from threading import Lock + from errno import ECHILD, ESRCH + import re +@@ -24,8 +24,9 @@ import random + from gconf import gconf + from syncdutils import select, waitpid, errno_wrap, lf + from syncdutils import set_term_handler, is_host_local, GsyncdError +-from syncdutils import escape, Thread, finalize, memoize, boolify ++from syncdutils import escape, Thread, finalize, boolify + from syncdutils import gf_event, EVENT_GEOREP_FAULTY ++from syncdutils import Volinfo, Popen + + from gsyncdstatus import GeorepStatus, set_monitor_status + +@@ -91,86 +92,6 @@ def get_slave_bricks_status(host, vol): + return list(up_hosts) + + +-class Volinfo(object): +- +- def __init__(self, vol, host='localhost', prelude=[]): +- po = Popen(prelude + ['gluster', '--xml', '--remote-host=' + host, +- 'volume', 'info', vol], +- stdout=PIPE, stderr=PIPE) +- vix = po.stdout.read() +- po.wait() +- po.terminate_geterr() +- vi = XET.fromstring(vix) +- if vi.find('opRet').text != '0': +- if prelude: +- via = '(via %s) ' % prelude.join(' ') +- else: +- via = ' ' +- raise GsyncdError('getting volume info of %s%s ' +- 'failed with errorcode %s' % +- (vol, via, vi.find('opErrno').text)) +- self.tree = vi +- self.volume = vol +- self.host = host +- +- def get(self, elem): +- return self.tree.findall('.//' + elem) +- +- def is_tier(self): +- return (self.get('typeStr')[0].text == 'Tier') +- +- def is_hot(self, brickpath): +- logging.debug('brickpath: ' + 
repr(brickpath)) +- return brickpath in self.hot_bricks +- +- @property +- @memoize +- def bricks(self): +- def bparse(b): +- host, dirp = b.find("name").text.split(':', 2) +- return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text} +- return [bparse(b) for b in self.get('brick')] +- +- @property +- @memoize +- def uuid(self): +- ids = self.get('id') +- if len(ids) != 1: +- raise GsyncdError("volume info of %s obtained from %s: " +- "ambiguous uuid" % (self.volume, self.host)) +- return ids[0].text +- +- def replica_count(self, tier, hot): +- if (tier and hot): +- return int(self.get('hotBricks/hotreplicaCount')[0].text) +- elif (tier and not hot): +- return int(self.get('coldBricks/coldreplicaCount')[0].text) +- else: +- return int(self.get('replicaCount')[0].text) +- +- def disperse_count(self, tier, hot): +- if (tier and hot): +- # Tiering doesn't support disperse volume as hot brick, +- # hence no xml output, so returning 0. In case, if it's +- # supported later, we should change here. 
+- return 0 +- elif (tier and not hot): +- return int(self.get('coldBricks/colddisperseCount')[0].text) +- else: +- return int(self.get('disperseCount')[0].text) +- +- @property +- @memoize +- def hot_bricks(self): +- return [b.text for b in self.get('hotBricks/brick')] +- +- def get_hot_bricks_count(self, tier): +- if (tier): +- return int(self.get('hotBricks/hotbrickCount')[0].text) +- else: +- return 0 +- +- + class Monitor(object): + + """class which spawns and manages gsyncd workers""" +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index d6618c1..c4b5b53 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -13,21 +13,16 @@ import os + import sys + import stat + import time +-import signal + import fcntl +-import errno + import types + import struct + import socket + import logging + import tempfile +-import threading + import subprocess + import errno + from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EACCES + from errno import EISDIR, ENOTEMPTY, ESTALE, EINVAL, EBUSY, EPERM +-from select import error as SelectError +-import shutil + + from gconf import gconf + import repce +@@ -43,7 +38,7 @@ from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION + from syncdutils import GX_GFID_CANONICAL_LEN + from gsyncdstatus import GeorepStatus + from syncdutils import get_master_and_slave_data_from_args +-from syncdutils import lf ++from syncdutils import lf, Popen, sup, Volinfo + from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt + + UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z') +@@ -52,14 +47,9 @@ UserRX = re.compile("[\w!\#$%&'*+-\/=?^_`{|}~]+") + + ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP') + +-def sup(x, *a, **kw): +- """a rubyesque "super" for python ;) +- +- invoke caller method in parent class with given args. 
+- """ +- return getattr(super(type(x), x), +- sys._getframe(1).f_code.co_name)(*a, **kw) +- ++slv_volume = None ++slv_host = None ++slv_bricks = None + + def desugar(ustr): + """transform sugared url strings to standard :// form +@@ -114,149 +104,6 @@ def parse_url(ustr): + return getattr(this, sch.upper())(path) + + +-class Popen(subprocess.Popen): +- +- """customized subclass of subprocess.Popen with a ring +- buffer for children error output""" +- +- @classmethod +- def init_errhandler(cls): +- """start the thread which handles children's error output""" +- cls.errstore = {} +- +- def tailer(): +- while True: +- errstore = cls.errstore.copy() +- try: +- poe, _, _ = select( +- [po.stderr for po in errstore], [], [], 1) +- except (ValueError, SelectError): +- # stderr is already closed wait for some time before +- # checking next error +- time.sleep(0.5) +- continue +- for po in errstore: +- if po.stderr not in poe: +- continue +- po.lock.acquire() +- try: +- if po.on_death_row: +- continue +- la = errstore[po] +- try: +- fd = po.stderr.fileno() +- except ValueError: # file is already closed +- time.sleep(0.5) +- continue +- +- try: +- l = os.read(fd, 1024) +- except OSError: +- time.sleep(0.5) +- continue +- +- if not l: +- continue +- tots = len(l) +- for lx in la: +- tots += len(lx) +- while tots > 1 << 20 and la: +- tots -= len(la.pop(0)) +- la.append(l) +- finally: +- po.lock.release() +- t = syncdutils.Thread(target=tailer) +- t.start() +- cls.errhandler = t +- +- @classmethod +- def fork(cls): +- """fork wrapper that restarts errhandler thread in child""" +- pid = os.fork() +- if not pid: +- cls.init_errhandler() +- return pid +- +- def __init__(self, args, *a, **kw): +- """customizations for subprocess.Popen instantiation +- +- - 'close_fds' is taken to be the default +- - if child's stderr is chosen to be managed, +- register it with the error handler thread +- """ +- self.args = args +- if 'close_fds' not in kw: +- kw['close_fds'] = True +- self.lock = 
threading.Lock() +- self.on_death_row = False +- self.elines = [] +- try: +- sup(self, args, *a, **kw) +- except: +- ex = sys.exc_info()[1] +- if not isinstance(ex, OSError): +- raise +- raise GsyncdError("""execution of "%s" failed with %s (%s)""" % +- (args[0], errno.errorcode[ex.errno], +- os.strerror(ex.errno))) +- if kw.get('stderr') == subprocess.PIPE: +- assert(getattr(self, 'errhandler', None)) +- self.errstore[self] = [] +- +- def errlog(self): +- """make a log about child's failure event""" +- logging.error(lf("command returned error", +- cmd=" ".join(self.args), +- error=self.returncode)) +- lp = '' +- +- def logerr(l): +- logging.error(self.args[0] + "> " + l) +- for l in self.elines: +- ls = l.split('\n') +- ls[0] = lp + ls[0] +- lp = ls.pop() +- for ll in ls: +- logerr(ll) +- if lp: +- logerr(lp) +- +- def errfail(self): +- """fail nicely if child did not terminate with success""" +- self.errlog() +- syncdutils.finalize(exval=1) +- +- def terminate_geterr(self, fail_on_err=True): +- """kill child, finalize stderr harvesting (unregister +- from errhandler, set up .elines), fail on error if +- asked for +- """ +- self.lock.acquire() +- try: +- self.on_death_row = True +- finally: +- self.lock.release() +- elines = self.errstore.pop(self) +- if self.poll() is None: +- self.terminate() +- if self.poll() is None: +- time.sleep(0.1) +- self.kill() +- self.wait() +- while True: +- if not select([self.stderr], [], [], 0.1)[0]: +- break +- b = os.read(self.stderr.fileno(), 1024) +- if b: +- elines.append(b) +- else: +- break +- self.stderr.close() +- self.elines = elines +- if fail_on_err and self.returncode != 0: +- self.errfail() +- +- + class Server(object): + + """singleton implemening those filesystem access primitives +@@ -776,6 +623,31 @@ class Server(object): + if isinstance(st, int): + blob = entry_pack_mkdir( + gfid, bname, e['mode'], e['uid'], e['gid']) ++ else: ++ # If gfid of a directory exists on slave but path based ++ # create is getting 
EEXIST. This means the directory is ++ # renamed in master but recorded as MKDIR during hybrid ++ # crawl. Get the directory path by reading the backend ++ # symlink and trying to rename to new name as said by ++ # master. ++ global slv_bricks ++ global slv_volume ++ global slv_host ++ if not slv_bricks: ++ slv_info = Volinfo (slv_volume, slv_host) ++ slv_bricks = slv_info.bricks ++ # Result of readlink would be of format as below. ++ # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename" ++ realpath = os.readlink(os.path.join(slv_bricks[0]['dir'], ++ ".glusterfs", gfid[0:2], ++ gfid[2:4], gfid)) ++ realpath_parts = realpath.split('/') ++ src_pargfid = realpath_parts[-2] ++ src_basename = realpath_parts[-1] ++ src_entry = os.path.join(pfx, src_pargfid, src_basename) ++ logging.info(lf("Special case: rename on mkdir", ++ gfid=gfid, entry=repr(entry))) ++ rename_with_disk_gfid_confirmation(gfid, src_entry, entry) + elif op == 'LINK': + slink = os.path.join(pfx, gfid) + st = lstat(slink) +@@ -1318,6 +1190,11 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): + def __init__(self, path): + self.host, self.volume = sup(self, path, '^(%s):(.+)' % HostRX.pattern) + ++ global slv_volume ++ global slv_host ++ slv_volume = self.volume ++ slv_host = self.host ++ + def canonical_path(self): + return ':'.join([gethostbyname(self.host), self.volume]) + +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index 2b57f83..a493c37 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -16,13 +16,18 @@ import fcntl + import shutil + import logging + import socket ++import errno ++import threading + import subprocess ++from subprocess import PIPE + from threading import Lock, Thread as baseThread + from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED + from errno import EINTR, ENOENT, EPERM, ESTALE, EBUSY, errorcode + from signal import signal, SIGTERM + import select as 
oselect + from os import waitpid as owaitpid ++import xml.etree.ElementTree as XET ++from select import error as SelectError + + from conf import GLUSTERFS_LIBEXECDIR, UUID_FILE + sys.path.insert(1, GLUSTERFS_LIBEXECDIR) +@@ -76,6 +81,15 @@ NEWLINE_ESCAPE_CHAR = "%0A" + PERCENTAGE_ESCAPE_CHAR = "%25" + + ++def sup(x, *a, **kw): ++ """a rubyesque "super" for python ;) ++ ++ invoke caller method in parent class with given args. ++ """ ++ return getattr(super(type(x), x), ++ sys._getframe(1).f_code.co_name)(*a, **kw) ++ ++ + def escape(s): + """the chosen flavor of string escaping, used all over + to turn whatever data to creatable representation""" +@@ -650,3 +664,226 @@ def lf(event, **kwargs): + for k, v in kwargs.items(): + msg += "\t{0}={1}".format(k, v) + return msg ++ ++ ++class Popen(subprocess.Popen): ++ ++ """customized subclass of subprocess.Popen with a ring ++ buffer for children error output""" ++ ++ @classmethod ++ def init_errhandler(cls): ++ """start the thread which handles children's error output""" ++ cls.errstore = {} ++ ++ def tailer(): ++ while True: ++ errstore = cls.errstore.copy() ++ try: ++ poe, _, _ = select( ++ [po.stderr for po in errstore], [], [], 1) ++ except (ValueError, SelectError): ++ # stderr is already closed wait for some time before ++ # checking next error ++ time.sleep(0.5) ++ continue ++ for po in errstore: ++ if po.stderr not in poe: ++ continue ++ po.lock.acquire() ++ try: ++ if po.on_death_row: ++ continue ++ la = errstore[po] ++ try: ++ fd = po.stderr.fileno() ++ except ValueError: # file is already closed ++ time.sleep(0.5) ++ continue ++ ++ try: ++ l = os.read(fd, 1024) ++ except OSError: ++ time.sleep(0.5) ++ continue ++ ++ if not l: ++ continue ++ tots = len(l) ++ for lx in la: ++ tots += len(lx) ++ while tots > 1 << 20 and la: ++ tots -= len(la.pop(0)) ++ la.append(l) ++ finally: ++ po.lock.release() ++ t = Thread(target=tailer) ++ t.start() ++ cls.errhandler = t ++ ++ @classmethod ++ def fork(cls): ++ """fork 
wrapper that restarts errhandler thread in child""" ++ pid = os.fork() ++ if not pid: ++ cls.init_errhandler() ++ return pid ++ ++ def __init__(self, args, *a, **kw): ++ """customizations for subprocess.Popen instantiation ++ ++ - 'close_fds' is taken to be the default ++ - if child's stderr is chosen to be managed, ++ register it with the error handler thread ++ """ ++ self.args = args ++ if 'close_fds' not in kw: ++ kw['close_fds'] = True ++ self.lock = threading.Lock() ++ self.on_death_row = False ++ self.elines = [] ++ try: ++ sup(self, args, *a, **kw) ++ except: ++ ex = sys.exc_info()[1] ++ if not isinstance(ex, OSError): ++ raise ++ raise GsyncdError("""execution of "%s" failed with %s (%s)""" % ++ (args[0], errno.errorcode[ex.errno], ++ os.strerror(ex.errno))) ++ if kw.get('stderr') == subprocess.PIPE: ++ assert(getattr(self, 'errhandler', None)) ++ self.errstore[self] = [] ++ ++ def errlog(self): ++ """make a log about child's failure event""" ++ logging.error(lf("command returned error", ++ cmd=" ".join(self.args), ++ error=self.returncode)) ++ lp = '' ++ ++ def logerr(l): ++ logging.error(self.args[0] + "> " + l) ++ for l in self.elines: ++ ls = l.split('\n') ++ ls[0] = lp + ls[0] ++ lp = ls.pop() ++ for ll in ls: ++ logerr(ll) ++ if lp: ++ logerr(lp) ++ ++ def errfail(self): ++ """fail nicely if child did not terminate with success""" ++ self.errlog() ++ finalize(exval=1) ++ ++ def terminate_geterr(self, fail_on_err=True): ++ """kill child, finalize stderr harvesting (unregister ++ from errhandler, set up .elines), fail on error if ++ asked for ++ """ ++ self.lock.acquire() ++ try: ++ self.on_death_row = True ++ finally: ++ self.lock.release() ++ elines = self.errstore.pop(self) ++ if self.poll() is None: ++ self.terminate() ++ if self.poll() is None: ++ time.sleep(0.1) ++ self.kill() ++ self.wait() ++ while True: ++ if not select([self.stderr], [], [], 0.1)[0]: ++ break ++ b = os.read(self.stderr.fileno(), 1024) ++ if b: ++ elines.append(b) ++ else: ++ 
break ++ self.stderr.close() ++ self.elines = elines ++ if fail_on_err and self.returncode != 0: ++ self.errfail() ++ ++ ++class Volinfo(object): ++ ++ def __init__(self, vol, host='localhost', prelude=[]): ++ po = Popen(prelude + ['gluster', '--xml', '--remote-host=' + host, ++ 'volume', 'info', vol], ++ stdout=PIPE, stderr=PIPE) ++ vix = po.stdout.read() ++ po.wait() ++ po.terminate_geterr() ++ vi = XET.fromstring(vix) ++ if vi.find('opRet').text != '0': ++ if prelude: ++ via = '(via %s) ' % prelude.join(' ') ++ else: ++ via = ' ' ++ raise GsyncdError('getting volume info of %s%s ' ++ 'failed with errorcode %s' % ++ (vol, via, vi.find('opErrno').text)) ++ self.tree = vi ++ self.volume = vol ++ self.host = host ++ ++ def get(self, elem): ++ return self.tree.findall('.//' + elem) ++ ++ def is_tier(self): ++ return (self.get('typeStr')[0].text == 'Tier') ++ ++ def is_hot(self, brickpath): ++ logging.debug('brickpath: ' + repr(brickpath)) ++ return brickpath in self.hot_bricks ++ ++ @property ++ @memoize ++ def bricks(self): ++ def bparse(b): ++ host, dirp = b.find("name").text.split(':', 2) ++ return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text} ++ return [bparse(b) for b in self.get('brick')] ++ ++ @property ++ @memoize ++ def uuid(self): ++ ids = self.get('id') ++ if len(ids) != 1: ++ raise GsyncdError("volume info of %s obtained from %s: " ++ "ambiguous uuid" % (self.volume, self.host)) ++ return ids[0].text ++ ++ def replica_count(self, tier, hot): ++ if (tier and hot): ++ return int(self.get('hotBricks/hotreplicaCount')[0].text) ++ elif (tier and not hot): ++ return int(self.get('coldBricks/coldreplicaCount')[0].text) ++ else: ++ return int(self.get('replicaCount')[0].text) ++ ++ def disperse_count(self, tier, hot): ++ if (tier and hot): ++ # Tiering doesn't support disperse volume as hot brick, ++ # hence no xml output, so returning 0. In case, if it's ++ # supported later, we should change here. 
++ return 0 ++ elif (tier and not hot): ++ return int(self.get('coldBricks/colddisperseCount')[0].text) ++ else: ++ return int(self.get('disperseCount')[0].text) ++ ++ @property ++ @memoize ++ def hot_bricks(self): ++ return [b.text for b in self.get('hotBricks/brick')] ++ ++ def get_hot_bricks_count(self, tier): ++ if (tier): ++ return int(self.get('hotBricks/hotbrickCount')[0].text) ++ else: ++ return 0 +-- +1.8.3.1 + diff --git a/0293-rpcsvc-correct-event-thread-scaling.patch b/0293-rpcsvc-correct-event-thread-scaling.patch new file mode 100644 index 0000000..f6d2d6e --- /dev/null +++ b/0293-rpcsvc-correct-event-thread-scaling.patch @@ -0,0 +1,53 @@ +From 3bf557cd74a50f7fadaba59c86653de8887d4ffd Mon Sep 17 00:00:00 2001 +From: Milind Changire +Date: Fri, 1 Jun 2018 12:56:20 +0530 +Subject: [PATCH 293/305] rpcsvc: correct event-thread scaling + +Problem: +Auto thread count derived from the number of attachs and detachs +was reset to 1 when server_reconfigure() was called. + +Solution: +Avoid auto-thread-count reset to 1. 
+ +mainline: +> BUG: 1547888 +> Reviewed-on: https://review.gluster.org/19689 +> Reviewed-by: Raghavendra G +> Signed-off-by: Milind Changire +(cherry picked from commit 0c3d984287d91d3fe1ffeef297252d912c08a410) + +BUG: 1554255 +Change-Id: I53217bd0634ec5dcc164338867d9d468687598b0 +Signed-off-by: Milind Changire +Reviewed-on: https://code.engineering.redhat.com/gerrit/140454 +Reviewed-by: Raghavendra Gowdappa +Tested-by: RHGS Build Bot +Reviewed-by: Mohit Agrawal +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/protocol/server/src/server.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index bc87a80..69ad184 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -1042,10 +1042,11 @@ do_rpc: + } + + /* +- * Let the event subsystem know that we're auto-scaling, with an +- * initial count of one. ++ * Update: ++ * We don't need to reset auto_thread_count since it has been derived ++ * out of the total bricks attached. We can reconfigure event threads ++ * but not auto threads. 
+ */ +- ((struct event_pool *)(this->ctx->event_pool))->auto_thread_count = 1; + + GF_OPTION_RECONF ("event-threads", new_nthread, options, int32, out); + ret = server_check_event_threads (this, conf, new_nthread); +-- +1.8.3.1 + diff --git a/0294-features-shard-Fix-missing-unlock-in-shard_fsync_sha.patch b/0294-features-shard-Fix-missing-unlock-in-shard_fsync_sha.patch new file mode 100644 index 0000000..e80416e --- /dev/null +++ b/0294-features-shard-Fix-missing-unlock-in-shard_fsync_sha.patch @@ -0,0 +1,38 @@ +From 3e0fa14847b089291f4ca990d43029c1cd77a232 Mon Sep 17 00:00:00 2001 +From: Vijay Bellur +Date: Thu, 31 May 2018 17:11:01 -0700 +Subject: [PATCH 294/305] features/shard: Fix missing unlock in + shard_fsync_shards_cbk() + +updates: bz#789278 + +> Upstream: https://review.gluster.org/20118 +> BUG: 789278 +> Change-Id: I745a98e957cf3c6ba69247fcf6b58dd05cf59c3c + +Change-Id: I745a98e957cf3c6ba69247fcf6b58dd05cf59c3c +BUG: 1493085 +Signed-off-by: Vijay Bellur +Reviewed-on: https://code.engineering.redhat.com/gerrit/140462 +Tested-by: Krutika Dhananjay +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/shard/src/shard.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 29989d3..d67cdf4 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -4504,6 +4504,7 @@ shard_fsync_shards_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; ++ UNLOCK (&frame->lock); + goto out; + } + shard_inode_ctx_set (local->fd->inode, this, postbuf, 0, +-- +1.8.3.1 + diff --git a/0295-dht-Excessive-dict-is-null-logs-in-dht_revalidate_cb.patch b/0295-dht-Excessive-dict-is-null-logs-in-dht_revalidate_cb.patch new file mode 100644 index 0000000..d687c57 --- /dev/null +++ b/0295-dht-Excessive-dict-is-null-logs-in-dht_revalidate_cb.patch @@ 
-0,0 +1,55 @@ +From 669cc980d642af5bac1d41e3671822321b6bc986 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 29 May 2018 14:57:44 +0530 +Subject: [PATCH 295/305] dht: Excessive 'dict is null' logs in + dht_revalidate_cbk + +Problem: In case of error(ESTALE/ENOENT) dht_revalidate_cbk + throws "dict is null" error because xattr is not available + +Solution: To avoid the logs update condition in dht_revalidate_cbk + and dht_lookup_dir_cbk + +> BUG: 1583565 +> Change-Id: Ife6b3eeb6d91bf24403ed3100e237bb5d15b4357 +> fixes: bz#1583565 +> (Cherry pick from commit 052fce3f31d856fce10e07eda5f5c49ad3390d3a) +> (Upstream review link https://review.gluster.org/#/c/20096/) + +BUG: 1581553 +Change-Id: Ib86e7a14f30b7357ebdffd42fe91ee295e152e64 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/140185 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index feeaa3f..5f246b1 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1499,7 +1499,8 @@ unlock: + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + dht_set_fixed_dir_stat (&local->postparent); + /* Delete mds xattr at the time of STACK UNWIND */ +- GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); ++ if (local->xattr) ++ GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); + DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); +@@ -1902,7 +1903,8 @@ cont: + local->op_errno = ESTALE; + } + /* Delete mds xattr at the time of STACK UNWIND */ +- GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); ++ if (local->xattr) ++ GF_REMOVE_INTERNAL_XATTR (conf->mds_xattr_key, local->xattr); + + DHT_STACK_UNWIND (lookup, frame, 
local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, +-- +1.8.3.1 + diff --git a/0296-cluster-dht-Increase-failure-count-for-lookup-failur.patch b/0296-cluster-dht-Increase-failure-count-for-lookup-failur.patch new file mode 100644 index 0000000..2df19e1 --- /dev/null +++ b/0296-cluster-dht-Increase-failure-count-for-lookup-failur.patch @@ -0,0 +1,117 @@ +From 8863bc3638ca8b3a2c6105d35f5a9113b5796dd5 Mon Sep 17 00:00:00 2001 +From: Susant Palai +Date: Mon, 21 May 2018 11:51:47 +0530 +Subject: [PATCH 296/305] cluster/dht: Increase failure count for lookup + failure in remove-brick op + +An entry from readdirp might get renamed just before migration leading to +lookup failures. For such lookup failure, remove-brick process does not +see any increment in failure count. Though there is a warning message +after remove-brick commit for the user to check in the decommissioned brick +for any files those are not migrated, it's better to increase the failure count +so that user can check in the decommissioned bricks for files before commit. + +Note: This can result in false negative cases for rm -rf interaction with +remove-brick op, where remove-brick shows non-zero failed count, but the +entry was actually deleted by user. 
+ +upstream patch: https://review.gluster.org/#/c/20044/ +> Fixes :bz#1580269 +> Change-Id: Icd1047ab9edc1d5bfc231a1f417a7801c424917c +> fixes: bz#1580269 +> Signed-off-by: Susant Palai + +Change-Id: Icd1047ab9edc1d5bfc231a1f417a7801c424917c +BUG: 1577051 +(cherry-picked from upstream: https://review.gluster.org/#/c/20044/) +Signed-off-by: Susant Palai +Reviewed-on: https://code.engineering.redhat.com/gerrit/139989 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-rebalance.c | 34 ++++++++++++++++++++++++++++++--- + 1 file changed, 31 insertions(+), 3 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index f03931f..b9078e0 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -2713,6 +2713,19 @@ gf_defrag_migrate_single_file (void *opaque) + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s lookup failed", + entry_loc.path); ++ ++ /* Increase failure count only for remove-brick op, so that ++ * user is warned to check the removed-brick for any files left ++ * unmigrated ++ */ ++ if (conf->decommission_subvols_cnt) { ++ LOCK (&defrag->lock); ++ { ++ defrag->total_failures += 1; ++ } ++ UNLOCK (&defrag->lock); ++ } ++ + ret = 0; + goto out; + } +@@ -3722,8 +3735,11 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + DHT_MSG_DIR_LOOKUP_FAILED, + "Dir:%s renamed or removed. 
Skipping", + loc->path); +- ret = 0; +- goto out; ++ if (conf->decommission_subvols_cnt) { ++ defrag->total_failures++; ++ } ++ ret = 0; ++ goto out; + } else { + gf_msg (this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_LOOKUP_FAILED, +@@ -3744,6 +3760,9 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + ret = syncop_opendir (this, loc, fd, NULL, NULL); + if (ret) { + if (-ret == ENOENT || -ret == ESTALE) { ++ if (conf->decommission_subvols_cnt) { ++ defrag->total_failures++; ++ } + ret = 0; + goto out; + } +@@ -3763,6 +3782,9 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + { + if (ret < 0) { + if (-ret == ENOENT || -ret == ESTALE) { ++ if (conf->decommission_subvols_cnt) { ++ defrag->total_failures++; ++ } + ret = 0; + goto out; + } +@@ -3868,7 +3890,10 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + DHT_MSG_DIR_LOOKUP_FAILED, + "Dir:%s renamed or removed. " + "Skipping", loc->path); +- ret = 0; ++ ret = 0; ++ if (conf->decommission_subvols_cnt) { ++ defrag->total_failures++; ++ } + continue; + } else { + gf_msg (this->name, GF_LOG_ERROR, -ret, +@@ -3931,6 +3956,9 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + "Setxattr failed. 
Dir %s " + "renamed or removed", + loc->path); ++ if (conf->decommission_subvols_cnt) { ++ defrag->total_failures++; ++ } + ret = 0; + } else { + gf_msg (this->name, GF_LOG_ERROR, -ret, +-- +1.8.3.1 + diff --git a/0297-dht-Delete-MDS-internal-xattr-from-dict-in-dht_getxa.patch b/0297-dht-Delete-MDS-internal-xattr-from-dict-in-dht_getxa.patch new file mode 100644 index 0000000..0ed23bd --- /dev/null +++ b/0297-dht-Delete-MDS-internal-xattr-from-dict-in-dht_getxa.patch @@ -0,0 +1,104 @@ +From bffd85fc848a02b69db27c8f4d9d0882e2859fc9 Mon Sep 17 00:00:00 2001 +From: moagrawa +Date: Wed, 6 Jun 2018 07:39:29 +0530 +Subject: [PATCH 297/305] dht: Delete MDS internal xattr from dict in + dht_getxattr_cbk + +Problem: At the time of fetching xattr to heal xattr by afr + it is not able to fetch xattr because posix_getxattr + has a check to ignore if xattr name is MDS + +Solution: To ignore same xattr update a check in dht_getxattr_cbk + instead of having a check in posix_getxattr + +> BUG: 1584098 +> Change-Id: I86cd2b2ee08488cb6c12f407694219d57c5361dc +> fixes: bz#1584098 +> cherry pick from commit 2c1131e5868e46cfc806fb3a1cb63a5e554b4d6c +> (Upstream review link https://review.gluster.org/#/c/20102/) + +BUG: 1582119 +Change-Id: I71894c0754a09994a3fe69915fb17b6adf2b86c5 +Signed-off-by: moagrawa +Reviewed-on: https://code.engineering.redhat.com/gerrit/140773 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-common.c | 4 ++++ + xlators/storage/posix/src/posix.c | 31 ------------------------------- + 2 files changed, 4 insertions(+), 31 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 5f246b1..c6adce4 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -4537,6 +4537,10 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + dict_del (xattr, conf->xattr_name); + } + ++ if (dict_get (xattr, 
conf->mds_xattr_key)) { ++ dict_del (xattr, conf->mds_xattr_key); ++ } ++ + if (frame->root->pid >= 0) { + GF_REMOVE_INTERNAL_XATTR + ("trusted.glusterfs.quota*", xattr); +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index 416d9e4..6aa64f9 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -4626,26 +4626,6 @@ out: + return ret; + } + +-gf_boolean_t +-posix_is_mds_xattr (const char *name) +-{ +- regex_t regcmpl; +- char *key = {"trusted.glusterfs.*.mds$"}; +- regmatch_t result[1] = {{0} }; +- gf_boolean_t status = _gf_false; +- +- if (regcomp (&regcmpl, key, REG_EXTENDED)) { +- goto out; +- } +- if (!regexec (&regcmpl, name, 1, result, 0)) { +- status = _gf_true; +- goto out; +- } +-out: +- regfree(&regcmpl); +- return status; +-} +- + + /** + * posix_getxattr - this function returns a dictionary with all the +@@ -4702,13 +4682,6 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, + goto out; + } + +- if (name && posix_is_mds_xattr (name)) { +- op_ret = -1; +- op_errno = ENOATTR; +- goto out; +- } +- +- + if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name && + ZR_FILE_CONTENT_REQUEST(name)) { + ret = posix_get_file_contents (this, loc->gfid, &name[15], +@@ -5078,10 +5051,6 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, + goto ignore; + } + +- if (posix_is_mds_xattr (keybuffer)) { +- goto ignore; +- } +- + memset (value_buf, '\0', sizeof(value_buf)); + have_val = _gf_false; + size = sys_lgetxattr (real_path, keybuffer, value_buf, +-- +1.8.3.1 + diff --git a/0298-glusterd-Fix-for-shd-not-coming-up.patch b/0298-glusterd-Fix-for-shd-not-coming-up.patch new file mode 100644 index 0000000..0d576c4 --- /dev/null +++ b/0298-glusterd-Fix-for-shd-not-coming-up.patch @@ -0,0 +1,69 @@ +From 747262000fd282ba66b7b1ffd7c51553d8f60de9 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde +Date: Fri, 8 Jun 2018 19:39:58 +0530 +Subject: [PATCH 298/305] glusterd: Fix for shd not coming up + +Problem:
After creating and starting n(n is large) distribute-replicated +volumes using a script, if we create and start (n+1)th distribute-replicate +volume manually self heal daemon is down. + +Solution: In glusterd_proc_stop after giving SIGTERM signal if the +process is still running, we are giving a SIGKILL. As SIGKILL will +not perform any cleanup process, we need to remove the pidfile. + +>Fixes: bz#1589253 +>Change-Id: I7c114334eec74c8d0f21b3e45cf7db6b8ef28af1 +>Signed-off-by: Sanju Rakonde + +upstream patch: https://review.gluster.org/#/c/20197/ + +Change-Id: I7c114334eec74c8d0f21b3e45cf7db6b8ef28af1 +BUG: 1581184 +Signed-off-by: Sanju Rakonde +Reviewed-on: https://code.engineering.redhat.com/gerrit/141526 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + libglusterfs/src/common-utils.c | 3 --- + xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 4 ++++ + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index 378ed05..fd2f004 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -3854,9 +3854,6 @@ gf_is_service_running (char *pidfile, int *pid) + ret = lockf (fno, F_TEST, 0); + if (ret == -1) + running = _gf_true; +- if (!pid) { +- goto out; +- } + + ret = fscanf (file, "%d", pid); + if (ret <= 0) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +index 8eeec40..ebf4174 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +@@ -12,6 +12,7 @@ + #include + #include + ++#include "glusterd-utils.h" + #include "common-utils.h" + #include "xlator.h" + #include "logging.h" +@@ -113,6 +114,9 @@ glusterd_proc_stop (glusterd_proc_t *proc, int sig, int flags) + "reason:%s", pid, strerror(errno)); + goto out; + } ++ ret = glusterd_unlink_file (proc->pidfile); ++ if (ret) ++ goto out; + } + + ret = 0; +-- +1.8.3.1 + diff --git 
a/0299-afr-heal-gfids-when-file-is-not-present-on-all-brick.patch b/0299-afr-heal-gfids-when-file-is-not-present-on-all-brick.patch new file mode 100644 index 0000000..692c99f --- /dev/null +++ b/0299-afr-heal-gfids-when-file-is-not-present-on-all-brick.patch @@ -0,0 +1,345 @@ +From 6fb1804d113ae996e085ef0f23fa8908d167f006 Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 14 Jun 2018 12:59:06 +0530 +Subject: [PATCH 299/305] afr: heal gfids when file is not present on all + bricks + +Backport of: https://review.gluster.org/#/c/20271/ +commit f73814ad08d552d94d0139b2592175d206e7a166 (rhgs) introduced a regression +wherein if a file is present in only 1 brick of replica *and* doesn't +have a gfid associated with it, it doesn't get healed upon the next +lookup from the client. Fix it. + +Change-Id: I7d1111dcb45b1b8b8340a7d02558f05df70aa599 +BUG: 1592666 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/141899 +Tested-by: RHGS Build Bot +Reviewed-by: Karthik Subrahmanya +--- + .../replicate/bug-1591193-assign-gfid-and-heal.t | 128 +++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-common.c | 39 ++++++- + xlators/cluster/afr/src/afr-self-heal-data.c | 8 +- + xlators/cluster/afr/src/afr-self-heal-entry.c | 4 +- + xlators/cluster/afr/src/afr-self-heal-name.c | 6 +- + xlators/cluster/afr/src/afr-self-heal.h | 6 +- + 6 files changed, 179 insertions(+), 12 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t + +diff --git a/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t +new file mode 100644 +index 0000000..d3b5f9a +--- /dev/null ++++ b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t +@@ -0,0 +1,128 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++function check_gfid_and_link_count ++{ ++ local file=$1 ++ ++ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file) ++ TEST [ ! -z $file_gfid_b0 ] ++ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file) ++ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file) ++ EXPECT $file_gfid_b0 echo $file_gfid_b1 ++ EXPECT $file_gfid_b0 echo $file_gfid_b2 ++ ++ EXPECT "2" stat -c %h $B0/${V0}0/$file ++ EXPECT "2" stat -c %h $B0/${V0}1/$file ++ EXPECT "2" stat -c %h $B0/${V0}2/$file ++} ++TESTS_EXPECTED_IN_LOOP=30 ++ ++############################################################################## ++# Test on 1x3 volume ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume start $V0; ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++ ++# Create files directly in the backend on different bricks ++echo $RANDOM >> $B0/${V0}0/file1 ++echo $RANDOM >> $B0/${V0}1/file2 ++echo $RANDOM >> $B0/${V0}2/file3 ++ ++# To prevent is_fresh_file code path ++sleep 2 ++ ++# Access them from mount to trigger name + gfid heal. 
++TEST stat $M0/file1 ++TEST stat $M0/file2 ++TEST stat $M0/file3 ++ ++# Launch index heal to complete any pending data/metadata heals. ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# Check each file has a gfid and the .glusterfs hardlink ++check_gfid_and_link_count file1 ++check_gfid_and_link_count file2 ++check_gfid_and_link_count file3 ++ ++TEST rm $M0/file1 ++TEST rm $M0/file2 ++TEST rm $M0/file3 ++cleanup; ++ ++############################################################################## ++# Test on 1x (2+1) volume ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume start $V0; ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++ ++# Create files directly in the backend on different bricks ++echo $RANDOM >> $B0/${V0}0/file1 ++echo $RANDOM >> $B0/${V0}1/file2 ++touch $B0/${V0}2/file3 ++ ++# To prevent is_fresh_file code path ++sleep 2 ++ ++# Access them from mount to trigger name + gfid heal. ++TEST stat $M0/file1 ++TEST stat $M0/file2 ++ ++# Though file is created on all 3 bricks, lookup will fail as arbiter blames the ++# other 2 bricks and ariter is not 'readable'. ++# TEST ! 
stat $M0/file3 ++# But the checks for failing lookups when quorum is not met is not yet there in ++# rhgs-3.4.0, so stat will succeed. ++TEST stat $M0/file3 ++ ++# Launch index heal to complete any pending data/metadata heals. ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# Check each file has a gfid and the .glusterfs hardlink ++check_gfid_and_link_count file1 ++check_gfid_and_link_count file2 ++check_gfid_and_link_count file3 ++ ++TEST rm $M0/file1 ++TEST rm $M0/file2 ++TEST rm $M0/file3 ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 32fd24a..50989d6 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -22,7 +22,7 @@ afr_heal_synctask (xlator_t *this, afr_local_t *local); + int + afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name, + inode_t *inode, struct afr_reply *replies, +- int source, void *gfid) ++ int source, unsigned char *sources, void *gfid) + { + afr_private_t *priv = NULL; + call_frame_t *frame = NULL; +@@ -37,6 +37,23 @@ afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name, + priv = this->private; + wind_on = alloca0 (priv->child_count); + ia_type = replies[source].poststat.ia_type; ++ if ((ia_type == IA_INVAL) && ++ (AFR_COUNT(sources, priv->child_count) == priv->child_count)) { ++ /* If a file is present on some bricks of the replica but parent ++ * dir does not have pending xattrs, all bricks are sources and ++ * the 'source' we selected earlier might be one where the file ++ * is not actually present. 
Hence check if file is present in ++ * any of the sources.*/ ++ for (i = 0; i < priv->child_count; i++) { ++ if (i == source) ++ continue; ++ if (sources[i] && replies[i].valid && ++ replies[i].op_ret == 0) { ++ ia_type = replies[i].poststat.ia_type; ++ break; ++ } ++ } ++ } + + /* gfid heal on those subvolumes that do not have gfid associated + * with the inode and update those replies. +@@ -1250,6 +1267,21 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, + return fav_child; + } + ++gf_boolean_t ++afr_is_file_empty_on_all_children (afr_private_t *priv, ++ struct afr_reply *replies) ++{ ++ int i = 0; ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if ((!replies[i].valid) || (replies[i].op_ret != 0) || ++ (replies[i].poststat.ia_size != 0)) ++ return _gf_false; ++ } ++ ++ return _gf_true; ++} ++ + int + afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources, + unsigned char *sinks, +@@ -1268,11 +1300,8 @@ afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources, + return -1; + + if (type == AFR_DATA_TRANSACTION) { +- for (i = 0; i < priv->child_count; i++) { +- if (replies[i].poststat.ia_size != 0) ++ if (!afr_is_file_empty_on_all_children(priv, replies)) + return -1; +- } +- + goto mark; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index f872a98..3ef7376 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -670,6 +670,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, + int source = -1; + gf_boolean_t did_sh = _gf_true; + gf_boolean_t is_arbiter_the_only_sink = _gf_false; ++ gf_boolean_t empty_file = _gf_false; + + priv = this->private; + +@@ -710,6 +711,11 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, + source = ret; + + if (AFR_IS_ARBITER_BRICK(priv, source)) { ++ empty_file = afr_is_file_empty_on_all_children (priv, ++ 
locked_replies); ++ if (empty_file) ++ goto restore_time; ++ + did_sh = _gf_false; + goto unlock; + } +@@ -746,7 +752,7 @@ restore_time: + afr_selfheal_restore_time (frame, this, fd->inode, source, + healed_sinks, locked_replies); + +- if (!is_arbiter_the_only_sink) { ++ if (!is_arbiter_the_only_sink || !empty_file) { + ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, + 0, 0, data_lock); + if (ret < priv->child_count) { +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 647dd71..f6d3a8a 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -187,7 +187,7 @@ __afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, + + if (replies[source].op_ret == 0) { + ret = afr_lookup_and_heal_gfid (this, fd->inode, name, +- inode, replies, source, ++ inode, replies, source, sources, + &replies[source].poststat.ia_gfid); + if (ret) + return ret; +@@ -320,7 +320,7 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, + } + + ret = afr_lookup_and_heal_gfid (this, fd->inode, name, inode, replies, +- source, ++ source, sources, + &replies[source].poststat.ia_gfid); + if (ret) + return ret; +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 556d14b..bcd0e60 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -19,7 +19,7 @@ __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid, + const char *bname, inode_t *inode, + struct afr_reply *replies, void *gfid, + unsigned char *locked_on, int source, +- gf_boolean_t is_gfid_absent) ++ unsigned char *sources, gf_boolean_t is_gfid_absent) + { + int ret = 0; + int up_count = 0; +@@ -48,7 +48,7 @@ __afr_selfheal_assign_gfid (xlator_t *this, inode_t *parent, uuid_t pargfid, + } + + afr_lookup_and_heal_gfid (this, parent, 
bname, inode, replies, source, +- gfid); ++ sources, gfid); + + out: + return ret; +@@ -426,7 +426,7 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, + is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false; + ret = __afr_selfheal_assign_gfid (this, parent, pargfid, bname, inode, + replies, gfid, locked_on, source, +- is_gfid_absent); ++ sources, is_gfid_absent); + if (ret) + return ret; + +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index b015976..cc99d9e 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -113,7 +113,7 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode); + int + afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name, + inode_t *inode, struct afr_reply *replies, int source, +- void *gfid); ++ unsigned char *sources, void *gfid); + + int + afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, +@@ -354,4 +354,8 @@ afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources, + struct afr_reply *replies, + afr_transaction_type type); + ++gf_boolean_t ++afr_is_file_empty_on_all_children (afr_private_t *priv, ++ struct afr_reply *replies); ++ + #endif /* !_AFR_SELFHEAL_H */ +-- +1.8.3.1 + diff --git a/0300-protocol-client-Don-t-send-fops-till-SETVOLUME-is-co.patch b/0300-protocol-client-Don-t-send-fops-till-SETVOLUME-is-co.patch new file mode 100644 index 0000000..73149a2 --- /dev/null +++ b/0300-protocol-client-Don-t-send-fops-till-SETVOLUME-is-co.patch @@ -0,0 +1,89 @@ +From 267c65ef6369f9becac151b1b517e0c74a1c8e8d Mon Sep 17 00:00:00 2001 +From: Raghavendra G +Date: Wed, 30 May 2018 09:34:44 +0530 +Subject: [PATCH 300/305] protocol/client: Don't send fops till SETVOLUME is + complete + +An earlier commit set conf->connected just after rpc layer sends +RPC_CLNT_CONNECT event. 
However, success of socket level connection +doesn't indicate brick stack is ready to receive fops, as +a handshake has to be done b/w client and server after +RPC_CLNT_CONNECT event. Any fop sent to brick in the window between, +* protocol/client receiving RPC_CLNT_CONNECT event +* protocol/client receiving a successful setvolume response + +can end up accessing an uninitialized brick stack. So, set +conf->connected only after a successful SETVOLUME. + +>Change-Id: I139a03d2da6b0d95a0d68391fcf54b00e749decf +>fixes: bz#1583937 +>Signed-off-by: Raghavendra G + +upstream patch: https://review.gluster.org/20101/ +BUG: 1588408 +Change-Id: I51a15a89afd1d275a44e5f279f1a685f64f10ced +Signed-off-by: Raghavendra G +Reviewed-on: https://code.engineering.redhat.com/gerrit/140963 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/protocol/client/src/client.c | 7 ++++--- + xlators/protocol/client/src/client.h | 5 +++++ + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 6cb5b6b..26b0907 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -2234,7 +2234,7 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + } + case RPC_CLNT_CONNECT: + { +- conf->connected = 1; ++ conf->can_log_disconnect = 1; + // connect happened, send 'get_supported_versions' mop + + gf_msg_debug (this->name, 0, "got RPC_CLNT_CONNECT"); +@@ -2274,7 +2274,7 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + client_register_grace_timer (this, conf); + + if (!conf->skip_notify) { +- if (conf->connected) { ++ if (conf->can_log_disconnect) { + if (!conf->disconnect_err_logged) { + gf_msg (this->name, GF_LOG_INFO, 0, + PC_MSG_CLIENT_DISCONNECTED, +@@ -2309,12 +2309,13 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + "CHILD_DOWN notify failed");
+ + } else { +- if (conf->connected) ++ if (conf->can_log_disconnect) + gf_msg_debug (this->name, 0, + "disconnected (skipped notify)"); + } + + conf->connected = 0; ++ conf->can_log_disconnect = 0; + conf->skip_notify = 0; + + if (conf->quick_reconnect) { +diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h +index c025b98..7e2c03c 100644 +--- a/xlators/protocol/client/src/client.h ++++ b/xlators/protocol/client/src/client.h +@@ -204,6 +204,11 @@ typedef struct clnt_conf { + + gf_boolean_t child_up; /* Set to true, when child is up, and + * false, when child is down */ ++ ++ gf_boolean_t can_log_disconnect; /* socket level connection is ++ * up, disconnects can be ++ * logged ++ */ + } clnt_conf_t; + + typedef struct _client_fd_ctx { +-- +1.8.3.1 + diff --git a/0301-storage-posix-Fix-posix_symlinks_match.patch b/0301-storage-posix-Fix-posix_symlinks_match.patch new file mode 100644 index 0000000..375468f --- /dev/null +++ b/0301-storage-posix-Fix-posix_symlinks_match.patch @@ -0,0 +1,104 @@ +From 4d1636273b5333ed88ffd051c365490c73a380da Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Tue, 26 Jun 2018 15:58:02 +0530 +Subject: [PATCH 301/305] storage/posix: Fix posix_symlinks_match() + + Upstream patch: https://review.gluster.org/20399 + +1) snprintf into linkname_expected should happen with PATH_MAX +2) comparison should happen with linkname_actual with complete + string linkname_expected + +BUG: 1558989 +Change-Id: Ic3b3c362dc6c69c046b9a13e031989be47ecff14 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/142486 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t | 15 ++++++++++----- + xlators/storage/posix/src/posix.c | 16 +++++++++++++--- + 2 files changed, 23 insertions(+), 8 deletions(-) + +diff --git a/tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t 
b/tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t +index 465800b..bb858a8 100644 +--- a/tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t ++++ b/tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t +@@ -13,10 +13,10 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +-TEST mkdir -p $M0/d1/dir $M0/d2 ++TEST mkdir -p $M0/d1/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 $M0/d2 + gfid_d1=$(gf_get_gfid_xattr $B0/${V0}0/d1) + gfid_d2=$(gf_get_gfid_xattr $B0/${V0}0/d2) +-gfid_dir=$(gf_get_gfid_xattr $B0/${V0}0/d1/dir) ++gfid_dir=$(gf_get_gfid_xattr $B0/${V0}0/d1/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789) + + gfid_str_d1=$(gf_gfid_xattr_to_str $gfid_d1) + gfid_str_d2=$(gf_gfid_xattr_to_str $gfid_d2) +@@ -24,7 +24,7 @@ gfid_str_d3=$(gf_gfid_xattr_to_str $gfid_dir) + + # Kill 3rd brick and rename the dir from mount. + TEST kill_brick $V0 $H0 $B0/${V0}2 +-TEST mv $M0/d1/dir $M0/d2 ++TEST mv $M0/d1/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 $M0/d2 + + # Bring it back and trigger heal. 
+ TEST $CLI volume start $V0 force +@@ -38,10 +38,15 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 + TEST $CLI volume heal $V0 + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +-# Check that .glusterfs symlink for dir exists and points to d2/dir ++# Check that .glusterfs symlink for dir exists and points to d2/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 + TEST linkname=$(readlink $B0/${V0}2/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3) +-EXPECT "dir" basename $linkname ++EXPECT "dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" basename $linkname + TEST parent_dir_gfid_str=$(echo $linkname|cut -d / -f5) + EXPECT $gfid_str_d2 echo $parent_dir_gfid_str + ++TEST rmdir $M0/d2/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 ++ ++TEST ! stat $B0/${V0}0/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3 ++TEST ! stat $B0/${V0}1/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3 ++TEST ! 
stat $B0/${V0}2/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3 + cleanup; +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index 6aa64f9..af54882 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -125,17 +125,27 @@ posix_symlinks_match (xlator_t *this, loc_t *loc, uuid_t gfid) + handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); + dir_handle = alloca0 (handle_size); + +- snprintf (linkname_expected, handle_size, "../../%02x/%02x/%s/%s", ++ snprintf (linkname_expected, PATH_MAX, "../../%02x/%02x/%s/%s", + loc->pargfid[0], loc->pargfid[1], uuid_utoa (loc->pargfid), + loc->name); + + MAKE_HANDLE_GFID_PATH (dir_handle, this, gfid, NULL); + len = sys_readlink (dir_handle, linkname_actual, PATH_MAX); +- if (len < 0) ++ if (len < 0 || len == PATH_MAX) { ++ if (len == PATH_MAX) { ++ errno = EINVAL; ++ } ++ ++ if (errno != ENOENT) { ++ gf_msg (this->name, GF_LOG_ERROR, errno, ++ P_MSG_LSTAT_FAILED, "readlink[%s] failed", ++ dir_handle); ++ } + goto out; ++ } + linkname_actual[len] = '\0'; + +- if (!strncmp (linkname_actual, linkname_expected, handle_size)) ++ if (!strcmp (linkname_actual, linkname_expected)) + ret = _gf_true; + + out: +-- +1.8.3.1 + diff --git a/0302-storage-posix-Handle-ENOSPC-correctly-in-zero_fill.patch b/0302-storage-posix-Handle-ENOSPC-correctly-in-zero_fill.patch new file mode 100644 index 0000000..629b0ff --- /dev/null +++ b/0302-storage-posix-Handle-ENOSPC-correctly-in-zero_fill.patch @@ -0,0 +1,188 @@ +From e1e80bc293f5a84087d5a35054cd8bc0abfa4836 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Wed, 13 Jun 2018 12:17:28 +0530 +Subject: [PATCH 302/305] storage/posix: Handle ENOSPC correctly in zero_fill + + Upstream patch: https://review.gluster.org/20254 + +BUG: 1594658 +Change-Id: Icc521d86cc510f88b67d334b346095713899087a +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/142311 +Tested-by: RHGS 
Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/basic/posix/zero-fill-enospace.c | 64 ++++++++++++++++++++++++++++++++++ + tests/basic/posix/zero-fill-enospace.t | 35 +++++++++++++++++++ + xlators/storage/posix/src/posix.c | 23 +++++++++++- + 3 files changed, 121 insertions(+), 1 deletion(-) + create mode 100644 tests/basic/posix/zero-fill-enospace.c + create mode 100644 tests/basic/posix/zero-fill-enospace.t + +diff --git a/tests/basic/posix/zero-fill-enospace.c b/tests/basic/posix/zero-fill-enospace.c +new file mode 100644 +index 0000000..b1aaa57 +--- /dev/null ++++ b/tests/basic/posix/zero-fill-enospace.c +@@ -0,0 +1,64 @@ ++#include ++#include ++#include ++ ++int ++main (int argc, char *argv[]) ++{ ++ glfs_t *fs = NULL; ++ glfs_fd_t *fd = NULL; ++ int ret = 1; ++ int size = 0; ++ ++ if (argc != 6) { ++ fprintf (stderr, "Syntax: %s \n", argv[0]); ++ return 1; ++ } ++ ++ fs = glfs_new (argv[2]); ++ if (!fs) { ++ fprintf (stderr, "glfs_new: returned NULL\n"); ++ return 1; ++ } ++ ++ ret = glfs_set_volfile_server (fs, "tcp", argv[1], 24007); ++ if (ret != 0) { ++ fprintf (stderr, "glfs_set_volfile_server: retuned %d\n", ret); ++ goto out; ++ } ++ ret = glfs_set_logging (fs, argv[4], 7); ++ if (ret != 0) { ++ fprintf (stderr, "glfs_set_logging: returned %d\n", ret); ++ goto out; ++ } ++ ret = glfs_init (fs); ++ if (ret != 0) { ++ fprintf (stderr, "glfs_init: returned %d\n", ret); ++ goto out; ++ } ++ ++ fd = glfs_open (fs, argv[3], O_RDWR); ++ if (fd == NULL) { ++ fprintf (stderr, "glfs_open: returned NULL\n"); ++ goto out; ++ } ++ ++ size = atoi(argv[5]); ++ if (size < 0) { ++ fprintf (stderr, "Wrong size %s", argv[5]); ++ goto out; ++ } ++ ret = glfs_zerofill (fd, 0, atoi(argv[5])); ++ if (ret <= 0) { ++ fprintf (stderr, "glfs_zerofill: returned %d\n", ret); ++ goto out; ++ } ++ ++ ret = 0; ++ ++out: ++ if (fd) ++ glfs_close(fd); ++ glfs_fini (fs); ++ return ret; ++} +diff --git a/tests/basic/posix/zero-fill-enospace.t 
b/tests/basic/posix/zero-fill-enospace.t +new file mode 100644 +index 0000000..ac2e61b +--- /dev/null ++++ b/tests/basic/posix/zero-fill-enospace.t +@@ -0,0 +1,35 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../dht.rc ++ ++cleanup; ++ ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST truncate -s 100M $B0/brick1 ++ ++TEST L1=`SETUP_LOOP $B0/brick1` ++TEST MKFS_LOOP $L1 ++ ++TEST mkdir -p $B0/${V0}1 ++ ++TEST MOUNT_LOOP $L1 $B0/${V0}1 ++ ++TEST $CLI volume create $V0 $H0:$B0/${V0}1 ++ ++TEST $CLI volume start $V0; ++ ++TEST glusterfs -s $H0 --volfile-id=$V0 $M0 ++TEST touch $M0/foo ++TEST build_tester $(dirname $0)/zero-fill-enospace.c -lgfapi -Wall -O2 ++TEST ! $(dirname $0)/zero-fill-enospace $H0 $V0 /foo `gluster --print-logdir`/glfs-$V0.log 104857600 ++ ++TEST force_umount $M0 ++TEST $CLI volume stop $V0 ++UMOUNT_LOOP ${B0}/${V0}1 ++rm -f ${B0}/brick1 ++ ++cleanup +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index af54882..01f472b 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -943,17 +943,32 @@ _posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct) + op_ret = sys_writev (fd, vector, num_vect); + if (op_ret < 0) + goto err; ++ if (op_ret != (vect_size * num_vect)) { ++ op_ret = -1; ++ errno = ENOSPC; ++ goto err; ++ } + } + if (extra) { + op_ret = sys_writev (fd, vector, extra); + if (op_ret < 0) + goto err; ++ if (op_ret != (vect_size * extra)) { ++ op_ret = -1; ++ errno = ENOSPC; ++ goto err; ++ } + } + if (remain) { + vector[0].iov_len = remain; + op_ret = sys_writev (fd, vector , 1); + if (op_ret < 0) + goto err; ++ if (op_ret != remain) { ++ op_ret = -1; ++ errno = ENOSPC; ++ goto err; ++ } + } + err: + if (o_direct) +@@ -1014,8 +1029,14 @@ posix_do_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + */ + flags = FALLOC_FL_ZERO_RANGE; + ret = sys_fallocate (pfd->fd, flags, 
offset, len); +- if (ret == 0) ++ if (ret == 0) { + goto fsync; ++ } else { ++ ret = -errno; ++ if ((ret != -ENOSYS) && (ret != -EOPNOTSUPP)) { ++ goto out; ++ } ++ } + + ret = _posix_do_zerofill (pfd->fd, offset, len, pfd->flags & O_DIRECT); + if (ret < 0) { +-- +1.8.3.1 + diff --git a/0303-block-profile-enable-cluster.eager-lock-in-block-pro.patch b/0303-block-profile-enable-cluster.eager-lock-in-block-pro.patch new file mode 100644 index 0000000..54d0bcb --- /dev/null +++ b/0303-block-profile-enable-cluster.eager-lock-in-block-pro.patch @@ -0,0 +1,40 @@ +From b6cf34d99a8b60bcf07b557328d7d998a71653ac Mon Sep 17 00:00:00 2001 +From: Prasanna Kumar Kalever +Date: Fri, 20 Apr 2018 17:26:12 +0530 +Subject: [PATCH 303/305] block-profile: enable cluster.eager-lock in + block-profile + +Eager-lock gave 2.5X perf improvement. On top of that with batching +fix in tcmu-runner and client-io-threads we are seeing close to 3X perf +improvement. But we don't want to include that in the default profile +option but enable it on a case by case basis. So not adding +client-io-threads option. 
+ + >BUG: 1573119 + >Upstream-patch: https://review.gluster.org/19913 +BUG: 1569951 +Change-Id: Ida53c3ef9a041a73b65fdd06158ac082da437206 +Signed-off-by: Prasanna Kumar Kalever +Reviewed-on: https://code.engineering.redhat.com/gerrit/141310 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/group-gluster-block | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/group-gluster-block b/extras/group-gluster-block +index d619255..56b406e 100644 +--- a/extras/group-gluster-block ++++ b/extras/group-gluster-block +@@ -6,7 +6,7 @@ performance.open-behind=off + performance.readdir-ahead=off + performance.strict-o-direct=on + network.remote-dio=disable +-cluster.eager-lock=disable ++cluster.eager-lock=enable + cluster.quorum-type=auto + cluster.data-self-heal-algorithm=full + cluster.locking-scheme=granular +-- +1.8.3.1 + diff --git a/0304-cluster-dht-Fix-rename-journal-in-changelog.patch b/0304-cluster-dht-Fix-rename-journal-in-changelog.patch new file mode 100644 index 0000000..6799164 --- /dev/null +++ b/0304-cluster-dht-Fix-rename-journal-in-changelog.patch @@ -0,0 +1,373 @@ +From 91cfe700849856245f58a8c0ee98c0fd1e9d47f6 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Mon, 28 May 2018 03:05:26 -0400 +Subject: [PATCH 304/305] cluster/dht: Fix rename journal in changelog + +With patch [1], renames are journalled only +on cached subvolume. The dht sends the special +key on the cached subvolume so that the changelog +journals the rename. With single distribute +sub-volume, the key is not being set. This patch +fixes the same. 
+ +[1] https://review.gluster.org/10410 + +Backport of: + > Patch: https://review.gluster.org/20093/ + > fixes: bz#1583018 + > Change-Id: Ic2e35b40535916fa506a714f257ba325e22d0961 + > Signed-off-by: Kotresh HR + +BUG: 1583047 +Change-Id: Ic2e35b40535916fa506a714f257ba325e22d0961 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/142601 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + tests/basic/changelog/changelog-rename.t | 44 ++++++ + tests/utils/changelogparser.py | 234 +++++++++++++++++++++++++++++++ + tests/volume.rc | 7 + + xlators/cluster/dht/src/dht-rename.c | 11 ++ + 4 files changed, 296 insertions(+) + create mode 100644 tests/basic/changelog/changelog-rename.t + create mode 100644 tests/utils/changelogparser.py + +diff --git a/tests/basic/changelog/changelog-rename.t b/tests/basic/changelog/changelog-rename.t +new file mode 100644 +index 0000000..9a0ef52 +--- /dev/null ++++ b/tests/basic/changelog/changelog-rename.t +@@ -0,0 +1,44 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++cleanup; ++ ++CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs" ++ROLLOVER_TIME=30 ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}0 ++TEST $CLI volume set $V0 changelog.changelog on ++TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++touch $M0/file1 ++mv $M0/file1 $M0/rn_file1 ++mkdir $M0/dir1 ++mv $M0/dir1 $M0/rn_dir1 ++ ++EXPECT "2" check_changelog_op ${CHANGELOG_PATH_0} "RENAME" ++ ++cleanup; ++ ++#####Test on multiple subvolume##### ++#==========================================# ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 changelog.changelog on ++TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++touch $M0/gluster_file ++mv $M0/gluster_file $M0/rn_gluster_file ++mkdir $M0/dir1 ++mv $M0/dir1 $M0/rn_dir1 ++ ++EXPECT "2" check_changelog_op ${CHANGELOG_PATH_0} "RENAME" ++ ++cleanup; +diff --git a/tests/utils/changelogparser.py b/tests/utils/changelogparser.py +new file mode 100644 +index 0000000..e173e52 +--- /dev/null ++++ b/tests/utils/changelogparser.py +@@ -0,0 +1,234 @@ ++#!/usr/bin/env python ++# -*- coding: utf-8 -*- ++""" ++Why? 
++ ++Converts this ++ ++GlusterFS Changelog | version: v1.1 | encoding : 2 ++E0b99ef11-4b79-4cd0-9730-b5a0e8c4a8c0^@4^@16877^@0^@0^@00000000-0000-0000-0000- ++000000000001/dir1^@Ec5250af6-720e-4bfe-b938-827614304f39^@23^@33188^@0^@0^@0b99 ++ef11-4b79-4cd0-9730-b5a0e8c4a8c0/hello.txt^@Dc5250af6-720e-4bfe-b938-827614304f ++39^@Dc5250af6-720e-4bfe-b938-827614304f39^@ ++ ++ ++to human readable :) ++ ++E 0b99ef11-4b79-4cd0-9730-b5a0e8c4a8c0 MKDIR 16877 0 000000000-0000-0000-0000 ++ -000000000001/dir1 ++E c5250af6-720e-4bfe-b938-827614304f39 CREATE 33188 0 0 0b99ef11-4b79-4cd0-9730 ++ -b5a0e8c4a8c0/hello.txt ++D c5250af6-720e-4bfe-b938-827614304f39 ++D c5250af6-720e-4bfe-b938-827614304f39 ++ ++ ++""" ++import sys ++import codecs ++ ++ENTRY = 'E' ++META = 'M' ++DATA = 'D' ++SEP = "\x00" ++ ++GF_FOP = [ ++ "NULL", "STAT", "READLINK", "MKNOD", "MKDIR", "UNLINK", ++ "RMDIR", "SYMLINK", "RENAME", "LINK", "TRUNCATE", "OPEN", ++ "READ", "WRITE", "STATFS", "FLUSH", "FSYNC", "SETXATTR", ++ "GETXATTR", "REMOVEXATTR", "OPENDIR", "FSYNCDIR", "ACCESS", ++ "CREATE", "FTRUNCATE", "FSTAT", "LK", "LOOKUP", "READDIR", ++ "INODELK", "FINODELK", "ENTRYLK", "FENTRYLK", "XATTROP", ++ "FXATTROP", "FSETXATTR", "FGETXATTR", "RCHECKSUM", "SETATTR", ++ "FSETATTR", "READDIRP", "GETSPEC", "FORGET", "RELEASE", ++ "RELEASEDIR", "FREMOVEXATTR", "FALLOCATE", "DISCARD", "ZEROFILL"] ++ ++ ++class NumTokens_V11(object): ++ E = 7 ++ M = 3 ++ D = 2 ++ NULL = 3 ++ MKNOD = 7 ++ MKDIR = 7 ++ UNLINK = 4 ++ RMDIR = 4 ++ SYMLINK = 4 ++ RENAME = 5 ++ LINK = 4 ++ SETXATTR = 3 ++ REMOVEXATTR = 3 ++ CREATE = 7 ++ SETATTR = 3 ++ FTRUNCATE = 3 ++ FXATTROP = 3 ++ ++ ++class NumTokens_V12(NumTokens_V11): ++ UNLINK = 5 ++ RMDIR = 5 ++ ++ ++class Version: ++ V11 = "v1.1" ++ V12 = "v1.2" ++ ++ ++class Record(object): ++ def __init__(self, **kwargs): ++ self.ts = kwargs.get("ts", None) ++ self.fop_type = kwargs.get("fop_type", None) ++ self.gfid = kwargs.get("gfid", None) ++ self.path = kwargs.get("path", None) ++ self.fop = 
kwargs.get("fop", None) ++ self.path1 = kwargs.get("path1", None) ++ self.path2 = kwargs.get("path2", None) ++ self.mode = kwargs.get("mode", None) ++ self.uid = kwargs.get("uid", None) ++ self.gid = kwargs.get("gid", None) ++ ++ def create_mknod_mkdir(self, **kwargs): ++ self.path = kwargs.get("path", None) ++ self.fop = kwargs.get("fop", None) ++ self.mode = kwargs.get("mode", None) ++ self.uid = kwargs.get("uid", None) ++ self.gid = kwargs.get("gid", None) ++ ++ def metadata(self, **kwargs): ++ self.fop = kwargs.get("fop", None) ++ ++ def rename(self, **kwargs): ++ self.fop = kwargs.get("fop", None) ++ self.path1 = kwargs.get("path1", None) ++ self.path2 = kwargs.get("path2", None) ++ ++ def link_symlink_unlink_rmdir(self, **kwargs): ++ self.path = kwargs.get("path", None) ++ self.fop = kwargs.get("fop", None) ++ ++ def __unicode__(self): ++ if self.fop_type == "D": ++ return u"{ts} {fop_type} {gfid}".format(**self.__dict__) ++ elif self.fop_type == "M": ++ return u"{ts} {fop_type} {gfid} {fop}".format(**self.__dict__) ++ elif self.fop_type == "E": ++ if self.fop in ["CREATE", "MKNOD", "MKDIR"]: ++ return (u"{ts} {fop_type} {gfid} {fop} " ++ u"{path} {mode} {uid} {gid}".format(**self.__dict__)) ++ elif self.fop == "RENAME": ++ return (u"{ts} {fop_type} {gfid} {fop} " ++ u"{path1} {path2}".format(**self.__dict__)) ++ elif self.fop in ["LINK", "SYMLINK", "UNLINK", "RMDIR"]: ++ return (u"{ts} {fop_type} {gfid} {fop} " ++ u"{path}".format(**self.__dict__)) ++ else: ++ return repr(self.__dict__) ++ else: ++ return repr(self.__dict__) ++ ++ def __str__(self): ++ return unicode(self).encode('utf-8') ++ ++ ++def get_num_tokens(data, tokens, version=Version.V11): ++ if version == Version.V11: ++ cls_numtokens = NumTokens_V11 ++ elif version == Version.V12: ++ cls_numtokens = NumTokens_V12 ++ else: ++ sys.stderr.write("Unknown Changelog Version\n") ++ sys.exit(1) ++ ++ if data[tokens[0]] in [ENTRY, META]: ++ if len(tokens) >= 3: ++ return getattr(cls_numtokens, 
GF_FOP[int(data[tokens[2]])]) ++ else: ++ return None ++ else: ++ return getattr(cls_numtokens, data[tokens[0]]) ++ ++ ++def process_record(data, tokens, changelog_ts, callback): ++ if data[tokens[0]] in [ENTRY, META]: ++ try: ++ tokens[2] = GF_FOP[int(data[tokens[2]])] ++ except ValueError: ++ tokens[2] = "NULL" ++ ++ if not changelog_ts: ++ ts1 = int(changelog_ts) ++ else: ++ ts1="" ++ record = Record(ts=ts1, fop_type=data[tokens[0]], ++ gfid=data[tokens[1]]) ++ if data[tokens[0]] == META: ++ record.metadata(fop=tokens[2]) ++ elif data[tokens[0]] == ENTRY: ++ if tokens[2] in ["CREATE", "MKNOD", "MKDIR"]: ++ record.create_mknod_mkdir(fop=tokens[2], ++ path=data[tokens[6]], ++ mode=int(data[tokens[3]]), ++ uid=int(data[tokens[4]]), ++ gid=int(data[tokens[5]])) ++ elif tokens[2] == "RENAME": ++ record.rename(fop=tokens[2], ++ path1=data[tokens[3]], ++ path2=data[tokens[4]]) ++ if tokens[2] in ["LINK", "SYMLINK", "UNLINK", "RMDIR"]: ++ record.link_symlink_unlink_rmdir(fop=tokens[2], ++ path=data[tokens[3]]) ++ callback(record) ++ ++ ++def default_callback(record): ++ sys.stdout.write(u"{0}\n".format(record)) ++ ++ ++def parse(filename, callback=default_callback): ++ data = None ++ tokens = [] ++ changelog_ts = filename.rsplit(".")[-1] ++ with codecs.open(filename, mode="rb", encoding="utf-8") as f: ++ # GlusterFS Changelog | version: v1.1 | encoding : 2 ++ header = f.readline() ++ version = header.split()[4] ++ ++ data = f.readline() ++ ++ slice_start = 0 ++ in_record = False ++ ++ prev_char = "" ++ next_char = "" ++ for i, c in enumerate(data): ++ next_char = "" ++ if len(data) >= (i + 2): ++ next_char = data[i+1] ++ ++ if not in_record and c in [ENTRY, META, DATA]: ++ tokens.append(slice(slice_start, i+1)) ++ slice_start = i+1 ++ in_record = True ++ continue ++ ++ if c == SEP and ((prev_char != SEP and next_char == SEP) or ++ (prev_char == SEP and next_char != SEP) or ++ (prev_char != SEP and next_char != SEP)): ++ tokens.append(slice(slice_start, i)) ++ 
slice_start = i+1 ++ ++ num_tokens = get_num_tokens(data, tokens, version) ++ ++ if num_tokens == len(tokens): ++ process_record(data, tokens, changelog_ts, callback) ++ in_record = False ++ tokens = [] ++ ++ prev_char = c ++ ++ # process last record ++ if slice_start < (len(data) - 1): ++ tokens.append(slice(slice_start, len(data))) ++ process_record(data, tokens, changelog_ts, callback) ++ tokens = [] ++ ++parse(sys.argv[1]) +diff --git a/tests/volume.rc b/tests/volume.rc +index f9e16c5..bba7e4e 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -865,3 +865,10 @@ function get_mount_lru_size_value { + rm -f $statedump + echo $val + } ++ ++function check_changelog_op { ++ local clog_path=$1 ++ local op=$2 ++ ++ $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep $op | wc -l ++} +diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c +index d311ac6..1d0c2bb 100644 +--- a/xlators/cluster/dht/src/dht-rename.c ++++ b/xlators/cluster/dht/src/dht-rename.c +@@ -1948,6 +1948,7 @@ dht_rename (call_frame_t *frame, xlator_t *this, + dht_conf_t *conf = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + char newgfid[GF_UUID_BUF_SIZE] = {0}; ++ gf_boolean_t free_xdata = _gf_false; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); +@@ -1957,7 +1958,17 @@ dht_rename (call_frame_t *frame, xlator_t *this, + conf = this->private; + + if (conf->subvolume_cnt == 1) { ++ if (!IA_ISDIR (oldloc->inode->ia_type)) { ++ if (!xdata) { ++ free_xdata = _gf_true; ++ } ++ DHT_CHANGELOG_TRACK_AS_RENAME(xdata, oldloc, newloc); ++ } + default_rename (frame, this, oldloc, newloc, xdata); ++ if (free_xdata && xdata) { ++ dict_unref(xdata); ++ xdata = NULL; ++ } + return 0; + } + +-- +1.8.3.1 + diff --git a/0305-geo-rep-Fix-geo-rep-for-older-versions-of-unshare.patch b/0305-geo-rep-Fix-geo-rep-for-older-versions-of-unshare.patch new file mode 100644 index 0000000..9891d15 --- /dev/null +++ 
b/0305-geo-rep-Fix-geo-rep-for-older-versions-of-unshare.patch @@ -0,0 +1,162 @@ +From 77d79754d914ce7d15d02d5fc9d9785609094850 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Thu, 7 Jun 2018 06:32:36 -0400 +Subject: [PATCH 305/305] geo-rep: Fix geo-rep for older versions of unshare + +Geo-rep mounts are private to worker. It uses +mount namespace using unshare command to achieve +the same. Well, the unshare command has to support +'--propagation' option. So geo-rep breaks on the +systems with older unshare version. The patch +makes it fall back to lazy umount behaviour if +the unshare does not support propagation option. + +Backpor of: + > Patch: https://review.gluster.org/20221 + > fixes: bz#1589782 + > Change-Id: Ia614f068aede288d63ac62fea4461b1865066054 + > Signed-off-by: Kotresh HR + +BUG: 1569312 +Change-Id: Ia614f068aede288d63ac62fea4461b1865066054 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/142613 +Tested-by: RHGS Build Bot +Reviewed-by: Aravinda Vishwanathapura Krishna Murthy +Reviewed-by: Atin Mukherjee +--- + geo-replication/syncdaemon/gsyncd.py | 1 + + geo-replication/syncdaemon/monitor.py | 15 +++++++++++---- + geo-replication/syncdaemon/resource.py | 21 ++++++++++++++++++--- + geo-replication/syncdaemon/syncdutils.py | 18 ++++++++++++++++++ + 4 files changed, 48 insertions(+), 7 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index b0ed0ae..fff193b 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -775,6 +775,7 @@ def main_i(): + elif remote: + # master + gconf.label = gconf.local_path ++ gconf.worker = True + else: + gconf.label = 'slave' + startup(go_daemon=go_daemon, log_file=log_file, label=gconf.label) +diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py +index 55f8330..9245572 100644 +--- a/geo-replication/syncdaemon/monitor.py ++++ 
b/geo-replication/syncdaemon/monitor.py +@@ -29,7 +29,7 @@ from syncdutils import gf_event, EVENT_GEOREP_FAULTY + from syncdutils import Volinfo, Popen + + from gsyncdstatus import GeorepStatus, set_monitor_status +- ++from syncdutils import unshare_propagation_supported + + ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError + +@@ -247,9 +247,16 @@ class Monitor(object): + if access_mount: + os.execv(sys.executable, args_to_worker) + else: +- unshare_cmd = ['unshare', '-m', '--propagation', 'private'] +- cmd = unshare_cmd + args_to_worker +- os.execvp("unshare", cmd) ++ if unshare_propagation_supported(): ++ logging.debug("Worker would mount volume privately") ++ unshare_cmd = ['unshare', '-m', '--propagation', ++ 'private'] ++ cmd = unshare_cmd + args_to_worker ++ os.execvp("unshare", cmd) ++ else: ++ logging.debug("Mount is not private. It would be lazy" ++ " umounted") ++ os.execv(sys.executable, args_to_worker) + + cpids.add(cpid) + agents.add(apid) +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index c4b5b53..00e62b7 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -40,6 +40,7 @@ from gsyncdstatus import GeorepStatus + from syncdutils import get_master_and_slave_data_from_args + from syncdutils import lf, Popen, sup, Volinfo + from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt ++from syncdutils import unshare_propagation_supported + + UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z') + HostRX = re.compile('[a-zA-Z\d](?:[a-zA-Z\d.-]*[a-zA-Z\d])?', re.I) +@@ -1302,15 +1303,29 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): + assert(mntdata[-1] == '\0') + mntpt = mntdata[:-1] + assert(mntpt) +- if mounted and gconf.label == 'slave' \ ++ ++ umount_master = False ++ umount_slave = False ++ master_access_mount = getattr(gconf, 'access_mount', ++ False) ++ worker = getattr(gconf, 'worker', None) ++ ++ if worker \ ++ and not 
unshare_propagation_supported() \ ++ and not boolify(master_access_mount): ++ umount_master = True ++ if gconf.label == 'slave' \ + and not boolify(gconf.slave_access_mount): ++ umount_slave = True ++ ++ if mounted and (umount_master or umount_slave): + po = self.umount_l(mntpt) + po.terminate_geterr(fail_on_err=False) + if po.returncode != 0: + po.errlog() + rv = po.returncode +- if gconf.label == 'slave' \ +- and not boolify(gconf.slave_access_mount): ++ logging.debug("Lazy umount done: %s" % mntpt) ++ if umount_master or umount_slave: + self.cleanup_mntpt(mntpt) + except: + logging.exception('mount cleanup failure:') +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index a493c37..6dafb0a 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -76,6 +76,7 @@ CHANGELOG_AGENT_SERVER_VERSION = 1.0 + CHANGELOG_AGENT_CLIENT_VERSION = 1.0 + NodeID = None + rsync_version = None ++unshare_mnt_propagation = None + SPACE_ESCAPE_CHAR = "%20" + NEWLINE_ESCAPE_CHAR = "%0A" + PERCENTAGE_ESCAPE_CHAR = "%25" +@@ -637,6 +638,23 @@ def get_master_and_slave_data_from_args(args): + return (master_name, slave_data) + + ++def unshare_propagation_supported(): ++ global unshare_mnt_propagation ++ if unshare_mnt_propagation is not None: ++ return unshare_mnt_propagation ++ ++ unshare_mnt_propagation = False ++ p = subprocess.Popen(["unshare", "--help"], ++ stderr=subprocess.PIPE, ++ stdout=subprocess.PIPE) ++ out, err = p.communicate() ++ if p.returncode == 0: ++ if "propagation" in out: ++ unshare_mnt_propagation = True ++ ++ return unshare_mnt_propagation ++ ++ + def get_rsync_version(rsync_cmd): + global rsync_version + if rsync_version is not None: +-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index cf90e56..5c1718f 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 3.12.2 
-Release: 12%{?dist} +Release: 13%{?dist} %endif License: GPLv2 or LGPLv3+ Group: System Environment/Base @@ -547,6 +547,29 @@ Patch0279: 0279-Revert-rpc-make-actor-search-parallel.patch Patch0280: 0280-Revert-rpcsvc-scale-rpcsvc_request_handler-threads.patch Patch0281: 0281-Revert-program-GF-DUMP-Shield-ping-processing-from-t.patch Patch0282: 0282-cluster-dht-Remove-EIO-from-dht_inode_missing.patch +Patch0283: 0283-cluster-ec-Fix-pre-op-xattrop-management.patch +Patch0284: 0284-glusterd-glusterd-is-releasing-the-locks-before-time.patch +Patch0285: 0285-gluster-Allow-only-read-only-CLI-commands-via-remote.patch +Patch0286: 0286-glusterd-memory-leak-in-geo-rep-status.patch +Patch0287: 0287-Revert-performance-write-behind-fix-flush-stuck-by-f.patch +Patch0288: 0288-feature-locks-Unwind-response-based-on-clinet-versio.patch +Patch0289: 0289-changelog-fix-br-state-check.t-failure-for-brick_mux.patch +Patch0290: 0290-performance-open-behind-open-pending-fds-before-perm.patch +Patch0291: 0291-Core-The-lock-contention-on-gf_client_dump_inodes_to.patch +Patch0292: 0292-geo-rep-Fix-rename-of-directory-in-hybrid-crawl.patch +Patch0293: 0293-rpcsvc-correct-event-thread-scaling.patch +Patch0294: 0294-features-shard-Fix-missing-unlock-in-shard_fsync_sha.patch +Patch0295: 0295-dht-Excessive-dict-is-null-logs-in-dht_revalidate_cb.patch +Patch0296: 0296-cluster-dht-Increase-failure-count-for-lookup-failur.patch +Patch0297: 0297-dht-Delete-MDS-internal-xattr-from-dict-in-dht_getxa.patch +Patch0298: 0298-glusterd-Fix-for-shd-not-coming-up.patch +Patch0299: 0299-afr-heal-gfids-when-file-is-not-present-on-all-brick.patch +Patch0300: 0300-protocol-client-Don-t-send-fops-till-SETVOLUME-is-co.patch +Patch0301: 0301-storage-posix-Fix-posix_symlinks_match.patch +Patch0302: 0302-storage-posix-Handle-ENOSPC-correctly-in-zero_fill.patch +Patch0303: 0303-block-profile-enable-cluster.eager-lock-in-block-pro.patch +Patch0304: 0304-cluster-dht-Fix-rename-journal-in-changelog.patch +Patch0305: 
0305-geo-rep-Fix-geo-rep-for-older-versions-of-unshare.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -2493,6 +2516,12 @@ fi %endif %changelog +* Thu Jun 28 2018 Milind Changire - 3.12.2-13 +- fixes bugs bz#1493085 bz#1518710 bz#1554255 bz#1558948 bz#1558989 + bz#1559452 bz#1567001 bz#1569312 bz#1569951 bz#1575539 bz#1575557 bz#1577051 + bz#1580120 bz#1581184 bz#1581553 bz#1581647 bz#1582119 bz#1582129 bz#1582417 + bz#1583047 bz#1588408 bz#1592666 bz#1594658 + * Thu May 24 2018 Milind Changire - 3.12.2-12 - fixes bugs bz#1558989 bz#1580344 bz#1581057 bz#1581219