diff --git a/SOURCES/0457-cluster-ec-Improve-detection-of-new-heals.patch b/SOURCES/0457-cluster-ec-Improve-detection-of-new-heals.patch new file mode 100644 index 0000000..be9202a --- /dev/null +++ b/SOURCES/0457-cluster-ec-Improve-detection-of-new-heals.patch @@ -0,0 +1,409 @@ +From 3e8b3a2c2c6f83635486035fc8040c87d89813d2 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Thu, 2 Jul 2020 18:08:52 +0200 +Subject: [PATCH 457/465] cluster/ec: Improve detection of new heals + +When EC successfully healed a directory it assumed that maybe other +entries inside that directory could have been created, which could +require additional heal cycles. For this reason, when the heal happened +as part of one index heal iteration, it triggered a new iteration. + +The problem happened when the directory was healthy, so no new entries +were added, but its index entry was not removed for some reason. In +this case self-heal started an endless loop healing the same directory +continuously, causing high CPU utilization. + +This patch improves detection of new files added to the heal index so +that a new index heal iteration is only triggered if there is new work +to do. 
+ +>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24665/ +>Fixes: #1354 + +Change-Id: I2355742b85fbfa6de758bccc5d2e1a283c82b53f +BUG: 1852736 +Signed-off-by: Xavi Hernandez +Reviewed-on: https://code.engineering.redhat.com/gerrit/208041 +Tested-by: Ashish Pandey +Tested-by: RHGS Build Bot +Reviewed-by: Ashish Pandey +--- + xlators/cluster/ec/src/ec-common.c | 2 +- + xlators/cluster/ec/src/ec-heal.c | 58 +++++++++++++++++++++++----------- + xlators/cluster/ec/src/ec-heald.c | 24 ++++++++++---- + xlators/cluster/ec/src/ec-inode-read.c | 27 ++++++++++++++-- + xlators/cluster/ec/src/ec-types.h | 4 +-- + xlators/cluster/ec/src/ec.h | 1 + + 6 files changed, 86 insertions(+), 30 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index e580bfb..e3f8769 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -230,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx) + int32_t + ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good, +- uintptr_t bad, dict_t *xdata) ++ uintptr_t bad, uint32_t pending, dict_t *xdata) + { + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL, +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 06a7016..e2de879 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -72,6 +72,7 @@ struct ec_name_data { + char *name; + inode_t *parent; + default_args_cbk_t *replies; ++ uint32_t heal_pending; + }; + + static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL}; +@@ -996,6 +997,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia, + ret = -ENOTCONN; + goto out; + } ++ + out: + if (xattr) + dict_unref(xattr); +@@ -1164,6 +1166,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, + dict_t *xdata = 
NULL; + char *linkname = NULL; + ec_config_t config; ++ + /* There should be just one gfid key */ + EC_REPLIES_ALLOC(replies, ec->nodes); + if (gfid_db->count != 1) { +@@ -1408,6 +1411,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, + + ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent, + participants); ++ if (ret >= 0) { ++ /* If ec_create_name() succeeded we return 1 to indicate that a new ++ * file has been created and it will need to be healed. */ ++ ret = 1; ++ } + out: + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); +@@ -1485,18 +1493,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name, + name_on); + +- if (ret < 0) ++ if (ret < 0) { + memset(name_on, 0, ec->nodes); ++ } else { ++ name_data->heal_pending += ret; ++ } + + for (i = 0; i < ec->nodes; i++) + if (name_data->participants[i] && !name_on[i]) + name_data->failed_on[i] = 1; ++ + return 0; + } + + int + ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, +- unsigned char *participants) ++ unsigned char *participants, uint32_t *pending) + { + int i = 0; + int j = 0; +@@ -1509,7 +1521,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, + name_data.frame = frame; + name_data.participants = participants; + name_data.failed_on = alloca0(ec->nodes); +- ; ++ name_data.heal_pending = 0; + + for (i = 0; i < ec->nodes; i++) { + if (!participants[i]) +@@ -1528,6 +1540,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, + break; + } + } ++ *pending += name_data.heal_pending; ++ + loc_wipe(&loc); + return ret; + } +@@ -1535,7 +1549,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, + int + __ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *heal_on, unsigned char *sources, +- unsigned char *healed_sinks) ++ unsigned char *healed_sinks, uint32_t *pending) + { + unsigned 
char *locked_on = NULL; + unsigned char *output = NULL; +@@ -1580,7 +1594,7 @@ unlock: + if (sources[i] || healed_sinks[i]) + participants[i] = 1; + } +- ret = ec_heal_names(frame, ec, inode, participants); ++ ret = ec_heal_names(frame, ec, inode, participants, pending); + + if (EC_COUNT(participants, ec->nodes) <= ec->fragments) + goto out; +@@ -1601,7 +1615,8 @@ out: + + int + ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, +- unsigned char *sources, unsigned char *healed_sinks) ++ unsigned char *sources, unsigned char *healed_sinks, ++ uint32_t *pending) + { + unsigned char *locked_on = NULL; + unsigned char *up_subvols = NULL; +@@ -1632,7 +1647,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, + goto unlock; + } + ret = __ec_heal_entry(frame, ec, inode, locked_on, sources, +- healed_sinks); ++ healed_sinks, pending); + } + unlock: + cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, +@@ -1953,14 +1968,14 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state) + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0, + (heal->good | heal->bad), heal->good, heal->bad, +- NULL); ++ 0, NULL); + } + + return EC_STATE_END; + case -EC_STATE_REPORT: + if (fop->cbks.heal) { +- fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0, +- 0, 0, NULL); ++ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1, ++ fop->error, 0, 0, 0, 0, NULL); + } + + return EC_STATE_END; +@@ -1997,14 +2012,15 @@ out: + if (fop != NULL) { + ec_manager(fop, error); + } else { +- func(frame, NULL, this, -1, error, 0, 0, 0, NULL); ++ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL); + } + } + + int32_t + ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uintptr_t mask, +- uintptr_t good, uintptr_t bad, dict_t *xdata) ++ uintptr_t good, uintptr_t bad, uint32_t pending, ++ dict_t *xdata) + { + ec_fop_data_t *fop = cookie; + ec_heal_t *heal = fop->data; 
+@@ -2489,6 +2505,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + intptr_t mbad = 0; + intptr_t good = 0; + intptr_t bad = 0; ++ uint32_t pending = 0; + ec_fop_data_t *fop = data; + gf_boolean_t blocking = _gf_false; + ec_heal_need_t need_heal = EC_HEAL_NONEED; +@@ -2524,7 +2541,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + if (loc->name && strlen(loc->name)) { + ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name, + participants); +- if (ret == 0) { ++ if (ret >= 0) { + gf_msg_debug(this->name, 0, + "%s: name heal " + "successful on %" PRIXPTR, +@@ -2542,7 +2559,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + + /* Mount triggers heal only when it detects that it must need heal, shd + * triggers heals periodically which need not be thorough*/ +- if (ec->shd.iamshd) { ++ if (ec->shd.iamshd && (ret <= 0)) { + ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false, + &need_heal); + +@@ -2552,13 +2569,15 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + goto out; + } + } ++ + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); + if (IA_ISREG(loc->inode->ia_type)) { + ret = ec_heal_data(frame, ec, blocking, loc->inode, sources, + healed_sinks); + } else if (IA_ISDIR(loc->inode->ia_type) && !partial) { +- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks); ++ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks, ++ &pending); + } else { + ret = 0; + memcpy(sources, participants, ec->nodes); +@@ -2588,10 +2607,11 @@ out: + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno, + ec_char_array_to_mask(participants, ec->nodes), +- mgood & good, mbad & bad, NULL); ++ mgood & good, mbad & bad, pending, NULL); + } + if (frame) + STACK_DESTROY(frame->root); ++ + return; + } + +@@ -2638,8 +2658,8 @@ void + ec_heal_fail(ec_t *ec, ec_fop_data_t *fop) + { + if (fop->cbks.heal) { 
+- fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0, +- NULL); ++ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0, ++ 0, 0, NULL); + } + ec_fop_data_release(fop); + } +@@ -2826,7 +2846,7 @@ fail: + if (fop) + ec_fop_data_release(fop); + if (func) +- func(frame, NULL, this, -1, err, 0, 0, 0, NULL); ++ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL); + } + + int +diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c +index cba111a..4f4b6aa 100644 +--- a/xlators/cluster/ec/src/ec-heald.c ++++ b/xlators/cluster/ec/src/ec-heald.c +@@ -156,15 +156,27 @@ int + ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc, + gf_boolean_t full) + { ++ dict_t *xdata = NULL; ++ uint32_t count; + int32_t ret; + +- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL); +- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) { ++ ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata); ++ if (!full && (loc->inode->ia_type == IA_IFDIR)) { + /* If we have just healed a directory, it's possible that +- * other index entries have appeared to be healed. We put a +- * mark so that we can check it later and restart a scan +- * without delay. */ +- healer->rerun = _gf_true; ++ * other index entries have appeared to be healed. */ ++ if ((xdata != NULL) && ++ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) && ++ (count > 0)) { ++ /* Force a rerun of the index healer. 
*/ ++ gf_msg_debug(healer->this->name, 0, "%d more entries to heal", ++ count); ++ ++ healer->rerun = _gf_true; ++ } ++ } ++ ++ if (xdata != NULL) { ++ dict_unref(xdata); + } + + return ret; +diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c +index f87a94a..e82e8f6 100644 +--- a/xlators/cluster/ec/src/ec-inode-read.c ++++ b/xlators/cluster/ec/src/ec-inode-read.c +@@ -393,7 +393,8 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state) + int32_t + ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + int32_t op_ret, int32_t op_errno, uintptr_t mask, +- uintptr_t good, uintptr_t bad, dict_t *xdata) ++ uintptr_t good, uintptr_t bad, uint32_t pending, ++ dict_t *xdata) + { + ec_fop_data_t *fop = cookie; + fop_getxattr_cbk_t func = fop->data; +@@ -402,6 +403,25 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + char *str; + char bin1[65], bin2[65]; + ++ /* We try to return the 'pending' information in xdata, but if this cannot ++ * be set, we will ignore it silently. We prefer to report the success or ++ * failure of the heal itself. */ ++ if (xdata == NULL) { ++ xdata = dict_new(); ++ } else { ++ dict_ref(xdata); ++ } ++ if (xdata != NULL) { ++ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) { ++ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc ++ * enforces to check the result in this case. However we don't ++ * really care if it succeeded or not. We'll just do the same. ++ * ++ * This empty 'if' avoids the warning, and it will be removed by ++ * the optimizer. 
*/ ++ } ++ } ++ + if (op_ret >= 0) { + dict = dict_new(); + if (dict == NULL) { +@@ -435,11 +455,14 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + } + + out: +- func(frame, NULL, xl, op_ret, op_errno, dict, NULL); ++ func(frame, NULL, xl, op_ret, op_errno, dict, xdata); + + if (dict != NULL) { + dict_unref(dict); + } ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } + + return 0; + } +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 34a9768..f15429d 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -186,10 +186,10 @@ struct _ec_inode { + + typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, + int32_t, uintptr_t, uintptr_t, uintptr_t, +- dict_t *); ++ uint32_t, dict_t *); + typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, + int32_t, uintptr_t, uintptr_t, uintptr_t, +- dict_t *); ++ uint32_t, dict_t *); + + union _ec_cbk { + fop_access_cbk_t access; +diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h +index 1b210d9..6f6de6d 100644 +--- a/xlators/cluster/ec/src/ec.h ++++ b/xlators/cluster/ec/src/ec.h +@@ -18,6 +18,7 @@ + #define EC_XATTR_SIZE EC_XATTR_PREFIX "size" + #define EC_XATTR_VERSION EC_XATTR_PREFIX "version" + #define EC_XATTR_HEAL EC_XATTR_PREFIX "heal" ++#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new" + #define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty" + #define EC_STRIPE_CACHE_MAX_SIZE 10 + #define EC_VERSION_SIZE 2 +-- +1.8.3.1 + diff --git a/SOURCES/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch b/SOURCES/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch new file mode 100644 index 0000000..b7b9f04 --- /dev/null +++ b/SOURCES/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch @@ -0,0 +1,182 @@ +From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001 +From: Raghavendra Bhat +Date: Mon, 15 Apr 2019 14:09:34 
-0400 +Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after + cancelling the signer thread + +When bit-rot feature is disabled, the signer thread from the bit-rot-stub +xlator (the thread which performs the setxattr of the signature on to the +disk) is cancelled. But, if the cancelled signer thread had already held +the mutex (&priv->lock) which it uses to monitor the queue of files to +be signed, then the mutex is never released. This creates problems in +future when the feature is enabled again. Both the new instance of the +signer thread and the regular thread which enqueues the files to be +signed will be blocked on this mutex. + +So, as part of cancelling the signer thread, unlock the mutex associated +with it as well using pthread_cleanup_push and pthread_cleanup_pop. + +Upstream patch: + > patch: https://review.gluster.org/22572 + > fixes: #bz1700078 + > Change-Id: Ib761910caed90b268e69794ddeb108165487af40 + +Change-Id: Ib761910caed90b268e69794ddeb108165487af40 +BUG: 1851424 +Signed-off-by: Raghavendra M +Reviewed-on: https://code.engineering.redhat.com/gerrit/208304 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bit-rot/src/stub/bit-rot-stub-messages.h | 4 +- + xlators/features/bit-rot/src/stub/bit-rot-stub.c | 62 +++++++++++++++++++--- + 2 files changed, 59 insertions(+), 7 deletions(-) + +diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +index 7f07f29..155802b 100644 +--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h ++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, + BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL, + BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL, + BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED, +- BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL); ++ 
BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL, ++ BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD, ++ BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL); + + #endif /* !_BITROT_STUB_MESSAGES_H_ */ +diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c +index 3f48a4b..c3f81bc 100644 +--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c ++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c +@@ -26,6 +26,15 @@ + + #define BR_STUB_REQUEST_COOKIE 0x1 + ++void ++br_stub_lock_cleaner(void *arg) ++{ ++ pthread_mutex_t *clean_mutex = arg; ++ ++ pthread_mutex_unlock(clean_mutex); ++ return; ++} ++ + void * + br_stub_signth(void *); + +@@ -166,8 +175,11 @@ init(xlator_t *this) + + ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, + "brssign"); +- if (ret != 0) ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED, ++ "failed to create the new thread for signer"); + goto cleanup_lock; ++ } + + ret = br_stub_bad_object_container_init(this, priv); + if (ret) { +@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options) + priv = this->private; + + GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err); +- if (priv->do_versioning) { ++ if (priv->do_versioning && !priv->signth) { + ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, + "brssign"); +- if (ret != 0) ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ BRS_MSG_SPAWN_SIGN_THRD_FAILED, ++ "failed to create the new thread for signer"); + goto err; ++ } + + ret = br_stub_bad_object_container_init(this, priv); + if (ret) { +@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options) + gf_msg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, + "Could not cancel sign serializer thread"); ++ } else { ++ gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD, ++ "killed the signer thread"); ++ priv->signth = 0; + } +- priv->signth = 0; + 
} + + if (priv->container.thread) { +@@ -902,6 +921,24 @@ br_stub_signth(void *arg) + + THIS = this; + while (1) { ++ /* ++ * Disabling bit-rot feature leads to this particular thread ++ * getting cleaned up by reconfigure via a call to the function ++ * gf_thread_cleanup_xint (which in turn calls pthread_cancel ++ * and pthread_join). But, if this thread had held the mutex ++ * &priv->lock at the time of cancellation, then it leads to ++ * deadlock in future when bit-rot feature is enabled (which ++ * again spawns this thread which cant hold the lock as the ++ * mutex is still held by the previous instance of the thread ++ * which got killed). Also, the br_stub_handle_object_signature ++ * function which is called whenever file has to be signed ++ * also gets blocked as it too attempts to acquire &priv->lock. ++ * ++ * So, arrange for the lock to be unlocked as part of the ++ * cleanup of this thread using pthread_cleanup_push and ++ * pthread_cleanup_pop. ++ */ ++ pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock); + pthread_mutex_lock(&priv->lock); + { + while (list_empty(&priv->squeue)) +@@ -912,6 +949,7 @@ br_stub_signth(void *arg) + list_del_init(&sigstub->list); + } + pthread_mutex_unlock(&priv->lock); ++ pthread_cleanup_pop(0); + + call_resume(sigstub->stub); + +@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd, + + priv = this->private; + +- if (frame->root->pid != GF_CLIENT_PID_BITD) ++ if (frame->root->pid != GF_CLIENT_PID_BITD) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID, ++ "PID %d from where signature request" ++ "came, does not belong to bit-rot daemon." ++ "Unwinding the fop", ++ frame->root->pid); + goto dofop; ++ } + + ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess); +- if (ret) ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL, ++ "failed to prepare the signature for %s. 
Unwinding the fop", ++ uuid_utoa(fd->inode->gfid)); + goto dofop; ++ } + if (fakesuccess) { + op_ret = op_errno = 0; + goto dofop; +@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + /* object signature request */ + ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign); + if (!ret) { ++ gf_msg_debug(this->name, 0, "got SIGNATURE request on %s", ++ uuid_utoa(fd->inode->gfid)); + br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata); + goto done; + } +-- +1.8.3.1 + diff --git a/SOURCES/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch b/SOURCES/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch new file mode 100644 index 0000000..2c9b66e --- /dev/null +++ b/SOURCES/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch @@ -0,0 +1,181 @@ +From 80eef2f52bb92ed740ac00eeb11ee7a3e7fffff2 Mon Sep 17 00:00:00 2001 +From: Raghavendra Bhat +Date: Mon, 11 Mar 2019 12:16:50 -0400 +Subject: [PATCH 459/465] features/bit-rot: Unconditionally sign the files + during oneshot crawl + +Currently bit-rot feature has an issue with disabling and reenabling it +on the same volume. Consider enabling bit-rot detection which goes on to +crawl and sign all the files present in the volume. Then some files are +modified and the bit-rot daemon goes on to sign the modified files with +the correct signature. Now, disable bit-rot feature. While, signing and +scrubbing are not happening, previous checksums of the files continue to +exist as extended attributes. Now, if some files with checksum xattrs get +modified, they are not signed with new signature as the feature is off. + +At this point, if the feature is enabled again, the bit rot daemon will +go and sign those files which does not have any bit-rot specific xattrs +(i.e. those files which were created after bit-rot was disabled). Whereas +the files with bit-rot xattrs wont get signed with proper new checksum. 
+At this point if scrubber runs, it finds the on disk checksum and the actual +checksum of the file to be different (because the file got modified) and +marks the file as corrupted. + +FIX: + +The fix is to unconditionally sign the files when the bit-rot daemon +comes up (instead of skipping the files with bit-rot xattrs). + +upstream fix: + > patch: https://review.gluster.org/#/c/glusterfs/+/22360/ + > fixes: #bz1700078 + > Change-ID: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5 + +Change-Id: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5 +BUG: 1851424 +Signed-off-by: Raghavendra M +Reviewed-on: https://code.engineering.redhat.com/gerrit/208305 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bitrot/bug-1700078.t | 87 +++++++++++++++++++++++++++++ + xlators/features/bit-rot/src/bitd/bit-rot.c | 15 ++++- + 2 files changed, 101 insertions(+), 1 deletion(-) + create mode 100644 tests/bitrot/bug-1700078.t + +diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t +new file mode 100644 +index 0000000..f273742 +--- /dev/null ++++ b/tests/bitrot/bug-1700078.t +@@ -0,0 +1,87 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++ ++cleanup; ++ ++## Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++ ++## Lets create and start the volume ++TEST $CLI volume create $V0 $H0:$B0/${V0}1 ++TEST $CLI volume start $V0 ++ ++## Enable bitrot for volume $V0 ++TEST $CLI volume bitrot $V0 enable ++ ++## Turn off quick-read so that it wont cache the contents ++# of the file in lookup. For corrupted files, it might ++# end up in reads being served from the cache instead of ++# an error. 
++TEST $CLI volume set $V0 performance.quick-read off ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub' ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location' ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location' ++ ++## Set expiry-timeout to 1 sec ++TEST $CLI volume set $V0 features.expiry-time 1 ++ ++##Mount $V0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++ ++## Turn off quick-read xlator so that, the contents are not served from the ++# quick-read cache. ++TEST $CLI volume set $V0 performance.quick-read off ++ ++#Create sample file ++TEST `echo "1234" > $M0/FILE1` ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1" ++ ++##disable bitrot ++TEST $CLI volume bitrot $V0 disable ++ ++## modify the file ++TEST `echo "write" >> $M0/FILE1` ++ ++# unmount and remount when the file has to be accessed. ++# This is to ensure that, when the remount happens, ++# and the file is read, its contents are served from the ++# brick instead of cache. ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++##enable bitrot ++TEST $CLI volume bitrot $V0 enable ++ ++# expiry time is set to 1 second. Hence sleep for 2 seconds for the ++# oneshot crawler to finish its crawling and sign the file properly. 
++sleep 2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub' ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location' ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location' ++ ++## Ondemand scrub ++TEST $CLI volume bitrot $V0 scrub ondemand ++ ++# the scrub ondemand CLI command, just ensures that ++# the scrubber has received the ondemand scrub directive ++# and started. sleep for 2 seconds for scrubber to finish ++# crawling and marking file(s) as bad (if if finds that ++# corruption has happened) which are filesystem operations. ++sleep 2 ++ ++TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1 ++ ++##Mount $V0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++ ++TEST cat $M0/FILE1 ++ ++cleanup; +diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c +index b8feef7..424c0d5 100644 +--- a/xlators/features/bit-rot/src/bitd/bit-rot.c ++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c +@@ -973,6 +973,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + int32_t ret = -1; + inode_t *linked_inode = NULL; + gf_boolean_t need_signing = _gf_false; ++ gf_boolean_t need_reopen = _gf_true; + + GF_VALIDATE_OR_GOTO("bit-rot", subvol, out); + GF_VALIDATE_OR_GOTO("bit-rot", data, out); +@@ -1046,6 +1047,18 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + uuid_utoa(linked_inode->gfid)); + } else { + need_signing = br_check_object_need_sign(this, xattr, child); ++ ++ /* ++ * If we are here means, bitrot daemon has started. Is it just ++ * a simple restart of the daemon or is it started because the ++ * feature is enabled is something hard to determine. 
Hence, ++ * if need_signing is false (because bit-rot version and signature ++ * are present), then still go ahead and sign it. ++ */ ++ if (!need_signing) { ++ need_signing = _gf_true; ++ need_reopen = _gf_true; ++ } + } + + if (!need_signing) +@@ -1054,7 +1067,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, + "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path, + uuid_utoa(linked_inode->gfid), child->brick_path); +- br_trigger_sign(this, child, linked_inode, &loc, _gf_true); ++ br_trigger_sign(this, child, linked_inode, &loc, need_reopen); + + ret = 0; + +-- +1.8.3.1 + diff --git a/SOURCES/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch b/SOURCES/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch new file mode 100644 index 0000000..e31349a --- /dev/null +++ b/SOURCES/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch @@ -0,0 +1,152 @@ +From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001 +From: Ashish Pandey +Date: Thu, 23 Jul 2020 11:07:32 +0530 +Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop + folder + +Problem: +If a gfid is present in indices/xattrop folder while +the file/dir is actually healthy and all the xattrs are healthy, +it causes lot of lookups by shd on an entry which does not need +to be healed. +This whole process eats up lot of CPU usage without doing meaningful +work. + +Solution: +Set trusted.ec.dirty xattr of the entry so that actual heal process +happens and at the end of it, during unset of dirty, gfid entry from +indices/xattrop will be removed. 
+ +>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/ +>Fixes: #1385 + +Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b +BUG: 1785714 +Signed-off-by: Ashish Pandey +Reviewed-on: https://code.engineering.redhat.com/gerrit/208591 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-heal.c | 73 ++++++++++++++++++++++++++++++++++++++- + xlators/cluster/ec/src/ec-types.h | 7 +++- + 2 files changed, 78 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index e2de879..7d25853 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2488,6 +2488,59 @@ out: + return ret; + } + ++int ++ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) ++{ ++ int i = 0; ++ int ret = 0; ++ dict_t **xattr = NULL; ++ loc_t loc = {0}; ++ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0}; ++ unsigned char *on = NULL; ++ default_args_cbk_t *replies = NULL; ++ dict_t *dict = NULL; ++ ++ /* Allocate the required memory */ ++ loc.inode = inode_ref(inode); ++ gf_uuid_copy(loc.gfid, inode->gfid); ++ on = alloca0(ec->nodes); ++ EC_REPLIES_ALLOC(replies, ec->nodes); ++ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer); ++ if (!xattr) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ dict = dict_new(); ++ if (!dict) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ for (i = 0; i < ec->nodes; i++) { ++ xattr[i] = dict; ++ on[i] = 1; ++ } ++ dirty_xattr[EC_METADATA_TXN] = hton64(1); ++ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr, ++ (sizeof(*dirty_xattr) * EC_VERSION_SIZE)); ++ if (ret < 0) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame, ++ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64, ++ xattr, NULL); ++out: ++ if (dict) { ++ dict_unref(dict); ++ } ++ if (xattr) { ++ GF_FREE(xattr); ++ } ++ cluster_replies_wipe(replies, ec->nodes); 
++ loc_wipe(&loc); ++ return ret; ++} ++ + void + ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + { +@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false, + &need_heal); + +- if (need_heal == EC_HEAL_NONEED) { ++ if (need_heal == EC_HEAL_PURGE_INDEX) { ++ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL, ++ "Index entry needs to be purged for: %s ", ++ uuid_utoa(loc->gfid)); ++ /* We need to send xattrop to set dirty flag so that it can be ++ * healed and index entry could be removed. We need not to take lock ++ * on this entry to do so as we are just setting dirty flag which ++ * actually increases the trusted.ec.dirty count and does not set ++ * the new value. ++ * This will make sure that it is not interfering in other fops.*/ ++ ec_heal_set_dirty_without_lock(frame, ec, loc->inode); ++ } else if (need_heal == EC_HEAL_NONEED) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); + goto out; +@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources, + goto out; + } + } ++ /* If lock count is 0, all dirty flags are 0 and all the ++ * versions are macthing then why are we here. It looks ++ * like something went wrong while removing the index entries ++ * after completing a successful heal or fop. 
In this case ++ * we need to remove this index entry to avoid triggering heal ++ * in a loop and causing lookups again and again*/ ++ *need_heal = EC_HEAL_PURGE_INDEX; + } else { + for (i = 0; i < ec->nodes; i++) { + /* Since each lock can only increment the dirty +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index f15429d..700dc39 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t); + + enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX }; + +-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST }; ++enum _ec_heal_need { ++ EC_HEAL_NONEED, ++ EC_HEAL_MAYBE, ++ EC_HEAL_MUST, ++ EC_HEAL_PURGE_INDEX ++}; + + enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL }; + +-- +1.8.3.1 + diff --git a/SOURCES/0461-geo-replication-Fix-IPv6-parsing.patch b/SOURCES/0461-geo-replication-Fix-IPv6-parsing.patch new file mode 100644 index 0000000..098be5f --- /dev/null +++ b/SOURCES/0461-geo-replication-Fix-IPv6-parsing.patch @@ -0,0 +1,127 @@ +From d425ed54261d5bc19aa853854cc3b64647e3c897 Mon Sep 17 00:00:00 2001 +From: Aravinda Vishwanathapura +Date: Sun, 12 Jul 2020 12:42:36 +0530 +Subject: [PATCH 461/465] geo-replication: Fix IPv6 parsing + +Brick paths in Volinfo used `:` as delimiter, Geo-rep uses split +based on `:` char. This will go wrong with IPv6. + +This patch handles the IPv6 case and handles the split properly. 
+Backport of: + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24706 + >Fixes: #1366 + >Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d + >Signed-off-by: Aravinda Vishwanathapura + +BUG: 1855966 +Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d +Signed-off-by: Sunny Kumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/208610 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/master.py | 5 ++-- + geo-replication/syncdaemon/syncdutils.py | 43 +++++++++++++++++++++++++++++--- + 2 files changed, 43 insertions(+), 5 deletions(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 3f98337..08e98f8 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -26,7 +26,8 @@ from rconf import rconf + from syncdutils import Thread, GsyncdError, escape_space_newline + from syncdutils import unescape_space_newline, gauxpfx, escape + from syncdutils import lstat, errno_wrap, FreeObject, lf, matching_disk_gfid +-from syncdutils import NoStimeAvailable, PartialHistoryAvailable ++from syncdutils import NoStimeAvailable, PartialHistoryAvailable, host_brick_split ++ + + URXTIME = (-1, 0) + +@@ -1466,7 +1467,7 @@ class GMasterChangelogMixin(GMasterCommon): + node = rconf.args.resource_remote + node_data = node.split("@") + node = node_data[-1] +- remote_node_ip = node.split(":")[0] ++ remote_node_ip, _ = host_brick_split(node) + self.status.set_slave_node(remote_node_ip) + + def changelogs_batch_process(self, changes): +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index 7560fa1..f43e13b 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -883,6 +883,19 @@ class Popen(subprocess.Popen): + self.errfail() + + ++def host_brick_split(value): ++ """ ++ IPv6 compatible way to split and get the host ++ and brick 
information. Example inputs: ++ node1.example.com:/exports/bricks/brick1/brick ++ fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick ++ """ ++ parts = value.split(":") ++ brick = parts[-1] ++ hostparts = parts[0:-1] ++ return (":".join(hostparts), brick) ++ ++ + class Volinfo(object): + + def __init__(self, vol, host='localhost', prelude=[], master=True): +@@ -925,7 +938,7 @@ class Volinfo(object): + @memoize + def bricks(self): + def bparse(b): +- host, dirp = b.find("name").text.split(':', 2) ++ host, dirp = host_brick_split(b.find("name").text) + return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text} + return [bparse(b) for b in self.get('brick')] + +@@ -1001,6 +1014,16 @@ class VolinfoFromGconf(object): + def is_hot(self, brickpath): + return False + ++ def is_uuid(self, value): ++ try: ++ uuid.UUID(value) ++ return True ++ except ValueError: ++ return False ++ ++ def possible_path(self, value): ++ return "/" in value ++ + @property + @memoize + def bricks(self): +@@ -1014,8 +1037,22 @@ class VolinfoFromGconf(object): + out = [] + for b in bricks_data: + parts = b.split(":") +- bpath = parts[2] if len(parts) == 3 else "" +- out.append({"host": parts[1], "dir": bpath, "uuid": parts[0]}) ++ b_uuid = None ++ if self.is_uuid(parts[0]): ++ b_uuid = parts[0] ++ # Set all parts except first ++ parts = parts[1:] ++ ++ if self.possible_path(parts[-1]): ++ bpath = parts[-1] ++ # Set all parts except last ++ parts = parts[0:-1] ++ ++ out.append({ ++ "host": ":".join(parts), # if remaining parts are IPv6 name ++ "dir": bpath, ++ "uuid": b_uuid ++ }) + + return out + +-- +1.8.3.1 + diff --git a/SOURCES/0462-Issue-with-gf_fill_iatt_for_dirent.patch b/SOURCES/0462-Issue-with-gf_fill_iatt_for_dirent.patch new file mode 100644 index 0000000..aa5fd21 --- /dev/null +++ b/SOURCES/0462-Issue-with-gf_fill_iatt_for_dirent.patch @@ -0,0 +1,43 @@ +From f027734165374979bd0bff8ea059dfaadca85e07 Mon Sep 17 00:00:00 2001 +From: Soumya Koduri +Date: Thu, 2 Jul 2020 
02:07:56 +0530 +Subject: [PATCH 462/465] Issue with gf_fill_iatt_for_dirent + +In "gf_fill_iatt_for_dirent()", while calculating inode_path for loc, +the inode should be of parent's. Instead it is loc.inode which results in error + and eventually lookup/readdirp fails. + +This patch fixes the same. + +This is backport of below mainstream fix : + +> Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5 +> Fixes: #1351 +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24661/ + +Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5 +BUG: 1853189 +Signed-off-by: Soumya Koduri +Reviewed-on: https://code.engineering.redhat.com/gerrit/208691 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + libglusterfs/src/gf-dirent.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c +index f289723..3fa67f2 100644 +--- a/libglusterfs/src/gf-dirent.c ++++ b/libglusterfs/src/gf-dirent.c +@@ -277,7 +277,7 @@ gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol) + gf_uuid_copy(loc.pargfid, parent->gfid); + loc.name = entry->d_name; + loc.parent = inode_ref(parent); +- ret = inode_path(loc.inode, entry->d_name, &path); ++ ret = inode_path(loc.parent, entry->d_name, &path); + loc.path = path; + if (ret < 0) + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch b/SOURCES/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch new file mode 100644 index 0000000..b47cdd1 --- /dev/null +++ b/SOURCES/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch @@ -0,0 +1,87 @@ +From 7d87933f648092ae55d57a96fd06e3df975d764c Mon Sep 17 00:00:00 2001 +From: Ashish Pandey +Date: Tue, 18 Aug 2020 10:33:48 +0530 +Subject: [PATCH 463/465] cluster/ec: Change handling of heal failure to avoid + crash + +Problem: +ec_getxattr_heal_cbk was called with NULL as second argument +in case heal was 
failing. +This function was dereferencing "cookie" argument which caused crash. + +Solution: +Cookie is changed to carry the value that was supposed to be +stored in fop->data, so even in the case when fop is NULL in error +case, there won't be any NULL dereference. + +Thanks to Xavi for the suggestion about the fix. + +>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23050/ +>fixes: bz#1729085 + +Change-Id: I0798000d5cadb17c3c2fbfa1baf77033ffc2bb8c +BUG: 1852736 +Reviewed-on: https://code.engineering.redhat.com/gerrit/209012 +Tested-by: Ashish Pandey +Tested-by: RHGS Build Bot +Reviewed-by: Xavi Hernandez Juan +--- + xlators/cluster/ec/src/ec-heal.c | 11 ++++++----- + xlators/cluster/ec/src/ec-inode-read.c | 4 ++-- + 2 files changed, 8 insertions(+), 7 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 7d25853..6e6948b 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -1966,7 +1966,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state) + + case EC_STATE_REPORT: + if (fop->cbks.heal) { +- fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0, ++ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0, + (heal->good | heal->bad), heal->good, heal->bad, + 0, NULL); + } +@@ -2022,10 +2022,11 @@ ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this, + uintptr_t good, uintptr_t bad, uint32_t pending, + dict_t *xdata) + { +- ec_fop_data_t *fop = cookie; +- ec_heal_t *heal = fop->data; ++ ec_heal_t *heal = cookie; + +- fop->heal = NULL; ++ if (heal->fop) { ++ heal->fop->heal = NULL; ++ } + heal->fop = NULL; + heal->error = op_ret < 0 ? 
op_errno : 0; + syncbarrier_wake(heal->data); +@@ -2669,7 +2670,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + out: + ec_reset_entry_healing(fop); + if (fop->cbks.heal) { +- fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno, ++ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno, + ec_char_array_to_mask(participants, ec->nodes), + mgood & good, mbad & bad, pending, NULL); + } +diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c +index e82e8f6..c50d0ad 100644 +--- a/xlators/cluster/ec/src/ec-inode-read.c ++++ b/xlators/cluster/ec/src/ec-inode-read.c +@@ -396,8 +396,8 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + uintptr_t good, uintptr_t bad, uint32_t pending, + dict_t *xdata) + { +- ec_fop_data_t *fop = cookie; +- fop_getxattr_cbk_t func = fop->data; ++ fop_getxattr_cbk_t func = cookie; ++ + ec_t *ec = xl->private; + dict_t *dict = NULL; + char *str; +-- +1.8.3.1 + diff --git a/SOURCES/0464-storage-posix-Remove-nr_files-usage.patch b/SOURCES/0464-storage-posix-Remove-nr_files-usage.patch new file mode 100644 index 0000000..d98e33d --- /dev/null +++ b/SOURCES/0464-storage-posix-Remove-nr_files-usage.patch @@ -0,0 +1,102 @@ +From 7c51addf7912a94320e6b148bd66f2dbf274c533 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Mon, 11 Mar 2019 14:04:39 +0530 +Subject: [PATCH 464/465] storage/posix: Remove nr_files usage + +nr_files is supposed to represent the number of files opened in posix. +Present logic doesn't seem to handle anon-fds because of which the +counts would always be wrong. + +I don't remember anyone using this value in debugging any problem probably +because we always have 'ls -l /proc/<pid>/fd' which not only prints the +fds that are active but also prints their paths. It also handles directories +and anon-fds which actually opened the file. So removing this code +instead of fixing the buggy logic to have the nr_files.
+ +> fixes bz#1688106 +> Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040 +> Signed-off-by: Pranith Kumar K +> (Cherry pick from commit f5987d38f216a3142dfe45f03bf66ff4827d9b55) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22333/) + +Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040 +BUG: 1851989 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/209468 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix-common.c | 2 -- + xlators/storage/posix/src/posix-entry-ops.c | 2 -- + xlators/storage/posix/src/posix-inode-fd-ops.c | 2 -- + xlators/storage/posix/src/posix.h | 1 - + 4 files changed, 7 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index ac53796..b317627 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -128,7 +128,6 @@ posix_priv(xlator_t *this) + gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value)); + gf_proc_dump_write("max_write", "%" PRId64, + GF_ATOMIC_GET(priv->write_value)); +- gf_proc_dump_write("nr_files", "%" PRId64, GF_ATOMIC_GET(priv->nr_files)); + + return 0; + } +@@ -815,7 +814,6 @@ posix_init(xlator_t *this) + } + + LOCK_INIT(&_private->lock); +- GF_ATOMIC_INIT(_private->nr_files, 0); + GF_ATOMIC_INIT(_private->read_value, 0); + GF_ATOMIC_INIT(_private->write_value, 0); + +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index 65650b3..b3a5381 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -2243,8 +2243,6 @@ fill_stat: + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context path=%s fd=%p", real_path, fd); + +- GF_ATOMIC_INC(priv->nr_files); +- + op_ret = 0; + + out: +diff --git 
a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index d135d8b..81f4a6b 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1605,7 +1605,6 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context path=%s fd=%p", real_path, fd); + +- GF_ATOMIC_INC(priv->nr_files); + op_ret = 0; + + out: +@@ -2526,7 +2525,6 @@ posix_release(xlator_t *this, fd_t *fd) + if (!priv) + goto out; + +- GF_ATOMIC_DEC(priv->nr_files); + out: + return 0; + } +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 61495a7..124dbb4 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -154,7 +154,6 @@ struct posix_private { + + gf_atomic_t read_value; /* Total read, from init */ + gf_atomic_t write_value; /* Total write, from init */ +- gf_atomic_t nr_files; + /* + In some cases, two exported volumes may reside on the same + partition on the server. 
Sending statvfs info for both +-- +1.8.3.1 + diff --git a/SOURCES/0465-posix-Implement-a-janitor-thread-to-close-fd.patch b/SOURCES/0465-posix-Implement-a-janitor-thread-to-close-fd.patch new file mode 100644 index 0000000..fc22456 --- /dev/null +++ b/SOURCES/0465-posix-Implement-a-janitor-thread-to-close-fd.patch @@ -0,0 +1,384 @@ +From 143b93b230b429cc712353243ed794b68494c040 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Mon, 27 Jul 2020 18:08:00 +0530 +Subject: [PATCH 465/465] posix: Implement a janitor thread to close fd + +Problem: In the commit fb20713b380e1df8d7f9e9df96563be2f9144fd6 we use + synctask to close fd but we have found the patch is reducing the + performance + +Solution: Use janitor thread to close fd's and save the pfd ctx into + ctx janitor list and also save the posix_xlator into pfd object to + avoid the race condition during cleanup in brick_mux environment + +> Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092 +> Fixes: #1396 +> Signed-off-by: Mohit Agrawal +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24755/) +> (Cherry pick from commit 41b9616435cbdf671805856e487e373060c9455b) + +Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092 +BUG: 1851989 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/209448 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfsd/src/glusterfsd.c | 4 ++ + libglusterfs/src/glusterfs/glusterfs.h | 7 ++ + rpc/rpc-lib/src/rpcsvc.c | 6 -- + xlators/storage/posix/src/posix-common.c | 34 +++++++++- + xlators/storage/posix/src/posix-helpers.c | 93 ++++++++++++++++++++++++++ + xlators/storage/posix/src/posix-inode-fd-ops.c | 33 ++++----- + xlators/storage/posix/src/posix.h | 7 ++ + 7 files changed, 161 insertions(+), 23 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 9821180..955bf1d 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@
-1839,6 +1839,10 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx) + + INIT_LIST_HEAD(&cmd_args->xlator_options); + INIT_LIST_HEAD(&cmd_args->volfile_servers); ++ ctx->pxl_count = 0; ++ pthread_mutex_init(&ctx->fd_lock, NULL); ++ pthread_cond_init(&ctx->fd_cond, NULL); ++ INIT_LIST_HEAD(&ctx->janitor_fds); + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 495a4d7..bf6a987 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -733,6 +733,13 @@ struct _glusterfs_ctx { + } stats; + + struct list_head volfile_list; ++ /* Add members to manage janitor threads for cleanup fd */ ++ struct list_head janitor_fds; ++ pthread_cond_t fd_cond; ++ pthread_mutex_t fd_lock; ++ pthread_t janitor; ++ /* The variable is use to save total posix xlator count */ ++ uint32_t pxl_count; + + char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ + }; +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 23ca1fd..3f184bf 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -375,12 +375,6 @@ rpcsvc_program_actor(rpcsvc_request_t *req) + + req->ownthread = program->ownthread; + req->synctask = program->synctask; +- if (((req->procnum == GFS3_OP_RELEASE) || +- (req->procnum == GFS3_OP_RELEASEDIR)) && +- (program->prognum == GLUSTER_FOP_PROGRAM)) { +- req->ownthread = _gf_false; +- req->synctask = _gf_true; +- } + + err = SUCCESS; + gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s", +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index b317627..c5a43a1 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -150,6 +150,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) 
+ struct timespec sleep_till = { + 0, + }; ++ glusterfs_ctx_t *ctx = this->ctx; + + switch (event) { + case GF_EVENT_PARENT_UP: { +@@ -160,8 +161,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + case GF_EVENT_PARENT_DOWN: { + if (!victim->cleanup_starting) + break; +- gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", +- victim->name); + + if (priv->janitor) { + pthread_mutex_lock(&priv->janitor_mutex); +@@ -187,6 +186,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + GF_FREE(priv->janitor); + } + priv->janitor = NULL; ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ while (priv->rel_fdcount > 0) { ++ pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); ++ } ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", ++ victim->name); + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); + } break; + default: +@@ -1038,7 +1047,13 @@ posix_init(xlator_t *this) + pthread_cond_init(&_private->fsync_cond, NULL); + pthread_mutex_init(&_private->janitor_mutex, NULL); + pthread_cond_init(&_private->janitor_cond, NULL); ++ pthread_cond_init(&_private->fd_cond, NULL); + INIT_LIST_HEAD(&_private->fsyncs); ++ _private->rel_fdcount = 0; ++ ret = posix_spawn_ctx_janitor_thread(this); ++ if (ret) ++ goto out; ++ + ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this, + "posixfsy"); + if (ret) { +@@ -1133,6 +1148,8 @@ posix_fini(xlator_t *this) + { + struct posix_private *priv = this->private; + gf_boolean_t health_check = _gf_false; ++ glusterfs_ctx_t *ctx = this->ctx; ++ uint32_t count; + int ret = 0; + + if (!priv) +@@ -1166,6 +1183,19 @@ posix_fini(xlator_t *this) + priv->janitor = NULL; + } + ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ count = --ctx->pxl_count; ++ if (count == 0) { ++ pthread_cond_signal(&ctx->fd_cond); ++ } ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ if (count == 0) { ++ pthread_join(ctx->janitor, NULL); ++ } ++ + 
if (priv->fsyncer) { + (void)gf_thread_cleanup_xint(priv->fsyncer); + priv->fsyncer = 0; +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 39dbcce..73a44be 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1582,6 +1582,99 @@ unlock: + return; + } + ++static struct posix_fd * ++janitor_get_next_fd(glusterfs_ctx_t *ctx) ++{ ++ struct posix_fd *pfd = NULL; ++ ++ while (list_empty(&ctx->janitor_fds)) { ++ if (ctx->pxl_count == 0) { ++ return NULL; ++ } ++ ++ pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock); ++ } ++ ++ pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list); ++ list_del_init(&pfd->list); ++ ++ return pfd; ++} ++ ++static void ++posix_close_pfd(xlator_t *xl, struct posix_fd *pfd) ++{ ++ THIS = xl; ++ ++ if (pfd->dir == NULL) { ++ gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd); ++ sys_close(pfd->fd); ++ } else { ++ gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir); ++ sys_closedir(pfd->dir); ++ } ++ ++ GF_FREE(pfd); ++} ++ ++static void * ++posix_ctx_janitor_thread_proc(void *data) ++{ ++ xlator_t *xl; ++ struct posix_fd *pfd; ++ glusterfs_ctx_t *ctx = NULL; ++ struct posix_private *priv_fd; ++ ++ ctx = data; ++ ++ pthread_mutex_lock(&ctx->fd_lock); ++ ++ while ((pfd = janitor_get_next_fd(ctx)) != NULL) { ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ xl = pfd->xl; ++ posix_close_pfd(xl, pfd); ++ ++ pthread_mutex_lock(&ctx->fd_lock); ++ ++ priv_fd = xl->private; ++ priv_fd->rel_fdcount--; ++ if (!priv_fd->rel_fdcount) ++ pthread_cond_signal(&priv_fd->fd_cond); ++ } ++ ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ return NULL; ++} ++ ++int ++posix_spawn_ctx_janitor_thread(xlator_t *this) ++{ ++ int ret = 0; ++ glusterfs_ctx_t *ctx = NULL; ++ ++ ctx = this->ctx; ++ ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ if (ctx->pxl_count++ == 0) { ++ ret = gf_thread_create(&ctx->janitor, NULL, ++ 
posix_ctx_janitor_thread_proc, ctx, ++ "posixctxjan"); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, ++ "spawning janitor thread failed"); ++ ctx->pxl_count--; ++ } ++ } ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ return ret; ++} ++ + static int + is_fresh_file(int64_t ctime_sec) + { +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 81f4a6b..21119ea 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1352,6 +1352,22 @@ out: + return 0; + } + ++static void ++posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd) ++{ ++ glusterfs_ctx_t *ctx = this->ctx; ++ struct posix_private *priv = this->private; ++ ++ pfd->xl = this; ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ list_add_tail(&pfd->list, &ctx->janitor_fds); ++ priv->rel_fdcount++; ++ pthread_cond_signal(&ctx->fd_cond); ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++} ++ + int32_t + posix_releasedir(xlator_t *this, fd_t *fd) + { +@@ -1374,11 +1390,7 @@ posix_releasedir(xlator_t *this, fd_t *fd) + "pfd->dir is NULL for fd=%p", fd); + goto out; + } +- +- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); +- +- sys_closedir(pfd->dir); +- GF_FREE(pfd); ++ posix_add_fd_to_cleanup(this, pfd); + + out: + return 0; +@@ -2494,7 +2506,6 @@ out: + int32_t + posix_release(xlator_t *this, fd_t *fd) + { +- struct posix_private *priv = NULL; + struct posix_fd *pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; +@@ -2502,8 +2513,6 @@ posix_release(xlator_t *this, fd_t *fd) + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + +- priv = this->private; +- + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, +@@ -2517,13 +2526,7 @@ posix_release(xlator_t *this, fd_t *fd) + "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); + } + +- gf_msg_debug(this->name, 0, 
"janitor: closing dir fd=%p", pfd->dir); +- +- sys_close(pfd->fd); +- GF_FREE(pfd); +- +- if (!priv) +- goto out; ++ posix_add_fd_to_cleanup(this, pfd); + + out: + return 0; +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 124dbb4..07f367b 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -134,6 +134,8 @@ struct posix_fd { + off_t dir_eof; /* offset at dir EOF */ + int odirect; + struct list_head list; /* to add to the janitor list */ ++ xlator_t *xl; ++ char _pad[4]; /* manual padding */ + }; + + struct posix_private { +@@ -204,6 +206,7 @@ struct posix_private { + pthread_cond_t fsync_cond; + pthread_mutex_t janitor_mutex; + pthread_cond_t janitor_cond; ++ pthread_cond_t fd_cond; + int fsync_queue_count; + + enum { +@@ -259,6 +262,7 @@ struct posix_private { + gf_boolean_t fips_mode_rchecksum; + gf_boolean_t ctime; + gf_boolean_t janitor_task_stop; ++ uint32_t rel_fdcount; + }; + + typedef struct { +@@ -665,6 +669,9 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, + int + posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); + ++int ++posix_spawn_ctx_janitor_thread(xlator_t *this); ++ + void + posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + +-- +1.8.3.1 + diff --git a/SOURCES/0466-cluster-ec-Change-stale-index-handling.patch b/SOURCES/0466-cluster-ec-Change-stale-index-handling.patch new file mode 100644 index 0000000..1dc9f57 --- /dev/null +++ b/SOURCES/0466-cluster-ec-Change-stale-index-handling.patch @@ -0,0 +1,68 @@ +From b603170ae5f583037b8177a9d19e56c7821edf0b Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Tue, 25 Aug 2020 04:19:54 +0530 +Subject: [PATCH 466/466] cluster/ec: Change stale index handling + +Problem: +Earlier approach is setting dirty bit which requires extra heal + +Fix: +Send zero-xattrop which deletes stale index without any need +for extra heal. 
+ + > Fixes: #1385 + > Upstream-patch: https://review.gluster.org/c/glusterfs/+/24911/ + +BUG: 1785714 +Change-Id: I7e97a1d8b5516f7be47cae55d0e56b14332b6cae +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/209904 +Tested-by: RHGS Build Bot +Tested-by: Ashish Pandey +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-heal.c | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 6e6948b..06bafa5 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2490,7 +2490,7 @@ out: + } + + int +-ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) ++ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode) + { + int i = 0; + int ret = 0; +@@ -2520,7 +2520,6 @@ ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) + xattr[i] = dict; + on[i] = 1; + } +- dirty_xattr[EC_METADATA_TXN] = hton64(1); + ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr, + (sizeof(*dirty_xattr) * EC_VERSION_SIZE)); + if (ret < 0) { +@@ -2621,13 +2620,10 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL, + "Index entry needs to be purged for: %s ", + uuid_utoa(loc->gfid)); +- /* We need to send xattrop to set dirty flag so that it can be +- * healed and index entry could be removed. We need not to take lock +- * on this entry to do so as we are just setting dirty flag which +- * actually increases the trusted.ec.dirty count and does not set +- * the new value. +- * This will make sure that it is not interfering in other fops.*/ +- ec_heal_set_dirty_without_lock(frame, ec, loc->inode); ++ /* We need to send zero-xattrop so that stale index entry could be ++ * removed. 
We need not take lock on this entry to do so as ++ * xattrop on a brick is atomic. */ ++ ec_heal_purge_stale_index(frame, ec, loc->inode); + } else if (need_heal == EC_HEAL_NONEED) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); +-- +1.8.3.1 + diff --git a/SOURCES/0467-build-Added-dependency-for-glusterfs-selinux.patch b/SOURCES/0467-build-Added-dependency-for-glusterfs-selinux.patch new file mode 100644 index 0000000..93bb140 --- /dev/null +++ b/SOURCES/0467-build-Added-dependency-for-glusterfs-selinux.patch @@ -0,0 +1,38 @@ +From 9176ee8f10c3c33f31d00261995ed27e8680934a Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Thu, 3 Sep 2020 11:46:38 +0000 +Subject: [PATCH 467/467] build: Added dependency for glusterfs-selinux + +> Fixes: #1442 +> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24876/ +> Change-Id: I7d79bceff329db4d525bc8a77ba7ffe41bf53c97 +> Signed-off-by: Rinku Kothiya + +BUG: 1460657 + +Change-Id: I7d79bceff329db4d525bc8a77ba7ffe41bf53c97 +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/210637 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 9def416..ed6bdf3 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -650,6 +650,9 @@ Summary: Clustered file-system server + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-cli%{?_isa} = %{version}-%{release} + Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) ) ++Requires: glusterfs-selinux >= 0.1.0-2 ++%endif + # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse + Requires: %{name}-fuse%{?_isa} = %{version}-%{release} + # self-heal daemon, rebalance, nfs-server etc. 
are actually clients +-- +1.8.3.1 + diff --git a/SOURCES/0468-build-Update-the-glusterfs-selinux-version.patch b/SOURCES/0468-build-Update-the-glusterfs-selinux-version.patch new file mode 100644 index 0000000..b4b5ead --- /dev/null +++ b/SOURCES/0468-build-Update-the-glusterfs-selinux-version.patch @@ -0,0 +1,36 @@ +From 4b72f5e7704d480bac869f7a32ac891898bb994f Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Thu, 3 Sep 2020 14:56:27 +0000 +Subject: [PATCH 468/468] build: Update the glusterfs-selinux version + +Updated the glusterfs-selinux version according to +the downstream official version. + +Label: DOWNSTREAM ONLY + +BUG: 1460657 + +Change-Id: I7b8bbf53f71f6f56103042950d8910f0cb63a685 +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/210685 +Tested-by: RHGS Build Bot +--- + glusterfs.spec.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index ed6bdf3..30d7162 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -651,7 +651,7 @@ Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-cli%{?_isa} = %{version}-%{release} + Requires: %{name}-libs%{?_isa} = %{version}-%{release} + %if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) ) +-Requires: glusterfs-selinux >= 0.1.0-2 ++Requires: glusterfs-selinux >= 1.0-1 + %endif + # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse + Requires: %{name}-fuse%{?_isa} = %{version}-%{release} +-- +1.8.3.1 + diff --git a/SOURCES/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch b/SOURCES/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch new file mode 100644 index 0000000..0fadfc9 --- /dev/null +++ b/SOURCES/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch @@ -0,0 +1,33 @@ +From 6fed6cfcb26e6ed3c9640c5f889629315bbd83c2 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Mon, 31 Aug 2020 12:22:05 +0530 +Subject: [PATCH 469/469] 
cluster/ec: Don't trigger heal for stale index + + > Fixes: #1385 + > Upstream-patch: https://review.gluster.org/c/glusterfs/+/24930 + +BUG: 1785714 +Change-Id: I3609dd2e1f63c4bd6a19d528b935bf5b05443824 +Signed-off-by: Pranith Kumar K +Reviewed-on: https://code.engineering.redhat.com/gerrit/210731 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/ec/src/ec-heal.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 06bafa5..f6376cd 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2624,6 +2624,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + * removed. We need not take lock on this entry to do so as + * xattrop on a brick is atomic. */ + ec_heal_purge_stale_index(frame, ec, loc->inode); ++ goto out; + } else if (need_heal == EC_HEAL_NONEED) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); +-- +1.8.3.1 + diff --git a/SOURCES/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch b/SOURCES/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch new file mode 100644 index 0000000..e26d46a --- /dev/null +++ b/SOURCES/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch @@ -0,0 +1,63 @@ +From 8e427716f4e2855093b1a1a0e3a9ec79ebac7faf Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya +Date: Thu, 10 Sep 2020 13:49:09 +0530 +Subject: [PATCH 470/473] extras/snap_scheduler: changes in + gluster-shared-storage mount path + +The patch https://review.gluster.org/#/c/glusterfs/+/24934/, changes mount point +of gluster_shared_storage from /var/run to /run to address the issue of symlink +at mount path in fstab. +NOTE: mount point /var/run is symlink to /run + +The required changes with respect to gluster_shared_storage mount path are +introduced with this patch in snap_scheduler. 
+ +>Fixes: #1476 +>Change-Id: I9ce88c2f624c6aa5982de04edfee2d0a9f160d62 +>Signed-off-by: Shwetha K Acharya + +backport of https://review.gluster.org/#/c/glusterfs/+/24971/ +BUG: 1873469 +Change-Id: I9ce88c2f624c6aa5982de04edfee2d0a9f160d62 +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/211391 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/snap_scheduler/gcron.py | 4 ++-- + extras/snap_scheduler/snap_scheduler.py | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py +index cc16310..0e4df77 100755 +--- a/extras/snap_scheduler/gcron.py ++++ b/extras/snap_scheduler/gcron.py +@@ -19,10 +19,10 @@ import logging.handlers + import fcntl + + +-GCRON_TASKS = "/var/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" ++GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" + GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks" + GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag" +-LOCK_FILE_DIR = "/var/run/gluster/shared_storage/snaps/lock_files/" ++LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/" + log = logging.getLogger("gcron-logger") + start_time = 0.0 + +diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py +index 5a29d41..e8fcc44 100755 +--- a/extras/snap_scheduler/snap_scheduler.py ++++ b/extras/snap_scheduler/snap_scheduler.py +@@ -67,7 +67,7 @@ except ImportError: + SCRIPT_NAME = "snap_scheduler" + scheduler_enabled = False + log = logging.getLogger(SCRIPT_NAME) +-SHARED_STORAGE_DIR="/var/run/gluster/shared_storage" ++SHARED_STORAGE_DIR="/run/gluster/shared_storage" + GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled" + GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled" + GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks" +-- +1.8.3.1 + diff --git 
a/SOURCES/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch b/SOURCES/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch new file mode 100644 index 0000000..0ebba37 --- /dev/null +++ b/SOURCES/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch @@ -0,0 +1,73 @@ +From d23ad767281af85cf07f5c3f63de482d40ee1953 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya +Date: Thu, 10 Sep 2020 13:16:12 +0530 +Subject: [PATCH 471/473] nfs-ganesha: gluster_shared_storage fails to + automount on node reboot on rhel 8 + +The patch https://review.gluster.org/#/c/glusterfs/+/24934/, changes mount point +of gluster_shared_storage from /var/run to /run to address the issue of symlink +at mount path in fstab. +NOTE: mount point /var/run is symlink to /run + +The required changes with respect to gluster_shared_storage mount path are +introduced with this patch in nfs-ganesha. + +>Fixes: #1475 +>Change-Id: I9c7677a053e1291f71476d47ba6fa2e729f59625 +>Signed-off-by: Shwetha K Acharya + +backport of https://review.gluster.org/#/c/glusterfs/+/24970/ +BUG: 1873469 +Change-Id: I9c7677a053e1291f71476d47ba6fa2e729f59625 +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/211392 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + extras/ganesha/ocf/ganesha_nfsd | 2 +- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + extras/hook-scripts/start/post/S31ganesha-start.sh | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd +index 93fc8be..f91e8b6 100644 +--- a/extras/ganesha/ocf/ganesha_nfsd ++++ b/extras/ganesha/ocf/ganesha_nfsd +@@ -36,7 +36,7 @@ else + . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + fi + +-OCF_RESKEY_ha_vol_mnt_default="/var/run/gluster/shared_storage" ++OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage" + : ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}} + + ganesha_meta_data() { +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index a6814b1..9790a71 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -24,7 +24,7 @@ GANESHA_HA_SH=$(realpath $0) + HA_NUM_SERVERS=0 + HA_SERVERS="" + HA_VOL_NAME="gluster_shared_storage" +-HA_VOL_MNT="/var/run/gluster/shared_storage" ++HA_VOL_MNT="/run/gluster/shared_storage" + HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" + SERVICE_MAN="DISTRO_NOT_FOUND" + +diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh +index 90ba6bc..7ad6f23 100755 +--- a/extras/hook-scripts/start/post/S31ganesha-start.sh ++++ b/extras/hook-scripts/start/post/S31ganesha-start.sh +@@ -4,7 +4,7 @@ OPTSPEC="volname:,gd-workdir:" + VOL= + declare -i EXPORT_ID + ganesha_key="ganesha.enable" +-GANESHA_DIR="/var/run/gluster/shared_storage/nfs-ganesha" ++GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha" + CONF1="$GANESHA_DIR/ganesha.conf" + GLUSTERD_WORKDIR= + +-- +1.8.3.1 + diff --git a/SOURCES/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch b/SOURCES/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch new file mode 100644 index 0000000..79d4d0e --- /dev/null +++ b/SOURCES/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch @@ -0,0 +1,98 @@ +From ccd45222c46b91b4d0cd57db9ea8b1515c97ada0 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya +Date: Mon, 31 Aug 2020 20:08:39 +0530 +Subject: [PATCH 472/473] geo-rep: gluster_shared_storage fails to automount on + node reboot on rhel 8. + +Issue: On reboot, all the mounts get wiped out. 
+ Only the mounts mentioned in /etc/fstab automatically gets mounted + during boot/reboot. + + But /etc/fstab complains on not getting a canonical path + (it gets path containing a symlink) + This is because the gluster_shared_storage, is mounted to + /var/run which is symlink to /run. This is a general practice + followed by most operating systems. + + [root@ ~]# ls -lsah /var/run + 0 lrwxrwxrwx. 1 root root 6 Jul 22 19:39 /var/run -> ../run + +Fix: Mount gluster_shared_storage on /run. + (Also It is seen that /var/run is mostly + used by old or legacy systems, thus it is a good practice to + update /var/run to /run) + +>fixes: #1459 +>Change-Id: I8c16653be8cd746c84f01abf0eea19284fb97c77 +>Signed-off-by: Shwetha K Acharya + +backport of https://review.gluster.org/#/c/glusterfs/+/24934/ +BUG: 1873469 +Change-Id: I8c16653be8cd746c84f01abf0eea19284fb97c77 +Signed-off-by: Shwetha K Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/211387 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../set/post/S32gluster_enable_shared_storage.sh | 18 +++++++++--------- + geo-replication/gsyncd.conf.in | 2 +- + 2 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +index 885ed03..3bae37c 100755 +--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh ++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +@@ -79,9 +79,9 @@ done + + if [ "$option" == "disable" ]; then + # Unmount the volume on all the nodes +- umount /var/run/gluster/shared_storage +- cat /etc/fstab | grep -v "gluster_shared_storage /var/run/gluster/shared_storage/" > /var/run/gluster/fstab.tmp +- mv /var/run/gluster/fstab.tmp /etc/fstab ++ umount /run/gluster/shared_storage ++ cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp ++ mv 
/run/gluster/fstab.tmp /etc/fstab + fi + + if [ "$is_originator" == 1 ]; then +@@ -105,7 +105,7 @@ function check_volume_status() + } + + mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ +- /var/run/gluster/shared_storage" ++ /run/gluster/shared_storage" + + if [ "$option" == "enable" ]; then + retry=0; +@@ -120,10 +120,10 @@ if [ "$option" == "enable" ]; then + status=$(check_volume_status) + done + # Mount the volume on all the nodes +- umount /var/run/gluster/shared_storage +- mkdir -p /var/run/gluster/shared_storage ++ umount /run/gluster/shared_storage ++ mkdir -p /run/gluster/shared_storage + $mount_cmd +- cp /etc/fstab /var/run/gluster/fstab.tmp +- echo "$local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage/ glusterfs defaults 0 0" >> /var/run/gluster/fstab.tmp +- mv /var/run/gluster/fstab.tmp /etc/fstab ++ cp /etc/fstab /run/gluster/fstab.tmp ++ echo "$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp ++ mv /run/gluster/fstab.tmp /etc/fstab + fi +diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in +index 11e57fd..9688c79 100644 +--- a/geo-replication/gsyncd.conf.in ++++ b/geo-replication/gsyncd.conf.in +@@ -123,7 +123,7 @@ type=bool + help=Use this to set Active Passive mode to meta-volume. 
+ + [meta-volume-mnt] +-value=/var/run/gluster/shared_storage ++value=/run/gluster/shared_storage + help=Meta Volume or Shared Volume mount path + + [allow-network] +-- +1.8.3.1 + diff --git a/SOURCES/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch b/SOURCES/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch new file mode 100644 index 0000000..0629fa7 --- /dev/null +++ b/SOURCES/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch @@ -0,0 +1,75 @@ +From 80f1b3aedcde02ae25b341519857ba9a5b2fa722 Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha +Date: Thu, 24 Sep 2020 19:43:29 +0530 +Subject: [PATCH 473/473] glusterd: Fix Add-brick with increasing replica count + failure + +Problem: add-brick operation fails with multiple bricks on same +server error when replica count is increased. + +This was happening because of extra runs in a loop to compare +hostnames and if bricks supplied were less than "replica" count, +the bricks will get compared to itself resulting in above error. 
+ +>Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/25029 +>Fixes: #1508 + +BUG: 1881823 +Change-Id: I8668e964340b7bf59728bb838525d2db062197ed +Signed-off-by: Sheetal Pamecha +Reviewed-on: https://code.engineering.redhat.com/gerrit/213064 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/glusterd/brick-order-check-add-brick.t | 21 +++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-utils.c | 4 ++++ + 2 files changed, 25 insertions(+) + +diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t +index 29f0ed1..0be31da 100644 +--- a/tests/bugs/glusterd/brick-order-check-add-brick.t ++++ b/tests/bugs/glusterd/brick-order-check-add-brick.t +@@ -37,4 +37,25 @@ EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks' + TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force + EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks' + ++TEST $CLI_1 volume stop $V0 ++TEST $CLI_1 volume delete $V0 ++ ++TEST $CLI_1 volume create $V0 replica 2 $H1:$L1/${V0}1 $H2:$L2/${V0}1 ++EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks' ++EXPECT 'Created' volinfo_field $V0 'Status' ++ ++TEST $CLI_1 volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++ ++#Add-brick with Increasing replica count ++TEST $CLI_1 volume add-brick $V0 replica 3 $H3:$L3/${V0}1 ++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks' ++ ++#Add-brick with Increasing replica count from same host should fail ++TEST ! $CLI_1 volume add-brick $V0 replica 5 $H1:$L1/${V0}2 $H1:$L1/${V0}3 ++ ++#adding multiple bricks from same host should fail the brick order check ++TEST ! 
$CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}{4..6} $H2:$L2/${V0}{7..9} ++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks' ++ + cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 545e688..d25fc8a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -14908,6 +14908,10 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + i = 0; + ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); + ++ if (brick_count < sub_count) { ++ sub_count = brick_count; ++ } ++ + /* Check for bad brick order */ + while (i < brick_count) { + ++i; +-- +1.8.3.1 + diff --git a/SOURCES/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch b/SOURCES/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch new file mode 100644 index 0000000..034a2a2 --- /dev/null +++ b/SOURCES/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch @@ -0,0 +1,49 @@ +From 3612b3a46c33d19bb7d4aee6eb6625d8d903d459 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Wed, 17 Jun 2020 10:44:37 +0530 +Subject: [PATCH 474/478] features/locks: posixlk-clear-lock should set error + as EINTR + +Problem: +fuse on receiving interrupt for setlk sends clear-lock "fop" +using virtual-getxattr. At the moment blocked locks which are +cleared return EAGAIN errno as opposed to EINTR errno + +Fix: +Return EINTR errno. 
+ +Upstream: +> Reviewed-on: https://review.gluster.org/24587 +> Updates: #1310 +> Change-Id: I47de0fcaec370b267f2f5f89deeb37e1b9c0ee9b +> Signed-off-by: Pranith Kumar K + +BUG: 1821743 +Change-Id: Id8301ce6e21c009949e88db5904d8b6ecc278f66 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/216157 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/features/locks/src/clear.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c +index 116aed6..ab1eac6 100644 +--- a/xlators/features/locks/src/clear.c ++++ b/xlators/features/locks/src/clear.c +@@ -181,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, + if (plock->blocked) { + bcount++; + pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW, +- &plock->user_flock, -1, EAGAIN, NULL); ++ &plock->user_flock, -1, EINTR, NULL); + +- STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN, ++ STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR, + &plock->user_flock, NULL); + + } else { +-- +1.8.3.1 + diff --git a/SOURCES/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch b/SOURCES/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch new file mode 100644 index 0000000..24a62b3 --- /dev/null +++ b/SOURCES/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch @@ -0,0 +1,46 @@ +From 47d8c316f622850d060af90d1d939528ace5607a Mon Sep 17 00:00:00 2001 +From: Csaba Henk +Date: Thu, 14 Feb 2019 02:01:38 +0100 +Subject: [PATCH 475/478] fuse lock interrupt: fix flock_interrupt.t + +Upstream: +> Reviewed-on: https://review.gluster.org/22213 +> updates: bz#1193929 +> Change-Id: I347de62755100cd69e3cf341434767ae23fd1ba4 +> Signed-off-by: Csaba Henk + +BUG: 1821743 +Change-Id: I0088f804bca215152e7ca2c490402c11f7b5333a +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/216158 +Tested-by: RHGS Build Bot +Reviewed-by: 
Sunil Kumar Heggodu Gopala Acharya +--- + tests/features/flock_interrupt.t | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t +index 8603b65..964a4bc 100644 +--- a/tests/features/flock_interrupt.t ++++ b/tests/features/flock_interrupt.t +@@ -22,12 +22,12 @@ EXPECT 'Started' volinfo_field $V0 'Status'; + TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + TEST touch $M0/testfile; + +-function flock_interrupt { +- flock $MO/testfile sleep 3 & flock -w 1 $M0/testfile true; +- echo ok; +-} ++echo > got_lock ++flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok > got_lock; } & + +-EXPECT_WITHIN 2 ok flock_interrupt; ++EXPECT_WITHIN 4 ok cat got_lock; + + ## Finish up ++sleep 7; ++rm -f got_lock; + cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch b/SOURCES/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch new file mode 100644 index 0000000..6c9d736 --- /dev/null +++ b/SOURCES/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch @@ -0,0 +1,114 @@ +From 40519185067d891f06818c574301ea1af4b36479 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K +Date: Wed, 17 Jun 2020 10:45:19 +0530 +Subject: [PATCH 476/478] mount/fuse: use cookies to get fuse-interrupt-record + instead of xdata + +Problem: +On executing tests/features/flock_interrupt.t the following error log +appears +[2020-06-16 11:51:54.631072 +0000] E +[fuse-bridge.c:4791:fuse_setlk_interrupt_handler_cbk] 0-glusterfs-fuse: +interrupt record not found + +This happens because fuse-interrupt-record is never sent on the wire by +getxattr fop and there is no guarantee that in the cbk it will be +available in case of failures. 
+ +Fix: +wind getxattr fop with fuse-interrupt-record as cookie and recover it +in the cbk + +Upstream: +> Reviewed-on: https://review.gluster.org/24588 +> Fixes: #1310 +> Change-Id: I4cfff154321a449114fc26e9440db0f08e5c7daa +> Signed-off-by: Pranith Kumar K + +BUG: 1821743 +Change-Id: If9576801654d4d743bd66ae90ca259c4d34746a7 +Signed-off-by: Csaba Henk +Reviewed-on: https://code.engineering.redhat.com/gerrit/216159 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/features/flock_interrupt.t | 1 - + xlators/mount/fuse/src/fuse-bridge.c | 28 +++++++--------------------- + 2 files changed, 7 insertions(+), 22 deletions(-) + +diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t +index 964a4bc..b8717e3 100644 +--- a/tests/features/flock_interrupt.t ++++ b/tests/features/flock_interrupt.t +@@ -28,6 +28,5 @@ flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok + EXPECT_WITHIN 4 ok cat got_lock; + + ## Finish up +-sleep 7; + rm -f got_lock; + cleanup; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index f61fa39..1bddac2 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4768,16 +4768,8 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie, + int32_t op_errno, dict_t *dict, dict_t *xdata) + { + fuse_interrupt_state_t intstat = INTERRUPT_NONE; +- fuse_interrupt_record_t *fir; ++ fuse_interrupt_record_t *fir = cookie; + fuse_state_t *state = NULL; +- int ret = 0; +- +- ret = dict_get_bin(xdata, "fuse-interrupt-record", (void **)&fir); +- if (ret < 0) { +- gf_log("glusterfs-fuse", GF_LOG_ERROR, "interrupt record not found"); +- +- goto out; +- } + + intstat = op_ret >= 0 ? 
INTERRUPT_HANDLED : INTERRUPT_SQUELCHED; + +@@ -4789,7 +4781,6 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie, + GF_FREE(state); + } + +-out: + STACK_DESTROY(frame->root); + + return 0; +@@ -4827,9 +4818,10 @@ fuse_setlk_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir) + frame->op = GF_FOP_GETXATTR; + state->name = xattr_name; + +- STACK_WIND(frame, fuse_setlk_interrupt_handler_cbk, state->active_subvol, +- state->active_subvol->fops->fgetxattr, state->fd, xattr_name, +- state->xdata); ++ STACK_WIND_COOKIE(frame, fuse_setlk_interrupt_handler_cbk, fir, ++ state->active_subvol, ++ state->active_subvol->fops->fgetxattr, state->fd, ++ xattr_name, state->xdata); + + return; + +@@ -4852,15 +4844,9 @@ fuse_setlk_resume(fuse_state_t *state) + fir = fuse_interrupt_record_new(state->finh, fuse_setlk_interrupt_handler); + state_clone = gf_memdup(state, sizeof(*state)); + if (state_clone) { +- /* +- * Calling this allocator with fir casted to (char *) seems like +- * an abuse of this API, but in fact the API is stupid to assume +- * a (char *) argument (in the funcion it's casted to (void *) +- * anyway). +- */ +- state_clone->xdata = dict_for_key_value( +- "fuse-interrupt-record", (char *)fir, sizeof(*fir), _gf_true); ++ state_clone->xdata = dict_new(); + } ++ + if (!fir || !state_clone || !state_clone->xdata) { + if (fir) { + GF_FREE(fir); +-- +1.8.3.1 + diff --git a/SOURCES/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch b/SOURCES/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch new file mode 100644 index 0000000..c604ccd --- /dev/null +++ b/SOURCES/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch @@ -0,0 +1,51 @@ +From 3d50207b346cb5d95af94aa010ebd1ec3e795554 Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar +Date: Wed, 4 Nov 2020 11:44:51 +0530 +Subject: [PATCH 477/478] glusterd/snapshot: Snapshot prevalidation failure not + failing. 
+ +The value of `ret` is to be set to `-1` to indicate failure +or else the prevalidation which is supposed to be a failure +as the snapshot isn't even activated for cloning will move +to next stage. + +Label: DOWNSTREAM ONLY +BUG: 1837926 + +Change-Id: I95122c3a261332630efa00033a1892a8f95fc00b +Signed-off-by: srijan-sivakumar +Reviewed-on: https://code.engineering.redhat.com/gerrit/216920 +Tested-by: RHGS Build Bot +Reviewed-by: Shwetha Acharya +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/mgmt/glusterd/src/glusterd-snapshot.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +index 5b8ae97..ee3cea0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +@@ -2298,8 +2298,8 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr, + goto out; + } + +- + if (!glusterd_is_volume_started(snap_vol)) { ++ ret = -1; + snprintf(err_str, sizeof(err_str), + "Snapshot %s is " + "not activated", +@@ -9361,7 +9361,8 @@ glusterd_handle_snapshot_fn(rpcsvc_request_t *req) + "for a snapshot"); + op_errno = EG_OPNOTSUP; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, +- "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_RHS_3_0); ++ "%s (%d < %d)", err_str, conf->op_version, ++ GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +-- +1.8.3.1 + diff --git a/SOURCES/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch b/SOURCES/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch new file mode 100644 index 0000000..596fe2b --- /dev/null +++ b/SOURCES/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch @@ -0,0 +1,119 @@ +From e772bef5631017145cd0270d72a9ada1378e022a Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman +Date: Fri, 30 Oct 2020 08:27:47 +0200 +Subject: [PATCH 478/478] DHT - Fixing rebalance failure on issuing 
stop + command + +Issuing a stop command for an ongoing rebalance process results in an error. +This issue was brought up in https://bugzilla.redhat.com/1286171 and a patch +(https://review.gluster.org/24103/) was submitted to resolve the issue. + +However the submitted patch resolved only part of the +problem by reducing the number of log messages that were printed (since +rebalance is currently a recursive process, an error message was printed +for every directory) but didn't fully resolve the root cause for the +failure. + +This patch fixes the issue by modifying the code-path which handles the +termination of the rebalance process by issuing a stop command. + +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1628 +> fixes: #1627 +> Change-Id: I604f2b0f8b1ccb1026b8425a14200bbd1dc5bd03 +> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com + +BUG: 1286171 +Change-Id: I604f2b0f8b1ccb1026b8425a14200bbd1dc5bd03 +Signed-off-by: Barak Sason Rofman +Reviewed-on: https://code.engineering.redhat.com/gerrit/216896 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/dht/src/dht-rebalance.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index abc10fc..d49a719 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -3113,12 +3113,10 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + struct dht_container *tmp_container = NULL; + + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +- ret = -1; + goto out; + } + + if (dir_dfmeta->offset_var[i].readdir_done == 1) { +- ret = 0; + goto out; + } + +@@ -3135,7 +3133,6 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + &(dir_dfmeta->equeue[i]), xattr_req, NULL); + if (ret == 0) { + dir_dfmeta->offset_var[i].readdir_done = 1; +- ret = 0; + goto out; + } + +@@ -3161,7
+3158,6 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + + while (1) { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +- ret = -1; + goto out; + } + +@@ -3273,12 +3269,14 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + } + + out: +- if (ret == 0) { +- *container = tmp_container; +- } else { +- if (tmp_container) { ++ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { ++ if (ret == 0) { ++ *container = tmp_container; ++ } else { + gf_defrag_free_container(tmp_container); + } ++ } else { ++ gf_defrag_free_container(tmp_container); + } + + return ret; +@@ -3487,7 +3485,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + migrate_data, dir_dfmeta, xattr_req, + &should_commit_hash, perrno); + +- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { ++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + goto out; + } + +@@ -3947,7 +3945,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout, + migrate_data); + +- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { ++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + goto out; + } + +@@ -4015,6 +4013,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { + ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno); + ++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { ++ goto out; ++ } ++ + if (ret && (ret != 2)) { + if (perrno == ENOENT || perrno == ESTALE) { + ret = 0; +-- +1.8.3.1 + diff --git a/SOURCES/0479-ganesha-ha-revised-regex-exprs-for-status.patch b/SOURCES/0479-ganesha-ha-revised-regex-exprs-for-status.patch new file mode 100644 index 0000000..8bbdf9d --- /dev/null +++ b/SOURCES/0479-ganesha-ha-revised-regex-exprs-for-status.patch @@ -0,0 +1,53 @@ +From 9036c9f0fd081c83c5c4fcd1ecba858421442777 Mon Sep 17 00:00:00 2001 +From: "Kaleb 
S. KEITHLEY" +Date: Tue, 10 Nov 2020 07:39:14 -0500 +Subject: [PATCH 479/479] ganesha-ha: revised regex exprs for --status + +better whitespace in regex + +This has worked for years, but somehow no longer works on rhel8 + +> Updates: #1000 +> Change-Id: I2c1a3537573d125608334772ba1a263c55407dd4 +> Signed-off-by: Kaleb S. KEITHLEY +> https://github.com/gluster/glusterfs/commit/4026fe9a956238d8e4785cf39c3b7290eae90f03 + +BUG: 1895301 +Change-Id: I2c1a3537573d125608334772ba1a263c55407dd4 +Signed-off-by: Kaleb S. KEITHLEY +Reviewed-on: https://code.engineering.redhat.com/gerrit/217480 +Tested-by: RHGS Build Bot +--- + extras/ganesha/scripts/ganesha-ha.sh | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 9790a71..491c61d 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -948,18 +948,18 @@ status() + # check if the VIP and port block/unblock RAs are on the expected nodes + for n in ${nodes[*]}; do + +- grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ grep -E -x "${n}-nfs_block +\(ocf::heartbeat:portblock\): +Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) +- grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ grep -E -x "${n}-cluster_ip-1 +\(ocf::heartbeat:IPaddr\): +Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) +- grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ grep -E -x "${n}-nfs_unblock +\(ocf::heartbeat:portblock\): +Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + done + +- grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch} ++ grep -E "\): +Stopped|FAILED" > /dev/null 2>&1 ${scratch} + result=$? 
+ + if [ ${result} -eq 0 ]; then +-- +1.8.3.1 + diff --git a/SOURCES/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch b/SOURCES/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch new file mode 100644 index 0000000..31c404f --- /dev/null +++ b/SOURCES/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch @@ -0,0 +1,255 @@ +From 759c12fc016a6399bb179aa0f930602c87d1e0f8 Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman +Date: Tue, 24 Nov 2020 12:56:10 +0200 +Subject: [PATCH 480/480] DHT/Rebalance - Ensure Rebalance reports status only + once upon stopping + +Upon issuing rebalance stop command, the status of rebalance is being +logged twice to the log file, which can sometimes result in +inconsistent reports (one report states status stopped, while the other +may report something else). + +This fix ensures rebalance reports its status only once and that the +correct status is being reported. + +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1783 +> fixes: #1782 +> Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37 +> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com + +BUG: 1286171 +Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37 +Signed-off-by: Barak Sason Rofman +Reviewed-on: https://code.engineering.redhat.com/gerrit/218953 +Tested-by: RHGS Build Bot +Reviewed-by: Csaba Henk +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + tests/bugs/distribute/bug-1286171.t | 75 +++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 2 +- + xlators/cluster/dht/src/dht-common.h | 2 +- + xlators/cluster/dht/src/dht-rebalance.c | 63 ++++++++++++++------------- + 4 files changed, 108 insertions(+), 34 deletions(-) + create mode 100644 tests/bugs/distribute/bug-1286171.t + +diff --git a/tests/bugs/distribute/bug-1286171.t b/tests/bugs/distribute/bug-1286171.t +new file mode 100644 +index 0000000..a2ca36f +--- /dev/null ++++ b/tests/bugs/distribute/bug-1286171.t +@@ -0,0 +1,75 @@
++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../cluster.rc ++. $(dirname $0)/../../volume.rc ++ ++# Initialize ++#------------------------------------------------------------ ++cleanup; ++ ++volname=bug-1286171 ++ ++# Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++# Create a volume ++TEST $CLI volume create $volname $H0:$B0/${volname}{1,2} ++ ++# Verify volume creation ++EXPECT "$volname" volinfo_field $volname 'Volume Name'; ++EXPECT 'Created' volinfo_field $volname 'Status'; ++ ++# Start volume and verify successful start ++TEST $CLI volume start $volname; ++EXPECT 'Started' volinfo_field $volname 'Status'; ++TEST glusterfs --volfile-id=$volname --volfile-server=$H0 --entry-timeout=0 $M0; ++#------------------------------------------------------------ ++ ++# Create a nested dir structure and some file under MP ++cd $M0; ++for i in {1..5} ++do ++ mkdir dir$i ++ cd dir$i ++ for j in {1..5} ++ do ++ mkdir dir$i$j ++ cd dir$i$j ++ for k in {1..5} ++ do ++ mkdir dir$i$j$k ++ cd dir$i$j$k ++ touch {1..300} ++ cd .. ++ done ++ touch {1..300} ++ cd .. ++ done ++ touch {1..300} ++ cd .. ++done ++touch {1..300} ++ ++# Add-brick and start rebalance ++TEST $CLI volume add-brick $volname $H0:$B0/${volname}4; ++TEST $CLI volume rebalance $volname start; ++ ++# Let rebalance run for a while ++sleep 5 ++ ++# Stop rebalance ++TEST $CLI volume rebalance $volname stop; ++ ++# Allow rebalance to stop ++sleep 5 ++ ++# Examine the logfile for errors ++cd /var/log/glusterfs; ++failures=`grep "failures:" ${volname}-rebalance.log | tail -1 | sed 's/.*failures: //; s/,.*//'`; ++ ++TEST [ $failures == 0 ]; ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 23cc80c..4db89df 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -10969,7 +10969,7 @@ dht_notify(xlator_t *this, int event, void *data, ...) 
+ if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER) || + (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) +- gf_defrag_status_get(conf, output); ++ gf_defrag_status_get(conf, output, _gf_false); + else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) + gf_defrag_start_detach_tier(defrag); + else if (cmd == GF_DEFRAG_CMD_DETACH_START) +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 9ec5b51..92f1b89 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -1252,7 +1252,7 @@ dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata); + + int +-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict); ++gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status); + + void + gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state); +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index d49a719..16ac16c 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -2720,7 +2720,6 @@ gf_defrag_migrate_single_file(void *opaque) + iatt_ptr = &entry->d_stat; + + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +- ret = -1; + goto out; + } + +@@ -3833,7 +3832,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + list_for_each_entry_safe(entry, tmp, &entries.list, list) + { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +- ret = 1; + goto out; + } + +@@ -4863,7 +4861,7 @@ out: + LOCK(&defrag->lock); + { + status = dict_new(); +- gf_defrag_status_get(conf, status); ++ gf_defrag_status_get(conf, status, _gf_true); + if (ctx && ctx->notify) + ctx->notify(GF_EN_DEFRAG_STATUS, status); + if (status) +@@ -4998,7 +4996,7 @@ out: + } + + int +-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) ++gf_defrag_status_get(dht_conf_t *conf, dict_t 
*dict, gf_boolean_t log_status) + { + int ret = 0; + uint64_t files = 0; +@@ -5095,34 +5093,35 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left"); + + log: +- switch (defrag->defrag_status) { +- case GF_DEFRAG_STATUS_NOT_STARTED: +- status = "not started"; +- break; +- case GF_DEFRAG_STATUS_STARTED: +- status = "in progress"; +- break; +- case GF_DEFRAG_STATUS_STOPPED: +- status = "stopped"; +- break; +- case GF_DEFRAG_STATUS_COMPLETE: +- status = "completed"; +- break; +- case GF_DEFRAG_STATUS_FAILED: +- status = "failed"; +- break; +- default: +- break; +- } ++ if (log_status) { ++ switch (defrag->defrag_status) { ++ case GF_DEFRAG_STATUS_NOT_STARTED: ++ status = "not started"; ++ break; ++ case GF_DEFRAG_STATUS_STARTED: ++ status = "in progress"; ++ break; ++ case GF_DEFRAG_STATUS_STOPPED: ++ status = "stopped"; ++ break; ++ case GF_DEFRAG_STATUS_COMPLETE: ++ status = "completed"; ++ break; ++ case GF_DEFRAG_STATUS_FAILED: ++ status = "failed"; ++ break; ++ default: ++ break; ++ } + +- gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, +- "Rebalance is %s. Time taken is %.2f secs", status, elapsed); +- gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, +- "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64 +- ", failures: %" PRIu64 +- ", skipped: " +- "%" PRIu64, +- files, size, lookup, failures, skipped); ++ gf_msg("DHT", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, ++ "Rebalance is %s. 
Time taken is %.2f secs " ++ "Files migrated: %" PRIu64 ", size: %" PRIu64 ++ ", lookups: %" PRIu64 ", failures: %" PRIu64 ++ ", skipped: " ++ "%" PRIu64, ++ status, elapsed, files, size, lookup, failures, skipped); ++ } + out: + return 0; + } +@@ -5299,7 +5298,7 @@ gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output) + defrag->defrag_status = status; + + if (output) +- gf_defrag_status_get(conf, output); ++ gf_defrag_status_get(conf, output, _gf_false); + ret = 0; + out: + gf_msg_debug("", 0, "Returning %d", ret); +-- +1.8.3.1 + diff --git a/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch b/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch new file mode 100644 index 0000000..dd9b0ab --- /dev/null +++ b/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch @@ -0,0 +1,33 @@ +From 346aa7cbc34b9bbbaca45180215a4d9ffd5055df Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya +Date: Fri, 19 Feb 2021 06:19:07 +0000 +Subject: [PATCH 481/481] RHGS-3.5.3 rebuild to ship with RHEL. 
+ +Label: DOWNSTREAM ONLY +BUG: 1930561 + +Change-Id: I9c7f30cc6bc616344b27072bfde056c7bba1e143 +Signed-off-by: Rinku Kothiya +Reviewed-on: https://code.engineering.redhat.com/gerrit/228413 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + glusterfs.spec.in | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 30d7162..52f9b40 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1983,6 +1983,8 @@ fi + %endif + + %changelog ++* Fri Feb 19 2021 Rinku Kothiya ++- Build RHGS clients for RHEL (#1930561) + + * Mon May 11 2020 Sunny Kumar + - added requires policycoreutils-python-utils on rhel8 for geo-replication +-- +1.8.3.1 + diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec index fef1771..905084f 100644 --- a/SPECS/glusterfs.spec +++ b/SPECS/glusterfs.spec @@ -237,7 +237,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 40%{?dist} +Release: 49.1%{?dist} ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ @@ -771,6 +771,31 @@ Patch0453: 0453-glusterd-add-brick-command-failure.patch Patch0454: 0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch Patch0455: 0455-locks-prevent-deletion-of-locked-entries.patch Patch0456: 0456-add-clean-local-after-grant-lock.patch +Patch0457: 0457-cluster-ec-Improve-detection-of-new-heals.patch +Patch0458: 0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch +Patch0459: 0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch +Patch0460: 0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch +Patch0461: 0461-geo-replication-Fix-IPv6-parsing.patch +Patch0462: 0462-Issue-with-gf_fill_iatt_for_dirent.patch +Patch0463: 0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch +Patch0464: 0464-storage-posix-Remove-nr_files-usage.patch +Patch0465: 0465-posix-Implement-a-janitor-thread-to-close-fd.patch +Patch0466:
0466-cluster-ec-Change-stale-index-handling.patch +Patch0467: 0467-build-Added-dependency-for-glusterfs-selinux.patch +Patch0468: 0468-build-Update-the-glusterfs-selinux-version.patch +Patch0469: 0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch +Patch0470: 0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch +Patch0471: 0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch +Patch0472: 0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch +Patch0473: 0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch +Patch0474: 0474-features-locks-posixlk-clear-lock-should-set-error-a.patch +Patch0475: 0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch +Patch0476: 0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch +Patch0477: 0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch +Patch0478: 0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch +Patch0479: 0479-ganesha-ha-revised-regex-exprs-for-status.patch +Patch0480: 0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch +Patch0481: 0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -1108,6 +1133,9 @@ Summary: Clustered file-system server Requires: %{name}%{?_isa} = %{version}-%{release} Requires: %{name}-cli%{?_isa} = %{version}-%{release} Requires: %{name}-libs%{?_isa} = %{version}-%{release} +%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) ) +Requires: glusterfs-selinux >= 1.0-1 +%endif # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse Requires: %{name}-fuse%{?_isa} = %{version}-%{release} # self-heal daemon, rebalance, nfs-server etc. 
are actually clients @@ -2511,6 +2539,36 @@ fi %endif %changelog +* Fri Feb 19 2021 Gluster Jenkins - 6.0-49.1 +- fixes bugs bz#1930561 + +* Wed Nov 25 2020 Gluster Jenkins - 6.0-49 +- fixes bugs bz#1286171 + +* Tue Nov 10 2020 Gluster Jenkins - 6.0-48 +- fixes bugs bz#1895301 + +* Thu Nov 05 2020 Gluster Jenkins - 6.0-47 +- fixes bugs bz#1286171 bz#1821743 bz#1837926 + +* Wed Oct 21 2020 Gluster Jenkins - 6.0-46 +- fixes bugs bz#1873469 bz#1881823 + +* Wed Sep 09 2020 Deepshikha Khandelwal - 6.0-45 +- fixes bugs bz#1785714 + +* Thu Sep 03 2020 Deepshikha Khandelwal - 6.0-44 +- fixes bugs bz#1460657 + +* Thu Sep 03 2020 Deepshikha Khandelwal - 6.0-43 +- fixes bugs bz#1460657 + +* Wed Sep 02 2020 Deepshikha Khandelwal - 6.0-42 +- fixes bugs bz#1785714 + +* Tue Aug 25 2020 Deepshikha Khandelwal - 6.0-41 +- fixes bugs bz#1785714 bz#1851424 bz#1851989 bz#1852736 bz#1853189 bz#1855966 + * Tue Jul 21 2020 Deepshikha Khandelwal - 6.0-40 - fixes bugs bz#1812789 bz#1844359 bz#1847081 bz#1854165