autobuild v6.0-41
Resolves: bz#1785714 bz#1851424 bz#1851989 bz#1852736 bz#1853189
Resolves: bz#1855966
Signed-off-by: Deepshikha Khandelwal <dkhandel@redhat.com>
parent 71080da8dd
commit bd380b90dd

0457-cluster-ec-Improve-detection-of-new-heals.patch (new file, 409 lines)
@@ -0,0 +1,409 @@
From 3e8b3a2c2c6f83635486035fc8040c87d89813d2 Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@redhat.com>
Date: Thu, 2 Jul 2020 18:08:52 +0200
Subject: [PATCH 457/465] cluster/ec: Improve detection of new heals

When EC successfully healed a directory it assumed that maybe other
entries inside that directory could have been created, which could
require additional heal cycles. For this reason, when the heal happened
as part of one index heal iteration, it triggered a new iteration.

The problem happened when the directory was healthy, so no new entries
were added, but its index entry was not removed for some reason. In
this case self-heal started an endless loop healing the same directory
continuously, causing high CPU utilization.

This patch improves detection of new files added to the heal index so
that a new index heal iteration is only triggered if there is new work
to do.
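
As a rough sketch of the mechanism (simplified types and names, not the
actual gluster code): the heal callback gains a 'pending' counter, and
the index healer reruns only when that counter reports newly created
entries.

    #include <stdbool.h>
    #include <stdint.h>

    struct healer {
        bool rerun; /* set when another index scan is needed */
    };

    /* hypothetical callback type mirroring the extended fop_heal_cbk_t */
    typedef void (*heal_cbk_t)(void *cookie, int op_ret, uint32_t pending);

    static void
    index_heal_done(void *cookie, int op_ret, uint32_t pending)
    {
        struct healer *h = cookie;

        /* Previously the healer rescanned unconditionally after healing a
         * directory; now it reruns only if new entries were created. */
        if (op_ret >= 0 && pending > 0)
            h->rerun = true;
    }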

>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24665/
>Fixes: #1354

Change-Id: I2355742b85fbfa6de758bccc5d2e1a283c82b53f
BUG: 1852736
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/208041
Tested-by: Ashish Pandey <aspandey@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
---
 xlators/cluster/ec/src/ec-common.c     |  2 +-
 xlators/cluster/ec/src/ec-heal.c       | 58 +++++++++++++++++++++++-----------
 xlators/cluster/ec/src/ec-heald.c      | 24 ++++++++++----
 xlators/cluster/ec/src/ec-inode-read.c | 27 ++++++++++++++--
 xlators/cluster/ec/src/ec-types.h      |  4 +--
 xlators/cluster/ec/src/ec.h            |  1 +
 6 files changed, 86 insertions(+), 30 deletions(-)

diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index e580bfb..e3f8769 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -230,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
int32_t
ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
- uintptr_t bad, dict_t *xdata)
+ uintptr_t bad, uint32_t pending, dict_t *xdata)
{
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 06a7016..e2de879 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -72,6 +72,7 @@ struct ec_name_data {
char *name;
inode_t *parent;
default_args_cbk_t *replies;
+ uint32_t heal_pending;
};

static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
@@ -996,6 +997,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
ret = -ENOTCONN;
goto out;
}
+
out:
if (xattr)
dict_unref(xattr);
@@ -1164,6 +1166,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
dict_t *xdata = NULL;
char *linkname = NULL;
ec_config_t config;
+
/* There should be just one gfid key */
EC_REPLIES_ALLOC(replies, ec->nodes);
if (gfid_db->count != 1) {
@@ -1408,6 +1411,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,

ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
participants);
+ if (ret >= 0) {
+ /* If ec_create_name() succeeded we return 1 to indicate that a new
+ * file has been created and it will need to be healed. */
+ ret = 1;
+ }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -1485,18 +1493,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
name_on);

- if (ret < 0)
+ if (ret < 0) {
memset(name_on, 0, ec->nodes);
+ } else {
+ name_data->heal_pending += ret;
+ }

for (i = 0; i < ec->nodes; i++)
if (name_data->participants[i] && !name_on[i])
name_data->failed_on[i] = 1;
+
return 0;
}

int
ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *participants)
+ unsigned char *participants, uint32_t *pending)
{
int i = 0;
int j = 0;
@@ -1509,7 +1521,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
name_data.frame = frame;
name_data.participants = participants;
name_data.failed_on = alloca0(ec->nodes);
- ;
+ name_data.heal_pending = 0;

for (i = 0; i < ec->nodes; i++) {
if (!participants[i])
@@ -1528,6 +1540,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
break;
}
}
+ *pending += name_data.heal_pending;
+
loc_wipe(&loc);
return ret;
}
@@ -1535,7 +1549,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
int
__ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
unsigned char *heal_on, unsigned char *sources,
- unsigned char *healed_sinks)
+ unsigned char *healed_sinks, uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *output = NULL;
@@ -1580,7 +1594,7 @@ unlock:
if (sources[i] || healed_sinks[i])
participants[i] = 1;
}
- ret = ec_heal_names(frame, ec, inode, participants);
+ ret = ec_heal_names(frame, ec, inode, participants, pending);

if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
goto out;
@@ -1601,7 +1615,8 @@ out:

int
ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *sources, unsigned char *healed_sinks)
+ unsigned char *sources, unsigned char *healed_sinks,
+ uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *up_subvols = NULL;
@@ -1632,7 +1647,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
goto unlock;
}
ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
- healed_sinks);
+ healed_sinks, pending);
}
unlock:
cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
@@ -1953,14 +1968,14 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
if (fop->cbks.heal) {
fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
(heal->good | heal->bad), heal->good, heal->bad,
- NULL);
+ 0, NULL);
}

return EC_STATE_END;
case -EC_STATE_REPORT:
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
- 0, 0, NULL);
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
+ fop->error, 0, 0, 0, 0, NULL);
}

return EC_STATE_END;
@@ -1997,14 +2012,15 @@ out:
if (fop != NULL) {
ec_manager(fop, error);
} else {
- func(frame, NULL, this, -1, error, 0, 0, 0, NULL);
+ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
}
}

int32_t
ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
ec_fop_data_t *fop = cookie;
ec_heal_t *heal = fop->data;
@@ -2489,6 +2505,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
intptr_t mbad = 0;
intptr_t good = 0;
intptr_t bad = 0;
+ uint32_t pending = 0;
ec_fop_data_t *fop = data;
gf_boolean_t blocking = _gf_false;
ec_heal_need_t need_heal = EC_HEAL_NONEED;
@@ -2524,7 +2541,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
if (loc->name && strlen(loc->name)) {
ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
participants);
- if (ret == 0) {
+ if (ret >= 0) {
gf_msg_debug(this->name, 0,
"%s: name heal "
"successful on %" PRIXPTR,
@@ -2542,7 +2559,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)

/* Mount triggers heal only when it detects that it must need heal, shd
* triggers heals periodically which need not be thorough*/
- if (ec->shd.iamshd) {
+ if (ec->shd.iamshd && (ret <= 0)) {
ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
&need_heal);

@@ -2552,13 +2569,15 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
goto out;
}
}
+
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
if (IA_ISREG(loc->inode->ia_type)) {
ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
healed_sinks);
} else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks);
+ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
+ &pending);
} else {
ret = 0;
memcpy(sources, participants, ec->nodes);
@@ -2588,10 +2607,11 @@ out:
if (fop->cbks.heal) {
fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
ec_char_array_to_mask(participants, ec->nodes),
- mgood & good, mbad & bad, NULL);
+ mgood & good, mbad & bad, pending, NULL);
}
if (frame)
STACK_DESTROY(frame->root);
+
return;
}

@@ -2638,8 +2658,8 @@ void
ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
{
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0,
- NULL);
+ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
+ 0, 0, NULL);
}
ec_fop_data_release(fop);
}
@@ -2826,7 +2846,7 @@ fail:
if (fop)
ec_fop_data_release(fop);
if (func)
- func(frame, NULL, this, -1, err, 0, 0, 0, NULL);
+ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
}

int
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index cba111a..4f4b6aa 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -156,15 +156,27 @@ int
ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
gf_boolean_t full)
{
+ dict_t *xdata = NULL;
+ uint32_t count;
int32_t ret;

- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL);
- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
+ ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata);
+ if (!full && (loc->inode->ia_type == IA_IFDIR)) {
/* If we have just healed a directory, it's possible that
- * other index entries have appeared to be healed. We put a
- * mark so that we can check it later and restart a scan
- * without delay. */
- healer->rerun = _gf_true;
+ * other index entries have appeared to be healed. */
+ if ((xdata != NULL) &&
+ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
+ (count > 0)) {
+ /* Force a rerun of the index healer. */
+ gf_msg_debug(healer->this->name, 0, "%d more entries to heal",
+ count);
+
+ healer->rerun = _gf_true;
+ }
+ }
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
}

return ret;
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index f87a94a..e82e8f6 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -393,7 +393,8 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state)
int32_t
ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
ec_fop_data_t *fop = cookie;
fop_getxattr_cbk_t func = fop->data;
@@ -402,6 +403,25 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
char *str;
char bin1[65], bin2[65];

+ /* We try to return the 'pending' information in xdata, but if this cannot
+ * be set, we will ignore it silently. We prefer to report the success or
+ * failure of the heal itself. */
+ if (xdata == NULL) {
+ xdata = dict_new();
+ } else {
+ dict_ref(xdata);
+ }
+ if (xdata != NULL) {
+ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) {
+ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc
+ * enforces to check the result in this case. However we don't
+ * really care if it succeeded or not. We'll just do the same.
+ *
+ * This empty 'if' avoids the warning, and it will be removed by
+ * the optimizer. */
+ }
+ }
+
if (op_ret >= 0) {
dict = dict_new();
if (dict == NULL) {
@@ -435,11 +455,14 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
}

out:
- func(frame, NULL, xl, op_ret, op_errno, dict, NULL);
+ func(frame, NULL, xl, op_ret, op_errno, dict, xdata);

if (dict != NULL) {
dict_unref(dict);
}
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }

return 0;
}
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 34a9768..f15429d 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -186,10 +186,10 @@ struct _ec_inode {

typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);

union _ec_cbk {
fop_access_cbk_t access;
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
index 1b210d9..6f6de6d 100644
--- a/xlators/cluster/ec/src/ec.h
+++ b/xlators/cluster/ec/src/ec.h
@@ -18,6 +18,7 @@
#define EC_XATTR_SIZE EC_XATTR_PREFIX "size"
#define EC_XATTR_VERSION EC_XATTR_PREFIX "version"
#define EC_XATTR_HEAL EC_XATTR_PREFIX "heal"
+#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new"
#define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty"
#define EC_STRIPE_CACHE_MAX_SIZE 10
#define EC_VERSION_SIZE 2
--
1.8.3.1
0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch (new file, 182 lines)
@@ -0,0 +1,182 @@
From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001
From: Raghavendra Bhat <raghavendra@redhat.com>
Date: Mon, 15 Apr 2019 14:09:34 -0400
Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after
 cancelling the signer thread

When the bit-rot feature is disabled, the signer thread from the
bit-rot-stub xlator (the thread which performs the setxattr of the
signature onto the disk) is cancelled. But if the cancelled signer
thread had already held the mutex (&priv->lock) which it uses to
monitor the queue of files to be signed, then the mutex is never
released. This creates problems in the future when the feature is
enabled again: both the new instance of the signer thread and the
regular thread which enqueues the files to be signed will be blocked
on this mutex.

So, as part of cancelling the signer thread, unlock the mutex associated
with it as well, using pthread_cleanup_push and pthread_cleanup_pop.
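
The pattern used by the fix, as a self-contained sketch (assumed names,
not the stub's actual structures): pthread_cond_wait() inside the lock
is a cancellation point, so a cleanup handler must release the mutex if
the thread is cancelled there.

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void
    lock_cleaner(void *arg)
    {
        /* runs automatically if the thread is cancelled while holding it */
        pthread_mutex_unlock(arg);
    }

    static void *
    worker(void *arg)
    {
        for (;;) {
            pthread_cleanup_push(lock_cleaner, &lock);
            pthread_mutex_lock(&lock);
            /* ... wait on a condition variable / dequeue work ... */
            pthread_mutex_unlock(&lock);
            pthread_cleanup_pop(0); /* 0: unregister without running it */
        }
        return NULL;
    }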

Upstream patch:
> patch: https://review.gluster.org/22572
> fixes: #bz1700078
> Change-Id: Ib761910caed90b268e69794ddeb108165487af40

Change-Id: Ib761910caed90b268e69794ddeb108165487af40
BUG: 1851424
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/208304
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 .../bit-rot/src/stub/bit-rot-stub-messages.h     |  4 +-
 xlators/features/bit-rot/src/stub/bit-rot-stub.c | 62 +++++++++++++++++++---
 2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
index 7f07f29..155802b 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
- BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
+ BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
+ BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL);

#endif /* !_BITROT_STUB_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 3f48a4b..c3f81bc 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -26,6 +26,15 @@

#define BR_STUB_REQUEST_COOKIE 0x1

+void
+br_stub_lock_cleaner(void *arg)
+{
+ pthread_mutex_t *clean_mutex = arg;
+
+ pthread_mutex_unlock(clean_mutex);
+ return;
+}
+
void *
br_stub_signth(void *);

@@ -166,8 +175,11 @@ init(xlator_t *this)

ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
"brssign");
- if (ret != 0)
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
+ "failed to create the new thread for signer");
goto cleanup_lock;
+ }

ret = br_stub_bad_object_container_init(this, priv);
if (ret) {
@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options)
priv = this->private;

GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
- if (priv->do_versioning) {
+ if (priv->do_versioning && !priv->signth) {
ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
"brssign");
- if (ret != 0)
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED,
+ "failed to create the new thread for signer");
goto err;
+ }

ret = br_stub_bad_object_container_init(this, priv);
if (ret) {
@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
gf_msg(this->name, GF_LOG_ERROR, 0,
BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
"Could not cancel sign serializer thread");
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
+ "killed the signer thread");
+ priv->signth = 0;
}
- priv->signth = 0;
}

if (priv->container.thread) {
@@ -902,6 +921,24 @@ br_stub_signth(void *arg)

THIS = this;
while (1) {
+ /*
+ * Disabling bit-rot feature leads to this particular thread
+ * getting cleaned up by reconfigure via a call to the function
+ * gf_thread_cleanup_xint (which in turn calls pthread_cancel
+ * and pthread_join). But, if this thread had held the mutex
+ * &priv->lock at the time of cancellation, then it leads to
+ * deadlock in future when bit-rot feature is enabled (which
+ * again spawns this thread which cant hold the lock as the
+ * mutex is still held by the previous instance of the thread
+ * which got killed). Also, the br_stub_handle_object_signature
+ * function which is called whenever file has to be signed
+ * also gets blocked as it too attempts to acquire &priv->lock.
+ *
+ * So, arrange for the lock to be unlocked as part of the
+ * cleanup of this thread using pthread_cleanup_push and
+ * pthread_cleanup_pop.
+ */
+ pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
pthread_mutex_lock(&priv->lock);
{
while (list_empty(&priv->squeue))
@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
list_del_init(&sigstub->list);
}
pthread_mutex_unlock(&priv->lock);
+ pthread_cleanup_pop(0);

call_resume(sigstub->stub);

@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,

priv = this->private;

- if (frame->root->pid != GF_CLIENT_PID_BITD)
+ if (frame->root->pid != GF_CLIENT_PID_BITD) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
+ "PID %d from where signature request"
+ "came, does not belong to bit-rot daemon."
+ "Unwinding the fop",
+ frame->root->pid);
goto dofop;
+ }

ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
- if (ret)
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "failed to prepare the signature for %s. Unwinding the fop",
+ uuid_utoa(fd->inode->gfid));
goto dofop;
+ }
if (fakesuccess) {
op_ret = op_errno = 0;
goto dofop;
@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
/* object signature request */
ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign);
if (!ret) {
+ gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
+ uuid_utoa(fd->inode->gfid));
br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
goto done;
}
--
1.8.3.1
0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch (new file, 181 lines)
@@ -0,0 +1,181 @@
From 80eef2f52bb92ed740ac00eeb11ee7a3e7fffff2 Mon Sep 17 00:00:00 2001
From: Raghavendra Bhat <raghavendra@redhat.com>
Date: Mon, 11 Mar 2019 12:16:50 -0400
Subject: [PATCH 459/465] features/bit-rot: Unconditionally sign the files
 during oneshot crawl

Currently the bit-rot feature has an issue with disabling and re-enabling
it on the same volume. Consider enabling bit-rot detection, which goes on
to crawl and sign all the files present in the volume. Then some files are
modified, and the bit-rot daemon goes on to sign the modified files with
the correct signature. Now, disable the bit-rot feature. While signing and
scrubbing are not happening, the previous checksums of the files continue
to exist as extended attributes. Now, if some files with checksum xattrs
get modified, they are not signed with a new signature as the feature is
off.

At this point, if the feature is enabled again, the bit-rot daemon will
go and sign those files which do not have any bit-rot specific xattrs
(i.e. those files which were created after bit-rot was disabled), whereas
the files with bit-rot xattrs won't get signed with a proper new checksum.
If the scrubber then runs, it finds the on-disk checksum and the actual
checksum of the file to be different (because the file got modified) and
marks the file as corrupted.

FIX:

The fix is to unconditionally sign the files when the bit-rot daemon
comes up (instead of skipping the files with bit-rot xattrs).

upstream fix:
> patch: https://review.gluster.org/#/c/glusterfs/+/22360/
> fixes: #bz1700078
> Change-ID: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5

Change-Id: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
BUG: 1851424
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/208305
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 tests/bitrot/bug-1700078.t                  | 87 +++++++++++++++++++++++++++++
 xlators/features/bit-rot/src/bitd/bit-rot.c | 15 ++++-
 2 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 tests/bitrot/bug-1700078.t

diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t
new file mode 100644
index 0000000..f273742
--- /dev/null
+++ b/tests/bitrot/bug-1700078.t
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot for volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+## Turn off quick-read so that it wont cache the contents
+# of the file in lookup. For corrupted files, it might
+# end up in reads being served from the cache instead of
+# an error.
+TEST $CLI volume set $V0 performance.quick-read off
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
+
+## Set expiry-timeout to 1 sec
+TEST $CLI volume set $V0 features.expiry-time 1
+
+##Mount $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+## Turn off quick-read xlator so that, the contents are not served from the
+# quick-read cache.
+TEST $CLI volume set $V0 performance.quick-read off
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
+
+##disable bitrot
+TEST $CLI volume bitrot $V0 disable
+
+## modify the file
+TEST `echo "write" >> $M0/FILE1`
+
+# unmount and remount when the file has to be accessed.
+# This is to ensure that, when the remount happens,
+# and the file is read, its contents are served from the
+# brick instead of cache.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+##enable bitrot
+TEST $CLI volume bitrot $V0 enable
+
+# expiry time is set to 1 second. Hence sleep for 2 seconds for the
+# oneshot crawler to finish its crawling and sign the file properly.
+sleep 2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
+
+## Ondemand scrub
+TEST $CLI volume bitrot $V0 scrub ondemand
+
+# the scrub ondemand CLI command, just ensures that
+# the scrubber has received the ondemand scrub directive
+# and started. sleep for 2 seconds for scrubber to finish
+# crawling and marking file(s) as bad (if if finds that
+# corruption has happened) which are filesystem operations.
+sleep 2
+
+TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1
+
+##Mount $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST cat $M0/FILE1
+
+cleanup;
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index b8feef7..424c0d5 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -973,6 +973,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
int32_t ret = -1;
inode_t *linked_inode = NULL;
gf_boolean_t need_signing = _gf_false;
+ gf_boolean_t need_reopen = _gf_true;

GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
GF_VALIDATE_OR_GOTO("bit-rot", data, out);
@@ -1046,6 +1047,18 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
uuid_utoa(linked_inode->gfid));
} else {
need_signing = br_check_object_need_sign(this, xattr, child);
+
+ /*
+ * If we are here means, bitrot daemon has started. Is it just
+ * a simple restart of the daemon or is it started because the
+ * feature is enabled is something hard to determine. Hence,
+ * if need_signing is false (because bit-rot version and signature
+ * are present), then still go ahead and sign it.
+ */
+ if (!need_signing) {
+ need_signing = _gf_true;
+ need_reopen = _gf_true;
+ }
}

if (!need_signing)
@@ -1054,7 +1067,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN,
"Triggering signing for %s [GFID: %s | Brick: %s]", loc.path,
uuid_utoa(linked_inode->gfid), child->brick_path);
- br_trigger_sign(this, child, linked_inode, &loc, _gf_true);
+ br_trigger_sign(this, child, linked_inode, &loc, need_reopen);

ret = 0;

--
1.8.3.1
0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch (new file, 152 lines)
@@ -0,0 +1,152 @@
From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
From: Ashish Pandey <aspandey@redhat.com>
Date: Thu, 23 Jul 2020 11:07:32 +0530
Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
 folder

Problem:
If a gfid is present in the indices/xattrop folder while
the file/dir is actually healthy and all the xattrs are healthy,
it causes a lot of lookups by shd on an entry which does not need
to be healed.
This whole process eats up a lot of CPU usage without doing meaningful
work.

Solution:
Set the trusted.ec.dirty xattr of the entry so that the actual heal
process happens and, at the end of it, during the unset of dirty, the
gfid entry from indices/xattrop is removed.
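
A rough illustration of why no lock is needed (hypothetical helper, not
gluster code): GF_XATTROP_ADD_ARRAY64 semantics are an atomic add of
64-bit network-order counters, so concurrent increments compose instead
of overwriting each other.

    #include <endian.h>
    #include <stdint.h>

    /* Each slot of the stored xattr is incremented by the corresponding
     * slot of the request; values travel in big-endian (network) order. */
    static void
    xattr_add_array64(uint64_t *stored, const uint64_t *delta, int slots)
    {
        for (int i = 0; i < slots; i++)
            stored[i] = htobe64(be64toh(stored[i]) + be64toh(delta[i]));
    }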

>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/
>Fixes: #1385

Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b
BUG: 1785714
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/208591
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 xlators/cluster/ec/src/ec-heal.c  | 73 ++++++++++++++++++++++++++++++++++++++-
 xlators/cluster/ec/src/ec-types.h |  7 +++-
 2 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index e2de879..7d25853 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -2488,6 +2488,59 @@ out:
return ret;
}

+int
+ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
+{
+ int i = 0;
+ int ret = 0;
+ dict_t **xattr = NULL;
+ loc_t loc = {0};
+ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *dict = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict;
+ on[i] = 1;
+ }
+ dirty_xattr[EC_METADATA_TXN] = hton64(1);
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
+ xattr, NULL);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ if (xattr) {
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
void
ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
{
@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
&need_heal);

- if (need_heal == EC_HEAL_NONEED) {
+ if (need_heal == EC_HEAL_PURGE_INDEX) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "Index entry needs to be purged for: %s ",
+ uuid_utoa(loc->gfid));
+ /* We need to send xattrop to set dirty flag so that it can be
+ * healed and index entry could be removed. We need not to take lock
+ * on this entry to do so as we are just setting dirty flag which
+ * actually increases the trusted.ec.dirty count and does not set
+ * the new value.
+ * This will make sure that it is not interfering in other fops.*/
+ ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
+ } else if (need_heal == EC_HEAL_NONEED) {
gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
"Heal is not required for : %s ", uuid_utoa(loc->gfid));
goto out;
@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
goto out;
}
}
+ /* If lock count is 0, all dirty flags are 0 and all the
+ * versions are macthing then why are we here. It looks
+ * like something went wrong while removing the index entries
+ * after completing a successful heal or fop. In this case
+ * we need to remove this index entry to avoid triggering heal
+ * in a loop and causing lookups again and again*/
+ *need_heal = EC_HEAL_PURGE_INDEX;
} else {
for (i = 0; i < ec->nodes; i++) {
/* Since each lock can only increment the dirty
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index f15429d..700dc39 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);

enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };

-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
+enum _ec_heal_need {
+ EC_HEAL_NONEED,
+ EC_HEAL_MAYBE,
+ EC_HEAL_MUST,
+ EC_HEAL_PURGE_INDEX
+};

enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };

--
1.8.3.1
0461-geo-replication-Fix-IPv6-parsing.patch (new file, 127 lines)
@@ -0,0 +1,127 @@
From d425ed54261d5bc19aa853854cc3b64647e3c897 Mon Sep 17 00:00:00 2001
From: Aravinda Vishwanathapura <aravinda@kadalu.io>
Date: Sun, 12 Jul 2020 12:42:36 +0530
Subject: [PATCH 461/465] geo-replication: Fix IPv6 parsing

Brick paths in Volinfo use `:` as a delimiter, and Geo-rep splits them
on the `:` character. This goes wrong with IPv6, where the host part
itself contains colons.

This patch handles the IPv6 case and performs the split properly.
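
The same idea in a short C sketch (hypothetical helper, assuming a
well-formed "host:brick" input): split on the last colon so an IPv6
host part keeps its own colons.

    #include <stdio.h>
    #include <string.h>

    static void
    host_brick_split(const char *value, char *host, char *brick, size_t n)
    {
        const char *colon = strrchr(value, ':'); /* last ':' starts the brick */
        snprintf(host, n, "%.*s", (int)(colon - value), value);
        snprintf(brick, n, "%s", colon + 1);
    }

    int
    main(void)
    {
        char host[128], brick[128];
        host_brick_split("fe80::af0f:df82:844f:ef66%utun0:/bricks/brick1/brick",
                         host, brick, sizeof(host));
        printf("host=%s brick=%s\n", host, brick); /* host keeps its colons */
        return 0;
    }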

Backport of:
>Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24706
>Fixes: #1366
>Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d
>Signed-off-by: Aravinda Vishwanathapura <aravinda@kadalu.io>

BUG: 1855966
Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/208610
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 geo-replication/syncdaemon/master.py     |  5 ++--
 geo-replication/syncdaemon/syncdutils.py | 43 +++++++++++++++++++++++++++++---
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index 3f98337..08e98f8 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -26,7 +26,8 @@ from rconf import rconf
from syncdutils import Thread, GsyncdError, escape_space_newline
from syncdutils import unescape_space_newline, gauxpfx, escape
from syncdutils import lstat, errno_wrap, FreeObject, lf, matching_disk_gfid
-from syncdutils import NoStimeAvailable, PartialHistoryAvailable
+from syncdutils import NoStimeAvailable, PartialHistoryAvailable, host_brick_split
+

URXTIME = (-1, 0)

@@ -1466,7 +1467,7 @@ class GMasterChangelogMixin(GMasterCommon):
node = rconf.args.resource_remote
node_data = node.split("@")
node = node_data[-1]
- remote_node_ip = node.split(":")[0]
+ remote_node_ip, _ = host_brick_split(node)
self.status.set_slave_node(remote_node_ip)

def changelogs_batch_process(self, changes):
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index 7560fa1..f43e13b 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -883,6 +883,19 @@ class Popen(subprocess.Popen):
self.errfail()


+def host_brick_split(value):
+ """
+ IPv6 compatible way to split and get the host
+ and brick information. Example inputs:
+ node1.example.com:/exports/bricks/brick1/brick
+ fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick
+ """
+ parts = value.split(":")
+ brick = parts[-1]
+ hostparts = parts[0:-1]
+ return (":".join(hostparts), brick)
+
+
class Volinfo(object):

def __init__(self, vol, host='localhost', prelude=[], master=True):
@@ -925,7 +938,7 @@ class Volinfo(object):
@memoize
def bricks(self):
def bparse(b):
- host, dirp = b.find("name").text.split(':', 2)
+ host, dirp = host_brick_split(b.find("name").text)
return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text}
return [bparse(b) for b in self.get('brick')]

@@ -1001,6 +1014,16 @@ class VolinfoFromGconf(object):
def is_hot(self, brickpath):
return False

+ def is_uuid(self, value):
+ try:
+ uuid.UUID(value)
+ return True
+ except ValueError:
+ return False
+
+ def possible_path(self, value):
+ return "/" in value
+
@property
@memoize
def bricks(self):
@@ -1014,8 +1037,22 @@ class VolinfoFromGconf(object):
out = []
for b in bricks_data:
parts = b.split(":")
- bpath = parts[2] if len(parts) == 3 else ""
- out.append({"host": parts[1], "dir": bpath, "uuid": parts[0]})
+ b_uuid = None
+ if self.is_uuid(parts[0]):
+ b_uuid = parts[0]
+ # Set all parts except first
+ parts = parts[1:]
+
+ if self.possible_path(parts[-1]):
+ bpath = parts[-1]
+ # Set all parts except last
+ parts = parts[0:-1]
+
+ out.append({
+ "host": ":".join(parts), # if remaining parts are IPv6 name
+ "dir": bpath,
+ "uuid": b_uuid
+ })

return out

--
1.8.3.1
0462-Issue-with-gf_fill_iatt_for_dirent.patch (new file, 43 lines)
@@ -0,0 +1,43 @@
From f027734165374979bd0bff8ea059dfaadca85e07 Mon Sep 17 00:00:00 2001
From: Soumya Koduri <skoduri@redhat.com>
Date: Thu, 2 Jul 2020 02:07:56 +0530
Subject: [PATCH 462/465] Issue with gf_fill_iatt_for_dirent

In "gf_fill_iatt_for_dirent()", while calculating inode_path for loc,
the inode should be the parent's. Instead loc.inode is used, which
results in an error, and eventually lookup/readdirp fails.

This patch fixes the same.

This is a backport of the below mainstream fix:

> Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5
> Fixes: #1351
> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24661/

Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5
BUG: 1853189
Signed-off-by: Soumya Koduri <skoduri@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/208691
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 libglusterfs/src/gf-dirent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
index f289723..3fa67f2 100644
--- a/libglusterfs/src/gf-dirent.c
+++ b/libglusterfs/src/gf-dirent.c
@@ -277,7 +277,7 @@ gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol)
gf_uuid_copy(loc.pargfid, parent->gfid);
loc.name = entry->d_name;
loc.parent = inode_ref(parent);
- ret = inode_path(loc.inode, entry->d_name, &path);
+ ret = inode_path(loc.parent, entry->d_name, &path);
loc.path = path;
if (ret < 0)
goto out;
--
1.8.3.1
@@ -0,0 +1,87 @@
From 7d87933f648092ae55d57a96fd06e3df975d764c Mon Sep 17 00:00:00 2001
From: Ashish Pandey <aspandey@redhat.com>
Date: Tue, 18 Aug 2020 10:33:48 +0530
Subject: [PATCH 463/465] cluster/ec: Change handling of heal failure to avoid
 crash

Problem:
ec_getxattr_heal_cbk was called with NULL as its second argument
in case the heal was failing.
This function dereferenced the "cookie" argument, which caused a crash.

Solution:
The cookie is changed to carry the value that was supposed to be
stored in fop->data, so even when fop is NULL in the error case,
there won't be any NULL dereference.

Thanks to Xavi for the suggestion about the fix.
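
The shape of the fix, sketched with simplified types (not the actual
gluster signatures): the cookie now carries exactly the value the
callback needs, so an error path that has no fop can still complete
safely.

    /* simplified callback type; the real one carries more arguments */
    typedef void (*getxattr_cbk_t)(void *frame, int op_ret);

    static void
    heal_done_cbk(void *frame, void *cookie, int op_ret)
    {
        /* Before the fix: cookie was the fop (NULL on the error path)
         * and this function crashed dereferencing fop->data. After the
         * fix the caller passes the needed value itself, so no
         * dereference is required. */
        getxattr_cbk_t func = cookie;

        func(frame, op_ret);
    }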

>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23050/
>fixes: bz#1729085

Change-Id: I0798000d5cadb17c3c2fbfa1baf77033ffc2bb8c
BUG: 1852736
Reviewed-on: https://code.engineering.redhat.com/gerrit/209012
Tested-by: Ashish Pandey <aspandey@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
---
 xlators/cluster/ec/src/ec-heal.c       | 11 ++++++-----
 xlators/cluster/ec/src/ec-inode-read.c |  4 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 7d25853..6e6948b 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -1966,7 +1966,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)

case EC_STATE_REPORT:
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
(heal->good | heal->bad), heal->good, heal->bad,
0, NULL);
}
@@ -2022,10 +2022,11 @@ ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
uintptr_t good, uintptr_t bad, uint32_t pending,
dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- ec_heal_t *heal = fop->data;
+ ec_heal_t *heal = cookie;

- fop->heal = NULL;
+ if (heal->fop) {
+ heal->fop->heal = NULL;
+ }
heal->fop = NULL;
heal->error = op_ret < 0 ? op_errno : 0;
syncbarrier_wake(heal->data);
@@ -2669,7 +2670,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
out:
ec_reset_entry_healing(fop);
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
ec_char_array_to_mask(participants, ec->nodes),
mgood & good, mbad & bad, pending, NULL);
}
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index e82e8f6..c50d0ad 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -396,8 +396,8 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
uintptr_t good, uintptr_t bad, uint32_t pending,
dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- fop_getxattr_cbk_t func = fop->data;
+ fop_getxattr_cbk_t func = cookie;
+
ec_t *ec = xl->private;
dict_t *dict = NULL;
char *str;
--
1.8.3.1
0464-storage-posix-Remove-nr_files-usage.patch (new file, 102 lines)
@@ -0,0 +1,102 @@
From 7c51addf7912a94320e6b148bd66f2dbf274c533 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Mon, 11 Mar 2019 14:04:39 +0530
Subject: [PATCH 464/465] storage/posix: Remove nr_files usage

nr_files is supposed to represent the number of files opened in posix.
The present logic doesn't handle anon-fds, because of which the counts
would always be wrong.

I don't remember anyone using this value while debugging any problem,
probably because we always have 'ls -l /proc/<pid>/fd', which not only
prints the fds that are active but also prints their paths. It also
handles directories and anon-fds which actually opened the file. So
this code is removed instead of fixing the buggy logic to maintain
nr_files.

> fixes bz#1688106
> Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040
> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
> (Cherry pick from commit f5987d38f216a3142dfe45f03bf66ff4827d9b55)
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22333/)

Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040
BUG: 1851989
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/209468
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 xlators/storage/posix/src/posix-common.c       | 2 --
 xlators/storage/posix/src/posix-entry-ops.c    | 2 --
 xlators/storage/posix/src/posix-inode-fd-ops.c | 2 --
 xlators/storage/posix/src/posix.h              | 1 -
 4 files changed, 7 deletions(-)

diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
index ac53796..b317627 100644
--- a/xlators/storage/posix/src/posix-common.c
+++ b/xlators/storage/posix/src/posix-common.c
@@ -128,7 +128,6 @@ posix_priv(xlator_t *this)
gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value));
gf_proc_dump_write("max_write", "%" PRId64,
GF_ATOMIC_GET(priv->write_value));
- gf_proc_dump_write("nr_files", "%" PRId64, GF_ATOMIC_GET(priv->nr_files));

return 0;
}
@@ -815,7 +814,6 @@ posix_init(xlator_t *this)
}

LOCK_INIT(&_private->lock);
- GF_ATOMIC_INIT(_private->nr_files, 0);
GF_ATOMIC_INIT(_private->read_value, 0);
GF_ATOMIC_INIT(_private->write_value, 0);

diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
index 65650b3..b3a5381 100644
--- a/xlators/storage/posix/src/posix-entry-ops.c
+++ b/xlators/storage/posix/src/posix-entry-ops.c
@@ -2243,8 +2243,6 @@ fill_stat:
gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
"failed to set the fd context path=%s fd=%p", real_path, fd);

- GF_ATOMIC_INC(priv->nr_files);
-
op_ret = 0;

out:
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index d135d8b..81f4a6b 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -1605,7 +1605,6 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
"failed to set the fd context path=%s fd=%p", real_path, fd);

- GF_ATOMIC_INC(priv->nr_files);
op_ret = 0;

out:
@@ -2526,7 +2525,6 @@ posix_release(xlator_t *this, fd_t *fd)
if (!priv)
goto out;

- GF_ATOMIC_DEC(priv->nr_files);
out:
return 0;
}
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 61495a7..124dbb4 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -154,7 +154,6 @@ struct posix_private {

gf_atomic_t read_value; /* Total read, from init */
gf_atomic_t write_value; /* Total write, from init */
- gf_atomic_t nr_files;
/*
In some cases, two exported volumes may reside on the same
partition on the server. Sending statvfs info for both
--
1.8.3.1
0465-posix-Implement-a-janitor-thread-to-close-fd.patch (new file, 384 lines)
@@ -0,0 +1,384 @@
From 143b93b230b429cc712353243ed794b68494c040 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Mon, 27 Jul 2020 18:08:00 +0530
Subject: [PATCH 465/465] posix: Implement a janitor thread to close fd

Problem: In commit fb20713b380e1df8d7f9e9df96563be2f9144fd6 we used a
synctask to close fds, but the patch was found to reduce performance.

Solution: Use a janitor thread to close fds, save the pfd ctx in the
ctx janitor list, and also save the posix xlator in the pfd object to
avoid a race condition during cleanup in a brick-mux environment.
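
A minimal sketch of the janitor pattern (a generic producer/consumer,
not the gluster structures): releases enqueue the fd under a mutex, and
one dedicated thread wakes on the condition variable and closes them off
the fop path.

    #include <pthread.h>
    #include <stdlib.h>
    #include <unistd.h>

    struct fd_node { int fd; struct fd_node *next; };

    static struct fd_node *janitor_fds;
    static pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t fd_cond = PTHREAD_COND_INITIALIZER;

    static void *
    janitor(void *arg)
    {
        for (;;) {
            pthread_mutex_lock(&fd_lock);
            while (janitor_fds == NULL)
                pthread_cond_wait(&fd_cond, &fd_lock);
            struct fd_node *n = janitor_fds;
            janitor_fds = n->next;
            pthread_mutex_unlock(&fd_lock);

            close(n->fd); /* the expensive part, off the release path */
            free(n);
        }
        return NULL;
    }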
|
||||
|
||||
> Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092
|
||||
> Fixes: #1396
|
||||
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
|
||||
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24755/)
|
||||
> (Cherry pick from commit 41b9616435cbdf671805856e487e373060c9455b
|
||||
|
||||
Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092
|
||||
BUG: 1851989
|
||||
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
|
||||
Reviewed-on: https://code.engineering.redhat.com/gerrit/209448
|
||||
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
||||
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
||||
---
|
||||
glusterfsd/src/glusterfsd.c | 4 ++
|
||||
libglusterfs/src/glusterfs/glusterfs.h | 7 ++
|
||||
rpc/rpc-lib/src/rpcsvc.c | 6 --
|
||||
xlators/storage/posix/src/posix-common.c | 34 +++++++++-
|
||||
xlators/storage/posix/src/posix-helpers.c | 93 ++++++++++++++++++++++++++
|
||||
xlators/storage/posix/src/posix-inode-fd-ops.c | 33 ++++-----
|
||||
xlators/storage/posix/src/posix.h | 7 ++
|
||||
7 files changed, 161 insertions(+), 23 deletions(-)
|
||||
|
||||
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
|
||||
index 9821180..955bf1d 100644
|
||||
--- a/glusterfsd/src/glusterfsd.c
|
||||
+++ b/glusterfsd/src/glusterfsd.c
|
||||
@@ -1839,6 +1839,10 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
|
||||
|
||||
INIT_LIST_HEAD(&cmd_args->xlator_options);
|
||||
INIT_LIST_HEAD(&cmd_args->volfile_servers);
|
||||
+ ctx->pxl_count = 0;
|
||||
+ pthread_mutex_init(&ctx->fd_lock, NULL);
|
||||
+ pthread_cond_init(&ctx->fd_cond, NULL);
|
||||
+ INIT_LIST_HEAD(&ctx->janitor_fds);
|
||||
|
||||
lim.rlim_cur = RLIM_INFINITY;
|
||||
lim.rlim_max = RLIM_INFINITY;
|
||||
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index 495a4d7..bf6a987 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -733,6 +733,13 @@ struct _glusterfs_ctx {
} stats;

struct list_head volfile_list;
+ /* Members to manage the janitor thread for fd cleanup */
+ struct list_head janitor_fds;
+ pthread_cond_t fd_cond;
+ pthread_mutex_t fd_lock;
+ pthread_t janitor;
+ /* This variable is used to save the total posix xlator count */
+ uint32_t pxl_count;

char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
};
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index 23ca1fd..3f184bf 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -375,12 +375,6 @@ rpcsvc_program_actor(rpcsvc_request_t *req)

req->ownthread = program->ownthread;
req->synctask = program->synctask;
- if (((req->procnum == GFS3_OP_RELEASE) ||
- (req->procnum == GFS3_OP_RELEASEDIR)) &&
- (program->prognum == GLUSTER_FOP_PROGRAM)) {
- req->ownthread = _gf_false;
- req->synctask = _gf_true;
- }

err = SUCCESS;
gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s",
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
index b317627..c5a43a1 100644
--- a/xlators/storage/posix/src/posix-common.c
+++ b/xlators/storage/posix/src/posix-common.c
@@ -150,6 +150,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
struct timespec sleep_till = {
0,
};
+ glusterfs_ctx_t *ctx = this->ctx;

switch (event) {
case GF_EVENT_PARENT_UP: {
@@ -160,8 +161,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
case GF_EVENT_PARENT_DOWN: {
if (!victim->cleanup_starting)
break;
- gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
- victim->name);

if (priv->janitor) {
pthread_mutex_lock(&priv->janitor_mutex);
@@ -187,6 +186,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
GF_FREE(priv->janitor);
}
priv->janitor = NULL;
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ while (priv->rel_fdcount > 0) {
+ pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
+ victim->name);
default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
} break;
default:
@@ -1038,7 +1047,13 @@ posix_init(xlator_t *this)
pthread_cond_init(&_private->fsync_cond, NULL);
pthread_mutex_init(&_private->janitor_mutex, NULL);
pthread_cond_init(&_private->janitor_cond, NULL);
+ pthread_cond_init(&_private->fd_cond, NULL);
INIT_LIST_HEAD(&_private->fsyncs);
+ _private->rel_fdcount = 0;
+ ret = posix_spawn_ctx_janitor_thread(this);
+ if (ret)
+ goto out;
+
ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this,
"posixfsy");
if (ret) {
@@ -1133,6 +1148,8 @@ posix_fini(xlator_t *this)
{
struct posix_private *priv = this->private;
gf_boolean_t health_check = _gf_false;
+ glusterfs_ctx_t *ctx = this->ctx;
+ uint32_t count;
int ret = 0;

if (!priv)
@@ -1166,6 +1183,19 @@ posix_fini(xlator_t *this)
priv->janitor = NULL;
}

+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ count = --ctx->pxl_count;
+ if (count == 0) {
+ pthread_cond_signal(&ctx->fd_cond);
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ if (count == 0) {
+ pthread_join(ctx->janitor, NULL);
+ }
+
if (priv->fsyncer) {
(void)gf_thread_cleanup_xint(priv->fsyncer);
priv->fsyncer = 0;
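Editor's note: the init/fini hunks above implement a small refcount protocol: the first posix xlator to initialize spawns the shared janitor thread, and the last one to finish wakes it and joins it. A hedged, standalone sketch of that protocol follows; ctx_ref, ctx_unref and janitor_proc are invented names, not GlusterFS APIs, and the real thread also drains a work list while it waits.

```c
#include <pthread.h>

static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static unsigned users;    /* plays the role of ctx->pxl_count */
static pthread_t janitor; /* the shared cleanup thread */

static void *janitor_proc(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lk);
    while (users > 0)                /* run until the last user leaves */
        pthread_cond_wait(&cv, &lk);
    pthread_mutex_unlock(&lk);
    return NULL;
}

/* first caller spawns the shared thread (cf. posix_spawn_ctx_janitor_thread) */
static int ctx_ref(void)
{
    int ret = 0;
    pthread_mutex_lock(&lk);
    if (users++ == 0) {
        ret = pthread_create(&janitor, NULL, janitor_proc, NULL);
        if (ret != 0)
            users--;                 /* undo the count on spawn failure */
    }
    pthread_mutex_unlock(&lk);
    return ret;
}

/* last caller wakes the thread and joins it (cf. posix_fini) */
static void ctx_unref(void)
{
    unsigned remaining;
    pthread_mutex_lock(&lk);
    remaining = --users;
    if (remaining == 0)
        pthread_cond_signal(&cv);
    pthread_mutex_unlock(&lk);
    if (remaining == 0)              /* join outside the lock */
        pthread_join(janitor, NULL);
}
```

Note that, as in posix_fini above, the join happens outside the lock: joining a thread that needs the same lock to finish would deadlock.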
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 39dbcce..73a44be 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1582,6 +1582,99 @@ unlock:
return;
}

+static struct posix_fd *
+janitor_get_next_fd(glusterfs_ctx_t *ctx)
+{
+ struct posix_fd *pfd = NULL;
+
+ while (list_empty(&ctx->janitor_fds)) {
+ if (ctx->pxl_count == 0) {
+ return NULL;
+ }
+
+ pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock);
+ }
+
+ pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list);
+ list_del_init(&pfd->list);
+
+ return pfd;
+}
+
+static void
+posix_close_pfd(xlator_t *xl, struct posix_fd *pfd)
+{
+ THIS = xl;
+
+ if (pfd->dir == NULL) {
+ gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd);
+ sys_close(pfd->fd);
+ } else {
+ gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir);
+ sys_closedir(pfd->dir);
+ }
+
+ GF_FREE(pfd);
+}
+
+static void *
+posix_ctx_janitor_thread_proc(void *data)
+{
+ xlator_t *xl;
+ struct posix_fd *pfd;
+ glusterfs_ctx_t *ctx = NULL;
+ struct posix_private *priv_fd;
+
+ ctx = data;
+
+ pthread_mutex_lock(&ctx->fd_lock);
+
+ while ((pfd = janitor_get_next_fd(ctx)) != NULL) {
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ xl = pfd->xl;
+ posix_close_pfd(xl, pfd);
+
+ pthread_mutex_lock(&ctx->fd_lock);
+
+ priv_fd = xl->private;
+ priv_fd->rel_fdcount--;
+ if (!priv_fd->rel_fdcount)
+ pthread_cond_signal(&priv_fd->fd_cond);
+ }
+
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ return NULL;
+}
+
+int
+posix_spawn_ctx_janitor_thread(xlator_t *this)
+{
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = this->ctx;
+
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ if (ctx->pxl_count++ == 0) {
+ ret = gf_thread_create(&ctx->janitor, NULL,
+ posix_ctx_janitor_thread_proc, ctx,
+ "posixctxjan");
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
+ "spawning janitor thread failed");
+ ctx->pxl_count--;
+ }
+ }
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+
+ return ret;
+}
+
static int
is_fresh_file(int64_t ctime_sec)
{
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index 81f4a6b..21119ea 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -1352,6 +1352,22 @@ out:
return 0;
}

+static void
+posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd)
+{
+ glusterfs_ctx_t *ctx = this->ctx;
+ struct posix_private *priv = this->private;
+
+ pfd->xl = this;
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ list_add_tail(&pfd->list, &ctx->janitor_fds);
+ priv->rel_fdcount++;
+ pthread_cond_signal(&ctx->fd_cond);
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+}
+
int32_t
posix_releasedir(xlator_t *this, fd_t *fd)
{
@@ -1374,11 +1390,7 @@ posix_releasedir(xlator_t *this, fd_t *fd)
"pfd->dir is NULL for fd=%p", fd);
goto out;
}
-
- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
-
- sys_closedir(pfd->dir);
- GF_FREE(pfd);
+ posix_add_fd_to_cleanup(this, pfd);

out:
return 0;
@@ -2494,7 +2506,6 @@ out:
int32_t
posix_release(xlator_t *this, fd_t *fd)
{
- struct posix_private *priv = NULL;
struct posix_fd *pfd = NULL;
int ret = -1;
uint64_t tmp_pfd = 0;
@@ -2502,8 +2513,6 @@ posix_release(xlator_t *this, fd_t *fd)
VALIDATE_OR_GOTO(this, out);
VALIDATE_OR_GOTO(fd, out);

- priv = this->private;
-
ret = fd_ctx_del(fd, this, &tmp_pfd);
if (ret < 0) {
gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
@@ -2517,13 +2526,7 @@ posix_release(xlator_t *this, fd_t *fd)
"pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd);
}

- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
-
- sys_close(pfd->fd);
- GF_FREE(pfd);
-
- if (!priv)
- goto out;
+ posix_add_fd_to_cleanup(this, pfd);

out:
return 0;
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 124dbb4..07f367b 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -134,6 +134,8 @@ struct posix_fd {
off_t dir_eof; /* offset at dir EOF */
int odirect;
struct list_head list; /* to add to the janitor list */
+ xlator_t *xl;
+ char _pad[4]; /* manual padding */
};

struct posix_private {
@@ -204,6 +206,7 @@ struct posix_private {
pthread_cond_t fsync_cond;
pthread_mutex_t janitor_mutex;
pthread_cond_t janitor_cond;
+ pthread_cond_t fd_cond;
int fsync_queue_count;

enum {
@@ -259,6 +262,7 @@ struct posix_private {
gf_boolean_t fips_mode_rchecksum;
gf_boolean_t ctime;
gf_boolean_t janitor_task_stop;
+ uint32_t rel_fdcount;
};

typedef struct {
@@ -665,6 +669,9 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,
int
posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);

+int
+posix_spawn_ctx_janitor_thread(xlator_t *this);
+
void
posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);

--
1.8.3.1
@@ -237,7 +237,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
%else
Name: glusterfs
Version: 6.0
-Release: 40%{?dist}
+Release: 41%{?dist}
ExcludeArch: i686
%endif
License: GPLv2 or LGPLv3+
@@ -771,6 +771,15 @@ Patch0453: 0453-glusterd-add-brick-command-failure.patch
Patch0454: 0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch
Patch0455: 0455-locks-prevent-deletion-of-locked-entries.patch
Patch0456: 0456-add-clean-local-after-grant-lock.patch
+Patch0457: 0457-cluster-ec-Improve-detection-of-new-heals.patch
+Patch0458: 0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch
+Patch0459: 0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch
+Patch0460: 0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch
+Patch0461: 0461-geo-replication-Fix-IPv6-parsing.patch
+Patch0462: 0462-Issue-with-gf_fill_iatt_for_dirent.patch
+Patch0463: 0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch
+Patch0464: 0464-storage-posix-Remove-nr_files-usage.patch
+Patch0465: 0465-posix-Implement-a-janitor-thread-to-close-fd.patch

%description
GlusterFS is a distributed file-system capable of scaling to several
@@ -2511,6 +2520,9 @@ fi
%endif

%changelog
+* Tue Aug 25 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-41
+- fixes bugs bz#1785714 bz#1851424 bz#1851989 bz#1852736 bz#1853189 bz#1855966
+
* Tue Jul 21 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-40
- fixes bugs bz#1812789 bz#1844359 bz#1847081 bz#1854165
