203 lines
8.0 KiB
Diff
203 lines
8.0 KiB
Diff
From 488a5aa4932842334e2749224e9c39f8b6fd379c Mon Sep 17 00:00:00 2001
|
|
From: Ashish Pandey <aspandey@redhat.com>
|
|
Date: Wed, 20 May 2020 11:30:17 +0530
|
|
Subject: [PATCH 561/584] cluster/ec: Inform failure when some bricks are
|
|
unavailable.
|
|
|
|
Provide proper information about failure when a fop
|
|
fails on some of the bricks.
|
|
Also provide information about parent fop and
|
|
the map of the bricks on which it is failing.
|
|
|
|
Upstream patch details:
|
|
>Change-Id: If812739617df65cd146c8e667fbacff653717248
|
|
>updates #1434
|
|
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
|
|
>https://review.gluster.org/#/c/glusterfs/+/24858/
|
|
|
|
Change-Id: I3549d637e7345f05f21ac1c0e8106973c69d1be9
|
|
BUG: 1908635
|
|
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244926
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
---
|
|
xlators/cluster/ec/src/ec-common.c | 76 +++++++++++++++++++++++---------------
|
|
xlators/cluster/ec/src/ec.c | 14 ++++++-
|
|
2 files changed, 58 insertions(+), 32 deletions(-)
|
|
|
|
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
|
|
index e3f8769..a9624d8 100644
|
|
--- a/xlators/cluster/ec/src/ec-common.c
|
|
+++ b/xlators/cluster/ec/src/ec-common.c
|
|
@@ -316,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
|
|
}
|
|
}
|
|
|
|
- gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
|
|
- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
|
|
- "remaining=%s, good=%s, bad=%s, %s)",
|
|
- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
|
|
- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
|
|
- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
|
|
- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
|
|
- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
|
|
- ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
|
|
- ec->nodes),
|
|
- ec_msg_str(fop));
|
|
+ gf_msg(
|
|
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
|
|
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
|
|
+ "remaining=%s, good=%s, bad=%s,"
|
|
+ "(Least significant bit represents first client/brick of subvol), %s)",
|
|
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
|
|
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
|
|
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
|
|
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
|
|
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
|
|
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
|
|
+ ec->nodes),
|
|
+ ec_msg_str(fop));
|
|
if (fop->use_fd) {
|
|
if (fop->fd != NULL) {
|
|
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
|
|
@@ -614,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop)
|
|
loc_t *loc2 = NULL;
|
|
char gfid1[64] = {0};
|
|
char gfid2[64] = {0};
|
|
+ ec_fop_data_t *parent = fop->parent;
|
|
|
|
if (fop->errstr)
|
|
return fop->errstr;
|
|
-
|
|
if (!fop->use_fd) {
|
|
loc1 = &fop->loc[0];
|
|
loc2 = &fop->loc[1];
|
|
@@ -625,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop)
|
|
if (fop->id == GF_FOP_RENAME) {
|
|
gf_asprintf(&fop->errstr,
|
|
"FOP : '%s' failed on '%s' and '%s' with gfids "
|
|
- "%s and %s respectively",
|
|
+ "%s and %s respectively. Parent FOP: %s",
|
|
ec_fop_name(fop->id), loc1->path, loc2->path,
|
|
uuid_utoa_r(loc1->gfid, gfid1),
|
|
- uuid_utoa_r(loc2->gfid, gfid2));
|
|
+ uuid_utoa_r(loc2->gfid, gfid2),
|
|
+ parent ? ec_fop_name(parent->id) : "No Parent");
|
|
} else {
|
|
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
|
|
- ec_fop_name(fop->id), loc1->path,
|
|
- uuid_utoa_r(loc1->gfid, gfid1));
|
|
+ gf_asprintf(
|
|
+ &fop->errstr,
|
|
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
|
|
+ ec_fop_name(fop->id), loc1->path,
|
|
+ uuid_utoa_r(loc1->gfid, gfid1),
|
|
+ parent ? ec_fop_name(parent->id) : "No Parent");
|
|
}
|
|
} else {
|
|
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
|
|
- ec_fop_name(fop->id),
|
|
- uuid_utoa_r(fop->fd->inode->gfid, gfid1));
|
|
+ gf_asprintf(
|
|
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
|
|
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
|
|
+ parent ? ec_fop_name(parent->id) : "No Parent");
|
|
}
|
|
return fop->errstr;
|
|
}
|
|
|
|
+static void
|
|
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
|
|
+ int32_t loglevel)
|
|
+{
|
|
+ ec_t *ec = fop->xl->private;
|
|
+ char str1[32], str2[32], str3[32];
|
|
+
|
|
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
|
|
+ "Insufficient available children for this request: "
|
|
+ "Have : %d, Need : %u : Child UP : %s "
|
|
+ "Mask: %s, Healing : %s : %s ",
|
|
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
|
|
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
|
|
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
|
|
+ ec_msg_str(fop));
|
|
+}
|
|
+
|
|
static int32_t
|
|
ec_child_select(ec_fop_data_t *fop)
|
|
{
|
|
@@ -699,11 +723,7 @@ ec_child_select(ec_fop_data_t *fop)
|
|
ec_trace("SELECT", fop, "");
|
|
|
|
if ((num < fop->minimum) && (num < ec->fragments)) {
|
|
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
|
|
- "Insufficient available children "
|
|
- "for this request (have %d, need "
|
|
- "%d). %s",
|
|
- num, fop->minimum, ec_msg_str(fop));
|
|
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
|
|
return 0;
|
|
}
|
|
|
|
@@ -711,11 +731,7 @@ ec_child_select(ec_fop_data_t *fop)
|
|
(fop->locks[0].update[EC_DATA_TXN] ||
|
|
fop->locks[0].update[EC_METADATA_TXN])) {
|
|
if (ec->quorum_count && (num < ec->quorum_count)) {
|
|
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
|
|
- "Insufficient available children "
|
|
- "for this request (have %d, need "
|
|
- "%d). %s",
|
|
- num, ec->quorum_count, ec_msg_str(fop));
|
|
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
|
|
return 0;
|
|
}
|
|
}
|
|
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
|
|
index a930089..047cdd8 100644
|
|
--- a/xlators/cluster/ec/src/ec.c
|
|
+++ b/xlators/cluster/ec/src/ec.c
|
|
@@ -325,13 +325,18 @@ ec_get_event_from_state(ec_t *ec)
|
|
void
|
|
ec_up(xlator_t *this, ec_t *ec)
|
|
{
|
|
+ char str1[32], str2[32];
|
|
+
|
|
if (ec->timer != NULL) {
|
|
gf_timer_call_cancel(this->ctx, ec->timer);
|
|
ec->timer = NULL;
|
|
}
|
|
|
|
ec->up = 1;
|
|
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
|
|
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
|
|
+ "Going UP : Child UP = %s Child Notify = %s",
|
|
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
|
|
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
|
|
|
|
gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
|
|
}
|
|
@@ -339,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec)
|
|
void
|
|
ec_down(xlator_t *this, ec_t *ec)
|
|
{
|
|
+ char str1[32], str2[32];
|
|
+
|
|
if (ec->timer != NULL) {
|
|
gf_timer_call_cancel(this->ctx, ec->timer);
|
|
ec->timer = NULL;
|
|
}
|
|
|
|
ec->up = 0;
|
|
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
|
|
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
|
|
+ "Going DOWN : Child UP = %s Child Notify = %s",
|
|
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
|
|
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
|
|
|
|
gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
|
|
}
|
|
--
|
|
1.8.3.1
|
|
|