139 lines
4.9 KiB
Diff
139 lines
4.9 KiB
Diff
|
From a5da8bb830e86b6dd77a06cd59d220052e80b21c Mon Sep 17 00:00:00 2001
|
||
|
From: Tamar Shacked <tshacked@redhat.com>
|
||
|
Date: Sun, 6 Jun 2021 11:57:06 +0300
|
||
|
Subject: [PATCH 580/584] cluster/dht: suppress file migration error for node
|
||
|
not supposed to migrate file
|
||
|
|
||
|
A rebalance process does a lookup for every file in the dir it is processing
|
||
|
before checking if it supposed to migrate the file.
|
||
|
In this issue there are two rebalance processses running on a replica subvol:
|
||
|
R1 is migrating the FILE.
|
||
|
R2 is not supposed to migrate the FILE, but it does a lookup and
|
||
|
finds a stale linkfile which is mostly due to a stale layout.
|
||
|
Then, it tries to unlink the stale linkfile and gets EBUSY
|
||
|
as the linkfile fd is open due R1 migration.
|
||
|
As a result a misleading error msg about FILE migration failure
|
||
|
due EBUSY is logged in R2 logfile.
|
||
|
|
||
|
Fix:
|
||
|
suppress the error in case it occured in a node that
|
||
|
is not supposed to migrate the file.
|
||
|
|
||
|
Backport of:
|
||
|
> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24712/
|
||
|
> fixes: #1371
|
||
|
> Change-Id: I37832b404e2b0cc40ac5caf45f14c32c891e71f3
|
||
|
> Signed-off-by: Tamar Shacked <tshacked@redhat.com>
|
||
|
|
||
|
BUG: 1815462
|
||
|
Signed-off-by: Tamar Shacked <tshacked@redhat.com>
|
||
|
Change-Id: I915ee8e7470d85a849b198bfa7d58d368a246aae
|
||
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245401
|
||
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
||
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
||
|
---
|
||
|
xlators/cluster/dht/src/dht-rebalance.c | 38 ++++++++++++++++++++++-----------
|
||
|
1 file changed, 25 insertions(+), 13 deletions(-)
|
||
|
|
||
|
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
|
||
|
index e07dec0..cc0f2c9 100644
|
||
|
--- a/xlators/cluster/dht/src/dht-rebalance.c
|
||
|
+++ b/xlators/cluster/dht/src/dht-rebalance.c
|
||
|
@@ -2604,10 +2604,10 @@ out:
|
||
|
* all hardlinks.
|
||
|
*/
|
||
|
|
||
|
-int
|
||
|
+gf_boolean_t
|
||
|
gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
|
||
|
{
|
||
|
- int ret = 0;
|
||
|
+ gf_boolean_t ret = _gf_false;
|
||
|
int i = local_subvol_index;
|
||
|
char *str = NULL;
|
||
|
uint32_t hashval = 0;
|
||
|
@@ -2629,12 +2629,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
|
||
|
}
|
||
|
|
||
|
str = uuid_utoa_r(gfid, buf);
|
||
|
- ret = dht_hash_compute(this, 0, str, &hashval);
|
||
|
- if (ret == 0) {
|
||
|
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
|
||
|
index = (hashval % entry->count);
|
||
|
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
|
||
|
/* Index matches this node's nodeuuid.*/
|
||
|
- ret = 1;
|
||
|
+ ret = _gf_true;
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
@@ -2647,12 +2646,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
|
||
|
/* None of the bricks in the subvol are up.
|
||
|
* CHILD_DOWN will kill the process soon */
|
||
|
|
||
|
- return 0;
|
||
|
+ return _gf_false;
|
||
|
}
|
||
|
|
||
|
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
|
||
|
/* Index matches this node's nodeuuid.*/
|
||
|
- ret = 1;
|
||
|
+ ret = _gf_true;
|
||
|
goto out;
|
||
|
}
|
||
|
}
|
||
|
@@ -2701,6 +2700,7 @@ gf_defrag_migrate_single_file(void *opaque)
|
||
|
struct iatt *iatt_ptr = NULL;
|
||
|
gf_boolean_t update_skippedcount = _gf_true;
|
||
|
int i = 0;
|
||
|
+ gf_boolean_t should_i_migrate = 0;
|
||
|
|
||
|
rebal_entry = (struct dht_container *)opaque;
|
||
|
if (!rebal_entry) {
|
||
|
@@ -2754,11 +2754,29 @@ gf_defrag_migrate_single_file(void *opaque)
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
+ should_i_migrate = gf_defrag_should_i_migrate(
|
||
|
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
|
||
|
+
|
||
|
gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
|
||
|
|
||
|
gf_uuid_copy(entry_loc.pargfid, loc->gfid);
|
||
|
|
||
|
ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
|
||
|
+
|
||
|
+ if (!should_i_migrate) {
|
||
|
+ /* this node isn't supposed to migrate the file. suppressing any
|
||
|
+ * potential error from lookup as this file is under migration by
|
||
|
+ * another node */
|
||
|
+ if (ret) {
|
||
|
+ gf_msg_debug(this->name, -ret,
|
||
|
+ "Ignoring lookup failure: node isn't migrating %s",
|
||
|
+ entry_loc.path);
|
||
|
+ ret = 0;
|
||
|
+ }
|
||
|
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
if (ret) {
|
||
|
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
|
||
|
"Migrate file failed: %s lookup failed", entry_loc.path);
|
||
|
@@ -2779,12 +2797,6 @@ gf_defrag_migrate_single_file(void *opaque)
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
|
||
|
- entry->d_stat.ia_gfid)) {
|
||
|
- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
|
||
|
- goto out;
|
||
|
- }
|
||
|
-
|
||
|
iatt_ptr = &iatt;
|
||
|
|
||
|
hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
|
||
|
--
|
||
|
1.8.3.1
|
||
|
|