glusterfs/0580-cluster-dht-suppress-file-migration-error-for-node-n.patch
2023-02-27 13:17:02 -05:00

139 lines
4.9 KiB
Diff

From a5da8bb830e86b6dd77a06cd59d220052e80b21c Mon Sep 17 00:00:00 2001
From: Tamar Shacked <tshacked@redhat.com>
Date: Sun, 6 Jun 2021 11:57:06 +0300
Subject: [PATCH 580/584] cluster/dht: suppress file migration error for node
not supposed to migrate file
A rebalance process does a lookup for every file in the dir it is processing
before checking if it is supposed to migrate the file.
In this issue there are two rebalance processes running on a replica subvol:
R1 is migrating the FILE.
R2 is not supposed to migrate the FILE, but it does a lookup and
finds a stale linkfile which is mostly due to a stale layout.
Then, it tries to unlink the stale linkfile and gets EBUSY
as the linkfile fd is open due to R1's migration.
As a result a misleading error msg about FILE migration failure
due to EBUSY is logged in R2's logfile.
Fix:
suppress the error in case it occurred on a node that
is not supposed to migrate the file.
Backport of:
> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24712/
> fixes: #1371
> Change-Id: I37832b404e2b0cc40ac5caf45f14c32c891e71f3
> Signed-off-by: Tamar Shacked <tshacked@redhat.com>
BUG: 1815462
Signed-off-by: Tamar Shacked <tshacked@redhat.com>
Change-Id: I915ee8e7470d85a849b198bfa7d58d368a246aae
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245401
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/dht/src/dht-rebalance.c | 38 ++++++++++++++++++++++-----------
1 file changed, 25 insertions(+), 13 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index e07dec0..cc0f2c9 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2604,10 +2604,10 @@ out:
* all hardlinks.
*/
-int
+gf_boolean_t
gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
{
- int ret = 0;
+ gf_boolean_t ret = _gf_false;
int i = local_subvol_index;
char *str = NULL;
uint32_t hashval = 0;
@@ -2629,12 +2629,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
}
str = uuid_utoa_r(gfid, buf);
- ret = dht_hash_compute(this, 0, str, &hashval);
- if (ret == 0) {
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
index = (hashval % entry->count);
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
@@ -2647,12 +2646,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
/* None of the bricks in the subvol are up.
* CHILD_DOWN will kill the process soon */
- return 0;
+ return _gf_false;
}
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
}
@@ -2701,6 +2700,7 @@ gf_defrag_migrate_single_file(void *opaque)
struct iatt *iatt_ptr = NULL;
gf_boolean_t update_skippedcount = _gf_true;
int i = 0;
+ gf_boolean_t should_i_migrate = 0;
rebal_entry = (struct dht_container *)opaque;
if (!rebal_entry) {
@@ -2754,11 +2754,29 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
+ should_i_migrate = gf_defrag_should_i_migrate(
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
+
gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
gf_uuid_copy(entry_loc.pargfid, loc->gfid);
ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+
+ if (!should_i_migrate) {
+ /* this node isn't supposed to migrate the file. suppressing any
+ * potential error from lookup as this file is under migration by
+ * another node */
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Ignoring lookup failure: node isn't migrating %s",
+ entry_loc.path);
+ ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+ goto out;
+ }
+
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
"Migrate file failed: %s lookup failed", entry_loc.path);
@@ -2779,12 +2797,6 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
- entry->d_stat.ia_gfid)) {
- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
- goto out;
- }
-
iatt_ptr = &iatt;
hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
--
1.8.3.1