80 lines
3.5 KiB
Diff
80 lines
3.5 KiB
Diff
|
From f2d3866e617d25ea62cda01afddc81ef0db3356e Mon Sep 17 00:00:00 2001
|
||
|
From: Xavi Hernandez <xhernandez@redhat.com>
|
||
|
Date: Tue, 4 May 2021 22:39:03 +0200
|
||
|
Subject: [PATCH 555/584] geo-rep: Improve handling of gfid mismatches
|
||
|
|
||
|
In some circumstances geo-replication can detect mismatching gfids
|
||
|
between primary and secondary. These entries are fixed in an iterative
|
||
|
way, assuming that after a fix, a previously failing entry could
|
||
|
succeed.
|
||
|
|
||
|
The previous code tried to fix them in a loop that can be executed
|
||
|
up to 10 times. If some entry cannot be fixed after 10 attempts, it's
|
||
|
discarded. These fixes are very slow, so trying to do them many times
|
||
|
causes geo-replication to get out of sync.
|
||
|
|
||
|
To minimize the number of iterations done, this patch checks if the
|
||
|
number of entries and failures remains constant after each iteration.
|
||
|
If they are constant, it means that nothing else can be fixed, so it
|
||
|
makes no sense to do more iterations. This reduces the number of
|
||
|
iterations to 2 or 3 in most cases, improving geo-replication
|
||
|
performance.
|
||
|
|
||
|
Backport of:
|
||
|
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2389
|
||
|
> Fixes: #2388
|
||
|
> Change-Id: I6d9a623a60045694e1a832195e1dc1fb9e88ae54
|
||
|
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
||
|
|
||
|
BUG: 1957191
|
||
|
Change-Id: I6d9a623a60045694e1a832195e1dc1fb9e88ae54
|
||
|
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
||
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244550
|
||
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
||
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
||
|
---
|
||
|
geo-replication/syncdaemon/master.py | 18 +++++++++++++++++-
|
||
|
1 file changed, 17 insertions(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
|
||
|
index 98637e7..aef9373 100644
|
||
|
--- a/geo-replication/syncdaemon/master.py
|
||
|
+++ b/geo-replication/syncdaemon/master.py
|
||
|
@@ -1224,9 +1224,11 @@ class GMasterChangelogMixin(GMasterCommon):
|
||
|
|
||
|
if gconf.get("gfid-conflict-resolution"):
|
||
|
count = 0
|
||
|
+ num_entries = len(entries)
|
||
|
+ num_failures = len(failures)
|
||
|
if failures:
|
||
|
logging.info(lf('Entry ops failed with gfid mismatch',
|
||
|
- count=len(failures)))
|
||
|
+ count=num_failures))
|
||
|
while failures and count < self.MAX_OE_RETRIES:
|
||
|
count += 1
|
||
|
self.handle_entry_failures(failures, entries)
|
||
|
@@ -1237,6 +1239,20 @@ class GMasterChangelogMixin(GMasterCommon):
|
||
|
"gfid mismatch")
|
||
|
break
|
||
|
|
||
|
+ # If this iteration has not removed any entry or reduced
|
||
|
+ # the number of failures compared to the previous one, we
|
||
|
+ # don't need to keep iterating because we'll get the same
|
||
|
+ # result in all other attempts.
|
||
|
+ if ((num_entries == len(entries)) and
|
||
|
+ (num_failures == len(failures))):
|
||
|
+ logging.info(lf("No more gfid mismatches can be fixed",
|
||
|
+ entries=num_entries,
|
||
|
+ failures=num_failures))
|
||
|
+ break
|
||
|
+
|
||
|
+ num_entries = len(entries)
|
||
|
+ num_failures = len(failures)
|
||
|
+
|
||
|
self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
|
||
|
self.status.dec_value("entry", len(entries))
|
||
|
|
||
|
--
|
||
|
1.8.3.1
|
||
|
|