glusterfs/0437-geo-rep-Add-more-intelligence-to-automatic-error-han.patch
Milind Changire b7dd6f45c1 autobuild v3.12.2-26
Resolves: bz#1479446 bz#1520882 bz#1579758 bz#1598407 bz#1599808
Resolves: bz#1603118 bz#1619357 bz#1622001 bz#1622308 bz#1631166
Resolves: bz#1631418 bz#1632563 bz#1634649 bz#1635071 bz#1635100
Resolves: bz#1635136 bz#1636291 bz#1638069 bz#1640347 bz#1642854
Resolves: bz#1643035 bz#1644120 bz#1644279 bz#1645916 bz#1647675
Signed-off-by: Milind Changire <mchangir@redhat.com>
2018-11-08 22:30:35 -05:00

145 lines
7.4 KiB
Diff

From 85da98b9c54889139822b5c3d351a0249abf75b0 Mon Sep 17 00:00:00 2001
From: Kotresh HR <khiremat@redhat.com>
Date: Fri, 26 Oct 2018 03:45:46 -0400
Subject: [PATCH 437/444] geo-rep: Add more intelligence to automatic error
handling
Geo-rep's automatic error handling does gfid conflict
resolution. But if there are ENOENT errors because the
parent is not synced to slave, it doesn't handle them.
This patch adds the intelligence to create missing
parent directories on slave. It can create the missing
directories up to a depth of 10.
Backport of:
> Patch: https://review.gluster.org/21498
> fixes: bz#1643402
> Change-Id: Ic97ed1fa5899c087e404d559e04f7963ed7bb54c
> Signed-off-by: Kotresh HR <khiremat@redhat.com>
BUG: 1638069
Change-Id: Ic97ed1fa5899c087e404d559e04f7963ed7bb54c
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/155039
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
geo-replication/syncdaemon/master.py | 68 ++++++++++++++++++++++++------------
1 file changed, 46 insertions(+), 22 deletions(-)
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index cd135df..bdb4da2 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -693,7 +693,7 @@ class GMasterChangelogMixin(GMasterCommon):
TYPE_ENTRY = "E "
MAX_EF_RETRIES = 10
- MAX_OE_RETRIES = 5
+ MAX_OE_RETRIES = 10
# flat directory hierarchy for gfid based access
FLAT_DIR_HIERARCHY = '.'
@@ -836,11 +836,12 @@ class GMasterChangelogMixin(GMasterCommon):
# The file exists on master but with different name.
# Probably renamed and got missed during xsync crawl.
elif failure[2]['slave_isdir']:
- realpath = os.readlink(os.path.join(gconf.local_path,
- ".glusterfs",
- slave_gfid[0:2],
- slave_gfid[2:4],
- slave_gfid))
+ realpath = os.readlink(os.path.join(
+ gconf.local_path,
+ ".glusterfs",
+ slave_gfid[0:2],
+ slave_gfid[2:4],
+ slave_gfid))
dst_entry = os.path.join(pfx, realpath.split('/')[-2],
realpath.split('/')[-1])
src_entry = pbname
@@ -881,25 +882,37 @@ class GMasterChangelogMixin(GMasterCommon):
gfid=failure[2]['slave_gfid'],
entry=pbname))
elif failure[1] == ENOENT:
- # Ignore ENOENT error for fix_entry_ops aka retry_count > 1
- if retry_count > 1:
- logging.info(lf('ENOENT error while fixing entry ops. '
- 'Safe to ignore, take out entry',
+ if op in ['RENAME']:
+ pbname = failure[0]['entry1']
+ else:
+ pbname = failure[0]['entry']
+
+ pargfid = pbname.split('/')[1]
+ st = lstat(os.path.join(pfx, pargfid))
+ # Safe to ignore the failure as master doesn't contain
+ # parent directory.
+ if isinstance(st, int):
+ logging.info(lf('Fixing ENOENT error in slave. Parent '
+ 'does not exist on master. Safe to '
+ 'ignore, take out entry',
retry_count=retry_count,
entry=repr(failure)))
entries.remove(failure[0])
- elif op in ('MKNOD', 'CREATE', 'MKDIR'):
- pargfid = pbname.split('/')[1]
- st = lstat(os.path.join(pfx, pargfid))
- # Safe to ignore the failure as master doesn't contain
- # parent directory.
- if isinstance(st, int):
- logging.info(lf('Fixing ENOENT error in slave. Parent '
- 'does not exist on master. Safe to '
- 'ignore, take out entry',
- retry_count=retry_count,
- entry=repr(failure)))
- entries.remove(failure[0])
+ else:
+ logging.info(lf('Fixing ENOENT error in slave. Create '
+ 'parent directory on slave.',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ realpath = os.readlink(os.path.join(gconf.local_path,
+ ".glusterfs",
+ pargfid[0:2],
+ pargfid[2:4],
+ pargfid))
+ dir_entry = os.path.join(pfx, realpath.split('/')[-2],
+ realpath.split('/')[-1])
+ fix_entry_ops.append(
+ edct('MKDIR', gfid=pargfid, entry=dir_entry,
+ mode=st.st_mode, uid=st.st_uid, gid=st.st_gid))
if fix_entry_ops:
# Process deletions of entries whose gfids are mismatched
@@ -1077,6 +1090,11 @@ class GMasterChangelogMixin(GMasterCommon):
os.path.join(pfx, ec[self.POS_ENTRY1 - 1]))
entries.append(edct(ty, gfid=gfid, entry=e1, entry1=en,
stat=st, link=rl))
+ # If src doesn't exist while doing rename, destination
+ # is created. If data is not followed by rename, this
+ # remains zero byte file on slave. Hence add data entry
+ # for renames
+ datas.add(os.path.join(pfx, gfid))
else:
# stat() to get mode and other information
if not matching_disk_gfid(gfid, en):
@@ -1100,6 +1118,12 @@ class GMasterChangelogMixin(GMasterCommon):
rl = None
entries.append(edct(ty, stat=st, entry=en, gfid=gfid,
link=rl))
+ # If src doesn't exist while doing link, destination
+ # is created based on file type. If data is not
+ # followed by link, this remains zero byte file on
+ # slave. Hence add data entry for links
+ if rl is None:
+ datas.add(os.path.join(pfx, gfid))
elif ty == 'SYMLINK':
rl = errno_wrap(os.readlink, [en], [ENOENT],
[ESTALE, EINTR])
--
1.8.3.1