autobuild v6.0-15

Resolves: bz#1726000 bz#1731826 bz#1754407 bz#1754790 bz#1755227
Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
Rinku Kothiya 2019-09-27 05:19:05 -04:00
parent 4db34b7f8e
commit 7680b8e076
6 changed files with 419 additions and 1 deletion

0298-geo-rep-fix-sub-command-during-worker-connection.patch
@@ -0,0 +1,56 @@
From f65f4739914cf317da7e5eaa3b5a06fe64f338c2 Mon Sep 17 00:00:00 2001
From: Sunny Kumar <sunkumar@redhat.com>
Date: Sat, 21 Sep 2019 01:07:30 +0530
Subject: [PATCH 298/302] geo-rep : fix sub-command during worker connection
Problem:
Geo-rep sessions for non-root users go faulty.
Solution:
During worker start the slave URL is not constructed; 'args.resource_remote',
which is basically just the slave hostname, is used instead.
This works for root sessions but fails for non-root sessions when the ssh
command is built.
Using the slave URL solves this issue (a short standalone sketch of the
parsing follows this patch).
Backport of:
>fixes: bz#1753928
>Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22
>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Upstream patch:
https://review.gluster.org/#/c/glusterfs/+/23465/
BUG: 1754407
Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/181895
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
geo-replication/syncdaemon/subcmds.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py
index 8de7db2..f8515f2 100644
--- a/geo-replication/syncdaemon/subcmds.py
+++ b/geo-replication/syncdaemon/subcmds.py
@@ -73,8 +73,11 @@ def subcmd_worker(args):
Popen.init_errhandler()
fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
local = GLUSTER("localhost", args.master)
- slavevol = args.slave.split("::")[-1]
- slavehost = args.resource_remote
+ slave_url, slavevol = args.slave.split("::")
+ if "@" not in slave_url:
+ slavehost = args.resource_remote
+ else:
+ slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote)
remote = SSH(slavehost, slavevol)
remote.connect_remote()
local.connect()
--
1.8.3.1
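
For illustration, here is a minimal standalone Python sketch of the parsing the
patched subcmd_worker() performs when it derives the ssh endpoint; the helper
name slave_ssh_host() and the sample host/volume values are invented for this
example and are not part of gsyncd.

# Minimal sketch, assuming the slave spec has the form "[user@]host::volume".
def slave_ssh_host(slave, resource_remote):
    slave_url, slavevol = slave.split("::")
    if "@" not in slave_url:
        # Root session: the remote resource hostname alone is enough.
        return resource_remote, slavevol
    # Non-root session: keep the user from the slave URL so ssh logs in
    # as that user instead of root.
    return "%s@%s" % (slave_url.split("@")[0], resource_remote), slavevol

print(slave_ssh_host("slave1::gvol", "slave1.example.com"))
# ('slave1.example.com', 'gvol')
print(slave_ssh_host("geoaccount@slave1::gvol", "slave1.example.com"))
# ('geoaccount@slave1.example.com', 'gvol')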

0299-geo-rep-performance-improvement-while-syncing-rename.patch
@@ -0,0 +1,156 @@
From f293f7ac2f75c58d81da1229b484eb530b7083b5 Mon Sep 17 00:00:00 2001
From: Sunny Kumar <sunkumar@redhat.com>
Date: Fri, 20 Sep 2019 09:39:12 +0530
Subject: [PATCH 299/302] geo-rep: performance improvement while syncing
renames with existing gfid
Problem:
The fix for bug [1] addresses a data-inconsistency issue when handling RENAME
with an existing destination. That fix needs some performance tuning because
the issue occurs in rename-heavy workloads.
Solution:
If the distribution count of the master volume is one, do not verify the ops
on the master and go ahead with the rename.
The performance improvement from this patch is only observed when the master
volume has a distribution count of one (a short sketch follows this patch).
[1]. https://bugzilla.redhat.com/show_bug.cgi?id=1694820
Backport of:
>fixes: bz#1753857
>Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b
>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Upstream Patch:
https://review.gluster.org/#/c/glusterfs/+/23459/
BUG: 1726000
Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/181893
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
geo-replication/gsyncd.conf.in | 5 +++++
geo-replication/syncdaemon/gsyncd.py | 2 ++
geo-replication/syncdaemon/monitor.py | 2 ++
geo-replication/syncdaemon/resource.py | 13 +++++++++++--
geo-replication/syncdaemon/syncdutils.py | 11 +++++++++++
5 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
index 5ebd57a..9155cd8 100644
--- a/geo-replication/gsyncd.conf.in
+++ b/geo-replication/gsyncd.conf.in
@@ -23,6 +23,11 @@ configurable=false
type=int
value=1
+[master-distribution-count]
+configurable=false
+type=int
+value=1
+
[glusterd-workdir]
value = @GLUSTERD_WORKDIR@
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index a4c6f32..6ae5269 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -134,6 +134,8 @@ def main():
help="Directory where Gluster binaries exist on slave")
p.add_argument("--slave-access-mount", action="store_true",
help="Do not lazy umount the slave volume")
+ p.add_argument("--master-dist-count", type=int,
+ help="Master Distribution count")
# Status
p = sp.add_parser("status")
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index 234f3f1..236afe7 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -37,6 +37,8 @@ def get_subvol_num(brick_idx, vol, hot):
tier = vol.is_tier()
disperse_count = vol.disperse_count(tier, hot)
replica_count = vol.replica_count(tier, hot)
+ distribute_count = vol.distribution_count(tier, hot)
+ gconf.setconfig("master-distribution-count", distribute_count)
if (tier and not hot):
brick_idx = brick_idx - vol.get_hot_bricks_count(tier)
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index b16db60..189d8a1 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -377,6 +377,7 @@ class Server(object):
def entry_ops(cls, entries):
pfx = gauxpfx()
logging.debug('entries: %s' % repr(entries))
+ dist_count = rconf.args.master_dist_count
def entry_purge(op, entry, gfid, e, uid, gid):
# This is an extremely racy code and needs to be fixed ASAP.
@@ -686,9 +687,15 @@ class Server(object):
raise
else:
raise
- elif not matching_disk_gfid(gfid, en):
+ elif not matching_disk_gfid(gfid, en) and dist_count > 1:
collect_failure(e, EEXIST, uid, gid, True)
else:
+ # We are here which means matching_disk_gfid for
+ # both source and destination has returned false
+ # and distribution count for master vol is greater
+ # than one, which basically says both the source
+ # and destination exist and are not hardlinks.
+ # So we are safe to go ahead with rename here.
rename_with_disk_gfid_confirmation(gfid, entry, en,
uid, gid)
if blob:
@@ -1409,7 +1416,9 @@ class SSH(object):
'--slave-gluster-log-level',
gconf.get("slave-gluster-log-level"),
'--slave-gluster-command-dir',
- gconf.get("slave-gluster-command-dir")]
+ gconf.get("slave-gluster-command-dir"),
+ '--master-dist-count',
+ str(gconf.get("master-distribution-count"))]
if gconf.get("slave-access-mount"):
args_to_slave.append('--slave-access-mount')
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index 2ee10ac..aadaebd 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -926,6 +926,14 @@ class Volinfo(object):
else:
return int(self.get('disperseCount')[0].text)
+ def distribution_count(self, tier, hot):
+ if (tier and hot):
+ return int(self.get('hotBricks/hotdistCount')[0].text)
+ elif (tier and not hot):
+ return int(self.get('coldBricks/colddistCount')[0].text)
+ else:
+ return int(self.get('distCount')[0].text)
+
@property
@memoize
def hot_bricks(self):
@@ -994,6 +1002,9 @@ class VolinfoFromGconf(object):
def disperse_count(self, tier, hot):
return gconf.get("master-disperse-count")
+ def distribution_count(self, tier, hot):
+ return gconf.get("master-distribution-count")
+
@property
@memoize
def hot_bricks(self):
--
1.8.3.1
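
To show where the new count comes from, below is a self-contained sketch of the
non-tiered case of the new distribution_count() helper, reading distCount from
volume-info XML; the sample document SAMPLE_VOLINFO and the free-standing
function are assumptions made for illustration, not captured gluster output.

import xml.etree.ElementTree as ET

SAMPLE_VOLINFO = """
<volInfo><volumes><volume>
  <name>master</name>
  <distCount>1</distCount>
  <replicaCount>3</replicaCount>
</volume></volumes></volInfo>
"""

def distribution_count(volinfo_xml):
    # Non-tiered case: read <distCount> for the volume.
    vol = ET.fromstring(volinfo_xml).find("volumes/volume")
    return int(vol.find("distCount").text)

dist_count = distribution_count(SAMPLE_VOLINFO)
print(dist_count)  # 1: rename directly; > 1 keeps the safe verification path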

0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
@@ -0,0 +1,70 @@
From 039a3f81209706261fc809eac94564e81a3377da Mon Sep 17 00:00:00 2001
From: Sanju Rakonde <srakonde@redhat.com>
Date: Wed, 25 Sep 2019 14:55:19 +0530
Subject: [PATCH 300/302] cli: remove the warning displayed when remove brick
start issued
The remove-brick start command displays the warning below:
It is recommended that remove-brick be run with cluster.force-migration
option disabled to prevent possible data corruption. Doing so will ensure
that files that receive writes during migration will not be migrated and
will need to be manually copied after the remove-brick commit operation.
Please check the value of the option and update accordingly.
Do you want to continue with your current cluster.force-migration settings? (y/n)
Since cluster.force-migration is not being qualified for 3.5.0, this message
should not be displayed, so it is removed.
Label: DOWNSTREAM ONLY
BUG: 1755227
Change-Id: I409f2059d43c5e867788f19d2ccb8d6d839520f7
fixes: bz#1755227
Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/182009
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
cli/src/cli-cmd-parser.c | 2 --
cli/src/cli-cmd-volume.c | 11 -----------
2 files changed, 13 deletions(-)
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 92ceb8e..4456a7b 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -2101,8 +2101,6 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
wordcount--;
if (!strcmp("start", w)) {
command = GF_OP_CMD_START;
- if (question)
- *question = 1;
} else if (!strcmp("commit", w)) {
command = GF_OP_CMD_COMMIT;
} else if (!strcmp("stop", w)) {
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index a42e663..6b958bd 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -2088,17 +2088,6 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
"Remove-brick force will not migrate files from the "
"removed bricks, so they will no longer be available"
" on the volume.\nDo you want to continue?";
- } else if (command == GF_OP_CMD_START) {
- question =
- "It is recommended that remove-brick be run with"
- " cluster.force-migration option disabled to prevent"
- " possible data corruption. Doing so will ensure that"
- " files that receive writes during migration will not"
- " be migrated and will need to be manually copied"
- " after the remove-brick commit operation. Please"
- " check the value of the option and update accordingly."
- " \nDo you want to continue with your current"
- " cluster.force-migration settings?";
}
if (!brick_count) {
--
1.8.3.1

0301-posix-Brick-is-going-down-unexpectedly.patch
@@ -0,0 +1,61 @@
From 913a0dc8f1eaa2fb18a6ebd6fcf66f46b48039f1 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawal@redhat.com>
Date: Wed, 18 Sep 2019 19:11:33 +0530
Subject: [PATCH 301/302] posix: Brick is going down unexpectedly
Problem: In a brick-mux environment, while many volumes (1-1000) are being
created, a brick sometimes goes down because of a health_check thread
failure.
Solution: Ignore the EAGAIN error in the health_check thread code to avoid
the issue (a conceptual sketch follows this patch).
> Change-Id: Id44c59f8e071a363a14d09d188813a6633855213
> Fixes: bz#1751907
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
> (Cherry picked from commit c4d926900dc36f71c04b3f65ceca5150ce0e8c81)
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23437/)
Change-Id: Id44c59f8e071a363a14d09d188813a6633855213
BUG: 1731826
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/182106
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/storage/posix/src/posix-helpers.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 6a1a35c..35dd3b6 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -2108,14 +2108,20 @@ out:
if (fd != -1) {
sys_close(fd);
}
+
if (ret && file_path[0]) {
gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED,
"%s() on %s returned ret is %d error is %s", op, file_path, ret,
ret != -1 ? strerror(ret) : strerror(op_errno));
- gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
- "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
- file_path, strerror(op_errno), priv->hostname, priv->base_path,
- timeout);
+
+ if ((op_errno == EAGAIN) || (ret == EAGAIN)) {
+ ret = 0;
+ } else {
+ gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
+ "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
+ file_path, strerror(op_errno), priv->hostname,
+ priv->base_path, timeout);
+ }
}
return ret;
}
--
1.8.3.1
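
Conceptually, the fix treats EAGAIN from the health-check I/O as transient
rather than fatal. A rough sketch of that idea follows, written in Python for
brevity; the authoritative change is the C hunk above, and the function, the
file it writes, and its return convention are illustrative only.

import errno
import os

def health_check_once(path):
    """Return True if the brick should stay up after this check."""
    try:
        fd = os.open(path, os.O_CREAT | os.O_WRONLY | os.O_NONBLOCK, 0o600)
        try:
            os.write(fd, b"0")
        finally:
            os.close(fd)
    except OSError as e:
        if e.errno == errno.EAGAIN:
            return True   # transient: skip the failure event, retry next interval
        return False      # real failure: emit the event and take the brick down
    return True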

0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
@@ -0,0 +1,67 @@
From fb1d503791c874296afab0cd7be59b6865340d72 Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <jahernan@redhat.com>
Date: Wed, 25 Sep 2019 11:56:35 +0200
Subject: [PATCH 302/302] cluster/ec: prevent filling shd log with "table not
found" messages
When the self-heal daemon receives an inodelk contention notification, it tries
to locate the related inode using inode_find() and the inode table owned by the
top-most xlator, which in this case doesn't have any inode table. This causes
inode_find() to log many messages because the inode table passed to it is NULL.
This patch prevents that by making sure the inode table is not NULL before
calling inode_find() (a short illustrative sketch follows this patch).
Upstream patch:
> Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f
> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23481
> Fixes: bz#1755344
> Signed-off-by: Xavi Hernandez <jahernan@redhat.com>
Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f
BUG: 1754790
Signed-off-by: Xavi Hernandez <jahernan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/182207
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/ec/src/ec.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 19094c4..3f31c74 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -463,6 +463,7 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
struct gf_upcall_cache_invalidation *ci = NULL;
struct gf_upcall_inodelk_contention *lc = NULL;
inode_t *inode;
+ inode_table_t *table;
switch (upcall->event_type) {
case GF_UPCALL_CACHE_INVALIDATION:
@@ -476,8 +477,18 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
/* The lock is not owned by EC, ignore it. */
return _gf_true;
}
- inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable,
- upcall->gfid);
+ table = ((xlator_t *)ec->xl->graph->top)->itable;
+ if (table == NULL) {
+ /* Self-heal daemon doesn't have an inode table on the top
+ * xlator because it doesn't need it. In this case we should
+ * use the inode table managed by EC itself where all inodes
+ * being healed should be present. However self-heal doesn't
+ * use eager-locking and inodelk's are already released as
+ * soon as possible. In this case we can safely ignore these
+ * notifications. */
+ return _gf_false;
+ }
+ inode = inode_find(table, upcall->gfid);
/* If inode is not found, it means that it's already released,
* so we can ignore it. Probably it has been released and
* destroyed while the contention notification was being sent.
--
1.8.3.1
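
A toy model of the added guard, in Python for brevity; the authoritative change
is the C hunk above, and the Xlator class plus the dict standing in for an
inode table are purely illustrative.

class Xlator(object):
    def __init__(self, itable=None):
        self.itable = itable   # mapping gfid -> inode, or None (as in shd)

def handle_inodelk_contention(top, gfid):
    table = top.itable
    if table is None:
        # Self-heal daemon: no top-level inode table, safely ignore the upcall.
        return False
    inode = table.get(gfid)
    if inode is None:
        return False           # inode already released, nothing to do
    return True                # act on the contention for this inode

print(handle_inodelk_contention(Xlator(), "gfid-1"))                      # False
print(handle_inodelk_contention(Xlator({"gfid-1": object()}), "gfid-1"))  # True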

glusterfs.spec
@@ -231,7 +231,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
 %else
 Name: glusterfs
 Version: 6.0
-Release: 14%{?dist}
+Release: 15%{?dist}
 ExcludeArch: i686
 %endif
 License: GPLv2 or LGPLv3+
@@ -606,6 +606,11 @@ Patch0294: 0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch
 Patch0295: 0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch
 Patch0296: 0296-glusterfind-pre-command-failure-on-a-modify.patch
 Patch0297: 0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch
+Patch0298: 0298-geo-rep-fix-sub-command-during-worker-connection.patch
+Patch0299: 0299-geo-rep-performance-improvement-while-syncing-rename.patch
+Patch0300: 0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
+Patch0301: 0301-posix-Brick-is-going-down-unexpectedly.patch
+Patch0302: 0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
 
 %description
 GlusterFS is a distributed file-system capable of scaling to several
@@ -2319,6 +2324,9 @@ fi
 %endif
 
 %changelog
+* Fri Sep 27 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-15
+- fixes bugs bz#1726000 bz#1731826 bz#1754407 bz#1754790 bz#1755227
+
 * Fri Sep 20 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-14
 - fixes bugs bz#1719171 bz#1728673 bz#1731896 bz#1732443 bz#1733970
   bz#1745107 bz#1746027 bz#1748688 bz#1750241 bz#1572163