From 7680b8e076e4f18be2297b822f3a09d8bd4eb985 Mon Sep 17 00:00:00 2001
From: Rinku Kothiya
Date: Fri, 27 Sep 2019 05:19:05 -0400
Subject: [PATCH] autobuild v6.0-15

Resolves: bz#1726000 bz#1731826 bz#1754407 bz#1754790 bz#1755227

Signed-off-by: Rinku Kothiya
---
 ...sub-command-during-worker-connection.patch |  73 ++++++++++
 ...nce-improvement-while-syncing-rename.patch | 156 ++++++++++++++++++
 ...arning-displayed-when-remove-brick-s.patch |  70 ++++++++
 ...six-Brick-is-going-down-unexpectedly.patch |  61 +++++++
 ...nt-filling-shd-log-with-table-not-fo.patch |  67 ++++++++
 glusterfs.spec                                |  10 +-
 6 files changed, 436 insertions(+), 1 deletion(-)
 create mode 100644 0298-geo-rep-fix-sub-command-during-worker-connection.patch
 create mode 100644 0299-geo-rep-performance-improvement-while-syncing-rename.patch
 create mode 100644 0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
 create mode 100644 0301-posix-Brick-is-going-down-unexpectedly.patch
 create mode 100644 0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch

diff --git a/0298-geo-rep-fix-sub-command-during-worker-connection.patch b/0298-geo-rep-fix-sub-command-during-worker-connection.patch
new file mode 100644
index 0000000..72daa15
--- /dev/null
+++ b/0298-geo-rep-fix-sub-command-during-worker-connection.patch
@@ -0,0 +1,73 @@
+From f65f4739914cf317da7e5eaa3b5a06fe64f338c2 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar
+Date: Sat, 21 Sep 2019 01:07:30 +0530
+Subject: [PATCH 298/302] geo-rep : fix sub-command during worker connection
+
+Problem:
+
+A geo-rep session for a non-root user goes faulty.
+
+Solution:
+
+During worker start we do not construct the slave URL; we use
+'args.resource_remote', which is basically just the slave hostname.
+This works for root sessions but fails for non-root sessions
+during the ssh command.
+Using the slave URL solves this issue.
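+
+As an illustration, a minimal standalone sketch of the intended slavehost
+selection (assuming an 'args.slave' value of the form '[user@]host::vol';
+'build_slavehost' is a hypothetical helper, not the actual gsyncd code):
+
+    def build_slavehost(slave, resource_remote):
+        slave_url, slavevol = slave.split("::")
+        if "@" not in slave_url:
+            # Root session: the plain slave hostname is enough.
+            return resource_remote, slavevol
+        # Non-root session: keep the user prefix so that ssh connects
+        # as the geo-rep user instead of root.
+        user = slave_url.split("@")[0]
+        return "%s@%s" % (user, resource_remote), slavevol
+
+    # e.g. build_slavehost("geouser@node1::slavevol", "node2")
+    # returns ("geouser@node2", "slavevol")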
+
+Backport of:
+ >fixes: bz#1753928
+ >Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22
+ >Signed-off-by: Sunny Kumar
+
+Upstream patch:
+ https://review.gluster.org/#/c/glusterfs/+/23465/
+
+BUG: 1754407
+Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22
+Signed-off-by: Sunny Kumar
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181895
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ geo-replication/syncdaemon/subcmds.py | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py
+index 8de7db2..f8515f2 100644
+--- a/geo-replication/syncdaemon/subcmds.py
++++ b/geo-replication/syncdaemon/subcmds.py
+@@ -73,8 +73,11 @@ def subcmd_worker(args):
+     Popen.init_errhandler()
+     fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+     local = GLUSTER("localhost", args.master)
+-    slavevol = args.slave.split("::")[-1]
+-    slavehost = args.resource_remote
++    slave_url, slavevol = args.slave.split("::")
++    if "@" not in slave_url:
++        slavehost = args.resource_remote
++    else:
++        slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote)
+     remote = SSH(slavehost, slavevol)
+     remote.connect_remote()
+     local.connect()
+--
+1.8.3.1
+
diff --git a/0299-geo-rep-performance-improvement-while-syncing-rename.patch b/0299-geo-rep-performance-improvement-while-syncing-rename.patch
new file mode 100644
index 0000000..9dea8cc
--- /dev/null
+++ b/0299-geo-rep-performance-improvement-while-syncing-rename.patch
@@ -0,0 +1,156 @@
+From f293f7ac2f75c58d81da1229b484eb530b7083b5 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar
+Date: Fri, 20 Sep 2019 09:39:12 +0530
+Subject: [PATCH 299/302] geo-rep: performance improvement while syncing
+ renames with existing gfid
+
+Problem:
+The fix for bug [1] addresses an issue of data inconsistency when
+handling RENAME with an existing destination. That fix requires some
+performance tuning, since the issue occurs in heavy rename workloads.
+
+Solution:
+If the distribution count of the master volume is one, do not verify
+the op on the master and go ahead with the rename.
+
+The performance improvement with this patch can only be observed if
+the master volume has a distribution count of one.
+
+[1]. https://bugzilla.redhat.com/show_bug.cgi?id=1694820
+Backport of:
+
+ >fixes: bz#1753857
+ >Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b
+ >Signed-off-by: Sunny Kumar
+
+Upstream Patch:
+ https://review.gluster.org/#/c/glusterfs/+/23459/
+
+BUG: 1726000
+Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b
+Signed-off-by: Sunny Kumar
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181893
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ geo-replication/gsyncd.conf.in           |  5 +++++
+ geo-replication/syncdaemon/gsyncd.py     |  2 ++
+ geo-replication/syncdaemon/monitor.py    |  2 ++
+ geo-replication/syncdaemon/resource.py   | 13 +++++++++++--
+ geo-replication/syncdaemon/syncdutils.py | 11 +++++++++++
+ 5 files changed, 31 insertions(+), 2 deletions(-)
+
+diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
+index 5ebd57a..9155cd8 100644
+--- a/geo-replication/gsyncd.conf.in
++++ b/geo-replication/gsyncd.conf.in
+@@ -23,6 +23,11 @@ configurable=false
+ type=int
+ value=1
+ 
++[master-distribution-count]
++configurable=false
++type=int
++value=1
++
+ [glusterd-workdir]
+ value = @GLUSTERD_WORKDIR@
+ 
+diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
+index a4c6f32..6ae5269 100644
+--- a/geo-replication/syncdaemon/gsyncd.py
++++ b/geo-replication/syncdaemon/gsyncd.py
+@@ -134,6 +134,8 @@ def main():
+                    help="Directory where Gluster binaries exist on slave")
+     p.add_argument("--slave-access-mount", action="store_true",
+                    help="Do not lazy umount the slave volume")
++    p.add_argument("--master-dist-count", type=int,
++                   help="Master Distribution count")
+ 
+     # Status
+     p = sp.add_parser("status")
+diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
+index 234f3f1..236afe7 100644
+--- a/geo-replication/syncdaemon/monitor.py
++++ b/geo-replication/syncdaemon/monitor.py
+@@ -37,6 +37,8 @@ def get_subvol_num(brick_idx, vol, hot):
+     tier = vol.is_tier()
+     disperse_count = vol.disperse_count(tier, hot)
+     replica_count = vol.replica_count(tier, hot)
++    distribute_count = vol.distribution_count(tier, hot)
++    gconf.setconfig("master-distribution-count", distribute_count)
+ 
+     if (tier and not hot):
+         brick_idx = brick_idx - vol.get_hot_bricks_count(tier)
+diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
+index b16db60..189d8a1 100644
+--- a/geo-replication/syncdaemon/resource.py
++++ b/geo-replication/syncdaemon/resource.py
+@@ -377,6 +377,7 @@ class Server(object):
+     def entry_ops(cls, entries):
+         pfx = gauxpfx()
+         logging.debug('entries: %s' % repr(entries))
++        dist_count = rconf.args.master_dist_count
+ 
+         def entry_purge(op, entry, gfid, e, uid, gid):
+             # This is an extremely racy code and needs to be fixed ASAP.
+@@ -686,9 +687,15 @@ class Server(object):
+                             raise
+                     else:
+                         raise
+-                elif not matching_disk_gfid(gfid, en):
++                elif not matching_disk_gfid(gfid, en) and dist_count > 1:
+                     collect_failure(e, EEXIST, uid, gid, True)
+                 else:
++                    # We are here, which means matching_disk_gfid has
++                    # returned false for both source and destination,
++                    # and the distribution count of the master volume
++                    # is greater than one. That means both the source
++                    # and destination exist and are not hardlinks, so
++                    # it is safe to go ahead with the rename here.
+                    rename_with_disk_gfid_confirmation(gfid, entry, en,
+                                                       uid, gid)
+         if blob:
+@@ -1409,7 +1416,9 @@ class SSH(object):
+                          '--slave-gluster-log-level',
+                          gconf.get("slave-gluster-log-level"),
+                          '--slave-gluster-command-dir',
+-                         gconf.get("slave-gluster-command-dir")]
++                         gconf.get("slave-gluster-command-dir"),
++                         '--master-dist-count',
++                         str(gconf.get("master-distribution-count"))]
+ 
+         if gconf.get("slave-access-mount"):
+             args_to_slave.append('--slave-access-mount')
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index 2ee10ac..aadaebd 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -926,6 +926,14 @@ class Volinfo(object):
+         else:
+             return int(self.get('disperseCount')[0].text)
+ 
++    def distribution_count(self, tier, hot):
++        if (tier and hot):
++            return int(self.get('hotBricks/hotdistCount')[0].text)
++        elif (tier and not hot):
++            return int(self.get('coldBricks/colddistCount')[0].text)
++        else:
++            return int(self.get('distCount')[0].text)
++
+     @property
+     @memoize
+     def hot_bricks(self):
+@@ -994,6 +1002,9 @@ class VolinfoFromGconf(object):
+     def disperse_count(self, tier, hot):
+         return gconf.get("master-disperse-count")
+ 
++    def distribution_count(self, tier, hot):
++        return gconf.get("master-distribution-count")
++
+     @property
+     @memoize
+     def hot_bricks(self):
+--
+1.8.3.1
+
diff --git a/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch b/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
new file mode 100644
index 0000000..62bac41
--- /dev/null
+++ b/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
@@ -0,0 +1,70 @@
+From 039a3f81209706261fc809eac94564e81a3377da Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde
+Date: Wed, 25 Sep 2019 14:55:19 +0530
+Subject: [PATCH 300/302] cli: remove the warning displayed when remove brick
+ start issued
+
+The remove-brick start command displays the below warning:
+
+It is recommended that remove-brick be run with cluster.force-migration
+option disabled to prevent possible data corruption. Doing so will ensure
+that files that receive writes during migration will not be migrated and
+will need to be manually copied after the remove-brick commit operation.
+Please check the value of the option and update accordingly.
+Do you want to continue with your current cluster.force-migration settings? (y/n)
+
+As we are not qualifying cluster.force-migration for 3.5.0,
+we should not display this message. So, removing it.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1755227
+Change-Id: I409f2059d43c5e867788f19d2ccb8d6d839520f7
+fixes: bz#1755227
+Signed-off-by: Sanju Rakonde
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182009
+Tested-by: RHGS Build Bot
+Reviewed-by: Atin Mukherjee
+---
+ cli/src/cli-cmd-parser.c |  2 --
+ cli/src/cli-cmd-volume.c | 11 -----------
+ 2 files changed, 13 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 92ceb8e..4456a7b 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -2101,8 +2101,6 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
+         wordcount--;
+         if (!strcmp("start", w)) {
+             command = GF_OP_CMD_START;
+-            if (question)
+-                *question = 1;
+         } else if (!strcmp("commit", w)) {
+             command = GF_OP_CMD_COMMIT;
+         } else if (!strcmp("stop", w)) {
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index a42e663..6b958bd 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -2088,17 +2088,6 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
+             "Remove-brick force will not migrate files from the "
+             "removed bricks, so they will no longer be available"
+             " on the volume.\nDo you want to continue?";
+-    } else if (command == GF_OP_CMD_START) {
+-        question =
+-            "It is recommended that remove-brick be run with"
+-            " cluster.force-migration option disabled to prevent"
+-            " possible data corruption. Doing so will ensure that"
+-            " files that receive writes during migration will not"
+-            " be migrated and will need to be manually copied"
+-            " after the remove-brick commit operation. Please"
+-            " check the value of the option and update accordingly."
+-            " \nDo you want to continue with your current"
+-            " cluster.force-migration settings?";
+     }
+ 
+     if (!brick_count) {
+--
+1.8.3.1
+
diff --git a/0301-posix-Brick-is-going-down-unexpectedly.patch b/0301-posix-Brick-is-going-down-unexpectedly.patch
new file mode 100644
index 0000000..270a0d7
--- /dev/null
+++ b/0301-posix-Brick-is-going-down-unexpectedly.patch
@@ -0,0 +1,61 @@
+From 913a0dc8f1eaa2fb18a6ebd6fcf66f46b48039f1 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal
+Date: Wed, 18 Sep 2019 19:11:33 +0530
+Subject: [PATCH 301/302] posix: Brick is going down unexpectedly
+
+Problem: In a brick_mux environment, while multiple volumes are
+         created (1-1000), a brick sometimes goes down due to a
+         health_check thread failure.
+
+Solution: Ignore the EAGAIN error in the health_check thread code
+          to avoid the issue.
+
+> Change-Id: Id44c59f8e071a363a14d09d188813a6633855213
+> Fixes: bz#1751907
+> Signed-off-by: Mohit Agrawal
+> (Cherry picked from commit c4d926900dc36f71c04b3f65ceca5150ce0e8c81)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23437/)
+
+Change-Id: Id44c59f8e071a363a14d09d188813a6633855213
+BUG: 1731826
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182106
+Tested-by: Mohit Agrawal
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/storage/posix/src/posix-helpers.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 6a1a35c..35dd3b6 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -2108,14 +2108,20 @@ out:
+     if (fd != -1) {
+         sys_close(fd);
+     }
++
+     if (ret && file_path[0]) {
+         gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED,
+                "%s() on %s returned ret is %d error is %s", op, file_path,
+                ret, ret != -1 ? strerror(ret) : strerror(op_errno));
+-        gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
+-                 "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
+-                 file_path, strerror(op_errno), priv->hostname, priv->base_path,
+-                 timeout);
++
++        if ((op_errno == EAGAIN) || (ret == EAGAIN)) {
++            ret = 0;
++        } else {
++            gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
++                     "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
++                     file_path, strerror(op_errno), priv->hostname,
++                     priv->base_path, timeout);
++        }
+     }
+     return ret;
+ }
+--
+1.8.3.1
+
diff --git a/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch b/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
new file mode 100644
index 0000000..7972767
--- /dev/null
+++ b/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
@@ -0,0 +1,67 @@
+From fb1d503791c874296afab0cd7be59b6865340d72 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez
+Date: Wed, 25 Sep 2019 11:56:35 +0200
+Subject: [PATCH 302/302] cluster/ec: prevent filling shd log with "table not
+ found" messages
+
+When the self-heal daemon receives an inodelk contention notification,
+it tries to locate the related inode using inode_find() and the inode
+table owned by the top-most xlator, which in this case doesn't have any
+inode table. This causes many messages to be logged by the inode_find()
+function because the inode table passed is NULL.
+
+This patch avoids that by making sure the inode table is not NULL
+before calling inode_find().
+
+Upstream patch:
+> Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23481
+> Fixes: bz#1755344
+> Signed-off-by: Xavi Hernandez
+
+Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f
+BUG: 1754790
+Signed-off-by: Xavi Hernandez
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182207
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/cluster/ec/src/ec.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 19094c4..3f31c74 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -463,6 +463,7 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+     struct gf_upcall_cache_invalidation *ci = NULL;
+     struct gf_upcall_inodelk_contention *lc = NULL;
+     inode_t *inode;
++    inode_table_t *table;
+ 
+     switch (upcall->event_type) {
+         case GF_UPCALL_CACHE_INVALIDATION:
+@@ -476,8 +477,18 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+                 /* The lock is not owned by EC, ignore it. */
+                 return _gf_true;
+             }
+-            inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable,
+-                               upcall->gfid);
++            table = ((xlator_t *)ec->xl->graph->top)->itable;
++            if (table == NULL) {
++                /* Self-heal daemon doesn't have an inode table on the top
++                 * xlator because it doesn't need it. In this case we should
++                 * use the inode table managed by EC itself, where all inodes
++                 * being healed should be present. However, self-heal doesn't
++                 * use eager-locking, and inodelks are already released as
++                 * soon as possible. In this case we can safely ignore these
++                 * notifications. */
++                return _gf_false;
++            }
++            inode = inode_find(table, upcall->gfid);
+             /* If inode is not found, it means that it's already released,
+              * so we can ignore it. Probably it has been released and
+              * destroyed while the contention notification was being sent.
+-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index 3aaf25c..4605fb7 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -231,7 +231,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 14%{?dist} +Release: 15%{?dist} ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ @@ -606,6 +606,11 @@ Patch0294: 0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch Patch0295: 0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch Patch0296: 0296-glusterfind-pre-command-failure-on-a-modify.patch Patch0297: 0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch +Patch0298: 0298-geo-rep-fix-sub-command-during-worker-connection.patch +Patch0299: 0299-geo-rep-performance-improvement-while-syncing-rename.patch +Patch0300: 0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch +Patch0301: 0301-posix-Brick-is-going-down-unexpectedly.patch +Patch0302: 0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -2319,6 +2324,9 @@ fi %endif %changelog +* Fri Sep 27 2019 Rinku Kothiya - 6.0-15 +- fixes bugs bz#1726000 bz#1731826 bz#1754407 bz#1754790 bz#1755227 + * Fri Sep 20 2019 Sunil Kumar Acharya - 6.0-14 - fixes bugs bz#1719171 bz#1728673 bz#1731896 bz#1732443 bz#1733970 bz#1745107 bz#1746027 bz#1748688 bz#1750241 bz#1572163