From dddf13c543d2c2ee93a107f2a0be498a4a446dae Mon Sep 17 00:00:00 2001 From: Sunil Kumar Acharya Date: Tue, 11 Jun 2019 02:14:06 -0400 Subject: [PATCH] autobuild v6.0-5 Resolves: bz#1573077 bz#1694595 bz#1703434 bz#1714536 bz#1714588 Resolves: bz#1715407 bz#1715438 Signed-off-by: Sunil Kumar Acharya --- ...gfid-conflict-resolutiong-logs-into-.patch | 119 +++++++ ...osix-add-storage.reserve-size-option.patch | 295 ++++++++++++++++++ ...-Fix-race-with-ec_fini-and-ec_notify.patch | 141 +++++++++ ...ips-mode-rchecksum-option-in-the-inf.patch | 52 +++ ...ore-logging-in-xlator_is_cleanup_sta.patch | 53 ++++ ...-between-xlator-cleanup-and-on-going.patch | 241 ++++++++++++++ ...ix-crash-during-background-shard-del.patch | 289 +++++++++++++++++ ...ix-extra-unref-when-inode-object-is-.patch | 161 ++++++++++ ...t-treat-all-bricks-having-metadata-p.patch | 287 +++++++++++++++++ glusterfs.spec | 19 +- 10 files changed, 1654 insertions(+), 3 deletions(-) create mode 100644 0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch create mode 100644 0171-posix-add-storage.reserve-size-option.patch create mode 100644 0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch create mode 100644 0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch create mode 100644 0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch create mode 100644 0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch create mode 100644 0176-features-shard-Fix-crash-during-background-shard-del.patch create mode 100644 0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch create mode 100644 0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch diff --git a/0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch b/0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch new file mode 100644 index 0000000..a7ea429 --- /dev/null +++ b/0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch @@ -0,0 +1,119 @@ +From c4e292379928eaf1ebb47ee1c8e9b1eabbe90574 Mon Sep 17 00:00:00 2001 +From: Kotresh HR +Date: Tue, 14 May 2019 11:05:45 +0530 +Subject: [PATCH 170/178] geo-rep: Convert gfid conflict resolutiong logs into + debug + +The gfid conflict resolution code path is not supposed +to hit in generic code path. But few of the heavy rename +workload (BUG: 1694820) makes it a generic case. So +logging the entries to be fixed as INFO floods the log +in these particular workloads. Hence convert them to DEBUG. + +Backport of: + > Patch: https://review.gluster.org/22720 + > fixes: bz#1709653 + > Change-Id: I4d5e102b87be5fe5b54f78f329e588882d72b9d9 + +BUG: 1714536 +Change-Id: I4d5e102b87be5fe5b54f78f329e588882d72b9d9 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/172731 +Reviewed-by: Atin Mukherjee +--- + geo-replication/syncdaemon/master.py | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 42c86d7..3f98337 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -811,7 +811,7 @@ class GMasterChangelogMixin(GMasterCommon): + st = lstat(os.path.join(pfx, slave_gfid)) + # Takes care of scenarios with no hardlinks + if isinstance(st, int) and st == ENOENT: +- logging.info(lf('Entry not present on master. Fixing gfid ' ++ logging.debug(lf('Entry not present on master. Fixing gfid ' + 'mismatch in slave. 
Deleting the entry', + retry_count=retry_count, + entry=repr(failure))) +@@ -843,7 +843,7 @@ class GMasterChangelogMixin(GMasterCommon): + if matching_disk_gfid(slave_gfid, pbname): + # Safe to ignore the failure as master contains same + # file with same gfid. Remove entry from entries list +- logging.info(lf('Fixing gfid mismatch in slave. ' ++ logging.debug(lf('Fixing gfid mismatch in slave. ' + ' Safe to ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) +@@ -865,14 +865,14 @@ class GMasterChangelogMixin(GMasterCommon): + dst_entry = os.path.join(pfx, realpath.split('/')[-2], + realpath.split('/')[-1]) + src_entry = pbname +- logging.info(lf('Fixing dir name/gfid mismatch in ' ++ logging.debug(lf('Fixing dir name/gfid mismatch in ' + 'slave', retry_count=retry_count, + entry=repr(failure))) + if src_entry == dst_entry: + # Safe to ignore the failure as master contains + # same directory as in slave with same gfid. + # Remove the failure entry from entries list +- logging.info(lf('Fixing dir name/gfid mismatch' ++ logging.debug(lf('Fixing dir name/gfid mismatch' + ' in slave. Safe to ignore, ' + 'take out entry', + retry_count=retry_count, +@@ -886,7 +886,7 @@ class GMasterChangelogMixin(GMasterCommon): + entry=src_entry, + entry1=dst_entry, stat=st, + link=None) +- logging.info(lf('Fixing dir name/gfid mismatch' ++ logging.debug(lf('Fixing dir name/gfid mismatch' + ' in slave. Renaming', + retry_count=retry_count, + entry=repr(rename_dict))) +@@ -896,7 +896,7 @@ class GMasterChangelogMixin(GMasterCommon): + # renamed file exists and we are sure from + # matching_disk_gfid check that the entry doesn't + # exist with same gfid so we can safely delete on slave +- logging.info(lf('Fixing file gfid mismatch in slave. ' ++ logging.debug(lf('Fixing file gfid mismatch in slave. ' + 'Hardlink/Rename Case. Deleting entry', + retry_count=retry_count, + entry=repr(failure))) +@@ -915,7 +915,7 @@ class GMasterChangelogMixin(GMasterCommon): + # Safe to ignore the failure as master doesn't contain + # parent directory. + if isinstance(st, int): +- logging.info(lf('Fixing ENOENT error in slave. Parent ' ++ logging.debug(lf('Fixing ENOENT error in slave. Parent ' + 'does not exist on master. Safe to ' + 'ignore, take out entry', + retry_count=retry_count, +@@ -925,7 +925,7 @@ class GMasterChangelogMixin(GMasterCommon): + except ValueError: + pass + else: +- logging.info(lf('Fixing ENOENT error in slave. Create ' ++ logging.debug(lf('Fixing ENOENT error in slave. Create ' + 'parent directory on slave.', + retry_count=retry_count, + entry=repr(failure))) +@@ -1223,10 +1223,13 @@ class GMasterChangelogMixin(GMasterCommon): + + if gconf.get("gfid-conflict-resolution"): + count = 0 ++ if failures: ++ logging.info(lf('Entry ops failed with gfid mismatch', ++ count=len(failures))) + while failures and count < self.MAX_OE_RETRIES: + count += 1 + self.handle_entry_failures(failures, entries) +- logging.info("Retry original entries. 
count = %s" % count) ++ logging.info(lf('Retry original entries', count=count)) + failures = self.slave.server.entry_ops(entries) + if not failures: + logging.info("Successfully fixed all entry ops with " +-- +1.8.3.1 + diff --git a/0171-posix-add-storage.reserve-size-option.patch b/0171-posix-add-storage.reserve-size-option.patch new file mode 100644 index 0000000..2e76908 --- /dev/null +++ b/0171-posix-add-storage.reserve-size-option.patch @@ -0,0 +1,295 @@ +From 4d82c7879387e6f7963b4d9c84c4ff8a1788055d Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha +Date: Mon, 19 Nov 2018 22:15:25 +0530 +Subject: [PATCH 171/178] posix: add storage.reserve-size option + +storage.reserve-size option will take size as input +instead of percentage. If set, priority will be given to +storage.reserve-size over storage.reserve. Default value +of this option is 0. + +> fixes: bz#1651445 +> Change-Id: I7a7342c68e436e8bf65bd39c567512ee04abbcea +> Signed-off-by: Sheetal Pamecha +> Cherry pick from commit 950726dfc8e3171bef625b563c0c6dbba1ec2928 +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21686/ + +BUG: 1573077 +Change-Id: I7a7342c68e436e8bf65bd39c567512ee04abbcea +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/172709 +Reviewed-by: Atin Mukherjee +Tested-by: RHGS Build Bot +--- + tests/bugs/posix/bug-1651445.t | 58 +++++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 33 ++++++++++++++ + xlators/storage/posix/src/posix-common.c | 34 ++++++++++++--- + xlators/storage/posix/src/posix-helpers.c | 13 ++++-- + xlators/storage/posix/src/posix-inode-fd-ops.c | 10 +++-- + xlators/storage/posix/src/posix.h | 3 +- + 6 files changed, 138 insertions(+), 13 deletions(-) + create mode 100644 tests/bugs/posix/bug-1651445.t + +diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t +new file mode 100644 +index 0000000..f6f1833 +--- /dev/null ++++ b/tests/bugs/posix/bug-1651445.t +@@ -0,0 +1,58 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../snapshot.rc ++ ++cleanup ++ ++TEST verify_lvm_version ++TEST glusterd ++TEST pidof glusterd ++TEST init_n_bricks 3 ++TEST setup_lvm 3 ++ ++TEST $CLI volume create $V0 replica 3 $H0:$L{1,2,3} ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST $CLI volume set $V0 storage.reserve-size 10MB ++ ++#No effect as priority to reserve-size ++TEST $CLI volume set $V0 storage.reserve 20 ++ ++TEST dd if=/dev/zero of=$M0/a bs=100M count=1 ++sleep 5 ++ ++#Below dd confirms posix is giving priority to reserve-size ++TEST dd if=/dev/zero of=$M0/b bs=40M count=1 ++ ++sleep 5 ++TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1 ++ ++rm -rf $M0/* ++#Size will reserve from the previously set reserve option = 20% ++TEST $CLI volume set $V0 storage.reserve-size 0 ++ ++#Overwrite reserve option ++TEST $CLI volume set $V0 storage.reserve-size 40MB ++ ++#wait 5s to reset disk_space_full flag ++sleep 5 ++ ++TEST dd if=/dev/zero of=$M0/a bs=100M count=1 ++TEST dd if=/dev/zero of=$M0/b bs=10M count=1 ++ ++# Wait 5s to update disk_space_full flag because thread check disk space ++# after every 5s ++ ++sleep 5 ++# setup_lvm create lvm partition of 150M and 40M are reserve so after ++# consuming more than 110M next dd should fail ++TEST ! 
dd if=/dev/zero of=$M0/c bs=5M count=1 ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 7a83124..3a7ab83 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1231,6 +1231,30 @@ out: + + return ret; + } ++static int ++validate_size(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, ++ char **op_errstr) ++{ ++ xlator_t *this = NULL; ++ uint64_t size = 0; ++ int ret = -1; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ ret = gf_string2bytesize_uint64(value, &size); ++ if (ret < 0) { ++ gf_asprintf(op_errstr, ++ "%s is not a valid size. %s " ++ "expects a valid value in bytes", ++ value, key); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", ++ *op_errstr); ++ } ++out: ++ gf_msg_debug("glusterd", 0, "Returning %d", ret); ++ ++ return ret; ++} + + /* dispatch table for VOLUME SET + * ----------------------------- +@@ -2830,6 +2854,15 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = GD_OP_VERSION_3_13_0, + }, + { ++ .key = "storage.reserve-size", ++ .voltype = "storage/posix", ++ .value = "0", ++ .validate_fn = validate_size, ++ .description = "If set, priority will be given to " ++ "storage.reserve-size over storage.reserve", ++ .op_version = GD_OP_VERSION_7_0, ++ }, ++ { + .option = "health-check-timeout", + .key = "storage.health-check-timeout", + .type = NO_DOC, +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index ed82e35..0f70af5 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -345,11 +345,18 @@ posix_reconfigure(xlator_t *this, dict_t *options) + " fallback to :"); + } + +- GF_OPTION_RECONF("reserve", priv->disk_reserve, options, uint32, out); +- if (priv->disk_reserve) { ++ GF_OPTION_RECONF("reserve-size", priv->disk_reserve_size, options, size, ++ out); ++ ++ GF_OPTION_RECONF("reserve", priv->disk_reserve_percent, options, uint32, ++ out); ++ if (priv->disk_reserve_size || priv->disk_reserve_percent) { + ret = posix_spawn_disk_space_check_thread(this); +- if (ret) ++ if (ret) { ++ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, ++ "Getting disk space check from thread failed"); + goto out; ++ } + } + + GF_OPTION_RECONF("health-check-interval", priv->health_check_interval, +@@ -968,11 +975,17 @@ posix_init(xlator_t *this) + + _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; +- GF_OPTION_INIT("reserve", _private->disk_reserve, uint32, out); +- if (_private->disk_reserve) { ++ GF_OPTION_INIT("reserve-size", _private->disk_reserve_size, size, out); ++ ++ GF_OPTION_INIT("reserve", _private->disk_reserve_percent, uint32, out); ++ ++ if (_private->disk_reserve_size || _private->disk_reserve_percent) { + ret = posix_spawn_disk_space_check_thread(this); +- if (ret) ++ if (ret) { ++ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, ++ "Getting disk space check from thread failed "); + goto out; ++ } + } + + _private->health_check_active = _gf_false; +@@ -1216,6 +1229,15 @@ struct volume_options posix_options[] = { + " Set to 0 to disable", + .op_version = {GD_OP_VERSION_3_13_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, ++ {.key = {"reserve-size"}, ++ .type = GF_OPTION_TYPE_SIZET, ++ .min = 0, ++ .default_value = "0", ++ .validate = 
GF_OPT_VALIDATE_MIN, ++ .description = "size in megabytes to be reserved for disk space." ++ " Set to 0 to disable", ++ .op_version = {GD_OP_VERSION_7_0}, ++ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index d0fd45a..aecf4f8 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -2246,6 +2246,7 @@ posix_disk_space_check(xlator_t *this) + struct posix_private *priv = NULL; + char *subvol_path = NULL; + int op_ret = 0; ++ uint64_t size = 0; + int percent = 0; + struct statvfs buf = {0}; + uint64_t totsz = 0; +@@ -2256,7 +2257,14 @@ posix_disk_space_check(xlator_t *this) + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + subvol_path = priv->base_path; +- percent = priv->disk_reserve; ++ ++ if (priv->disk_reserve_size) { ++ size = priv->disk_reserve_size; ++ } else { ++ percent = priv->disk_reserve_percent; ++ totsz = (buf.f_blocks * buf.f_bsize); ++ size = ((totsz * percent) / 100); ++ } + + op_ret = sys_statvfs(subvol_path, &buf); + +@@ -2265,10 +2273,9 @@ posix_disk_space_check(xlator_t *this) + "statvfs failed on %s", subvol_path); + goto out; + } +- totsz = (buf.f_blocks * buf.f_bsize); + freesz = (buf.f_bfree * buf.f_bsize); + +- if (freesz <= ((totsz * percent) / 100)) { ++ if (freesz <= size) { + priv->disk_space_full = 1; + } else { + priv->disk_space_full = 0; +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 2c19ce1..7ca4d26 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -720,7 +720,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + thread after every 5 sec sleep to working correctly storage.reserve + option behaviour + */ +- if (priv->disk_reserve) ++ if (priv->disk_reserve_size || priv->disk_reserve_percent) + posix_disk_space_check(this); + + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out); +@@ -2331,8 +2331,12 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + goto out; + } + +- percent = priv->disk_reserve; +- reserved_blocks = (buf.f_blocks * percent) / 100; ++ if (priv->disk_reserve_size) { ++ reserved_blocks = priv->disk_reserve_size / buf.f_bsize; ++ } else { ++ percent = priv->disk_reserve_percent; ++ reserved_blocks = (buf.f_blocks * percent) / 100; ++ } + + if (buf.f_bfree > reserved_blocks) { + buf.f_bfree = (buf.f_bfree - reserved_blocks); +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 1da4d01..4364b96 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -225,7 +225,8 @@ struct posix_private { + pthread_t health_check; + gf_boolean_t health_check_active; + +- uint32_t disk_reserve; ++ uint32_t disk_reserve_percent; ++ uint64_t disk_reserve_size; + uint32_t disk_space_full; + pthread_t disk_space_check; + gf_boolean_t disk_space_check_active; +-- +1.8.3.1 + diff --git a/0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch b/0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch new file mode 100644 index 0000000..0dc15ba --- /dev/null +++ b/0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch @@ -0,0 +1,141 @@ +From 998d9b8b5e271f407e1c654c34f45f0db36abc71 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Tue, 21 
May 2019 17:15:07 +0530 +Subject: [PATCH 172/178] ec/fini: Fix race with ec_fini and ec_notify + +During a graph cleanup, we first sent a PARENT_DOWN and wait for +a child down to ultimately free the xlator and the graph. + +In the ec xlator, we cleanup the threads when we get a PARENT_DOWN event. +But a racing event like CHILD_UP or event xl_op may trigger healing threads +after threads cleanup. + +So there is a chance that the threads might access a freed private variabe + +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22758/ + +>Change-Id: I252d10181bb67b95900c903d479de707a8489532 +>fixes: bz#1703948 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: I84a10352d9fb3e68d4147b3791e3af45ab79050e +BUG: 1703434 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/172285 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/glusterfs/xlator.h | 3 +++ + libglusterfs/src/libglusterfs.sym | 1 + + libglusterfs/src/xlator.c | 21 +++++++++++++++++++++ + xlators/cluster/ec/src/ec-heal.c | 4 ++++ + xlators/cluster/ec/src/ec-heald.c | 6 ++++++ + xlators/cluster/ec/src/ec.c | 3 +++ + 6 files changed, 38 insertions(+) + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 8998976..09e463e 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -1092,4 +1092,7 @@ gluster_graph_take_reference(xlator_t *tree); + + gf_boolean_t + mgmt_is_multiplexed_daemon(char *name); ++ ++gf_boolean_t ++xlator_is_cleanup_starting(xlator_t *this); + #endif /* _XLATOR_H */ +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index ec474e7..7a2edef 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1161,3 +1161,4 @@ glusterfs_process_svc_attach_volfp + glusterfs_mux_volfile_reconfigure + glusterfs_process_svc_detach + mgmt_is_multiplexed_daemon ++xlator_is_cleanup_starting +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 022c3ed..fbfbbe2 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1486,3 +1486,24 @@ mgmt_is_multiplexed_daemon(char *name) + } + return _gf_false; + } ++ ++gf_boolean_t ++xlator_is_cleanup_starting(xlator_t *this) ++{ ++ gf_boolean_t cleanup = _gf_false; ++ glusterfs_graph_t *graph = NULL; ++ xlator_t *xl = NULL; ++ ++ if (!this) ++ goto out; ++ graph = this->graph; ++ ++ if (!graph) ++ goto out; ++ ++ xl = graph->first; ++ if (xl && xl->cleanup_starting) ++ cleanup = _gf_true; ++out: ++ return cleanup; ++} +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 2fa1f11..8844c29 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2855,6 +2855,10 @@ ec_replace_brick_heal_wrap(void *opaque) + itable = ec->xl->itable; + else + goto out; ++ ++ if (xlator_is_cleanup_starting(ec->xl)) ++ goto out; ++ + ret = ec_replace_heal(ec, itable->root); + out: + return ret; +diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c +index edf5e11..91512d7 100644 +--- a/xlators/cluster/ec/src/ec-heald.c ++++ b/xlators/cluster/ec/src/ec-heald.c +@@ -444,6 +444,9 @@ unlock: + int + ec_shd_full_healer_spawn(xlator_t *this, int subvol) + { ++ if (xlator_is_cleanup_starting(this)) ++ return -1; ++ + return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol), + ec_shd_full_healer); + } +@@ -451,6 +454,9 @@ ec_shd_full_healer_spawn(xlator_t *this, int 
subvol) + int + ec_shd_index_healer_spawn(xlator_t *this, int subvol) + { ++ if (xlator_is_cleanup_starting(this)) ++ return -1; ++ + return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol), + ec_shd_index_healer); + } +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 264582a..df5912c 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -486,6 +486,9 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state) + { + uintptr_t current_state = 0; + ++ if (xlator_is_cleanup_starting(ec->xl)) ++ return _gf_false; ++ + if ((ec->xl_notify & index_mask) == 0) { + ec->xl_notify |= index_mask; + ec->xl_notify_count++; +-- +1.8.3.1 + diff --git a/0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch b/0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch new file mode 100644 index 0000000..bdaaac5 --- /dev/null +++ b/0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch @@ -0,0 +1,52 @@ +From c001b60047c73e07f42ee858dd8ae19136ecd61b Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 6 Jun 2019 13:19:29 +0530 +Subject: [PATCH 173/178] glusterd: store fips-mode-rchecksum option in the + info file + +commit 146e4b45d0ce906ae50fd6941a1efafd133897ea enabled +storage.fips-mode-rchecksum option for all new volumes with op-version +>=GD_OP_VERSION_7_0 but `gluster vol get $volname +storage.fips-mode-rchecksum` was displaying it as 'off'. This patch fixes it. + +>upstream patch link : https://review.gluster.org/#/c/glusterfs/+/22830/ + +>fixes: bz#1717782 +>Change-Id: Ie09f89838893c5776a3f60569dfe8d409d1494dd +>Signed-off-by: Ravishankar N + +BUG: 1715407 +Change-Id: Ie09f89838893c5776a3f60569dfe8d409d1494dd +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/172799 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 2bc4836..7768b8e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13124,6 +13124,17 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + } + } + } ++ if (conf->op_version >= GD_OP_VERSION_7_0) { ++ ret = dict_set_dynstr_with_alloc(volinfo->dict, ++ "storage.fips-mode-rchecksum", "on"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set option 'storage.fips-mode-rchecksum' " ++ "on volume %s", ++ volinfo->volname); ++ goto out; ++ } ++ } + out: + return ret; + } +-- +1.8.3.1 + diff --git a/0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch b/0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch new file mode 100644 index 0000000..66c5ff4 --- /dev/null +++ b/0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch @@ -0,0 +1,53 @@ +From 9b94397a5a735910fab2a29670146a1feb6d890e Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Tue, 4 Jun 2019 11:13:50 +0530 +Subject: [PATCH 174/178] xlator/log: Add more logging in + xlator_is_cleanup_starting + +This patch will add two extra logs for invalid argument + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22810/ + +>Change-Id: I3950b4f4b9d88b1f1e788ef93d8f09d4bd8d4d8b +>updates: bz#1703948 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: I3950b4f4b9d88b1f1e788ef93d8f09d4bd8d4d8b +BUG: 1703434 +Signed-off-by: 
Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/172800 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + libglusterfs/src/xlator.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index fbfbbe2..71e1ed4 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1494,12 +1494,18 @@ xlator_is_cleanup_starting(xlator_t *this) + glusterfs_graph_t *graph = NULL; + xlator_t *xl = NULL; + +- if (!this) ++ if (!this) { ++ gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, ++ "xlator object is null, returning false"); + goto out; +- graph = this->graph; ++ } + +- if (!graph) ++ graph = this->graph; ++ if (!graph) { ++ gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, ++ "Graph is not set for xlator %s", this->name); + goto out; ++ } + + xl = graph->first; + if (xl && xl->cleanup_starting) +-- +1.8.3.1 + diff --git a/0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch b/0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch new file mode 100644 index 0000000..c13d96d --- /dev/null +++ b/0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch @@ -0,0 +1,241 @@ +From 9fd966aa6879ac9867381629f82eca24b950d731 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC +Date: Sun, 2 Jun 2019 01:36:33 +0530 +Subject: [PATCH 175/178] ec/fini: Fix race between xlator cleanup and on going + async fop + +Problem: +While we process a cleanup, there is a chance for a race between +async operations, for example ec_launch_replace_heal. So this can +lead to invalid mem access. + +Solution: +Just like we track on going heal fops, we can also track fops like +ec_launch_replace_heal, so that we can decide when to send a +PARENT_DOWN request. 
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22798/ + +>Change-Id: I055391c5c6c34d58aef7336847f3b570cb831298 +>fixes: bz#1703948 +>Signed-off-by: Mohammed Rafi KC + +Change-Id: I055391c5c6c34d58aef7336847f3b570cb831298 +BUG: 1714588 +Signed-off-by: Mohammed Rafi KC +Reviewed-on: https://code.engineering.redhat.com/gerrit/172801 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + xlators/cluster/ec/src/ec-common.c | 10 ++++++++++ + xlators/cluster/ec/src/ec-common.h | 2 ++ + xlators/cluster/ec/src/ec-data.c | 4 +++- + xlators/cluster/ec/src/ec-heal.c | 17 +++++++++++++++-- + xlators/cluster/ec/src/ec-types.h | 1 + + xlators/cluster/ec/src/ec.c | 37 +++++++++++++++++++++++++------------ + 6 files changed, 56 insertions(+), 15 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index e85aa8b..9cc6395 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2955,3 +2955,13 @@ ec_manager(ec_fop_data_t *fop, int32_t error) + + __ec_manager(fop, error); + } ++ ++gf_boolean_t ++__ec_is_last_fop(ec_t *ec) ++{ ++ if ((list_empty(&ec->pending_fops)) && ++ (GF_ATOMIC_GET(ec->async_fop_count) == 0)) { ++ return _gf_true; ++ } ++ return _gf_false; ++} +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index e948342..bf6c97d 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -204,4 +204,6 @@ void + ec_reset_entry_healing(ec_fop_data_t *fop); + char * + ec_msg_str(ec_fop_data_t *fop); ++gf_boolean_t ++__ec_is_last_fop(ec_t *ec); + #endif /* __EC_COMMON_H__ */ +diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c +index 6ef9340..8d2d9a1 100644 +--- a/xlators/cluster/ec/src/ec-data.c ++++ b/xlators/cluster/ec/src/ec-data.c +@@ -202,11 +202,13 @@ ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify) + { + ec_t *ec = fop->xl->private; + ++ *notify = _gf_false; ++ + if (!list_empty(&fop->pending_list)) { + LOCK(&ec->lock); + { + list_del_init(&fop->pending_list); +- *notify = list_empty(&ec->pending_fops); ++ *notify = __ec_is_last_fop(ec); + } + UNLOCK(&ec->lock); + } +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 8844c29..237fea2 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2814,8 +2814,20 @@ int + ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque) + { + ec_t *ec = opaque; ++ gf_boolean_t last_fop = _gf_false; + ++ if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) { ++ LOCK(&ec->lock); ++ { ++ last_fop = __ec_is_last_fop(ec); ++ } ++ UNLOCK(&ec->lock); ++ } + gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret); ++ ++ if (last_fop) ++ ec_pending_fops_completed(ec); ++ + return 0; + } + +@@ -2869,14 +2881,15 @@ ec_launch_replace_heal(ec_t *ec) + { + int ret = -1; + +- if (!ec) +- return ret; + ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap, + ec_replace_heal_done, NULL, ec); ++ + if (ret < 0) { + gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d", + ret); ++ ec_replace_heal_done(-1, NULL, ec); + } ++ + return ret; + } + +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 1c295c0..4dbf4a3 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -643,6 +643,7 @@ struct _ec { + uintptr_t xl_notify; /* Bit flag representing + notification 
for bricks. */ + uintptr_t node_mask; ++ gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */ + xlator_t **xl_list; + gf_lock_t lock; + gf_timer_t *timer; +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index df5912c..f0d58c0 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -355,6 +355,7 @@ ec_notify_cbk(void *data) + ec_t *ec = data; + glusterfs_event_t event = GF_EVENT_MAXVAL; + gf_boolean_t propagate = _gf_false; ++ gf_boolean_t launch_heal = _gf_false; + + LOCK(&ec->lock); + { +@@ -384,6 +385,11 @@ ec_notify_cbk(void *data) + * still bricks DOWN, they will be healed when they + * come up. */ + ec_up(ec->xl, ec); ++ ++ if (ec->shd.iamshd && !ec->shutdown) { ++ launch_heal = _gf_true; ++ GF_ATOMIC_INC(ec->async_fop_count); ++ } + } + + propagate = _gf_true; +@@ -391,13 +397,12 @@ ec_notify_cbk(void *data) + unlock: + UNLOCK(&ec->lock); + ++ if (launch_heal) { ++ /* We have just brought the volume UP, so we trigger ++ * a self-heal check on the root directory. */ ++ ec_launch_replace_heal(ec); ++ } + if (propagate) { +- if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) { +- /* We have just brought the volume UP, so we trigger +- * a self-heal check on the root directory. */ +- ec_launch_replace_heal(ec); +- } +- + default_notify(ec->xl, event, NULL); + } + } +@@ -425,7 +430,7 @@ ec_disable_delays(ec_t *ec) + { + ec->shutdown = _gf_true; + +- return list_empty(&ec->pending_fops); ++ return __ec_is_last_fop(ec); + } + + void +@@ -603,7 +608,10 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2) + if (event == GF_EVENT_CHILD_UP) { + /* We need to trigger a selfheal if a brick changes + * to UP state. */ +- needs_shd_check = ec_set_up_state(ec, mask, mask); ++ if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd && ++ !ec->shutdown) { ++ needs_shd_check = _gf_true; ++ } + } else if (event == GF_EVENT_CHILD_DOWN) { + ec_set_up_state(ec, mask, 0); + } +@@ -633,17 +641,21 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2) + } + } else { + propagate = _gf_false; ++ needs_shd_check = _gf_false; ++ } ++ ++ if (needs_shd_check) { ++ GF_ATOMIC_INC(ec->async_fop_count); + } + } + unlock: + UNLOCK(&ec->lock); + + done: ++ if (needs_shd_check) { ++ ec_launch_replace_heal(ec); ++ } + if (propagate) { +- if (needs_shd_check && ec->shd.iamshd) { +- ec_launch_replace_heal(ec); +- } +- + error = default_notify(this, event, data); + } + +@@ -705,6 +717,7 @@ init(xlator_t *this) + ec->xl = this; + LOCK_INIT(&ec->lock); + ++ GF_ATOMIC_INIT(ec->async_fop_count, 0); + INIT_LIST_HEAD(&ec->pending_fops); + INIT_LIST_HEAD(&ec->heal_waiting); + INIT_LIST_HEAD(&ec->healing); +-- +1.8.3.1 + diff --git a/0176-features-shard-Fix-crash-during-background-shard-del.patch b/0176-features-shard-Fix-crash-during-background-shard-del.patch new file mode 100644 index 0000000..487d8e2 --- /dev/null +++ b/0176-features-shard-Fix-crash-during-background-shard-del.patch @@ -0,0 +1,289 @@ +From 40ac42501d6bbff7206e753e8e988beefe74f5f4 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Fri, 5 Apr 2019 10:30:23 +0530 +Subject: [PATCH 176/178] features/shard: Fix crash during background shard + deletion in a specific case + +Consider the following case - +1. A file gets FALLOCATE'd such that > "shard-lru-limit" number of + shards are created. +2. And then it is deleted after that. 
+ +The unique thing about FALLOCATE is that unlike WRITE, all of the +participant shards are resolved and created and fallocated in a single +batch. This means, in this case, after the first "shard-lru-limit" +number of shards are resolved and added to lru list, as part of +resolution of the remaining shards, some of the existing shards in lru +list will need to be evicted. So these evicted shards will be +inode_unlink()d as part of eviction. Now once the fop gets to the actual +FALLOCATE stage, the lru'd-out shards get added to fsync list. + +2 things to note at this point: +i. the lru'd out shards are only part of fsync list, so each holds 1 ref + on base shard +ii. and the more recently used shards are part of both fsync and lru list. + So each of these shards holds 2 refs on base inode - one for being + part of fsync list, and the other for being part of lru list. + +FALLOCATE completes successfully and then this very file is deleted, and +background shard deletion launched. Here's where the ref counts get mismatched. +First as part of inode_resolve()s during the deletion, the lru'd-out inodes +return NULL, because they are inode_unlink()'d by now. So these inodes need to +be freshly looked up. But as part of linking them in lookup_cbk (precisely in +shard_link_block_inode()), inode_link() returns the lru'd-out inode object. +And its inode ctx is still valid and ctx->base_inode valid from the last +time it was added to list. + +But shard_common_lookup_shards_cbk() passes NULL in the place of base_pointer +to __shard_update_shards_inode_list(). This means, as part of adding the lru'd out +inode back to lru list, base inode is not ref'd since its NULL. + +Whereas post unlinking this shard, during shard_unlink_block_inode(), +ctx->base_inode is accessible and is unref'd because the shard was found to be part +of LRU list, although the matching ref didn't occur. This at some point leads to +base_inode refcount becoming 0 and it getting destroyed and released back while some +of its associated shards are continuing to be unlinked in parallel and the client crashes +whenever it is accessed next. + +Fix is to pass base shard correctly, if available, in shard_link_block_inode(). 
+ +Also, the patch fixes the ret value check in tests/bugs/shard/shard-fallocate.c + +>Change-Id: Ibd0bc4c6952367608e10701473cbad3947d7559f +>Updates: bz#1696136 +>Signed-off-by: Krutika Dhananjay + +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22507/ + +BUG: 1694595 +Change-Id: Ibd0bc4c6952367608e10701473cbad3947d7559f +Signed-off-by: Sunil Kumar Acharya +Reviewed-on: https://code.engineering.redhat.com/gerrit/172856 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + tests/bugs/shard/bug-1696136.c | 121 +++++++++++++++++++++++++++++++++++++ + tests/bugs/shard/bug-1696136.t | 33 ++++++++++ + tests/bugs/shard/shard-fallocate.c | 2 +- + xlators/features/shard/src/shard.c | 12 +++- + 4 files changed, 164 insertions(+), 4 deletions(-) + create mode 100644 tests/bugs/shard/bug-1696136.c + create mode 100644 tests/bugs/shard/bug-1696136.t + +diff --git a/tests/bugs/shard/bug-1696136.c b/tests/bugs/shard/bug-1696136.c +new file mode 100644 +index 0000000..b9e8d13 +--- /dev/null ++++ b/tests/bugs/shard/bug-1696136.c +@@ -0,0 +1,121 @@ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++ ++enum fallocate_flag { ++ TEST_FALLOCATE_NONE, ++ TEST_FALLOCATE_KEEP_SIZE, ++ TEST_FALLOCATE_ZERO_RANGE, ++ TEST_FALLOCATE_PUNCH_HOLE, ++ TEST_FALLOCATE_MAX, ++}; ++ ++int ++get_fallocate_flag(int opcode) ++{ ++ int ret = 0; ++ ++ switch (opcode) { ++ case TEST_FALLOCATE_NONE: ++ ret = 0; ++ break; ++ case TEST_FALLOCATE_KEEP_SIZE: ++ ret = FALLOC_FL_KEEP_SIZE; ++ break; ++ case TEST_FALLOCATE_ZERO_RANGE: ++ ret = FALLOC_FL_ZERO_RANGE; ++ break; ++ case TEST_FALLOCATE_PUNCH_HOLE: ++ ret = FALLOC_FL_PUNCH_HOLE; ++ break; ++ default: ++ ret = -1; ++ break; ++ } ++ return ret; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ int ret = 1; ++ int opcode = -1; ++ off_t offset = 0; ++ size_t len = 0; ++ glfs_t *fs = NULL; ++ glfs_fd_t *fd = NULL; ++ ++ if (argc != 8) { ++ fprintf(stderr, ++ "Syntax: %s " ++ " \n", ++ argv[0]); ++ return 1; ++ } ++ ++ fs = glfs_new(argv[2]); ++ if (!fs) { ++ fprintf(stderr, "glfs_new: returned NULL\n"); ++ return 1; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret); ++ goto out; ++ } ++ ++ ret = glfs_set_logging(fs, argv[7], 7); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_set_logging: returned %d\n", ret); ++ goto out; ++ } ++ ++ ret = glfs_init(fs); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_init: returned %d\n", ret); ++ goto out; ++ } ++ ++ opcode = atoi(argv[3]); ++ opcode = get_fallocate_flag(opcode); ++ if (opcode < 0) { ++ fprintf(stderr, "get_fallocate_flag: invalid flag \n"); ++ goto out; ++ } ++ ++ offset = atoi(argv[4]); ++ len = atoi(argv[5]); ++ ++ fd = glfs_open(fs, argv[6], O_RDWR); ++ if (fd == NULL) { ++ fprintf(stderr, "glfs_open: returned NULL\n"); ++ goto out; ++ } ++ ++ ret = glfs_fallocate(fd, opcode, offset, len); ++ if (ret < 0) { ++ fprintf(stderr, "glfs_fallocate: returned %d\n", ret); ++ goto out; ++ } ++ ++ ret = glfs_unlink(fs, argv[6]); ++ if (ret < 0) { ++ fprintf(stderr, "glfs_unlink: returned %d\n", ret); ++ goto out; ++ } ++ /* Sleep for 3s to give enough time for background deletion to complete ++ * during which if the bug exists, the process will crash. 
++ */ ++ sleep(3); ++ ret = 0; ++ ++out: ++ if (fd) ++ glfs_close(fd); ++ glfs_fini(fs); ++ return ret; ++} +diff --git a/tests/bugs/shard/bug-1696136.t b/tests/bugs/shard/bug-1696136.t +new file mode 100644 +index 0000000..b6dc858 +--- /dev/null ++++ b/tests/bugs/shard/bug-1696136.t +@@ -0,0 +1,33 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../fallocate.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 features.shard-lru-limit 120 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2 ++ ++# Create a file ++TEST touch $M0/file1 ++ ++# Fallocate a 500M file. This will make sure number of participant shards are > lru-limit ++TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++rm -f $(dirname $0)/bug-1696136 ++ ++cleanup +diff --git a/tests/bugs/shard/shard-fallocate.c b/tests/bugs/shard/shard-fallocate.c +index 3a784d3..45b9ce0 100644 +--- a/tests/bugs/shard/shard-fallocate.c ++++ b/tests/bugs/shard/shard-fallocate.c +@@ -97,7 +97,7 @@ main(int argc, char *argv[]) + } + + ret = glfs_fallocate(fd, opcode, offset, len); +- if (ret <= 0) { ++ if (ret < 0) { + fprintf(stderr, "glfs_fallocate: returned %d\n", ret); + goto out; + } +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index fa3564a..3c4bcdc 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -2213,13 +2213,19 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, + xlator_t *this = NULL; + inode_t *fsync_inode = NULL; + shard_priv_t *priv = NULL; ++ inode_t *base_inode = NULL; + + this = THIS; + priv = this->private; +- if (local->loc.inode) ++ if (local->loc.inode) { + gf_uuid_copy(gfid, local->loc.inode->gfid); +- else ++ base_inode = local->loc.inode; ++ } else if (local->resolver_base_inode) { ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ base_inode = local->resolver_base_inode; ++ } else { + gf_uuid_copy(gfid, local->base_gfid); ++ } + + shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); + +@@ -2232,7 +2238,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, + LOCK(&priv->lock); + { + fsync_inode = __shard_update_shards_inode_list( +- linked_inode, this, local->loc.inode, block_num, gfid); ++ linked_inode, this, base_inode, block_num, gfid); + } + UNLOCK(&priv->lock); + if (fsync_inode) +-- +1.8.3.1 + diff --git a/0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch b/0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch new file mode 100644 index 0000000..0156324 --- /dev/null +++ b/0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch @@ -0,0 +1,161 @@ +From 4f0aa008ed393d7ce222c4ea4bd0fa6ed52b48f6 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay +Date: Fri, 5 Apr 2019 12:29:23 +0530 +Subject: [PATCH 177/178] features/shard: Fix extra unref when inode object is + lru'd out and added back + +Long tale of double unref! But do read... 
+ +In cases where a shard base inode is evicted from lru list while still +being part of fsync list but added back soon before its unlink, there +could be an extra inode_unref() leading to premature inode destruction +leading to crash. + +One such specific case is the following - + +Consider features.shard-deletion-rate = features.shard-lru-limit = 2. +This is an oversimplified example but explains the problem clearly. + +First, a file is FALLOCATE'd to a size so that number of shards under +/.shard = 3 > lru-limit. +Shards 1, 2 and 3 need to be resolved. 1 and 2 are resolved first. +Resultant lru list: + 1 -----> 2 +refs on base inode - (1) + (1) = 2 +3 needs to be resolved. So 1 is lru'd out. Resultant lru list - + 2 -----> 3 +refs on base inode - (1) + (1) = 2 + +Note that 1 is inode_unlink()d but not destroyed because there are +non-zero refs on it since it is still participating in this ongoing +FALLOCATE operation. + +FALLOCATE is sent on all participant shards. In the cbk, all of them are +added to fync_list. +Resulting fsync list - + 1 -----> 2 -----> 3 (order doesn't matter) +refs on base inode - (1) + (1) + (1) = 3 +Total refs = 3 + 2 = 5 + +Now an attempt is made to unlink this file. Background deletion is triggered. +The first $shard-deletion-rate shards need to be unlinked in the first batch. +So shards 1 and 2 need to be resolved. inode_resolve fails on 1 but succeeds +on 2 and so it's moved to tail of list. +lru list now - + 3 -----> 2 +No change in refs. + +shard 1 is looked up. In lookup_cbk, it's linked and added back to lru list +at the cost of evicting shard 3. +lru list now - + 2 -----> 1 +refs on base inode: (1) + (1) = 2 +fsync list now - + 1 -----> 2 (again order doesn't matter) +refs on base inode - (1) + (1) = 2 +Total refs = 2 + 2 = 4 +After eviction, it is found 3 needs fsync. So fsync is wound, yet to be ack'd. +So it is still inode_link()d. + +Now deletion of shards 1 and 2 completes. lru list is empty. Base inode unref'd and +destroyed. +In the next batched deletion, 3 needs to be deleted. It is inode_resolve()able. +It is added back to lru list but base inode passed to __shard_update_shards_inode_list() +is NULL since the inode is destroyed. But its ctx->inode still contains base inode ptr +from first addition to lru list for no additional ref on it. +lru list now - + 3 +refs on base inode - (0) +Total refs on base inode = 0 +Unlink is sent on 3. It completes. Now since the ctx contains ptr to base_inode and the +shard is part of lru list, base shard is unref'd leading to a crash. + +FIX: +When shard is readded back to lru list, copy the base inode pointer as is into its inode ctx, +even if it is NULL. This is needed to prevent double unrefs at the time of deleting it. 
+ +Upstream patch: +> BUG: 1696136 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22517 +> Change-Id: I99a44039da2e10a1aad183e84f644d63ca552462 +> Signed-off-by: Krutika Dhananjay + +Change-Id: I99a44039da2e10a1aad183e84f644d63ca552462 +Updates: bz#1694595 +Signed-off-by: Krutika Dhananjay +Reviewed-on: https://code.engineering.redhat.com/gerrit/172803 +Tested-by: RHGS Build Bot +Reviewed-by: Atin Mukherjee +--- + .../bug-1696136-lru-limit-equals-deletion-rate.t | 34 ++++++++++++++++++++++ + xlators/features/shard/src/shard.c | 6 ++-- + 2 files changed, 36 insertions(+), 4 deletions(-) + create mode 100644 tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t + +diff --git a/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t +new file mode 100644 +index 0000000..3e4a65a +--- /dev/null ++++ b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t +@@ -0,0 +1,34 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../fallocate.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 features.shard-lru-limit 120 ++TEST $CLI volume set $V0 features.shard-deletion-rate 120 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2 ++ ++# Create a file ++TEST touch $M0/file1 ++ ++# Fallocate a 500M file. This will make sure number of participant shards are > lru-limit ++TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++rm -f $(dirname $0)/bug-1696136 ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 3c4bcdc..c1799ad 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -689,8 +689,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, + ctx->block_num = block_num; + list_add_tail(&ctx->ilist, &priv->ilist_head); + priv->inode_count++; +- if (base_inode) +- ctx->base_inode = inode_ref(base_inode); ++ ctx->base_inode = inode_ref(base_inode); + } else { + /*If on the other hand there is no available slot for this inode + * in the list, delete the lru inode from the head of the list, +@@ -765,8 +764,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, + else + gf_uuid_copy(ctx->base_gfid, gfid); + ctx->block_num = block_num; +- if (base_inode) +- ctx->base_inode = inode_ref(base_inode); ++ ctx->base_inode = inode_ref(base_inode); + list_add_tail(&ctx->ilist, &priv->ilist_head); + } + } else { +-- +1.8.3.1 + diff --git a/0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch b/0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch new file mode 100644 index 0000000..1ff767d --- /dev/null +++ b/0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch @@ -0,0 +1,287 @@ +From 307074330db6e9f14941dfbabbe6f299cf841533 Mon Sep 17 00:00:00 2001 +From: karthik-us +Date: Mon, 10 Jun 2019 23:58:16 +0530 +Subject: [PATCH 178/178] Cluster/afr: Don't treat all bricks having metadata + 
pending as split-brain + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22831/ + +Problem: +We currently don't have a roll-back/undoing of post-ops if quorum is not met. +Though the FOP is still unwound with failure, the xattrs remain on the disk. +Due to these partial post-ops and partial heals (healing only when 2 bricks +are up), we can end up in metadata split-brain purely from the afr xattrs +point of view i.e each brick is blamed by atleast one of the others for +metadata. These scenarios are hit when there is frequent connect/disconnect +of the client/shd to the bricks. + +Fix: +Pick a source based on the xattr values. If 2 bricks blame one, the blamed +one must be treated as sink. If there is no majority, all are sources. Once +we pick a source, self-heal will then do the heal instead of erroring out +due to split-brain. +This patch also adds restriction of all the bricks to be up to perform +metadata heal to avoid any metadata loss. + +Removed the test case tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t +as it was doing metadata heal even when only 2 of 3 bricks were up. + +Change-Id: I02064ecb7d68d498f75a353af64f75249a633508 +fixes: bz#1715438 +Signed-off-by: karthik-us +Reviewed-on: https://code.engineering.redhat.com/gerrit/172935 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + .../bug-1468279-source-not-blaming-sinks.t | 64 ---------- + .../bug-1717819-metadata-split-brain-detection.t | 130 +++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-common.c | 4 +- + xlators/cluster/afr/src/afr-self-heal-metadata.c | 2 +- + 4 files changed, 133 insertions(+), 67 deletions(-) + delete mode 100644 tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t + create mode 100644 tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t + +diff --git a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t b/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t +deleted file mode 100644 +index 054a4ad..0000000 +--- a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t ++++ /dev/null +@@ -1,64 +0,0 @@ +-#!/bin/bash +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +-TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 cluster.self-heal-daemon off +-TEST $CLI volume set $V0 cluster.metadata-self-heal off +-TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; +-TEST touch $M0/file +- +-# Kill B1, create a pending metadata heal. +-TEST kill_brick $V0 $H0 $B0/${V0}0 +-TEST setfattr -n user.xattr -v value1 $M0/file +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file +- +-# Kill B2, heal from B3 to B1. +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +-TEST kill_brick $V0 $H0 $B0/${V0}1 +-TEST $CLI volume set $V0 cluster.self-heal-daemon on +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +-$CLI volume heal $V0 +-EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-0 "metadata" +-TEST $CLI volume set $V0 cluster.self-heal-daemon off +- +-# Create another pending metadata heal. 
+-TEST setfattr -n user.xattr -v value2 $M0/file +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file +- +-# Kill B1, heal from B3 to B2 +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +-TEST kill_brick $V0 $H0 $B0/${V0}0 +-TEST $CLI volume set $V0 cluster.self-heal-daemon on +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +-$CLI volume heal $V0 +-EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 "metadata" +-TEST $CLI volume set $V0 cluster.self-heal-daemon off +- +-# ALL bricks up again. +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +-# B1 and B2 blame each other, B3 doesn't blame anyone. +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file +-EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file +-EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file +-TEST $CLI volume set $V0 cluster.self-heal-daemon on +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +-TEST $CLI volume heal $V0 +-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +- +-cleanup; +diff --git a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t +new file mode 100644 +index 0000000..94b8bf3 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t +@@ -0,0 +1,130 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $CLI volume heal $V0 disable ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++############################################################################### ++# Case of 2 bricks blaming the third and the third blaming the other two. 
++ ++TEST mkdir $M0/dir ++ ++# B0 and B2 must blame B1 ++TEST kill_brick $V0 $H0 $B0/$V0"1" ++TEST setfattr -n user.metadata -v 1 $M0/dir ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir trusted.afr.$V0-client-1 metadata ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir trusted.afr.$V0-client-1 metadata ++CLIENT_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $M0/dir) ++ ++# B1 must blame B0 and B2 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir ++ ++# Launch heal ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir) ++B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir) ++B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir) ++ ++TEST [ "$CLIENT_XATTR" == "$B0_XATTR" ] ++TEST [ "$CLIENT_XATTR" == "$B1_XATTR" ] ++TEST [ "$CLIENT_XATTR" == "$B2_XATTR" ] ++TEST setfattr -x user.metadata $M0/dir ++ ++############################################################################### ++# Case of each brick blaming the next one in a cyclic manner ++ ++TEST $CLI volume heal $V0 disable ++TEST `echo "hello" >> $M0/dir/file` ++# Mark cyclic xattrs and modify metadata directly on the bricks. ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"2"/dir/file ++ ++setfattr -n user.metadata -v 1 $B0/$V0"0"/dir/file ++setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file ++setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file ++ ++# Add entry to xattrop dir to trigger index heal. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/file)) ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0 ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file) ++B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file) ++B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file) ++ ++TEST [ "$B0_XATTR" == "$B1_XATTR" ] ++TEST [ "$B0_XATTR" == "$B2_XATTR" ] ++TEST rm -f $M0/dir/file ++ ++############################################################################### ++# Case of 2 bricks having quorum blaming and the other having only one blaming. 
++ ++TEST $CLI volume heal $V0 disable ++TEST `echo "hello" >> $M0/dir/file` ++# B0 and B2 must blame B1 ++TEST kill_brick $V0 $H0 $B0/$V0"1" ++TEST setfattr -n user.metadata -v 1 $M0/dir/file ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir/file trusted.afr.$V0-client-1 metadata ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir/file trusted.afr.$V0-client-1 metadata ++ ++# B1 must blame B0 and B2 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file ++ ++# B0 must blame B2 ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file ++ ++# Modify the metadata directly on the bricks B1 & B2. ++setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file ++setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file ++ ++# Launch heal ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++ ++B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file) ++B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file) ++B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file) ++ ++TEST [ "$B0_XATTR" == "$B1_XATTR" ] ++TEST [ "$B0_XATTR" == "$B2_XATTR" ] ++ ++############################################################################### ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 595bed4..5157e7d 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1590,7 +1590,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + } + } + +- if (type == AFR_DATA_TRANSACTION) { ++ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) { + min_participants = priv->child_count; + } else { + min_participants = AFR_SH_MIN_PARTICIPANTS; +@@ -1656,7 +1656,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + } + } + +- if (type == AFR_DATA_TRANSACTION) ++ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) + afr_selfheal_post_op_failure_accounting(priv, accused, sources, + locked_on); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c +index ba43341..ecfa791 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c ++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c +@@ -398,7 +398,7 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode) + ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, + data_lock); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + ret = -ENOTCONN; + goto unlock; + } +-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index 6e2cd90..d889860 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -231,7 +231,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 6.0 -Release: 4%{?dist} +Release: 5%{?dist} ExcludeArch: i686 %endif License: GPLv2 or LGPLv3+ @@ -475,6 +475,15 
@@ Patch0166: 0166-glusterd-svc-glusterd_svcs_stop-should-call-individu.patch Patch0167: 0167-glusterd-shd-Optimize-the-glustershd-manager-to-send.patch Patch0168: 0168-cluster-dht-Fix-directory-perms-during-selfheal.patch Patch0169: 0169-Build-Fix-spec-to-enable-rhel8-client-build.patch +Patch0170: 0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch +Patch0171: 0171-posix-add-storage.reserve-size-option.patch +Patch0172: 0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch +Patch0173: 0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch +Patch0174: 0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch +Patch0175: 0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch +Patch0176: 0176-features-shard-Fix-crash-during-background-shard-del.patch +Patch0177: 0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch +Patch0178: 0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -2176,9 +2185,13 @@ fi %endif %changelog +* Tue Jun 11 2019 Sunil Kumar Acharya - 6.0-5 +- fixes bugs bz#1573077 bz#1694595 bz#1703434 bz#1714536 bz#1714588 + bz#1715407 bz#1715438 bz#1705018 + * Fri Jun 07 2019 Rinku Kothiya - 6.0-4 -- fixes bugs bz#1480907 bz#1702298 bz#1703455 bz#1704181 bz#1704562 - bz#1707246 bz#1708067 bz#1708116 bz#1708121 bz#1709087 bz#1711249 bz#1711296 +- fixes bugs bz#1480907 bz#1702298 bz#1703455 bz#1704181 bz#1707246 + bz#1708067 bz#1708116 bz#1708121 bz#1709087 bz#1711249 bz#1711296 bz#1714078 bz#1714124 bz#1716385 bz#1716626 bz#1716821 bz#1716865 bz#1717927 * Tue May 14 2019 Rinku Kothiya - 6.0-3
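Example usage of the storage.reserve-size option introduced by Patch0171, following the bundled test tests/bugs/posix/bug-1651445.t. The option takes an absolute size (with a unit) instead of a percentage and, when set to a non-zero value, takes priority over the percentage-based storage.reserve option; the volume name below is a placeholder.

    # reserve an absolute 10MB of space on each brick; overrides storage.reserve
    gluster volume set <VOLNAME> storage.reserve-size 10MB

    # set it back to 0 to disable the size-based reservation and fall back to
    # the percentage-based option (20% here)
    gluster volume set <VOLNAME> storage.reserve-size 0
    gluster volume set <VOLNAME> storage.reserve 20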
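The shard fixes in Patch0176 and Patch0177 are reproduced by pushing the number of shards of a file past the in-memory LRU limit and then deleting the file so that background shard deletion kicks in. The bundled tests tests/bugs/shard/bug-1696136.t and bug-1696136-lru-limit-equals-deletion-rate.t drive this with the following tunables (volume name is a placeholder; the values are the ones used in the tests):

    gluster volume set <VOLNAME> features.shard on
    gluster volume set <VOLNAME> features.shard-block-size 4MB
    # maximum number of shard inodes kept on the in-memory LRU list
    gluster volume set <VOLNAME> features.shard-lru-limit 120
    # number of shards unlinked per batch during background deletion
    gluster volume set <VOLNAME> features.shard-deletion-rate 120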
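Patch0173 only changes how the storage.fips-mode-rchecksum default is recorded in the volume info file; assuming a cluster op-version of GD_OP_VERSION_7_0 or higher, a newly created volume is expected to report the option as on rather than off when queried:

    gluster volume get <VOLNAME> storage.fips-mode-rchecksum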