autobuild v3.12.2-47

Resolves: bz#1458215 bz#1661258 bz#1669020 bz#1683893
Signed-off-by: Milind Changire <mchangir@redhat.com>
This commit is contained in:
Milind Changire 2019-03-13 02:48:00 -04:00
parent 75fd41cd04
commit 4341631c3a
5 changed files with 522 additions and 1 deletion


@@ -0,0 +1,66 @@
From 1d9151816d9ef915974081d82fd78b59377b6d1a Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Sat, 9 Mar 2019 08:55:44 +0530
Subject: [PATCH 535/538] posix: Deletion of block hosting volume throwing
error "target is busy"
While deleting block hosting volumes with heketi-cli, a few volumes failed to delete
with the message "target is busy". After analyzing the root cause we found that fds
were not closed because the janitor thread was killed by posix_fini. To avoid this,
all fds should be closed by the janitor thread before the CHILD_DOWN event is
notified to the parent.
Note: This patch is applicable only to the downstream release; upstream uses a
different approach to handle the janitor thread.
Change-Id: I8c8482924af1868b4810e708962cd2978c2a40ab
BUG: 1669020
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/164908
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/storage/posix/src/posix.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 8a6282d..9efa1f1 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -7115,6 +7115,15 @@ notify (xlator_t *this,
...)
{
xlator_t *victim = data;
+ struct posix_private *priv = NULL;
+ struct timespec sleep_till = {0,};
+
+ if (!this)
+ return 0;
+
+ priv = this->private;
+ if (!priv)
+ return 0;
switch (event)
{
@@ -7128,6 +7137,17 @@ notify (xlator_t *this,
{
if (!victim->cleanup_starting)
break;
+ pthread_mutex_lock (&priv->janitor_lock);
+ {
+ while (!list_empty (&priv->janitor_fds)) {
+ clock_gettime(CLOCK_REALTIME, &sleep_till);
+ sleep_till.tv_sec += 1;
+ (void)pthread_cond_timedwait(&priv->janitor_cond, &priv->janitor_lock,
+ &sleep_till);
+ }
+ }
+ pthread_mutex_unlock (&priv->janitor_lock);
+
gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
victim->name);
default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
--
1.8.3.1
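
The hunk above only shows the waiting side of the handshake: notify() polls priv->janitor_fds under janitor_lock with a one-second pthread_cond_timedwait() until the janitor thread has closed every cached fd. The following is a minimal standalone sketch of that pattern in generic pthread code, not the actual GlusterFS janitor implementation; the fd list is reduced to a plain counter.

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int pending_fds = 5;                 /* stand-in for priv->janitor_fds */

static void *
janitor (void *arg)
{
        (void) arg;
        pthread_mutex_lock (&lock);
        while (pending_fds > 0) {
                pending_fds--;              /* stand-in for closing one cached fd */
                pthread_cond_signal (&cond);/* wake the waiter in the shutdown path */
        }
        pthread_mutex_unlock (&lock);
        return NULL;
}

int
main (void)
{
        pthread_t tid;
        struct timespec sleep_till = {0, };

        pthread_create (&tid, NULL, janitor, NULL);

        /* shutdown path: wait until the janitor has drained its list,
         * re-checking every second so a missed wakeup cannot hang us
         * (the same timed wait the patch adds to notify()) */
        pthread_mutex_lock (&lock);
        while (pending_fds > 0) {
                clock_gettime (CLOCK_REALTIME, &sleep_till);
                sleep_till.tv_sec += 1;
                (void) pthread_cond_timedwait (&cond, &lock, &sleep_till);
        }
        pthread_mutex_unlock (&lock);

        printf ("all fds closed, safe to send CHILD_DOWN\n");
        pthread_join (tid, NULL);
        return 0;
}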


@@ -0,0 +1,103 @@
From f928dfab3d2e94211d9865ce994ae3a503f2b9a0 Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Mon, 11 Mar 2019 08:06:15 +0530
Subject: [PATCH 536/538] cluster/dht: Fix lookup selfheal and rmdir race
A race between the lookup selfheal and rmdir can cause
directories to be healed only on non-hashed subvols.
This can prevent the directory from being listed from
the mount point and in turn cause rm -rf to fail with
ENOTEMPTY.
Fix: Update the layout information correctly and reduce
the call count only after processing the response.
upstream: https://review.gluster.org/#/c/glusterfs/+/22195/
> Change-Id: I812779aaf3d7bcf24aab1cb158cb6ed50d212451
> fixes: bz#1676400
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
Change-Id: Ic76248ca7dca05d926f48ba84cd5bfa2943fab92
BUG: 1458215
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/164931
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/dht/src/dht-selfheal.c | 36 ++++++++++++++++++++++++----------
1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 035a709..045689b 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -1366,6 +1366,7 @@ dht_selfheal_dir_mkdir_lookup_cbk (call_frame_t *frame, void *cookie,
int errst = 0;
int32_t mds_xattr_val[1] = {0};
char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ int index = -1;
VALIDATE_OR_GOTO (this->private, err);
@@ -1375,32 +1376,47 @@ dht_selfheal_dir_mkdir_lookup_cbk (call_frame_t *frame, void *cookie,
prev = cookie;
conf = this->private;
- if (local->gfid)
+ if (!gf_uuid_is_null(local->gfid))
gf_uuid_unparse(local->gfid, gfid_local);
- this_call_cnt = dht_frame_return (frame);
-
LOCK (&frame->lock);
{
+ index = dht_layout_index_for_subvol(layout, prev);
if ((op_ret < 0) &&
(op_errno == ENOENT || op_errno == ESTALE)) {
local->selfheal.hole_cnt = !local->selfheal.hole_cnt ? 1
: local->selfheal.hole_cnt + 1;
+ /* the status might have changed. Update the layout
+ * with the new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = op_errno;
+ }
}
if (!op_ret) {
dht_iatt_merge (this, &local->stbuf, stbuf, prev);
+ check_mds = dht_dict_get_array (xattr,
+ conf->mds_xattr_key,
+ mds_xattr_val, 1,
+ &errst);
+ if (dict_get (xattr, conf->mds_xattr_key) &&
+ check_mds && !errst) {
+ dict_unref (local->xattr);
+ local->xattr = dict_ref (xattr);
+ }
+ /* the status might have changed. Update the layout
+ * with the new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = -1;
+ }
}
- check_mds = dht_dict_get_array (xattr, conf->mds_xattr_key,
- mds_xattr_val, 1, &errst);
- if (dict_get (xattr, conf->mds_xattr_key) && check_mds && !errst) {
- dict_unref (local->xattr);
- local->xattr = dict_ref (xattr);
- }
-
}
UNLOCK (&frame->lock);
+ this_call_cnt = dht_frame_return (frame);
+
if (is_last_call (this_call_cnt)) {
if (local->selfheal.hole_cnt == layout->cnt) {
gf_msg_debug (this->name, op_errno,
--
1.8.3.1
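
The essence of the fix above is ordering: each callback first records its subvolume's status in the layout while holding the frame lock, and only then calls dht_frame_return() to drop the outstanding-call count, so whichever callback turns out to be the last always sees a fully populated layout. Below is a simplified, self-contained sketch of that fan-out pattern using plain C stand-ins, not the real dht_local_t/dht_layout_t structures.

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define SUBVOLS 3

static pthread_mutex_t frame_lock = PTHREAD_MUTEX_INITIALIZER;
static int layout_err[SUBVOLS];             /* stand-in for layout->list[i].err */
static atomic_int call_cnt = SUBVOLS;       /* stand-in for local->call_cnt */

static void
lookup_cbk (int index, int op_errno)
{
        /* 1. record this subvol's result while holding the frame lock
         *    (-1 marks success, mirroring the patch) */
        pthread_mutex_lock (&frame_lock);
        layout_err[index] = op_errno ? op_errno : -1;
        pthread_mutex_unlock (&frame_lock);

        /* 2. only now drop our reference on the call count
         *    (the equivalent of calling dht_frame_return() last) */
        if (atomic_fetch_sub (&call_cnt, 1) == 1) {
                /* last reply: every layout_err[] slot is guaranteed to be set */
                for (int i = 0; i < SUBVOLS; i++)
                        printf ("subvol %d err=%d\n", i, layout_err[i]);
        }
}

int
main (void)
{
        lookup_cbk (0, 0);
        lookup_cbk (1, ENOENT);
        lookup_cbk (2, 0);
        return 0;
}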


@@ -0,0 +1,191 @@
From 3f7ff1a0343045c737fcca3ffbdc380be8a1f065 Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Mon, 11 Mar 2019 11:30:05 +0530
Subject: [PATCH 537/538] cluster/dht: Do not use gfid-req in fresh lookup
Fuse sets a random gfid-req value for a fresh lookup. Posix
lookup will set this gfid on entries with missing gfids, causing
a GFID mismatch for directories.
DHT will now ignore the Fuse-provided gfid-req and use the GFID
returned from other subvols to heal the missing gfid.
upstream: https://review.gluster.org/#/c/22112/
> Change-Id: I5f541978808f246ba4542564251e341ec490db14
> fixes: bz#1670259
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
Change-Id: I50e5a9cb4db8164989082daab30a0d5cac70920c
BUG: 1661258
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/164947
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 63 ++++++++++++++++++++++++++++++++++--
xlators/cluster/dht/src/dht-common.h | 2 ++
2 files changed, 62 insertions(+), 3 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 1311a8d..12a17e6 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -1347,7 +1347,7 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!op_ret && gf_uuid_is_null (local->gfid)) {
memcpy (local->gfid, stbuf->ia_gfid, 16);
}
- if (local->gfid)
+ if (!gf_uuid_is_null (local->gfid))
gf_uuid_unparse(local->gfid, gfid_local);
/* Check if the gfid is different for file from other node */
@@ -1376,9 +1376,13 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
local->op_errno = op_errno;
gf_msg_debug (this->name, op_errno,
- "lookup of %s on %s returned error",
+ "%s: lookup on %s returned error",
local->loc.path, prev->name);
+ /* The GFID is missing on this subvol. Force a heal. */
+ if (op_errno == ENODATA) {
+ local->need_selfheal = 1;
+ }
goto unlock;
}
@@ -1485,6 +1489,17 @@ unlock:
if (local->need_selfheal) {
local->need_selfheal = 0;
+ /* Set the gfid-req so posix will set the GFID*/
+ if (!gf_uuid_is_null(local->gfid)) {
+ ret = dict_set_static_bin(local->xattr_req,
+ "gfid-req",
+ local->gfid, 16);
+ } else {
+ if (!gf_uuid_is_null(local->gfid_req))
+ ret = dict_set_static_bin(local->xattr_req,
+ "gfid-req",
+ local->gfid_req, 16);
+ }
dht_lookup_everywhere (frame, this, &local->loc);
return 0;
}
@@ -1686,6 +1701,12 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->need_lookup_everywhere = 1;
}
}
+ /* The GFID is missing on this subvol*/
+ if ((op_errno == ENODATA) &&
+ (IA_ISDIR(local->loc.inode->ia_type))) {
+ local->need_lookup_everywhere = 1;
+ }
+
goto unlock;
}
@@ -1931,6 +1952,13 @@ cont:
/* We know that current cached subvol is no more
valid, get the new one */
local->cached_subvol = NULL;
+ if (local->xattr_req) {
+ if (!gf_uuid_is_null(local->gfid)) {
+ ret = dict_set_static_bin(local->xattr_req,
+ "gfid-req",
+ local->gfid, 16);
+ }
+ }
dht_lookup_everywhere (frame, this, &local->loc);
return 0;
}
@@ -2391,6 +2419,17 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
return 0;
}
+ if (local->op_ret && local->gfid_missing) {
+ if (gf_uuid_is_null(local->gfid_req)) {
+ DHT_STACK_UNWIND(lookup, frame, -1, ENODATA, NULL,
+ NULL, NULL, NULL);
+ return 0;
+ }
+ /* A hack */
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
+
if (local->dir_count) {
dht_lookup_directory (frame, this, &local->loc);
return 0;
@@ -2751,6 +2790,8 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
if (op_errno != ENOENT)
local->op_errno = op_errno;
+ if (op_errno == ENODATA)
+ local->gfid_missing = _gf_true;
goto unlock;
}
@@ -3292,7 +3333,8 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
}
- if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
+ if (is_dir || (op_ret == -1 &&
+ ((op_errno == ENOTCONN) || (op_errno == ENODATA)))) {
dht_lookup_directory (frame, this, &local->loc);
return 0;
}
@@ -3421,6 +3463,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
int call_cnt = 0;
int gen = 0;
loc_t new_loc = {0,};
+ void *gfid_req_ptr = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -3630,6 +3673,20 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
/* need it for dir self-heal */
dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+ /* Fuse sets a random value in gfid-req. If the gfid is missing
+ * on one or more subvols, posix will set the gfid to this
+ * value, causing GFID mismatches for directories.
+ */
+ ret = dict_get_ptr(local->xattr_req, "gfid-req",
+ &gfid_req_ptr);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "%s: No gfid-req available", loc->path);
+ } else {
+ memcpy(local->gfid_req, gfid_req_ptr, sizeof(uuid_t));
+ dict_del(local->xattr_req, "gfid-req");
+ }
+
if (!hashed_subvol) {
gf_msg_debug (this->name, 0,
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index b40815c..94a0869 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -346,6 +346,7 @@ struct dht_local {
/* gfid related */
uuid_t gfid;
+ uuid_t gfid_req;
/* flag used to make sure we need to return estale in
{lookup,revalidate}_cbk */
@@ -394,6 +395,7 @@ struct dht_local {
loc_t loc2_copy;
gf_boolean_t locked;
gf_boolean_t dont_create_linkto;
+ gf_boolean_t gfid_missing;
};
typedef struct dht_local dht_local_t;
--
1.8.3.1
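
In the patch above, dht_lookup() stashes the fuse-supplied gfid-req into local->gfid_req and deletes the key from xattr_req, so a fresh lookup never forwards the random value; a gfid-req is re-added only when a heal is actually needed, preferring the GFID already learned from healthy subvols. A minimal sketch of that selection logic follows, using libuuid's uuid_t/uuid_is_null() in place of GlusterFS's gf_uuid_* wrappers (compile against libuuid).

#include <stddef.h>
#include <uuid/uuid.h>

/* Returns the 16-byte GFID that should be sent as "gfid-req" for the heal,
 * or NULL if neither source has a usable value (the heal proceeds without it). */
static const unsigned char *
pick_gfid_for_heal (const uuid_t gfid_from_subvols, const uuid_t gfid_from_fuse)
{
        if (!uuid_is_null (gfid_from_subvols))
                return gfid_from_subvols;   /* trust what the cluster already has */
        if (!uuid_is_null (gfid_from_fuse))
                return gfid_from_fuse;      /* fall back to the stashed fuse value */
        return NULL;
}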


@@ -0,0 +1,154 @@
From 039d3b0631336ba2197fdf203226151a488d60bb Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 11 Mar 2019 17:03:28 +0530
Subject: [PATCH 538/538] cluster/afr: Send truncate on arbiter brick from SHD
Problem:
In an arbiter volume configuration SHD will not send any writes onto the arbiter
brick even if there is a data pending marker for the arbiter brick. If we have an
arbiter setup on the geo-rep master and there are data pending markers for the files
on the arbiter brick, SHD will not mark any data changelog during healing. While syncing
the data from master to slave, if the arbiter brick is considered ACTIVE, then
there is a chance that the slave will miss some data. If the arbiter brick is being
newly added or replaced, there is a chance of the slave missing all the data during sync.
Fix:
If there is a data pending marker for the arbiter brick, send a truncate on the arbiter
brick during heal, so that it records the truncate as a data transaction in the changelog.
Backport of: https://review.gluster.org/#/c/glusterfs/+/22325/
Change-Id: I174d5d557f1ae55dbe758bc92368c133f1ad0929
BUG: 1683893
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/164978
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
...bug-1686568-send-truncate-on-arbiter-from-shd.t | 38 ++++++++++++++++++++++
tests/volume.rc | 2 +-
xlators/cluster/afr/src/afr-self-heal-data.c | 25 +++++++-------
3 files changed, 51 insertions(+), 14 deletions(-)
create mode 100644 tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
diff --git a/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
new file mode 100644
index 0000000..78581e9
--- /dev/null
+++ b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+CHANGELOG_PATH_0="$B0/${V0}2/.glusterfs/changelogs"
+ROLLOVER_TIME=100
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/zero of=$M0/file1 bs=128K count=5
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume profile $V0 info
+truncate_count=$($CLI volume profile $V0 info | grep TRUNCATE | awk '{count += $8} END {print count}')
+
+EXPECT "1" echo $truncate_count
+EXPECT "1" check_changelog_op ${CHANGELOG_PATH_0} "^ D "
+
+cleanup;
diff --git a/tests/volume.rc b/tests/volume.rc
index 6a983fd..3af663c 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -874,5 +874,5 @@ function check_changelog_op {
local clog_path=$1
local op=$2
- $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep $op | wc -l
+ $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 2ac6e47..8bdea2a 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -399,17 +399,18 @@ __afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- unsigned char arbiter_sink_status = 0;
int i = 0;
local = frame->local;
priv = this->private;
- if (priv->arbiter_count) {
- arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
- healed_sinks[ARBITER_BRICK_INDEX] = 0;
- }
-
+ /* This will send truncate on the arbiter brick as well if it is marked
+ * as sink. If changelog is enabled on the volume it captures truncate
+ * as a data transactions on the arbiter brick. This will help geo-rep
+ * to properly sync the data from master to slave if arbiter is the
+ * ACTIVE brick during syncing and which had got some entries healed for
+ * data as part of self heal.
+ */
AFR_ONLIST (healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd,
size, NULL);
@@ -420,8 +421,6 @@ __afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,
*/
healed_sinks[i] = 0;
- if (arbiter_sink_status)
- healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status;
return 0;
}
@@ -733,6 +732,11 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto unlock;
}
+ ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks,
+ locked_replies[source].poststat.ia_size);
+ if (ret < 0)
+ goto unlock;
+
if (priv->arbiter_count &&
AFR_COUNT (healed_sinks, priv->child_count) == 1 &&
healed_sinks[ARBITER_BRICK_INDEX]) {
@@ -740,11 +744,6 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto restore_time;
}
- ret = __afr_selfheal_truncate_sinks (frame, this, fd, healed_sinks,
- locked_replies[source].poststat.ia_size);
- if (ret < 0)
- goto unlock;
-
ret = 0;
}
--
1.8.3.1
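
The core of the change in __afr_selfheal_data() is ordering: the truncate of the sinks (now including the arbiter brick, since its sink status is no longer masked and restored) happens before the arbiter-only early return, so the arbiter logs a data transaction even when there is nothing else to heal on it. The following is a self-contained toy sketch of that reordered control flow, with stand-in functions rather than the real AFR helpers.

#include <stdio.h>

/* stand-in for __afr_selfheal_truncate_sinks(): truncates every sink,
 * arbiter brick included, so the changelog records a data fop for it */
static int
truncate_all_sinks (void)
{
        printf ("truncate sinks (arbiter too)\n");
        return 0;
}

/* stand-in for the diff/full data heal loop (never runs on the arbiter) */
static int
copy_data_to_sinks (void)
{
        printf ("copy data to non-arbiter sinks\n");
        return 0;
}

static int
selfheal_data_sketch (int arbiter_is_only_sink)
{
        int ret = truncate_all_sinks ();    /* moved before the early return by the patch */
        if (ret < 0)
                return ret;

        if (arbiter_is_only_sink)
                return 0;                   /* nothing left to copy; caller restores timestamps */

        return copy_data_to_sinks ();
}

int
main (void)
{
        return selfheal_data_sketch (1);
}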


@@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
%else
Name: glusterfs
Version: 3.12.2
Release: 46%{?dist}
Release: 47%{?dist}
%endif
License: GPLv2 or LGPLv3+
Group: System Environment/Base
@@ -799,6 +799,10 @@ Patch0531: 0531-fuse-remove-the-duplicate-FUSE_FOP-calls.patch
Patch0532: 0532-dht-fix-double-extra-unref-of-inode-at-heal-path.patch
Patch0533: 0533-glusterd-glusterd-memory-leak-while-running-gluster-.patch
Patch0534: 0534-fuse-make-sure-the-send-lookup-on-root-instead-of-ge.patch
Patch0535: 0535-posix-Deletion-of-block-hosting-volume-throwing-erro.patch
Patch0536: 0536-cluster-dht-Fix-lookup-selfheal-and-rmdir-race.patch
Patch0537: 0537-cluster-dht-Do-not-use-gfid-req-in-fresh-lookup.patch
Patch0538: 0538-cluster-afr-Send-truncate-on-arbiter-brick-from-SHD.patch
%description
GlusterFS is a distributed file-system capable of scaling to several
@@ -2663,6 +2667,9 @@ fi
%endif
%changelog
* Wed Mar 13 2019 Milind Changire <mchangir@redhat.com> - 3.12.2-47
- fixes bugs bz#1458215 bz#1661258 bz#1669020 bz#1683893
* Thu Mar 07 2019 Milind Changire <mchangir@redhat.com> - 3.12.2-46
- fixes bugs bz#1668327 bz#1684648