autobuild v3.12.2-40

Resolves: bz#1668304 bz#1669020
Signed-off-by: Milind Changire <mchangir@redhat.com>
Milind Changire 2019-01-25 09:59:25 +05:30
parent ba09f585ae
commit 7674b73703
3 changed files with 306 additions and 1 deletion

0509-core-heketi-cli-is-throwing-error-target-is-busy.patch

@@ -0,0 +1,117 @@
From f58abec63fb325e0e1c21fe3fe127de2e4a85d7d Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Thu, 24 Jan 2019 18:45:54 +0530
Subject: [PATCH 509/510] core: heketi-cli is throwing error "target is busy"
Problem: While deleting a block hosting volume through heketi-cli,
the cli throws the error "target is busy". The error occurs because
the brick is not detached successfully, and the brick is not detached
due to a race condition while cleaning up the xprt associated with
the detached brick.
Solution: To avoid this xprt-specific race condition, introduce an
atomic flag on rpc_transport.
> Change-Id: Id4ff1fe8375a63be71fb3343f455190a1b8bb6d4
> fixes: bz#1668190
> (Cherry picked from commit 04f84756e1baa5eff4560339700f82970eaa5d80)
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22073/)
Change-Id: Ie3786b569ee03569bc3ac970925732dd834a76dc
BUG: 1669020
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/161388
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
rpc/rpc-lib/src/rpc-transport.c | 1 +
rpc/rpc-lib/src/rpc-transport.h | 1 +
xlators/protocol/server/src/server.c | 18 ++++++++++++++++++
3 files changed, 20 insertions(+)
diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
index 77abf9617..0c6ab6694 100644
--- a/rpc/rpc-lib/src/rpc-transport.c
+++ b/rpc/rpc-lib/src/rpc-transport.c
@@ -371,6 +371,7 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
}
INIT_LIST_HEAD (&trans->list);
+ GF_ATOMIC_INIT(trans->disconnect_progress, 0);
return_trans = trans;
diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h
index 23246c564..f5fb6e13b 100644
--- a/rpc/rpc-lib/src/rpc-transport.h
+++ b/rpc/rpc-lib/src/rpc-transport.h
@@ -217,6 +217,7 @@ struct rpc_transport {
* layer or in client management notification handler functions
*/
gf_boolean_t connect_failed;
+ gf_atomic_t disconnect_progress;
};
struct rpc_transport_ops {
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 104615265..ba3b8316d 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -553,6 +553,11 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
break;
}
+ /* Set the disconnect_progress flag to 1 to avoid races
+ during brick detach while brick mux is enabled
+ */
+ GF_ATOMIC_INIT(trans->disconnect_progress, 1);
+
/* transport has to be removed from the list upon disconnect
* irrespective of whether lock self heal is off or on, since
* new transport will be created upon reconnect.
@@ -1638,6 +1643,7 @@ notify (xlator_t *this, int32_t event, void *data, ...)
glusterfs_ctx_t *ctx = NULL;
gf_boolean_t xprt_found = _gf_false;
uint64_t totxprt = 0;
+ uint64_t totdisconnect = 0;
GF_VALIDATE_OR_GOTO (THIS->name, this, out);
conf = this->private;
@@ -1715,6 +1721,10 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (!xprt->xl_private) {
continue;
}
+
+ if (GF_ATOMIC_GET(xprt->disconnect_progress))
+ continue;
+
if (xprt->xl_private->bound_xl == data) {
totxprt++;
}
@@ -1740,15 +1750,23 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (!xprt->xl_private) {
continue;
}
+
+ if (GF_ATOMIC_GET(xprt->disconnect_progress))
+ continue;
+
if (xprt->xl_private->bound_xl == data) {
gf_log (this->name, GF_LOG_INFO,
"disconnecting %s",
xprt->peerinfo.identifier);
xprt_found = _gf_true;
+ totdisconnect++;
rpc_transport_disconnect (xprt, _gf_false);
}
}
+ if (totxprt > totdisconnect)
+ GF_ATOMIC_SUB(victim->xprtrefcnt, (totxprt - totdisconnect));
+
pthread_mutex_unlock (&conf->mutex);
if (this->ctx->active) {
top = this->ctx->active->first;
--
2.20.1
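
The core of the patch above is a per-transport "disconnect in progress"
flag: it is set as soon as a DISCONNECT event arrives, and the brick-detach
path in server.c skips any transport whose cleanup is already under way,
adjusting victim->xprtrefcnt only by the number of transports it actually
disconnects. A minimal, self-contained sketch of that pattern follows, using
C11 atomics in place of gluster's GF_ATOMIC_* wrappers; transport_t,
transport_mark_disconnecting and detach_count_live_transports are
illustrative names, not glusterfs APIs.

/* Sketch of the disconnect-progress flag, assuming C11 <stdatomic.h>.
 * Illustrative types and names only; not the glusterfs implementation. */
#include <stdatomic.h>
#include <stddef.h>

typedef struct transport {
    struct transport *next;
    void *bound_xl;                 /* brick this transport is bound to */
    atomic_int disconnect_progress; /* 0 = live, 1 = teardown under way */
} transport_t;

/* Called from the DISCONNECT event handler: mark the transport so a
 * concurrent brick-detach walk skips it instead of racing with cleanup. */
static void
transport_mark_disconnecting(transport_t *trans)
{
    atomic_store(&trans->disconnect_progress, 1);
}

/* Called while detaching a brick: count only transports that are still
 * live and bound to the brick being detached, mirroring the
 * totxprt/totdisconnect accounting in the patch. */
static size_t
detach_count_live_transports(transport_t *head, void *brick_xl)
{
    size_t total = 0;
    for (transport_t *t = head; t != NULL; t = t->next) {
        if (atomic_load(&t->disconnect_progress))
            continue; /* already being torn down elsewhere */
        if (t->bound_xl == brick_xl)
            total++;
    }
    return total;
}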

0510-features-shard-Ref-shard-inode-while-adding-to-fsync.patch

@@ -0,0 +1,183 @@
From f4d1a1683882a4da81240413dae1f6a390ee2442 Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Thu, 24 Jan 2019 14:14:39 +0530
Subject: [PATCH 510/510] features/shard: Ref shard inode while adding to fsync
list
> Upstream: https://review.gluster.org/22091
> BUG: 1669077
> Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2
PROBLEM:
Many of the earlier changes in the management of shards in the lru and
fsync lists assumed that if a given shard exists in the fsync list, it
must be part of the lru list as well. This turned out to be untrue.
Consider this: a file is FALLOCATE'd to a size that makes the number
of participant shards greater than the lru list size. In this case,
some of the resolved shards that are to participate in this fop are
evicted from the lru list to make room for the rest of the shards.
Once FALLOCATE completes, these shards are added to the fsync list,
but without a ref. After the fop completes, these shard inodes are
unref'd and destroyed while their inode ctxs are still part of the
fsync list. When an FSYNC is then issued on the base file and the
fsync list is traversed, the client crashes due to an illegal memory
access.
FIX:
Hold a ref on the shard inode when adding it to the fsync list as
well, and unref it under the following conditions:
1. when the shard is evicted from the lru list
2. when the base file is fsync'd
3. when the shards are deleted
Change-Id: Iab460667d091b8388322f59b6cb27ce69299b1b2
BUG: 1668304
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/161397
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
---
tests/bugs/shard/bug-1669077.t | 29 +++++++++++++++++++++++++++++
xlators/features/shard/src/shard.c | 29 +++++++++++++++++++++--------
2 files changed, 50 insertions(+), 8 deletions(-)
create mode 100644 tests/bugs/shard/bug-1669077.t
diff --git a/tests/bugs/shard/bug-1669077.t b/tests/bugs/shard/bug-1669077.t
new file mode 100644
index 000000000..8d3a67a36
--- /dev/null
+++ b/tests/bugs/shard/bug-1669077.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SHARD_COUNT_TIME=5
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# If the bug still exists, client should crash during fallocate below
+TEST fallocate -l 200M $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 19dd3e4ba..cd388500d 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -272,6 +272,7 @@ shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this,
* of the to_fsync_list.
*/
inode_ref (base_inode);
+ inode_ref(shard_inode);
LOCK (&base_inode->lock);
LOCK (&shard_inode->lock);
@@ -285,8 +286,10 @@ shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this,
/* Unref the base inode corresponding to the ref above, if the shard is
* found to be already part of the fsync list.
*/
- if (ret != 0)
+ if (ret != 0) {
inode_unref (base_inode);
+ inode_unref(shard_inode);
+ }
return ret;
}
@@ -735,6 +738,10 @@ after_fsync_check:
block_bname);
inode_forget (lru_inode, 0);
} else {
+ /* The following unref corresponds to the ref
+ * held when the shard was added to fsync list.
+ */
+ inode_unref(lru_inode);
fsync_inode = lru_inode;
if (lru_base_inode)
inode_unref (lru_base_inode);
@@ -2947,7 +2954,7 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
shard_priv_t *priv = NULL;
shard_inode_ctx_t *ctx = NULL;
shard_inode_ctx_t *base_ictx = NULL;
- gf_boolean_t unlink_unref_forget = _gf_false;
+ int unref_shard_inode = 0;
this = THIS;
priv = this->private;
@@ -2973,11 +2980,12 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
list_del_init (&ctx->ilist);
priv->inode_count--;
unref_base_inode++;
+ unref_shard_inode++;
GF_ASSERT (priv->inode_count >= 0);
- unlink_unref_forget = _gf_true;
}
if (ctx->fsync_needed) {
unref_base_inode++;
+ unref_shard_inode++;
list_del_init (&ctx->to_fsync_list);
if (base_inode) {
__shard_inode_ctx_get (base_inode, this, &base_ictx);
@@ -2988,11 +2996,11 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
UNLOCK(&inode->lock);
if (base_inode)
UNLOCK(&base_inode->lock);
- if (unlink_unref_forget) {
- inode_unlink (inode, priv->dot_shard_inode, block_bname);
- inode_unref (inode);
- inode_forget (inode, 0);
- }
+
+ inode_unlink(inode, priv->dot_shard_inode, block_bname);
+ inode_ref_reduce_by_n(inode, unref_shard_inode);
+ inode_forget(inode, 0);
+
if (base_inode && unref_base_inode)
inode_ref_reduce_by_n (base_inode, unref_base_inode);
UNLOCK(&priv->lock);
@@ -5824,6 +5832,7 @@ shard_fsync_shards_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
shard_inode_ctx_t *ctx = NULL;
shard_inode_ctx_t *base_ictx = NULL;
inode_t *base_inode = NULL;
+ gf_boolean_t unref_shard_inode = _gf_false;
local = frame->local;
base_inode = local->fd->inode;
@@ -5858,11 +5867,15 @@ out:
list_add_tail (&ctx->to_fsync_list,
&base_ictx->to_fsync_list);
base_ictx->fsync_count++;
+ } else {
+ unref_shard_inode = _gf_true;
}
}
UNLOCK (&anon_fd->inode->lock);
UNLOCK (&base_inode->lock);
}
+ if (unref_shard_inode)
+ inode_unref(anon_fd->inode);
if (anon_fd)
fd_unref (anon_fd);
--
2.20.1
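
The patch above makes the to_fsync_list hold its own reference on each
shard inode: the ref is taken when the shard is queued for fsync and
dropped when the shard is evicted from the lru list, when the base file is
fsync'd, or when the shard is deleted, so the list can never point at a
destroyed inode. The following simplified sketch illustrates that
ref/unref discipline; shard_inode_t, shard_ref, shard_unref and the
fsync_list_* helpers are illustrative stand-ins, not the shard
translator's actual APIs.

/* Sketch of "the list owns a reference"; illustrative names only. */
#include <assert.h>
#include <stdlib.h>

typedef struct shard_inode {
    int refcount;
    int on_fsync_list;
    struct shard_inode *fsync_next;
} shard_inode_t;

static void
shard_ref(shard_inode_t *s)
{
    s->refcount++;
}

static void
shard_unref(shard_inode_t *s)
{
    assert(s->refcount > 0);
    if (--s->refcount == 0)
        free(s); /* destroyed only once no owner (lru or fsync list) holds it */
}

/* Adding to the fsync list takes its own reference, so lru eviction can no
 * longer free an inode the fsync list still points at. */
static void
fsync_list_add(shard_inode_t **head, shard_inode_t *s)
{
    if (s->on_fsync_list)
        return; /* already queued: no extra ref */
    shard_ref(s);
    s->on_fsync_list = 1;
    s->fsync_next = *head;
    *head = s;
}

/* Draining the list (on fsync of the base file, on lru eviction, or on
 * shard deletion) releases exactly the reference taken in fsync_list_add(). */
static void
fsync_list_drain(shard_inode_t **head)
{
    while (*head != NULL) {
        shard_inode_t *s = *head;
        *head = s->fsync_next;
        s->on_fsync_list = 0;
        shard_unref(s);
    }
}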

glusterfs.spec

@@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
%else
Name: glusterfs
Version: 3.12.2
Release: 39%{?dist}
Release: 40%{?dist}
%endif
License: GPLv2 or LGPLv3+
Group: System Environment/Base
@@ -773,6 +773,8 @@ Patch0505: 0505-features-shard-Fix-launch-of-multiple-synctasks-for-.patch
Patch0506: 0506-features-shard-Assign-fop-id-during-background-delet.patch
Patch0507: 0507-geo-rep-fix-rename-sync-on-hybrid-crawl.patch
Patch0508: 0508-glusterd-Resolve-multiple-leaks-in-glusterd-code-pat.patch
Patch0509: 0509-core-heketi-cli-is-throwing-error-target-is-busy.patch
Patch0510: 0510-features-shard-Ref-shard-inode-while-adding-to-fsync.patch
%description
GlusterFS is a distributed file-system capable of scaling to several
@@ -2733,6 +2735,9 @@ fi
%endif
%changelog
* Fri Jan 25 2019 Milind Changire <mchangir@redhat.com> - 3.12.2-40
- fixes bugs bz#1668304 bz#1669020
* Mon Jan 21 2019 Milind Changire <mchangir@redhat.com> - 3.12.2-39
- fixes bugs bz#1667169