From 299302574e3a0371e39f292f83ccf379dc2562bc Mon Sep 17 00:00:00 2001
From: Milind Changire
Date: Fri, 27 Jul 2018 11:05:48 -0400
Subject: [PATCH] autobuild v3.12.2-15

Resolves: bz#1589279 bz#1598384 bz#1599362 bz#1599998 bz#1600790
Resolves: bz#1601331 bz#1603103
Signed-off-by: Milind Changire
---
 0326-glusterd-memory-leak-in-get-state.patch  | 138 ++++
 ...h-lk_owner-only-when-pre-op-succeeds.patch | 102 +++
 ...x-issues-with-gfid-conflict-handling.patch | 455 +++++++++++
 ...dht-Set-loc-gfid-before-healing-attr.patch |  52 ++
 ...-check-before-removing-stale-symlink.patch |  70 ++
 ...pc-free-registered-callback-programs.patch |  49 ++
 ...nection_cleanup-is-crashed-due-to-do.patch |  67 ++
 ...tiple-checks-before-attach-start-a-b.patch | 726 ++++++++++++++++++
 glusterfs.spec                                |  14 +-
 9 files changed, 1672 insertions(+), 1 deletion(-)
 create mode 100644 0326-glusterd-memory-leak-in-get-state.patch
 create mode 100644 0327-afr-switch-lk_owner-only-when-pre-op-succeeds.patch
 create mode 100644 0328-geo-rep-Fix-issues-with-gfid-conflict-handling.patch
 create mode 100644 0329-cluster-dht-Set-loc-gfid-before-healing-attr.patch
 create mode 100644 0330-posix-check-before-removing-stale-symlink.patch
 create mode 100644 0331-rpc-free-registered-callback-programs.patch
 create mode 100644 0332-rpc-rpc_clnt_connection_cleanup-is-crashed-due-to-do.patch
 create mode 100644 0333-glusterd-Add-multiple-checks-before-attach-start-a-b.patch

diff --git a/0326-glusterd-memory-leak-in-get-state.patch b/0326-glusterd-memory-leak-in-get-state.patch
new file mode 100644
index 0000000..e27bc3c
--- /dev/null
+++ b/0326-glusterd-memory-leak-in-get-state.patch
@@ -0,0 +1,138 @@
+From eadd7e7168349705b29bc6ae9f99ba3e6ae58060 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde
+Date: Mon, 16 Jul 2018 15:59:36 +0530
+Subject: [PATCH 326/333] glusterd: memory leak in get-state
+
+Problem: The gluster get-state command leaks memory when a
+geo-replication session is configured.
+
+Cause: In glusterd_print_gsync_status(), references to the keys of
+gsync_dict are fetched and stored in status_vals[i], but each
+status_vals[i] has already been allocated its own buffer of size
+gf_gsync_status_t; those buffers are then leaked.
+
+Solution: There is no need for an array of pointers (status_vals);
+a single pointer holding the reference to a key of gsync_dict is
+sufficient.
+
+Followed these steps for testing:
+1. Configured a geo-rep session
+2. Ran the gluster get-state command 1000 times.
+
+Without this patch, glusterd's memory usage grew significantly
+(around 22000KB per 1000 runs); with this patch the growth dropped
+to about 1500KB per 1000 runs.
+
+>fixes: bz#1601423
+>Change-Id: I361f5525d71f821bb345419ccfdc20ca288ca292
+>Signed-off-by: Sanju Rakonde
+
+upstream patch: https://review.gluster.org/#/c/20521/
+
+Change-Id: I361f5525d71f821bb345419ccfdc20ca288ca292
+BUG: 1599362
+Signed-off-by: Sanju Rakonde
+Reviewed-on: https://code.engineering.redhat.com/gerrit/144325
+Tested-by: RHGS Build Bot
+Reviewed-by: Mohit Agrawal
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 53 ++++++++++------------------
+ 1 file changed, 19 insertions(+), 34 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index 395b342..861ff17 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -5082,7 +5082,7 @@ glusterd_print_gsync_status (FILE *fp, dict_t *gsync_dict)
+ int ret = -1;
+ int gsync_count = 0;
+ int i = 0;
+- gf_gsync_status_t **status_vals = NULL;
++ gf_gsync_status_t *status_vals = NULL;
+ char status_val_name[PATH_MAX] = {0,};
+
+ GF_VALIDATE_OR_GOTO (THIS->name, fp, out);
+@@ -5097,62 +5097,47 @@ glusterd_print_gsync_status (FILE *fp, dict_t *gsync_dict)
+ goto out;
+ }
+
+- status_vals = GF_CALLOC (gsync_count, sizeof (gf_gsync_status_t *),
+- gf_common_mt_char);
+- if (!status_vals) {
+- ret = -1;
+- goto out;
+- }
+- for (i = 0; i < gsync_count; i++) {
+- status_vals[i] = GF_CALLOC (1, sizeof (gf_gsync_status_t),
+- gf_common_mt_char);
+- if (!status_vals[i]) {
+- ret = -1;
+- goto out;
+- }
+- }
+-
+ for (i = 0; i < gsync_count; i++) {
+ snprintf (status_val_name, sizeof(status_val_name), "status_value%d", i);
+
+- ret = dict_get_bin (gsync_dict, status_val_name, (void **)&(status_vals[i]));
++ ret = dict_get_bin (gsync_dict, status_val_name, (void **)&(status_vals));
+ if (ret)
+ goto out;
+
+ fprintf (fp, "Volume%d.pair%d.session_slave: %s\n", volcount, i+1,
+- get_struct_variable(21, status_vals[i]));
++ get_struct_variable(21, status_vals));
+ fprintf (fp, "Volume%d.pair%d.master_node: %s\n", volcount, i+1,
+- get_struct_variable(0, status_vals[i]));
++ get_struct_variable(0, status_vals));
+ fprintf (fp, "Volume%d.pair%d.master_volume: %s\n", volcount, i+1,
+- get_struct_variable(1, status_vals[i]));
++ get_struct_variable(1, status_vals));
+ fprintf (fp, "Volume%d.pair%d.master_brick: %s\n", volcount, i+1,
+- get_struct_variable(2, status_vals[i]));
++ get_struct_variable(2, status_vals));
+ fprintf (fp, "Volume%d.pair%d.slave_user: %s\n", volcount, i+1,
+- get_struct_variable(3, status_vals[i]));
++ get_struct_variable(3, status_vals));
+ fprintf (fp, "Volume%d.pair%d.slave: %s\n", volcount, i+1,
+- get_struct_variable(4, status_vals[i]));
++ get_struct_variable(4, status_vals));
+ fprintf (fp, "Volume%d.pair%d.slave_node: %s\n", volcount, i+1,
+- get_struct_variable(5, status_vals[i]));
++ get_struct_variable(5, status_vals));
+ fprintf (fp, "Volume%d.pair%d.status: %s\n", volcount, i+1,
+- get_struct_variable(6, status_vals[i]));
++ get_struct_variable(6, status_vals));
+ fprintf (fp, "Volume%d.pair%d.crawl_status: %s\n", volcount, i+1,
+- get_struct_variable(7, status_vals[i]));
++ get_struct_variable(7, status_vals));
+ fprintf (fp, "Volume%d.pair%d.last_synced: %s\n", volcount, i+1,
+- get_struct_variable(8, status_vals[i]));
++ get_struct_variable(8, status_vals));
+ fprintf (fp, "Volume%d.pair%d.entry: %s\n", volcount, i+1,
"Volume%d.pair%d.entry: %s\n", volcount, i+1, +- get_struct_variable(9, status_vals[i])); ++ get_struct_variable(9, status_vals)); + fprintf (fp, "Volume%d.pair%d.data: %s\n", volcount, i+1, +- get_struct_variable(10, status_vals[i])); ++ get_struct_variable(10, status_vals)); + fprintf (fp, "Volume%d.pair%d.meta: %s\n", volcount, i+1, +- get_struct_variable(11, status_vals[i])); ++ get_struct_variable(11, status_vals)); + fprintf (fp, "Volume%d.pair%d.failures: %s\n", volcount, i+1, +- get_struct_variable(12, status_vals[i])); ++ get_struct_variable(12, status_vals)); + fprintf (fp, "Volume%d.pair%d.checkpoint_time: %s\n", volcount, +- i+1, get_struct_variable(13, status_vals[i])); ++ i+1, get_struct_variable(13, status_vals)); + fprintf (fp, "Volume%d.pair%d.checkpoint_completed: %s\n", +- volcount, i+1, get_struct_variable(14, status_vals[i])); ++ volcount, i+1, get_struct_variable(14, status_vals)); + fprintf (fp, "Volume%d.pair%d.checkpoint_completion_time: %s\n", +- volcount, i+1, get_struct_variable(15, status_vals[i])); ++ volcount, i+1, get_struct_variable(15, status_vals)); + } + out: + return ret; +-- +1.8.3.1 + diff --git a/0327-afr-switch-lk_owner-only-when-pre-op-succeeds.patch b/0327-afr-switch-lk_owner-only-when-pre-op-succeeds.patch new file mode 100644 index 0000000..d69588c --- /dev/null +++ b/0327-afr-switch-lk_owner-only-when-pre-op-succeeds.patch @@ -0,0 +1,102 @@ +From fef5fb73545bed5a4040db1f8e4e855286c1981d Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Wed, 18 Jul 2018 14:16:46 +0530 +Subject: [PATCH 327/333] afr: switch lk_owner only when pre-op succeeds + +Backport of https://review.gluster.org/#/c/20527/ + +Problem: +In a disk full scenario, we take a failure path in afr_transaction_perform_fop() +and go to unlock phase. But we change the lk-owner before that, causing unlock +to fail. When mount issues another fop that takes locks on that file, it hangs. + +Fix: +Change lk-owner only when we are about to perform the fop phase. +Also fix the same issue for arbiters when afr_txn_arbitrate_fop() fails the fop. + +Also removed the DISK_SPACE_CHECK_AND_GOTO in posix_xattrop. Otherwise truncate +to zero will fail pre-op phase with ENOSPC when the user is actually trying to +freee up space. + +Change-Id: I8663003fa7d472e93fe61cc1e39c78084d3de81f +BUG: 1599998 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/144275 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-transaction.c | 20 ++++++++++---------- + xlators/storage/posix/src/posix.c | 5 ----- + 2 files changed, 10 insertions(+), 15 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 321b6f1..3f55070 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -495,11 +495,10 @@ afr_txn_arbitrate_fop (call_frame_t *frame, xlator_t *this) + local->op_errno = ENOTCONN; + for (i = 0; i < priv->child_count; i++) + local->transaction.failed_subvols[i] = 1; +- afr_changelog_post_op (frame, this);/*uninherit should happen*/ +- } else { +- afr_transaction_fop (frame, this); + } + ++ afr_transaction_fop (frame, this); ++ + return; + } + +@@ -529,13 +528,6 @@ afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this) + local->transaction.failed_subvols[i] = 1; + } + } +- /* Perform fops with the lk-owner from top xlator. 
+- * Eg: lk-owner of posix-lk and flush should be same,
+- * flush cant clear the posix-lks without that lk-owner.
+- */
+- afr_save_lk_owner (frame);
+- frame->root->lk_owner =
+- local->transaction.main_frame->root->lk_owner;
+
+ if (local->pre_op_compat)
+ /* old mode, pre-op was done as afr_changelog_do()
+@@ -561,6 +553,14 @@ afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
+ }
+
+ fop:
++ /* Perform fops with the lk-owner from top xlator.
++ * Eg: lk-owner of posix-lk and flush should be same,
++ * flush cant clear the posix-lks without that lk-owner.
++ */
++ afr_save_lk_owner (frame);
++ frame->root->lk_owner =
++ local->transaction.main_frame->root->lk_owner;
++
+ if (priv->arbiter_count == 1) {
+ afr_txn_arbitrate_fop (frame, this);
+ } else {
+diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
+index 01f472b..ddb875c 100644
+--- a/xlators/storage/posix/src/posix.c
++++ b/xlators/storage/posix/src/posix.c
+@@ -6147,16 +6147,11 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xattr_rsp = NULL;
+ dict_t *xdata_rsp = NULL;
+ struct iatt stbuf = {0};
+- struct posix_private *priv = NULL;
+-
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (xattr, out);
+ VALIDATE_OR_GOTO (this, out);
+
+- priv = this->private;
+- DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
+-
+ if (fd) {
+ op_ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
+ if (op_ret < 0) {
+--
+1.8.3.1
+
diff --git a/0328-geo-rep-Fix-issues-with-gfid-conflict-handling.patch b/0328-geo-rep-Fix-issues-with-gfid-conflict-handling.patch
new file mode 100644
index 0000000..c82077d
--- /dev/null
+++ b/0328-geo-rep-Fix-issues-with-gfid-conflict-handling.patch
@@ -0,0 +1,455 @@
+From a9db68fc1f05639cb79defef6ed7da58572113ea Mon Sep 17 00:00:00 2001
+From: Kotresh HR
+Date: Thu, 5 Jul 2018 07:07:38 -0400
+Subject: [PATCH 328/333] geo-rep: Fix issues with gfid conflict handling
+
+1. MKDIR/RMDIR is recorded on all bricks, so if
+   one brick succeeds in creating it, the other
+   bricks should ignore it. But this was not
+   happening: the fix for renaming directories in
+   hybrid crawl was trying to rename the directory
+   to itself, and in the process crashed with
+   ENOENT if the directory had been removed.
+
+2. If a file was created, deleted, and then a
+   directory was created with the same name, it
+   failed to sync. Again the issue is around the
+   fix for renaming directories in hybrid crawl.
+   Fixed the same.
+
+   If the same case occurred with a hardlink
+   present for the file, it also failed. This
+   patch fixes that too.
+ +Backport of + > Patch: https://review.gluster.org/#/c/20473/ + > fixes: bz#1598884 + > Change-Id: I6f3bca44e194e415a3d4de3b9d03cc8976439284 + > Signed-off-by: Kotresh HR + +BUG: 1598384 +Change-Id: I6f3bca44e194e415a3d4de3b9d03cc8976439284 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/143400 +Tested-by: RHGS Build Bot +Reviewed-by: Aravinda Vishwanathapura Krishna Murthy +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/master.py | 157 ++++++++++++++++++++++--------- + geo-replication/syncdaemon/resource.py | 57 ++++++----- + geo-replication/syncdaemon/syncdutils.py | 35 +++++++ + 3 files changed, 180 insertions(+), 69 deletions(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 64e9836..1399378 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -692,7 +692,8 @@ class GMasterChangelogMixin(GMasterCommon): + TYPE_GFID = "D " + TYPE_ENTRY = "E " + +- MAX_EF_RETRIES = 15 ++ MAX_EF_RETRIES = 10 ++ MAX_OE_RETRIES = 5 + + # flat directory hierarchy for gfid based access + FLAT_DIR_HIERARCHY = '.' +@@ -788,38 +789,53 @@ class GMasterChangelogMixin(GMasterCommon): + + self.status.inc_value("failures", num_failures) + +- def fix_possible_entry_failures(self, failures, retry_count): ++ def fix_possible_entry_failures(self, failures, retry_count, entries): + pfx = gauxpfx() + fix_entry_ops = [] + failures1 = [] + for failure in failures: +- if failure[2]['dst']: ++ if failure[2]['name_mismatch']: ++ pbname = failure[2]['slave_entry'] ++ elif failure[2]['dst']: + pbname = failure[0]['entry1'] + else: + pbname = failure[0]['entry'] +- if failure[2]['gfid_mismatch']: ++ ++ op = failure[0]['op'] ++ # name exists but gfid is different ++ if failure[2]['gfid_mismatch'] or failure[2]['name_mismatch']: + slave_gfid = failure[2]['slave_gfid'] + st = lstat(os.path.join(pfx, slave_gfid)) ++ # Takes care of scenarios with no hardlinks + if isinstance(st, int) and st == ENOENT: +- logging.info(lf('Fixing gfid mismatch in slave. Deleting' +- ' the entry', retry_count=retry_count, ++ logging.info(lf('Entry not present on master. Fixing gfid ' ++ 'mismatch in slave. Deleting the entry', ++ retry_count=retry_count, + entry=repr(failure))) +- #Add deletion to fix_entry_ops list ++ # Add deletion to fix_entry_ops list + if failure[2]['slave_isdir']: +- fix_entry_ops.append(edct('RMDIR', +- gfid=failure[2]['slave_gfid'], +- entry=pbname)) ++ fix_entry_ops.append( ++ edct('RMDIR', ++ gfid=failure[2]['slave_gfid'], ++ entry=pbname)) + else: +- fix_entry_ops.append(edct('UNLINK', +- gfid=failure[2]['slave_gfid'], +- entry=pbname)) ++ fix_entry_ops.append( ++ edct('UNLINK', ++ gfid=failure[2]['slave_gfid'], ++ entry=pbname)) ++ # Takes care of scenarios of hardlinks/renames on master + elif not isinstance(st, int): +- #The file exists on master but with different name. +- #Probabaly renamed and got missed during xsync crawl. +- if failure[2]['slave_isdir']: +- logging.info(lf('Fixing gfid mismatch in slave', ++ if matching_disk_gfid(slave_gfid, pbname): ++ # Safe to ignore the failure as master contains same ++ # file with same gfid. Remove entry from entries list ++ logging.info(lf('Fixing gfid mismatch in slave. ' ++ ' Safe to ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) ++ entries.remove(failure[0]) ++ # The file exists on master but with different name. ++ # Probably renamed and got missed during xsync crawl. 
++ elif failure[2]['slave_isdir']: + realpath = os.readlink(os.path.join(gconf.local_path, + ".glusterfs", + slave_gfid[0:2], +@@ -827,64 +843,99 @@ class GMasterChangelogMixin(GMasterCommon): + slave_gfid)) + dst_entry = os.path.join(pfx, realpath.split('/')[-2], + realpath.split('/')[-1]) +- rename_dict = edct('RENAME', gfid=slave_gfid, +- entry=failure[0]['entry'], +- entry1=dst_entry, stat=st, +- link=None) +- logging.info(lf('Fixing gfid mismatch in slave. ' +- 'Renaming', retry_count=retry_count, +- entry=repr(rename_dict))) +- fix_entry_ops.append(rename_dict) ++ src_entry = pbname ++ logging.info(lf('Fixing dir name/gfid mismatch in ' ++ 'slave', retry_count=retry_count, ++ entry=repr(failure))) ++ if src_entry == dst_entry: ++ # Safe to ignore the failure as master contains ++ # same directory as in slave with same gfid. ++ # Remove the failure entry from entries list ++ logging.info(lf('Fixing dir name/gfid mismatch' ++ ' in slave. Safe to ignore, ' ++ 'take out entry', ++ retry_count=retry_count, ++ entry=repr(failure))) ++ entries.remove(failure[0]) ++ else: ++ rename_dict = edct('RENAME', gfid=slave_gfid, ++ entry=src_entry, ++ entry1=dst_entry, stat=st, ++ link=None) ++ logging.info(lf('Fixing dir name/gfid mismatch' ++ ' in slave. Renaming', ++ retry_count=retry_count, ++ entry=repr(rename_dict))) ++ fix_entry_ops.append(rename_dict) + else: +- logging.info(lf('Fixing gfid mismatch in slave. ' +- ' Deleting the entry', ++ # A hardlink file exists with different name or ++ # renamed file exists and we are sure from ++ # matching_disk_gfid check that the entry doesn't ++ # exist with same gfid so we can safely delete on slave ++ logging.info(lf('Fixing file gfid mismatch in slave. ' ++ 'Hardlink/Rename Case. Deleting entry', ++ retry_count=retry_count, ++ entry=repr(failure))) ++ fix_entry_ops.append( ++ edct('UNLINK', ++ gfid=failure[2]['slave_gfid'], ++ entry=pbname)) ++ elif failure[1] == ENOENT: ++ # Ignore ENOENT error for fix_entry_ops aka retry_count > 1 ++ if retry_count > 1: ++ logging.info(lf('ENOENT error while fixing entry ops. ' ++ 'Safe to ignore, take out entry', ++ retry_count=retry_count, ++ entry=repr(failure))) ++ entries.remove(failure[0]) ++ elif op in ('MKNOD', 'CREATE', 'MKDIR'): ++ pargfid = pbname.split('/')[1] ++ st = lstat(os.path.join(pfx, pargfid)) ++ # Safe to ignore the failure as master doesn't contain ++ # parent directory. ++ if isinstance(st, int): ++ logging.info(lf('Fixing ENOENT error in slave. Parent ' ++ 'does not exist on master. 
Safe to ' ++ 'ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) +- fix_entry_ops.append(edct('UNLINK', +- gfid=failure[2]['slave_gfid'], +- entry=pbname)) +- logging.error(lf('Entry cannot be fixed in slave due ' +- 'to GFID mismatch, find respective ' +- 'path for the GFID and trigger sync', +- gfid=slave_gfid)) ++ entries.remove(failure[0]) + + if fix_entry_ops: +- #Process deletions of entries whose gfids are mismatched ++ # Process deletions of entries whose gfids are mismatched + failures1 = self.slave.server.entry_ops(fix_entry_ops) +- if not failures1: +- logging.info ("Sucessfully fixed entry ops with gfid mismatch") + +- return failures1 ++ return (failures1, fix_entry_ops) + + def handle_entry_failures(self, failures, entries): + retries = 0 + pending_failures = False + failures1 = [] + failures2 = [] ++ entry_ops1 = [] ++ entry_ops2 = [] + + if failures: + pending_failures = True + failures1 = failures ++ entry_ops1 = entries + + while pending_failures and retries < self.MAX_EF_RETRIES: + retries += 1 +- failures2 = self.fix_possible_entry_failures(failures1, +- retries) ++ (failures2, entry_ops2) = self.fix_possible_entry_failures( ++ failures1, retries, entry_ops1) + if not failures2: + pending_failures = False ++ logging.info(lf('Sucessfully fixed entry ops with gfid ' ++ 'mismatch', retry_count=retries)) + else: + pending_failures = True + failures1 = failures2 ++ entry_ops1 = entry_ops2 + + if pending_failures: + for failure in failures1: + logging.error("Failed to fix entry ops %s", repr(failure)) +- else: +- #Retry original entry list 5 times +- failures = self.slave.server.entry_ops(entries) +- +- self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY') +- + + def process_change(self, change, done, retry): + pfx = gauxpfx() +@@ -1112,7 +1163,19 @@ class GMasterChangelogMixin(GMasterCommon): + self.status.inc_value("entry", len(entries)) + + failures = self.slave.server.entry_ops(entries) +- self.handle_entry_failures(failures, entries) ++ count = 0 ++ while failures and count < self.MAX_OE_RETRIES: ++ count += 1 ++ self.handle_entry_failures(failures, entries) ++ logging.info("Retry original entries. 
count = %s" % count) ++ failures = self.slave.server.entry_ops(entries) ++ if not failures: ++ logging.info("Sucessfully fixed all entry ops with gfid " ++ "mismatch") ++ break ++ ++ self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY') ++ + self.status.dec_value("entry", len(entries)) + + # Update Entry stime in Brick Root only in case of Changelog mode +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index 0d5462a..eb696f3 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -38,9 +38,9 @@ from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION + from syncdutils import GX_GFID_CANONICAL_LEN + from gsyncdstatus import GeorepStatus + from syncdutils import get_master_and_slave_data_from_args +-from syncdutils import lf, Popen, sup, Volinfo ++from syncdutils import lf, Popen, sup + from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt +-from syncdutils import unshare_propagation_supported ++from syncdutils import unshare_propagation_supported, get_slv_dir_path + + UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z') + HostRX = re.compile('[a-zA-Z\d](?:[a-zA-Z\d.-]*[a-zA-Z\d])?', re.I) +@@ -50,7 +50,6 @@ ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP') + + slv_volume = None + slv_host = None +-slv_bricks = None + + def desugar(ustr): + """transform sugared url strings to standard :// form +@@ -463,13 +462,23 @@ class Server(object): + # to be purged is the GFID gotten from the changelog. + # (a stat(changelog_gfid) would also be valid here) + # The race here is between the GFID check and the purge. ++ ++ # If the entry or the gfid of the file to be deleted is not present ++ # on slave, we can ignore the unlink/rmdir ++ if isinstance(lstat(entry), int) or \ ++ isinstance(lstat(os.path.join(pfx, gfid)), int): ++ return ++ + if not matching_disk_gfid(gfid, entry): + collect_failure(e, EEXIST) + return + + if op == 'UNLINK': + er = errno_wrap(os.unlink, [entry], [ENOENT, ESTALE], [EBUSY]) +- return er ++ # EISDIR is safe error, ignore. This can only happen when ++ # unlink is sent from master while fixing gfid conflicts. 
++ if er != EISDIR: ++ return er + + elif op == 'RMDIR': + er = errno_wrap(os.rmdir, [entry], [ENOENT, ESTALE, +@@ -480,7 +489,11 @@ class Server(object): + def collect_failure(e, cmd_ret, dst=False): + slv_entry_info = {} + slv_entry_info['gfid_mismatch'] = False ++ slv_entry_info['name_mismatch'] = False + slv_entry_info['dst'] = dst ++ slv_entry_info['slave_isdir'] = False ++ slv_entry_info['slave_name'] = None ++ slv_entry_info['slave_gfid'] = None + # We do this for failing fops on Slave + # Master should be logging this + if cmd_ret is None: +@@ -498,6 +511,9 @@ class Server(object): + if not isinstance(st, int): + if st and stat.S_ISDIR(st.st_mode): + slv_entry_info['slave_isdir'] = True ++ dir_name = get_slv_dir_path(slv_host, slv_volume, ++ disk_gfid) ++ slv_entry_info['slave_name'] = dir_name + else: + slv_entry_info['slave_isdir'] = False + slv_entry_info['slave_gfid'] = disk_gfid +@@ -618,37 +634,34 @@ class Server(object): + [ENOENT, EEXIST], [ESTALE]) + collect_failure(e, cmd_ret) + elif op == 'MKDIR': ++ en = e['entry'] + slink = os.path.join(pfx, gfid) + st = lstat(slink) + # don't create multiple entries with same gfid + if isinstance(st, int): + blob = entry_pack_mkdir( + gfid, bname, e['mode'], e['uid'], e['gid']) +- else: ++ elif (isinstance(lstat(en), int) or ++ not matching_disk_gfid(gfid, en)): + # If gfid of a directory exists on slave but path based + # create is getting EEXIST. This means the directory is + # renamed in master but recorded as MKDIR during hybrid + # crawl. Get the directory path by reading the backend + # symlink and trying to rename to new name as said by + # master. +- global slv_bricks +- global slv_volume +- global slv_host +- if not slv_bricks: +- slv_info = Volinfo (slv_volume, slv_host) +- slv_bricks = slv_info.bricks +- # Result of readlink would be of format as below. 
+- # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename" +- realpath = os.readlink(os.path.join(slv_bricks[0]['dir'], +- ".glusterfs", gfid[0:2], +- gfid[2:4], gfid)) +- realpath_parts = realpath.split('/') +- src_pargfid = realpath_parts[-2] +- src_basename = realpath_parts[-1] +- src_entry = os.path.join(pfx, src_pargfid, src_basename) + logging.info(lf("Special case: rename on mkdir", +- gfid=gfid, entry=repr(entry))) +- rename_with_disk_gfid_confirmation(gfid, src_entry, entry) ++ gfid=gfid, entry=repr(entry))) ++ src_entry = get_slv_dir_path(slv_host, slv_volume, gfid) ++ if src_entry is not None and src_entry != entry: ++ slv_entry_info = {} ++ slv_entry_info['gfid_mismatch'] = False ++ slv_entry_info['name_mismatch'] = True ++ slv_entry_info['dst'] = False ++ slv_entry_info['slave_isdir'] = True ++ slv_entry_info['slave_gfid'] = gfid ++ slv_entry_info['slave_entry'] = src_entry ++ ++ failures.append((e, EEXIST, slv_entry_info)) + elif op == 'LINK': + slink = os.path.join(pfx, gfid) + st = lstat(slink) +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index 6dafb0a..d798356 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -77,6 +77,7 @@ CHANGELOG_AGENT_CLIENT_VERSION = 1.0 + NodeID = None + rsync_version = None + unshare_mnt_propagation = None ++slv_bricks = None + SPACE_ESCAPE_CHAR = "%20" + NEWLINE_ESCAPE_CHAR = "%0A" + PERCENTAGE_ESCAPE_CHAR = "%25" +@@ -671,6 +672,40 @@ def get_rsync_version(rsync_cmd): + return rsync_version + + ++def get_slv_dir_path(slv_host, slv_volume, gfid): ++ global slv_bricks ++ ++ dir_path = ENOENT ++ ++ if not slv_bricks: ++ slv_info = Volinfo(slv_volume, slv_host) ++ slv_bricks = slv_info.bricks ++ # Result of readlink would be of format as below. ++ # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename" ++ for brick in slv_bricks: ++ dir_path = errno_wrap(os.path.join, ++ [brick['dir'], ++ ".glusterfs", gfid[0:2], ++ gfid[2:4], ++ gfid], [ENOENT], [ESTALE]) ++ if dir_path != ENOENT: ++ break ++ ++ if not isinstance(dir_path, int): ++ realpath = errno_wrap(os.readlink, [dir_path], ++ [ENOENT], [ESTALE]) ++ ++ if not isinstance(realpath, int): ++ realpath_parts = realpath.split('/') ++ pargfid = realpath_parts[-2] ++ basename = realpath_parts[-1] ++ pfx = gauxpfx() ++ dir_entry = os.path.join(pfx, pargfid, basename) ++ return dir_entry ++ ++ return None ++ ++ + def lf(event, **kwargs): + """ + Log Format helper function, log messages can be +-- +1.8.3.1 + diff --git a/0329-cluster-dht-Set-loc-gfid-before-healing-attr.patch b/0329-cluster-dht-Set-loc-gfid-before-healing-attr.patch new file mode 100644 index 0000000..ff5a0af --- /dev/null +++ b/0329-cluster-dht-Set-loc-gfid-before-healing-attr.patch @@ -0,0 +1,52 @@ +From c2b215a14a38d3587a5a3ea4efab384033019ed5 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Wed, 18 Jul 2018 22:09:19 +0530 +Subject: [PATCH 329/333] cluster/dht: Set loc->gfid before healing attr + +AFR takes inodelks when setting attrs. The loc->gfid +and loc->inode->gfid were both null when dht_dir_attr_heal +was called during a fresh lookup of an existing directory. +As the gfid is null, client_pre_inodelk asserts in the gfid +check. +We now set the loc->gfid before calling dht_dir_attr_heal. 
+ +upstream patch: https://review.gluster.org/#/c/20530/ + +> Change-Id: I457f5a73fd301d97a03ca032587e73d4803298ac +> fixes: bz#1602866 +> Signed-off-by: N Balachandran + +Change-Id: Ie5e30d4ab3b824eaad333da22465d6672c75a2f6 +BUG: 1601331 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/144386 +Reviewed-by: Mohit Agrawal +Tested-by: RHGS Build Bot +--- + xlators/cluster/dht/src/dht-common.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 2207708..0984f8f 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1518,7 +1518,6 @@ unlock: + if (local->need_attrheal) { + local->need_attrheal = 0; + if (!__is_root_gfid (inode->gfid)) { +- gf_uuid_copy (local->gfid, local->mds_stbuf.ia_gfid); + local->stbuf.ia_gid = local->mds_stbuf.ia_gid; + local->stbuf.ia_uid = local->mds_stbuf.ia_uid; + local->stbuf.ia_prot = local->mds_stbuf.ia_prot; +@@ -1532,6 +1531,8 @@ unlock: + goto skip_attr_heal; + } + copy_local->stbuf = local->stbuf; ++ gf_uuid_copy (copy_local->loc.gfid, ++ local->stbuf.ia_gfid); + copy_local->mds_stbuf = local->mds_stbuf; + copy_local->mds_subvol = local->mds_subvol; + copy->local = copy_local; +-- +1.8.3.1 + diff --git a/0330-posix-check-before-removing-stale-symlink.patch b/0330-posix-check-before-removing-stale-symlink.patch new file mode 100644 index 0000000..09a3a07 --- /dev/null +++ b/0330-posix-check-before-removing-stale-symlink.patch @@ -0,0 +1,70 @@ +From 4742c4766af4b0def0e12a2b0544c30496dfb48e Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 19 Jul 2018 12:47:38 +0530 +Subject: [PATCH 330/333] posix: check before removing stale symlink + +Backport of https://review.gluster.org/#/c/20509/ + +BZ 1564071 complains of directories with missing gfid symlinks and +corresponding "Found stale gfid handle" messages in the logs. Hence +add a check to see if the symlink points to an actual directory before +removing it. + +Note: Removing stale symlinks was added via commit +3e9a9c029fac359477fb26d9cc7803749ba038b2 + +Change-Id: I5d91fab8e5f3a621a9ecad4a1f9c898a3c2d346a +BUG: 1603103 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/144867 +Reviewed-by: Nithya Balachandran +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index ddb875c..c3b7120 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -235,6 +235,7 @@ posix_lookup (call_frame_t *frame, xlator_t *this, + int32_t nlink_samepgfid = 0; + struct posix_private *priv = NULL; + posix_inode_ctx_t *ctx = NULL; ++ int ret = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); +@@ -284,20 +285,24 @@ posix_lookup (call_frame_t *frame, xlator_t *this, + "lstat on %s failed", + real_path ? 
real_path : "null"); + } ++ entry_ret = -1; + if (loc_is_nameless(loc)) { + if (!op_errno) + op_errno = ESTALE; + loc_gfid (loc, gfid); + MAKE_HANDLE_ABSPATH (gfid_path, this, gfid); +- op_ret = sys_lstat(gfid_path, &statbuf); +- if (op_ret == 0 && statbuf.st_nlink == 1) { +- gf_msg (this->name, GF_LOG_WARNING, ESTALE, ++ ret = sys_stat(gfid_path, &statbuf); ++ if (ret == 0 && ((statbuf.st_mode & S_IFMT) == S_IFDIR)) ++ /*Don't unset if it was a symlink to a dir.*/ ++ goto parent; ++ ret = sys_lstat(gfid_path, &statbuf); ++ if (ret == 0 && statbuf.st_nlink == 1) { ++ gf_msg (this->name, GF_LOG_WARNING, op_errno, + P_MSG_HANDLE_DELETE, "Found stale gfid " + "handle %s, removing it.", gfid_path); + posix_handle_unset (this, gfid, NULL); + } + } +- entry_ret = -1; + goto parent; + } + +-- +1.8.3.1 + diff --git a/0331-rpc-free-registered-callback-programs.patch b/0331-rpc-free-registered-callback-programs.patch new file mode 100644 index 0000000..71403af --- /dev/null +++ b/0331-rpc-free-registered-callback-programs.patch @@ -0,0 +1,49 @@ +From 9c1ddc2e32cbfc8ad313b4f1342fbc20e49af80b Mon Sep 17 00:00:00 2001 +From: Niels de Vos +Date: Mon, 9 Oct 2017 18:58:09 +0200 +Subject: [PATCH 331/333] rpc: free registered callback programs + +> Change-Id: I8c6f6b642f025d1faf74015b8f7aaecd7ebfd4d5 +> BUG: 1443145 +> Signed-off-by: Niels de Vos +> (cherry picked from commit ec39ca32d942d49fd701156174abbba0b73bce2f) +> (Reviewed on upstream link https://review.gluster.org/#/c/18478) + +Change-Id: I23e44507d12326bf63c96c56eae83d5424f8ee63 +BUG: 1600790 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/145358 +Reviewed-by: Niels de Vos +Tested-by: RHGS Build Bot +--- + rpc/rpc-lib/src/rpc-clnt.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c +index e34d2ca..1ea8099 100644 +--- a/rpc/rpc-lib/src/rpc-clnt.c ++++ b/rpc/rpc-lib/src/rpc-clnt.c +@@ -1771,6 +1771,9 @@ rpc_clnt_trigger_destroy (struct rpc_clnt *rpc) + static void + rpc_clnt_destroy (struct rpc_clnt *rpc) + { ++ rpcclnt_cb_program_t *program = NULL; ++ rpcclnt_cb_program_t *tmp = NULL; ++ + if (!rpc) + return; + +@@ -1783,6 +1786,10 @@ rpc_clnt_destroy (struct rpc_clnt *rpc) + mem_pool_destroy (rpc->reqpool); + mem_pool_destroy (rpc->saved_frames_pool); + ++ list_for_each_entry_safe (program, tmp, &rpc->programs, program) { ++ GF_FREE (program); ++ } ++ + GF_FREE (rpc); + return; + } +-- +1.8.3.1 + diff --git a/0332-rpc-rpc_clnt_connection_cleanup-is-crashed-due-to-do.patch b/0332-rpc-rpc_clnt_connection_cleanup-is-crashed-due-to-do.patch new file mode 100644 index 0000000..d6f93dc --- /dev/null +++ b/0332-rpc-rpc_clnt_connection_cleanup-is-crashed-due-to-do.patch @@ -0,0 +1,67 @@ +From 76823d120518528c4edad4af6f4c1cdd50f5b398 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 24 Jul 2018 14:48:35 +0530 +Subject: [PATCH 332/333] rpc: rpc_clnt_connection_cleanup is crashed due to + double free + +Problem: gfapi client is getting crashed in rpc_clnt_connection_cleanup + at the time of destroying saved_frames + +Solution: gfapi client is getting crashed because saved_frame ptr is + already freed in rpc_clnt_destroy.To avoid the same update + code in rpc_clnt_destroy + +> Change-Id: Id8cce102b49f26cfd86ef88257032ed98f43192b +> fixes: bz#1607783 +> (cherry picked from commit abd7b1393294d29eef6913e7f93ab76040c90428) +> (Reviewed on upstream link https://review.gluster.org/#/c/20557/) + +Change-Id: Id3200e36acc1c49a8f5d39a1cc5053864899754c 
+BUG: 1600790
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/145377
+Tested-by: Mohit Agrawal
+Reviewed-by: Niels de Vos
+Tested-by: RHGS Build Bot
+---
+ rpc/rpc-lib/src/rpc-clnt.c | 20 +++++++++++++++++---
+ 1 file changed, 17 insertions(+), 3 deletions(-)
+
+diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
+index 1ea8099..fd7e3ec 100644
+--- a/rpc/rpc-lib/src/rpc-clnt.c
++++ b/rpc/rpc-lib/src/rpc-clnt.c
+@@ -1771,13 +1771,27 @@ rpc_clnt_trigger_destroy (struct rpc_clnt *rpc)
+ static void
+ rpc_clnt_destroy (struct rpc_clnt *rpc)
+ {
+- rpcclnt_cb_program_t *program = NULL;
+- rpcclnt_cb_program_t *tmp = NULL;
++ rpcclnt_cb_program_t *program = NULL;
++ rpcclnt_cb_program_t *tmp = NULL;
++ struct saved_frames *saved_frames = NULL;
++ rpc_clnt_connection_t *conn = NULL;
+
+ if (!rpc)
+ return;
+
+- saved_frames_destroy (rpc->conn.saved_frames);
++ conn = &rpc->conn;
++ /* Access saved_frames in critical-section to avoid
++ crash in rpc_clnt_connection_cleanup at the time
++ of destroying saved frames
++ */
++ pthread_mutex_lock (&conn->lock);
++ {
++ saved_frames = conn->saved_frames;
++ conn->saved_frames = NULL;
++ }
++ pthread_mutex_unlock (&conn->lock);
++
++ saved_frames_destroy (saved_frames);
+ pthread_mutex_destroy (&rpc->lock);
+ pthread_mutex_destroy (&rpc->conn.lock);
+
+--
+1.8.3.1
+
diff --git a/0333-glusterd-Add-multiple-checks-before-attach-start-a-b.patch b/0333-glusterd-Add-multiple-checks-before-attach-start-a-b.patch
new file mode 100644
index 0000000..f9da9d6
--- /dev/null
+++ b/0333-glusterd-Add-multiple-checks-before-attach-start-a-b.patch
@@ -0,0 +1,726 @@
+From 53ecd916d5ef56e164228ba123b078d4b30bfa81 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal
+Date: Thu, 12 Jul 2018 13:29:48 +0530
+Subject: [PATCH 333/333] glusterd: Add multiple checks before attach/start a
+ brick
+
+Problem: In a brick mux scenario glusterd is sometimes not able
+ to start/attach a brick while gluster v status shows
+ the brick as already running
+
+Solution:
+ 1) To make sure a brick is running, check for the brick_path in
+ /proc/<pid>/fd; if the brick is consumed by the brick
+ process, the brick stack has come up, otherwise it has not
+ 2) Before starting/attaching a brick, check whether the brick
+ is mounted
+ 3) At the time of printing volume status, check that the brick
+ is consumed by a brick process
+
+Test: To test the same followed this procedure
+ 1) Set up a brick mux environment on a vm
+ 2) Put a breakpoint in gdb in function posix_health_check_thread_proc
+ at the time of notifying the GF_EVENT_CHILD_DOWN event
+ 3) Unmount any one brick path forcefully
+ 4) Check gluster v status; it will show N/A for the brick
+ 5) Try to start the volume with the force option; glusterd throws
+ the message "No device available for mount brick"
+ 6) Mount the brick_root path
+ 7) Try to start the volume with the force option
+ 8) The down brick is started successfully
+
+> Change-Id: I91898dad21d082ebddd12aa0d1f7f0ed012bdf69
+> fixes: bz#1595320
+> (cherry picked from commit 9400b6f2c8aa219a493961e0ab9770b7f12e80d2)
+> (Reviewed on upstream link https://review.gluster.org/#/c/20202/)
+
+Change-Id: I62459910272754e4e062b2725fea2a1e68d743f1
+BUG: 1589279
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/145269
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ glusterfsd/src/glusterfsd-mgmt.c | 3 +
+ tests/basic/bug-1595320.t | 92 +++++++++
+ tests/basic/posix/shared-statfs.t | 2 +
+ tests/bitrot/bug-1373520.t | 1 +
+
tests/bugs/distribute/bug-1368012.t | 2 + + tests/bugs/distribute/bug-853258.t | 1 + + tests/bugs/quota/bug-1293601.t | 3 +- + xlators/mgmt/glusterd/src/glusterd-snapshot.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 261 ++++++++++++++++++++---- + xlators/mgmt/glusterd/src/glusterd-utils.h | 6 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 7 +- + 11 files changed, 329 insertions(+), 51 deletions(-) + create mode 100644 tests/basic/bug-1595320.t + +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index 30a717f..cbd436a 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -1010,6 +1010,9 @@ glusterfs_handle_attach (rpcsvc_request_t *req) + "got attach for %s but no active graph", + xlator_req.name); + } ++ if (ret) { ++ ret = -1; ++ } + + glusterfs_translator_info_response_send (req, ret, NULL, NULL); + +diff --git a/tests/basic/bug-1595320.t b/tests/basic/bug-1595320.t +new file mode 100644 +index 0000000..9d856ee +--- /dev/null ++++ b/tests/basic/bug-1595320.t +@@ -0,0 +1,92 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../snapshot.rc ++ ++cleanup ++ ++function count_up_bricks { ++ $CLI --xml volume status $V0 | grep '1' | wc -l ++} ++ ++function count_brick_processes { ++ pgrep glusterfsd | wc -l ++} ++ ++# Setup 3 LVMS ++LVM_PREFIX="test" ++TEST init_n_bricks 3 ++TEST setup_lvm 3 ++ ++# Start glusterd ++TEST glusterd ++TEST pidof glusterd ++ ++# Create volume and enable brick multiplexing ++TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3 ++gluster v set all cluster.brick-multiplex on ++ ++# Start the volume ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks ++EXPECT 1 count_brick_processes ++ ++# Kill volume ungracefully ++brick_pid=`pgrep glusterfsd` ++ ++# Make sure every brick root should be consumed by a brick process ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++ ++b1_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-1*.pid) ++b2_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-2*.pid) ++b3_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-3*.pid) ++ ++kill -9 $brick_pid ++EXPECT 0 count_brick_processes ++ ++# Unmount 3rd brick root from node ++brick_root=$L3 ++TEST umount -l $brick_root 2>/dev/null ++ ++# Start the volume only 2 brick should be start ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks ++EXPECT 1 count_brick_processes ++ ++brick_pid=`pgrep glusterfsd` ++ ++# Make sure only two brick root should be consumed by a brick process ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 0 ] ++ ++# Mount the brick root ++TEST mount -t xfs -o nouuid /dev/test_vg_3/brick_lvm $brick_root ++ ++# Replace brick_pid file to test brick_attach code ++TEST cp $b1_pid_file $b3_pid_file ++ ++# Start the volume all brick should be up ++TEST $CLI volume start $V0 force ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks ++EXPECT 1 count_brick_processes ++ ++# 
Make sure every brick root should be consumed by a brick process ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++ ++cleanup +diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t +index 8caa9fa..3343956 100644 +--- a/tests/basic/posix/shared-statfs.t ++++ b/tests/basic/posix/shared-statfs.t +@@ -23,6 +23,7 @@ TEST MOUNT_LOOP $LO2 $B0/${V0}2 + # Create a subdir in mountpoint and use that for volume. + TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1; + TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 + total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') + # Keeping the size less than 200M mainly because XFS will use +@@ -38,6 +39,7 @@ EXPECT 'Stopped' volinfo_field $V0 'Status'; + TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/3 $H0:$B0/${V0}2/3 + + TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 + total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') + TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] +diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t +index 225d3b1..c09d424 100644 +--- a/tests/bitrot/bug-1373520.t ++++ b/tests/bitrot/bug-1373520.t +@@ -11,6 +11,7 @@ TEST pidof glusterd + #Create a disperse volume + TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} + TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status' + + #Disable md-cache +diff --git a/tests/bugs/distribute/bug-1368012.t b/tests/bugs/distribute/bug-1368012.t +index f89314b..b861554 100644 +--- a/tests/bugs/distribute/bug-1368012.t ++++ b/tests/bugs/distribute/bug-1368012.t +@@ -22,6 +22,7 @@ EXPECT "$V0" volinfo_field $V0 'Volume Name'; + EXPECT 'Created' volinfo_field $V0 'Status'; + ## Start volume and verify + TEST $CLI volume start $V0; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count + TEST $CLI volume set $V0 performance.stat-prefetch off + EXPECT 'Started' volinfo_field $V0 'Status'; + TEST glusterfs -s $H0 --volfile-id=$V0 $M0 +@@ -36,6 +37,7 @@ TEST permission_root=`stat -c "%A" $M0` + TEST echo $permission_root + #Add-brick + TEST $CLI volume add-brick $V0 $H0:/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" online_brick_count + + #Allow one lookup to happen + TEST pushd $M0 +diff --git a/tests/bugs/distribute/bug-853258.t b/tests/bugs/distribute/bug-853258.t +index e39f507..6817d9e 100755 +--- a/tests/bugs/distribute/bug-853258.t ++++ b/tests/bugs/distribute/bug-853258.t +@@ -31,6 +31,7 @@ done + + # Expand the volume and force assignment of new ranges. + TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count + # Force assignment of initial ranges. 
+ TEST $CLI volume rebalance $V0 fix-layout start + EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0 +diff --git a/tests/bugs/quota/bug-1293601.t b/tests/bugs/quota/bug-1293601.t +index def4ef9..741758b 100644 +--- a/tests/bugs/quota/bug-1293601.t ++++ b/tests/bugs/quota/bug-1293601.t +@@ -9,6 +9,7 @@ TEST glusterd + + TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} + TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count + TEST $CLI volume quota $V0 enable + + TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0; +@@ -27,6 +28,6 @@ EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "1.0MB" quotausage "/" + TEST $CLI volume quota $V0 disable + TEST $CLI volume quota $V0 enable + +-EXPECT_WITHIN 40 "1.0MB" quotausage "/" ++EXPECT_WITHIN 60 "1.0MB" quotausage "/" + + cleanup; +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +index 304cef6..09e10bf 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +@@ -2844,7 +2844,7 @@ glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol, + GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv); + if (gf_is_service_running (pidfile, &pid)) { + (void) send_attach_req (this, brickinfo->rpc, +- brickinfo->path, NULL, ++ brickinfo->path, NULL, NULL, + GLUSTERD_BRICK_TERMINATE); + brickinfo->status = GF_BRICK_STOPPED; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 95df889..fe9cc75 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2186,7 +2186,7 @@ retry: + goto out; + } + +- ret = glusterd_brick_process_add_brick (brickinfo, volinfo); ++ ret = glusterd_brick_process_add_brick (brickinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, "Adding brick %s:%s " +@@ -2372,8 +2372,7 @@ out: + } + + int +-glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, +- glusterd_volinfo_t *volinfo) ++glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo) + { + int ret = -1; + xlator_t *this = NULL; +@@ -2500,7 +2499,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, + brickinfo->hostname, brickinfo->path); + + (void) send_attach_req (this, brickinfo->rpc, +- brickinfo->path, NULL, ++ brickinfo->path, NULL, NULL, + GLUSTERD_BRICK_TERMINATE); + } else { + gf_msg_debug (this->name, 0, "About to stop glusterfsd" +@@ -5426,23 +5425,92 @@ static int32_t + attach_brick_callback (struct rpc_req *req, struct iovec *iov, int count, + void *v_frame) + { +- call_frame_t *frame = v_frame; +- glusterd_conf_t *conf = frame->this->private; +- glusterd_brickinfo_t *brickinfo = frame->local; ++ call_frame_t *frame = v_frame; ++ glusterd_conf_t *conf = frame->this->private; ++ glusterd_brickinfo_t *brickinfo = frame->local; ++ glusterd_brickinfo_t *other_brick = frame->cookie; ++ glusterd_volinfo_t *volinfo = NULL; ++ xlator_t *this = THIS; ++ int ret = -1; ++ char pidfile1[PATH_MAX] = {0}; ++ char pidfile2[PATH_MAX] = {0}; ++ gf_getspec_rsp rsp = {0,}; + + frame->local = NULL; +- brickinfo->port_registered = _gf_true; ++ frame->cookie = NULL; ++ ++ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp); ++ if (ret < 0) { ++ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error"); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = glusterd_get_volinfo_from_brick 
(other_brick->path, ++ &volinfo); ++ if (ret) { ++ gf_msg (THIS->name, GF_LOG_ERROR, 0, ++ GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo" ++ " from brick(%s) so pidfile copying/unlink will fail", ++ other_brick->path); ++ goto out; ++ } ++ GLUSTERD_GET_BRICK_PIDFILE (pidfile1, volinfo, other_brick, conf); ++ volinfo = NULL; ++ ++ ret = glusterd_get_volinfo_from_brick (brickinfo->path, ++ &volinfo); ++ if (ret) { ++ gf_msg (THIS->name, GF_LOG_ERROR, 0, ++ GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo" ++ " from brick(%s) so pidfile copying/unlink will fail", ++ brickinfo->path); ++ goto out; ++ } ++ GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf); ++ ++ if (rsp.op_ret == 0) { ++ brickinfo->port_registered = _gf_true; ++ ++ /* PID file is copied once brick has attached ++ successfully ++ */ ++ glusterd_copy_file (pidfile1, pidfile2); ++ brickinfo->status = GF_BRICK_STARTED; ++ brickinfo->rpc = rpc_clnt_ref (other_brick->rpc); ++ gf_log (THIS->name, GF_LOG_INFO, "brick %s is attached successfully", ++ brickinfo->path); ++ } else { ++ gf_log (THIS->name, GF_LOG_INFO, "attach_brick failed pidfile" ++ " is %s for brick_path %s", pidfile2, brickinfo->path); ++ brickinfo->port = 0; ++ brickinfo->status = GF_BRICK_STOPPED; ++ ret = glusterd_brick_process_remove_brick (brickinfo); ++ if (ret) ++ gf_msg_debug (this->name, 0, "Couldn't remove brick from" ++ " brick process"); ++ LOCK (&volinfo->lock); ++ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); ++ UNLOCK (&volinfo->lock); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_VOLINFO_SET_FAIL, ++ "Failed to store volinfo of " ++ "%s volume", volinfo->volname); ++ goto out; ++ } ++ } ++out: + synclock_lock (&conf->big_lock); + --(conf->blockers); + synclock_unlock (&conf->big_lock); +- + STACK_DESTROY (frame->root); + return 0; + } + + int + send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, +- glusterd_brickinfo_t *brickinfo, int op) ++ glusterd_brickinfo_t *brickinfo, glusterd_brickinfo_t *other_brick, int op) + { + int ret = -1; + struct iobuf *iobuf = NULL; +@@ -5516,6 +5584,7 @@ send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, + + if (op == GLUSTERD_BRICK_ATTACH) { + frame->local = brickinfo; ++ frame->cookie = other_brick; + cbkfn = attach_brick_callback; + } + /* Send the msg */ +@@ -5582,27 +5651,19 @@ attach_brick (xlator_t *this, + rpc = rpc_clnt_ref (other_brick->rpc); + if (rpc) { + ret = send_attach_req (this, rpc, path, brickinfo, ++ other_brick, + GLUSTERD_BRICK_ATTACH); + rpc_clnt_unref (rpc); + if (!ret) { + ret = pmap_registry_extend (this, other_brick->port, +- brickinfo->path); ++ brickinfo->path); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "adding brick to process failed"); +- return ret; ++ goto out; + } +- +- /* PID file is copied once brick has attached +- successfully +- */ +- glusterd_copy_file (pidfile1, pidfile2); + brickinfo->port = other_brick->port; +- brickinfo->status = GF_BRICK_STARTED; +- brickinfo->rpc = +- rpc_clnt_ref (other_brick->rpc); +- ret = glusterd_brick_process_add_brick (brickinfo, +- volinfo); ++ ret = glusterd_brick_process_add_brick (brickinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, +@@ -5611,29 +5672,23 @@ attach_brick (xlator_t *this, + brickinfo->path); + return ret; + } +- +- if (ret) { +- gf_msg_debug (this->name, 0, "Add brick" +- " to brick process failed"); +- return ret; +- } +- + return 0; + } + } + /* +- * It might not actually be safe to 
manipulate the lock like +- * this, but if we don't then the connection can never actually +- * complete and retries are useless. Unfortunately, all of the +- * alternatives (e.g. doing all of this in a separate thread) +- * are much more complicated and risky. TBD: see if there's a +- * better way ++ * It might not actually be safe to manipulate the lock ++ * like this, but if we don't then the connection can ++ * never actually complete and retries are useless. ++ * Unfortunately, all of the alternatives (e.g. doing ++ * all of this in a separate thread) are much more ++ * complicated and risky. ++ * TBD: see if there's a better way + */ + synclock_unlock (&conf->big_lock); + sleep (1); + synclock_lock (&conf->big_lock); + } +- ++out: + gf_log (this->name, GF_LOG_WARNING, + "attach failed for %s", brickinfo->path); + return ret; +@@ -5855,6 +5910,7 @@ find_compatible_brick (glusterd_conf_t *conf, + return NULL; + } + ++ + /* Below function is use to populate sockpath based on passed pid + value as a argument after check the value from proc and also + check if passed pid is match with running glusterfs process +@@ -5941,6 +5997,62 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len) + } + + ++char * ++search_brick_path_from_proc (pid_t brick_pid, char *brickpath) ++{ ++ struct dirent *dp = NULL; ++ DIR *dirp = NULL; ++ size_t len = 0; ++ int fd = -1; ++ char path[PATH_MAX] = {0,}; ++ char sym[PATH_MAX] = {0,}; ++ struct dirent scratch[2] = {{0,},}; ++ char *brick_path = NULL; ++ ++ if (!brickpath) ++ goto out; ++ ++ sprintf(path, "/proc/%d/fd/", brick_pid); ++ dirp = sys_opendir (path); ++ if (!dirp) ++ goto out; ++ ++ len = strlen (path); ++ if (len >= (sizeof(path) - 2)) ++ goto out; ++ ++ fd = dirfd (dirp); ++ if (fd < 0) ++ goto out; ++ ++ memset(path, 0, sizeof(path)); ++ memset(sym, 0, sizeof(sym)); ++ ++ while ((dp = sys_readdir(dirp, scratch))) { ++ if (!strcmp(dp->d_name, ".") || ++ !strcmp(dp->d_name, "..")) ++ continue; ++ ++ /* check for non numerical descriptors */ ++ if (!strtol(dp->d_name, (char **)NULL, 10)) ++ continue; ++ ++ len = readlinkat (fd, dp->d_name, sym, sizeof(sym) - 1); ++ if (len > 1) { ++ sym[len] = '\0'; ++ if (!strcmp (sym, brickpath)) { ++ brick_path = gf_strdup(sym); ++ break; ++ } ++ memset (sym, 0, sizeof (sym)); ++ } ++ } ++out: ++ sys_closedir(dirp); ++ return brick_path; ++} ++ ++ + int + glusterd_brick_start (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, +@@ -5954,7 +6066,9 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, + int32_t pid = -1; + char pidfile[PATH_MAX] = {0}; + char socketpath[PATH_MAX] = {0}; ++ char *brickpath = NULL; + glusterd_volinfo_t *other_vol; ++ struct statvfs brickstat = {0,}; + + this = THIS; + GF_ASSERT (this); +@@ -6000,6 +6114,28 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, + brickinfo->start_triggered = _gf_true; + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); ++ ++ ret = sys_statvfs (brickinfo->path, &brickstat); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, ++ errno, GD_MSG_BRICKINFO_CREATE_FAIL, ++ "failed to get statfs() call on brick %s", ++ brickinfo->path); ++ goto out; ++ } ++ ++ /* Compare fsid is helpful to ensure the existence of a brick_root ++ path before the start/attach a brick ++ */ ++ if (brickinfo->statfs_fsid && ++ (brickinfo->statfs_fsid != brickstat.f_fsid)) { ++ gf_log (this->name, GF_LOG_ERROR, ++ "fsid comparison is failed it means Brick root path" ++ " %s is not created by glusterd, start/attach will also fail", ++ 
brickinfo->path); ++ goto out; ++ } ++ + if (gf_is_service_running (pidfile, &pid)) { + if (brickinfo->status != GF_BRICK_STARTING && + brickinfo->status != GF_BRICK_STARTED) { +@@ -6019,12 +6155,29 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, + * TBD: re-use RPC connection across bricks + */ + if (is_brick_mx_enabled ()) { ++ brickpath = search_brick_path_from_proc (pid, brickinfo->path); ++ if (!brickpath) { ++ gf_log (this->name, GF_LOG_INFO, ++ "Either pid %d is not running or brick" ++ " path %s is not consumed so cleanup pidfile", ++ pid, brickinfo->path); ++ /* search brick is failed so unlink pidfile */ ++ if (sys_access (pidfile , R_OK) == 0) { ++ sys_unlink (pidfile); ++ } ++ goto run; ++ } ++ GF_FREE (brickpath); + ret = glusterd_get_sock_from_brick_pid (pid, socketpath, + sizeof(socketpath)); + if (ret) { +- gf_log (this->name, GF_LOG_DEBUG, ++ gf_log (this->name, GF_LOG_INFO, + "Either pid %d is not running or is not match" + " with any running brick process ", pid); ++ /* Fetch unix socket is failed so unlink pidfile */ ++ if (sys_access (pidfile , R_OK) == 0) { ++ sys_unlink (pidfile); ++ } + goto run; + } + } else { +@@ -6039,7 +6192,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, + (void) glusterd_brick_connect (volinfo, brickinfo, + socketpath); + +- ret = glusterd_brick_process_add_brick (brickinfo, volinfo); ++ ret = glusterd_brick_process_add_brick (brickinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, +@@ -6079,6 +6232,10 @@ run: + if (ret == 0) { + goto out; + } ++ /* Attach_brick is failed so unlink pidfile */ ++ if (sys_access (pidfile , R_OK) == 0) { ++ sys_unlink (pidfile); ++ } + } + + /* +@@ -7063,14 +7220,15 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, + dict_t *dict, int32_t count) + { + +- int ret = -1; +- int32_t pid = -1; +- char key[1024] = {0}; +- char base_key[1024] = {0}; +- char pidfile[PATH_MAX] = {0}; ++ int ret = -1; ++ int32_t pid = -1; ++ char key[1024] = {0}; ++ char base_key[1024] = {0}; ++ char pidfile[PATH_MAX] = {0}; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; +- gf_boolean_t brick_online = _gf_false; ++ gf_boolean_t brick_online = _gf_false; ++ char *brickpath = NULL; + + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); +@@ -7127,7 +7285,20 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, + if (glusterd_is_brick_started (brickinfo)) { + if (gf_is_service_running (pidfile, &pid) && + brickinfo->port_registered) { +- brick_online = _gf_true; ++ if (!is_brick_mx_enabled ()) { ++ brick_online = _gf_true; ++ } else { ++ brickpath = search_brick_path_from_proc (pid, brickinfo->path); ++ if (!brickpath) { ++ gf_log (this->name, GF_LOG_INFO, ++ "brick path %s is not consumed", ++ brickinfo->path); ++ brick_online = _gf_false; ++ } else { ++ brick_online = _gf_true; ++ GF_FREE (brickpath); ++ } ++ } + } else { + pid = -1; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 4c9561e..4835728 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -179,8 +179,7 @@ int32_t + glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo); + + int +-glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, +- glusterd_volinfo_t *volinfo); ++glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo); + + int + glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo); +@@ -200,7 +199,8 @@ glusterd_volume_stop_glusterfs 
(glusterd_volinfo_t *volinfo, + + int + send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, +- glusterd_brickinfo_t *brick, int op); ++ glusterd_brickinfo_t *brick, ++ glusterd_brickinfo_t *other_brick, int op); + + glusterd_volinfo_t * + glusterd_volinfo_ref (glusterd_volinfo_t *volinfo); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index e34d58a..8bb0b6d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -2585,8 +2585,13 @@ glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags, + } + + glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STARTED); +- ++ /* Update volinfo on disk in critical section because ++ attach_brick_callback can also call store_volinfo for same ++ volume to update volinfo on disk ++ */ ++ LOCK (&volinfo->lock); + ret = glusterd_store_volinfo (volinfo, verincrement); ++ UNLOCK (&volinfo->lock); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_VOLINFO_SET_FAIL, +-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index d765c3f..4ac4b36 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 3.12.2 -Release: 14%{?dist} +Release: 15%{?dist} %endif License: GPLv2 or LGPLv3+ Group: System Environment/Base @@ -590,6 +590,14 @@ Patch0322: 0322-geo-rep-Cleanup-stale-unprocessed-xsync-changelogs.patch Patch0323: 0323-cluster-afr-Mark-dirty-for-entry-transactions-for-qu.patch Patch0324: 0324-dht-delete-tier-related-internal-xattr-in-dht_getxat.patch Patch0325: 0325-core-dereference-check-on-the-variables-in-glusterfs.patch +Patch0326: 0326-glusterd-memory-leak-in-get-state.patch +Patch0327: 0327-afr-switch-lk_owner-only-when-pre-op-succeeds.patch +Patch0328: 0328-geo-rep-Fix-issues-with-gfid-conflict-handling.patch +Patch0329: 0329-cluster-dht-Set-loc-gfid-before-healing-attr.patch +Patch0330: 0330-posix-check-before-removing-stale-symlink.patch +Patch0331: 0331-rpc-free-registered-callback-programs.patch +Patch0332: 0332-rpc-rpc_clnt_connection_cleanup-is-crashed-due-to-do.patch +Patch0333: 0333-glusterd-Add-multiple-checks-before-attach-start-a-b.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -2538,6 +2546,10 @@ fi %endif %changelog +* Fri Jul 27 2018 Milind Changire - 3.12.2-15 +- fixes bugs bz#1589279 bz#1598384 bz#1599362 bz#1599998 bz#1600790 + bz#1601331 bz#1603103 + * Wed Jul 18 2018 Milind Changire - 3.12.2-14 - fixes bugs bz#1547903 bz#1566336 bz#1568896 bz#1578716 bz#1581047 bz#1581231 bz#1582066 bz#1593865 bz#1597506 bz#1597511 bz#1597654 bz#1597768
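
Note for reviewers: the core technique in patch 0332 is to detach the shared
saved_frames pointer inside the connection lock and destroy it only after the
lock is released, so that rpc_clnt_connection_cleanup and rpc_clnt_destroy can
never both free the same frames. Below is a minimal standalone sketch of that
detach-then-destroy pattern; the types here (struct frames, struct conn) are
hypothetical simplifications for illustration, not the actual GlusterFS
structures.

#include <pthread.h>
#include <stdlib.h>

/* Hypothetical stand-ins for saved_frames and rpc_clnt_connection_t. */
struct frames { int count; };

struct conn {
        pthread_mutex_t lock;
        struct frames *saved_frames; /* shared with the cleanup path */
};

/* Mirrors the fixed rpc_clnt_destroy(): take ownership of the pointer
 * under the lock, then destroy it outside. A racing caller that swaps
 * the pointer under the same lock finds NULL and frees nothing, so the
 * frames are released exactly once. */
static void
conn_destroy (struct conn *c)
{
        struct frames *detached = NULL;

        pthread_mutex_lock (&c->lock);
        {
                detached = c->saved_frames;
                c->saved_frames = NULL; /* no other path can reach it now */
        }
        pthread_mutex_unlock (&c->lock);

        free (detached); /* free(NULL) is a no-op on the losing side */
}

int
main (void)
{
        struct conn c = { PTHREAD_MUTEX_INITIALIZER, NULL };

        c.saved_frames = calloc (1, sizeof (struct frames));
        conn_destroy (&c); /* first destroyer frees the frames */
        conn_destroy (&c); /* second destroyer sees NULL: no double free */
        return 0;
}

This is the same hand-off the patch comment describes ("Access saved_frames
in critical-section"): exactly one owner is made responsible for freeing the
shared state.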