From 299302574e3a0371e39f292f83ccf379dc2562bc Mon Sep 17 00:00:00 2001
From: Milind Changire
Date: Fri, 27 Jul 2018 11:05:48 -0400
Subject: [PATCH] autobuild v3.12.2-15

Resolves: bz#1589279 bz#1598384 bz#1599362 bz#1599998 bz#1600790
Resolves: bz#1601331 bz#1603103
Signed-off-by: Milind Changire
---
 0326-glusterd-memory-leak-in-get-state.patch  | 138 ++++
 ...h-lk_owner-only-when-pre-op-succeeds.patch | 102 +++
 ...x-issues-with-gfid-conflict-handling.patch | 455 +++++++++++
 ...dht-Set-loc-gfid-before-healing-attr.patch |  52 ++
 ...-check-before-removing-stale-symlink.patch |  70 ++
 ...pc-free-registered-callback-programs.patch |  49 ++
 ...nection_cleanup-is-crashed-due-to-do.patch |  67 ++
 ...tiple-checks-before-attach-start-a-b.patch | 726 ++++++++++++++++++
 glusterfs.spec                                |  14 +-
 9 files changed, 1672 insertions(+), 1 deletion(-)
 create mode 100644 0326-glusterd-memory-leak-in-get-state.patch
 create mode 100644 0327-afr-switch-lk_owner-only-when-pre-op-succeeds.patch
 create mode 100644 0328-geo-rep-Fix-issues-with-gfid-conflict-handling.patch
 create mode 100644 0329-cluster-dht-Set-loc-gfid-before-healing-attr.patch
 create mode 100644 0330-posix-check-before-removing-stale-symlink.patch
 create mode 100644 0331-rpc-free-registered-callback-programs.patch
 create mode 100644 0332-rpc-rpc_clnt_connection_cleanup-is-crashed-due-to-do.patch
 create mode 100644 0333-glusterd-Add-multiple-checks-before-attach-start-a-b.patch

diff --git a/0326-glusterd-memory-leak-in-get-state.patch b/0326-glusterd-memory-leak-in-get-state.patch
new file mode 100644
index 0000000..e27bc3c
--- /dev/null
+++ b/0326-glusterd-memory-leak-in-get-state.patch
@@ -0,0 +1,138 @@
+From eadd7e7168349705b29bc6ae9f99ba3e6ae58060 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde
+Date: Mon, 16 Jul 2018 15:59:36 +0530
+Subject: [PATCH 326/333] glusterd: memory leak in get-state
+
+Problem: The gluster get-state command leaks memory when a
+geo-replication session is configured.
+
+Cause: In glusterd_print_gsync_status(), references to the keys of
+gsync_dict are fetched and stored in status_vals[i], but each
+status_vals[i] has already been allocated its own buffer of size
+gf_gsync_status_t; those buffers are then leaked.
+
+Solution: There is no need for an array of pointers (status_vals);
+a single pointer holding the reference to a key of gsync_dict is
+sufficient.
+
+Followed these steps for testing:
+1. Configured a geo-rep session
+2. Ran the gluster get-state command 1000 times.
+
+Without this patch, glusterd's memory usage grew significantly
+(around 22000KB per 1000 runs); with this patch the growth dropped
+to about 1500KB per 1000 runs.
+
+>fixes: bz#1601423
+>Change-Id: I361f5525d71f821bb345419ccfdc20ca288ca292
+>Signed-off-by: Sanju Rakonde
+
+upstream patch: https://review.gluster.org/#/c/20521/
+
+Change-Id: I361f5525d71f821bb345419ccfdc20ca288ca292
+BUG: 1599362
+Signed-off-by: Sanju Rakonde
+Reviewed-on: https://code.engineering.redhat.com/gerrit/144325
+Tested-by: RHGS Build Bot
+Reviewed-by: Mohit Agrawal
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 53 ++++++++++------------------
+ 1 file changed, 19 insertions(+), 34 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index 395b342..861ff17 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -5082,7 +5082,7 @@ glusterd_print_gsync_status (FILE *fp, dict_t *gsync_dict)
+ int ret = -1;
+ int gsync_count = 0;
+ int i = 0;
+- gf_gsync_status_t **status_vals = NULL;
++ gf_gsync_status_t *status_vals = NULL;
+ char status_val_name[PATH_MAX] = {0,};
+
+ GF_VALIDATE_OR_GOTO (THIS->name, fp, out);
+@@ -5097,62 +5097,47 @@ glusterd_print_gsync_status (FILE *fp, dict_t *gsync_dict)
+ goto out;
+ }
+
+- status_vals = GF_CALLOC (gsync_count, sizeof (gf_gsync_status_t *),
+- gf_common_mt_char);
+- if (!status_vals) {
+- ret = -1;
+- goto out;
+- }
+- for (i = 0; i < gsync_count; i++) {
+- status_vals[i] = GF_CALLOC (1, sizeof (gf_gsync_status_t),
+- gf_common_mt_char);
+- if (!status_vals[i]) {
+- ret = -1;
+- goto out;
+- }
+- }
+-
+ for (i = 0; i < gsync_count; i++) {
+ snprintf (status_val_name, sizeof(status_val_name), "status_value%d", i);
+
+- ret = dict_get_bin (gsync_dict, status_val_name, (void **)&(status_vals[i]));
++ ret = dict_get_bin (gsync_dict, status_val_name, (void **)&(status_vals));
+ if (ret)
+ goto out;
+
+ fprintf (fp, "Volume%d.pair%d.session_slave: %s\n", volcount, i+1,
+- get_struct_variable(21, status_vals[i]));
++ get_struct_variable(21, status_vals));
+ fprintf (fp, "Volume%d.pair%d.master_node: %s\n", volcount, i+1,
+- get_struct_variable(0, status_vals[i]));
++ get_struct_variable(0, status_vals));
+ fprintf (fp, "Volume%d.pair%d.master_volume: %s\n", volcount, i+1,
+- get_struct_variable(1, status_vals[i]));
++ get_struct_variable(1, status_vals));
+ fprintf (fp, "Volume%d.pair%d.master_brick: %s\n", volcount, i+1,
+- get_struct_variable(2, status_vals[i]));
++ get_struct_variable(2, status_vals));
+ fprintf (fp, "Volume%d.pair%d.slave_user: %s\n", volcount, i+1,
+- get_struct_variable(3, status_vals[i]));
++ get_struct_variable(3, status_vals));
+ fprintf (fp, "Volume%d.pair%d.slave: %s\n", volcount, i+1,
+- get_struct_variable(4, status_vals[i]));
++ get_struct_variable(4, status_vals));
+ fprintf (fp, "Volume%d.pair%d.slave_node: %s\n", volcount, i+1,
+- get_struct_variable(5, status_vals[i]));
++ get_struct_variable(5, status_vals));
+ fprintf (fp, "Volume%d.pair%d.status: %s\n", volcount, i+1,
+- get_struct_variable(6, status_vals[i]));
++ get_struct_variable(6, status_vals));
+ fprintf (fp, "Volume%d.pair%d.crawl_status: %s\n", volcount, i+1,
+- get_struct_variable(7, status_vals[i]));
++ get_struct_variable(7, status_vals));
+ fprintf (fp, "Volume%d.pair%d.last_synced: %s\n", volcount, i+1,
+- get_struct_variable(8, status_vals[i]));
++ get_struct_variable(8, status_vals));
+ fprintf (fp, "Volume%d.pair%d.entry: %s\n", volcount, i+1,
"Volume%d.pair%d.entry: %s\n", volcount, i+1, +- get_struct_variable(9, status_vals[i])); ++ get_struct_variable(9, status_vals)); + fprintf (fp, "Volume%d.pair%d.data: %s\n", volcount, i+1, +- get_struct_variable(10, status_vals[i])); ++ get_struct_variable(10, status_vals)); + fprintf (fp, "Volume%d.pair%d.meta: %s\n", volcount, i+1, +- get_struct_variable(11, status_vals[i])); ++ get_struct_variable(11, status_vals)); + fprintf (fp, "Volume%d.pair%d.failures: %s\n", volcount, i+1, +- get_struct_variable(12, status_vals[i])); ++ get_struct_variable(12, status_vals)); + fprintf (fp, "Volume%d.pair%d.checkpoint_time: %s\n", volcount, +- i+1, get_struct_variable(13, status_vals[i])); ++ i+1, get_struct_variable(13, status_vals)); + fprintf (fp, "Volume%d.pair%d.checkpoint_completed: %s\n", +- volcount, i+1, get_struct_variable(14, status_vals[i])); ++ volcount, i+1, get_struct_variable(14, status_vals)); + fprintf (fp, "Volume%d.pair%d.checkpoint_completion_time: %s\n", +- volcount, i+1, get_struct_variable(15, status_vals[i])); ++ volcount, i+1, get_struct_variable(15, status_vals)); + } + out: + return ret; +-- +1.8.3.1 + diff --git a/0327-afr-switch-lk_owner-only-when-pre-op-succeeds.patch b/0327-afr-switch-lk_owner-only-when-pre-op-succeeds.patch new file mode 100644 index 0000000..d69588c --- /dev/null +++ b/0327-afr-switch-lk_owner-only-when-pre-op-succeeds.patch @@ -0,0 +1,102 @@ +From fef5fb73545bed5a4040db1f8e4e855286c1981d Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Wed, 18 Jul 2018 14:16:46 +0530 +Subject: [PATCH 327/333] afr: switch lk_owner only when pre-op succeeds + +Backport of https://review.gluster.org/#/c/20527/ + +Problem: +In a disk full scenario, we take a failure path in afr_transaction_perform_fop() +and go to unlock phase. But we change the lk-owner before that, causing unlock +to fail. When mount issues another fop that takes locks on that file, it hangs. + +Fix: +Change lk-owner only when we are about to perform the fop phase. +Also fix the same issue for arbiters when afr_txn_arbitrate_fop() fails the fop. + +Also removed the DISK_SPACE_CHECK_AND_GOTO in posix_xattrop. Otherwise truncate +to zero will fail pre-op phase with ENOSPC when the user is actually trying to +freee up space. + +Change-Id: I8663003fa7d472e93fe61cc1e39c78084d3de81f +BUG: 1599998 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/144275 +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/cluster/afr/src/afr-transaction.c | 20 ++++++++++---------- + xlators/storage/posix/src/posix.c | 5 ----- + 2 files changed, 10 insertions(+), 15 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 321b6f1..3f55070 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -495,11 +495,10 @@ afr_txn_arbitrate_fop (call_frame_t *frame, xlator_t *this) + local->op_errno = ENOTCONN; + for (i = 0; i < priv->child_count; i++) + local->transaction.failed_subvols[i] = 1; +- afr_changelog_post_op (frame, this);/*uninherit should happen*/ +- } else { +- afr_transaction_fop (frame, this); + } + ++ afr_transaction_fop (frame, this); ++ + return; + } + +@@ -529,13 +528,6 @@ afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this) + local->transaction.failed_subvols[i] = 1; + } + } +- /* Perform fops with the lk-owner from top xlator. 
+- * Eg: lk-owner of posix-lk and flush should be same,
+- * flush cant clear the posix-lks without that lk-owner.
+- */
+- afr_save_lk_owner (frame);
+- frame->root->lk_owner =
+- local->transaction.main_frame->root->lk_owner;
+
+ if (local->pre_op_compat)
+ /* old mode, pre-op was done as afr_changelog_do()
+@@ -561,6 +553,14 @@ afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
+ }
+
+ fop:
++ /* Perform fops with the lk-owner from top xlator.
++ * Eg: lk-owner of posix-lk and flush should be same,
++ * flush cant clear the posix-lks without that lk-owner.
++ */
++ afr_save_lk_owner (frame);
++ frame->root->lk_owner =
++ local->transaction.main_frame->root->lk_owner;
++
+ if (priv->arbiter_count == 1) {
+ afr_txn_arbitrate_fop (frame, this);
+ } else {
+diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
+index 01f472b..ddb875c 100644
+--- a/xlators/storage/posix/src/posix.c
++++ b/xlators/storage/posix/src/posix.c
+@@ -6147,16 +6147,11 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xattr_rsp = NULL;
+ dict_t *xdata_rsp = NULL;
+ struct iatt stbuf = {0};
+- struct posix_private *priv = NULL;
+-
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (xattr, out);
+ VALIDATE_OR_GOTO (this, out);
+
+- priv = this->private;
+- DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
+-
+ if (fd) {
+ op_ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
+ if (op_ret < 0) {
+--
+1.8.3.1
+
diff --git a/0328-geo-rep-Fix-issues-with-gfid-conflict-handling.patch b/0328-geo-rep-Fix-issues-with-gfid-conflict-handling.patch
new file mode 100644
index 0000000..c82077d
--- /dev/null
+++ b/0328-geo-rep-Fix-issues-with-gfid-conflict-handling.patch
@@ -0,0 +1,455 @@
+From a9db68fc1f05639cb79defef6ed7da58572113ea Mon Sep 17 00:00:00 2001
+From: Kotresh HR
+Date: Thu, 5 Jul 2018 07:07:38 -0400
+Subject: [PATCH 328/333] geo-rep: Fix issues with gfid conflict handling
+
+1. MKDIR/RMDIR is recorded on all bricks, so if
+   one brick succeeds in creating it, the other
+   bricks should ignore it. But this was not
+   happening: the fix for renaming directories in
+   hybrid crawl was trying to rename the directory
+   to itself, and in the process crashed with
+   ENOENT if the directory had been removed.
+
+2. If a file was created, deleted, and then a
+   directory was created with the same name, it
+   failed to sync. Again the issue is around the
+   fix for renaming directories in hybrid crawl.
+   Fixed the same.
+
+   If the same case occurred with a hardlink
+   present for the file, it also failed. This
+   patch fixes that too.
+ +Backport of + > Patch: https://review.gluster.org/#/c/20473/ + > fixes: bz#1598884 + > Change-Id: I6f3bca44e194e415a3d4de3b9d03cc8976439284 + > Signed-off-by: Kotresh HR + +BUG: 1598384 +Change-Id: I6f3bca44e194e415a3d4de3b9d03cc8976439284 +Signed-off-by: Kotresh HR +Reviewed-on: https://code.engineering.redhat.com/gerrit/143400 +Tested-by: RHGS Build Bot +Reviewed-by: Aravinda Vishwanathapura Krishna Murthy +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + geo-replication/syncdaemon/master.py | 157 ++++++++++++++++++++++--------- + geo-replication/syncdaemon/resource.py | 57 ++++++----- + geo-replication/syncdaemon/syncdutils.py | 35 +++++++ + 3 files changed, 180 insertions(+), 69 deletions(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 64e9836..1399378 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -692,7 +692,8 @@ class GMasterChangelogMixin(GMasterCommon): + TYPE_GFID = "D " + TYPE_ENTRY = "E " + +- MAX_EF_RETRIES = 15 ++ MAX_EF_RETRIES = 10 ++ MAX_OE_RETRIES = 5 + + # flat directory hierarchy for gfid based access + FLAT_DIR_HIERARCHY = '.' +@@ -788,38 +789,53 @@ class GMasterChangelogMixin(GMasterCommon): + + self.status.inc_value("failures", num_failures) + +- def fix_possible_entry_failures(self, failures, retry_count): ++ def fix_possible_entry_failures(self, failures, retry_count, entries): + pfx = gauxpfx() + fix_entry_ops = [] + failures1 = [] + for failure in failures: +- if failure[2]['dst']: ++ if failure[2]['name_mismatch']: ++ pbname = failure[2]['slave_entry'] ++ elif failure[2]['dst']: + pbname = failure[0]['entry1'] + else: + pbname = failure[0]['entry'] +- if failure[2]['gfid_mismatch']: ++ ++ op = failure[0]['op'] ++ # name exists but gfid is different ++ if failure[2]['gfid_mismatch'] or failure[2]['name_mismatch']: + slave_gfid = failure[2]['slave_gfid'] + st = lstat(os.path.join(pfx, slave_gfid)) ++ # Takes care of scenarios with no hardlinks + if isinstance(st, int) and st == ENOENT: +- logging.info(lf('Fixing gfid mismatch in slave. Deleting' +- ' the entry', retry_count=retry_count, ++ logging.info(lf('Entry not present on master. Fixing gfid ' ++ 'mismatch in slave. Deleting the entry', ++ retry_count=retry_count, + entry=repr(failure))) +- #Add deletion to fix_entry_ops list ++ # Add deletion to fix_entry_ops list + if failure[2]['slave_isdir']: +- fix_entry_ops.append(edct('RMDIR', +- gfid=failure[2]['slave_gfid'], +- entry=pbname)) ++ fix_entry_ops.append( ++ edct('RMDIR', ++ gfid=failure[2]['slave_gfid'], ++ entry=pbname)) + else: +- fix_entry_ops.append(edct('UNLINK', +- gfid=failure[2]['slave_gfid'], +- entry=pbname)) ++ fix_entry_ops.append( ++ edct('UNLINK', ++ gfid=failure[2]['slave_gfid'], ++ entry=pbname)) ++ # Takes care of scenarios of hardlinks/renames on master + elif not isinstance(st, int): +- #The file exists on master but with different name. +- #Probabaly renamed and got missed during xsync crawl. +- if failure[2]['slave_isdir']: +- logging.info(lf('Fixing gfid mismatch in slave', ++ if matching_disk_gfid(slave_gfid, pbname): ++ # Safe to ignore the failure as master contains same ++ # file with same gfid. Remove entry from entries list ++ logging.info(lf('Fixing gfid mismatch in slave. ' ++ ' Safe to ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) ++ entries.remove(failure[0]) ++ # The file exists on master but with different name. ++ # Probably renamed and got missed during xsync crawl. 
++ elif failure[2]['slave_isdir']: + realpath = os.readlink(os.path.join(gconf.local_path, + ".glusterfs", + slave_gfid[0:2], +@@ -827,64 +843,99 @@ class GMasterChangelogMixin(GMasterCommon): + slave_gfid)) + dst_entry = os.path.join(pfx, realpath.split('/')[-2], + realpath.split('/')[-1]) +- rename_dict = edct('RENAME', gfid=slave_gfid, +- entry=failure[0]['entry'], +- entry1=dst_entry, stat=st, +- link=None) +- logging.info(lf('Fixing gfid mismatch in slave. ' +- 'Renaming', retry_count=retry_count, +- entry=repr(rename_dict))) +- fix_entry_ops.append(rename_dict) ++ src_entry = pbname ++ logging.info(lf('Fixing dir name/gfid mismatch in ' ++ 'slave', retry_count=retry_count, ++ entry=repr(failure))) ++ if src_entry == dst_entry: ++ # Safe to ignore the failure as master contains ++ # same directory as in slave with same gfid. ++ # Remove the failure entry from entries list ++ logging.info(lf('Fixing dir name/gfid mismatch' ++ ' in slave. Safe to ignore, ' ++ 'take out entry', ++ retry_count=retry_count, ++ entry=repr(failure))) ++ entries.remove(failure[0]) ++ else: ++ rename_dict = edct('RENAME', gfid=slave_gfid, ++ entry=src_entry, ++ entry1=dst_entry, stat=st, ++ link=None) ++ logging.info(lf('Fixing dir name/gfid mismatch' ++ ' in slave. Renaming', ++ retry_count=retry_count, ++ entry=repr(rename_dict))) ++ fix_entry_ops.append(rename_dict) + else: +- logging.info(lf('Fixing gfid mismatch in slave. ' +- ' Deleting the entry', ++ # A hardlink file exists with different name or ++ # renamed file exists and we are sure from ++ # matching_disk_gfid check that the entry doesn't ++ # exist with same gfid so we can safely delete on slave ++ logging.info(lf('Fixing file gfid mismatch in slave. ' ++ 'Hardlink/Rename Case. Deleting entry', ++ retry_count=retry_count, ++ entry=repr(failure))) ++ fix_entry_ops.append( ++ edct('UNLINK', ++ gfid=failure[2]['slave_gfid'], ++ entry=pbname)) ++ elif failure[1] == ENOENT: ++ # Ignore ENOENT error for fix_entry_ops aka retry_count > 1 ++ if retry_count > 1: ++ logging.info(lf('ENOENT error while fixing entry ops. ' ++ 'Safe to ignore, take out entry', ++ retry_count=retry_count, ++ entry=repr(failure))) ++ entries.remove(failure[0]) ++ elif op in ('MKNOD', 'CREATE', 'MKDIR'): ++ pargfid = pbname.split('/')[1] ++ st = lstat(os.path.join(pfx, pargfid)) ++ # Safe to ignore the failure as master doesn't contain ++ # parent directory. ++ if isinstance(st, int): ++ logging.info(lf('Fixing ENOENT error in slave. Parent ' ++ 'does not exist on master. 
Safe to ' ++ 'ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) +- fix_entry_ops.append(edct('UNLINK', +- gfid=failure[2]['slave_gfid'], +- entry=pbname)) +- logging.error(lf('Entry cannot be fixed in slave due ' +- 'to GFID mismatch, find respective ' +- 'path for the GFID and trigger sync', +- gfid=slave_gfid)) ++ entries.remove(failure[0]) + + if fix_entry_ops: +- #Process deletions of entries whose gfids are mismatched ++ # Process deletions of entries whose gfids are mismatched + failures1 = self.slave.server.entry_ops(fix_entry_ops) +- if not failures1: +- logging.info ("Sucessfully fixed entry ops with gfid mismatch") + +- return failures1 ++ return (failures1, fix_entry_ops) + + def handle_entry_failures(self, failures, entries): + retries = 0 + pending_failures = False + failures1 = [] + failures2 = [] ++ entry_ops1 = [] ++ entry_ops2 = [] + + if failures: + pending_failures = True + failures1 = failures ++ entry_ops1 = entries + + while pending_failures and retries < self.MAX_EF_RETRIES: + retries += 1 +- failures2 = self.fix_possible_entry_failures(failures1, +- retries) ++ (failures2, entry_ops2) = self.fix_possible_entry_failures( ++ failures1, retries, entry_ops1) + if not failures2: + pending_failures = False ++ logging.info(lf('Sucessfully fixed entry ops with gfid ' ++ 'mismatch', retry_count=retries)) + else: + pending_failures = True + failures1 = failures2 ++ entry_ops1 = entry_ops2 + + if pending_failures: + for failure in failures1: + logging.error("Failed to fix entry ops %s", repr(failure)) +- else: +- #Retry original entry list 5 times +- failures = self.slave.server.entry_ops(entries) +- +- self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY') +- + + def process_change(self, change, done, retry): + pfx = gauxpfx() +@@ -1112,7 +1163,19 @@ class GMasterChangelogMixin(GMasterCommon): + self.status.inc_value("entry", len(entries)) + + failures = self.slave.server.entry_ops(entries) +- self.handle_entry_failures(failures, entries) ++ count = 0 ++ while failures and count < self.MAX_OE_RETRIES: ++ count += 1 ++ self.handle_entry_failures(failures, entries) ++ logging.info("Retry original entries. 
count = %s" % count) ++ failures = self.slave.server.entry_ops(entries) ++ if not failures: ++ logging.info("Sucessfully fixed all entry ops with gfid " ++ "mismatch") ++ break ++ ++ self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY') ++ + self.status.dec_value("entry", len(entries)) + + # Update Entry stime in Brick Root only in case of Changelog mode +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index 0d5462a..eb696f3 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -38,9 +38,9 @@ from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION + from syncdutils import GX_GFID_CANONICAL_LEN + from gsyncdstatus import GeorepStatus + from syncdutils import get_master_and_slave_data_from_args +-from syncdutils import lf, Popen, sup, Volinfo ++from syncdutils import lf, Popen, sup + from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt +-from syncdutils import unshare_propagation_supported ++from syncdutils import unshare_propagation_supported, get_slv_dir_path + + UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z') + HostRX = re.compile('[a-zA-Z\d](?:[a-zA-Z\d.-]*[a-zA-Z\d])?', re.I) +@@ -50,7 +50,6 @@ ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP') + + slv_volume = None + slv_host = None +-slv_bricks = None + + def desugar(ustr): + """transform sugared url strings to standard :// form +@@ -463,13 +462,23 @@ class Server(object): + # to be purged is the GFID gotten from the changelog. + # (a stat(changelog_gfid) would also be valid here) + # The race here is between the GFID check and the purge. ++ ++ # If the entry or the gfid of the file to be deleted is not present ++ # on slave, we can ignore the unlink/rmdir ++ if isinstance(lstat(entry), int) or \ ++ isinstance(lstat(os.path.join(pfx, gfid)), int): ++ return ++ + if not matching_disk_gfid(gfid, entry): + collect_failure(e, EEXIST) + return + + if op == 'UNLINK': + er = errno_wrap(os.unlink, [entry], [ENOENT, ESTALE], [EBUSY]) +- return er ++ # EISDIR is safe error, ignore. This can only happen when ++ # unlink is sent from master while fixing gfid conflicts. 
++ if er != EISDIR: ++ return er + + elif op == 'RMDIR': + er = errno_wrap(os.rmdir, [entry], [ENOENT, ESTALE, +@@ -480,7 +489,11 @@ class Server(object): + def collect_failure(e, cmd_ret, dst=False): + slv_entry_info = {} + slv_entry_info['gfid_mismatch'] = False ++ slv_entry_info['name_mismatch'] = False + slv_entry_info['dst'] = dst ++ slv_entry_info['slave_isdir'] = False ++ slv_entry_info['slave_name'] = None ++ slv_entry_info['slave_gfid'] = None + # We do this for failing fops on Slave + # Master should be logging this + if cmd_ret is None: +@@ -498,6 +511,9 @@ class Server(object): + if not isinstance(st, int): + if st and stat.S_ISDIR(st.st_mode): + slv_entry_info['slave_isdir'] = True ++ dir_name = get_slv_dir_path(slv_host, slv_volume, ++ disk_gfid) ++ slv_entry_info['slave_name'] = dir_name + else: + slv_entry_info['slave_isdir'] = False + slv_entry_info['slave_gfid'] = disk_gfid +@@ -618,37 +634,34 @@ class Server(object): + [ENOENT, EEXIST], [ESTALE]) + collect_failure(e, cmd_ret) + elif op == 'MKDIR': ++ en = e['entry'] + slink = os.path.join(pfx, gfid) + st = lstat(slink) + # don't create multiple entries with same gfid + if isinstance(st, int): + blob = entry_pack_mkdir( + gfid, bname, e['mode'], e['uid'], e['gid']) +- else: ++ elif (isinstance(lstat(en), int) or ++ not matching_disk_gfid(gfid, en)): + # If gfid of a directory exists on slave but path based + # create is getting EEXIST. This means the directory is + # renamed in master but recorded as MKDIR during hybrid + # crawl. Get the directory path by reading the backend + # symlink and trying to rename to new name as said by + # master. +- global slv_bricks +- global slv_volume +- global slv_host +- if not slv_bricks: +- slv_info = Volinfo (slv_volume, slv_host) +- slv_bricks = slv_info.bricks +- # Result of readlink would be of format as below. 
+- # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename" +- realpath = os.readlink(os.path.join(slv_bricks[0]['dir'], +- ".glusterfs", gfid[0:2], +- gfid[2:4], gfid)) +- realpath_parts = realpath.split('/') +- src_pargfid = realpath_parts[-2] +- src_basename = realpath_parts[-1] +- src_entry = os.path.join(pfx, src_pargfid, src_basename) + logging.info(lf("Special case: rename on mkdir", +- gfid=gfid, entry=repr(entry))) +- rename_with_disk_gfid_confirmation(gfid, src_entry, entry) ++ gfid=gfid, entry=repr(entry))) ++ src_entry = get_slv_dir_path(slv_host, slv_volume, gfid) ++ if src_entry is not None and src_entry != entry: ++ slv_entry_info = {} ++ slv_entry_info['gfid_mismatch'] = False ++ slv_entry_info['name_mismatch'] = True ++ slv_entry_info['dst'] = False ++ slv_entry_info['slave_isdir'] = True ++ slv_entry_info['slave_gfid'] = gfid ++ slv_entry_info['slave_entry'] = src_entry ++ ++ failures.append((e, EEXIST, slv_entry_info)) + elif op == 'LINK': + slink = os.path.join(pfx, gfid) + st = lstat(slink) +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index 6dafb0a..d798356 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -77,6 +77,7 @@ CHANGELOG_AGENT_CLIENT_VERSION = 1.0 + NodeID = None + rsync_version = None + unshare_mnt_propagation = None ++slv_bricks = None + SPACE_ESCAPE_CHAR = "%20" + NEWLINE_ESCAPE_CHAR = "%0A" + PERCENTAGE_ESCAPE_CHAR = "%25" +@@ -671,6 +672,40 @@ def get_rsync_version(rsync_cmd): + return rsync_version + + ++def get_slv_dir_path(slv_host, slv_volume, gfid): ++ global slv_bricks ++ ++ dir_path = ENOENT ++ ++ if not slv_bricks: ++ slv_info = Volinfo(slv_volume, slv_host) ++ slv_bricks = slv_info.bricks ++ # Result of readlink would be of format as below. ++ # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename" ++ for brick in slv_bricks: ++ dir_path = errno_wrap(os.path.join, ++ [brick['dir'], ++ ".glusterfs", gfid[0:2], ++ gfid[2:4], ++ gfid], [ENOENT], [ESTALE]) ++ if dir_path != ENOENT: ++ break ++ ++ if not isinstance(dir_path, int): ++ realpath = errno_wrap(os.readlink, [dir_path], ++ [ENOENT], [ESTALE]) ++ ++ if not isinstance(realpath, int): ++ realpath_parts = realpath.split('/') ++ pargfid = realpath_parts[-2] ++ basename = realpath_parts[-1] ++ pfx = gauxpfx() ++ dir_entry = os.path.join(pfx, pargfid, basename) ++ return dir_entry ++ ++ return None ++ ++ + def lf(event, **kwargs): + """ + Log Format helper function, log messages can be +-- +1.8.3.1 + diff --git a/0329-cluster-dht-Set-loc-gfid-before-healing-attr.patch b/0329-cluster-dht-Set-loc-gfid-before-healing-attr.patch new file mode 100644 index 0000000..ff5a0af --- /dev/null +++ b/0329-cluster-dht-Set-loc-gfid-before-healing-attr.patch @@ -0,0 +1,52 @@ +From c2b215a14a38d3587a5a3ea4efab384033019ed5 Mon Sep 17 00:00:00 2001 +From: N Balachandran +Date: Wed, 18 Jul 2018 22:09:19 +0530 +Subject: [PATCH 329/333] cluster/dht: Set loc->gfid before healing attr + +AFR takes inodelks when setting attrs. The loc->gfid +and loc->inode->gfid were both null when dht_dir_attr_heal +was called during a fresh lookup of an existing directory. +As the gfid is null, client_pre_inodelk asserts in the gfid +check. +We now set the loc->gfid before calling dht_dir_attr_heal. 
+ +upstream patch: https://review.gluster.org/#/c/20530/ + +> Change-Id: I457f5a73fd301d97a03ca032587e73d4803298ac +> fixes: bz#1602866 +> Signed-off-by: N Balachandran + +Change-Id: Ie5e30d4ab3b824eaad333da22465d6672c75a2f6 +BUG: 1601331 +Signed-off-by: N Balachandran +Reviewed-on: https://code.engineering.redhat.com/gerrit/144386 +Reviewed-by: Mohit Agrawal +Tested-by: RHGS Build Bot +--- + xlators/cluster/dht/src/dht-common.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 2207708..0984f8f 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1518,7 +1518,6 @@ unlock: + if (local->need_attrheal) { + local->need_attrheal = 0; + if (!__is_root_gfid (inode->gfid)) { +- gf_uuid_copy (local->gfid, local->mds_stbuf.ia_gfid); + local->stbuf.ia_gid = local->mds_stbuf.ia_gid; + local->stbuf.ia_uid = local->mds_stbuf.ia_uid; + local->stbuf.ia_prot = local->mds_stbuf.ia_prot; +@@ -1532,6 +1531,8 @@ unlock: + goto skip_attr_heal; + } + copy_local->stbuf = local->stbuf; ++ gf_uuid_copy (copy_local->loc.gfid, ++ local->stbuf.ia_gfid); + copy_local->mds_stbuf = local->mds_stbuf; + copy_local->mds_subvol = local->mds_subvol; + copy->local = copy_local; +-- +1.8.3.1 + diff --git a/0330-posix-check-before-removing-stale-symlink.patch b/0330-posix-check-before-removing-stale-symlink.patch new file mode 100644 index 0000000..09a3a07 --- /dev/null +++ b/0330-posix-check-before-removing-stale-symlink.patch @@ -0,0 +1,70 @@ +From 4742c4766af4b0def0e12a2b0544c30496dfb48e Mon Sep 17 00:00:00 2001 +From: Ravishankar N +Date: Thu, 19 Jul 2018 12:47:38 +0530 +Subject: [PATCH 330/333] posix: check before removing stale symlink + +Backport of https://review.gluster.org/#/c/20509/ + +BZ 1564071 complains of directories with missing gfid symlinks and +corresponding "Found stale gfid handle" messages in the logs. Hence +add a check to see if the symlink points to an actual directory before +removing it. + +Note: Removing stale symlinks was added via commit +3e9a9c029fac359477fb26d9cc7803749ba038b2 + +Change-Id: I5d91fab8e5f3a621a9ecad4a1f9c898a3c2d346a +BUG: 1603103 +Signed-off-by: Ravishankar N +Reviewed-on: https://code.engineering.redhat.com/gerrit/144867 +Reviewed-by: Nithya Balachandran +Tested-by: RHGS Build Bot +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya +--- + xlators/storage/posix/src/posix.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c +index ddb875c..c3b7120 100644 +--- a/xlators/storage/posix/src/posix.c ++++ b/xlators/storage/posix/src/posix.c +@@ -235,6 +235,7 @@ posix_lookup (call_frame_t *frame, xlator_t *this, + int32_t nlink_samepgfid = 0; + struct posix_private *priv = NULL; + posix_inode_ctx_t *ctx = NULL; ++ int ret = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); +@@ -284,20 +285,24 @@ posix_lookup (call_frame_t *frame, xlator_t *this, + "lstat on %s failed", + real_path ? 
real_path : "null"); + } ++ entry_ret = -1; + if (loc_is_nameless(loc)) { + if (!op_errno) + op_errno = ESTALE; + loc_gfid (loc, gfid); + MAKE_HANDLE_ABSPATH (gfid_path, this, gfid); +- op_ret = sys_lstat(gfid_path, &statbuf); +- if (op_ret == 0 && statbuf.st_nlink == 1) { +- gf_msg (this->name, GF_LOG_WARNING, ESTALE, ++ ret = sys_stat(gfid_path, &statbuf); ++ if (ret == 0 && ((statbuf.st_mode & S_IFMT) == S_IFDIR)) ++ /*Don't unset if it was a symlink to a dir.*/ ++ goto parent; ++ ret = sys_lstat(gfid_path, &statbuf); ++ if (ret == 0 && statbuf.st_nlink == 1) { ++ gf_msg (this->name, GF_LOG_WARNING, op_errno, + P_MSG_HANDLE_DELETE, "Found stale gfid " + "handle %s, removing it.", gfid_path); + posix_handle_unset (this, gfid, NULL); + } + } +- entry_ret = -1; + goto parent; + } + +-- +1.8.3.1 + diff --git a/0331-rpc-free-registered-callback-programs.patch b/0331-rpc-free-registered-callback-programs.patch new file mode 100644 index 0000000..71403af --- /dev/null +++ b/0331-rpc-free-registered-callback-programs.patch @@ -0,0 +1,49 @@ +From 9c1ddc2e32cbfc8ad313b4f1342fbc20e49af80b Mon Sep 17 00:00:00 2001 +From: Niels de Vos +Date: Mon, 9 Oct 2017 18:58:09 +0200 +Subject: [PATCH 331/333] rpc: free registered callback programs + +> Change-Id: I8c6f6b642f025d1faf74015b8f7aaecd7ebfd4d5 +> BUG: 1443145 +> Signed-off-by: Niels de Vos +> (cherry picked from commit ec39ca32d942d49fd701156174abbba0b73bce2f) +> (Reviewed on upstream link https://review.gluster.org/#/c/18478) + +Change-Id: I23e44507d12326bf63c96c56eae83d5424f8ee63 +BUG: 1600790 +Signed-off-by: Mohit Agrawal +Reviewed-on: https://code.engineering.redhat.com/gerrit/145358 +Reviewed-by: Niels de Vos +Tested-by: RHGS Build Bot +--- + rpc/rpc-lib/src/rpc-clnt.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c +index e34d2ca..1ea8099 100644 +--- a/rpc/rpc-lib/src/rpc-clnt.c ++++ b/rpc/rpc-lib/src/rpc-clnt.c +@@ -1771,6 +1771,9 @@ rpc_clnt_trigger_destroy (struct rpc_clnt *rpc) + static void + rpc_clnt_destroy (struct rpc_clnt *rpc) + { ++ rpcclnt_cb_program_t *program = NULL; ++ rpcclnt_cb_program_t *tmp = NULL; ++ + if (!rpc) + return; + +@@ -1783,6 +1786,10 @@ rpc_clnt_destroy (struct rpc_clnt *rpc) + mem_pool_destroy (rpc->reqpool); + mem_pool_destroy (rpc->saved_frames_pool); + ++ list_for_each_entry_safe (program, tmp, &rpc->programs, program) { ++ GF_FREE (program); ++ } ++ + GF_FREE (rpc); + return; + } +-- +1.8.3.1 + diff --git a/0332-rpc-rpc_clnt_connection_cleanup-is-crashed-due-to-do.patch b/0332-rpc-rpc_clnt_connection_cleanup-is-crashed-due-to-do.patch new file mode 100644 index 0000000..d6f93dc --- /dev/null +++ b/0332-rpc-rpc_clnt_connection_cleanup-is-crashed-due-to-do.patch @@ -0,0 +1,67 @@ +From 76823d120518528c4edad4af6f4c1cdd50f5b398 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal +Date: Tue, 24 Jul 2018 14:48:35 +0530 +Subject: [PATCH 332/333] rpc: rpc_clnt_connection_cleanup is crashed due to + double free + +Problem: gfapi client is getting crashed in rpc_clnt_connection_cleanup + at the time of destroying saved_frames + +Solution: gfapi client is getting crashed because saved_frame ptr is + already freed in rpc_clnt_destroy.To avoid the same update + code in rpc_clnt_destroy + +> Change-Id: Id8cce102b49f26cfd86ef88257032ed98f43192b +> fixes: bz#1607783 +> (cherry picked from commit abd7b1393294d29eef6913e7f93ab76040c90428) +> (Reviewed on upstream link https://review.gluster.org/#/c/20557/) + +Change-Id: Id3200e36acc1c49a8f5d39a1cc5053864899754c 
+BUG: 1600790
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/145377
+Tested-by: Mohit Agrawal
+Reviewed-by: Niels de Vos
+Tested-by: RHGS Build Bot
+---
+ rpc/rpc-lib/src/rpc-clnt.c | 20 +++++++++++++++++---
+ 1 file changed, 17 insertions(+), 3 deletions(-)
+
+diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
+index 1ea8099..fd7e3ec 100644
+--- a/rpc/rpc-lib/src/rpc-clnt.c
++++ b/rpc/rpc-lib/src/rpc-clnt.c
+@@ -1771,13 +1771,27 @@ rpc_clnt_trigger_destroy (struct rpc_clnt *rpc)
+ static void
+ rpc_clnt_destroy (struct rpc_clnt *rpc)
+ {
+- rpcclnt_cb_program_t *program = NULL;
+- rpcclnt_cb_program_t *tmp = NULL;
++ rpcclnt_cb_program_t *program = NULL;
++ rpcclnt_cb_program_t *tmp = NULL;
++ struct saved_frames *saved_frames = NULL;
++ rpc_clnt_connection_t *conn = NULL;
+
+ if (!rpc)
+ return;
+
+- saved_frames_destroy (rpc->conn.saved_frames);
++ conn = &rpc->conn;
++ /* Access saved_frames in critical-section to avoid
++ crash in rpc_clnt_connection_cleanup at the time
++ of destroying saved frames
++ */
++ pthread_mutex_lock (&conn->lock);
++ {
++ saved_frames = conn->saved_frames;
++ conn->saved_frames = NULL;
++ }
++ pthread_mutex_unlock (&conn->lock);
++
++ saved_frames_destroy (saved_frames);
+ pthread_mutex_destroy (&rpc->lock);
+ pthread_mutex_destroy (&rpc->conn.lock);
+
+--
+1.8.3.1
+
diff --git a/0333-glusterd-Add-multiple-checks-before-attach-start-a-b.patch b/0333-glusterd-Add-multiple-checks-before-attach-start-a-b.patch
new file mode 100644
index 0000000..f9da9d6
--- /dev/null
+++ b/0333-glusterd-Add-multiple-checks-before-attach-start-a-b.patch
@@ -0,0 +1,726 @@
+From 53ecd916d5ef56e164228ba123b078d4b30bfa81 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal
+Date: Thu, 12 Jul 2018 13:29:48 +0530
+Subject: [PATCH 333/333] glusterd: Add multiple checks before attach/start a
+ brick
+
+Problem: In a brick mux scenario glusterd is sometimes not able
+ to start/attach a brick while gluster v status shows
+ the brick as already running
+
+Solution:
+ 1) To make sure a brick is running, check for the brick_path in
+ /proc/<pid>/fd; if the brick is consumed by the brick
+ process, the brick stack has come up, otherwise it has not
+ 2) Before starting/attaching a brick, check whether the brick
+ is mounted
+ 3) At the time of printing volume status, check that the brick
+ is consumed by a brick process
+
+Test: To test the same followed this procedure
+ 1) Set up a brick mux environment on a vm
+ 2) Put a breakpoint in gdb in function posix_health_check_thread_proc
+ at the time of notifying the GF_EVENT_CHILD_DOWN event
+ 3) Unmount any one brick path forcefully
+ 4) Check gluster v status; it will show N/A for the brick
+ 5) Try to start the volume with the force option; glusterd throws
+ the message "No device available for mount brick"
+ 6) Mount the brick_root path
+ 7) Try to start the volume with the force option
+ 8) The down brick is started successfully
+
+> Change-Id: I91898dad21d082ebddd12aa0d1f7f0ed012bdf69
+> fixes: bz#1595320
+> (cherry picked from commit 9400b6f2c8aa219a493961e0ab9770b7f12e80d2)
+> (Reviewed on upstream link https://review.gluster.org/#/c/20202/)
+
+Change-Id: I62459910272754e4e062b2725fea2a1e68d743f1
+BUG: 1589279
+Signed-off-by: Mohit Agrawal
+Reviewed-on: https://code.engineering.redhat.com/gerrit/145269
+Tested-by: RHGS Build Bot
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
+---
+ glusterfsd/src/glusterfsd-mgmt.c | 3 +
+ tests/basic/bug-1595320.t | 92 +++++++++
+ tests/basic/posix/shared-statfs.t | 2 +
+ tests/bitrot/bug-1373520.t | 1 +
+
tests/bugs/distribute/bug-1368012.t | 2 + + tests/bugs/distribute/bug-853258.t | 1 + + tests/bugs/quota/bug-1293601.t | 3 +- + xlators/mgmt/glusterd/src/glusterd-snapshot.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 261 ++++++++++++++++++++---- + xlators/mgmt/glusterd/src/glusterd-utils.h | 6 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 7 +- + 11 files changed, 329 insertions(+), 51 deletions(-) + create mode 100644 tests/basic/bug-1595320.t + +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index 30a717f..cbd436a 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -1010,6 +1010,9 @@ glusterfs_handle_attach (rpcsvc_request_t *req) + "got attach for %s but no active graph", + xlator_req.name); + } ++ if (ret) { ++ ret = -1; ++ } + + glusterfs_translator_info_response_send (req, ret, NULL, NULL); + +diff --git a/tests/basic/bug-1595320.t b/tests/basic/bug-1595320.t +new file mode 100644 +index 0000000..9d856ee +--- /dev/null ++++ b/tests/basic/bug-1595320.t +@@ -0,0 +1,92 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../snapshot.rc ++ ++cleanup ++ ++function count_up_bricks { ++ $CLI --xml volume status $V0 | grep '1' | wc -l ++} ++ ++function count_brick_processes { ++ pgrep glusterfsd | wc -l ++} ++ ++# Setup 3 LVMS ++LVM_PREFIX="test" ++TEST init_n_bricks 3 ++TEST setup_lvm 3 ++ ++# Start glusterd ++TEST glusterd ++TEST pidof glusterd ++ ++# Create volume and enable brick multiplexing ++TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3 ++gluster v set all cluster.brick-multiplex on ++ ++# Start the volume ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks ++EXPECT 1 count_brick_processes ++ ++# Kill volume ungracefully ++brick_pid=`pgrep glusterfsd` ++ ++# Make sure every brick root should be consumed by a brick process ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++ ++b1_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-1*.pid) ++b2_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-2*.pid) ++b3_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-3*.pid) ++ ++kill -9 $brick_pid ++EXPECT 0 count_brick_processes ++ ++# Unmount 3rd brick root from node ++brick_root=$L3 ++TEST umount -l $brick_root 2>/dev/null ++ ++# Start the volume only 2 brick should be start ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks ++EXPECT 1 count_brick_processes ++ ++brick_pid=`pgrep glusterfsd` ++ ++# Make sure only two brick root should be consumed by a brick process ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 0 ] ++ ++# Mount the brick root ++TEST mount -t xfs -o nouuid /dev/test_vg_3/brick_lvm $brick_root ++ ++# Replace brick_pid file to test brick_attach code ++TEST cp $b1_pid_file $b3_pid_file ++ ++# Start the volume all brick should be up ++TEST $CLI volume start $V0 force ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks ++EXPECT 1 count_brick_processes ++ ++# 
Make sure every brick root should be consumed by a brick process ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l` ++TEST [ $n -eq 1 ] ++ ++cleanup +diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t +index 8caa9fa..3343956 100644 +--- a/tests/basic/posix/shared-statfs.t ++++ b/tests/basic/posix/shared-statfs.t +@@ -23,6 +23,7 @@ TEST MOUNT_LOOP $LO2 $B0/${V0}2 + # Create a subdir in mountpoint and use that for volume. + TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1; + TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 + total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') + # Keeping the size less than 200M mainly because XFS will use +@@ -38,6 +39,7 @@ EXPECT 'Stopped' volinfo_field $V0 'Status'; + TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/3 $H0:$B0/${V0}2/3 + + TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 + total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') + TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] +diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t +index 225d3b1..c09d424 100644 +--- a/tests/bitrot/bug-1373520.t ++++ b/tests/bitrot/bug-1373520.t +@@ -11,6 +11,7 @@ TEST pidof glusterd + #Create a disperse volume + TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} + TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status' + + #Disable md-cache +diff --git a/tests/bugs/distribute/bug-1368012.t b/tests/bugs/distribute/bug-1368012.t +index f89314b..b861554 100644 +--- a/tests/bugs/distribute/bug-1368012.t ++++ b/tests/bugs/distribute/bug-1368012.t +@@ -22,6 +22,7 @@ EXPECT "$V0" volinfo_field $V0 'Volume Name'; + EXPECT 'Created' volinfo_field $V0 'Status'; + ## Start volume and verify + TEST $CLI volume start $V0; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count + TEST $CLI volume set $V0 performance.stat-prefetch off + EXPECT 'Started' volinfo_field $V0 'Status'; + TEST glusterfs -s $H0 --volfile-id=$V0 $M0 +@@ -36,6 +37,7 @@ TEST permission_root=`stat -c "%A" $M0` + TEST echo $permission_root + #Add-brick + TEST $CLI volume add-brick $V0 $H0:/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" online_brick_count + + #Allow one lookup to happen + TEST pushd $M0 +diff --git a/tests/bugs/distribute/bug-853258.t b/tests/bugs/distribute/bug-853258.t +index e39f507..6817d9e 100755 +--- a/tests/bugs/distribute/bug-853258.t ++++ b/tests/bugs/distribute/bug-853258.t +@@ -31,6 +31,7 @@ done + + # Expand the volume and force assignment of new ranges. + TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count + # Force assignment of initial ranges. 
+ TEST $CLI volume rebalance $V0 fix-layout start + EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0 +diff --git a/tests/bugs/quota/bug-1293601.t b/tests/bugs/quota/bug-1293601.t +index def4ef9..741758b 100644 +--- a/tests/bugs/quota/bug-1293601.t ++++ b/tests/bugs/quota/bug-1293601.t +@@ -9,6 +9,7 @@ TEST glusterd + + TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} + TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count + TEST $CLI volume quota $V0 enable + + TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0; +@@ -27,6 +28,6 @@ EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "1.0MB" quotausage "/" + TEST $CLI volume quota $V0 disable + TEST $CLI volume quota $V0 enable + +-EXPECT_WITHIN 40 "1.0MB" quotausage "/" ++EXPECT_WITHIN 60 "1.0MB" quotausage "/" + + cleanup; +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +index 304cef6..09e10bf 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +@@ -2844,7 +2844,7 @@ glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol, + GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv); + if (gf_is_service_running (pidfile, &pid)) { + (void) send_attach_req (this, brickinfo->rpc, +- brickinfo->path, NULL, ++ brickinfo->path, NULL, NULL, + GLUSTERD_BRICK_TERMINATE); + brickinfo->status = GF_BRICK_STOPPED; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 95df889..fe9cc75 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2186,7 +2186,7 @@ retry: + goto out; + } + +- ret = glusterd_brick_process_add_brick (brickinfo, volinfo); ++ ret = glusterd_brick_process_add_brick (brickinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, "Adding brick %s:%s " +@@ -2372,8 +2372,7 @@ out: + } + + int +-glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, +- glusterd_volinfo_t *volinfo) ++glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo) + { + int ret = -1; + xlator_t *this = NULL; +@@ -2500,7 +2499,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, + brickinfo->hostname, brickinfo->path); + + (void) send_attach_req (this, brickinfo->rpc, +- brickinfo->path, NULL, ++ brickinfo->path, NULL, NULL, + GLUSTERD_BRICK_TERMINATE); + } else { + gf_msg_debug (this->name, 0, "About to stop glusterfsd" +@@ -5426,23 +5425,92 @@ static int32_t + attach_brick_callback (struct rpc_req *req, struct iovec *iov, int count, + void *v_frame) + { +- call_frame_t *frame = v_frame; +- glusterd_conf_t *conf = frame->this->private; +- glusterd_brickinfo_t *brickinfo = frame->local; ++ call_frame_t *frame = v_frame; ++ glusterd_conf_t *conf = frame->this->private; ++ glusterd_brickinfo_t *brickinfo = frame->local; ++ glusterd_brickinfo_t *other_brick = frame->cookie; ++ glusterd_volinfo_t *volinfo = NULL; ++ xlator_t *this = THIS; ++ int ret = -1; ++ char pidfile1[PATH_MAX] = {0}; ++ char pidfile2[PATH_MAX] = {0}; ++ gf_getspec_rsp rsp = {0,}; + + frame->local = NULL; +- brickinfo->port_registered = _gf_true; ++ frame->cookie = NULL; ++ ++ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp); ++ if (ret < 0) { ++ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error"); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = glusterd_get_volinfo_from_brick 
(other_brick->path, ++ &volinfo); ++ if (ret) { ++ gf_msg (THIS->name, GF_LOG_ERROR, 0, ++ GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo" ++ " from brick(%s) so pidfile copying/unlink will fail", ++ other_brick->path); ++ goto out; ++ } ++ GLUSTERD_GET_BRICK_PIDFILE (pidfile1, volinfo, other_brick, conf); ++ volinfo = NULL; ++ ++ ret = glusterd_get_volinfo_from_brick (brickinfo->path, ++ &volinfo); ++ if (ret) { ++ gf_msg (THIS->name, GF_LOG_ERROR, 0, ++ GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo" ++ " from brick(%s) so pidfile copying/unlink will fail", ++ brickinfo->path); ++ goto out; ++ } ++ GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, brickinfo, conf); ++ ++ if (rsp.op_ret == 0) { ++ brickinfo->port_registered = _gf_true; ++ ++ /* PID file is copied once brick has attached ++ successfully ++ */ ++ glusterd_copy_file (pidfile1, pidfile2); ++ brickinfo->status = GF_BRICK_STARTED; ++ brickinfo->rpc = rpc_clnt_ref (other_brick->rpc); ++ gf_log (THIS->name, GF_LOG_INFO, "brick %s is attached successfully", ++ brickinfo->path); ++ } else { ++ gf_log (THIS->name, GF_LOG_INFO, "attach_brick failed pidfile" ++ " is %s for brick_path %s", pidfile2, brickinfo->path); ++ brickinfo->port = 0; ++ brickinfo->status = GF_BRICK_STOPPED; ++ ret = glusterd_brick_process_remove_brick (brickinfo); ++ if (ret) ++ gf_msg_debug (this->name, 0, "Couldn't remove brick from" ++ " brick process"); ++ LOCK (&volinfo->lock); ++ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); ++ UNLOCK (&volinfo->lock); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, 0, ++ GD_MSG_VOLINFO_SET_FAIL, ++ "Failed to store volinfo of " ++ "%s volume", volinfo->volname); ++ goto out; ++ } ++ } ++out: + synclock_lock (&conf->big_lock); + --(conf->blockers); + synclock_unlock (&conf->big_lock); +- + STACK_DESTROY (frame->root); + return 0; + } + + int + send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, +- glusterd_brickinfo_t *brickinfo, int op) ++ glusterd_brickinfo_t *brickinfo, glusterd_brickinfo_t *other_brick, int op) + { + int ret = -1; + struct iobuf *iobuf = NULL; +@@ -5516,6 +5584,7 @@ send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, + + if (op == GLUSTERD_BRICK_ATTACH) { + frame->local = brickinfo; ++ frame->cookie = other_brick; + cbkfn = attach_brick_callback; + } + /* Send the msg */ +@@ -5582,27 +5651,19 @@ attach_brick (xlator_t *this, + rpc = rpc_clnt_ref (other_brick->rpc); + if (rpc) { + ret = send_attach_req (this, rpc, path, brickinfo, ++ other_brick, + GLUSTERD_BRICK_ATTACH); + rpc_clnt_unref (rpc); + if (!ret) { + ret = pmap_registry_extend (this, other_brick->port, +- brickinfo->path); ++ brickinfo->path); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "adding brick to process failed"); +- return ret; ++ goto out; + } +- +- /* PID file is copied once brick has attached +- successfully +- */ +- glusterd_copy_file (pidfile1, pidfile2); + brickinfo->port = other_brick->port; +- brickinfo->status = GF_BRICK_STARTED; +- brickinfo->rpc = +- rpc_clnt_ref (other_brick->rpc); +- ret = glusterd_brick_process_add_brick (brickinfo, +- volinfo); ++ ret = glusterd_brick_process_add_brick (brickinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, +@@ -5611,29 +5672,23 @@ attach_brick (xlator_t *this, + brickinfo->path); + return ret; + } +- +- if (ret) { +- gf_msg_debug (this->name, 0, "Add brick" +- " to brick process failed"); +- return ret; +- } +- + return 0; + } + } + /* +- * It might not actually be safe to 
manipulate the lock like +- * this, but if we don't then the connection can never actually +- * complete and retries are useless. Unfortunately, all of the +- * alternatives (e.g. doing all of this in a separate thread) +- * are much more complicated and risky. TBD: see if there's a +- * better way ++ * It might not actually be safe to manipulate the lock ++ * like this, but if we don't then the connection can ++ * never actually complete and retries are useless. ++ * Unfortunately, all of the alternatives (e.g. doing ++ * all of this in a separate thread) are much more ++ * complicated and risky. ++ * TBD: see if there's a better way + */ + synclock_unlock (&conf->big_lock); + sleep (1); + synclock_lock (&conf->big_lock); + } +- ++out: + gf_log (this->name, GF_LOG_WARNING, + "attach failed for %s", brickinfo->path); + return ret; +@@ -5855,6 +5910,7 @@ find_compatible_brick (glusterd_conf_t *conf, + return NULL; + } + ++ + /* Below function is use to populate sockpath based on passed pid + value as a argument after check the value from proc and also + check if passed pid is match with running glusterfs process +@@ -5941,6 +5997,62 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len) + } + + ++char * ++search_brick_path_from_proc (pid_t brick_pid, char *brickpath) ++{ ++ struct dirent *dp = NULL; ++ DIR *dirp = NULL; ++ size_t len = 0; ++ int fd = -1; ++ char path[PATH_MAX] = {0,}; ++ char sym[PATH_MAX] = {0,}; ++ struct dirent scratch[2] = {{0,},}; ++ char *brick_path = NULL; ++ ++ if (!brickpath) ++ goto out; ++ ++ sprintf(path, "/proc/%d/fd/", brick_pid); ++ dirp = sys_opendir (path); ++ if (!dirp) ++ goto out; ++ ++ len = strlen (path); ++ if (len >= (sizeof(path) - 2)) ++ goto out; ++ ++ fd = dirfd (dirp); ++ if (fd < 0) ++ goto out; ++ ++ memset(path, 0, sizeof(path)); ++ memset(sym, 0, sizeof(sym)); ++ ++ while ((dp = sys_readdir(dirp, scratch))) { ++ if (!strcmp(dp->d_name, ".") || ++ !strcmp(dp->d_name, "..")) ++ continue; ++ ++ /* check for non numerical descriptors */ ++ if (!strtol(dp->d_name, (char **)NULL, 10)) ++ continue; ++ ++ len = readlinkat (fd, dp->d_name, sym, sizeof(sym) - 1); ++ if (len > 1) { ++ sym[len] = '\0'; ++ if (!strcmp (sym, brickpath)) { ++ brick_path = gf_strdup(sym); ++ break; ++ } ++ memset (sym, 0, sizeof (sym)); ++ } ++ } ++out: ++ sys_closedir(dirp); ++ return brick_path; ++} ++ ++ + int + glusterd_brick_start (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, +@@ -5954,7 +6066,9 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, + int32_t pid = -1; + char pidfile[PATH_MAX] = {0}; + char socketpath[PATH_MAX] = {0}; ++ char *brickpath = NULL; + glusterd_volinfo_t *other_vol; ++ struct statvfs brickstat = {0,}; + + this = THIS; + GF_ASSERT (this); +@@ -6000,6 +6114,28 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, + brickinfo->start_triggered = _gf_true; + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); ++ ++ ret = sys_statvfs (brickinfo->path, &brickstat); ++ if (ret) { ++ gf_msg (this->name, GF_LOG_ERROR, ++ errno, GD_MSG_BRICKINFO_CREATE_FAIL, ++ "failed to get statfs() call on brick %s", ++ brickinfo->path); ++ goto out; ++ } ++ ++ /* Compare fsid is helpful to ensure the existence of a brick_root ++ path before the start/attach a brick ++ */ ++ if (brickinfo->statfs_fsid && ++ (brickinfo->statfs_fsid != brickstat.f_fsid)) { ++ gf_log (this->name, GF_LOG_ERROR, ++ "fsid comparison is failed it means Brick root path" ++ " %s is not created by glusterd, start/attach will also fail", ++ 
brickinfo->path); ++ goto out; ++ } ++ + if (gf_is_service_running (pidfile, &pid)) { + if (brickinfo->status != GF_BRICK_STARTING && + brickinfo->status != GF_BRICK_STARTED) { +@@ -6019,12 +6155,29 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, + * TBD: re-use RPC connection across bricks + */ + if (is_brick_mx_enabled ()) { ++ brickpath = search_brick_path_from_proc (pid, brickinfo->path); ++ if (!brickpath) { ++ gf_log (this->name, GF_LOG_INFO, ++ "Either pid %d is not running or brick" ++ " path %s is not consumed so cleanup pidfile", ++ pid, brickinfo->path); ++ /* search brick is failed so unlink pidfile */ ++ if (sys_access (pidfile , R_OK) == 0) { ++ sys_unlink (pidfile); ++ } ++ goto run; ++ } ++ GF_FREE (brickpath); + ret = glusterd_get_sock_from_brick_pid (pid, socketpath, + sizeof(socketpath)); + if (ret) { +- gf_log (this->name, GF_LOG_DEBUG, ++ gf_log (this->name, GF_LOG_INFO, + "Either pid %d is not running or is not match" + " with any running brick process ", pid); ++ /* Fetch unix socket is failed so unlink pidfile */ ++ if (sys_access (pidfile , R_OK) == 0) { ++ sys_unlink (pidfile); ++ } + goto run; + } + } else { +@@ -6039,7 +6192,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, + (void) glusterd_brick_connect (volinfo, brickinfo, + socketpath); + +- ret = glusterd_brick_process_add_brick (brickinfo, volinfo); ++ ret = glusterd_brick_process_add_brick (brickinfo); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, +@@ -6079,6 +6232,10 @@ run: + if (ret == 0) { + goto out; + } ++ /* Attach_brick is failed so unlink pidfile */ ++ if (sys_access (pidfile , R_OK) == 0) { ++ sys_unlink (pidfile); ++ } + } + + /* +@@ -7063,14 +7220,15 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, + dict_t *dict, int32_t count) + { + +- int ret = -1; +- int32_t pid = -1; +- char key[1024] = {0}; +- char base_key[1024] = {0}; +- char pidfile[PATH_MAX] = {0}; ++ int ret = -1; ++ int32_t pid = -1; ++ char key[1024] = {0}; ++ char base_key[1024] = {0}; ++ char pidfile[PATH_MAX] = {0}; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; +- gf_boolean_t brick_online = _gf_false; ++ gf_boolean_t brick_online = _gf_false; ++ char *brickpath = NULL; + + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); +@@ -7127,7 +7285,20 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, + if (glusterd_is_brick_started (brickinfo)) { + if (gf_is_service_running (pidfile, &pid) && + brickinfo->port_registered) { +- brick_online = _gf_true; ++ if (!is_brick_mx_enabled ()) { ++ brick_online = _gf_true; ++ } else { ++ brickpath = search_brick_path_from_proc (pid, brickinfo->path); ++ if (!brickpath) { ++ gf_log (this->name, GF_LOG_INFO, ++ "brick path %s is not consumed", ++ brickinfo->path); ++ brick_online = _gf_false; ++ } else { ++ brick_online = _gf_true; ++ GF_FREE (brickpath); ++ } ++ } + } else { + pid = -1; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 4c9561e..4835728 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -179,8 +179,7 @@ int32_t + glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo); + + int +-glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo, +- glusterd_volinfo_t *volinfo); ++glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo); + + int + glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo); +@@ -200,7 +199,8 @@ glusterd_volume_stop_glusterfs 
(glusterd_volinfo_t *volinfo, + + int + send_attach_req (xlator_t *this, struct rpc_clnt *rpc, char *path, +- glusterd_brickinfo_t *brick, int op); ++ glusterd_brickinfo_t *brick, ++ glusterd_brickinfo_t *other_brick, int op); + + glusterd_volinfo_t * + glusterd_volinfo_ref (glusterd_volinfo_t *volinfo); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index e34d58a..8bb0b6d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -2585,8 +2585,13 @@ glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags, + } + + glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STARTED); +- ++ /* Update volinfo on disk in critical section because ++ attach_brick_callback can also call store_volinfo for same ++ volume to update volinfo on disk ++ */ ++ LOCK (&volinfo->lock); + ret = glusterd_store_volinfo (volinfo, verincrement); ++ UNLOCK (&volinfo->lock); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_VOLINFO_SET_FAIL, +-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index d765c3f..4ac4b36 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} %else Name: glusterfs Version: 3.12.2 -Release: 14%{?dist} +Release: 15%{?dist} %endif License: GPLv2 or LGPLv3+ Group: System Environment/Base @@ -590,6 +590,14 @@ Patch0322: 0322-geo-rep-Cleanup-stale-unprocessed-xsync-changelogs.patch Patch0323: 0323-cluster-afr-Mark-dirty-for-entry-transactions-for-qu.patch Patch0324: 0324-dht-delete-tier-related-internal-xattr-in-dht_getxat.patch Patch0325: 0325-core-dereference-check-on-the-variables-in-glusterfs.patch +Patch0326: 0326-glusterd-memory-leak-in-get-state.patch +Patch0327: 0327-afr-switch-lk_owner-only-when-pre-op-succeeds.patch +Patch0328: 0328-geo-rep-Fix-issues-with-gfid-conflict-handling.patch +Patch0329: 0329-cluster-dht-Set-loc-gfid-before-healing-attr.patch +Patch0330: 0330-posix-check-before-removing-stale-symlink.patch +Patch0331: 0331-rpc-free-registered-callback-programs.patch +Patch0332: 0332-rpc-rpc_clnt_connection_cleanup-is-crashed-due-to-do.patch +Patch0333: 0333-glusterd-Add-multiple-checks-before-attach-start-a-b.patch %description GlusterFS is a distributed file-system capable of scaling to several @@ -2538,6 +2546,10 @@ fi %endif %changelog +* Fri Jul 27 2018 Milind Changire - 3.12.2-15 +- fixes bugs bz#1589279 bz#1598384 bz#1599362 bz#1599998 bz#1600790 + bz#1601331 bz#1603103 + * Wed Jul 18 2018 Milind Changire - 3.12.2-14 - fixes bugs bz#1547903 bz#1566336 bz#1568896 bz#1578716 bz#1581047 bz#1581231 bz#1582066 bz#1593865 bz#1597506 bz#1597511 bz#1597654 bz#1597768
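
Note for reviewers: the core technique in patch 0332 is to detach the shared
saved_frames pointer inside the connection lock and destroy it only after the
lock is released, so that rpc_clnt_connection_cleanup and rpc_clnt_destroy can
never both free the same frames. Below is a minimal standalone sketch of that
detach-then-destroy pattern; the types here (struct frames, struct conn) are
hypothetical simplifications for illustration, not the actual GlusterFS
structures.

#include <pthread.h>
#include <stdlib.h>

/* Hypothetical stand-ins for saved_frames and rpc_clnt_connection_t. */
struct frames { int count; };

struct conn {
        pthread_mutex_t lock;
        struct frames *saved_frames; /* shared with the cleanup path */
};

/* Mirrors the fixed rpc_clnt_destroy(): take ownership of the pointer
 * under the lock, then destroy it outside. A racing caller that swaps
 * the pointer under the same lock finds NULL and frees nothing, so the
 * frames are released exactly once. */
static void
conn_destroy (struct conn *c)
{
        struct frames *detached = NULL;

        pthread_mutex_lock (&c->lock);
        {
                detached = c->saved_frames;
                c->saved_frames = NULL; /* no other path can reach it now */
        }
        pthread_mutex_unlock (&c->lock);

        free (detached); /* free(NULL) is a no-op on the losing side */
}

int
main (void)
{
        struct conn c = { PTHREAD_MUTEX_INITIALIZER, NULL };

        c.saved_frames = calloc (1, sizeof (struct frames));
        conn_destroy (&c); /* first destroyer frees the frames */
        conn_destroy (&c); /* second destroyer sees NULL: no double free */
        return 0;
}

This is the same hand-off the patch comment describes ("Access saved_frames
in critical-section"): exactly one owner is made responsible for freeing the
shared state.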