autobuild v6.0-41
Resolves: bz#1785714 bz#1851424 bz#1851989 bz#1852736 bz#1853189 Resolves: bz#1855966 Signed-off-by: Deepshikha Khandelwal <dkhandel@redhat.com>
This commit is contained in:
		
							parent
							
								
									71080da8dd
								
							
						
					
					
						commit
						bd380b90dd
					
				
							
								
								
									
										409
									
								
								0457-cluster-ec-Improve-detection-of-new-heals.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										409
									
								
								0457-cluster-ec-Improve-detection-of-new-heals.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,409 @@
 | 
			
		||||
From 3e8b3a2c2c6f83635486035fc8040c87d89813d2 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Xavi Hernandez <xhernandez@redhat.com>
 | 
			
		||||
Date: Thu, 2 Jul 2020 18:08:52 +0200
 | 
			
		||||
Subject: [PATCH 457/465] cluster/ec: Improve detection of new heals
 | 
			
		||||
 | 
			
		||||
When EC successfully healed a directory it assumed that maybe other
 | 
			
		||||
entries inside that directory could have been created, which could
 | 
			
		||||
require additional heal cycles. For this reason, when the heal happened
 | 
			
		||||
as part of one index heal iteration, it triggered a new iteration.
 | 
			
		||||
 | 
			
		||||
The problem happened when the directory was healthy, so no new entries
 | 
			
		||||
were added, but its index entry was not removed for some reason. In
 | 
			
		||||
this case self-heal started an endless loop healing the same directory
 | 
			
		||||
continuously, causing high CPU utilization.
 | 
			
		||||
 | 
			
		||||
This patch improves detection of new files added to the heal index so
 | 
			
		||||
that a new index heal iteration is only triggered if there is new work
 | 
			
		||||
to do.
 | 
			
		||||
 | 
			
		||||
>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24665/
 | 
			
		||||
>Fixes: #1354
 | 
			
		||||
 | 
			
		||||
Change-Id: I2355742b85fbfa6de758bccc5d2e1a283c82b53f
 | 
			
		||||
BUG: 1852736
 | 
			
		||||
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
 | 
			
		||||
Reviewed-on: https://code.engineering.redhat.com/gerrit/208041
 | 
			
		||||
Tested-by: Ashish Pandey <aspandey@redhat.com>
 | 
			
		||||
Tested-by: RHGS Build Bot <nigelb@redhat.com>
 | 
			
		||||
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
 | 
			
		||||
---
 | 
			
		||||
 xlators/cluster/ec/src/ec-common.c     |  2 +-
 | 
			
		||||
 xlators/cluster/ec/src/ec-heal.c       | 58 +++++++++++++++++++++++-----------
 | 
			
		||||
 xlators/cluster/ec/src/ec-heald.c      | 24 ++++++++++----
 | 
			
		||||
 xlators/cluster/ec/src/ec-inode-read.c | 27 ++++++++++++++--
 | 
			
		||||
 xlators/cluster/ec/src/ec-types.h      |  4 +--
 | 
			
		||||
 xlators/cluster/ec/src/ec.h            |  1 +
 | 
			
		||||
 6 files changed, 86 insertions(+), 30 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
 | 
			
		||||
index e580bfb..e3f8769 100644
 | 
			
		||||
--- a/xlators/cluster/ec/src/ec-common.c
 | 
			
		||||
+++ b/xlators/cluster/ec/src/ec-common.c
 | 
			
		||||
@@ -230,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
 | 
			
		||||
 int32_t
 | 
			
		||||
 ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
 | 
			
		||||
                int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
 | 
			
		||||
-               uintptr_t bad, dict_t *xdata)
 | 
			
		||||
+               uintptr_t bad, uint32_t pending, dict_t *xdata)
 | 
			
		||||
 {
 | 
			
		||||
     if (op_ret < 0) {
 | 
			
		||||
         gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
 | 
			
		||||
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
 | 
			
		||||
index 06a7016..e2de879 100644
 | 
			
		||||
--- a/xlators/cluster/ec/src/ec-heal.c
 | 
			
		||||
+++ b/xlators/cluster/ec/src/ec-heal.c
 | 
			
		||||
@@ -72,6 +72,7 @@ struct ec_name_data {
 | 
			
		||||
     char *name;
 | 
			
		||||
     inode_t *parent;
 | 
			
		||||
     default_args_cbk_t *replies;
 | 
			
		||||
+    uint32_t heal_pending;
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
 static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
 | 
			
		||||
@@ -996,6 +997,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
 | 
			
		||||
         ret = -ENOTCONN;
 | 
			
		||||
         goto out;
 | 
			
		||||
     }
 | 
			
		||||
+
 | 
			
		||||
 out:
 | 
			
		||||
     if (xattr)
 | 
			
		||||
         dict_unref(xattr);
 | 
			
		||||
@@ -1164,6 +1166,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
 | 
			
		||||
     dict_t *xdata = NULL;
 | 
			
		||||
     char *linkname = NULL;
 | 
			
		||||
     ec_config_t config;
 | 
			
		||||
+
 | 
			
		||||
     /* There should be just one gfid key */
 | 
			
		||||
     EC_REPLIES_ALLOC(replies, ec->nodes);
 | 
			
		||||
     if (gfid_db->count != 1) {
 | 
			
		||||
@@ -1408,6 +1411,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
 | 
			
		||||
 
 | 
			
		||||
     ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
 | 
			
		||||
                          participants);
 | 
			
		||||
+    if (ret >= 0) {
 | 
			
		||||
+        /* If ec_create_name() succeeded we return 1 to indicate that a new
 | 
			
		||||
+         * file has been created and it will need to be healed. */
 | 
			
		||||
+        ret = 1;
 | 
			
		||||
+    }
 | 
			
		||||
 out:
 | 
			
		||||
     cluster_replies_wipe(replies, ec->nodes);
 | 
			
		||||
     loc_wipe(&loc);
 | 
			
		||||
@@ -1485,18 +1493,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
 | 
			
		||||
     ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
 | 
			
		||||
                        name_on);
 | 
			
		||||
 
 | 
			
		||||
-    if (ret < 0)
 | 
			
		||||
+    if (ret < 0) {
 | 
			
		||||
         memset(name_on, 0, ec->nodes);
 | 
			
		||||
+    } else {
 | 
			
		||||
+        name_data->heal_pending += ret;
 | 
			
		||||
+    }
 | 
			
		||||
 
 | 
			
		||||
     for (i = 0; i < ec->nodes; i++)
 | 
			
		||||
         if (name_data->participants[i] && !name_on[i])
 | 
			
		||||
             name_data->failed_on[i] = 1;
 | 
			
		||||
+
 | 
			
		||||
     return 0;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
 int
 | 
			
		||||
 ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
 | 
			
		||||
-              unsigned char *participants)
 | 
			
		||||
+              unsigned char *participants, uint32_t *pending)
 | 
			
		||||
 {
 | 
			
		||||
     int i = 0;
 | 
			
		||||
     int j = 0;
 | 
			
		||||
@@ -1509,7 +1521,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
 | 
			
		||||
     name_data.frame = frame;
 | 
			
		||||
     name_data.participants = participants;
 | 
			
		||||
     name_data.failed_on = alloca0(ec->nodes);
 | 
			
		||||
-    ;
 | 
			
		||||
+    name_data.heal_pending = 0;
 | 
			
		||||
 
 | 
			
		||||
     for (i = 0; i < ec->nodes; i++) {
 | 
			
		||||
         if (!participants[i])
 | 
			
		||||
@@ -1528,6 +1540,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
 | 
			
		||||
             break;
 | 
			
		||||
         }
 | 
			
		||||
     }
 | 
			
		||||
+    *pending += name_data.heal_pending;
 | 
			
		||||
+
 | 
			
		||||
     loc_wipe(&loc);
 | 
			
		||||
     return ret;
 | 
			
		||||
 }
 | 
			
		||||
@@ -1535,7 +1549,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
 | 
			
		||||
 int
 | 
			
		||||
 __ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
 | 
			
		||||
                 unsigned char *heal_on, unsigned char *sources,
 | 
			
		||||
-                unsigned char *healed_sinks)
 | 
			
		||||
+                unsigned char *healed_sinks, uint32_t *pending)
 | 
			
		||||
 {
 | 
			
		||||
     unsigned char *locked_on = NULL;
 | 
			
		||||
     unsigned char *output = NULL;
 | 
			
		||||
@@ -1580,7 +1594,7 @@ unlock:
 | 
			
		||||
         if (sources[i] || healed_sinks[i])
 | 
			
		||||
             participants[i] = 1;
 | 
			
		||||
     }
 | 
			
		||||
-    ret = ec_heal_names(frame, ec, inode, participants);
 | 
			
		||||
+    ret = ec_heal_names(frame, ec, inode, participants, pending);
 | 
			
		||||
 
 | 
			
		||||
     if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
 | 
			
		||||
         goto out;
 | 
			
		||||
@@ -1601,7 +1615,8 @@ out:
 | 
			
		||||
 
 | 
			
		||||
 int
 | 
			
		||||
 ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
 | 
			
		||||
-              unsigned char *sources, unsigned char *healed_sinks)
 | 
			
		||||
+              unsigned char *sources, unsigned char *healed_sinks,
 | 
			
		||||
+              uint32_t *pending)
 | 
			
		||||
 {
 | 
			
		||||
     unsigned char *locked_on = NULL;
 | 
			
		||||
     unsigned char *up_subvols = NULL;
 | 
			
		||||
@@ -1632,7 +1647,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
 | 
			
		||||
             goto unlock;
 | 
			
		||||
         }
 | 
			
		||||
         ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
 | 
			
		||||
-                              healed_sinks);
 | 
			
		||||
+                              healed_sinks, pending);
 | 
			
		||||
     }
 | 
			
		||||
 unlock:
 | 
			
		||||
     cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
 | 
			
		||||
@@ -1953,14 +1968,14 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
 | 
			
		||||
             if (fop->cbks.heal) {
 | 
			
		||||
                 fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
 | 
			
		||||
                                (heal->good | heal->bad), heal->good, heal->bad,
 | 
			
		||||
-                               NULL);
 | 
			
		||||
+                               0, NULL);
 | 
			
		||||
             }
 | 
			
		||||
 
 | 
			
		||||
             return EC_STATE_END;
 | 
			
		||||
         case -EC_STATE_REPORT:
 | 
			
		||||
             if (fop->cbks.heal) {
 | 
			
		||||
-                fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
 | 
			
		||||
-                               0, 0, NULL);
 | 
			
		||||
+                fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
 | 
			
		||||
+                               fop->error, 0, 0, 0, 0, NULL);
 | 
			
		||||
             }
 | 
			
		||||
 
 | 
			
		||||
             return EC_STATE_END;
 | 
			
		||||
@@ -1997,14 +2012,15 @@ out:
 | 
			
		||||
     if (fop != NULL) {
 | 
			
		||||
         ec_manager(fop, error);
 | 
			
		||||
     } else {
 | 
			
		||||
-        func(frame, NULL, this, -1, error, 0, 0, 0, NULL);
 | 
			
		||||
+        func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
 | 
			
		||||
     }
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
 int32_t
 | 
			
		||||
 ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
 | 
			
		||||
                    int32_t op_ret, int32_t op_errno, uintptr_t mask,
 | 
			
		||||
-                   uintptr_t good, uintptr_t bad, dict_t *xdata)
 | 
			
		||||
+                   uintptr_t good, uintptr_t bad, uint32_t pending,
 | 
			
		||||
+                   dict_t *xdata)
 | 
			
		||||
 {
 | 
			
		||||
     ec_fop_data_t *fop = cookie;
 | 
			
		||||
     ec_heal_t *heal = fop->data;
 | 
			
		||||
@@ -2489,6 +2505,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
 | 
			
		||||
     intptr_t mbad = 0;
 | 
			
		||||
     intptr_t good = 0;
 | 
			
		||||
     intptr_t bad = 0;
 | 
			
		||||
+    uint32_t pending = 0;
 | 
			
		||||
     ec_fop_data_t *fop = data;
 | 
			
		||||
     gf_boolean_t blocking = _gf_false;
 | 
			
		||||
     ec_heal_need_t need_heal = EC_HEAL_NONEED;
 | 
			
		||||
@@ -2524,7 +2541,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
 | 
			
		||||
     if (loc->name && strlen(loc->name)) {
 | 
			
		||||
         ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
 | 
			
		||||
                            participants);
 | 
			
		||||
-        if (ret == 0) {
 | 
			
		||||
+        if (ret >= 0) {
 | 
			
		||||
             gf_msg_debug(this->name, 0,
 | 
			
		||||
                          "%s: name heal "
 | 
			
		||||
                          "successful on %" PRIXPTR,
 | 
			
		||||
@@ -2542,7 +2559,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
 | 
			
		||||
 
 | 
			
		||||
     /* Mount triggers heal only when it detects that it must need heal, shd
 | 
			
		||||
      * triggers heals periodically which need not be thorough*/
 | 
			
		||||
-    if (ec->shd.iamshd) {
 | 
			
		||||
+    if (ec->shd.iamshd && (ret <= 0)) {
 | 
			
		||||
         ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
 | 
			
		||||
                         &need_heal);
 | 
			
		||||
 
 | 
			
		||||
@@ -2552,13 +2569,15 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
 | 
			
		||||
             goto out;
 | 
			
		||||
         }
 | 
			
		||||
     }
 | 
			
		||||
+
 | 
			
		||||
     sources = alloca0(ec->nodes);
 | 
			
		||||
     healed_sinks = alloca0(ec->nodes);
 | 
			
		||||
     if (IA_ISREG(loc->inode->ia_type)) {
 | 
			
		||||
         ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
 | 
			
		||||
                            healed_sinks);
 | 
			
		||||
     } else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
 | 
			
		||||
-        ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks);
 | 
			
		||||
+        ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
 | 
			
		||||
+                            &pending);
 | 
			
		||||
     } else {
 | 
			
		||||
         ret = 0;
 | 
			
		||||
         memcpy(sources, participants, ec->nodes);
 | 
			
		||||
@@ -2588,10 +2607,11 @@ out:
 | 
			
		||||
     if (fop->cbks.heal) {
 | 
			
		||||
         fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
 | 
			
		||||
                        ec_char_array_to_mask(participants, ec->nodes),
 | 
			
		||||
-                       mgood & good, mbad & bad, NULL);
 | 
			
		||||
+                       mgood & good, mbad & bad, pending, NULL);
 | 
			
		||||
     }
 | 
			
		||||
     if (frame)
 | 
			
		||||
         STACK_DESTROY(frame->root);
 | 
			
		||||
+
 | 
			
		||||
     return;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
@@ -2638,8 +2658,8 @@ void
 | 
			
		||||
 ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
 | 
			
		||||
 {
 | 
			
		||||
     if (fop->cbks.heal) {
 | 
			
		||||
-        fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0,
 | 
			
		||||
-                       NULL);
 | 
			
		||||
+        fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
 | 
			
		||||
+                       0, 0, NULL);
 | 
			
		||||
     }
 | 
			
		||||
     ec_fop_data_release(fop);
 | 
			
		||||
 }
 | 
			
		||||
@@ -2826,7 +2846,7 @@ fail:
 | 
			
		||||
     if (fop)
 | 
			
		||||
         ec_fop_data_release(fop);
 | 
			
		||||
     if (func)
 | 
			
		||||
-        func(frame, NULL, this, -1, err, 0, 0, 0, NULL);
 | 
			
		||||
+        func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
 int
 | 
			
		||||
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
 | 
			
		||||
index cba111a..4f4b6aa 100644
 | 
			
		||||
--- a/xlators/cluster/ec/src/ec-heald.c
 | 
			
		||||
+++ b/xlators/cluster/ec/src/ec-heald.c
 | 
			
		||||
@@ -156,15 +156,27 @@ int
 | 
			
		||||
 ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
 | 
			
		||||
                 gf_boolean_t full)
 | 
			
		||||
 {
 | 
			
		||||
+    dict_t *xdata = NULL;
 | 
			
		||||
+    uint32_t count;
 | 
			
		||||
     int32_t ret;
 | 
			
		||||
 
 | 
			
		||||
-    ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL);
 | 
			
		||||
-    if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
 | 
			
		||||
+    ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata);
 | 
			
		||||
+    if (!full && (loc->inode->ia_type == IA_IFDIR)) {
 | 
			
		||||
         /* If we have just healed a directory, it's possible that
 | 
			
		||||
-         * other index entries have appeared to be healed. We put a
 | 
			
		||||
-         * mark so that we can check it later and restart a scan
 | 
			
		||||
-         * without delay. */
 | 
			
		||||
-        healer->rerun = _gf_true;
 | 
			
		||||
+         * other index entries have appeared to be healed. */
 | 
			
		||||
+        if ((xdata != NULL) &&
 | 
			
		||||
+            (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
 | 
			
		||||
+            (count > 0)) {
 | 
			
		||||
+            /* Force a rerun of the index healer. */
 | 
			
		||||
+            gf_msg_debug(healer->this->name, 0, "%d more entries to heal",
 | 
			
		||||
+                         count);
 | 
			
		||||
+
 | 
			
		||||
+            healer->rerun = _gf_true;
 | 
			
		||||
+        }
 | 
			
		||||
+    }
 | 
			
		||||
+
 | 
			
		||||
+    if (xdata != NULL) {
 | 
			
		||||
+        dict_unref(xdata);
 | 
			
		||||
     }
 | 
			
		||||
 
 | 
			
		||||
     return ret;
 | 
			
		||||
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
 | 
			
		||||
index f87a94a..e82e8f6 100644
 | 
			
		||||
--- a/xlators/cluster/ec/src/ec-inode-read.c
 | 
			
		||||
+++ b/xlators/cluster/ec/src/ec-inode-read.c
 | 
			
		||||
@@ -393,7 +393,8 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state)
 | 
			
		||||
 int32_t
 | 
			
		||||
 ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
 | 
			
		||||
                      int32_t op_ret, int32_t op_errno, uintptr_t mask,
 | 
			
		||||
-                     uintptr_t good, uintptr_t bad, dict_t *xdata)
 | 
			
		||||
+                     uintptr_t good, uintptr_t bad, uint32_t pending,
 | 
			
		||||
+                     dict_t *xdata)
 | 
			
		||||
 {
 | 
			
		||||
     ec_fop_data_t *fop = cookie;
 | 
			
		||||
     fop_getxattr_cbk_t func = fop->data;
 | 
			
		||||
@@ -402,6 +403,25 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
 | 
			
		||||
     char *str;
 | 
			
		||||
     char bin1[65], bin2[65];
 | 
			
		||||
 
 | 
			
		||||
+    /* We try to return the 'pending' information in xdata, but if this cannot
 | 
			
		||||
+     * be set, we will ignore it silently. We prefer to report the success or
 | 
			
		||||
+     * failure of the heal itself. */
 | 
			
		||||
+    if (xdata == NULL) {
 | 
			
		||||
+        xdata = dict_new();
 | 
			
		||||
+    } else {
 | 
			
		||||
+        dict_ref(xdata);
 | 
			
		||||
+    }
 | 
			
		||||
+    if (xdata != NULL) {
 | 
			
		||||
+        if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) {
 | 
			
		||||
+            /* dict_set_uint32() is marked as 'warn_unused_result' and gcc
 | 
			
		||||
+             * enforces to check the result in this case. However we don't
 | 
			
		||||
+             * really care if it succeeded or not. We'll just do the same.
 | 
			
		||||
+             *
 | 
			
		||||
+             * This empty 'if' avoids the warning, and it will be removed by
 | 
			
		||||
+             * the optimizer. */
 | 
			
		||||
+        }
 | 
			
		||||
+    }
 | 
			
		||||
+
 | 
			
		||||
     if (op_ret >= 0) {
 | 
			
		||||
         dict = dict_new();
 | 
			
		||||
         if (dict == NULL) {
 | 
			
		||||
@@ -435,11 +455,14 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
 | 
			
		||||
     }
 | 
			
		||||
 
 | 
			
		||||
 out:
 | 
			
		||||
-    func(frame, NULL, xl, op_ret, op_errno, dict, NULL);
 | 
			
		||||
+    func(frame, NULL, xl, op_ret, op_errno, dict, xdata);
 | 
			
		||||
 
 | 
			
		||||
     if (dict != NULL) {
 | 
			
		||||
         dict_unref(dict);
 | 
			
		||||
     }
 | 
			
		||||
+    if (xdata != NULL) {
 | 
			
		||||
+        dict_unref(xdata);
 | 
			
		||||
+    }
 | 
			
		||||
 
 | 
			
		||||
     return 0;
 | 
			
		||||
 }
 | 
			
		||||
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
 | 
			
		||||
index 34a9768..f15429d 100644
 | 
			
		||||
--- a/xlators/cluster/ec/src/ec-types.h
 | 
			
		||||
+++ b/xlators/cluster/ec/src/ec-types.h
 | 
			
		||||
@@ -186,10 +186,10 @@ struct _ec_inode {
 | 
			
		||||
 
 | 
			
		||||
 typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
 | 
			
		||||
                                   int32_t, uintptr_t, uintptr_t, uintptr_t,
 | 
			
		||||
-                                  dict_t *);
 | 
			
		||||
+                                  uint32_t, dict_t *);
 | 
			
		||||
 typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
 | 
			
		||||
                                    int32_t, uintptr_t, uintptr_t, uintptr_t,
 | 
			
		||||
-                                   dict_t *);
 | 
			
		||||
+                                   uint32_t, dict_t *);
 | 
			
		||||
 
 | 
			
		||||
 union _ec_cbk {
 | 
			
		||||
     fop_access_cbk_t access;
 | 
			
		||||
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
 | 
			
		||||
index 1b210d9..6f6de6d 100644
 | 
			
		||||
--- a/xlators/cluster/ec/src/ec.h
 | 
			
		||||
+++ b/xlators/cluster/ec/src/ec.h
 | 
			
		||||
@@ -18,6 +18,7 @@
 | 
			
		||||
 #define EC_XATTR_SIZE EC_XATTR_PREFIX "size"
 | 
			
		||||
 #define EC_XATTR_VERSION EC_XATTR_PREFIX "version"
 | 
			
		||||
 #define EC_XATTR_HEAL EC_XATTR_PREFIX "heal"
 | 
			
		||||
+#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new"
 | 
			
		||||
 #define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty"
 | 
			
		||||
 #define EC_STRIPE_CACHE_MAX_SIZE 10
 | 
			
		||||
 #define EC_VERSION_SIZE 2
 | 
			
		||||
-- 
 | 
			
		||||
1.8.3.1
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										182
									
								
								0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										182
									
								
								0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,182 @@
 | 
			
		||||
From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Raghavendra Bhat <raghavendra@redhat.com>
 | 
			
		||||
Date: Mon, 15 Apr 2019 14:09:34 -0400
 | 
			
		||||
Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after
 | 
			
		||||
 cancelling the signer thread
 | 
			
		||||
 | 
			
		||||
When the bit-rot feature is disabled, the signer thread from the bit-rot-stub
 | 
			
		||||
xlator (the thread which performs the setxattr of the signature on to the
 | 
			
		||||
disk) is cancelled. But, if the cancelled signer thread had already held
 | 
			
		||||
the mutex (&priv->lock) which it uses to monitor the queue of files to
 | 
			
		||||
be signed, then the mutex is never released. This creates problems in
 | 
			
		||||
the future when the feature is enabled again. Both the new instance of the
 | 
			
		||||
signer thread and the regular thread which enqueues the files to be
 | 
			
		||||
signed will be blocked on this mutex.
 | 
			
		||||
 | 
			
		||||
So, as part of cancelling the signer thread, unlock the mutex associated
 | 
			
		||||
with it as well using pthread_cleanup_push and pthread_cleanup_pop.
 | 
			
		||||
 | 
			
		||||
Upstream patch:
 | 
			
		||||
	> patch: https://review.gluster.org/22572
 | 
			
		||||
	> fixes: #bz1700078
 | 
			
		||||
	> Change-Id: Ib761910caed90b268e69794ddeb108165487af40
 | 
			
		||||
 | 
			
		||||
Change-Id: Ib761910caed90b268e69794ddeb108165487af40
 | 
			
		||||
BUG: 1851424
 | 
			
		||||
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
 | 
			
		||||
Reviewed-on: https://code.engineering.redhat.com/gerrit/208304
 | 
			
		||||
Tested-by: RHGS Build Bot <nigelb@redhat.com>
 | 
			
		||||
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
 | 
			
		||||
---
 | 
			
		||||
 .../bit-rot/src/stub/bit-rot-stub-messages.h       |  4 +-
 | 
			
		||||
 xlators/features/bit-rot/src/stub/bit-rot-stub.c   | 62 +++++++++++++++++++---
 | 
			
		||||
 2 files changed, 59 insertions(+), 7 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
 | 
			
		||||
index 7f07f29..155802b 100644
 | 
			
		||||
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
 | 
			
		||||
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
 | 
			
		||||
@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
 | 
			
		||||
            BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
 | 
			
		||||
            BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
 | 
			
		||||
            BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
 | 
			
		||||
-           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
 | 
			
		||||
+           BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
 | 
			
		||||
+           BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
 | 
			
		||||
+           BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL);
 | 
			
		||||
 
 | 
			
		||||
 #endif /* !_BITROT_STUB_MESSAGES_H_ */
 | 
			
		||||
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
 | 
			
		||||
index 3f48a4b..c3f81bc 100644
 | 
			
		||||
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
 | 
			
		||||
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
 | 
			
		||||
@@ -26,6 +26,15 @@
 | 
			
		||||
 
 | 
			
		||||
 #define BR_STUB_REQUEST_COOKIE 0x1
 | 
			
		||||
 
 | 
			
		||||
+void
 | 
			
		||||
+br_stub_lock_cleaner(void *arg)
 | 
			
		||||
+{
 | 
			
		||||
+    pthread_mutex_t *clean_mutex = arg;
 | 
			
		||||
+
 | 
			
		||||
+    pthread_mutex_unlock(clean_mutex);
 | 
			
		||||
+    return;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 void *
 | 
			
		||||
 br_stub_signth(void *);
 | 
			
		||||
 
 | 
			
		||||
@@ -166,8 +175,11 @@ init(xlator_t *this)
 | 
			
		||||
 
 | 
			
		||||
     ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
 | 
			
		||||
                            "brssign");
 | 
			
		||||
-    if (ret != 0)
 | 
			
		||||
+    if (ret != 0) {
 | 
			
		||||
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
 | 
			
		||||
+               "failed to create the new thread for signer");
 | 
			
		||||
         goto cleanup_lock;
 | 
			
		||||
+    }
 | 
			
		||||
 
 | 
			
		||||
     ret = br_stub_bad_object_container_init(this, priv);
 | 
			
		||||
     if (ret) {
 | 
			
		||||
@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options)
 | 
			
		||||
     priv = this->private;
 | 
			
		||||
 
 | 
			
		||||
     GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
 | 
			
		||||
-    if (priv->do_versioning) {
 | 
			
		||||
+    if (priv->do_versioning && !priv->signth) {
 | 
			
		||||
         ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
 | 
			
		||||
                                "brssign");
 | 
			
		||||
-        if (ret != 0)
 | 
			
		||||
+        if (ret != 0) {
 | 
			
		||||
+            gf_msg(this->name, GF_LOG_WARNING, 0,
 | 
			
		||||
+                   BRS_MSG_SPAWN_SIGN_THRD_FAILED,
 | 
			
		||||
+                   "failed to create the new thread for signer");
 | 
			
		||||
             goto err;
 | 
			
		||||
+        }
 | 
			
		||||
 
 | 
			
		||||
         ret = br_stub_bad_object_container_init(this, priv);
 | 
			
		||||
         if (ret) {
 | 
			
		||||
@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
 | 
			
		||||
                 gf_msg(this->name, GF_LOG_ERROR, 0,
 | 
			
		||||
                        BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
 | 
			
		||||
                        "Could not cancel sign serializer thread");
 | 
			
		||||
+            } else {
 | 
			
		||||
+                gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
 | 
			
		||||
+                       "killed the signer thread");
 | 
			
		||||
+                priv->signth = 0;
 | 
			
		||||
             }
 | 
			
		||||
-            priv->signth = 0;
 | 
			
		||||
         }
 | 
			
		||||
 
 | 
			
		||||
         if (priv->container.thread) {
 | 
			
		||||
@@ -902,6 +921,24 @@ br_stub_signth(void *arg)
 | 
			
		||||
 
 | 
			
		||||
     THIS = this;
 | 
			
		||||
     while (1) {
 | 
			
		||||
+        /*
 | 
			
		||||
+         * Disabling bit-rot feature leads to this particular thread
 | 
			
		||||
+         * getting cleaned up by reconfigure via a call to the function
 | 
			
		||||
+         * gf_thread_cleanup_xint (which in turn calls pthread_cancel
 | 
			
		||||
+         * and pthread_join). But, if this thread had held the mutex
 | 
			
		||||
+         * &priv->lock at the time of cancellation, then it leads to
 | 
			
		||||
+         * deadlock in future when bit-rot feature is enabled (which
 | 
			
		||||
+         * again spawns this thread which cant hold the lock as the
 | 
			
		||||
+         * mutex is still held by the previous instance of the thread
 | 
			
		||||
+         * which got killed). Also, the br_stub_handle_object_signature
 | 
			
		||||
+         * function which is called whenever file has to be signed
 | 
			
		||||
+         * also gets blocked as it too attempts to acquire &priv->lock.
 | 
			
		||||
+         *
 | 
			
		||||
+         * So, arrange for the lock to be unlocked as part of the
 | 
			
		||||
+         * cleanup of this thread using pthread_cleanup_push and
 | 
			
		||||
+         * pthread_cleanup_pop.
 | 
			
		||||
+         */
 | 
			
		||||
+        pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
 | 
			
		||||
         pthread_mutex_lock(&priv->lock);
 | 
			
		||||
         {
 | 
			
		||||
             while (list_empty(&priv->squeue))
 | 
			
		||||
@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
 | 
			
		||||
             list_del_init(&sigstub->list);
 | 
			
		||||
         }
 | 
			
		||||
         pthread_mutex_unlock(&priv->lock);
 | 
			
		||||
+        pthread_cleanup_pop(0);
 | 
			
		||||
 
 | 
			
		||||
         call_resume(sigstub->stub);
 | 
			
		||||
 
 | 
			
		||||
@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
 | 
			
		||||
 
 | 
			
		||||
     priv = this->private;
 | 
			
		||||
 
 | 
			
		||||
-    if (frame->root->pid != GF_CLIENT_PID_BITD)
 | 
			
		||||
+    if (frame->root->pid != GF_CLIENT_PID_BITD) {
 | 
			
		||||
+        gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
 | 
			
		||||
+               "PID %d from where signature request"
 | 
			
		||||
+               "came, does not belong to bit-rot daemon."
 | 
			
		||||
+               "Unwinding the fop",
 | 
			
		||||
+               frame->root->pid);
 | 
			
		||||
         goto dofop;
 | 
			
		||||
+    }
 | 
			
		||||
 
 | 
			
		||||
     ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
 | 
			
		||||
-    if (ret)
 | 
			
		||||
+    if (ret) {
 | 
			
		||||
+        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
 | 
			
		||||
+               "failed to prepare the signature for %s. Unwinding the fop",
 | 
			
		||||
+               uuid_utoa(fd->inode->gfid));
 | 
			
		||||
         goto dofop;
 | 
			
		||||
+    }
 | 
			
		||||
     if (fakesuccess) {
 | 
			
		||||
         op_ret = op_errno = 0;
 | 
			
		||||
         goto dofop;
 | 
			
		||||
@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
 | 
			
		||||
     /* object signature request */
 | 
			
		||||
     ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign);
 | 
			
		||||
     if (!ret) {
 | 
			
		||||
+        gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
 | 
			
		||||
+                     uuid_utoa(fd->inode->gfid));
 | 
			
		||||
         br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
 | 
			
		||||
         goto done;
 | 
			
		||||
     }
 | 
			
		||||
-- 
 | 
			
		||||
1.8.3.1
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										181
									
								
								0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										181
									
								
								0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,181 @@
 | 
			
		||||
From 80eef2f52bb92ed740ac00eeb11ee7a3e7fffff2 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Raghavendra Bhat <raghavendra@redhat.com>
 | 
			
		||||
Date: Mon, 11 Mar 2019 12:16:50 -0400
 | 
			
		||||
Subject: [PATCH 459/465] features/bit-rot: Unconditionally sign the files
 | 
			
		||||
 during oneshot crawl
 | 
			
		||||
 | 
			
		||||
Currently bit-rot feature has an issue with disabling and reenabling it
 | 
			
		||||
on the same volume. Consider enabling bit-rot detection which goes on to
 | 
			
		||||
crawl and sign all the files present in the volume. Then some files are
 | 
			
		||||
modified and the bit-rot daemon goes on to sign the modified files with
 | 
			
		||||
the correct signature. Now, disable bit-rot feature. While signing and
 | 
			
		||||
scrubbing are not happening, previous checksums of the files continue to
 | 
			
		||||
exist as extended attributes. Now, if some files with checksum xattrs get
 | 
			
		||||
modified, they are not signed with new signature as the feature is off.
 | 
			
		||||
 | 
			
		||||
At this point, if the feature is enabled again, the bit rot daemon will
 | 
			
		||||
go and sign those files which do not have any bit-rot specific xattrs
 | 
			
		||||
(i.e. those files which were created after bit-rot was disabled). Whereas
 | 
			
		||||
the files with bit-rot xattrs won't get signed with the proper new checksum.
 | 
			
		||||
At this point if scrubber runs, it finds the on disk checksum and the actual
 | 
			
		||||
checksum of the file to be different (because the file got modified) and
 | 
			
		||||
marks the file as corrupted.
 | 
			
		||||
 | 
			
		||||
FIX:
 | 
			
		||||
 | 
			
		||||
The fix is to unconditionally sign the files when the bit-rot daemon
 | 
			
		||||
comes up (instead of skipping the files with bit-rot xattrs).
 | 
			
		||||
 | 
			
		||||
upstream fix:
 | 
			
		||||
	> patch: https://review.gluster.org/#/c/glusterfs/+/22360/
 | 
			
		||||
	> fixes: #bz1700078
 | 
			
		||||
	> Change-ID: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
 | 
			
		||||
 | 
			
		||||
Change-Id: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
 | 
			
		||||
BUG: 1851424
 | 
			
		||||
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
 | 
			
		||||
Reviewed-on: https://code.engineering.redhat.com/gerrit/208305
 | 
			
		||||
Tested-by: RHGS Build Bot <nigelb@redhat.com>
 | 
			
		||||
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
 | 
			
		||||
---
 | 
			
		||||
 tests/bitrot/bug-1700078.t                  | 87 +++++++++++++++++++++++++++++
 | 
			
		||||
 xlators/features/bit-rot/src/bitd/bit-rot.c | 15 ++++-
 | 
			
		||||
 2 files changed, 101 insertions(+), 1 deletion(-)
 | 
			
		||||
 create mode 100644 tests/bitrot/bug-1700078.t
 | 
			
		||||
 | 
			
		||||
diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t
 | 
			
		||||
new file mode 100644
 | 
			
		||||
index 0000000..f273742
 | 
			
		||||
--- /dev/null
 | 
			
		||||
+++ b/tests/bitrot/bug-1700078.t
 | 
			
		||||
@@ -0,0 +1,87 @@
 | 
			
		||||
+#!/bin/bash
 | 
			
		||||
+
 | 
			
		||||
+. $(dirname $0)/../include.rc
 | 
			
		||||
+. $(dirname $0)/../volume.rc
 | 
			
		||||
+
 | 
			
		||||
+cleanup;
 | 
			
		||||
+
 | 
			
		||||
+## Start glusterd
 | 
			
		||||
+TEST glusterd;
 | 
			
		||||
+TEST pidof glusterd;
 | 
			
		||||
+
 | 
			
		||||
+## Lets create and start the volume
 | 
			
		||||
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
 | 
			
		||||
+TEST $CLI volume start $V0
 | 
			
		||||
+
 | 
			
		||||
+## Enable bitrot for volume $V0
 | 
			
		||||
+TEST $CLI volume bitrot $V0 enable
 | 
			
		||||
+
 | 
			
		||||
+## Turn off quick-read so that it wont cache the contents
 | 
			
		||||
+# of the file in lookup. For corrupted files, it might
 | 
			
		||||
+# end up in reads being served from the cache instead of
 | 
			
		||||
+# an error.
 | 
			
		||||
+TEST $CLI volume set $V0 performance.quick-read off
 | 
			
		||||
+
 | 
			
		||||
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
 | 
			
		||||
+
 | 
			
		||||
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
 | 
			
		||||
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
 | 
			
		||||
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
 | 
			
		||||
+
 | 
			
		||||
+## Set expiry-timeout to 1 sec
 | 
			
		||||
+TEST $CLI volume set $V0 features.expiry-time 1
 | 
			
		||||
+
 | 
			
		||||
+##Mount $V0
 | 
			
		||||
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
 | 
			
		||||
+
 | 
			
		||||
+## Turn off quick-read xlator so that, the contents are not served from the
 | 
			
		||||
+# quick-read cache.
 | 
			
		||||
+TEST $CLI volume set $V0 performance.quick-read off
 | 
			
		||||
+
 | 
			
		||||
+#Create sample file
 | 
			
		||||
+TEST `echo "1234" > $M0/FILE1`
 | 
			
		||||
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
 | 
			
		||||
+
 | 
			
		||||
+##disable bitrot
 | 
			
		||||
+TEST $CLI volume bitrot $V0 disable
 | 
			
		||||
+
 | 
			
		||||
+## modify the file
 | 
			
		||||
+TEST `echo "write" >> $M0/FILE1`
 | 
			
		||||
+
 | 
			
		||||
+# unmount and remount when the file has to be accessed.
 | 
			
		||||
+# This is to ensure that, when the remount happens,
 | 
			
		||||
+# and the file is read, its contents are served from the
 | 
			
		||||
+# brick instead of cache.
 | 
			
		||||
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 | 
			
		||||
+
 | 
			
		||||
+##enable bitrot
 | 
			
		||||
+TEST $CLI volume bitrot $V0 enable
 | 
			
		||||
+
 | 
			
		||||
+# expiry time is set to 1 second. Hence sleep for 2 seconds for the
 | 
			
		||||
+# oneshot crawler to finish its crawling and sign the file properly.
 | 
			
		||||
+sleep 2
 | 
			
		||||
+
 | 
			
		||||
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
 | 
			
		||||
+
 | 
			
		||||
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
 | 
			
		||||
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
 | 
			
		||||
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
 | 
			
		||||
+
 | 
			
		||||
+## Ondemand scrub
 | 
			
		||||
+TEST $CLI volume bitrot $V0 scrub ondemand
 | 
			
		||||
+
 | 
			
		||||
+# the scrub ondemand CLI command, just ensures that
 | 
			
		||||
+# the scrubber has received the ondemand scrub directive
 | 
			
		||||
+# and started. sleep for 2 seconds for scrubber to finish
 | 
			
		||||
+# crawling and marking file(s) as bad (if if finds that
 | 
			
		||||
+# corruption has happened) which are filesystem operations.
 | 
			
		||||
+sleep 2
 | 
			
		||||
+
 | 
			
		||||
+TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1
 | 
			
		||||
+
 | 
			
		||||
+##Mount $V0
 | 
			
		||||
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
 | 
			
		||||
+
 | 
			
		||||
+TEST cat $M0/FILE1
 | 
			
		||||
+
 | 
			
		||||
+cleanup;
 | 
			
		||||
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
 | 
			
		||||
index b8feef7..424c0d5 100644
 | 
			
		||||
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
 | 
			
		||||
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
 | 
			
		||||
@@ -973,6 +973,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
 | 
			
		||||
     int32_t ret = -1;
 | 
			
		||||
     inode_t *linked_inode = NULL;
 | 
			
		||||
     gf_boolean_t need_signing = _gf_false;
 | 
			
		||||
+    gf_boolean_t need_reopen = _gf_true;
 | 
			
		||||
 
 | 
			
		||||
     GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
 | 
			
		||||
     GF_VALIDATE_OR_GOTO("bit-rot", data, out);
 | 
			
		||||
@@ -1046,6 +1047,18 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
 | 
			
		||||
                    uuid_utoa(linked_inode->gfid));
 | 
			
		||||
     } else {
 | 
			
		||||
         need_signing = br_check_object_need_sign(this, xattr, child);
 | 
			
		||||
+
 | 
			
		||||
+        /*
 | 
			
		||||
+         * If we are here means, bitrot daemon has started. Is it just
 | 
			
		||||
+         * a simple restart of the daemon or is it started because the
 | 
			
		||||
+         * feature is enabled is something hard to determine. Hence,
 | 
			
		||||
+         * if need_signing is false (because bit-rot version and signature
 | 
			
		||||
+         * are present), then still go ahead and sign it.
 | 
			
		||||
+         */
 | 
			
		||||
+        if (!need_signing) {
 | 
			
		||||
+            need_signing = _gf_true;
 | 
			
		||||
+            need_reopen = _gf_true;
 | 
			
		||||
+        }
 | 
			
		||||
     }
 | 
			
		||||
 
 | 
			
		||||
     if (!need_signing)
 | 
			
		||||
@@ -1054,7 +1067,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
 | 
			
		||||
     gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN,
 | 
			
		||||
            "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path,
 | 
			
		||||
            uuid_utoa(linked_inode->gfid), child->brick_path);
 | 
			
		||||
-    br_trigger_sign(this, child, linked_inode, &loc, _gf_true);
 | 
			
		||||
+    br_trigger_sign(this, child, linked_inode, &loc, need_reopen);
 | 
			
		||||
 
 | 
			
		||||
     ret = 0;
 | 
			
		||||
 
 | 
			
		||||
-- 
 | 
			
		||||
1.8.3.1
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										152
									
								
								0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										152
									
								
								0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,152 @@
 | 
			
		||||
From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Ashish Pandey <aspandey@redhat.com>
 | 
			
		||||
Date: Thu, 23 Jul 2020 11:07:32 +0530
 | 
			
		||||
Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
 | 
			
		||||
 folder
 | 
			
		||||
 | 
			
		||||
Problem:
 | 
			
		||||
If a gfid is present in indices/xattrop folder while
 | 
			
		||||
the file/dir is actually healthy and all the xattrs are healthy,
 | 
			
		||||
it causes a lot of lookups by shd on an entry which does not need
 | 
			
		||||
to be healed.
 | 
			
		||||
This whole process eats up a lot of CPU without doing meaningful
 | 
			
		||||
work.
 | 
			
		||||
 | 
			
		||||
Solution:
 | 
			
		||||
Set trusted.ec.dirty xattr of the entry so that actual heal process
 | 
			
		||||
happens and at the end of it, during unset of dirty, gfid entry from
 | 
			
		||||
indices/xattrop will be removed.
 | 
			
		||||
 | 
			
		||||
>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/
 | 
			
		||||
>Fixes: #1385
 | 
			
		||||
 | 
			
		||||
Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b
 | 
			
		||||
BUG: 1785714
 | 
			
		||||
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
 | 
			
		||||
Reviewed-on: https://code.engineering.redhat.com/gerrit/208591
 | 
			
		||||
Tested-by: RHGS Build Bot <nigelb@redhat.com>
 | 
			
		||||
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
 | 
			
		||||
---
 | 
			
		||||
 xlators/cluster/ec/src/ec-heal.c  | 73 ++++++++++++++++++++++++++++++++++++++-
 | 
			
		||||
 xlators/cluster/ec/src/ec-types.h |  7 +++-
 | 
			
		||||
 2 files changed, 78 insertions(+), 2 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
 | 
			
		||||
index e2de879..7d25853 100644
 | 
			
		||||
--- a/xlators/cluster/ec/src/ec-heal.c
 | 
			
		||||
+++ b/xlators/cluster/ec/src/ec-heal.c
 | 
			
		||||
@@ -2488,6 +2488,59 @@ out:
 | 
			
		||||
     return ret;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+int
 | 
			
		||||
+ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
 | 
			
		||||
+{
 | 
			
		||||
+    int i = 0;
 | 
			
		||||
+    int ret = 0;
 | 
			
		||||
+    dict_t **xattr = NULL;
 | 
			
		||||
+    loc_t loc = {0};
 | 
			
		||||
+    uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
 | 
			
		||||
+    unsigned char *on = NULL;
 | 
			
		||||
+    default_args_cbk_t *replies = NULL;
 | 
			
		||||
+    dict_t *dict = NULL;
 | 
			
		||||
+
 | 
			
		||||
+    /* Allocate the required memory */
 | 
			
		||||
+    loc.inode = inode_ref(inode);
 | 
			
		||||
+    gf_uuid_copy(loc.gfid, inode->gfid);
 | 
			
		||||
+    on = alloca0(ec->nodes);
 | 
			
		||||
+    EC_REPLIES_ALLOC(replies, ec->nodes);
 | 
			
		||||
+    xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
 | 
			
		||||
+    if (!xattr) {
 | 
			
		||||
+        ret = -ENOMEM;
 | 
			
		||||
+        goto out;
 | 
			
		||||
+    }
 | 
			
		||||
+    dict = dict_new();
 | 
			
		||||
+    if (!dict) {
 | 
			
		||||
+        ret = -ENOMEM;
 | 
			
		||||
+        goto out;
 | 
			
		||||
+    }
 | 
			
		||||
+    for (i = 0; i < ec->nodes; i++) {
 | 
			
		||||
+        xattr[i] = dict;
 | 
			
		||||
+        on[i] = 1;
 | 
			
		||||
+    }
 | 
			
		||||
+    dirty_xattr[EC_METADATA_TXN] = hton64(1);
 | 
			
		||||
+    ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
 | 
			
		||||
+                              (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
 | 
			
		||||
+    if (ret < 0) {
 | 
			
		||||
+        ret = -ENOMEM;
 | 
			
		||||
+        goto out;
 | 
			
		||||
+    }
 | 
			
		||||
+    PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
 | 
			
		||||
+                        ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
 | 
			
		||||
+                        xattr, NULL);
 | 
			
		||||
+out:
 | 
			
		||||
+    if (dict) {
 | 
			
		||||
+        dict_unref(dict);
 | 
			
		||||
+    }
 | 
			
		||||
+    if (xattr) {
 | 
			
		||||
+        GF_FREE(xattr);
 | 
			
		||||
+    }
 | 
			
		||||
+    cluster_replies_wipe(replies, ec->nodes);
 | 
			
		||||
+    loc_wipe(&loc);
 | 
			
		||||
+    return ret;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 void
 | 
			
		||||
 ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
 | 
			
		||||
 {
 | 
			
		||||
@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
 | 
			
		||||
         ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
 | 
			
		||||
                         &need_heal);
 | 
			
		||||
 
 | 
			
		||||
-        if (need_heal == EC_HEAL_NONEED) {
 | 
			
		||||
+        if (need_heal == EC_HEAL_PURGE_INDEX) {
 | 
			
		||||
+            gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
 | 
			
		||||
+                   "Index entry needs to be purged for: %s ",
 | 
			
		||||
+                   uuid_utoa(loc->gfid));
 | 
			
		||||
+            /* We need to send xattrop to set dirty flag so that it can be
 | 
			
		||||
+             * healed and index entry could be removed. We need not to take lock
 | 
			
		||||
+             * on this entry to do so as we are just setting dirty flag which
 | 
			
		||||
+             * actually increases the trusted.ec.dirty count and does not set
 | 
			
		||||
+             * the new value.
 | 
			
		||||
+             * This will make sure that it is not interfering in other fops.*/
 | 
			
		||||
+            ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
 | 
			
		||||
+        } else if (need_heal == EC_HEAL_NONEED) {
 | 
			
		||||
             gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
 | 
			
		||||
                    "Heal is not required for : %s ", uuid_utoa(loc->gfid));
 | 
			
		||||
             goto out;
 | 
			
		||||
@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
 | 
			
		||||
                     goto out;
 | 
			
		||||
                 }
 | 
			
		||||
             }
 | 
			
		||||
+            /* If lock count is 0, all dirty flags are 0 and all the
 | 
			
		||||
+             * versions are macthing then why are we here. It looks
 | 
			
		||||
+             * like something went wrong while removing the index entries
 | 
			
		||||
+             * after completing a successful heal or fop. In this case
 | 
			
		||||
+             * we need to remove this index entry to avoid triggering heal
 | 
			
		||||
+             * in a loop and causing lookups again and again*/
 | 
			
		||||
+            *need_heal = EC_HEAL_PURGE_INDEX;
 | 
			
		||||
         } else {
 | 
			
		||||
             for (i = 0; i < ec->nodes; i++) {
 | 
			
		||||
                 /* Since each lock can only increment the dirty
 | 
			
		||||
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
 | 
			
		||||
index f15429d..700dc39 100644
 | 
			
		||||
--- a/xlators/cluster/ec/src/ec-types.h
 | 
			
		||||
+++ b/xlators/cluster/ec/src/ec-types.h
 | 
			
		||||
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
 | 
			
		||||
 
 | 
			
		||||
 enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
 | 
			
		||||
 
 | 
			
		||||
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
 | 
			
		||||
+enum _ec_heal_need {
 | 
			
		||||
+    EC_HEAL_NONEED,
 | 
			
		||||
+    EC_HEAL_MAYBE,
 | 
			
		||||
+    EC_HEAL_MUST,
 | 
			
		||||
+    EC_HEAL_PURGE_INDEX
 | 
			
		||||
+};
 | 
			
		||||
 
 | 
			
		||||
 enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
 | 
			
		||||
 
 | 
			
		||||
-- 
 | 
			
		||||
1.8.3.1
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										127
									
								
								0461-geo-replication-Fix-IPv6-parsing.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										127
									
								
								0461-geo-replication-Fix-IPv6-parsing.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,127 @@
 | 
			
		||||
From d425ed54261d5bc19aa853854cc3b64647e3c897 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Aravinda Vishwanathapura <aravinda@kadalu.io>
 | 
			
		||||
Date: Sun, 12 Jul 2020 12:42:36 +0530
 | 
			
		||||
Subject: [PATCH 461/465] geo-replication: Fix IPv6 parsing
 | 
			
		||||
 | 
			
		||||
Brick paths in Volinfo used `:` as delimiter, Geo-rep uses split
 | 
			
		||||
based on `:` char. This will go wrong with IPv6.
 | 
			
		||||
 | 
			
		||||
This patch handles the IPv6 case and handles the split properly.
 | 
			
		||||
Backport of:
 | 
			
		||||
   >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24706
 | 
			
		||||
   >Fixes: #1366
 | 
			
		||||
   >Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d
 | 
			
		||||
   >Signed-off-by: Aravinda Vishwanathapura <aravinda@kadalu.io>
 | 
			
		||||
 | 
			
		||||
BUG: 1855966
 | 
			
		||||
Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d
 | 
			
		||||
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
 | 
			
		||||
Reviewed-on: https://code.engineering.redhat.com/gerrit/208610
 | 
			
		||||
Tested-by: RHGS Build Bot <nigelb@redhat.com>
 | 
			
		||||
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
 | 
			
		||||
---
 | 
			
		||||
 geo-replication/syncdaemon/master.py     |  5 ++--
 | 
			
		||||
 geo-replication/syncdaemon/syncdutils.py | 43 +++++++++++++++++++++++++++++---
 | 
			
		||||
 2 files changed, 43 insertions(+), 5 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
 | 
			
		||||
index 3f98337..08e98f8 100644
 | 
			
		||||
--- a/geo-replication/syncdaemon/master.py
 | 
			
		||||
+++ b/geo-replication/syncdaemon/master.py
 | 
			
		||||
@@ -26,7 +26,8 @@ from rconf import rconf
 | 
			
		||||
 from syncdutils import Thread, GsyncdError, escape_space_newline
 | 
			
		||||
 from syncdutils import unescape_space_newline, gauxpfx, escape
 | 
			
		||||
 from syncdutils import lstat, errno_wrap, FreeObject, lf, matching_disk_gfid
 | 
			
		||||
-from syncdutils import NoStimeAvailable, PartialHistoryAvailable
 | 
			
		||||
+from syncdutils import NoStimeAvailable, PartialHistoryAvailable, host_brick_split
 | 
			
		||||
+
 | 
			
		||||
 
 | 
			
		||||
 URXTIME = (-1, 0)
 | 
			
		||||
 
 | 
			
		||||
@@ -1466,7 +1467,7 @@ class GMasterChangelogMixin(GMasterCommon):
 | 
			
		||||
         node = rconf.args.resource_remote
 | 
			
		||||
         node_data = node.split("@")
 | 
			
		||||
         node = node_data[-1]
 | 
			
		||||
-        remote_node_ip = node.split(":")[0]
 | 
			
		||||
+        remote_node_ip, _ = host_brick_split(node)
 | 
			
		||||
         self.status.set_slave_node(remote_node_ip)
 | 
			
		||||
 
 | 
			
		||||
     def changelogs_batch_process(self, changes):
 | 
			
		||||
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
 | 
			
		||||
index 7560fa1..f43e13b 100644
 | 
			
		||||
--- a/geo-replication/syncdaemon/syncdutils.py
 | 
			
		||||
+++ b/geo-replication/syncdaemon/syncdutils.py
 | 
			
		||||
@@ -883,6 +883,19 @@ class Popen(subprocess.Popen):
 | 
			
		||||
             self.errfail()
 | 
			
		||||
 
 | 
			
		||||
 
 | 
			
		||||
+def host_brick_split(value):
 | 
			
		||||
+    """
 | 
			
		||||
+    IPv6 compatible way to split and get the host
 | 
			
		||||
+    and brick information. Example inputs:
 | 
			
		||||
+    node1.example.com:/exports/bricks/brick1/brick
 | 
			
		||||
+    fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick
 | 
			
		||||
+    """
 | 
			
		||||
+    parts = value.split(":")
 | 
			
		||||
+    brick = parts[-1]
 | 
			
		||||
+    hostparts = parts[0:-1]
 | 
			
		||||
+    return (":".join(hostparts), brick)
 | 
			
		||||
+
 | 
			
		||||
+
 | 
			
		||||
 class Volinfo(object):
 | 
			
		||||
 
 | 
			
		||||
     def __init__(self, vol, host='localhost', prelude=[], master=True):
 | 
			
		||||
@@ -925,7 +938,7 @@ class Volinfo(object):
 | 
			
		||||
     @memoize
 | 
			
		||||
     def bricks(self):
 | 
			
		||||
         def bparse(b):
 | 
			
		||||
-            host, dirp = b.find("name").text.split(':', 2)
 | 
			
		||||
+            host, dirp = host_brick_split(b.find("name").text)
 | 
			
		||||
             return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text}
 | 
			
		||||
         return [bparse(b) for b in self.get('brick')]
 | 
			
		||||
 
 | 
			
		||||
@@ -1001,6 +1014,16 @@ class VolinfoFromGconf(object):
 | 
			
		||||
     def is_hot(self, brickpath):
 | 
			
		||||
         return False
 | 
			
		||||
 
 | 
			
		||||
+    def is_uuid(self, value):
 | 
			
		||||
+        try:
 | 
			
		||||
+            uuid.UUID(value)
 | 
			
		||||
+            return True
 | 
			
		||||
+        except ValueError:
 | 
			
		||||
+            return False
 | 
			
		||||
+
 | 
			
		||||
+    def possible_path(self, value):
 | 
			
		||||
+        return "/" in value
 | 
			
		||||
+
 | 
			
		||||
     @property
 | 
			
		||||
     @memoize
 | 
			
		||||
     def bricks(self):
 | 
			
		||||
@@ -1014,8 +1037,22 @@ class VolinfoFromGconf(object):
 | 
			
		||||
         out = []
 | 
			
		||||
         for b in bricks_data:
 | 
			
		||||
             parts = b.split(":")
 | 
			
		||||
-            bpath = parts[2] if len(parts) == 3 else ""
 | 
			
		||||
-            out.append({"host": parts[1], "dir": bpath, "uuid": parts[0]})
 | 
			
		||||
+            b_uuid = None
 | 
			
		||||
+            if self.is_uuid(parts[0]):
 | 
			
		||||
+                b_uuid = parts[0]
 | 
			
		||||
+                # Set all parts except first
 | 
			
		||||
+                parts = parts[1:]
 | 
			
		||||
+
 | 
			
		||||
+            if self.possible_path(parts[-1]):
 | 
			
		||||
+                bpath = parts[-1]
 | 
			
		||||
+                # Set all parts except last
 | 
			
		||||
+                parts = parts[0:-1]
 | 
			
		||||
+
 | 
			
		||||
+            out.append({
 | 
			
		||||
+                "host": ":".join(parts),   # if remaining parts are IPv6 name
 | 
			
		||||
+                "dir": bpath,
 | 
			
		||||
+                "uuid": b_uuid
 | 
			
		||||
+            })
 | 
			
		||||
 
 | 
			
		||||
         return out
 | 
			
		||||
 
 | 
			
		||||
-- 
 | 
			
		||||
1.8.3.1
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										43
									
								
								0462-Issue-with-gf_fill_iatt_for_dirent.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								0462-Issue-with-gf_fill_iatt_for_dirent.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,43 @@
 | 
			
		||||
From f027734165374979bd0bff8ea059dfaadca85e07 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Soumya Koduri <skoduri@redhat.com>
 | 
			
		||||
Date: Thu, 2 Jul 2020 02:07:56 +0530
 | 
			
		||||
Subject: [PATCH 462/465] Issue with gf_fill_iatt_for_dirent
 | 
			
		||||
 | 
			
		||||
In "gf_fill_iatt_for_dirent()", while calculating inode_path for loc,
 | 
			
		||||
the inode should be the parent's. Instead it is loc.inode, which results in an error
 | 
			
		||||
 and eventually lookup/readdirp fails.
 | 
			
		||||
 | 
			
		||||
This patch fixes the same.
 | 
			
		||||
 | 
			
		||||
This is backport of below mainstream fix :
 | 
			
		||||
 | 
			
		||||
> Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5
 | 
			
		||||
> Fixes: #1351
 | 
			
		||||
> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24661/
 | 
			
		||||
 | 
			
		||||
Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5
 | 
			
		||||
BUG: 1853189
 | 
			
		||||
Signed-off-by: Soumya Koduri <skoduri@redhat.com>
 | 
			
		||||
Reviewed-on: https://code.engineering.redhat.com/gerrit/208691
 | 
			
		||||
Tested-by: RHGS Build Bot <nigelb@redhat.com>
 | 
			
		||||
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
 | 
			
		||||
---
 | 
			
		||||
 libglusterfs/src/gf-dirent.c | 2 +-
 | 
			
		||||
 1 file changed, 1 insertion(+), 1 deletion(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
 | 
			
		||||
index f289723..3fa67f2 100644
 | 
			
		||||
--- a/libglusterfs/src/gf-dirent.c
 | 
			
		||||
+++ b/libglusterfs/src/gf-dirent.c
 | 
			
		||||
@@ -277,7 +277,7 @@ gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol)
 | 
			
		||||
     gf_uuid_copy(loc.pargfid, parent->gfid);
 | 
			
		||||
     loc.name = entry->d_name;
 | 
			
		||||
     loc.parent = inode_ref(parent);
 | 
			
		||||
-    ret = inode_path(loc.inode, entry->d_name, &path);
 | 
			
		||||
+    ret = inode_path(loc.parent, entry->d_name, &path);
 | 
			
		||||
     loc.path = path;
 | 
			
		||||
     if (ret < 0)
 | 
			
		||||
         goto out;
 | 
			
		||||
-- 
 | 
			
		||||
1.8.3.1
 | 
			
		||||
 | 
			
		||||
@ -0,0 +1,87 @@
 | 
			
		||||
From 7d87933f648092ae55d57a96fd06e3df975d764c Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Ashish Pandey <aspandey@redhat.com>
 | 
			
		||||
Date: Tue, 18 Aug 2020 10:33:48 +0530
 | 
			
		||||
Subject: [PATCH 463/465] cluster/ec: Change handling of heal failure to avoid
 | 
			
		||||
 crash
 | 
			
		||||
 | 
			
		||||
Problem:
 | 
			
		||||
ec_getxattr_heal_cbk was called with NULL as second argument
 | 
			
		||||
in case heal was failing.
 | 
			
		||||
This function was dereferencing "cookie" argument which caused crash.
 | 
			
		||||
 | 
			
		||||
Solution:
 | 
			
		||||
Cookie is changed to carry the value that was supposed to be
 | 
			
		||||
stored in fop->data, so even in the case when fop is NULL in error
 | 
			
		||||
case, there won't be any NULL dereference.
 | 
			
		||||
 | 
			
		||||
Thanks to Xavi for the suggestion about the fix.
 | 
			
		||||
 | 
			
		||||
>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23050/
 | 
			
		||||
>fixes: bz#1729085
 | 
			
		||||
 | 
			
		||||
Change-Id: I0798000d5cadb17c3c2fbfa1baf77033ffc2bb8c
 | 
			
		||||
BUG: 1852736
 | 
			
		||||
Reviewed-on: https://code.engineering.redhat.com/gerrit/209012
 | 
			
		||||
Tested-by: Ashish Pandey <aspandey@redhat.com>
 | 
			
		||||
Tested-by: RHGS Build Bot <nigelb@redhat.com>
 | 
			
		||||
Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
 | 
			
		||||
---
 | 
			
		||||
 xlators/cluster/ec/src/ec-heal.c       | 11 ++++++-----
 | 
			
		||||
 xlators/cluster/ec/src/ec-inode-read.c |  4 ++--
 | 
			
		||||
 2 files changed, 8 insertions(+), 7 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
 | 
			
		||||
index 7d25853..6e6948b 100644
 | 
			
		||||
--- a/xlators/cluster/ec/src/ec-heal.c
 | 
			
		||||
+++ b/xlators/cluster/ec/src/ec-heal.c
 | 
			
		||||
@@ -1966,7 +1966,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
 | 
			
		||||
 
 | 
			
		||||
         case EC_STATE_REPORT:
 | 
			
		||||
             if (fop->cbks.heal) {
 | 
			
		||||
-                fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
 | 
			
		||||
+                fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
 | 
			
		||||
                                (heal->good | heal->bad), heal->good, heal->bad,
 | 
			
		||||
                                0, NULL);
 | 
			
		||||
             }
 | 
			
		||||
@@ -2022,10 +2022,11 @@ ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
 | 
			
		||||
                    uintptr_t good, uintptr_t bad, uint32_t pending,
 | 
			
		||||
                    dict_t *xdata)
 | 
			
		||||
 {
 | 
			
		||||
-    ec_fop_data_t *fop = cookie;
 | 
			
		||||
-    ec_heal_t *heal = fop->data;
 | 
			
		||||
+    ec_heal_t *heal = cookie;
 | 
			
		||||
 
 | 
			
		||||
-    fop->heal = NULL;
 | 
			
		||||
+    if (heal->fop) {
 | 
			
		||||
+        heal->fop->heal = NULL;
 | 
			
		||||
+    }
 | 
			
		||||
     heal->fop = NULL;
 | 
			
		||||
     heal->error = op_ret < 0 ? op_errno : 0;
 | 
			
		||||
     syncbarrier_wake(heal->data);
 | 
			
		||||
@@ -2669,7 +2670,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
 | 
			
		||||
 out:
 | 
			
		||||
     ec_reset_entry_healing(fop);
 | 
			
		||||
     if (fop->cbks.heal) {
 | 
			
		||||
-        fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
 | 
			
		||||
+        fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
 | 
			
		||||
                        ec_char_array_to_mask(participants, ec->nodes),
 | 
			
		||||
                        mgood & good, mbad & bad, pending, NULL);
 | 
			
		||||
     }
 | 
			
		||||
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
 | 
			
		||||
index e82e8f6..c50d0ad 100644
 | 
			
		||||
--- a/xlators/cluster/ec/src/ec-inode-read.c
 | 
			
		||||
+++ b/xlators/cluster/ec/src/ec-inode-read.c
 | 
			
		||||
@@ -396,8 +396,8 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
 | 
			
		||||
                      uintptr_t good, uintptr_t bad, uint32_t pending,
 | 
			
		||||
                      dict_t *xdata)
 | 
			
		||||
 {
 | 
			
		||||
-    ec_fop_data_t *fop = cookie;
 | 
			
		||||
-    fop_getxattr_cbk_t func = fop->data;
 | 
			
		||||
+    fop_getxattr_cbk_t func = cookie;
 | 
			
		||||
+
 | 
			
		||||
     ec_t *ec = xl->private;
 | 
			
		||||
     dict_t *dict = NULL;
 | 
			
		||||
     char *str;
 | 
			
		||||
-- 
 | 
			
		||||
1.8.3.1
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										102
									
								
								0464-storage-posix-Remove-nr_files-usage.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								0464-storage-posix-Remove-nr_files-usage.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,102 @@
 | 
			
		||||
From 7c51addf7912a94320e6b148bd66f2dbf274c533 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Pranith Kumar K <pkarampu@redhat.com>
 | 
			
		||||
Date: Mon, 11 Mar 2019 14:04:39 +0530
 | 
			
		||||
Subject: [PATCH 464/465] storage/posix: Remove nr_files usage
 | 
			
		||||
 | 
			
		||||
nr_files is supposed to represent the number of files opened in posix.
 | 
			
		||||
Present logic doesn't seem to handle anon-fds because of which the
 | 
			
		||||
counts would always be wrong.
 | 
			
		||||
 | 
			
		||||
I don't remember anyone using this value in debugging any problem probably
 | 
			
		||||
because we always have 'ls -l /proc/<pid>/fd' which not only prints the
 | 
			
		||||
fds that are active but also prints their paths. It also handles directories
 | 
			
		||||
and anon-fds which actually opened the file. So removing this code
 | 
			
		||||
instead of fixing the buggy logic to have the nr_files.
 | 
			
		||||
 | 
			
		||||
> fixes bz#1688106
 | 
			
		||||
> Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040
 | 
			
		||||
> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 | 
			
		||||
> (Cherry pick from commit f5987d38f216a3142dfe45f03bf66ff4827d9b55)
 | 
			
		||||
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22333/)
 | 
			
		||||
 | 
			
		||||
Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040
 | 
			
		||||
BUG: 1851989
 | 
			
		||||
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
 | 
			
		||||
Reviewed-on: https://code.engineering.redhat.com/gerrit/209468
 | 
			
		||||
Tested-by: RHGS Build Bot <nigelb@redhat.com>
 | 
			
		||||
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
 | 
			
		||||
---
 | 
			
		||||
 xlators/storage/posix/src/posix-common.c       | 2 --
 | 
			
		||||
 xlators/storage/posix/src/posix-entry-ops.c    | 2 --
 | 
			
		||||
 xlators/storage/posix/src/posix-inode-fd-ops.c | 2 --
 | 
			
		||||
 xlators/storage/posix/src/posix.h              | 1 -
 | 
			
		||||
 4 files changed, 7 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
 | 
			
		||||
index ac53796..b317627 100644
 | 
			
		||||
--- a/xlators/storage/posix/src/posix-common.c
 | 
			
		||||
+++ b/xlators/storage/posix/src/posix-common.c
 | 
			
		||||
@@ -128,7 +128,6 @@ posix_priv(xlator_t *this)
 | 
			
		||||
     gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value));
 | 
			
		||||
     gf_proc_dump_write("max_write", "%" PRId64,
 | 
			
		||||
                        GF_ATOMIC_GET(priv->write_value));
 | 
			
		||||
-    gf_proc_dump_write("nr_files", "%" PRId64, GF_ATOMIC_GET(priv->nr_files));
 | 
			
		||||
 
 | 
			
		||||
     return 0;
 | 
			
		||||
 }
 | 
			
		||||
@@ -815,7 +814,6 @@ posix_init(xlator_t *this)
 | 
			
		||||
     }
 | 
			
		||||
 
 | 
			
		||||
     LOCK_INIT(&_private->lock);
 | 
			
		||||
-    GF_ATOMIC_INIT(_private->nr_files, 0);
 | 
			
		||||
     GF_ATOMIC_INIT(_private->read_value, 0);
 | 
			
		||||
     GF_ATOMIC_INIT(_private->write_value, 0);
 | 
			
		||||
 
 | 
			
		||||
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
 | 
			
		||||
index 65650b3..b3a5381 100644
 | 
			
		||||
--- a/xlators/storage/posix/src/posix-entry-ops.c
 | 
			
		||||
+++ b/xlators/storage/posix/src/posix-entry-ops.c
 | 
			
		||||
@@ -2243,8 +2243,6 @@ fill_stat:
 | 
			
		||||
         gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
 | 
			
		||||
                "failed to set the fd context path=%s fd=%p", real_path, fd);
 | 
			
		||||
 
 | 
			
		||||
-    GF_ATOMIC_INC(priv->nr_files);
 | 
			
		||||
-
 | 
			
		||||
     op_ret = 0;
 | 
			
		||||
 
 | 
			
		||||
 out:
 | 
			
		||||
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
 | 
			
		||||
index d135d8b..81f4a6b 100644
 | 
			
		||||
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
 | 
			
		||||
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
 | 
			
		||||
@@ -1605,7 +1605,6 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
 | 
			
		||||
         gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
 | 
			
		||||
                "failed to set the fd context path=%s fd=%p", real_path, fd);
 | 
			
		||||
 
 | 
			
		||||
-    GF_ATOMIC_INC(priv->nr_files);
 | 
			
		||||
     op_ret = 0;
 | 
			
		||||
 
 | 
			
		||||
 out:
 | 
			
		||||
@@ -2526,7 +2525,6 @@ posix_release(xlator_t *this, fd_t *fd)
 | 
			
		||||
     if (!priv)
 | 
			
		||||
         goto out;
 | 
			
		||||
 
 | 
			
		||||
-    GF_ATOMIC_DEC(priv->nr_files);
 | 
			
		||||
 out:
 | 
			
		||||
     return 0;
 | 
			
		||||
 }
 | 
			
		||||
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
 | 
			
		||||
index 61495a7..124dbb4 100644
 | 
			
		||||
--- a/xlators/storage/posix/src/posix.h
 | 
			
		||||
+++ b/xlators/storage/posix/src/posix.h
 | 
			
		||||
@@ -154,7 +154,6 @@ struct posix_private {
 | 
			
		||||
 
 | 
			
		||||
     gf_atomic_t read_value;  /* Total read, from init */
 | 
			
		||||
     gf_atomic_t write_value; /* Total write, from init */
 | 
			
		||||
-    gf_atomic_t nr_files;
 | 
			
		||||
     /*
 | 
			
		||||
        In some cases, two exported volumes may reside on the same
 | 
			
		||||
        partition on the server. Sending statvfs info for both
 | 
			
		||||
-- 
 | 
			
		||||
1.8.3.1
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										384
									
								
								0465-posix-Implement-a-janitor-thread-to-close-fd.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										384
									
								
								0465-posix-Implement-a-janitor-thread-to-close-fd.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,384 @@
 | 
			
		||||
From 143b93b230b429cc712353243ed794b68494c040 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Mohit Agrawal <moagrawa@redhat.com>
 | 
			
		||||
Date: Mon, 27 Jul 2020 18:08:00 +0530
 | 
			
		||||
Subject: [PATCH 465/465] posix: Implement a janitor thread to close fd
 | 
			
		||||
 | 
			
		||||
Problem: In the commit fb20713b380e1df8d7f9e9df96563be2f9144fd6 we use
 | 
			
		||||
         synctask to close fd but we have found the patch is reducing the
 | 
			
		||||
         performance
 | 
			
		||||
 | 
			
		||||
Solution: Use janitor thread to close fd's and save the pfd ctx into
 | 
			
		||||
          ctx janitor list and also save the posix_xlator into pfd object to
 | 
			
		||||
          avoid the race condition during cleanup in brick_mux environment
 | 
			
		||||
 | 
			
		||||
> Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092
 | 
			
		||||
> Fixes: #1396
 | 
			
		||||
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
 | 
			
		||||
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24755/)
 | 
			
		||||
> (Cherry pick from commit 41b9616435cbdf671805856e487e373060c9455b)
 | 
			
		||||
 | 
			
		||||
Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092
 | 
			
		||||
BUG: 1851989
 | 
			
		||||
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
 | 
			
		||||
Reviewed-on: https://code.engineering.redhat.com/gerrit/209448
 | 
			
		||||
Tested-by: RHGS Build Bot <nigelb@redhat.com>
 | 
			
		||||
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
 | 
			
		||||
---
 | 
			
		||||
 glusterfsd/src/glusterfsd.c                    |  4 ++
 | 
			
		||||
 libglusterfs/src/glusterfs/glusterfs.h         |  7 ++
 | 
			
		||||
 rpc/rpc-lib/src/rpcsvc.c                       |  6 --
 | 
			
		||||
 xlators/storage/posix/src/posix-common.c       | 34 +++++++++-
 | 
			
		||||
 xlators/storage/posix/src/posix-helpers.c      | 93 ++++++++++++++++++++++++++
 | 
			
		||||
 xlators/storage/posix/src/posix-inode-fd-ops.c | 33 ++++-----
 | 
			
		||||
 xlators/storage/posix/src/posix.h              |  7 ++
 | 
			
		||||
 7 files changed, 161 insertions(+), 23 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
 | 
			
		||||
index 9821180..955bf1d 100644
 | 
			
		||||
--- a/glusterfsd/src/glusterfsd.c
 | 
			
		||||
+++ b/glusterfsd/src/glusterfsd.c
 | 
			
		||||
@@ -1839,6 +1839,10 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
 | 
			
		||||
 
 | 
			
		||||
     INIT_LIST_HEAD(&cmd_args->xlator_options);
 | 
			
		||||
     INIT_LIST_HEAD(&cmd_args->volfile_servers);
 | 
			
		||||
+    ctx->pxl_count = 0;
 | 
			
		||||
+    pthread_mutex_init(&ctx->fd_lock, NULL);
 | 
			
		||||
+    pthread_cond_init(&ctx->fd_cond, NULL);
 | 
			
		||||
+    INIT_LIST_HEAD(&ctx->janitor_fds);
 | 
			
		||||
 
 | 
			
		||||
     lim.rlim_cur = RLIM_INFINITY;
 | 
			
		||||
     lim.rlim_max = RLIM_INFINITY;
 | 
			
		||||
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
 | 
			
		||||
index 495a4d7..bf6a987 100644
 | 
			
		||||
--- a/libglusterfs/src/glusterfs/glusterfs.h
 | 
			
		||||
+++ b/libglusterfs/src/glusterfs/glusterfs.h
 | 
			
		||||
@@ -733,6 +733,13 @@ struct _glusterfs_ctx {
 | 
			
		||||
     } stats;
 | 
			
		||||
 
 | 
			
		||||
     struct list_head volfile_list;
 | 
			
		||||
+    /* Add members to manage janitor threads for cleanup fd */
 | 
			
		||||
+    struct list_head janitor_fds;
 | 
			
		||||
+    pthread_cond_t fd_cond;
 | 
			
		||||
+    pthread_mutex_t fd_lock;
 | 
			
		||||
+    pthread_t janitor;
 | 
			
		||||
+    /* This variable is used to save the total posix xlator count */
 | 
			
		||||
+    uint32_t pxl_count;
 | 
			
		||||
 
 | 
			
		||||
     char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
 | 
			
		||||
 };
 | 
			
		||||
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
 | 
			
		||||
index 23ca1fd..3f184bf 100644
 | 
			
		||||
--- a/rpc/rpc-lib/src/rpcsvc.c
 | 
			
		||||
+++ b/rpc/rpc-lib/src/rpcsvc.c
 | 
			
		||||
@@ -375,12 +375,6 @@ rpcsvc_program_actor(rpcsvc_request_t *req)
 | 
			
		||||
 
 | 
			
		||||
     req->ownthread = program->ownthread;
 | 
			
		||||
     req->synctask = program->synctask;
 | 
			
		||||
-    if (((req->procnum == GFS3_OP_RELEASE) ||
 | 
			
		||||
-         (req->procnum == GFS3_OP_RELEASEDIR)) &&
 | 
			
		||||
-        (program->prognum == GLUSTER_FOP_PROGRAM)) {
 | 
			
		||||
-        req->ownthread = _gf_false;
 | 
			
		||||
-        req->synctask = _gf_true;
 | 
			
		||||
-    }
 | 
			
		||||
 
 | 
			
		||||
     err = SUCCESS;
 | 
			
		||||
     gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s",
 | 
			
		||||
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
 | 
			
		||||
index b317627..c5a43a1 100644
 | 
			
		||||
--- a/xlators/storage/posix/src/posix-common.c
 | 
			
		||||
+++ b/xlators/storage/posix/src/posix-common.c
 | 
			
		||||
@@ -150,6 +150,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
 | 
			
		||||
     struct timespec sleep_till = {
 | 
			
		||||
         0,
 | 
			
		||||
     };
 | 
			
		||||
+    glusterfs_ctx_t *ctx = this->ctx;
 | 
			
		||||
 
 | 
			
		||||
     switch (event) {
 | 
			
		||||
         case GF_EVENT_PARENT_UP: {
 | 
			
		||||
@@ -160,8 +161,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
 | 
			
		||||
         case GF_EVENT_PARENT_DOWN: {
 | 
			
		||||
             if (!victim->cleanup_starting)
 | 
			
		||||
                 break;
 | 
			
		||||
-            gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
 | 
			
		||||
-                   victim->name);
 | 
			
		||||
 
 | 
			
		||||
             if (priv->janitor) {
 | 
			
		||||
                 pthread_mutex_lock(&priv->janitor_mutex);
 | 
			
		||||
@@ -187,6 +186,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
 | 
			
		||||
                 GF_FREE(priv->janitor);
 | 
			
		||||
             }
 | 
			
		||||
             priv->janitor = NULL;
 | 
			
		||||
+            pthread_mutex_lock(&ctx->fd_lock);
 | 
			
		||||
+            {
 | 
			
		||||
+                while (priv->rel_fdcount > 0) {
 | 
			
		||||
+                    pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock);
 | 
			
		||||
+                }
 | 
			
		||||
+            }
 | 
			
		||||
+            pthread_mutex_unlock(&ctx->fd_lock);
 | 
			
		||||
+
 | 
			
		||||
+            gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
 | 
			
		||||
+                   victim->name);
 | 
			
		||||
             default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
 | 
			
		||||
         } break;
 | 
			
		||||
         default:
 | 
			
		||||
@@ -1038,7 +1047,13 @@ posix_init(xlator_t *this)
 | 
			
		||||
     pthread_cond_init(&_private->fsync_cond, NULL);
 | 
			
		||||
     pthread_mutex_init(&_private->janitor_mutex, NULL);
 | 
			
		||||
     pthread_cond_init(&_private->janitor_cond, NULL);
 | 
			
		||||
+    pthread_cond_init(&_private->fd_cond, NULL);
 | 
			
		||||
     INIT_LIST_HEAD(&_private->fsyncs);
 | 
			
		||||
+    _private->rel_fdcount = 0;
 | 
			
		||||
+    ret = posix_spawn_ctx_janitor_thread(this);
 | 
			
		||||
+    if (ret)
 | 
			
		||||
+        goto out;
 | 
			
		||||
+
 | 
			
		||||
     ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this,
 | 
			
		||||
                            "posixfsy");
 | 
			
		||||
     if (ret) {
 | 
			
		||||
@@ -1133,6 +1148,8 @@ posix_fini(xlator_t *this)
 | 
			
		||||
 {
 | 
			
		||||
     struct posix_private *priv = this->private;
 | 
			
		||||
     gf_boolean_t health_check = _gf_false;
 | 
			
		||||
+    glusterfs_ctx_t *ctx = this->ctx;
 | 
			
		||||
+    uint32_t count;
 | 
			
		||||
     int ret = 0;
 | 
			
		||||
 
 | 
			
		||||
     if (!priv)
 | 
			
		||||
@@ -1166,6 +1183,19 @@ posix_fini(xlator_t *this)
 | 
			
		||||
         priv->janitor = NULL;
 | 
			
		||||
     }
 | 
			
		||||
 
 | 
			
		||||
+    pthread_mutex_lock(&ctx->fd_lock);
 | 
			
		||||
+    {
 | 
			
		||||
+        count = --ctx->pxl_count;
 | 
			
		||||
+        if (count == 0) {
 | 
			
		||||
+            pthread_cond_signal(&ctx->fd_cond);
 | 
			
		||||
+        }
 | 
			
		||||
+    }
 | 
			
		||||
+    pthread_mutex_unlock(&ctx->fd_lock);
 | 
			
		||||
+
 | 
			
		||||
+    if (count == 0) {
 | 
			
		||||
+        pthread_join(ctx->janitor, NULL);
 | 
			
		||||
+    }
 | 
			
		||||
+
 | 
			
		||||
     if (priv->fsyncer) {
 | 
			
		||||
         (void)gf_thread_cleanup_xint(priv->fsyncer);
 | 
			
		||||
         priv->fsyncer = 0;
 | 
			
		||||
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
 | 
			
		||||
index 39dbcce..73a44be 100644
 | 
			
		||||
--- a/xlators/storage/posix/src/posix-helpers.c
 | 
			
		||||
+++ b/xlators/storage/posix/src/posix-helpers.c
 | 
			
		||||
@@ -1582,6 +1582,99 @@ unlock:
 | 
			
		||||
     return;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+static struct posix_fd *
 | 
			
		||||
+janitor_get_next_fd(glusterfs_ctx_t *ctx)
 | 
			
		||||
+{
 | 
			
		||||
+    struct posix_fd *pfd = NULL;
 | 
			
		||||
+
 | 
			
		||||
+    while (list_empty(&ctx->janitor_fds)) {
 | 
			
		||||
+        if (ctx->pxl_count == 0) {
 | 
			
		||||
+            return NULL;
 | 
			
		||||
+        }
 | 
			
		||||
+
 | 
			
		||||
+        pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock);
 | 
			
		||||
+    }
 | 
			
		||||
+
 | 
			
		||||
+    pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list);
 | 
			
		||||
+    list_del_init(&pfd->list);
 | 
			
		||||
+
 | 
			
		||||
+    return pfd;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void
 | 
			
		||||
+posix_close_pfd(xlator_t *xl, struct posix_fd *pfd)
 | 
			
		||||
+{
 | 
			
		||||
+    THIS = xl;
 | 
			
		||||
+
 | 
			
		||||
+    if (pfd->dir == NULL) {
 | 
			
		||||
+        gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd);
 | 
			
		||||
+        sys_close(pfd->fd);
 | 
			
		||||
+    } else {
 | 
			
		||||
+        gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir);
 | 
			
		||||
+        sys_closedir(pfd->dir);
 | 
			
		||||
+    }
 | 
			
		||||
+
 | 
			
		||||
+    GF_FREE(pfd);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void *
 | 
			
		||||
+posix_ctx_janitor_thread_proc(void *data)
 | 
			
		||||
+{
 | 
			
		||||
+    xlator_t *xl;
 | 
			
		||||
+    struct posix_fd *pfd;
 | 
			
		||||
+    glusterfs_ctx_t *ctx = NULL;
 | 
			
		||||
+    struct posix_private *priv_fd;
 | 
			
		||||
+
 | 
			
		||||
+    ctx = data;
 | 
			
		||||
+
 | 
			
		||||
+    pthread_mutex_lock(&ctx->fd_lock);
 | 
			
		||||
+
 | 
			
		||||
+    while ((pfd = janitor_get_next_fd(ctx)) != NULL) {
 | 
			
		||||
+        pthread_mutex_unlock(&ctx->fd_lock);
 | 
			
		||||
+
 | 
			
		||||
+        xl = pfd->xl;
 | 
			
		||||
+        posix_close_pfd(xl, pfd);
 | 
			
		||||
+
 | 
			
		||||
+        pthread_mutex_lock(&ctx->fd_lock);
 | 
			
		||||
+
 | 
			
		||||
+        priv_fd = xl->private;
 | 
			
		||||
+        priv_fd->rel_fdcount--;
 | 
			
		||||
+        if (!priv_fd->rel_fdcount)
 | 
			
		||||
+            pthread_cond_signal(&priv_fd->fd_cond);
 | 
			
		||||
+    }
 | 
			
		||||
+
 | 
			
		||||
+    pthread_mutex_unlock(&ctx->fd_lock);
 | 
			
		||||
+
 | 
			
		||||
+    return NULL;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+int
 | 
			
		||||
+posix_spawn_ctx_janitor_thread(xlator_t *this)
 | 
			
		||||
+{
 | 
			
		||||
+    int ret = 0;
 | 
			
		||||
+    glusterfs_ctx_t *ctx = NULL;
 | 
			
		||||
+
 | 
			
		||||
+    ctx = this->ctx;
 | 
			
		||||
+
 | 
			
		||||
+    pthread_mutex_lock(&ctx->fd_lock);
 | 
			
		||||
+    {
 | 
			
		||||
+        if (ctx->pxl_count++ == 0) {
 | 
			
		||||
+            ret = gf_thread_create(&ctx->janitor, NULL,
 | 
			
		||||
+                                   posix_ctx_janitor_thread_proc, ctx,
 | 
			
		||||
+                                   "posixctxjan");
 | 
			
		||||
+
 | 
			
		||||
+            if (ret) {
 | 
			
		||||
+                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
 | 
			
		||||
+                       "spawning janitor thread failed");
 | 
			
		||||
+                ctx->pxl_count--;
 | 
			
		||||
+            }
 | 
			
		||||
+        }
 | 
			
		||||
+    }
 | 
			
		||||
+    pthread_mutex_unlock(&ctx->fd_lock);
 | 
			
		||||
+
 | 
			
		||||
+    return ret;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 static int
 | 
			
		||||
 is_fresh_file(int64_t ctime_sec)
 | 
			
		||||
 {
 | 
			
		||||
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
 | 
			
		||||
index 81f4a6b..21119ea 100644
 | 
			
		||||
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
 | 
			
		||||
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
 | 
			
		||||
@@ -1352,6 +1352,22 @@ out:
 | 
			
		||||
     return 0;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+static void
 | 
			
		||||
+posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd)
 | 
			
		||||
+{
 | 
			
		||||
+    glusterfs_ctx_t *ctx = this->ctx;
 | 
			
		||||
+    struct posix_private *priv = this->private;
 | 
			
		||||
+
 | 
			
		||||
+    pfd->xl = this;
 | 
			
		||||
+    pthread_mutex_lock(&ctx->fd_lock);
 | 
			
		||||
+    {
 | 
			
		||||
+        list_add_tail(&pfd->list, &ctx->janitor_fds);
 | 
			
		||||
+        priv->rel_fdcount++;
 | 
			
		||||
+        pthread_cond_signal(&ctx->fd_cond);
 | 
			
		||||
+    }
 | 
			
		||||
+    pthread_mutex_unlock(&ctx->fd_lock);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 int32_t
 | 
			
		||||
 posix_releasedir(xlator_t *this, fd_t *fd)
 | 
			
		||||
 {
 | 
			
		||||
@@ -1374,11 +1390,7 @@ posix_releasedir(xlator_t *this, fd_t *fd)
 | 
			
		||||
                "pfd->dir is NULL for fd=%p", fd);
 | 
			
		||||
         goto out;
 | 
			
		||||
     }
 | 
			
		||||
-
 | 
			
		||||
-    gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
 | 
			
		||||
-
 | 
			
		||||
-    sys_closedir(pfd->dir);
 | 
			
		||||
-    GF_FREE(pfd);
 | 
			
		||||
+    posix_add_fd_to_cleanup(this, pfd);
 | 
			
		||||
 
 | 
			
		||||
 out:
 | 
			
		||||
     return 0;
 | 
			
		||||
@@ -2494,7 +2506,6 @@ out:
 | 
			
		||||
 int32_t
 | 
			
		||||
 posix_release(xlator_t *this, fd_t *fd)
 | 
			
		||||
 {
 | 
			
		||||
-    struct posix_private *priv = NULL;
 | 
			
		||||
     struct posix_fd *pfd = NULL;
 | 
			
		||||
     int ret = -1;
 | 
			
		||||
     uint64_t tmp_pfd = 0;
 | 
			
		||||
@@ -2502,8 +2513,6 @@ posix_release(xlator_t *this, fd_t *fd)
 | 
			
		||||
     VALIDATE_OR_GOTO(this, out);
 | 
			
		||||
     VALIDATE_OR_GOTO(fd, out);
 | 
			
		||||
 
 | 
			
		||||
-    priv = this->private;
 | 
			
		||||
-
 | 
			
		||||
     ret = fd_ctx_del(fd, this, &tmp_pfd);
 | 
			
		||||
     if (ret < 0) {
 | 
			
		||||
         gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
 | 
			
		||||
@@ -2517,13 +2526,7 @@ posix_release(xlator_t *this, fd_t *fd)
 | 
			
		||||
                "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd);
 | 
			
		||||
     }
 | 
			
		||||
 
 | 
			
		||||
-    gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir);
 | 
			
		||||
-
 | 
			
		||||
-    sys_close(pfd->fd);
 | 
			
		||||
-    GF_FREE(pfd);
 | 
			
		||||
-
 | 
			
		||||
-    if (!priv)
 | 
			
		||||
-        goto out;
 | 
			
		||||
+    posix_add_fd_to_cleanup(this, pfd);
 | 
			
		||||
 
 | 
			
		||||
 out:
 | 
			
		||||
     return 0;
 | 
			
		||||
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
 | 
			
		||||
index 124dbb4..07f367b 100644
 | 
			
		||||
--- a/xlators/storage/posix/src/posix.h
 | 
			
		||||
+++ b/xlators/storage/posix/src/posix.h
 | 
			
		||||
@@ -134,6 +134,8 @@ struct posix_fd {
 | 
			
		||||
     off_t dir_eof; /* offset at dir EOF */
 | 
			
		||||
     int odirect;
 | 
			
		||||
     struct list_head list; /* to add to the janitor list */
 | 
			
		||||
+    xlator_t *xl;
 | 
			
		||||
+    char _pad[4]; /* manual padding */
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
 struct posix_private {
 | 
			
		||||
@@ -204,6 +206,7 @@ struct posix_private {
 | 
			
		||||
     pthread_cond_t fsync_cond;
 | 
			
		||||
     pthread_mutex_t janitor_mutex;
 | 
			
		||||
     pthread_cond_t janitor_cond;
 | 
			
		||||
+    pthread_cond_t fd_cond;
 | 
			
		||||
     int fsync_queue_count;
 | 
			
		||||
 
 | 
			
		||||
     enum {
 | 
			
		||||
@@ -259,6 +262,7 @@ struct posix_private {
 | 
			
		||||
     gf_boolean_t fips_mode_rchecksum;
 | 
			
		||||
     gf_boolean_t ctime;
 | 
			
		||||
     gf_boolean_t janitor_task_stop;
 | 
			
		||||
+    uint32_t rel_fdcount;
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
 typedef struct {
 | 
			
		||||
@@ -665,6 +669,9 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,
 | 
			
		||||
 int
 | 
			
		||||
 posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);
 | 
			
		||||
 
 | 
			
		||||
+int
 | 
			
		||||
+posix_spawn_ctx_janitor_thread(xlator_t *this);
 | 
			
		||||
+
 | 
			
		||||
 void
 | 
			
		||||
 posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
 | 
			
		||||
 
 | 
			
		||||
-- 
 | 
			
		||||
1.8.3.1
 | 
			
		||||
 | 
			
		||||
@ -237,7 +237,7 @@ Release:          0.1%{?prereltag:.%{prereltag}}%{?dist}
 | 
			
		||||
%else
 | 
			
		||||
Name:             glusterfs
 | 
			
		||||
Version:          6.0
 | 
			
		||||
Release:          40%{?dist}
 | 
			
		||||
Release:          41%{?dist}
 | 
			
		||||
ExcludeArch:      i686
 | 
			
		||||
%endif
 | 
			
		||||
License:          GPLv2 or LGPLv3+
 | 
			
		||||
@ -771,6 +771,15 @@ Patch0453: 0453-glusterd-add-brick-command-failure.patch
 | 
			
		||||
Patch0454: 0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch
 | 
			
		||||
Patch0455: 0455-locks-prevent-deletion-of-locked-entries.patch
 | 
			
		||||
Patch0456: 0456-add-clean-local-after-grant-lock.patch
 | 
			
		||||
Patch0457: 0457-cluster-ec-Improve-detection-of-new-heals.patch
 | 
			
		||||
Patch0458: 0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch
 | 
			
		||||
Patch0459: 0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch
 | 
			
		||||
Patch0460: 0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch
 | 
			
		||||
Patch0461: 0461-geo-replication-Fix-IPv6-parsing.patch
 | 
			
		||||
Patch0462: 0462-Issue-with-gf_fill_iatt_for_dirent.patch
 | 
			
		||||
Patch0463: 0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch
 | 
			
		||||
Patch0464: 0464-storage-posix-Remove-nr_files-usage.patch
 | 
			
		||||
Patch0465: 0465-posix-Implement-a-janitor-thread-to-close-fd.patch
 | 
			
		||||
 | 
			
		||||
%description
 | 
			
		||||
GlusterFS is a distributed file-system capable of scaling to several
 | 
			
		||||
@ -2511,6 +2520,9 @@ fi
 | 
			
		||||
%endif
 | 
			
		||||
 | 
			
		||||
%changelog
 | 
			
		||||
* Tue Aug 25 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-41
 | 
			
		||||
- fixes bugs bz#1785714 bz#1851424 bz#1851989 bz#1852736 bz#1853189 bz#1855966
 | 
			
		||||
 | 
			
		||||
* Tue Jul 21 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-40
 | 
			
		||||
- fixes bugs bz#1812789 bz#1844359 bz#1847081 bz#1854165
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user