glusterfs/SOURCES/0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch

From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC <rkavunga@redhat.com>
Date: Mon, 29 Apr 2019 13:22:32 +0530
Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec
 fini

We were not properly cleaning up self-heal daemon resources
during ec fini. With shd multiplexing, it is absolutely
necessary to clean up all the resources during ec fini.

Back port of
 upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/
 >Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
 >fixes: bz#1703948
 >Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>

BUG: 1703434
Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/169994
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 libglusterfs/src/syncop-utils.c          |  2 +
 xlators/cluster/afr/src/afr-self-heald.c |  5 +++
 xlators/cluster/ec/src/ec-heald.c        | 77 +++++++++++++++++++++++++++-----
 xlators/cluster/ec/src/ec-heald.h        |  3 ++
 xlators/cluster/ec/src/ec-messages.h     |  3 +-
 xlators/cluster/ec/src/ec.c              | 47 +++++++++++++++++++
 6 files changed, 124 insertions(+), 13 deletions(-)

diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
index b842142..4167db4 100644
--- a/libglusterfs/src/syncop-utils.c
+++ b/libglusterfs/src/syncop-utils.c
@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
 
     if (frame) {
         this = frame->this;
+    } else {
+        this = THIS;
     }
 
     /*For this functionality to be implemented in general, we need
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 8bc4720..522fe5d 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     afr_private_t *priv = NULL;
 
     priv = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (!priv->shd.enabled)
         return -EBUSY;
 
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index cba111a..edf5e11 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -71,6 +71,11 @@ disabled_loop:
             break;
     }
 
+    if (ec->shutdown) {
+        healer->running = _gf_false;
+        return -1;
+    }
+
     ret = healer->rerun;
     healer->rerun = 0;
 
@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
         goto out;
     }
 
+    _mask_cancellation();
     ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
                              healer, ec_shd_index_heal, xdata,
                              ec->shd.max_threads, ec->shd.wait_qlength);
+    _unmask_cancellation();
 out:
     if (xdata)
         dict_unref(xdata);
@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     int ret = 0;
 
     ec = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (ec->xl_up_count <= ec->fragments) {
         return -ENOTCONN;
     }
@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
 {
     ec_t *ec = NULL;
     loc_t loc = {0};
+    int ret = -1;
 
     ec = healer->this->private;
     loc.inode = inode;
-    return syncop_ftw(ec->xl_list[healer->subvol], &loc,
-                      GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _mask_cancellation();
+    ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+                     GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _unmask_cancellation();
+    return ret;
 }
 
 void *
@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
 {
     struct subvol_healer *healer = NULL;
     xlator_t *this = NULL;
+    int run = 0;
 
     healer = data;
     THIS = this = healer->this;
     ec_t *ec = this->private;
 
     for (;;) {
-        ec_shd_healer_wait(healer);
+        run = ec_shd_healer_wait(healer);
+        if (run == -1)
+            break;
 
         if (ec->xl_up_count > ec->fragments) {
             gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)
 
     rootloc.inode = this->itable->root;
     for (;;) {
-        pthread_mutex_lock(&healer->mutex);
-        {
-            run = __ec_shd_healer_wait(healer);
-            if (!run)
-                healer->running = _gf_false;
-        }
-        pthread_mutex_unlock(&healer->mutex);
-
-        if (!run)
+        run = ec_shd_healer_wait(healer);
+        if (run < 0) {
             break;
+        } else if (run == 0) {
+            continue;
+        }
 
         if (ec->xl_up_count > ec->fragments) {
             gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
@@ -562,3 +577,41 @@ out:
     dict_del(output, this->name);
     return ret;
 }
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+    if (!healer)
+        return;
+
+    pthread_cond_destroy(&healer->cond);
+    pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    ec_t *priv = NULL;
+    int i = 0;
+
+    priv = this->private;
+    if (!priv)
+        return;
+
+    shd = &priv->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < priv->nodes; i++) {
+        healer = &shd->index_healers[i];
+        ec_destroy_healer_object(this, healer);
+
+        healer = &shd->full_healers[i];
+        ec_destroy_healer_object(this, healer);
+    }
+
+    GF_FREE(shd->index_healers);
+    GF_FREE(shd->full_healers);
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 2eda2a7..8184cf4 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
 void
 ec_shd_index_healer_wake(ec_t *ec);
 
+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
 #endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
index 7c28808..ce299bb 100644
--- a/xlators/cluster/ec/src/ec-messages.h
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
            EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
            EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
            EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
-           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
+           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+           EC_MSG_THREAD_CLEANUP_FAILED);
 
 #endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 3c8013e..264582a 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)
 }
 
 void
+ec_cleanup_healer_object(ec_t *ec)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    void *res = NULL;
+    int i = 0;
+    gf_boolean_t is_join = _gf_false;
+
+    shd = &ec->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < ec->nodes; i++) {
+        healer = &shd->index_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+
+        healer = &shd->full_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+    }
+}
+void
 ec_pending_fops_completed(ec_t *ec)
 {
     if (ec->shutdown) {
@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
         /* If there aren't pending fops running after we have waken up
          * them, we immediately propagate the notification. */
         propagate = ec_disable_delays(ec);
+        ec_cleanup_healer_object(ec);
         goto unlock;
     }
 
@@ -759,6 +805,7 @@ failed:
 void
 fini(xlator_t *this)
 {
+    ec_selfheal_daemon_fini(this);
     __ec_destroy_private(this);
 }
 
-- 
1.8.3.1
import glusterfs-6.0-15.el8 2019-11-06 16:44:52 +00:00			`From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001`
			`From: Mohammed Rafi KC <rkavunga@redhat.com>`
			`Date: Mon, 29 Apr 2019 13:22:32 +0530`
			`Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec`
			`fini`

			`We were not properly cleaning up self-heal daemon resources`
			`during ec fini. With shd multiplexing, it is absolutely`
			`necessary to clean up all the resources during ec fini.`

			`Back port of`
			`upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/`
			`>Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2`
			`>fixes: bz#1703948`
			`>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>`

			`BUG: 1703434`
			`Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217`
			`Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>`
			`Reviewed-on: https://code.engineering.redhat.com/gerrit/169994`
			`Tested-by: RHGS Build Bot <nigelb@redhat.com>`
			`Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>`
			`---`
			`libglusterfs/src/syncop-utils.c \| 2 +`
			`xlators/cluster/afr/src/afr-self-heald.c \| 5 +++`
			`xlators/cluster/ec/src/ec-heald.c \| 77 +++++++++++++++++++++++++++-----`
			`xlators/cluster/ec/src/ec-heald.h \| 3 ++`
			`xlators/cluster/ec/src/ec-messages.h \| 3 +-`
			`xlators/cluster/ec/src/ec.c \| 47 +++++++++++++++++++`
			`6 files changed, 124 insertions(+), 13 deletions(-)`

			`diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c`
			`index b842142..4167db4 100644`
			`--- a/libglusterfs/src/syncop-utils.c`
			`+++ b/libglusterfs/src/syncop-utils.c`
			`@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t frame, xlator_t subvol, loc_t *loc, int pid,`

			`if (frame) {`
			`this = frame->this;`
			`+ } else {`
			`+ this = THIS;`
			`}`

			`/*For this functionality to be implemented in general, we need`
			`diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c`
			`index 8bc4720..522fe5d 100644`
			`--- a/xlators/cluster/afr/src/afr-self-heald.c`
			`+++ b/xlators/cluster/afr/src/afr-self-heald.c`
			`@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t subvol, gf_dirent_t entry, loc_t *parent,`
			`afr_private_t *priv = NULL;`

			`priv = this->private;`
			`+`
			`+ if (this->cleanup_starting) {`
			`+ return -ENOTCONN;`
			`+ }`
			`+`
			`if (!priv->shd.enabled)`
			`return -EBUSY;`

			`diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c`
			`index cba111a..edf5e11 100644`
			`--- a/xlators/cluster/ec/src/ec-heald.c`
			`+++ b/xlators/cluster/ec/src/ec-heald.c`
			`@@ -71,6 +71,11 @@ disabled_loop:`
			`break;`
			`}`

			`+ if (ec->shutdown) {`
			`+ healer->running = _gf_false;`
			`+ return -1;`
			`+ }`
			`+`
			`ret = healer->rerun;`
			`healer->rerun = 0;`

			`@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)`
			`goto out;`
			`}`

			`+ _mask_cancellation();`
			`ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,`
			`healer, ec_shd_index_heal, xdata,`
			`ec->shd.max_threads, ec->shd.wait_qlength);`
			`+ _unmask_cancellation();`
			`out:`
			`if (xdata)`
			`dict_unref(xdata);`
			`@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t subvol, gf_dirent_t entry, loc_t *parent,`
			`int ret = 0;`

			`ec = this->private;`
			`+`
			`+ if (this->cleanup_starting) {`
			`+ return -ENOTCONN;`
			`+ }`
			`+`
			`if (ec->xl_up_count <= ec->fragments) {`
			`return -ENOTCONN;`
			`}`
			`@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer healer, inode_t inode)`
			`{`
			`ec_t *ec = NULL;`
			`loc_t loc = {0};`
			`+ int ret = -1;`

			`ec = healer->this->private;`
			`loc.inode = inode;`
			`- return syncop_ftw(ec->xl_list[healer->subvol], &loc,`
			`- GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);`
			`+ _mask_cancellation();`
			`+ ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,`
			`+ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);`
			`+ _unmask_cancellation();`
			`+ return ret;`
			`}`

			`void *`
			`@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)`
			`{`
			`struct subvol_healer *healer = NULL;`
			`xlator_t *this = NULL;`
			`+ int run = 0;`

			`healer = data;`
			`THIS = this = healer->this;`
			`ec_t *ec = this->private;`

			`for (;;) {`
			`- ec_shd_healer_wait(healer);`
			`+ run = ec_shd_healer_wait(healer);`
			`+ if (run == -1)`
			`+ break;`

			`if (ec->xl_up_count > ec->fragments) {`
			`gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",`
			`@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)`

			`rootloc.inode = this->itable->root;`
			`for (;;) {`
			`- pthread_mutex_lock(&healer->mutex);`
			`- {`
			`- run = __ec_shd_healer_wait(healer);`
			`- if (!run)`
			`- healer->running = _gf_false;`
			`- }`
			`- pthread_mutex_unlock(&healer->mutex);`
			`-`
			`- if (!run)`
			`+ run = ec_shd_healer_wait(healer);`
			`+ if (run < 0) {`
			`break;`
			`+ } else if (run == 0) {`
			`+ continue;`
			`+ }`

			`if (ec->xl_up_count > ec->fragments) {`
			`gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,`
			`@@ -562,3 +577,41 @@ out:`
			`dict_del(output, this->name);`
			`return ret;`
			`}`
			`+`
			`+void`
			`+ec_destroy_healer_object(xlator_t this, struct subvol_healer healer)`
			`+{`
			`+ if (!healer)`
			`+ return;`
			`+`
			`+ pthread_cond_destroy(&healer->cond);`
			`+ pthread_mutex_destroy(&healer->mutex);`
			`+}`
			`+`
			`+void`
			`+ec_selfheal_daemon_fini(xlator_t *this)`
			`+{`
			`+ struct subvol_healer *healer = NULL;`
			`+ ec_self_heald_t *shd = NULL;`
			`+ ec_t *priv = NULL;`
			`+ int i = 0;`
			`+`
			`+ priv = this->private;`
			`+ if (!priv)`
			`+ return;`
			`+`
			`+ shd = &priv->shd;`
			`+ if (!shd->iamshd)`
			`+ return;`
			`+`
			`+ for (i = 0; i < priv->nodes; i++) {`
			`+ healer = &shd->index_healers[i];`
			`+ ec_destroy_healer_object(this, healer);`
			`+`
			`+ healer = &shd->full_healers[i];`
			`+ ec_destroy_healer_object(this, healer);`
			`+ }`
			`+`
			`+ GF_FREE(shd->index_healers);`
			`+ GF_FREE(shd->full_healers);`
			`+}`
			`diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h`
			`index 2eda2a7..8184cf4 100644`
			`--- a/xlators/cluster/ec/src/ec-heald.h`
			`+++ b/xlators/cluster/ec/src/ec-heald.h`
			`@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);`
			`void`
			`ec_shd_index_healer_wake(ec_t *ec);`

			`+void`
			`+ec_selfheal_daemon_fini(xlator_t *this);`
			`+`
			`#endif /* __EC_HEALD_H__ */`
			`diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h`
			`index 7c28808..ce299bb 100644`
			`--- a/xlators/cluster/ec/src/ec-messages.h`
			`+++ b/xlators/cluster/ec/src/ec-messages.h`
			`@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,`
			`EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,`
			`EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,`
			`EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,`
			`- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);`
			`+ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,`
			`+ EC_MSG_THREAD_CLEANUP_FAILED);`

			`#endif /* !_EC_MESSAGES_H_ */`
			`diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c`
			`index 3c8013e..264582a 100644`
			`--- a/xlators/cluster/ec/src/ec.c`
			`+++ b/xlators/cluster/ec/src/ec.c`
			`@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)`
			`}`

			`void`
			`+ec_cleanup_healer_object(ec_t *ec)`
			`+{`
			`+ struct subvol_healer *healer = NULL;`
			`+ ec_self_heald_t *shd = NULL;`
			`+ void *res = NULL;`
			`+ int i = 0;`
			`+ gf_boolean_t is_join = _gf_false;`
			`+`
			`+ shd = &ec->shd;`
			`+ if (!shd->iamshd)`
			`+ return;`
			`+`
			`+ for (i = 0; i < ec->nodes; i++) {`
			`+ healer = &shd->index_healers[i];`
			`+ pthread_mutex_lock(&healer->mutex);`
			`+ {`
			`+ healer->rerun = 1;`
			`+ if (healer->running) {`
			`+ pthread_cond_signal(&healer->cond);`
			`+ is_join = _gf_true;`
			`+ }`
			`+ }`
			`+ pthread_mutex_unlock(&healer->mutex);`
			`+ if (is_join) {`
			`+ pthread_join(healer->thread, &res);`
			`+ is_join = _gf_false;`
			`+ }`
			`+`
			`+ healer = &shd->full_healers[i];`
			`+ pthread_mutex_lock(&healer->mutex);`
			`+ {`
			`+ healer->rerun = 1;`
			`+ if (healer->running) {`
			`+ pthread_cond_signal(&healer->cond);`
			`+ is_join = _gf_true;`
			`+ }`
			`+ }`
			`+ pthread_mutex_unlock(&healer->mutex);`
			`+ if (is_join) {`
			`+ pthread_join(healer->thread, &res);`
			`+ is_join = _gf_false;`
			`+ }`
			`+ }`
			`+}`
			`+void`
			`ec_pending_fops_completed(ec_t *ec)`
			`{`
			`if (ec->shutdown) {`
			`@@ -544,6 +589,7 @@ ec_notify(xlator_t this, int32_t event, void data, void *data2)`
			`/* If there aren't pending fops running after we have waken up`
			`* them, we immediately propagate the notification. */`
			`propagate = ec_disable_delays(ec);`
			`+ ec_cleanup_healer_object(ec);`
			`goto unlock;`
			`}`

			`@@ -759,6 +805,7 @@ failed:`
			`void`
			`fini(xlator_t *this)`
			`{`
			`+ ec_selfheal_daemon_fini(this);`
			`__ec_destroy_private(this);`
			`}`

			`--`
			`1.8.3.1`