glusterfs/SOURCES/0100-clnt-rpc-ref-leak-during-disconnect.patch

From 4d95e271a9042bf2d789a4d900ad263b6ea47681 Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC <rkavunga@redhat.com>
Date: Wed, 23 Jan 2019 21:55:01 +0530
Subject: [PATCH 100/124] clnt/rpc: ref leak during disconnect.
During disconnect cleanup, we are not cancelling the reconnect
timer, which causes a ref leak every time a disconnect happens.
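A minimal sketch of the leak pattern, using made-up stand-in types
(fake_clnt_t, fake_timer_t, arm_reconnect, cleanup_leaky/cleanup_fixed
are illustrative only, not the real rpc_clnt_t/gf_timer_t code): arming
the reconnect timer takes a ref on the client, so a cleanup path that
drops conn->reconnect without cancelling the timer also strands that ref.

    #include <stdio.h>
    #include <stdlib.h>

    /* Made-up stand-ins for rpc_clnt_t / gf_timer_t; not the real structs. */
    typedef struct { int refcount; } fake_clnt_t;
    typedef struct { fake_clnt_t *owner; } fake_timer_t;

    static void clnt_ref(fake_clnt_t *c)   { c->refcount++; }
    static void clnt_unref(fake_clnt_t *c) { c->refcount--; }

    /* Arming the reconnect timer takes a ref so the callback can run safely. */
    static fake_timer_t *arm_reconnect(fake_clnt_t *c)
    {
        fake_timer_t *t = malloc(sizeof(*t));
        clnt_ref(c);
        t->owner = c;
        return t;
    }

    /* Pre-patch cleanup: the timer is forgotten but never cancelled, so the
     * ref taken in arm_reconnect() is never dropped. */
    static void cleanup_leaky(fake_clnt_t *c, fake_timer_t **reconnect)
    {
        (void)c;
        *reconnect = NULL;
    }

    /* Patched cleanup: cancel the timer and drop the matching ref, mirroring
     * the conn->reconnect handling added to rpc_clnt_connection_cleanup(). */
    static void cleanup_fixed(fake_clnt_t *c, fake_timer_t **reconnect)
    {
        if (*reconnect) {
            free(*reconnect);   /* stands in for gf_timer_call_cancel() */
            clnt_unref(c);
            *reconnect = NULL;
        }
    }

    int main(void)
    {
        fake_clnt_t a = { .refcount = 1 }, b = { .refcount = 1 };
        fake_timer_t *ra = arm_reconnect(&a), *rb = arm_reconnect(&b);

        cleanup_leaky(&a, &ra);  /* old path: the arm_reconnect() ref is stranded */
        cleanup_fixed(&b, &rb);  /* patched path: timer cancelled, ref dropped */

        printf("leaky cleanup: refcount = %d (should be 1)\n", a.refcount); /* 2 */
        printf("fixed cleanup: refcount = %d\n", b.refcount);               /* 1 */
        return 0;
    }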
Backport of: https://review.gluster.org/#/c/glusterfs/+/22087/
>Change-Id: I9d05d1f368d080e04836bf6a0bb018bf8f7b5b8a
>updates: bz#1659708
>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Change-Id: I5a2dbb17e663a4809bb4c435cacadbf0ab694a76
BUG: 1471742
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/167844
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
libglusterfs/src/timer.c | 16 +++++++----
rpc/rpc-lib/src/rpc-clnt.c | 11 +++++++-
.../mgmt/glusterd/src/glusterd-snapshot-utils.c | 32 ++++++++++++++++++----
3 files changed, 47 insertions(+), 12 deletions(-)
diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c
index d882543..2643c07 100644
--- a/libglusterfs/src/timer.c
+++ b/libglusterfs/src/timer.c
@@ -75,13 +75,13 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
if (ctx == NULL || event == NULL) {
gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
"invalid argument");
- return 0;
+ return -1;
}
if (ctx->cleanup_started) {
gf_msg_callingfn("timer", GF_LOG_INFO, 0, LG_MSG_CTX_CLEANUP_STARTED,
"ctx cleanup started");
- return 0;
+ return -1;
}
LOCK(&ctx->lock);
@@ -93,10 +93,9 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
if (!reg) {
/* This can happen when cleanup may have just started and
* gf_timer_registry_destroy() sets ctx->timer to NULL.
- * Just bail out as success as gf_timer_proc() takes
- * care of cleaning up the events.
+ * gf_timer_proc() takes care of cleaning up the events.
*/
- return 0;
+ return -1;
}
LOCK(&reg->lock);
@@ -203,6 +202,13 @@ gf_timer_proc(void *data)
list_for_each_entry_safe(event, tmp, &reg->active, list)
{
list_del(&event->list);
+ /* TODO: possible resource leak.
+ * Before freeing the event, we need to call the respective
+ * event function and free any resources it holds.
+ * For example, in the case of rpc_clnt_reconnect, we need to
+ * unref the rpc object whose ref was taken when it was added
+ * to the timer wheel.
+ */
GF_FREE(event);
}
}
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index 3f7bb3c..6f47515 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -495,6 +495,7 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
int unref = 0;
int ret = 0;
gf_boolean_t timer_unref = _gf_false;
+ gf_boolean_t reconnect_unref = _gf_false;
if (!conn) {
goto out;
@@ -514,6 +515,12 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
timer_unref = _gf_true;
conn->timer = NULL;
}
+ if (conn->reconnect) {
+ ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect);
+ if (!ret)
+ reconnect_unref = _gf_true;
+ conn->reconnect = NULL;
+ }
conn->connected = 0;
conn->disconnected = 1;
@@ -533,6 +540,8 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
if (timer_unref)
rpc_clnt_unref(clnt);
+ if (reconnect_unref)
+ rpc_clnt_unref(clnt);
out:
return 0;
}
@@ -830,7 +839,7 @@ rpc_clnt_handle_disconnect(struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
pthread_mutex_lock(&conn->lock);
{
if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) {
- ts.tv_sec = 10;
+ ts.tv_sec = 3;
ts.tv_nsec = 0;
rpc_clnt_ref(clnt);
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
index 041946d..b3c4158 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -3364,6 +3364,25 @@ out:
return ret;
}
+int
+glusterd_is_path_mounted(const char *path)
+{
+ FILE *mtab = NULL;
+ struct mntent *part = NULL;
+ int is_mounted = 0;
+
+ if ((mtab = setmntent("/etc/mtab", "r")) != NULL) {
+ while ((part = getmntent(mtab)) != NULL) {
+ if ((part->mnt_fsname != NULL) &&
+ (strcmp(part->mnt_dir, path)) == 0) {
+ is_mounted = 1;
+ break;
+ }
+ }
+ endmntent(mtab);
+ }
+ return is_mounted;
+}
/* This function will do unmount for snaps.
*/
int32_t
@@ -3388,14 +3407,11 @@ glusterd_snap_unmount(xlator_t *this, glusterd_volinfo_t *volinfo)
continue;
}
- /* Fetch the brick mount path from the brickinfo->path */
- ret = glusterd_get_brick_root(brickinfo->path, &brick_mount_path);
+ ret = glusterd_find_brick_mount_path(brickinfo->path,
+ &brick_mount_path);
if (ret) {
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BRICK_PATH_UNMOUNTED,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_GET_FAIL,
"Failed to find brick_mount_path for %s", brickinfo->path);
- /* There is chance that brick path is already
- * unmounted. */
- ret = 0;
goto out;
}
/* unmount cannot be done when the brick process is still in
@@ -3440,6 +3456,10 @@ glusterd_umount(const char *path)
GF_ASSERT(this);
GF_ASSERT(path);
+ if (!glusterd_is_path_mounted(path)) {
+ return 0;
+ }
+
runinit(&runner);
snprintf(msg, sizeof(msg), "umount path %s", path);
runner_add_args(&runner, _PATH_UMOUNT, "-f", path, NULL);
--
1.8.3.1