From 4d95e271a9042bf2d789a4d900ad263b6ea47681 Mon Sep 17 00:00:00 2001 From: Mohammed Rafi KC <rkavunga@redhat.com> Date: Wed, 23 Jan 2019 21:55:01 +0530 Subject: [PATCH 100/124] clnt/rpc: ref leak during disconnect. During disconnect cleanup, we are not cancelling reconnect timer, which causes a ref leak each time when a disconnect happen. Backport of: https://review.gluster.org/#/c/glusterfs/+/22087/ >Change-Id: I9d05d1f368d080e04836bf6a0bb018bf8f7b5b8a >updates: bz#1659708 >Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> Change-Id: I5a2dbb17e663a4809bb4c435cacadbf0ab694a76 BUG: 1471742 Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> Reviewed-on: https://code.engineering.redhat.com/gerrit/167844 Tested-by: RHGS Build Bot <nigelb@redhat.com> Reviewed-by: Atin Mukherjee <amukherj@redhat.com> --- libglusterfs/src/timer.c | 16 +++++++---- rpc/rpc-lib/src/rpc-clnt.c | 11 +++++++- .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 32 ++++++++++++++++++---- 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c index d882543..2643c07 100644 --- a/libglusterfs/src/timer.c +++ b/libglusterfs/src/timer.c @@ -75,13 +75,13 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event) if (ctx == NULL || event == NULL) { gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "invalid argument"); - return 0; + return -1; } if (ctx->cleanup_started) { gf_msg_callingfn("timer", GF_LOG_INFO, 0, LG_MSG_CTX_CLEANUP_STARTED, "ctx cleanup started"); - return 0; + return -1; } LOCK(&ctx->lock); @@ -93,10 +93,9 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event) if (!reg) { /* This can happen when cleanup may have just started and * gf_timer_registry_destroy() sets ctx->timer to NULL. - * Just bail out as success as gf_timer_proc() takes - * care of cleaning up the events. + * gf_timer_proc() takes care of cleaning up the events. */ - return 0; + return -1; } LOCK(®->lock); @@ -203,6 +202,13 @@ gf_timer_proc(void *data) list_for_each_entry_safe(event, tmp, ®->active, list) { list_del(&event->list); + /* TODO Possible resource leak + * Before freeing the event, we need to call the respective + * event functions and free any resources. + * For example, In case of rpc_clnt_reconnect, we need to + * unref rpc object which was taken when added to timer + * wheel. + */ GF_FREE(event); } } diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index 3f7bb3c..6f47515 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -495,6 +495,7 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn) int unref = 0; int ret = 0; gf_boolean_t timer_unref = _gf_false; + gf_boolean_t reconnect_unref = _gf_false; if (!conn) { goto out; @@ -514,6 +515,12 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn) timer_unref = _gf_true; conn->timer = NULL; } + if (conn->reconnect) { + ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect); + if (!ret) + reconnect_unref = _gf_true; + conn->reconnect = NULL; + } conn->connected = 0; conn->disconnected = 1; @@ -533,6 +540,8 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn) if (timer_unref) rpc_clnt_unref(clnt); + if (reconnect_unref) + rpc_clnt_unref(clnt); out: return 0; } @@ -830,7 +839,7 @@ rpc_clnt_handle_disconnect(struct rpc_clnt *clnt, rpc_clnt_connection_t *conn) pthread_mutex_lock(&conn->lock); { if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) { - ts.tv_sec = 10; + ts.tv_sec = 3; ts.tv_nsec = 0; rpc_clnt_ref(clnt); diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c index 041946d..b3c4158 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c @@ -3364,6 +3364,25 @@ out: return ret; } +int +glusterd_is_path_mounted(const char *path) +{ + FILE *mtab = NULL; + struct mntent *part = NULL; + int is_mounted = 0; + + if ((mtab = setmntent("/etc/mtab", "r")) != NULL) { + while ((part = getmntent(mtab)) != NULL) { + if ((part->mnt_fsname != NULL) && + (strcmp(part->mnt_dir, path)) == 0) { + is_mounted = 1; + break; + } + } + endmntent(mtab); + } + return is_mounted; +} /* This function will do unmount for snaps. */ int32_t @@ -3388,14 +3407,11 @@ glusterd_snap_unmount(xlator_t *this, glusterd_volinfo_t *volinfo) continue; } - /* Fetch the brick mount path from the brickinfo->path */ - ret = glusterd_get_brick_root(brickinfo->path, &brick_mount_path); + ret = glusterd_find_brick_mount_path(brickinfo->path, + &brick_mount_path); if (ret) { - gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BRICK_PATH_UNMOUNTED, + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_GET_FAIL, "Failed to find brick_mount_path for %s", brickinfo->path); - /* There is chance that brick path is already - * unmounted. */ - ret = 0; goto out; } /* unmount cannot be done when the brick process is still in @@ -3440,6 +3456,10 @@ glusterd_umount(const char *path) GF_ASSERT(this); GF_ASSERT(path); + if (!glusterd_is_path_mounted(path)) { + return 0; + } + runinit(&runner); snprintf(msg, sizeof(msg), "umount path %s", path); runner_add_args(&runner, _PATH_UMOUNT, "-f", path, NULL); -- 1.8.3.1