glusterfs/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch

500 lines
16 KiB
Diff

From 17a9ce965ef2fec9ee5c8e4b76981bb7cbcf1352 Mon Sep 17 00:00:00 2001
From: mohit84 <moagrawa@redhat.com>
Date: Mon, 9 Nov 2020 17:15:42 +0530
Subject: [PATCH 506/511] posix: Attach a posix_spawn_disk_thread with
glusterfs_ctx (#1595)
Currently the posix xlator spawns a posix_disk_space thread per brick.
In a brick_mux environment, when glusterd attaches the maximum number
of bricks (250) to a single brick process, 250 such threads are
spawned, which also increases the brick process memory size.
Solution: Attach a posix_disk_space thread to glusterfs_ctx so that
one thread is spawned per process instead of one thread per brick.
> Fixes: #1482
> Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7
> Cherry-picked from commit 3f93be77e1acf5baacafa97a320e91e6879d1c0e
> Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1482
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7
Bug: 1898776
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/220366
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
glusterfsd/src/glusterfsd.c | 4 +
libglusterfs/src/glusterfs/glusterfs.h | 6 ++
xlators/storage/posix/src/posix-common.c | 68 +++++++++++--
xlators/storage/posix/src/posix-handle.h | 3 +-
xlators/storage/posix/src/posix-helpers.c | 131 ++++++++++++++-----------
xlators/storage/posix/src/posix-inode-fd-ops.c | 3 +-
xlators/storage/posix/src/posix-mem-types.h | 1 +
xlators/storage/posix/src/posix.h | 12 ++-
8 files changed, 160 insertions(+), 68 deletions(-)
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 955bf1d..ac25255 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1840,9 +1840,13 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
INIT_LIST_HEAD(&cmd_args->xlator_options);
INIT_LIST_HEAD(&cmd_args->volfile_servers);
ctx->pxl_count = 0;
+ ctx->diskxl_count = 0;
pthread_mutex_init(&ctx->fd_lock, NULL);
pthread_cond_init(&ctx->fd_cond, NULL);
INIT_LIST_HEAD(&ctx->janitor_fds);
+ pthread_mutex_init(&ctx->xl_lock, NULL);
+ pthread_cond_init(&ctx->xl_cond, NULL);
+ INIT_LIST_HEAD(&ctx->diskth_xl);
lim.rlim_cur = RLIM_INFINITY;
lim.rlim_max = RLIM_INFINITY;
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index bf6a987..d3400bf 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -740,7 +740,13 @@ struct _glusterfs_ctx {
pthread_t janitor;
/* The variable is use to save total posix xlator count */
uint32_t pxl_count;
+ uint32_t diskxl_count;
+ /* List of posix xlators checked by the disk-space thread */
+ struct list_head diskth_xl;
+ pthread_mutex_t xl_lock;
+ pthread_cond_t xl_cond;
+ pthread_t disk_space_check;
char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
};
typedef struct _glusterfs_ctx glusterfs_ctx_t;
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
index e5c6e62..2c9030b 100644
--- a/xlators/storage/posix/src/posix-common.c
+++ b/xlators/storage/posix/src/posix-common.c
@@ -138,6 +138,36 @@ posix_inode(xlator_t *this)
return 0;
}
+static void
+delete_posix_diskxl(xlator_t *this)
+{
+ struct posix_private *priv = this->private;
+ struct posix_diskxl *pxl = priv->pxl;
+ glusterfs_ctx_t *ctx = this->ctx;
+ uint32_t count = 1;
+
+ if (pxl) {
+ pthread_mutex_lock(&ctx->xl_lock);
+ {
+ pxl->detach_notify = _gf_true;
+ while (pxl->is_use)
+ pthread_cond_wait(&pxl->cond, &ctx->xl_lock);
+ list_del_init(&pxl->list);
+ priv->pxl = NULL;
+ count = --ctx->diskxl_count;
+ if (count == 0)
+ pthread_cond_signal(&ctx->xl_cond);
+ }
+ pthread_mutex_unlock(&ctx->xl_lock);
+ pthread_cond_destroy(&pxl->cond);
+ GF_FREE(pxl);
+ if (count == 0) {
+ pthread_join(ctx->disk_space_check, NULL);
+ ctx->disk_space_check = 0;
+ }
+ }
+}
+
/**
* notify - when parent sends PARENT_UP, send CHILD_UP event from here
*/
@@ -194,6 +224,8 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
}
pthread_mutex_unlock(&ctx->fd_lock);
+ delete_posix_diskxl(this);
+
gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
victim->name);
default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
@@ -318,6 +350,7 @@ posix_reconfigure(xlator_t *this, dict_t *options)
int32_t force_directory_mode = -1;
int32_t create_mask = -1;
int32_t create_directory_mask = -1;
+ double old_disk_reserve = 0.0;
priv = this->private;
@@ -383,6 +416,7 @@ posix_reconfigure(xlator_t *this, dict_t *options)
" fallback to <hostname>:<export>");
}
+ old_disk_reserve = priv->disk_reserve;
GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size,
out);
/* option can be any one of percent or bytes */
@@ -390,11 +424,19 @@ posix_reconfigure(xlator_t *this, dict_t *options)
if (priv->disk_reserve < 100.0)
priv->disk_unit = 'p';
- if (priv->disk_reserve) {
+ /* Remove this xlator's pxl entry from the ctx->diskth_xl list when
+ the reserve option has changed during graph reconfigure
+ */
+ if (old_disk_reserve != priv->disk_reserve) {
+ delete_posix_diskxl(this);
+ old_disk_reserve = 0;
+ }
+
+ if (!old_disk_reserve && priv->disk_reserve) {
ret = posix_spawn_disk_space_check_thread(this);
if (ret) {
gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
- "Getting disk space check from thread failed");
+ "Getting disk space check from thread failed ");
goto out;
}
}
@@ -1008,13 +1050,13 @@ posix_init(xlator_t *this)
" fallback to <hostname>:<export>");
}
- _private->disk_space_check_active = _gf_false;
_private->disk_space_full = 0;
GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out);
/* option can be any one of percent or bytes */
_private->disk_unit = 0;
+ pthread_cond_init(&_private->fd_cond, NULL);
if (_private->disk_reserve < 100.0)
_private->disk_unit = 'p';
@@ -1162,12 +1204,6 @@ posix_fini(xlator_t *this)
priv->health_check = 0;
}
- if (priv->disk_space_check) {
- priv->disk_space_check_active = _gf_false;
- (void)gf_thread_cleanup_xint(priv->disk_space_check);
- priv->disk_space_check = 0;
- }
-
if (priv->janitor) {
/*TODO: Make sure the synctask is also complete */
ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
@@ -1192,10 +1228,24 @@ posix_fini(xlator_t *this)
pthread_join(ctx->janitor, NULL);
}
+ pthread_mutex_lock(&ctx->xl_lock);
+ {
+ count = --ctx->diskxl_count;
+ if (count == 0)
+ pthread_cond_signal(&ctx->xl_cond);
+ }
+ pthread_mutex_unlock(&ctx->xl_lock);
+
+ if (count == 0) {
+ pthread_join(ctx->disk_space_check, NULL);
+ ctx->disk_space_check = 0;
+ }
+
if (priv->fsyncer) {
(void)gf_thread_cleanup_xint(priv->fsyncer);
priv->fsyncer = 0;
}
+
/*unlock brick dir*/
if (priv->mount_lock)
(void)sys_closedir(priv->mount_lock);
diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h
index c4d7cb1..8e4c719 100644
--- a/xlators/storage/posix/src/posix-handle.h
+++ b/xlators/storage/posix/src/posix-handle.h
@@ -206,5 +206,6 @@ int
posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata);
void
-posix_disk_space_check(xlator_t *this);
+posix_disk_space_check(struct posix_private* priv);
+
#endif /* !_POSIX_HANDLE_H */
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index ceac52a..110d383 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -2284,9 +2284,8 @@ unlock:
}
void
-posix_disk_space_check(xlator_t *this)
+posix_disk_space_check(struct posix_private *priv)
{
- struct posix_private *priv = NULL;
char *subvol_path = NULL;
int op_ret = 0;
double size = 0;
@@ -2295,16 +2294,14 @@ posix_disk_space_check(xlator_t *this)
double totsz = 0;
double freesz = 0;
- GF_VALIDATE_OR_GOTO(this->name, this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
+ GF_VALIDATE_OR_GOTO("posix-helpers", priv, out);
subvol_path = priv->base_path;
op_ret = sys_statvfs(subvol_path, &buf);
if (op_ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
+ gf_msg("posix-disk", GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
"statvfs failed on %s", subvol_path);
goto out;
}
@@ -2328,78 +2325,102 @@ out:
}
static void *
-posix_disk_space_check_thread_proc(void *data)
+posix_ctx_disk_thread_proc(void *data)
{
- xlator_t *this = NULL;
struct posix_private *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
uint32_t interval = 0;
- int ret = -1;
-
- this = data;
- priv = this->private;
+ struct posix_diskxl *pthis = NULL;
+ xlator_t *this = NULL;
+ struct timespec sleep_till = {
+ 0,
+ };
+ ctx = data;
interval = 5;
- gf_msg_debug(this->name, 0,
- "disk-space thread started, "
+
+ gf_msg_debug("glusterfs_ctx", 0,
+ "Ctx disk-space thread started, "
"interval = %d seconds",
interval);
- while (1) {
- /* aborting sleep() is a request to exit this thread, sleep()
- * will normally not return when cancelled */
- ret = sleep(interval);
- if (ret > 0)
- break;
- /* prevent thread errors while doing the health-check(s) */
- pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
-
- /* Do the disk-check.*/
- posix_disk_space_check(this);
- if (!priv->disk_space_check_active)
- goto out;
- pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
- }
-out:
- gf_msg_debug(this->name, 0, "disk space check thread exiting");
- LOCK(&priv->lock);
+ pthread_mutex_lock(&ctx->xl_lock);
{
- priv->disk_space_check_active = _gf_false;
+ while (ctx->diskxl_count > 0) {
+ list_for_each_entry(pthis, &ctx->diskth_xl, list)
+ {
+ pthis->is_use = _gf_true;
+ pthread_mutex_unlock(&ctx->xl_lock);
+
+ THIS = this = pthis->xl;
+ priv = this->private;
+
+ posix_disk_space_check(priv);
+
+ pthread_mutex_lock(&ctx->xl_lock);
+ pthis->is_use = _gf_false;
+ /* Send a signal to posix_notify function */
+ if (pthis->detach_notify)
+ pthread_cond_signal(&pthis->cond);
+ }
+
+ timespec_now_realtime(&sleep_till);
+ sleep_till.tv_sec += 5;
+ (void)pthread_cond_timedwait(&ctx->xl_cond, &ctx->xl_lock,
+ &sleep_till);
+ }
}
- UNLOCK(&priv->lock);
+ pthread_mutex_unlock(&ctx->xl_lock);
return NULL;
}
int
-posix_spawn_disk_space_check_thread(xlator_t *xl)
+posix_spawn_disk_space_check_thread(xlator_t *this)
{
- struct posix_private *priv = NULL;
- int ret = -1;
+ int ret = 0;
+ glusterfs_ctx_t *ctx = this->ctx;
+ struct posix_diskxl *pxl = NULL;
+ struct posix_private *priv = this->private;
- priv = xl->private;
+ pxl = GF_CALLOC(1, sizeof(struct posix_diskxl), gf_posix_mt_diskxl_t);
+ if (!pxl) {
+ ret = -ENOMEM;
+ gf_log(this->name, GF_LOG_ERROR,
+ "Calloc is failed to allocate "
+ "memory for diskxl object");
+ goto out;
+ }
+ pthread_cond_init(&pxl->cond, NULL);
- LOCK(&priv->lock);
+ pthread_mutex_lock(&ctx->xl_lock);
{
- /* cancel the running thread */
- if (priv->disk_space_check_active == _gf_true) {
- pthread_cancel(priv->disk_space_check);
- priv->disk_space_check_active = _gf_false;
- }
+ if (ctx->diskxl_count++ == 0) {
+ ret = gf_thread_create(&ctx->disk_space_check, NULL,
+ posix_ctx_disk_thread_proc, ctx,
+ "posixctxres");
- ret = gf_thread_create(&priv->disk_space_check, NULL,
- posix_disk_space_check_thread_proc, xl,
- "posix_reserve");
- if (ret) {
- priv->disk_space_check_active = _gf_false;
- gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED,
- "unable to setup disk space check thread");
- goto unlock;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
+ "spawning disk space check thread failed");
+ ctx->diskxl_count--;
+ pthread_mutex_unlock(&ctx->xl_lock);
+ goto out;
+ }
}
+ pxl->xl = this;
+ priv->pxl = (void *)pxl;
+ list_add_tail(&pxl->list, &ctx->diskth_xl);
+ }
+ pthread_mutex_unlock(&ctx->xl_lock);
- priv->disk_space_check_active = _gf_true;
+out:
+ if (ret) {
+ if (pxl) {
+ pthread_cond_destroy(&pxl->cond);
+ GF_FREE(pxl);
+ }
}
-unlock:
- UNLOCK(&priv->lock);
return ret;
}
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index 1d37aed..761e018 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -37,6 +37,7 @@
#include <fcntl.h>
#endif /* HAVE_LINKAT */
+#include "posix-handle.h"
#include <glusterfs/glusterfs.h>
#include <glusterfs/checksum.h>
#include <glusterfs/dict.h>
@@ -713,7 +714,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
option behaviour
*/
if (priv->disk_reserve)
- posix_disk_space_check(this);
+ posix_disk_space_check(priv);
DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock);
diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h
index 2253f38..bb4c56d 100644
--- a/xlators/storage/posix/src/posix-mem-types.h
+++ b/xlators/storage/posix/src/posix-mem-types.h
@@ -20,6 +20,7 @@ enum gf_posix_mem_types_ {
gf_posix_mt_paiocb,
gf_posix_mt_inode_ctx_t,
gf_posix_mt_mdata_attr,
+ gf_posix_mt_diskxl_t,
gf_posix_mt_end
};
#endif
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 07f367b..4be979c 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -36,7 +36,6 @@
#include <glusterfs/compat.h>
#include <glusterfs/timer.h>
#include "posix-mem-types.h"
-#include "posix-handle.h"
#include <glusterfs/call-stub.h>
#ifdef HAVE_LIBAIO
@@ -138,6 +137,14 @@ struct posix_fd {
char _pad[4]; /* manual padding */
};
+struct posix_diskxl {
+ pthread_cond_t cond;
+ struct list_head list;
+ xlator_t *xl;
+ gf_boolean_t detach_notify;
+ gf_boolean_t is_use;
+};
+
struct posix_private {
char *base_path;
int32_t base_path_length;
@@ -207,6 +214,7 @@ struct posix_private {
pthread_mutex_t janitor_mutex;
pthread_cond_t janitor_cond;
pthread_cond_t fd_cond;
+ pthread_cond_t disk_cond;
int fsync_queue_count;
enum {
@@ -233,7 +241,6 @@ struct posix_private {
char disk_unit;
uint32_t disk_space_full;
pthread_t disk_space_check;
- gf_boolean_t disk_space_check_active;
#ifdef GF_DARWIN_HOST_OS
enum {
@@ -263,6 +270,7 @@ struct posix_private {
gf_boolean_t ctime;
gf_boolean_t janitor_task_stop;
uint32_t rel_fdcount;
+ void *pxl;
};
typedef struct {
--
1.8.3.1