456 lines
16 KiB
Diff
456 lines
16 KiB
Diff
From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001
|
|
From: N Balachandran <nbalacha@redhat.com>
|
|
Date: Fri, 9 Aug 2019 14:34:22 +0530
|
|
Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size
|
|
|
|
If the glusterfs fuse client process is unable to
|
|
process the invalidate requests quickly enough, the
|
|
number of such requests quickly grows large enough
|
|
to use a significant amount of memory.
|
|
We are now introducing another option to set an upper
|
|
limit on these to prevent runaway memory usage.
|
|
|
|
> Upstream https://review.gluster.org/23187
|
|
> Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788
|
|
> Fixes: bz#1732717
|
|
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
|
|
|
|
BUG: 1763208
|
|
Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4
|
|
Signed-off-by: Csaba Henk <csaba@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/187529
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
---
|
|
doc/mount.glusterfs.8 | 5 +++
|
|
glusterfsd/src/glusterfsd.c | 21 ++++++++++
|
|
glusterfsd/src/glusterfsd.h | 3 +-
|
|
libglusterfs/src/glusterfs/glusterfs.h | 1 +
|
|
libglusterfs/src/glusterfs/inode.h | 1 +
|
|
libglusterfs/src/inode.c | 31 +++++++++++----
|
|
xlators/mount/fuse/src/fuse-bridge.c | 60 ++++++++++++++++++++++-------
|
|
xlators/mount/fuse/src/fuse-bridge.h | 3 +-
|
|
xlators/mount/fuse/utils/mount.glusterfs.in | 7 ++++
|
|
9 files changed, 108 insertions(+), 24 deletions(-)
|
|
|
|
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
|
|
index 286631b..b35b362 100644
|
|
--- a/doc/mount.glusterfs.8
|
|
+++ b/doc/mount.glusterfs.8
|
|
@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None]
|
|
Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072]
|
|
.TP
|
|
.TP
|
|
+\fBinvalidate-limit=\fRN
|
|
+Suspend fuse invalidations implied by 'lru-limit' if number of outstanding
|
|
+invalidations reaches N
|
|
+.TP
|
|
+.TP
|
|
\fBbackground-qlen=\fRN
|
|
Set fuse module's background queue length to N [default: 64]
|
|
.TP
|
|
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
|
|
index 5b5e996..0856471 100644
|
|
--- a/glusterfsd/src/glusterfsd.c
|
|
+++ b/glusterfsd/src/glusterfsd.c
|
|
@@ -212,6 +212,9 @@ static struct argp_option gf_options[] = {
|
|
{"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
|
|
"Set fuse module's limit for number of inodes kept in LRU list to N "
|
|
"[default: 131072]"},
|
|
+ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
|
|
+ "Suspend inode invalidations implied by 'lru-limit' if the number of "
|
|
+ "outstanding invalidations reaches N"},
|
|
{"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
|
|
"Set fuse module's background queue length to N "
|
|
"[default: 64]"},
|
|
@@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
|
|
}
|
|
}
|
|
|
|
+ if (cmd_args->invalidate_limit >= 0) {
|
|
+ ret = dict_set_int32(options, "invalidate-limit",
|
|
+ cmd_args->invalidate_limit);
|
|
+ if (ret < 0) {
|
|
+ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
|
|
+ "invalidate-limit");
|
|
+ goto err;
|
|
+ }
|
|
+ }
|
|
+
|
|
if (cmd_args->background_qlen) {
|
|
ret = dict_set_int32(options, "background-qlen",
|
|
cmd_args->background_qlen);
|
|
@@ -1283,6 +1296,14 @@ parse_opts(int key, char *arg, struct argp_state *state)
|
|
argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
|
|
break;
|
|
|
|
+ case ARGP_FUSE_INVALIDATE_LIMIT_KEY:
|
|
+ if (!gf_string2int32(arg, &cmd_args->invalidate_limit))
|
|
+ break;
|
|
+
|
|
+ argp_failure(state, -1, 0, "unknown invalidate limit option %s",
|
|
+ arg);
|
|
+ break;
|
|
+
|
|
case ARGP_FUSE_BACKGROUND_QLEN_KEY:
|
|
if (!gf_string2int(arg, &cmd_args->background_qlen))
|
|
break;
|
|
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
|
|
index fa55789..ee655f0 100644
|
|
--- a/glusterfsd/src/glusterfsd.h
|
|
+++ b/glusterfsd/src/glusterfsd.h
|
|
@@ -111,7 +111,8 @@ enum argp_option_keys {
|
|
ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
|
|
ARGP_FUSE_LRU_LIMIT_KEY = 190,
|
|
ARGP_FUSE_AUTO_INVAL_KEY = 191,
|
|
- ARGP_BRICK_MUX_KEY = 192
|
|
+ ARGP_BRICK_MUX_KEY = 192,
|
|
+ ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195,
|
|
};
|
|
|
|
struct _gfd_vol_top_priv {
|
|
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
|
|
index 79c93ae..3b594c0 100644
|
|
--- a/libglusterfs/src/glusterfs/glusterfs.h
|
|
+++ b/libglusterfs/src/glusterfs/glusterfs.h
|
|
@@ -541,6 +541,7 @@ struct _cmd_args {
|
|
int client_pid_set;
|
|
unsigned uid_map_root;
|
|
int32_t lru_limit;
|
|
+ int32_t invalidate_limit;
|
|
int background_qlen;
|
|
int congestion_threshold;
|
|
char *fuse_mountopts;
|
|
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
|
|
index 52efdd8..4421c47 100644
|
|
--- a/libglusterfs/src/glusterfs/inode.h
|
|
+++ b/libglusterfs/src/glusterfs/inode.h
|
|
@@ -107,6 +107,7 @@ struct _inode {
|
|
struct list_head list; /* active/lru/purge */
|
|
|
|
struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
|
|
+ bool in_invalidate_list; /* Set if inode is in table invalidate list */
|
|
bool invalidate_sent; /* Set it if invalidator_fn is called for inode */
|
|
};
|
|
|
|
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
|
|
index 96ddea5..5331e93 100644
|
|
--- a/libglusterfs/src/inode.c
|
|
+++ b/libglusterfs/src/inode.c
|
|
@@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear)
|
|
|
|
this = THIS;
|
|
|
|
- if (clear && inode->invalidate_sent) {
|
|
- inode->invalidate_sent = false;
|
|
+ if (clear && inode->in_invalidate_list) {
|
|
+ inode->in_invalidate_list = false;
|
|
inode->table->invalidate_size--;
|
|
__inode_activate(inode);
|
|
}
|
|
@@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear)
|
|
inode->_ctx[index].ref--;
|
|
}
|
|
|
|
- if (!inode->ref && !inode->invalidate_sent) {
|
|
+ if (!inode->ref && !inode->in_invalidate_list) {
|
|
inode->table->active_size--;
|
|
|
|
nlookup = GF_ATOMIC_GET(inode->nlookup);
|
|
@@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate)
|
|
return inode;
|
|
|
|
if (!inode->ref) {
|
|
- if (inode->invalidate_sent) {
|
|
- inode->invalidate_sent = false;
|
|
+ if (inode->in_invalidate_list) {
|
|
+ inode->in_invalidate_list = false;
|
|
inode->table->invalidate_size--;
|
|
} else {
|
|
inode->table->lru_size--;
|
|
}
|
|
if (is_invalidate) {
|
|
- inode->invalidate_sent = true;
|
|
+ inode->in_invalidate_list = true;
|
|
inode->table->invalidate_size++;
|
|
list_move_tail(&inode->list, &inode->table->invalidate);
|
|
} else {
|
|
@@ -1609,6 +1609,7 @@ static int
|
|
inode_table_prune(inode_table_t *table)
|
|
{
|
|
int ret = 0;
|
|
+ int ret1 = 0;
|
|
struct list_head purge = {
|
|
0,
|
|
};
|
|
@@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table)
|
|
/* check for valid inode with 'nlookup' */
|
|
nlookup = GF_ATOMIC_GET(entry->nlookup);
|
|
if (nlookup) {
|
|
+ if (entry->invalidate_sent) {
|
|
+ list_move_tail(&entry->list, &table->lru);
|
|
+ continue;
|
|
+ }
|
|
__inode_ref(entry, true);
|
|
tmp = entry;
|
|
break;
|
|
@@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table)
|
|
if (tmp) {
|
|
xlator_t *old_THIS = THIS;
|
|
THIS = table->invalidator_xl;
|
|
- table->invalidator_fn(table->invalidator_xl, tmp);
|
|
+ ret1 = table->invalidator_fn(table->invalidator_xl, tmp);
|
|
THIS = old_THIS;
|
|
- inode_unref(tmp);
|
|
+ pthread_mutex_lock(&table->lock);
|
|
+ {
|
|
+ if (!ret1) {
|
|
+ tmp->invalidate_sent = true;
|
|
+ __inode_unref(tmp, false);
|
|
+ } else {
|
|
+ /* Move this back to the lru list*/
|
|
+ __inode_unref(tmp, true);
|
|
+ }
|
|
+ }
|
|
+ pthread_mutex_unlock(&table->lock);
|
|
}
|
|
|
|
/* Just so that if purge list is handled too, then clear it off */
|
|
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
|
|
index 1c946a2..8b2e7f0 100644
|
|
--- a/xlators/mount/fuse/src/fuse-bridge.c
|
|
+++ b/xlators/mount/fuse/src/fuse-bridge.c
|
|
@@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log;
|
|
void
|
|
fini(xlator_t *this_xl);
|
|
|
|
-static void
|
|
+static int32_t
|
|
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
|
|
|
|
/*
|
|
@@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
|
|
#define send_fuse_obj(this, finh, obj) \
|
|
send_fuse_data(this, finh, obj, sizeof(*(obj)))
|
|
|
|
-static void
|
|
+static int32_t
|
|
fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
|
|
{
|
|
#if FUSE_KERNEL_MINOR_VERSION >= 11
|
|
@@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
|
|
|
|
priv = this->private;
|
|
if (!priv->reverse_fuse_thread_started)
|
|
- return;
|
|
+ return -1;
|
|
+
|
|
+ if (priv->invalidate_limit &&
|
|
+ (priv->invalidate_count >= priv->invalidate_limit)) {
|
|
+ return -1;
|
|
+ }
|
|
|
|
inode = (inode_t *)(unsigned long)fuse_ino;
|
|
if (inode == NULL)
|
|
- return;
|
|
+ return -1;
|
|
|
|
list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)
|
|
{
|
|
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
|
|
if (node == NULL)
|
|
- break;
|
|
+ return -1;
|
|
|
|
INIT_LIST_HEAD(&node->next);
|
|
|
|
@@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
|
|
pthread_mutex_lock(&priv->invalidate_mutex);
|
|
{
|
|
list_add_tail(&node->next, &priv->invalidate_list);
|
|
+ priv->invalidate_count++;
|
|
pthread_cond_signal(&priv->invalidate_cond);
|
|
}
|
|
pthread_mutex_unlock(&priv->invalidate_mutex);
|
|
}
|
|
|
|
#endif
|
|
- return;
|
|
+ return 0;
|
|
}
|
|
|
|
/*
|
|
* Send an inval inode notification to fuse. This causes an invalidation of the
|
|
* entire page cache mapping on the inode.
|
|
*/
|
|
-static void
|
|
+static int32_t
|
|
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
|
|
{
|
|
#if FUSE_KERNEL_MINOR_VERSION >= 11
|
|
@@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
|
|
priv = this->private;
|
|
|
|
if (!priv->reverse_fuse_thread_started)
|
|
- return;
|
|
+ return -1;
|
|
+
|
|
+ if (priv->invalidate_limit &&
|
|
+ (priv->invalidate_count >= priv->invalidate_limit)) {
|
|
+ return -1;
|
|
+ }
|
|
|
|
inode = (inode_t *)(unsigned long)fuse_ino;
|
|
if (inode == NULL)
|
|
- return;
|
|
+ return -1;
|
|
|
|
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
|
|
if (node == NULL)
|
|
- return;
|
|
+ return -1;
|
|
|
|
INIT_LIST_HEAD(&node->next);
|
|
|
|
@@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
|
|
pthread_mutex_lock(&priv->invalidate_mutex);
|
|
{
|
|
list_add_tail(&node->next, &priv->invalidate_list);
|
|
+ priv->invalidate_count++;
|
|
pthread_cond_signal(&priv->invalidate_cond);
|
|
}
|
|
pthread_mutex_unlock(&priv->invalidate_mutex);
|
|
@@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
|
|
gf_log("glusterfs-fuse", GF_LOG_WARNING,
|
|
"fuse_invalidate_inode not implemented on this system");
|
|
#endif
|
|
- return;
|
|
+ return 0;
|
|
}
|
|
|
|
#if FUSE_KERNEL_MINOR_VERSION >= 11
|
|
@@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
|
|
static int32_t
|
|
fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
|
|
{
|
|
- fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
|
|
- return 0;
|
|
+ int32_t ret = 0;
|
|
+ ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
|
|
+ return ret;
|
|
}
|
|
#endif
|
|
|
|
@@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
|
|
gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64,
|
|
finh->nodeid);
|
|
#if FUSE_KERNEL_MINOR_VERSION >= 11
|
|
- fuse_invalidate_entry(this, finh->nodeid);
|
|
+ ret = fuse_invalidate_entry(this, finh->nodeid);
|
|
+ if (ret)
|
|
+ op_errno = EBUSY;
|
|
#endif
|
|
goto done;
|
|
}
|
|
@@ -4812,6 +4827,7 @@ notify_kernel_loop(void *data)
|
|
fuse_invalidate_node_t, next);
|
|
|
|
list_del_init(&node->next);
|
|
+ priv->invalidate_count--;
|
|
}
|
|
pthread_mutex_unlock(&priv->invalidate_mutex);
|
|
|
|
@@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data)
|
|
list_del_init(&node->next);
|
|
GF_FREE(node);
|
|
}
|
|
+ priv->invalidate_count = 0;
|
|
}
|
|
pthread_mutex_unlock(&priv->invalidate_mutex);
|
|
|
|
@@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this)
|
|
(int)private->timed_response_fuse_thread_started);
|
|
gf_proc_dump_write("reverse_thread_started", "%d",
|
|
(int)private->reverse_fuse_thread_started);
|
|
+ gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit);
|
|
+ gf_proc_dump_write("invalidate_queue_length", "%" PRIu64,
|
|
+ private->invalidate_count);
|
|
gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
|
|
|
|
return 0;
|
|
@@ -6619,6 +6639,9 @@ init(xlator_t *this_xl)
|
|
|
|
GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
|
|
|
|
+ GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32,
|
|
+ cleanup_exit);
|
|
+
|
|
GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);
|
|
|
|
GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit);
|
|
@@ -6955,6 +6978,15 @@ struct volume_options options[] = {
|
|
"reaching this limit (0 means 'unlimited')",
|
|
},
|
|
{
|
|
+ .key = {"invalidate-limit"},
|
|
+ .type = GF_OPTION_TYPE_INT,
|
|
+ .default_value = "0",
|
|
+ .min = 0,
|
|
+ .description = "suspend invalidations as of 'lru-limit' if the number "
|
|
+ "of outstanding invalidations reaches this limit "
|
|
+ "(0 means 'unlimited')",
|
|
+ },
|
|
+ {
|
|
.key = {"auto-invalidation"},
|
|
.type = GF_OPTION_TYPE_BOOL,
|
|
.default_value = "true",
|
|
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
|
|
index 697bd88..2311582 100644
|
|
--- a/xlators/mount/fuse/src/fuse-bridge.h
|
|
+++ b/xlators/mount/fuse/src/fuse-bridge.h
|
|
@@ -139,7 +139,7 @@ struct fuse_private {
|
|
pthread_cond_t invalidate_cond;
|
|
pthread_mutex_t invalidate_mutex;
|
|
gf_boolean_t reverse_fuse_thread_started;
|
|
-
|
|
+ uint64_t invalidate_count;
|
|
/* For communicating with separate mount thread. */
|
|
int status_pipe[2];
|
|
|
|
@@ -191,6 +191,7 @@ struct fuse_private {
|
|
|
|
/* LRU Limit, if not set, default is 128k for now */
|
|
uint32_t lru_limit;
|
|
+ uint32_t invalidate_limit;
|
|
};
|
|
typedef struct fuse_private fuse_private_t;
|
|
|
|
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
|
|
index cbde42d..61d7422 100755
|
|
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
|
|
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
|
|
@@ -257,6 +257,10 @@ start_glusterfs ()
|
|
cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit");
|
|
fi
|
|
|
|
+ if [ -n "$invalidate_limit" ]; then
|
|
+ cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit");
|
|
+ fi
|
|
+
|
|
if [ -n "$bg_qlen" ]; then
|
|
cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
|
|
fi
|
|
@@ -505,6 +509,9 @@ with_options()
|
|
"lru-limit")
|
|
lru_limit=$value
|
|
;;
|
|
+ "invalidate-limit")
|
|
+ invalidate_limit=$value
|
|
+ ;;
|
|
"background-qlen")
|
|
bg_qlen=$value
|
|
;;
|
|
--
|
|
1.8.3.1
|
|
|