glusterfs/0441-features-shard-Make-lru-limit-of-inode-list-configur.patch
Milind Changire b7dd6f45c1 autobuild v3.12.2-26
Resolves: bz#1479446 bz#1520882 bz#1579758 bz#1598407 bz#1599808
Resolves: bz#1603118 bz#1619357 bz#1622001 bz#1622308 bz#1631166
Resolves: bz#1631418 bz#1632563 bz#1634649 bz#1635071 bz#1635100
Resolves: bz#1635136 bz#1636291 bz#1638069 bz#1640347 bz#1642854
Resolves: bz#1643035 bz#1644120 bz#1644279 bz#1645916 bz#1647675
Signed-off-by: Milind Changire <mchangir@redhat.com>
2018-11-08 22:30:35 -05:00

202 lines
8.1 KiB
Diff

From 7b12a7ea7a6b4945ad52f218b187ca440dfbef63 Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Fri, 20 Jul 2018 10:52:22 +0530
Subject: [PATCH 441/444] features/shard: Make lru limit of inode list
configurable
> Upstream: https://review.gluster.org/20544
> BUG: 1605056
> Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295
Currently this lru limit is hard-coded to 16384. This patch makes it
configurable to make it easier to hit the lru limit and enable testing
of different cases that arise when the limit is reached.
The option is features.shard-lru-limit. It is by design allowed to
be configured only in init() but not in reconfigure(). This is to avoid
all the complexity associated with eviction of least recently used shards
when the list is shrunk.
Change-Id: Ifdcc2099f634314fafe8444e2d676e192e89e295
BUG: 1603118
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/155126
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
libglusterfs/src/globals.h | 4 ++-
tests/bugs/shard/configure-lru-limit.t | 48 +++++++++++++++++++++++++
xlators/features/shard/src/shard.c | 19 ++++++++--
xlators/features/shard/src/shard.h | 3 +-
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 ++++
5 files changed, 75 insertions(+), 5 deletions(-)
create mode 100644 tests/bugs/shard/configure-lru-limit.t
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index 97c4fad..555f44b 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -43,7 +43,7 @@
*/
#define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly
should not change */
-#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_3 /* MAX VERSION is the maximum
+#define GD_OP_VERSION_MAX GD_OP_VERSION_4_2_0 /* MAX VERSION is the maximum
count in VME table, should
keep changing with
introduction of newer
@@ -111,6 +111,8 @@
#define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */
+#define GD_OP_VERSION_4_2_0 40200 /* Op-version for GlusterFS 4.2.0 */
+
/* Downstream only change */
#define GD_OP_VERSION_3_11_2 31102 /* Op-version for RHGS 3.3.1-async */
#define GD_OP_VERSION_3_13_3 31303 /* Op-version for RHGS-3.4-Batch Update-1*/
diff --git a/tests/bugs/shard/configure-lru-limit.t b/tests/bugs/shard/configure-lru-limit.t
new file mode 100644
index 0000000..a8ba8ed
--- /dev/null
+++ b/tests/bugs/shard/configure-lru-limit.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Perform a write that would cause 25 shards to be created, 24 of them under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=100
+
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+
+# Base shard is never added to this list. So all other shards should make up for 24 inodes in lru list
+EXPECT "24" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
+
+rm -f $statedump
+
+# Test to ensure there's no "reconfiguration" of the value once set.
+TEST $CLI volume set $V0 features.shard-lru-limit 30
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "30" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 6066a54..eb32168 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -668,7 +668,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
shard_inode_ctx_get (linked_inode, this, &ctx);
if (list_empty (&ctx->ilist)) {
- if (priv->inode_count + 1 <= SHARD_MAX_INODES) {
+ if (priv->inode_count + 1 <= priv->lru_limit) {
/* If this inode was linked here for the first time (indicated
* by empty list), and if there is still space in the priv list,
* add this ctx to the tail of the list.
@@ -6690,6 +6690,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("shard-deletion-rate", priv->deletion_rate, uint32, out);
+ GF_OPTION_INIT ("shard-lru-limit", priv->lru_limit, uint64, out);
+
this->local_pool = mem_pool_new (shard_local_t, 128);
if (!this->local_pool) {
ret = -1;
@@ -6808,7 +6810,7 @@ shard_priv_dump (xlator_t *this)
gf_uint64_2human_readable (priv->block_size));
gf_proc_dump_write ("inode-count", "%d", priv->inode_count);
gf_proc_dump_write ("ilist_head", "%p", &priv->ilist_head);
- gf_proc_dump_write ("lru-max-limit", "%d", SHARD_MAX_INODES);
+ gf_proc_dump_write ("lru-max-limit", "%d", priv->lru_limit);
return 0;
}
@@ -6877,5 +6879,18 @@ struct volume_options options[] = {
.max = INT_MAX,
.description = "The number of shards to send deletes on at a time",
},
+ { .key = {"shard-lru-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "16384",
+ .min = 20,
+ .max = INT_MAX,
+ .description = "The number of resolved shard inodes to keep in "
+ "memory. A higher number means shards that are "
+ "resolved will remain in memory longer, avoiding "
+ "frequent lookups on them when they participate in "
+ "file operations. The option also has a bearing on "
+ "amount of memory consumed by these inodes and their "
+ "internal metadata",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 5de098a..ac3813c 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -23,8 +23,6 @@
#define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB)
#define SHARD_XATTR_PREFIX "trusted.glusterfs.shard."
#define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size"
-#define SHARD_INODE_LRU_LIMIT 4096
-#define SHARD_MAX_INODES 16384
/**
* Bit masks for the valid flag, which is used while updating ctx
**/
@@ -216,6 +214,7 @@ typedef struct shard_priv {
struct list_head ilist_head;
uint32_t deletion_rate;
shard_first_lookup_state_t first_lookup;
+ uint64_t lru_limit;
} shard_priv_t;
typedef struct {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index a825f52..d442fe0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -3298,6 +3298,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_7_0,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .key = "features.shard-lru-limit",
+ .voltype = "features/shard",
+ .op_version = GD_OP_VERSION_4_2_0,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .type = NO_DOC,
+ },
{ .key = "features.shard-deletion-rate",
.voltype = "features/shard",
.op_version = GD_OP_VERSION_3_13_4,
--
1.8.3.1