2021-08-19 07:53:56 +00:00
|
|
|
From f7f4b71d559dc6950bc795742f64e8eaeeadf3ec Mon Sep 17 00:00:00 2001
|
2021-07-26 18:44:01 +00:00
|
|
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
|
|
|
Date: Mon, 26 Jul 2021 16:30:26 +0100
|
|
|
|
Subject: [PATCH] cache: Add cache-min-block-size parameter
|
|
|
|
|
|
|
|
This allows you to choose a larger block size. I found experimentally
|
|
|
|
that this improves performance because of locality in access patterns.
|
|
|
|
The idea came from qcow2 which implicitly does the same thing because
|
|
|
|
of the relatively large cluster size (32K).
|
|
|
|
|
|
|
|
nbdkit + cache-filter with 4K block size + cache-on-read + curl
|
|
|
|
(to a very slow remote site):
|
|
|
|
=> virt-inspector took 22 mins
|
|
|
|
|
|
|
|
same with 64K block size:
|
|
|
|
=> virt-inspector took 19 mins
|
|
|
|
|
|
|
|
However compared to a qcow2 file using qemu's copy-on-read, backed
|
|
|
|
with nbdkit + curl we are still a lot slower, possibly because having
|
|
|
|
the cache inside virt-inspector greatly reduces round trip overhead:
|
|
|
|
=> virt-inspector took 13 mins
|
|
|
|
|
|
|
|
(cherry picked from commit 4ceacb6caa64e12bd78af5f90e86ee591e055944)
|
|
|
|
---
|
|
|
|
filters/cache/blk.c | 2 +-
|
|
|
|
filters/cache/cache.c | 36 ++++++++++----
|
|
|
|
filters/cache/cache.h | 3 ++
|
|
|
|
filters/cache/nbdkit-cache-filter.pod | 9 ++++
|
|
|
|
tests/Makefile.am | 2 +
|
|
|
|
tests/test-cache-block-size.sh | 70 +++++++++++++++++++++++++++
|
|
|
|
6 files changed, 112 insertions(+), 10 deletions(-)
|
|
|
|
create mode 100755 tests/test-cache-block-size.sh
|
|
|
|
|
|
|
|
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
|
|
|
|
index 19f79605..6276985f 100644
|
|
|
|
--- a/filters/cache/blk.c
|
|
|
|
+++ b/filters/cache/blk.c
|
|
|
|
@@ -149,7 +149,7 @@ blk_init (void)
|
|
|
|
nbdkit_error ("fstatvfs: %s: %m", tmpdir);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
- blksize = MAX (4096, statvfs.f_bsize);
|
|
|
|
+ blksize = MAX (min_block_size, statvfs.f_bsize);
|
|
|
|
nbdkit_debug ("cache: block size: %u", blksize);
|
|
|
|
|
|
|
|
bitmap_init (&bm, blksize, 2 /* bits per block */);
|
|
|
|
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
|
2021-08-05 08:13:39 +00:00
|
|
|
index 44da0008..109ac89e 100644
|
2021-07-26 18:44:01 +00:00
|
|
|
--- a/filters/cache/cache.c
|
|
|
|
+++ b/filters/cache/cache.c
|
|
|
|
@@ -40,6 +40,7 @@
|
|
|
|
#include <inttypes.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
+#include <limits.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <assert.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
@@ -62,6 +63,7 @@
|
|
|
|
#include "blk.h"
|
|
|
|
#include "reclaim.h"
|
|
|
|
#include "isaligned.h"
|
|
|
|
+#include "ispowerof2.h"
|
|
|
|
#include "minmax.h"
|
|
|
|
#include "rounding.h"
|
|
|
|
|
|
|
|
@@ -70,7 +72,8 @@
|
|
|
|
*/
|
|
|
|
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
|
|
|
|
|
|
|
-unsigned blksize;
|
|
|
|
+unsigned blksize; /* actual block size (picked by blk.c) */
|
|
|
|
+unsigned min_block_size = 4096;
|
|
|
|
enum cache_mode cache_mode = CACHE_MODE_WRITEBACK;
|
|
|
|
int64_t max_size = -1;
|
|
|
|
unsigned hi_thresh = 95, lo_thresh = 80;
|
|
|
|
@@ -80,13 +83,6 @@ const char *cor_path;
|
|
|
|
static int cache_flush (nbdkit_next *next, void *handle, uint32_t flags,
|
|
|
|
int *err);
|
|
|
|
|
|
|
|
-static void
|
|
|
|
-cache_load (void)
|
|
|
|
-{
|
|
|
|
- if (blk_init () == -1)
|
|
|
|
- exit (EXIT_FAILURE);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
static void
|
|
|
|
cache_unload (void)
|
|
|
|
{
|
|
|
|
@@ -116,6 +112,19 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
+ else if (strcmp (key, "cache-min-block-size") == 0) {
|
|
|
|
+ int64_t r;
|
|
|
|
+
|
|
|
|
+ r = nbdkit_parse_size (value);
|
|
|
|
+ if (r == -1)
|
|
|
|
+ return -1;
|
|
|
|
+ if (r < 4096 || !is_power_of_2 (r) || r > UINT_MAX) {
|
|
|
|
+ nbdkit_error ("cache-min-block-size is not a power of 2, or is too small or too large");
|
|
|
|
+ return -1;
|
|
|
|
+ }
|
|
|
|
+ min_block_size = r;
|
|
|
|
+ return 0;
|
|
|
|
+ }
|
|
|
|
#ifdef HAVE_CACHE_RECLAIM
|
|
|
|
else if (strcmp (key, "cache-max-size") == 0) {
|
|
|
|
int64_t r;
|
|
|
|
@@ -220,6 +229,15 @@ cache_config_complete (nbdkit_next_config_complete *next,
|
|
|
|
return next (nxdata);
|
|
|
|
}
|
|
|
|
|
|
|
|
+static int
|
|
|
|
+cache_get_ready (int thread_model)
|
|
|
|
+{
|
|
|
|
+ if (blk_init () == -1)
|
|
|
|
+ return -1;
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
/* Get the file size, set the cache size. */
|
|
|
|
static int64_t
|
|
|
|
cache_get_size (nbdkit_next *next,
|
|
|
|
@@ -691,11 +709,11 @@ cache_cache (nbdkit_next *next,
|
|
|
|
static struct nbdkit_filter filter = {
|
|
|
|
.name = "cache",
|
|
|
|
.longname = "nbdkit caching filter",
|
|
|
|
- .load = cache_load,
|
|
|
|
.unload = cache_unload,
|
|
|
|
.config = cache_config,
|
|
|
|
.config_complete = cache_config_complete,
|
|
|
|
.config_help = cache_config_help,
|
|
|
|
+ .get_ready = cache_get_ready,
|
|
|
|
.prepare = cache_prepare,
|
|
|
|
.get_size = cache_get_size,
|
|
|
|
.can_cache = cache_can_cache,
|
|
|
|
diff --git a/filters/cache/cache.h b/filters/cache/cache.h
|
|
|
|
index a559adef..5c32c37c 100644
|
|
|
|
--- a/filters/cache/cache.h
|
|
|
|
+++ b/filters/cache/cache.h
|
|
|
|
@@ -45,6 +45,9 @@ extern enum cache_mode {
|
|
|
|
/* Size of a block in the cache. */
|
|
|
|
extern unsigned blksize;
|
|
|
|
|
|
|
|
+/* Minimum block size (cache-min-block-size parameter). */
|
|
|
|
+extern unsigned min_block_size;
|
|
|
|
+
|
|
|
|
/* Maximum size of the cache and high/low thresholds. */
|
|
|
|
extern int64_t max_size;
|
|
|
|
extern unsigned hi_thresh, lo_thresh;
|
|
|
|
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
|
2021-08-05 08:13:39 +00:00
|
|
|
index f20cb9ce..6cbd1c08 100644
|
2021-07-26 18:44:01 +00:00
|
|
|
--- a/filters/cache/nbdkit-cache-filter.pod
|
|
|
|
+++ b/filters/cache/nbdkit-cache-filter.pod
|
|
|
|
@@ -5,6 +5,7 @@ nbdkit-cache-filter - nbdkit caching filter
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
|
|
|
|
nbdkit --filter=cache plugin [cache=writeback|writethrough|unsafe]
|
|
|
|
+ [cache-min-block-size=SIZE]
|
|
|
|
[cache-max-size=SIZE]
|
|
|
|
[cache-high-threshold=N]
|
|
|
|
[cache-low-threshold=N]
|
|
|
|
@@ -59,6 +60,14 @@ This is dangerous and can cause data loss, but this may be acceptable
|
|
|
|
if you only use it for testing or with data that you don't care about
|
|
|
|
or can cheaply reconstruct.
|
|
|
|
|
|
|
|
+=item B<cache-min-block-size=>SIZE
|
|
|
|
+
|
|
|
|
+Set the minimum block size used by the cache. This must be a power of
|
|
|
|
+2 and E<ge> 4096.
|
|
|
|
+
|
|
|
|
+The default is 4096, or the block size of the filesystem which
|
|
|
|
+contains the temporary file storing the cache (whichever is larger).
|
|
|
|
+
|
|
|
|
=item B<cache-max-size=>SIZE
|
|
|
|
|
|
|
|
=item B<cache-high-threshold=>N
|
|
|
|
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
|
|
|
index 9630205d..a038eabc 100644
|
|
|
|
--- a/tests/Makefile.am
|
|
|
|
+++ b/tests/Makefile.am
|
|
|
|
@@ -1371,12 +1371,14 @@ EXTRA_DIST += test-blocksize.sh test-blocksize-extents.sh
|
|
|
|
# cache filter test.
|
|
|
|
TESTS += \
|
|
|
|
test-cache.sh \
|
|
|
|
+ test-cache-block-size.sh \
|
|
|
|
test-cache-on-read.sh \
|
|
|
|
test-cache-max-size.sh \
|
|
|
|
test-cache-unaligned.sh \
|
|
|
|
$(NULL)
|
|
|
|
EXTRA_DIST += \
|
|
|
|
test-cache.sh \
|
|
|
|
+ test-cache-block-size.sh \
|
|
|
|
test-cache-on-read.sh \
|
|
|
|
test-cache-max-size.sh \
|
|
|
|
test-cache-unaligned.sh \
|
|
|
|
diff --git a/tests/test-cache-block-size.sh b/tests/test-cache-block-size.sh
|
|
|
|
new file mode 100755
|
|
|
|
index 00000000..a2a27407
|
|
|
|
--- /dev/null
|
|
|
|
+++ b/tests/test-cache-block-size.sh
|
|
|
|
@@ -0,0 +1,70 @@
|
|
|
|
+#!/usr/bin/env bash
|
|
|
|
+# nbdkit
|
|
|
|
+# Copyright (C) 2018-2021 Red Hat Inc.
|
|
|
|
+#
|
|
|
|
+# Redistribution and use in source and binary forms, with or without
|
|
|
|
+# modification, are permitted provided that the following conditions are
|
|
|
|
+# met:
|
|
|
|
+#
|
|
|
|
+# * Redistributions of source code must retain the above copyright
|
|
|
|
+# notice, this list of conditions and the following disclaimer.
|
|
|
|
+#
|
|
|
|
+# * Redistributions in binary form must reproduce the above copyright
|
|
|
|
+# notice, this list of conditions and the following disclaimer in the
|
|
|
|
+# documentation and/or other materials provided with the distribution.
|
|
|
|
+#
|
|
|
|
+# * Neither the name of Red Hat nor the names of its contributors may be
|
|
|
|
+# used to endorse or promote products derived from this software without
|
|
|
|
+# specific prior written permission.
|
|
|
|
+#
|
|
|
|
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
|
|
|
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
|
|
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
|
|
|
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
|
|
|
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
|
|
|
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
|
|
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
|
|
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
|
|
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
+# SUCH DAMAGE.
|
|
|
|
+
|
|
|
|
+source ./functions.sh
|
|
|
|
+set -e
|
|
|
|
+set -x
|
|
|
|
+
|
|
|
|
+requires_filter cache
|
|
|
|
+requires_nbdsh_uri
|
|
|
|
+
|
|
|
|
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
|
|
|
|
+files="cache-block-size.img $sock cache-block-size.pid"
|
|
|
|
+rm -f $files
|
|
|
|
+cleanup_fn rm -f $files
|
|
|
|
+
|
|
|
|
+# Create an empty base image.
|
|
|
|
+truncate -s 128K cache-block-size.img
|
|
|
|
+
|
|
|
|
+# Run nbdkit with the caching filter.
|
|
|
|
+start_nbdkit -P cache-block-size.pid -U $sock --filter=cache \
|
|
|
|
+ file cache-block-size.img cache-min-block-size=64K
|
|
|
|
+
|
|
|
|
+nbdsh --connect "nbd+unix://?socket=$sock" \
|
|
|
|
+ -c '
|
|
|
|
+# Write some pattern data to the overlay and check it reads back OK.
|
|
|
|
+buf = b"abcd" * 16384
|
|
|
|
+h.pwrite(buf, 32768)
|
|
|
|
+zero = h.pread(32768, 0)
|
|
|
|
+assert zero == bytearray(32768)
|
|
|
|
+buf2 = h.pread(65536, 32768)
|
|
|
|
+assert buf == buf2
|
|
|
|
+
|
|
|
|
+# Flushing should write through to the underlying file.
|
|
|
|
+h.flush()
|
|
|
|
+
|
|
|
|
+with open("cache-block-size.img", "rb") as file:
|
|
|
|
+ zero = file.read(32768)
|
|
|
|
+ assert zero == bytearray(32768)
|
|
|
|
+ buf2 = file.read(65536)
|
|
|
|
+ assert buf == buf2
|
|
|
|
+'
|
|
|
|
--
|
|
|
|
2.31.1
|
|
|
|
|