From f7f4b71d559dc6950bc795742f64e8eaeeadf3ec Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Mon, 26 Jul 2021 16:30:26 +0100 Subject: [PATCH] cache: Add cache-min-block-size parameter This allows you to choose a larger block size. I found experimentally that this improves performance because of locality in access patterns. The idea came from qcow2 which implicitly does the same thing because of the relatively large cluster size (32K). nbdkit + cache-filter with 4K block size + cache-on-read + curl (to a very slow remote site): => virt-inspector took 22 mins same with 64K block size: => virt-inspector took 19 mins However compared to a qcow2 file using qemu's copy-on-read, backed with nbdkit + curl we are still a lot slower, possibly because having the cache inside virt-inspector greatly reduces round trip overhead: => virt-inspector took 13 mins (cherry picked from commit 4ceacb6caa64e12bd78af5f90e86ee591e055944) --- filters/cache/blk.c | 2 +- filters/cache/cache.c | 36 ++++++++++---- filters/cache/cache.h | 3 ++ filters/cache/nbdkit-cache-filter.pod | 9 ++++ tests/Makefile.am | 2 + tests/test-cache-block-size.sh | 70 +++++++++++++++++++++++++++ 6 files changed, 112 insertions(+), 10 deletions(-) create mode 100755 tests/test-cache-block-size.sh diff --git a/filters/cache/blk.c b/filters/cache/blk.c index 19f79605..6276985f 100644 --- a/filters/cache/blk.c +++ b/filters/cache/blk.c @@ -149,7 +149,7 @@ blk_init (void) nbdkit_error ("fstatvfs: %s: %m", tmpdir); return -1; } - blksize = MAX (4096, statvfs.f_bsize); + blksize = MAX (min_block_size, statvfs.f_bsize); nbdkit_debug ("cache: block size: %u", blksize); bitmap_init (&bm, blksize, 2 /* bits per block */); diff --git a/filters/cache/cache.c b/filters/cache/cache.c index 44da0008..109ac89e 100644 --- a/filters/cache/cache.c +++ b/filters/cache/cache.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,7 @@ #include "blk.h" #include "reclaim.h" #include "isaligned.h" +#include "ispowerof2.h" #include "minmax.h" #include "rounding.h" @@ -70,7 +72,8 @@ */ static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; -unsigned blksize; +unsigned blksize; /* actual block size (picked by blk.c) */ +unsigned min_block_size = 4096; enum cache_mode cache_mode = CACHE_MODE_WRITEBACK; int64_t max_size = -1; unsigned hi_thresh = 95, lo_thresh = 80; @@ -80,13 +83,6 @@ const char *cor_path; static int cache_flush (nbdkit_next *next, void *handle, uint32_t flags, int *err); -static void -cache_load (void) -{ - if (blk_init () == -1) - exit (EXIT_FAILURE); -} - static void cache_unload (void) { @@ -116,6 +112,19 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata, return -1; } } + else if (strcmp (key, "cache-min-block-size") == 0) { + int64_t r; + + r = nbdkit_parse_size (value); + if (r == -1) + return -1; + if (r < 4096 || !is_power_of_2 (r) || r > UINT_MAX) { + nbdkit_error ("cache-min-block-size is not a power of 2, or is too small or too large"); + return -1; + } + min_block_size = r; + return 0; + } #ifdef HAVE_CACHE_RECLAIM else if (strcmp (key, "cache-max-size") == 0) { int64_t r; @@ -220,6 +229,15 @@ cache_config_complete (nbdkit_next_config_complete *next, return next (nxdata); } +static int +cache_get_ready (int thread_model) +{ + if (blk_init () == -1) + return -1; + + return 0; +} + /* Get the file size, set the cache size. */ static int64_t cache_get_size (nbdkit_next *next, @@ -691,11 +709,11 @@ cache_cache (nbdkit_next *next, static struct nbdkit_filter filter = { .name = "cache", .longname = "nbdkit caching filter", - .load = cache_load, .unload = cache_unload, .config = cache_config, .config_complete = cache_config_complete, .config_help = cache_config_help, + .get_ready = cache_get_ready, .prepare = cache_prepare, .get_size = cache_get_size, .can_cache = cache_can_cache, diff --git a/filters/cache/cache.h b/filters/cache/cache.h index a559adef..5c32c37c 100644 --- a/filters/cache/cache.h +++ b/filters/cache/cache.h @@ -45,6 +45,9 @@ extern enum cache_mode { /* Size of a block in the cache. */ extern unsigned blksize; +/* Minimum block size (cache-min-block-size parameter). */ +extern unsigned min_block_size; + /* Maximum size of the cache and high/low thresholds. */ extern int64_t max_size; extern unsigned hi_thresh, lo_thresh; diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod index f20cb9ce..6cbd1c08 100644 --- a/filters/cache/nbdkit-cache-filter.pod +++ b/filters/cache/nbdkit-cache-filter.pod @@ -5,6 +5,7 @@ nbdkit-cache-filter - nbdkit caching filter =head1 SYNOPSIS nbdkit --filter=cache plugin [cache=writeback|writethrough|unsafe] + [cache-min-block-size=SIZE] [cache-max-size=SIZE] [cache-high-threshold=N] [cache-low-threshold=N] @@ -59,6 +60,14 @@ This is dangerous and can cause data loss, but this may be acceptable if you only use it for testing or with data that you don't care about or can cheaply reconstruct. +=item BSIZE + +Set the minimum block size used by the cache. This must be a power of +2 and E 4096. + +The default is 4096, or the block size of the filesystem which +contains the temporary file storing the cache (whichever is larger). + =item BSIZE =item BN diff --git a/tests/Makefile.am b/tests/Makefile.am index 9630205d..a038eabc 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1371,12 +1371,14 @@ EXTRA_DIST += test-blocksize.sh test-blocksize-extents.sh # cache filter test. TESTS += \ test-cache.sh \ + test-cache-block-size.sh \ test-cache-on-read.sh \ test-cache-max-size.sh \ test-cache-unaligned.sh \ $(NULL) EXTRA_DIST += \ test-cache.sh \ + test-cache-block-size.sh \ test-cache-on-read.sh \ test-cache-max-size.sh \ test-cache-unaligned.sh \ diff --git a/tests/test-cache-block-size.sh b/tests/test-cache-block-size.sh new file mode 100755 index 00000000..a2a27407 --- /dev/null +++ b/tests/test-cache-block-size.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# nbdkit +# Copyright (C) 2018-2021 Red Hat Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Red Hat nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +source ./functions.sh +set -e +set -x + +requires_filter cache +requires_nbdsh_uri + +sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) +files="cache-block-size.img $sock cache-block-size.pid" +rm -f $files +cleanup_fn rm -f $files + +# Create an empty base image. +truncate -s 128K cache-block-size.img + +# Run nbdkit with the caching filter. +start_nbdkit -P cache-block-size.pid -U $sock --filter=cache \ + file cache-block-size.img cache-min-block-size=64K + +nbdsh --connect "nbd+unix://?socket=$sock" \ + -c ' +# Write some pattern data to the overlay and check it reads back OK. +buf = b"abcd" * 16384 +h.pwrite(buf, 32768) +zero = h.pread(32768, 0) +assert zero == bytearray(32768) +buf2 = h.pread(65536, 32768) +assert buf == buf2 + +# Flushing should write through to the underlying file. +h.flush() + +with open("cache-block-size.img", "rb") as file: + zero = file.read(32768) + assert zero == bytearray(32768) + buf2 = file.read(65536) + assert buf == buf2 +' -- 2.31.1