nbdkit/SOURCES/0010-cow-Implement-cow-on-r...

458 lines
15 KiB
Diff

From 57f9bd29f9d7432ad5a70620c373b28db768a314 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 27 Jul 2021 23:01:52 +0100
Subject: [PATCH] cow: Implement cow-on-read

This is very similar to the nbdkit-cache-filter cache-on-read flag.

(cherry picked from commit bd93b3f27246f917de48a6cc2525d9c424c07976)
---
filters/cow/blk.c | 21 ++++++--
filters/cow/blk.h | 10 ++--
filters/cow/cow.c | 56 ++++++++++++++++----
filters/cow/nbdkit-cow-filter.pod | 17 ++++++
tests/Makefile.am | 4 ++
tests/test-cow-on-read-caches.sh | 87 +++++++++++++++++++++++++++++++
tests/test-cow-on-read.sh | 59 +++++++++++++++++++++
7 files changed, 236 insertions(+), 18 deletions(-)
create mode 100755 tests/test-cow-on-read-caches.sh
create mode 100755 tests/test-cow-on-read.sh
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 121b0dd4..4f84e092 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -226,7 +226,7 @@ blk_status (uint64_t blknum, bool *present, bool *trimmed)
int
blk_read_multiple (nbdkit_next *next,
uint64_t blknum, uint64_t nrblocks,
- uint8_t *block, int *err)
+ uint8_t *block, bool cow_on_read, int *err)
{
off_t offset = blknum * BLKSIZE;
enum bm_entry state;
@@ -276,6 +276,19 @@ blk_read_multiple (nbdkit_next *next,
* zeroing the tail.
*/
memset (block + n, 0, tail);
+
+ /* If cow-on-read is true then copy the blocks to the overlay and
+ * set them as allocated.
+ */
+ if (cow_on_read) {
+ if (full_pwrite (fd, block, BLKSIZE * runblocks, offset) == -1) {
+ *err = errno;
+ nbdkit_error ("pwrite: %m");
+ return -1;
+ }
+ for (b = 0; b < runblocks; ++b)
+ bitmap_set_blk (&bm, blknum+b, BLOCK_ALLOCATED);
+ }
}
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
if (full_pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
@@ -297,14 +310,14 @@ blk_read_multiple (nbdkit_next *next,
blknum + runblocks,
nrblocks - runblocks,
block + BLKSIZE * runblocks,
- err);
+ cow_on_read, err);
}
int
blk_read (nbdkit_next *next,
- uint64_t blknum, uint8_t *block, int *err)
+ uint64_t blknum, uint8_t *block, bool cow_on_read, int *err)
{
- return blk_read_multiple (next, blknum, 1, block, err);
+ return blk_read_multiple (next, blknum, 1, block, cow_on_read, err);
}
int
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
index 1bc85283..b7e6f092 100644
--- a/filters/cow/blk.h
+++ b/filters/cow/blk.h
@@ -52,14 +52,16 @@ extern void blk_status (uint64_t blknum, bool *present, bool *trimmed);
/* Read a single block from the overlay or plugin. */
extern int blk_read (nbdkit_next *next,
- uint64_t blknum, uint8_t *block, int *err)
- __attribute__((__nonnull__ (1, 3, 4)));
+ uint64_t blknum, uint8_t *block,
+ bool cow_on_read, int *err)
+ __attribute__((__nonnull__ (1, 3, 5)));
/* Read multiple blocks from the overlay or plugin. */
extern int blk_read_multiple (nbdkit_next *next,
uint64_t blknum, uint64_t nrblocks,
- uint8_t *block, int *err)
- __attribute__((__nonnull__ (1, 4, 5)));
+ uint8_t *block,
+ bool cow_on_read, int *err)
+ __attribute__((__nonnull__ (1, 4, 6)));
/* Cache mode for blocks not already in overlay */
enum cache_mode {
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
index 78daca22..6efb39f2 100644
--- a/filters/cow/cow.c
+++ b/filters/cow/cow.c
@@ -38,6 +38,7 @@
#include <stdbool.h>
#include <inttypes.h>
#include <string.h>
+#include <unistd.h>
#include <errno.h>
#include <pthread.h>
@@ -59,6 +60,15 @@ static pthread_mutex_t rmw_lock = PTHREAD_MUTEX_INITIALIZER;
static bool cow_on_cache;
+/* Copy on read ("cow-on-read") mode. */
+extern enum cor_mode {
+ COR_OFF,
+ COR_ON,
+ COR_PATH,
+} cor_mode;
+enum cor_mode cor_mode = COR_OFF;
+const char *cor_path;
+
static void
cow_load (void)
{
@@ -85,13 +95,39 @@ cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
cow_on_cache = r;
return 0;
}
+ else if (strcmp (key, "cow-on-read") == 0) {
+ if (value[0] == '/') {
+ cor_path = value;
+ cor_mode = COR_PATH;
+ }
+ else {
+ int r = nbdkit_parse_bool (value);
+ if (r == -1)
+ return -1;
+ cor_mode = r ? COR_ON : COR_OFF;
+ }
+ return 0;
+ }
else {
return next (nxdata, key, value);
}
}
#define cow_config_help \
- "cow-on-cache=<BOOL> Set to true to treat client cache requests as writes.\n"
+ "cow-on-cache=<BOOL> Copy cache (prefetch) requests to the overlay.\n" \
+ "cow-on-read=<BOOL>|/PATH Copy read requests to the overlay."
+
+/* Decide if cow-on-read is currently on or off. */
+bool
+cow_on_read (void)
+{
+ switch (cor_mode) {
+ case COR_ON: return true;
+ case COR_OFF: return false;
+ case COR_PATH: return access (cor_path, F_OK) == 0;
+ default: abort ();
+ }
+}
static void *
cow_open (nbdkit_next_open *next, nbdkit_context *nxdata,
@@ -230,7 +266,7 @@ cow_pread (nbdkit_next *next,
uint64_t n = MIN (BLKSIZE - blkoffs, count);
assert (block);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r == -1)
return -1;
@@ -245,7 +281,7 @@ cow_pread (nbdkit_next *next,
/* Aligned body */
nrblocks = count / BLKSIZE;
if (nrblocks > 0) {
- r = blk_read_multiple (next, blknum, nrblocks, buf, err);
+ r = blk_read_multiple (next, blknum, nrblocks, buf, cow_on_read (), err);
if (r == -1)
return -1;
@@ -258,7 +294,7 @@ cow_pread (nbdkit_next *next,
/* Unaligned tail */
if (count) {
assert (block);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r == -1)
return -1;
@@ -299,7 +335,7 @@ cow_pwrite (nbdkit_next *next,
*/
assert (block);
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memcpy (&block[blkoffs], buf, n);
r = blk_write (blknum, block, err);
@@ -329,7 +365,7 @@ cow_pwrite (nbdkit_next *next,
if (count) {
assert (block);
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memcpy (block, buf, count);
r = blk_write (blknum, block, err);
@@ -379,7 +415,7 @@ cow_zero (nbdkit_next *next,
* Hold the rmw_lock over the whole operation.
*/
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memset (&block[blkoffs], 0, n);
r = blk_write (blknum, block, err);
@@ -411,7 +447,7 @@ cow_zero (nbdkit_next *next,
/* Unaligned tail */
if (count) {
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memset (block, 0, count);
r = blk_write (blknum, block, err);
@@ -455,7 +491,7 @@ cow_trim (nbdkit_next *next,
* Hold the lock over the whole operation.
*/
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memset (&block[blkoffs], 0, n);
r = blk_write (blknum, block, err);
@@ -482,7 +518,7 @@ cow_trim (nbdkit_next *next,
/* Unaligned tail */
if (count) {
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memset (block, 0, count);
r = blk_write (blknum, block, err);
diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod
index 571189e7..01261429 100644
--- a/filters/cow/nbdkit-cow-filter.pod
+++ b/filters/cow/nbdkit-cow-filter.pod
@@ -62,6 +62,23 @@ the data from the plugin into the overlay.
Do not save data from cache (prefetch) requests in the overlay. This
leaves the overlay as small as possible. This is the default.
+=item B<cow-on-read=true>
+
+When the client issues a read request, copy the data into the overlay
+so that the same data can be served more quickly later.
+
+=item B<cow-on-read=false>
+
+Do not save data from read requests in the overlay. This leaves the
+overlay as small as possible. This is the default.
+
+=item B<cow-on-read=/PATH>
+
+When F</PATH> (which must be an absolute path) exists, this behaves
+like C<cow-on-read=true>, and when it does not exist like
+C<cow-on-read=false>. This allows you to control the C<cow-on-read>
+behaviour while nbdkit is running.
+
=back
=head1 EXAMPLES
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 51ca913a..edc8d66d 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1407,6 +1407,8 @@ TESTS += \
test-cow-extents1.sh \
test-cow-extents2.sh \
test-cow-extents-large.sh \
+ test-cow-on-read.sh \
+ test-cow-on-read-caches.sh \
test-cow-unaligned.sh \
$(NULL)
endif
@@ -1417,6 +1419,8 @@ EXTRA_DIST += \
test-cow-extents2.sh \
test-cow-extents-large.sh \
test-cow-null.sh \
+ test-cow-on-read.sh \
+ test-cow-on-read-caches.sh \
test-cow-unaligned.sh \
$(NULL)
diff --git a/tests/test-cow-on-read-caches.sh b/tests/test-cow-on-read-caches.sh
new file mode 100755
index 00000000..c5b60198
--- /dev/null
+++ b/tests/test-cow-on-read-caches.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_filter cow
+requires_filter delay
+requires_nbdsh_uri
+
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+files="$sock cow-on-read-caches.pid"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Run nbdkit with the cow filter, cow-on-read and a read delay.
+start_nbdkit -P cow-on-read-caches.pid -U $sock \
+ --filter=cow --filter=delay \
+ memory 64K cow-on-read=true rdelay=10
+
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c '
+from time import time
+
+# First read should suffer a penalty. Because we are reading
+# a single 64K block (same size as the COW block), we should
+# only suffer one penalty of approx. 10 seconds.
+st = time()
+zb = h.pread(65536, 0)
+et = time()
+el = et-st
+print("elapsed time: %g" % el)
+assert et-st >= 10
+assert zb == bytearray(65536)
+
+# Second read should not suffer a penalty.
+st = time()
+zb = h.pread(65536, 0)
+et = time()
+el = et-st
+print("elapsed time: %g" % el)
+assert el < 10
+assert zb == bytearray(65536)
+
+# Write something.
+buf = b"abcd" * 16384
+h.pwrite(buf, 0)
+
+# Reading back should be quick since it is stored in the overlay.
+st = time()
+buf2 = h.pread(65536, 0)
+et = time()
+el = et-st
+print("elapsed time: %g" % el)
+assert el < 10
+assert buf == buf2
+'
diff --git a/tests/test-cow-on-read.sh b/tests/test-cow-on-read.sh
new file mode 100755
index 00000000..4f58b33b
--- /dev/null
+++ b/tests/test-cow-on-read.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_filter cow
+requires_nbdsh_uri
+
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+files="$sock cow-on-read.pid"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Run nbdkit with the cow filter and cow-on-read.
+start_nbdkit -P cow-on-read.pid -U $sock \
+ --filter=cow \
+ memory 128K cow-on-read=true
+
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c '
+# Write some pattern data to the overlay and check it reads back OK.
+buf = b"abcd" * 16384
+h.pwrite(buf, 32768)
+zero = h.pread(32768, 0)
+assert zero == bytearray(32768)
+buf2 = h.pread(65536, 32768)
+assert buf == buf2
+'
--
2.31.1