import nbdkit-1.26.5-1.el9
This commit is contained in:
commit
ed82013cf7
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
SOURCES/libguestfs.keyring
|
||||||
|
SOURCES/nbdkit-1.26.5.tar.gz
|
2
.nbdkit.metadata
Normal file
2
.nbdkit.metadata
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
cc1b37b9cfafa515aab3eefd345ecc59aac2ce7b SOURCES/libguestfs.keyring
|
||||||
|
52a221954f374f2a3f1adcc5c5e3e6f7332d906d SOURCES/nbdkit-1.26.5.tar.gz
|
@ -0,0 +1,39 @@
|
|||||||
|
From 89ef17c90996c0e212e3a17c8d26ff930ab464ea Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Blake <eblake@redhat.com>
|
||||||
|
Date: Mon, 16 Aug 2021 13:43:29 -0500
|
||||||
|
Subject: [PATCH] server: reset meta context replies on starttls
|
||||||
|
|
||||||
|
Related to CVE-2021-3716, but not as severe. No compliant client will
|
||||||
|
send NBD_CMD_BLOCK_STATUS unless it first negotiates
|
||||||
|
NBD_OPT_SET_META_CONTEXT. If an attacker injects a premature
|
||||||
|
SET_META_CONTEXT, either the client will never notice (because it
|
||||||
|
never uses BLOCK_STATUS), or the client will overwrite the attacker's
|
||||||
|
attempt with the client's own SET_META_CONTEXT request after
|
||||||
|
encryption is enabled. So I don't class this as having the potential
|
||||||
|
to trigger denial-of-service due to any protocol mismatch between
|
||||||
|
compliant client and server (I don't care what happens with
|
||||||
|
non-compliant clients).
|
||||||
|
|
||||||
|
Fixes: 26455d45 (server: protocol: Implement Block Status "base:allocation".)
|
||||||
|
(cherry picked from commit 6c5faac6a37077cf2366388a80862bb00616d0d8)
|
||||||
|
---
|
||||||
|
server/protocol-handshake-newstyle.c | 3 +++
|
||||||
|
1 file changed, 3 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/server/protocol-handshake-newstyle.c b/server/protocol-handshake-newstyle.c
|
||||||
|
index 7e6b7b1b..79b2c8ce 100644
|
||||||
|
--- a/server/protocol-handshake-newstyle.c
|
||||||
|
+++ b/server/protocol-handshake-newstyle.c
|
||||||
|
@@ -497,6 +497,9 @@ negotiate_handshake_newstyle_options (void)
|
||||||
|
debug ("using TLS on this connection");
|
||||||
|
/* Wipe out any cached state. */
|
||||||
|
conn->structured_replies = false;
|
||||||
|
+ free (conn->exportname_from_set_meta_context);
|
||||||
|
+ conn->exportname_from_set_meta_context = NULL;
|
||||||
|
+ conn->meta_context_base_allocation = false;
|
||||||
|
for_each_backend (b) {
|
||||||
|
free (conn->default_exportname[b->i]);
|
||||||
|
conn->default_exportname[b->i] = NULL;
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
124
SOURCES/0002-cache-Reduce-verbosity-of-debugging.patch
Normal file
124
SOURCES/0002-cache-Reduce-verbosity-of-debugging.patch
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
From 4b576a8e0eb99ec1a79ca432350fb7ac27a5c089 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Mon, 26 Jul 2021 11:59:43 +0100
|
||||||
|
Subject: [PATCH] cache: Reduce verbosity of debugging
|
||||||
|
|
||||||
|
The cache filter is very verbose in its debugging. Reduce the default
|
||||||
|
level. Use -D cache.verbose=1 to restore original debugging.
|
||||||
|
|
||||||
|
Compare commit 745a0f13662031c2b9c9b69f62b4ae3a6b2f38f0.
|
||||||
|
|
||||||
|
(cherry picked from commit 6be735edf7d5fb3fb8350c72e6d9525badbab14d)
|
||||||
|
---
|
||||||
|
filters/cache/blk.c | 53 +++++++++++++++++++++++++++------------------
|
||||||
|
1 file changed, 32 insertions(+), 21 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
|
||||||
|
index 12e8407e..f52f30e3 100644
|
||||||
|
--- a/filters/cache/blk.c
|
||||||
|
+++ b/filters/cache/blk.c
|
||||||
|
@@ -93,6 +93,9 @@ enum bm_entry {
|
||||||
|
BLOCK_DIRTY = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
+/* Extra debugging (-D cache.verbose=1). */
|
||||||
|
+NBDKIT_DLL_PUBLIC int cache_debug_verbose = 0;
|
||||||
|
+
|
||||||
|
int
|
||||||
|
blk_init (void)
|
||||||
|
{
|
||||||
|
@@ -199,12 +202,14 @@ blk_read (nbdkit_next *next,
|
||||||
|
|
||||||
|
reclaim (fd, &bm);
|
||||||
|
|
||||||
|
- nbdkit_debug ("cache: blk_read block %" PRIu64 " (offset %" PRIu64 ") is %s",
|
||||||
|
- blknum, (uint64_t) offset,
|
||||||
|
- state == BLOCK_NOT_CACHED ? "not cached" :
|
||||||
|
- state == BLOCK_CLEAN ? "clean" :
|
||||||
|
- state == BLOCK_DIRTY ? "dirty" :
|
||||||
|
- "unknown");
|
||||||
|
+ if (cache_debug_verbose)
|
||||||
|
+ nbdkit_debug ("cache: blk_read block %" PRIu64
|
||||||
|
+ " (offset %" PRIu64 ") is %s",
|
||||||
|
+ blknum, (uint64_t) offset,
|
||||||
|
+ state == BLOCK_NOT_CACHED ? "not cached" :
|
||||||
|
+ state == BLOCK_CLEAN ? "clean" :
|
||||||
|
+ state == BLOCK_DIRTY ? "dirty" :
|
||||||
|
+ "unknown");
|
||||||
|
|
||||||
|
if (state == BLOCK_NOT_CACHED) { /* Read underlying plugin. */
|
||||||
|
unsigned n = blksize, tail = 0;
|
||||||
|
@@ -225,9 +230,10 @@ blk_read (nbdkit_next *next,
|
||||||
|
|
||||||
|
/* If cache-on-read, copy the block to the cache. */
|
||||||
|
if (cache_on_read) {
|
||||||
|
- nbdkit_debug ("cache: cache-on-read block %" PRIu64
|
||||||
|
- " (offset %" PRIu64 ")",
|
||||||
|
- blknum, (uint64_t) offset);
|
||||||
|
+ if (cache_debug_verbose)
|
||||||
|
+ nbdkit_debug ("cache: cache-on-read block %" PRIu64
|
||||||
|
+ " (offset %" PRIu64 ")",
|
||||||
|
+ blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
if (pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
@@ -259,12 +265,14 @@ blk_cache (nbdkit_next *next,
|
||||||
|
|
||||||
|
reclaim (fd, &bm);
|
||||||
|
|
||||||
|
- nbdkit_debug ("cache: blk_cache block %" PRIu64 " (offset %" PRIu64 ") is %s",
|
||||||
|
- blknum, (uint64_t) offset,
|
||||||
|
- state == BLOCK_NOT_CACHED ? "not cached" :
|
||||||
|
- state == BLOCK_CLEAN ? "clean" :
|
||||||
|
- state == BLOCK_DIRTY ? "dirty" :
|
||||||
|
- "unknown");
|
||||||
|
+ if (cache_debug_verbose)
|
||||||
|
+ nbdkit_debug ("cache: blk_cache block %" PRIu64
|
||||||
|
+ " (offset %" PRIu64 ") is %s",
|
||||||
|
+ blknum, (uint64_t) offset,
|
||||||
|
+ state == BLOCK_NOT_CACHED ? "not cached" :
|
||||||
|
+ state == BLOCK_CLEAN ? "clean" :
|
||||||
|
+ state == BLOCK_DIRTY ? "dirty" :
|
||||||
|
+ "unknown");
|
||||||
|
|
||||||
|
if (state == BLOCK_NOT_CACHED) {
|
||||||
|
/* Read underlying plugin, copy to cache regardless of cache-on-read. */
|
||||||
|
@@ -284,8 +292,9 @@ blk_cache (nbdkit_next *next,
|
||||||
|
*/
|
||||||
|
memset (block + n, 0, tail);
|
||||||
|
|
||||||
|
- nbdkit_debug ("cache: cache block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
- blknum, (uint64_t) offset);
|
||||||
|
+ if (cache_debug_verbose)
|
||||||
|
+ nbdkit_debug ("cache: cache block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
+ blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
if (pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
@@ -324,8 +333,9 @@ blk_writethrough (nbdkit_next *next,
|
||||||
|
|
||||||
|
reclaim (fd, &bm);
|
||||||
|
|
||||||
|
- nbdkit_debug ("cache: writethrough block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
- blknum, (uint64_t) offset);
|
||||||
|
+ if (cache_debug_verbose)
|
||||||
|
+ nbdkit_debug ("cache: writethrough block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
+ blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
if (pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
@@ -357,8 +367,9 @@ blk_write (nbdkit_next *next,
|
||||||
|
|
||||||
|
reclaim (fd, &bm);
|
||||||
|
|
||||||
|
- nbdkit_debug ("cache: writeback block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
- blknum, (uint64_t) offset);
|
||||||
|
+ if (cache_debug_verbose)
|
||||||
|
+ nbdkit_debug ("cache: writeback block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
+ blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
if (pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
400
SOURCES/0003-cache-cow-Add-blk_read_multiple-function.patch
Normal file
400
SOURCES/0003-cache-cow-Add-blk_read_multiple-function.patch
Normal file
@ -0,0 +1,400 @@
|
|||||||
|
From b5dc8577c5c6d1205e2106b629fad327c3a409ea Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Mon, 26 Jul 2021 13:55:21 +0100
|
||||||
|
Subject: [PATCH] cache, cow: Add blk_read_multiple function
|
||||||
|
|
||||||
|
Currently the cache and cow filters break up large requests into many
|
||||||
|
single block-sized requests to the underlying plugin. For some
|
||||||
|
plugins (eg. curl) this is very inefficient and causes huge
|
||||||
|
slow-downs.
|
||||||
|
|
||||||
|
For example I tested nbdkit + curl vs nbdkit + cache + curl against a
|
||||||
|
slow, remote VMware server. A simple run of virt-inspector was at
|
||||||
|
least 6-7 times slower with the cache filter. (It was so slow that I
|
||||||
|
didn't actually let it run to completion - I am estimating the
|
||||||
|
slowdown multiple using interim debug messages).
|
||||||
|
|
||||||
|
Implement a new blk_read_multiple function in the cache filter. It
|
||||||
|
does not break up "runs" of blocks which all have the same cache
|
||||||
|
state. The cache .pread method uses the new function to read the
|
||||||
|
block-aligned part of the request.
|
||||||
|
|
||||||
|
(cherry picked from commit ab661ccef5b3369fa22c33d0289baddc251b73bf)
|
||||||
|
---
|
||||||
|
filters/cache/blk.c | 83 ++++++++++++++++++++++++++++++++-----------
|
||||||
|
filters/cache/blk.h | 6 ++++
|
||||||
|
filters/cache/cache.c | 21 +++++------
|
||||||
|
filters/cow/blk.c | 63 +++++++++++++++++++++++---------
|
||||||
|
filters/cow/blk.h | 6 ++++
|
||||||
|
filters/cow/cow.c | 21 +++++------
|
||||||
|
6 files changed, 138 insertions(+), 62 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
|
||||||
|
index f52f30e3..f85ada35 100644
|
||||||
|
--- a/filters/cache/blk.c
|
||||||
|
+++ b/filters/cache/blk.c
|
||||||
|
@@ -44,6 +44,7 @@
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
+#include <limits.h>
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
#ifdef HAVE_SYS_STATVFS_H
|
||||||
|
@@ -193,26 +194,40 @@ blk_set_size (uint64_t new_size)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int
|
||||||
|
-blk_read (nbdkit_next *next,
|
||||||
|
- uint64_t blknum, uint8_t *block, int *err)
|
||||||
|
+static int
|
||||||
|
+_blk_read_multiple (nbdkit_next *next,
|
||||||
|
+ uint64_t blknum, uint64_t nrblocks,
|
||||||
|
+ uint8_t *block, int *err)
|
||||||
|
{
|
||||||
|
off_t offset = blknum * blksize;
|
||||||
|
- enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED);
|
||||||
|
+ bool not_cached =
|
||||||
|
+ bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED) == BLOCK_NOT_CACHED;
|
||||||
|
+ uint64_t b, runblocks;
|
||||||
|
|
||||||
|
- reclaim (fd, &bm);
|
||||||
|
+ assert (nrblocks > 0);
|
||||||
|
|
||||||
|
if (cache_debug_verbose)
|
||||||
|
- nbdkit_debug ("cache: blk_read block %" PRIu64
|
||||||
|
+ nbdkit_debug ("cache: blk_read_multiple block %" PRIu64
|
||||||
|
" (offset %" PRIu64 ") is %s",
|
||||||
|
blknum, (uint64_t) offset,
|
||||||
|
- state == BLOCK_NOT_CACHED ? "not cached" :
|
||||||
|
- state == BLOCK_CLEAN ? "clean" :
|
||||||
|
- state == BLOCK_DIRTY ? "dirty" :
|
||||||
|
- "unknown");
|
||||||
|
+ not_cached ? "not cached" : "cached");
|
||||||
|
|
||||||
|
- if (state == BLOCK_NOT_CACHED) { /* Read underlying plugin. */
|
||||||
|
- unsigned n = blksize, tail = 0;
|
||||||
|
+ /* Find out how many of the following blocks form a "run" with the
|
||||||
|
+ * same cached/not-cached state. We can process that many blocks in
|
||||||
|
+ * one go.
|
||||||
|
+ */
|
||||||
|
+ for (b = 1, runblocks = 1; b < nrblocks; ++b, ++runblocks) {
|
||||||
|
+ bool s =
|
||||||
|
+ bitmap_get_blk (&bm, blknum + b, BLOCK_NOT_CACHED) == BLOCK_NOT_CACHED;
|
||||||
|
+ if (not_cached != s)
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (not_cached) { /* Read underlying plugin. */
|
||||||
|
+ unsigned n, tail = 0;
|
||||||
|
+
|
||||||
|
+ assert (blksize * runblocks <= UINT_MAX);
|
||||||
|
+ n = blksize * runblocks;
|
||||||
|
|
||||||
|
if (offset + n > size) {
|
||||||
|
tail = offset + n - size;
|
||||||
|
@@ -228,32 +243,60 @@ blk_read (nbdkit_next *next,
|
||||||
|
*/
|
||||||
|
memset (block + n, 0, tail);
|
||||||
|
|
||||||
|
- /* If cache-on-read, copy the block to the cache. */
|
||||||
|
+ /* If cache-on-read, copy the blocks to the cache. */
|
||||||
|
if (cache_on_read) {
|
||||||
|
if (cache_debug_verbose)
|
||||||
|
nbdkit_debug ("cache: cache-on-read block %" PRIu64
|
||||||
|
" (offset %" PRIu64 ")",
|
||||||
|
blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
- if (pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
+ if (pwrite (fd, block, blksize * runblocks, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
- bitmap_set_blk (&bm, blknum, BLOCK_CLEAN);
|
||||||
|
- lru_set_recently_accessed (blknum);
|
||||||
|
+ for (b = 0; b < runblocks; ++b) {
|
||||||
|
+ bitmap_set_blk (&bm, blknum + b, BLOCK_CLEAN);
|
||||||
|
+ lru_set_recently_accessed (blknum + b);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
- return 0;
|
||||||
|
}
|
||||||
|
else { /* Read cache. */
|
||||||
|
- if (pread (fd, block, blksize, offset) == -1) {
|
||||||
|
+ if (pread (fd, block, blksize * runblocks, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pread: %m");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
- lru_set_recently_accessed (blknum);
|
||||||
|
- return 0;
|
||||||
|
+ for (b = 0; b < runblocks; ++b)
|
||||||
|
+ lru_set_recently_accessed (blknum + b);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ /* If all done, return. */
|
||||||
|
+ if (runblocks == nrblocks)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ /* Recurse to read remaining blocks. */
|
||||||
|
+ return _blk_read_multiple (next,
|
||||||
|
+ blknum + runblocks,
|
||||||
|
+ nrblocks - runblocks,
|
||||||
|
+ block + blksize * runblocks,
|
||||||
|
+ err);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+blk_read_multiple (nbdkit_next *next,
|
||||||
|
+ uint64_t blknum, uint64_t nrblocks,
|
||||||
|
+ uint8_t *block, int *err)
|
||||||
|
+{
|
||||||
|
+ reclaim (fd, &bm);
|
||||||
|
+ return _blk_read_multiple (next, blknum, nrblocks, block, err);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+blk_read (nbdkit_next *next,
|
||||||
|
+ uint64_t blknum, uint8_t *block, int *err)
|
||||||
|
+{
|
||||||
|
+ return blk_read_multiple (next, blknum, 1, block, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
diff --git a/filters/cache/blk.h b/filters/cache/blk.h
|
||||||
|
index 87c753e2..1ee33ed7 100644
|
||||||
|
--- a/filters/cache/blk.h
|
||||||
|
+++ b/filters/cache/blk.h
|
||||||
|
@@ -55,6 +55,12 @@ extern int blk_read (nbdkit_next *next,
|
||||||
|
uint64_t blknum, uint8_t *block, int *err)
|
||||||
|
__attribute__((__nonnull__ (1, 3, 4)));
|
||||||
|
|
||||||
|
+/* As above, but read multiple blocks. */
|
||||||
|
+extern int blk_read_multiple (nbdkit_next *next,
|
||||||
|
+ uint64_t blknum, uint64_t nrblocks,
|
||||||
|
+ uint8_t *block, int *err)
|
||||||
|
+ __attribute__((__nonnull__ (1, 4, 5)));
|
||||||
|
+
|
||||||
|
/* If a single block is not cached, copy it from the plugin. */
|
||||||
|
extern int blk_cache (nbdkit_next *next,
|
||||||
|
uint64_t blknum, uint8_t *block, int *err)
|
||||||
|
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
|
||||||
|
index 745f552d..14cc03f2 100644
|
||||||
|
--- a/filters/cache/cache.c
|
||||||
|
+++ b/filters/cache/cache.c
|
||||||
|
@@ -313,7 +313,7 @@ cache_pread (nbdkit_next *next,
|
||||||
|
uint32_t flags, int *err)
|
||||||
|
{
|
||||||
|
CLEANUP_FREE uint8_t *block = NULL;
|
||||||
|
- uint64_t blknum, blkoffs;
|
||||||
|
+ uint64_t blknum, blkoffs, nrblocks;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert (!flags);
|
||||||
|
@@ -348,22 +348,17 @@ cache_pread (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Aligned body */
|
||||||
|
- /* XXX This breaks up large read requests into smaller ones, which
|
||||||
|
- * is a problem for plugins which have a large, fixed per-request
|
||||||
|
- * overhead (hello, curl). We should try to keep large requests
|
||||||
|
- * together as much as possible, but that requires us to be much
|
||||||
|
- * smarter here.
|
||||||
|
- */
|
||||||
|
- while (count >= blksize) {
|
||||||
|
+ nrblocks = count / blksize;
|
||||||
|
+ if (nrblocks > 0) {
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
||||||
|
- r = blk_read (next, blknum, buf, err);
|
||||||
|
+ r = blk_read_multiple (next, blknum, nrblocks, buf, err);
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
- buf += blksize;
|
||||||
|
- count -= blksize;
|
||||||
|
- offset += blksize;
|
||||||
|
- blknum++;
|
||||||
|
+ buf += nrblocks * blksize;
|
||||||
|
+ count -= nrblocks * blksize;
|
||||||
|
+ offset += nrblocks * blksize;
|
||||||
|
+ blknum += nrblocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Unaligned tail */
|
||||||
|
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
|
||||||
|
index b7c4d7f1..4ec8d1b8 100644
|
||||||
|
--- a/filters/cow/blk.c
|
||||||
|
+++ b/filters/cow/blk.c
|
||||||
|
@@ -79,6 +79,7 @@
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
+#include <limits.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
@@ -219,33 +220,48 @@ blk_status (uint64_t blknum, bool *present, bool *trimmed)
|
||||||
|
*trimmed = state == BLOCK_TRIMMED;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* These are the block operations. They always read or write a single
|
||||||
|
- * whole block of size ‘blksize’.
|
||||||
|
+/* These are the block operations. They always read or write whole
|
||||||
|
+ * blocks of size ‘blksize’.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
-blk_read (nbdkit_next *next,
|
||||||
|
- uint64_t blknum, uint8_t *block, int *err)
|
||||||
|
+blk_read_multiple (nbdkit_next *next,
|
||||||
|
+ uint64_t blknum, uint64_t nrblocks,
|
||||||
|
+ uint8_t *block, int *err)
|
||||||
|
{
|
||||||
|
off_t offset = blknum * BLKSIZE;
|
||||||
|
enum bm_entry state;
|
||||||
|
+ uint64_t b, runblocks;
|
||||||
|
|
||||||
|
- /* The state might be modified from another thread - for example
|
||||||
|
- * another thread might write (BLOCK_NOT_ALLOCATED ->
|
||||||
|
- * BLOCK_ALLOCATED) while we are reading from the plugin, returning
|
||||||
|
- * the old data. However a read issued after the write returns
|
||||||
|
- * should always return the correct data.
|
||||||
|
+ /* Find out how many of the following blocks form a "run" with the
|
||||||
|
+ * same state. We can process that many blocks in one go.
|
||||||
|
+ *
|
||||||
|
+ * About the locking: The state might be modified from another
|
||||||
|
+ * thread - for example another thread might write
|
||||||
|
+ * (BLOCK_NOT_ALLOCATED -> BLOCK_ALLOCATED) while we are reading
|
||||||
|
+ * from the plugin, returning the old data. However a read issued
|
||||||
|
+ * after the write returns should always return the correct data.
|
||||||
|
*/
|
||||||
|
{
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
||||||
|
state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_ALLOCATED);
|
||||||
|
+
|
||||||
|
+ for (b = 1, runblocks = 1; b < nrblocks; ++b, ++runblocks) {
|
||||||
|
+ enum bm_entry s = bitmap_get_blk (&bm, blknum + b, BLOCK_NOT_ALLOCATED);
|
||||||
|
+ if (state != s)
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cow_debug_verbose)
|
||||||
|
- nbdkit_debug ("cow: blk_read block %" PRIu64 " (offset %" PRIu64 ") is %s",
|
||||||
|
+ nbdkit_debug ("cow: blk_read_multiple block %" PRIu64
|
||||||
|
+ " (offset %" PRIu64 ") is %s",
|
||||||
|
blknum, (uint64_t) offset, state_to_string (state));
|
||||||
|
|
||||||
|
if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
|
||||||
|
- unsigned n = BLKSIZE, tail = 0;
|
||||||
|
+ unsigned n, tail = 0;
|
||||||
|
+
|
||||||
|
+ assert (BLKSIZE * runblocks <= UINT_MAX);
|
||||||
|
+ n = BLKSIZE * runblocks;
|
||||||
|
|
||||||
|
if (offset + n > size) {
|
||||||
|
tail = offset + n - size;
|
||||||
|
@@ -260,20 +276,35 @@ blk_read (nbdkit_next *next,
|
||||||
|
* zeroing the tail.
|
||||||
|
*/
|
||||||
|
memset (block + n, 0, tail);
|
||||||
|
- return 0;
|
||||||
|
}
|
||||||
|
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
|
||||||
|
- if (pread (fd, block, BLKSIZE, offset) == -1) {
|
||||||
|
+ if (pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pread: %m");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
- return 0;
|
||||||
|
}
|
||||||
|
else /* state == BLOCK_TRIMMED */ {
|
||||||
|
- memset (block, 0, BLKSIZE);
|
||||||
|
- return 0;
|
||||||
|
+ memset (block, 0, BLKSIZE * runblocks);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ /* If all done, return. */
|
||||||
|
+ if (runblocks == nrblocks)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ /* Recurse to read remaining blocks. */
|
||||||
|
+ return blk_read_multiple (next,
|
||||||
|
+ blknum + runblocks,
|
||||||
|
+ nrblocks - runblocks,
|
||||||
|
+ block + BLKSIZE * runblocks,
|
||||||
|
+ err);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+blk_read (nbdkit_next *next,
|
||||||
|
+ uint64_t blknum, uint8_t *block, int *err)
|
||||||
|
+{
|
||||||
|
+ return blk_read_multiple (next, blknum, 1, block, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
|
||||||
|
index e6fd7417..b066c602 100644
|
||||||
|
--- a/filters/cow/blk.h
|
||||||
|
+++ b/filters/cow/blk.h
|
||||||
|
@@ -55,6 +55,12 @@ extern int blk_read (nbdkit_next *next,
|
||||||
|
uint64_t blknum, uint8_t *block, int *err)
|
||||||
|
__attribute__((__nonnull__ (1, 3, 4)));
|
||||||
|
|
||||||
|
+/* Read multiple blocks from the overlay or plugin. */
|
||||||
|
+extern int blk_read_multiple (nbdkit_next *next,
|
||||||
|
+ uint64_t blknum, uint64_t nrblocks,
|
||||||
|
+ uint8_t *block, int *err)
|
||||||
|
+ __attribute__((__nonnull__ (1, 4, 5)));
|
||||||
|
+
|
||||||
|
/* Cache mode for blocks not already in overlay */
|
||||||
|
enum cache_mode {
|
||||||
|
BLK_CACHE_IGNORE, /* Do nothing */
|
||||||
|
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
|
||||||
|
index f30b7505..78daca22 100644
|
||||||
|
--- a/filters/cow/cow.c
|
||||||
|
+++ b/filters/cow/cow.c
|
||||||
|
@@ -210,7 +210,7 @@ cow_pread (nbdkit_next *next,
|
||||||
|
uint32_t flags, int *err)
|
||||||
|
{
|
||||||
|
CLEANUP_FREE uint8_t *block = NULL;
|
||||||
|
- uint64_t blknum, blkoffs;
|
||||||
|
+ uint64_t blknum, blkoffs, nrblocks;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
if (!IS_ALIGNED (count | offset, BLKSIZE)) {
|
||||||
|
@@ -243,21 +243,16 @@ cow_pread (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Aligned body */
|
||||||
|
- /* XXX This breaks up large read requests into smaller ones, which
|
||||||
|
- * is a problem for plugins which have a large, fixed per-request
|
||||||
|
- * overhead (hello, curl). We should try to keep large requests
|
||||||
|
- * together as much as possible, but that requires us to be much
|
||||||
|
- * smarter here.
|
||||||
|
- */
|
||||||
|
- while (count >= BLKSIZE) {
|
||||||
|
- r = blk_read (next, blknum, buf, err);
|
||||||
|
+ nrblocks = count / BLKSIZE;
|
||||||
|
+ if (nrblocks > 0) {
|
||||||
|
+ r = blk_read_multiple (next, blknum, nrblocks, buf, err);
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
- buf += BLKSIZE;
|
||||||
|
- count -= BLKSIZE;
|
||||||
|
- offset += BLKSIZE;
|
||||||
|
- blknum++;
|
||||||
|
+ buf += nrblocks * BLKSIZE;
|
||||||
|
+ count -= nrblocks * BLKSIZE;
|
||||||
|
+ offset += nrblocks * BLKSIZE;
|
||||||
|
+ blknum += nrblocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Unaligned tail */
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
215
SOURCES/0004-cache-cow-Use-full-pread-pwrite-operations.patch
Normal file
215
SOURCES/0004-cache-cow-Use-full-pread-pwrite-operations.patch
Normal file
@ -0,0 +1,215 @@
|
|||||||
|
From 5bd332a683811586039f99f31c01d4f2f7181334 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Mon, 26 Jul 2021 15:21:18 +0100
|
||||||
|
Subject: [PATCH] cache, cow: Use full pread/pwrite operations
|
||||||
|
|
||||||
|
Although it probably cannot happen on Linux, POSIX allows pread/pwrite
|
||||||
|
to return or write fewer bytes than requested. The cache and cow
|
||||||
|
filters didn't handle this situation. Replace the raw
|
||||||
|
pread(2)/pwrite(2) syscalls with alternate versions which can handle
|
||||||
|
this.
|
||||||
|
|
||||||
|
(cherry picked from commit ce0db9d7736dd28dd0f10951ce65853e50b35e41)
|
||||||
|
---
|
||||||
|
common/utils/Makefile.am | 1 +
|
||||||
|
common/utils/full-rw.c | 81 ++++++++++++++++++++++++++++++++++++++++
|
||||||
|
common/utils/utils.h | 2 +
|
||||||
|
filters/cache/blk.c | 10 ++---
|
||||||
|
filters/cow/blk.c | 6 +--
|
||||||
|
5 files changed, 92 insertions(+), 8 deletions(-)
|
||||||
|
create mode 100644 common/utils/full-rw.c
|
||||||
|
|
||||||
|
diff --git a/common/utils/Makefile.am b/common/utils/Makefile.am
|
||||||
|
index 1708a4c8..14e9dfc4 100644
|
||||||
|
--- a/common/utils/Makefile.am
|
||||||
|
+++ b/common/utils/Makefile.am
|
||||||
|
@@ -40,6 +40,7 @@ libutils_la_SOURCES = \
|
||||||
|
cleanup-nbdkit.c \
|
||||||
|
cleanup.h \
|
||||||
|
environ.c \
|
||||||
|
+ full-rw.c \
|
||||||
|
quote.c \
|
||||||
|
utils.c \
|
||||||
|
utils.h \
|
||||||
|
diff --git a/common/utils/full-rw.c b/common/utils/full-rw.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000..55b32cdd
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/common/utils/full-rw.c
|
||||||
|
@@ -0,0 +1,81 @@
|
||||||
|
+/* nbdkit
|
||||||
|
+ * Copyright (C) 2021 Red Hat Inc.
|
||||||
|
+ *
|
||||||
|
+ * Redistribution and use in source and binary forms, with or without
|
||||||
|
+ * modification, are permitted provided that the following conditions are
|
||||||
|
+ * met:
|
||||||
|
+ *
|
||||||
|
+ * * Redistributions of source code must retain the above copyright
|
||||||
|
+ * notice, this list of conditions and the following disclaimer.
|
||||||
|
+ *
|
||||||
|
+ * * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+ * notice, this list of conditions and the following disclaimer in the
|
||||||
|
+ * documentation and/or other materials provided with the distribution.
|
||||||
|
+ *
|
||||||
|
+ * * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+ * used to endorse or promote products derived from this software without
|
||||||
|
+ * specific prior written permission.
|
||||||
|
+ *
|
||||||
|
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+ * SUCH DAMAGE.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+/* These functions are like pread(2)/pwrite(2) but they always read or
|
||||||
|
+ * write the full amount, or fail.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include <config.h>
|
||||||
|
+
|
||||||
|
+#include <stdio.h>
|
||||||
|
+#include <stdlib.h>
|
||||||
|
+#include <unistd.h>
|
||||||
|
+#include <errno.h>
|
||||||
|
+
|
||||||
|
+ssize_t
|
||||||
|
+full_pread (int fd, void *buf, size_t count, off_t offset)
|
||||||
|
+{
|
||||||
|
+ ssize_t ret = 0, r;
|
||||||
|
+
|
||||||
|
+ while (count > 0) {
|
||||||
|
+ r = pread (fd, (char *) buf + ret, count, offset); /* continue after bytes already read */
|
||||||
|
+ if (r == -1) return -1;
|
||||||
|
+ if (r == 0) {
|
||||||
|
+ /* Presumably the caller wasn't expecting end-of-file here, so
|
||||||
|
+ * return an error.
|
||||||
|
+ */
|
||||||
|
+ errno = EIO;
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ ret += r;
|
||||||
|
+ offset += r;
|
||||||
|
+ count -= r;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+ssize_t
|
||||||
|
+full_pwrite (int fd, const void *buf, size_t count, off_t offset)
|
||||||
|
+{
|
||||||
|
+ ssize_t ret = 0, r;
|
||||||
|
+
|
||||||
|
+ while (count > 0) {
|
||||||
|
+ r = pwrite (fd, (const char *) buf + ret, count, offset); /* continue after bytes already written */
|
||||||
|
+ if (r == -1) return -1;
|
||||||
|
+ ret += r;
|
||||||
|
+ offset += r;
|
||||||
|
+ count -= r;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
diff --git a/common/utils/utils.h b/common/utils/utils.h
|
||||||
|
index f8f70212..83397ae1 100644
|
||||||
|
--- a/common/utils/utils.h
|
||||||
|
+++ b/common/utils/utils.h
|
||||||
|
@@ -40,5 +40,7 @@ extern int set_cloexec (int fd);
|
||||||
|
extern int set_nonblock (int fd);
|
||||||
|
extern char **copy_environ (char **env, ...) __attribute__((__sentinel__));
|
||||||
|
extern char *make_temporary_directory (void);
|
||||||
|
+extern ssize_t full_pread (int fd, void *buf, size_t count, off_t offset);
|
||||||
|
+extern ssize_t full_pwrite (int fd, const void *buf, size_t count, off_t offset);
|
||||||
|
|
||||||
|
#endif /* NBDKIT_UTILS_H */
|
||||||
|
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
|
||||||
|
index f85ada35..42bd3779 100644
|
||||||
|
--- a/filters/cache/blk.c
|
||||||
|
+++ b/filters/cache/blk.c
|
||||||
|
@@ -250,7 +250,7 @@ _blk_read_multiple (nbdkit_next *next,
|
||||||
|
" (offset %" PRIu64 ")",
|
||||||
|
blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
- if (pwrite (fd, block, blksize * runblocks, offset) == -1) {
|
||||||
|
+ if (full_pwrite (fd, block, blksize * runblocks, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
return -1;
|
||||||
|
@@ -262,7 +262,7 @@ _blk_read_multiple (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { /* Read cache. */
|
||||||
|
- if (pread (fd, block, blksize * runblocks, offset) == -1) {
|
||||||
|
+ if (full_pread (fd, block, blksize * runblocks, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pread: %m");
|
||||||
|
return -1;
|
||||||
|
@@ -339,7 +339,7 @@ blk_cache (nbdkit_next *next,
|
||||||
|
nbdkit_debug ("cache: cache block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
- if (pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
return -1;
|
||||||
|
@@ -380,7 +380,7 @@ blk_writethrough (nbdkit_next *next,
|
||||||
|
nbdkit_debug ("cache: writethrough block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
- if (pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
return -1;
|
||||||
|
@@ -414,7 +414,7 @@ blk_write (nbdkit_next *next,
|
||||||
|
nbdkit_debug ("cache: writeback block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
- if (pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
return -1;
|
||||||
|
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
|
||||||
|
index 4ec8d1b8..121b0dd4 100644
|
||||||
|
--- a/filters/cow/blk.c
|
||||||
|
+++ b/filters/cow/blk.c
|
||||||
|
@@ -278,7 +278,7 @@ blk_read_multiple (nbdkit_next *next,
|
||||||
|
memset (block + n, 0, tail);
|
||||||
|
}
|
||||||
|
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
|
||||||
|
- if (pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
||||||
|
+ if (full_pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pread: %m");
|
||||||
|
return -1;
|
||||||
|
@@ -353,7 +353,7 @@ blk_cache (nbdkit_next *next,
|
||||||
|
memset (block + n, 0, tail);
|
||||||
|
|
||||||
|
if (mode == BLK_CACHE_COW) {
|
||||||
|
- if (pwrite (fd, block, BLKSIZE, offset) == -1) {
|
||||||
|
+ if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
return -1;
|
||||||
|
@@ -372,7 +372,7 @@ blk_write (uint64_t blknum, const uint8_t *block, int *err)
|
||||||
|
nbdkit_debug ("cow: blk_write block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
- if (pwrite (fd, block, BLKSIZE, offset) == -1) {
|
||||||
|
+ if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
return -1;
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
151
SOURCES/0005-cache-Implement-cache-on-read-PATH.patch
Normal file
151
SOURCES/0005-cache-Implement-cache-on-read-PATH.patch
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
From 4db23fd29af0488aa9c7e01577a5be9565a4465e Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Mon, 26 Jul 2021 16:16:15 +0100
|
||||||
|
Subject: [PATCH] cache: Implement cache-on-read=/PATH
|
||||||
|
|
||||||
|
For virt-v2v we will need to be able to turn cache-on-read on while
|
||||||
|
performing inspection and modification of the guest, and off when
|
||||||
|
doing the bulk copy. To do that allow the cache-on-read parameter to
|
||||||
|
refer to a path where the existence of the path toggles the feature.
|
||||||
|
|
||||||
|
(We could restart nbdkit between these phases, but this change avoids
|
||||||
|
doing that.)
|
||||||
|
|
||||||
|
(cherry picked from commit c8b575241b15b3bf0adaf15313e67e5ed4270b5a)
|
||||||
|
---
|
||||||
|
filters/cache/blk.c | 2 +-
|
||||||
|
filters/cache/cache.c | 33 ++++++++++++++++++++-------
|
||||||
|
filters/cache/cache.h | 10 ++++++--
|
||||||
|
filters/cache/nbdkit-cache-filter.pod | 11 ++++++++-
|
||||||
|
4 files changed, 44 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
|
||||||
|
index 42bd3779..19f79605 100644
|
||||||
|
--- a/filters/cache/blk.c
|
||||||
|
+++ b/filters/cache/blk.c
|
||||||
|
@@ -244,7 +244,7 @@ _blk_read_multiple (nbdkit_next *next,
|
||||||
|
memset (block + n, 0, tail);
|
||||||
|
|
||||||
|
/* If cache-on-read, copy the blocks to the cache. */
|
||||||
|
- if (cache_on_read) {
|
||||||
|
+ if (cache_on_read ()) {
|
||||||
|
if (cache_debug_verbose)
|
||||||
|
nbdkit_debug ("cache: cache-on-read block %" PRIu64
|
||||||
|
" (offset %" PRIu64 ")",
|
||||||
|
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
|
||||||
|
index 14cc03f2..44da0008 100644
|
||||||
|
--- a/filters/cache/cache.c
|
||||||
|
+++ b/filters/cache/cache.c
|
||||||
|
@@ -74,7 +74,8 @@ unsigned blksize;
|
||||||
|
enum cache_mode cache_mode = CACHE_MODE_WRITEBACK;
|
||||||
|
int64_t max_size = -1;
|
||||||
|
unsigned hi_thresh = 95, lo_thresh = 80;
|
||||||
|
-bool cache_on_read = false;
|
||||||
|
+enum cor_mode cor_mode = COR_OFF;
|
||||||
|
+const char *cor_path;
|
||||||
|
|
||||||
|
static int cache_flush (nbdkit_next *next, void *handle, uint32_t flags,
|
||||||
|
int *err);
|
||||||
|
@@ -161,12 +162,16 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
}
|
||||||
|
#endif /* !HAVE_CACHE_RECLAIM */
|
||||||
|
else if (strcmp (key, "cache-on-read") == 0) {
|
||||||
|
- int r;
|
||||||
|
-
|
||||||
|
- r = nbdkit_parse_bool (value);
|
||||||
|
- if (r == -1)
|
||||||
|
- return -1;
|
||||||
|
- cache_on_read = r;
|
||||||
|
+ if (value[0] == '/') {
|
||||||
|
+ cor_path = value;
|
||||||
|
+ cor_mode = COR_PATH;
|
||||||
|
+ }
|
||||||
|
+ else {
|
||||||
|
+ int r = nbdkit_parse_bool (value);
|
||||||
|
+ if (r == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ cor_mode = r ? COR_ON : COR_OFF;
|
||||||
|
+ }
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
@@ -177,7 +182,7 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
#define cache_config_help_common \
|
||||||
|
"cache=MODE Set cache MODE, one of writeback (default),\n" \
|
||||||
|
" writethrough, or unsafe.\n" \
|
||||||
|
- "cache-on-read=BOOL Set to true to cache on reads (default false).\n"
|
||||||
|
+ "cache-on-read=BOOL|/PATH Set to true to cache on reads (default false).\n"
|
||||||
|
#ifndef HAVE_CACHE_RECLAIM
|
||||||
|
#define cache_config_help cache_config_help_common
|
||||||
|
#else
|
||||||
|
@@ -187,6 +192,18 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
"cache-low-threshold=PCT Percentage of max size where reclaim ends.\n"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+/* Decide if cache-on-read is currently on or off. */
|
||||||
|
+bool
|
||||||
|
+cache_on_read (void)
|
||||||
|
+{
|
||||||
|
+ switch (cor_mode) {
|
||||||
|
+ case COR_ON: return true;
|
||||||
|
+ case COR_OFF: return false;
|
||||||
|
+ case COR_PATH: return access (cor_path, F_OK) == 0;
|
||||||
|
+ default: abort ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int
|
||||||
|
cache_config_complete (nbdkit_next_config_complete *next,
|
||||||
|
nbdkit_backend *nxdata)
|
||||||
|
diff --git a/filters/cache/cache.h b/filters/cache/cache.h
|
||||||
|
index 2b72221f..a559adef 100644
|
||||||
|
--- a/filters/cache/cache.h
|
||||||
|
+++ b/filters/cache/cache.h
|
||||||
|
@@ -49,7 +49,13 @@ extern unsigned blksize;
|
||||||
|
extern int64_t max_size;
|
||||||
|
extern unsigned hi_thresh, lo_thresh;
|
||||||
|
|
||||||
|
-/* Cache read requests. */
|
||||||
|
-extern bool cache_on_read;
|
||||||
|
+/* Cache on read mode. */
|
||||||
|
+extern enum cor_mode {
|
||||||
|
+ COR_OFF,
|
||||||
|
+ COR_ON,
|
||||||
|
+ COR_PATH,
|
||||||
|
+} cor_mode;
|
||||||
|
+extern const char *cor_path;
|
||||||
|
+extern bool cache_on_read (void);
|
||||||
|
|
||||||
|
#endif /* NBDKIT_CACHE_H */
|
||||||
|
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
index ebcf1d10..f20cb9ce 100644
|
||||||
|
--- a/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
+++ b/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
@@ -8,7 +8,7 @@ nbdkit-cache-filter - nbdkit caching filter
|
||||||
|
[cache-max-size=SIZE]
|
||||||
|
[cache-high-threshold=N]
|
||||||
|
[cache-low-threshold=N]
|
||||||
|
- [cache-on-read=true|false]
|
||||||
|
+ [cache-on-read=true|false|/PATH]
|
||||||
|
[plugin-args...]
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
@@ -87,6 +87,15 @@ the plugin.
|
||||||
|
|
||||||
|
Do not cache read requests (this is the default).
|
||||||
|
|
||||||
|
+=item B<cache-on-read=/PATH>
|
||||||
|
+
|
||||||
|
+(nbdkit E<ge> 1.28)
|
||||||
|
+
|
||||||
|
+When F</PATH> (which must be an absolute path) exists, this behaves
|
||||||
|
+like C<cache-on-read=true>, and when it does not exist like
|
||||||
|
+C<cache-on-read=false>. This allows you to control the cache-on-read
|
||||||
|
+behaviour while nbdkit is running.
|
||||||
|
+
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 CACHE MAXIMUM SIZE
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
278
SOURCES/0006-cache-Add-cache-min-block-size-parameter.patch
Normal file
278
SOURCES/0006-cache-Add-cache-min-block-size-parameter.patch
Normal file
@ -0,0 +1,278 @@
|
|||||||
|
From f7f4b71d559dc6950bc795742f64e8eaeeadf3ec Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Mon, 26 Jul 2021 16:30:26 +0100
|
||||||
|
Subject: [PATCH] cache: Add cache-min-block-size parameter
|
||||||
|
|
||||||
|
This allows you to choose a larger block size. I found experimentally
|
||||||
|
that this improves performance because of locality in access patterns.
|
||||||
|
The idea came from qcow2 which implicitly does the same thing because
|
||||||
|
of the relatively large cluster size (32K).
|
||||||
|
|
||||||
|
nbdkit + cache-filter with 4K block size + cache-on-read + curl
|
||||||
|
(to a very slow remote site):
|
||||||
|
=> virt-inspector took 22 mins
|
||||||
|
|
||||||
|
same with 64K block size:
|
||||||
|
=> virt-inspector took 19 mins
|
||||||
|
|
||||||
|
However compared to a qcow2 file using qemu's copy-on-read, backed
|
||||||
|
with nbdkit + curl we are still a lot slower, possibly because having
|
||||||
|
the cache inside virt-inspector greatly reduces round trip overhead:
|
||||||
|
=> virt-inspector took 13 mins
|
||||||
|
|
||||||
|
(cherry picked from commit 4ceacb6caa64e12bd78af5f90e86ee591e055944)
|
||||||
|
---
|
||||||
|
filters/cache/blk.c | 2 +-
|
||||||
|
filters/cache/cache.c | 36 ++++++++++----
|
||||||
|
filters/cache/cache.h | 3 ++
|
||||||
|
filters/cache/nbdkit-cache-filter.pod | 9 ++++
|
||||||
|
tests/Makefile.am | 2 +
|
||||||
|
tests/test-cache-block-size.sh | 70 +++++++++++++++++++++++++++
|
||||||
|
6 files changed, 112 insertions(+), 10 deletions(-)
|
||||||
|
create mode 100755 tests/test-cache-block-size.sh
|
||||||
|
|
||||||
|
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
|
||||||
|
index 19f79605..6276985f 100644
|
||||||
|
--- a/filters/cache/blk.c
|
||||||
|
+++ b/filters/cache/blk.c
|
||||||
|
@@ -149,7 +149,7 @@ blk_init (void)
|
||||||
|
nbdkit_error ("fstatvfs: %s: %m", tmpdir);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
- blksize = MAX (4096, statvfs.f_bsize);
|
||||||
|
+ blksize = MAX (min_block_size, statvfs.f_bsize);
|
||||||
|
nbdkit_debug ("cache: block size: %u", blksize);
|
||||||
|
|
||||||
|
bitmap_init (&bm, blksize, 2 /* bits per block */);
|
||||||
|
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
|
||||||
|
index 44da0008..109ac89e 100644
|
||||||
|
--- a/filters/cache/cache.c
|
||||||
|
+++ b/filters/cache/cache.c
|
||||||
|
@@ -40,6 +40,7 @@
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
+#include <limits.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
@@ -62,6 +63,7 @@
|
||||||
|
#include "blk.h"
|
||||||
|
#include "reclaim.h"
|
||||||
|
#include "isaligned.h"
|
||||||
|
+#include "ispowerof2.h"
|
||||||
|
#include "minmax.h"
|
||||||
|
#include "rounding.h"
|
||||||
|
|
||||||
|
@@ -70,7 +72,8 @@
|
||||||
|
*/
|
||||||
|
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
-unsigned blksize;
|
||||||
|
+unsigned blksize; /* actual block size (picked by blk.c) */
|
||||||
|
+unsigned min_block_size = 4096;
|
||||||
|
enum cache_mode cache_mode = CACHE_MODE_WRITEBACK;
|
||||||
|
int64_t max_size = -1;
|
||||||
|
unsigned hi_thresh = 95, lo_thresh = 80;
|
||||||
|
@@ -80,13 +83,6 @@ const char *cor_path;
|
||||||
|
static int cache_flush (nbdkit_next *next, void *handle, uint32_t flags,
|
||||||
|
int *err);
|
||||||
|
|
||||||
|
-static void
|
||||||
|
-cache_load (void)
|
||||||
|
-{
|
||||||
|
- if (blk_init () == -1)
|
||||||
|
- exit (EXIT_FAILURE);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static void
|
||||||
|
cache_unload (void)
|
||||||
|
{
|
||||||
|
@@ -116,6 +112,19 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+ else if (strcmp (key, "cache-min-block-size") == 0) {
|
||||||
|
+ int64_t r;
|
||||||
|
+
|
||||||
|
+ r = nbdkit_parse_size (value);
|
||||||
|
+ if (r == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ if (r < 4096 || !is_power_of_2 (r) || r > UINT_MAX) {
|
||||||
|
+ nbdkit_error ("cache-min-block-size is not a power of 2, or is too small or too large");
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ min_block_size = r;
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
#ifdef HAVE_CACHE_RECLAIM
|
||||||
|
else if (strcmp (key, "cache-max-size") == 0) {
|
||||||
|
int64_t r;
|
||||||
|
@@ -220,6 +229,15 @@ cache_config_complete (nbdkit_next_config_complete *next,
|
||||||
|
return next (nxdata);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int
|
||||||
|
+cache_get_ready (int thread_model)
|
||||||
|
+{
|
||||||
|
+ if (blk_init () == -1)
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Get the file size, set the cache size. */
|
||||||
|
static int64_t
|
||||||
|
cache_get_size (nbdkit_next *next,
|
||||||
|
@@ -691,11 +709,11 @@ cache_cache (nbdkit_next *next,
|
||||||
|
static struct nbdkit_filter filter = {
|
||||||
|
.name = "cache",
|
||||||
|
.longname = "nbdkit caching filter",
|
||||||
|
- .load = cache_load,
|
||||||
|
.unload = cache_unload,
|
||||||
|
.config = cache_config,
|
||||||
|
.config_complete = cache_config_complete,
|
||||||
|
.config_help = cache_config_help,
|
||||||
|
+ .get_ready = cache_get_ready,
|
||||||
|
.prepare = cache_prepare,
|
||||||
|
.get_size = cache_get_size,
|
||||||
|
.can_cache = cache_can_cache,
|
||||||
|
diff --git a/filters/cache/cache.h b/filters/cache/cache.h
|
||||||
|
index a559adef..5c32c37c 100644
|
||||||
|
--- a/filters/cache/cache.h
|
||||||
|
+++ b/filters/cache/cache.h
|
||||||
|
@@ -45,6 +45,9 @@ extern enum cache_mode {
|
||||||
|
/* Size of a block in the cache. */
|
||||||
|
extern unsigned blksize;
|
||||||
|
|
||||||
|
+/* Minimum block size (cache-min-block-size parameter). */
|
||||||
|
+extern unsigned min_block_size;
|
||||||
|
+
|
||||||
|
/* Maximum size of the cache and high/low thresholds. */
|
||||||
|
extern int64_t max_size;
|
||||||
|
extern unsigned hi_thresh, lo_thresh;
|
||||||
|
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
index f20cb9ce..6cbd1c08 100644
|
||||||
|
--- a/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
+++ b/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
@@ -5,6 +5,7 @@ nbdkit-cache-filter - nbdkit caching filter
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
nbdkit --filter=cache plugin [cache=writeback|writethrough|unsafe]
|
||||||
|
+ [cache-min-block-size=SIZE]
|
||||||
|
[cache-max-size=SIZE]
|
||||||
|
[cache-high-threshold=N]
|
||||||
|
[cache-low-threshold=N]
|
||||||
|
@@ -59,6 +60,14 @@ This is dangerous and can cause data loss, but this may be acceptable
|
||||||
|
if you only use it for testing or with data that you don't care about
|
||||||
|
or can cheaply reconstruct.
|
||||||
|
|
||||||
|
+=item B<cache-min-block-size=>SIZE
|
||||||
|
+
|
||||||
|
+Set the minimum block size used by the cache. This must be a power of
|
||||||
|
+2 and E<ge> 4096.
|
||||||
|
+
|
||||||
|
+The default is 4096, or the block size of the filesystem which
|
||||||
|
+contains the temporary file storing the cache (whichever is larger).
|
||||||
|
+
|
||||||
|
=item B<cache-max-size=>SIZE
|
||||||
|
|
||||||
|
=item B<cache-high-threshold=>N
|
||||||
|
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
||||||
|
index 9630205d..a038eabc 100644
|
||||||
|
--- a/tests/Makefile.am
|
||||||
|
+++ b/tests/Makefile.am
|
||||||
|
@@ -1371,12 +1371,14 @@ EXTRA_DIST += test-blocksize.sh test-blocksize-extents.sh
|
||||||
|
# cache filter test.
|
||||||
|
TESTS += \
|
||||||
|
test-cache.sh \
|
||||||
|
+ test-cache-block-size.sh \
|
||||||
|
test-cache-on-read.sh \
|
||||||
|
test-cache-max-size.sh \
|
||||||
|
test-cache-unaligned.sh \
|
||||||
|
$(NULL)
|
||||||
|
EXTRA_DIST += \
|
||||||
|
test-cache.sh \
|
||||||
|
+ test-cache-block-size.sh \
|
||||||
|
test-cache-on-read.sh \
|
||||||
|
test-cache-max-size.sh \
|
||||||
|
test-cache-unaligned.sh \
|
||||||
|
diff --git a/tests/test-cache-block-size.sh b/tests/test-cache-block-size.sh
|
||||||
|
new file mode 100755
|
||||||
|
index 00000000..a2a27407
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/tests/test-cache-block-size.sh
|
||||||
|
@@ -0,0 +1,70 @@
|
||||||
|
+#!/usr/bin/env bash
|
||||||
|
+# nbdkit
|
||||||
|
+# Copyright (C) 2018-2021 Red Hat Inc.
|
||||||
|
+#
|
||||||
|
+# Redistribution and use in source and binary forms, with or without
|
||||||
|
+# modification, are permitted provided that the following conditions are
|
||||||
|
+# met:
|
||||||
|
+#
|
||||||
|
+# * Redistributions of source code must retain the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer.
|
||||||
|
+#
|
||||||
|
+# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer in the
|
||||||
|
+# documentation and/or other materials provided with the distribution.
|
||||||
|
+#
|
||||||
|
+# * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+# used to endorse or promote products derived from this software without
|
||||||
|
+# specific prior written permission.
|
||||||
|
+#
|
||||||
|
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+# SUCH DAMAGE.
|
||||||
|
+
|
||||||
|
+source ./functions.sh
|
||||||
|
+set -e
|
||||||
|
+set -x
|
||||||
|
+
|
||||||
|
+requires_filter cache
|
||||||
|
+requires_nbdsh_uri
|
||||||
|
+
|
||||||
|
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
|
||||||
|
+files="cache-block-size.img $sock cache-block-size.pid"
|
||||||
|
+rm -f $files
|
||||||
|
+cleanup_fn rm -f $files
|
||||||
|
+
|
||||||
|
+# Create an empty base image.
|
||||||
|
+truncate -s 128K cache-block-size.img
|
||||||
|
+
|
||||||
|
+# Run nbdkit with the caching filter.
|
||||||
|
+start_nbdkit -P cache-block-size.pid -U $sock --filter=cache \
|
||||||
|
+ file cache-block-size.img cache-min-block-size=64K
|
||||||
|
+
|
||||||
|
+nbdsh --connect "nbd+unix://?socket=$sock" \
|
||||||
|
+ -c '
|
||||||
|
+# Write some pattern data to the overlay and check it reads back OK.
|
||||||
|
+buf = b"abcd" * 16384
|
||||||
|
+h.pwrite(buf, 32768)
|
||||||
|
+zero = h.pread(32768, 0)
|
||||||
|
+assert zero == bytearray(32768)
|
||||||
|
+buf2 = h.pread(65536, 32768)
|
||||||
|
+assert buf == buf2
|
||||||
|
+
|
||||||
|
+# Flushing should write through to the underlying file.
|
||||||
|
+h.flush()
|
||||||
|
+
|
||||||
|
+with open("cache-block-size.img", "rb") as file:
|
||||||
|
+ zero = file.read(32768)
|
||||||
|
+ assert zero == bytearray(32768)
|
||||||
|
+ buf2 = file.read(65536)
|
||||||
|
+ assert buf == buf2
|
||||||
|
+'
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
138
SOURCES/0007-cache-cow-Use-a-64K-block-size-by-default.patch
Normal file
138
SOURCES/0007-cache-cow-Use-a-64K-block-size-by-default.patch
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
From 83e1167e1a350bd08ac6245f47a5877438408492 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Mon, 26 Jul 2021 17:39:23 +0100
|
||||||
|
Subject: [PATCH] cache, cow: Use a 64K block size by default
|
||||||
|
|
||||||
|
Based on the results presented in the previous commit, use a 64K block
|
||||||
|
size by default in both the cache and cow filters. For the cache
|
||||||
|
filter you could go back to a 4K block size if you wanted by using the
|
||||||
|
cache-min-block-size=4K parameter. For cow it is compiled in so
|
||||||
|
cannot be adjusted.
|
||||||
|
|
||||||
|
(cherry picked from commit c1905b0a28677d961babdb16d6f30ae61042c825)
|
||||||
|
---
|
||||||
|
filters/cache/cache.c | 2 +-
|
||||||
|
filters/cache/nbdkit-cache-filter.pod | 4 ++--
|
||||||
|
filters/cow/blk.h | 2 +-
|
||||||
|
tests/test-cache-block-size.sh | 2 +-
|
||||||
|
tests/test-cow-extents1.sh | 33 +++++++++++++++------------
|
||||||
|
5 files changed, 23 insertions(+), 20 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
|
||||||
|
index 109ac89e..c912c5fb 100644
|
||||||
|
--- a/filters/cache/cache.c
|
||||||
|
+++ b/filters/cache/cache.c
|
||||||
|
@@ -73,7 +73,7 @@
|
||||||
|
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
unsigned blksize; /* actual block size (picked by blk.c) */
|
||||||
|
-unsigned min_block_size = 4096;
|
||||||
|
+unsigned min_block_size = 65536;
|
||||||
|
enum cache_mode cache_mode = CACHE_MODE_WRITEBACK;
|
||||||
|
int64_t max_size = -1;
|
||||||
|
unsigned hi_thresh = 95, lo_thresh = 80;
|
||||||
|
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
index 6cbd1c08..df9c1f99 100644
|
||||||
|
--- a/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
+++ b/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
@@ -65,8 +65,8 @@ or can cheaply reconstruct.
|
||||||
|
Set the minimum block size used by the cache. This must be a power of
|
||||||
|
2 and E<ge> 4096.
|
||||||
|
|
||||||
|
-The default is 4096, or the block size of the filesystem which
|
||||||
|
-contains the temporary file storing the cache (whichever is larger).
|
||||||
|
+The default is 64K, or the block size of the filesystem which contains
|
||||||
|
+the temporary file storing the cache (whichever is larger).
|
||||||
|
|
||||||
|
=item B<cache-max-size=>SIZE
|
||||||
|
|
||||||
|
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
|
||||||
|
index b066c602..1bc85283 100644
|
||||||
|
--- a/filters/cow/blk.h
|
||||||
|
+++ b/filters/cow/blk.h
|
||||||
|
@@ -36,7 +36,7 @@
|
||||||
|
/* Size of a block in the overlay. A 4K block size means that we need
|
||||||
|
* 64 MB of memory to store the bitmap for a 1 TB underlying image.
|
||||||
|
*/
|
||||||
|
-#define BLKSIZE 4096
|
||||||
|
+#define BLKSIZE 65536
|
||||||
|
|
||||||
|
/* Initialize the overlay and bitmap. */
|
||||||
|
extern int blk_init (void);
|
||||||
|
diff --git a/tests/test-cache-block-size.sh b/tests/test-cache-block-size.sh
|
||||||
|
index a2a27407..d20cc940 100755
|
||||||
|
--- a/tests/test-cache-block-size.sh
|
||||||
|
+++ b/tests/test-cache-block-size.sh
|
||||||
|
@@ -47,7 +47,7 @@ truncate -s 128K cache-block-size.img
|
||||||
|
|
||||||
|
# Run nbdkit with the caching filter.
|
||||||
|
start_nbdkit -P cache-block-size.pid -U $sock --filter=cache \
|
||||||
|
- file cache-block-size.img cache-min-block-size=64K
|
||||||
|
+ file cache-block-size.img cache-min-block-size=4K
|
||||||
|
|
||||||
|
nbdsh --connect "nbd+unix://?socket=$sock" \
|
||||||
|
-c '
|
||||||
|
diff --git a/tests/test-cow-extents1.sh b/tests/test-cow-extents1.sh
|
||||||
|
index 8e0e0383..ebfd83f6 100755
|
||||||
|
--- a/tests/test-cow-extents1.sh
|
||||||
|
+++ b/tests/test-cow-extents1.sh
|
||||||
|
@@ -65,7 +65,7 @@ cleanup_fn rm -f $files
|
||||||
|
|
||||||
|
# Create a base file which is half allocated, half sparse.
|
||||||
|
dd if=/dev/urandom of=$base count=128 bs=1K
|
||||||
|
-truncate -s 256K $base
|
||||||
|
+truncate -s 4M $base
|
||||||
|
lastmod="$(stat -c "%y" $base)"
|
||||||
|
|
||||||
|
# Run nbdkit with a COW overlay.
|
||||||
|
@@ -76,30 +76,33 @@ uri="nbd+unix:///?socket=$sock"
|
||||||
|
nbdinfo --map "$uri" > $out
|
||||||
|
cat $out
|
||||||
|
if [ "$(tr -s ' ' < $out | cut -d' ' -f 1-4)" != " 0 131072 0
|
||||||
|
- 131072 131072 3" ]; then
|
||||||
|
+ 131072 4063232 3" ]; then
|
||||||
|
echo "$0: unexpected initial file map"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Punch some holes.
|
||||||
|
nbdsh -u "$uri" \
|
||||||
|
- -c 'h.trim(4096, 4096)' \
|
||||||
|
- -c 'h.trim(4098, 16383)' \
|
||||||
|
- -c 'h.pwrite(b"1"*4096, 65536)' \
|
||||||
|
- -c 'h.trim(8192, 131072)' \
|
||||||
|
- -c 'h.pwrite(b"2"*8192, 196608)'
|
||||||
|
+ -c 'bs = 65536' \
|
||||||
|
+ -c 'h.trim(bs, bs)' \
|
||||||
|
+ -c 'h.trim(bs+2, 4*bs-1)' \
|
||||||
|
+ -c 'h.pwrite(b"1"*bs, 16*bs)' \
|
||||||
|
+ -c 'h.trim(2*bs, 32*bs)' \
|
||||||
|
+ -c 'h.pwrite(b"2"*(2*bs), 48*bs)'
|
||||||
|
|
||||||
|
# The extents map should be fully allocated.
|
||||||
|
nbdinfo --map "$uri" > $out
|
||||||
|
cat $out
|
||||||
|
-if [ "$(tr -s ' ' < $out | cut -d' ' -f 1-4)" != " 0 4096 0
|
||||||
|
- 4096 4096 3
|
||||||
|
- 8192 8192 0
|
||||||
|
- 16384 4096 3
|
||||||
|
- 20480 110592 0
|
||||||
|
- 131072 65536 3
|
||||||
|
- 196608 8192 0
|
||||||
|
- 204800 57344 3" ]; then
|
||||||
|
+if [ "$(tr -s ' ' < $out | cut -d' ' -f 1-4)" != " 0 65536 0
|
||||||
|
+ 65536 131072 3
|
||||||
|
+ 196608 65536 0
|
||||||
|
+ 262144 65536 3
|
||||||
|
+ 327680 65536 0
|
||||||
|
+ 393216 655360 3
|
||||||
|
+ 1048576 65536 0
|
||||||
|
+ 1114112 2031616 3
|
||||||
|
+ 3145728 131072 0
|
||||||
|
+ 3276800 917504 3" ]; then
|
||||||
|
echo "$0: unexpected trimmed file map"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -0,0 +1,50 @@
|
|||||||
|
From 2592bb42051b3e6d17240badc814b9b16f121c1d Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Tue, 27 Jul 2021 21:16:30 +0100
|
||||||
|
Subject: [PATCH] cache: Refactor printing state into new function
|
||||||
|
|
||||||
|
This minor refactoring just makes the cache and cow filters' blk.c a
|
||||||
|
little bit more similar.
|
||||||
|
|
||||||
|
(cherry picked from commit bdb86ea14c00a950f2a2d34071ac1e0799d29132)
|
||||||
|
---
|
||||||
|
filters/cache/blk.c | 16 ++++++++++++----
|
||||||
|
1 file changed, 12 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
|
||||||
|
index 6276985f..e50a7f24 100644
|
||||||
|
--- a/filters/cache/blk.c
|
||||||
|
+++ b/filters/cache/blk.c
|
||||||
|
@@ -94,6 +94,17 @@ enum bm_entry {
|
||||||
|
BLOCK_DIRTY = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
+static const char *
|
||||||
|
+state_to_string (enum bm_entry state)
|
||||||
|
+{
|
||||||
|
+ switch (state) {
|
||||||
|
+ case BLOCK_NOT_CACHED: return "not cached";
|
||||||
|
+ case BLOCK_CLEAN: return "clean";
|
||||||
|
+ case BLOCK_DIRTY: return "dirty";
|
||||||
|
+ default: abort ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Extra debugging (-D cache.verbose=1). */
|
||||||
|
NBDKIT_DLL_PUBLIC int cache_debug_verbose = 0;
|
||||||
|
|
||||||
|
@@ -312,10 +323,7 @@ blk_cache (nbdkit_next *next,
|
||||||
|
nbdkit_debug ("cache: blk_cache block %" PRIu64
|
||||||
|
" (offset %" PRIu64 ") is %s",
|
||||||
|
blknum, (uint64_t) offset,
|
||||||
|
- state == BLOCK_NOT_CACHED ? "not cached" :
|
||||||
|
- state == BLOCK_CLEAN ? "clean" :
|
||||||
|
- state == BLOCK_DIRTY ? "dirty" :
|
||||||
|
- "unknown");
|
||||||
|
+ state_to_string (state));
|
||||||
|
|
||||||
|
if (state == BLOCK_NOT_CACHED) {
|
||||||
|
/* Read underlying plugin, copy to cache regardless of cache-on-read. */
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -0,0 +1,147 @@
|
|||||||
|
From 315948e75e06d038bd8afa319a41e3fde33b4174 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Thu, 29 Jul 2021 20:16:43 +0100
|
||||||
|
Subject: [PATCH] tests: cache: Test cache-on-read option really caches
|
||||||
|
|
||||||
|
By making use of the delay filter to add a penalty for hitting the
|
||||||
|
plugin we can check whether or not the cache-on-read option is
|
||||||
|
working.
|
||||||
|
|
||||||
|
(cherry picked from commit 3ae7aa533bb9322ab6dc6deecb687ded76634ab4)
|
||||||
|
---
|
||||||
|
tests/Makefile.am | 2 +
|
||||||
|
tests/test-cache-on-read-caches.sh | 87 ++++++++++++++++++++++++++++++
|
||||||
|
tests/test-cache-on-read.sh | 5 --
|
||||||
|
3 files changed, 89 insertions(+), 5 deletions(-)
|
||||||
|
create mode 100755 tests/test-cache-on-read-caches.sh
|
||||||
|
|
||||||
|
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
||||||
|
index a038eabc..51ca913a 100644
|
||||||
|
--- a/tests/Makefile.am
|
||||||
|
+++ b/tests/Makefile.am
|
||||||
|
@@ -1373,6 +1373,7 @@ TESTS += \
|
||||||
|
test-cache.sh \
|
||||||
|
test-cache-block-size.sh \
|
||||||
|
test-cache-on-read.sh \
|
||||||
|
+ test-cache-on-read-caches.sh \
|
||||||
|
test-cache-max-size.sh \
|
||||||
|
test-cache-unaligned.sh \
|
||||||
|
$(NULL)
|
||||||
|
@@ -1380,6 +1381,7 @@ EXTRA_DIST += \
|
||||||
|
test-cache.sh \
|
||||||
|
test-cache-block-size.sh \
|
||||||
|
test-cache-on-read.sh \
|
||||||
|
+ test-cache-on-read-caches.sh \
|
||||||
|
test-cache-max-size.sh \
|
||||||
|
test-cache-unaligned.sh \
|
||||||
|
$(NULL)
|
||||||
|
diff --git a/tests/test-cache-on-read-caches.sh b/tests/test-cache-on-read-caches.sh
|
||||||
|
new file mode 100755
|
||||||
|
index 00000000..80b34159
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/tests/test-cache-on-read-caches.sh
|
||||||
|
@@ -0,0 +1,87 @@
|
||||||
|
+#!/usr/bin/env bash
|
||||||
|
+# nbdkit
|
||||||
|
+# Copyright (C) 2018-2021 Red Hat Inc.
|
||||||
|
+#
|
||||||
|
+# Redistribution and use in source and binary forms, with or without
|
||||||
|
+# modification, are permitted provided that the following conditions are
|
||||||
|
+# met:
|
||||||
|
+#
|
||||||
|
+# * Redistributions of source code must retain the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer.
|
||||||
|
+#
|
||||||
|
+# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer in the
|
||||||
|
+# documentation and/or other materials provided with the distribution.
|
||||||
|
+#
|
||||||
|
+# * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+# used to endorse or promote products derived from this software without
|
||||||
|
+# specific prior written permission.
|
||||||
|
+#
|
||||||
|
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+# SUCH DAMAGE.
|
||||||
|
+
|
||||||
|
+source ./functions.sh
|
||||||
|
+set -e
|
||||||
|
+set -x
|
||||||
|
+
|
||||||
|
+requires_filter cache
|
||||||
|
+requires_filter delay
|
||||||
|
+requires_nbdsh_uri
|
||||||
|
+
|
||||||
|
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
|
||||||
|
+files="$sock cache-on-read-caches.pid"
|
||||||
|
+rm -f $files
|
||||||
|
+cleanup_fn rm -f $files
|
||||||
|
+
|
||||||
|
+# Run nbdkit with the cache filter, cache-on-read and a read delay.
|
||||||
|
+start_nbdkit -P cache-on-read-caches.pid -U $sock \
|
||||||
|
+ --filter=cache --filter=delay \
|
||||||
|
+ memory 64K cache-on-read=true rdelay=10
|
||||||
|
+
|
||||||
|
+nbdsh --connect "nbd+unix://?socket=$sock" \
|
||||||
|
+ -c '
|
||||||
|
+from time import time
|
||||||
|
+
|
||||||
|
+# First read should suffer a penalty. Because we are reading
|
||||||
|
+# a single 64K block (same size as the cache block), we should
|
||||||
|
+# only suffer one penalty of approx. 10 seconds.
|
||||||
|
+st = time()
|
||||||
|
+zb = h.pread(65536, 0)
|
||||||
|
+et = time()
|
||||||
|
+el = et-st
|
||||||
|
+print("elapsed time: %g" % el)
|
||||||
|
+assert et-st >= 10
|
||||||
|
+assert zb == bytearray(65536)
|
||||||
|
+
|
||||||
|
+# Second read should not suffer a penalty.
|
||||||
|
+st = time()
|
||||||
|
+zb = h.pread(65536, 0)
|
||||||
|
+et = time()
|
||||||
|
+el = et-st
|
||||||
|
+print("elapsed time: %g" % el)
|
||||||
|
+assert el < 10
|
||||||
|
+assert zb == bytearray(65536)
|
||||||
|
+
|
||||||
|
+# Write something.
|
||||||
|
+buf = b"abcd" * 16384
|
||||||
|
+h.pwrite(buf, 0)
|
||||||
|
+
|
||||||
|
+# Reading back should be quick since it is stored in the overlay.
|
||||||
|
+st = time()
|
||||||
|
+buf2 = h.pread(65536, 0)
|
||||||
|
+et = time()
|
||||||
|
+el = et-st
|
||||||
|
+print("elapsed time: %g" % el)
|
||||||
|
+assert el < 10
|
||||||
|
+assert buf == buf2
|
||||||
|
+'
|
||||||
|
diff --git a/tests/test-cache-on-read.sh b/tests/test-cache-on-read.sh
|
||||||
|
index f8584dcd..85ca83d4 100755
|
||||||
|
--- a/tests/test-cache-on-read.sh
|
||||||
|
+++ b/tests/test-cache-on-read.sh
|
||||||
|
@@ -56,9 +56,4 @@ zero = h.pread(32768, 0)
|
||||||
|
assert zero == bytearray(32768)
|
||||||
|
buf2 = h.pread(65536, 32768)
|
||||||
|
assert buf == buf2
|
||||||
|
-
|
||||||
|
-# XXX Suggestion to improve this test: Use the delay filter below the
|
||||||
|
-# cache filter, and time reads to prove that the second read is faster
|
||||||
|
-# because it is not going through the delay filter and plugin.
|
||||||
|
-# XXX second h.pread here ...
|
||||||
|
'
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
457
SOURCES/0010-cow-Implement-cow-on-read.patch
Normal file
457
SOURCES/0010-cow-Implement-cow-on-read.patch
Normal file
@ -0,0 +1,457 @@
|
|||||||
|
From 57f9bd29f9d7432ad5a70620c373b28db768a314 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Tue, 27 Jul 2021 23:01:52 +0100
|
||||||
|
Subject: [PATCH] cow: Implement cow-on-read
|
||||||
|
|
||||||
|
This is very similar to the nbdkit-cache-filter cache-on-read flag.
|
||||||
|
|
||||||
|
(cherry picked from commit bd93b3f27246f917de48a6cc2525d9c424c07976)
|
||||||
|
---
|
||||||
|
filters/cow/blk.c | 21 ++++++--
|
||||||
|
filters/cow/blk.h | 10 ++--
|
||||||
|
filters/cow/cow.c | 56 ++++++++++++++++----
|
||||||
|
filters/cow/nbdkit-cow-filter.pod | 17 ++++++
|
||||||
|
tests/Makefile.am | 4 ++
|
||||||
|
tests/test-cow-on-read-caches.sh | 87 +++++++++++++++++++++++++++++++
|
||||||
|
tests/test-cow-on-read.sh | 59 +++++++++++++++++++++
|
||||||
|
7 files changed, 236 insertions(+), 18 deletions(-)
|
||||||
|
create mode 100755 tests/test-cow-on-read-caches.sh
|
||||||
|
create mode 100755 tests/test-cow-on-read.sh
|
||||||
|
|
||||||
|
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
|
||||||
|
index 121b0dd4..4f84e092 100644
|
||||||
|
--- a/filters/cow/blk.c
|
||||||
|
+++ b/filters/cow/blk.c
|
||||||
|
@@ -226,7 +226,7 @@ blk_status (uint64_t blknum, bool *present, bool *trimmed)
|
||||||
|
int
|
||||||
|
blk_read_multiple (nbdkit_next *next,
|
||||||
|
uint64_t blknum, uint64_t nrblocks,
|
||||||
|
- uint8_t *block, int *err)
|
||||||
|
+ uint8_t *block, bool cow_on_read, int *err)
|
||||||
|
{
|
||||||
|
off_t offset = blknum * BLKSIZE;
|
||||||
|
enum bm_entry state;
|
||||||
|
@@ -276,6 +276,19 @@ blk_read_multiple (nbdkit_next *next,
|
||||||
|
* zeroing the tail.
|
||||||
|
*/
|
||||||
|
memset (block + n, 0, tail);
|
||||||
|
+
|
||||||
|
+ /* If cow-on-read is true then copy the blocks to the cache and
|
||||||
|
+ * set them as allocated.
|
||||||
|
+ */
|
||||||
|
+ if (cow_on_read) {
|
||||||
|
+ if (full_pwrite (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
||||||
|
+ *err = errno;
|
||||||
|
+ nbdkit_error ("pwrite: %m");
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ for (b = 0; b < runblocks; ++b)
|
||||||
|
+ bitmap_set_blk (&bm, blknum+b, BLOCK_ALLOCATED);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
|
||||||
|
if (full_pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
||||||
|
@@ -297,14 +310,14 @@ blk_read_multiple (nbdkit_next *next,
|
||||||
|
blknum + runblocks,
|
||||||
|
nrblocks - runblocks,
|
||||||
|
block + BLKSIZE * runblocks,
|
||||||
|
- err);
|
||||||
|
+ cow_on_read, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
blk_read (nbdkit_next *next,
|
||||||
|
- uint64_t blknum, uint8_t *block, int *err)
|
||||||
|
+ uint64_t blknum, uint8_t *block, bool cow_on_read, int *err)
|
||||||
|
{
|
||||||
|
- return blk_read_multiple (next, blknum, 1, block, err);
|
||||||
|
+ return blk_read_multiple (next, blknum, 1, block, cow_on_read, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
|
||||||
|
index 1bc85283..b7e6f092 100644
|
||||||
|
--- a/filters/cow/blk.h
|
||||||
|
+++ b/filters/cow/blk.h
|
||||||
|
@@ -52,14 +52,16 @@ extern void blk_status (uint64_t blknum, bool *present, bool *trimmed);
|
||||||
|
|
||||||
|
/* Read a single block from the overlay or plugin. */
|
||||||
|
extern int blk_read (nbdkit_next *next,
|
||||||
|
- uint64_t blknum, uint8_t *block, int *err)
|
||||||
|
- __attribute__((__nonnull__ (1, 3, 4)));
|
||||||
|
+ uint64_t blknum, uint8_t *block,
|
||||||
|
+ bool cow_on_read, int *err)
|
||||||
|
+ __attribute__((__nonnull__ (1, 3, 5)));
|
||||||
|
|
||||||
|
/* Read multiple blocks from the overlay or plugin. */
|
||||||
|
extern int blk_read_multiple (nbdkit_next *next,
|
||||||
|
uint64_t blknum, uint64_t nrblocks,
|
||||||
|
- uint8_t *block, int *err)
|
||||||
|
- __attribute__((__nonnull__ (1, 4, 5)));
|
||||||
|
+ uint8_t *block,
|
||||||
|
+ bool cow_on_read, int *err)
|
||||||
|
+ __attribute__((__nonnull__ (1, 4, 6)));
|
||||||
|
|
||||||
|
/* Cache mode for blocks not already in overlay */
|
||||||
|
enum cache_mode {
|
||||||
|
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
|
||||||
|
index 78daca22..6efb39f2 100644
|
||||||
|
--- a/filters/cow/cow.c
|
||||||
|
+++ b/filters/cow/cow.c
|
||||||
|
@@ -38,6 +38,7 @@
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <string.h>
|
||||||
|
+#include <unistd.h>
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
#include <pthread.h>
|
||||||
|
@@ -59,6 +60,15 @@ static pthread_mutex_t rmw_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
static bool cow_on_cache;
|
||||||
|
|
||||||
|
+/* Cache on read ("cow-on-read") mode. */
|
||||||
|
+extern enum cor_mode {
|
||||||
|
+ COR_OFF,
|
||||||
|
+ COR_ON,
|
||||||
|
+ COR_PATH,
|
||||||
|
+} cor_mode;
|
||||||
|
+enum cor_mode cor_mode = COR_OFF;
|
||||||
|
+const char *cor_path;
|
||||||
|
+
|
||||||
|
static void
|
||||||
|
cow_load (void)
|
||||||
|
{
|
||||||
|
@@ -85,13 +95,39 @@ cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
cow_on_cache = r;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
+ else if (strcmp (key, "cow-on-read") == 0) {
|
||||||
|
+ if (value[0] == '/') {
|
||||||
|
+ cor_path = value;
|
||||||
|
+ cor_mode = COR_PATH;
|
||||||
|
+ }
|
||||||
|
+ else {
|
||||||
|
+ int r = nbdkit_parse_bool (value);
|
||||||
|
+ if (r == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ cor_mode = r ? COR_ON : COR_OFF;
|
||||||
|
+ }
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
else {
|
||||||
|
return next (nxdata, key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#define cow_config_help \
|
||||||
|
- "cow-on-cache=<BOOL> Set to true to treat client cache requests as writes.\n"
|
||||||
|
+ "cow-on-cache=<BOOL> Copy cache (prefetch) requests to the overlay.\n" \
|
||||||
|
+ "cow-on-read=<BOOL>|/PATH Copy read requests to the overlay."
|
||||||
|
+
|
||||||
|
+/* Decide if cow-on-read is currently on or off. */
|
||||||
|
+bool
|
||||||
|
+cow_on_read (void)
|
||||||
|
+{
|
||||||
|
+ switch (cor_mode) {
|
||||||
|
+ case COR_ON: return true;
|
||||||
|
+ case COR_OFF: return false;
|
||||||
|
+ case COR_PATH: return access (cor_path, F_OK) == 0;
|
||||||
|
+ default: abort ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
|
||||||
|
static void *
|
||||||
|
cow_open (nbdkit_next_open *next, nbdkit_context *nxdata,
|
||||||
|
@@ -230,7 +266,7 @@ cow_pread (nbdkit_next *next,
|
||||||
|
uint64_t n = MIN (BLKSIZE - blkoffs, count);
|
||||||
|
|
||||||
|
assert (block);
|
||||||
|
- r = blk_read (next, blknum, block, err);
|
||||||
|
+ r = blk_read (next, blknum, block, cow_on_read (), err);
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
@@ -245,7 +281,7 @@ cow_pread (nbdkit_next *next,
|
||||||
|
/* Aligned body */
|
||||||
|
nrblocks = count / BLKSIZE;
|
||||||
|
if (nrblocks > 0) {
|
||||||
|
- r = blk_read_multiple (next, blknum, nrblocks, buf, err);
|
||||||
|
+ r = blk_read_multiple (next, blknum, nrblocks, buf, cow_on_read (), err);
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
@@ -258,7 +294,7 @@ cow_pread (nbdkit_next *next,
|
||||||
|
/* Unaligned tail */
|
||||||
|
if (count) {
|
||||||
|
assert (block);
|
||||||
|
- r = blk_read (next, blknum, block, err);
|
||||||
|
+ r = blk_read (next, blknum, block, cow_on_read (), err);
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
@@ -299,7 +335,7 @@ cow_pwrite (nbdkit_next *next,
|
||||||
|
*/
|
||||||
|
assert (block);
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
|
||||||
|
- r = blk_read (next, blknum, block, err);
|
||||||
|
+ r = blk_read (next, blknum, block, cow_on_read (), err);
|
||||||
|
if (r != -1) {
|
||||||
|
memcpy (&block[blkoffs], buf, n);
|
||||||
|
r = blk_write (blknum, block, err);
|
||||||
|
@@ -329,7 +365,7 @@ cow_pwrite (nbdkit_next *next,
|
||||||
|
if (count) {
|
||||||
|
assert (block);
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
|
||||||
|
- r = blk_read (next, blknum, block, err);
|
||||||
|
+ r = blk_read (next, blknum, block, cow_on_read (), err);
|
||||||
|
if (r != -1) {
|
||||||
|
memcpy (block, buf, count);
|
||||||
|
r = blk_write (blknum, block, err);
|
||||||
|
@@ -379,7 +415,7 @@ cow_zero (nbdkit_next *next,
|
||||||
|
* Hold the rmw_lock over the whole operation.
|
||||||
|
*/
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
|
||||||
|
- r = blk_read (next, blknum, block, err);
|
||||||
|
+ r = blk_read (next, blknum, block, cow_on_read (), err);
|
||||||
|
if (r != -1) {
|
||||||
|
memset (&block[blkoffs], 0, n);
|
||||||
|
r = blk_write (blknum, block, err);
|
||||||
|
@@ -411,7 +447,7 @@ cow_zero (nbdkit_next *next,
|
||||||
|
/* Unaligned tail */
|
||||||
|
if (count) {
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
|
||||||
|
- r = blk_read (next, blknum, block, err);
|
||||||
|
+ r = blk_read (next, blknum, block, cow_on_read (), err);
|
||||||
|
if (r != -1) {
|
||||||
|
memset (block, 0, count);
|
||||||
|
r = blk_write (blknum, block, err);
|
||||||
|
@@ -455,7 +491,7 @@ cow_trim (nbdkit_next *next,
|
||||||
|
* Hold the lock over the whole operation.
|
||||||
|
*/
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
|
||||||
|
- r = blk_read (next, blknum, block, err);
|
||||||
|
+ r = blk_read (next, blknum, block, cow_on_read (), err);
|
||||||
|
if (r != -1) {
|
||||||
|
memset (&block[blkoffs], 0, n);
|
||||||
|
r = blk_write (blknum, block, err);
|
||||||
|
@@ -482,7 +518,7 @@ cow_trim (nbdkit_next *next,
|
||||||
|
/* Unaligned tail */
|
||||||
|
if (count) {
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
|
||||||
|
- r = blk_read (next, blknum, block, err);
|
||||||
|
+ r = blk_read (next, blknum, block, cow_on_read (), err);
|
||||||
|
if (r != -1) {
|
||||||
|
memset (block, 0, count);
|
||||||
|
r = blk_write (blknum, block, err);
|
||||||
|
diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod
|
||||||
|
index 571189e7..01261429 100644
|
||||||
|
--- a/filters/cow/nbdkit-cow-filter.pod
|
||||||
|
+++ b/filters/cow/nbdkit-cow-filter.pod
|
||||||
|
@@ -62,6 +62,23 @@ the data from the plugin into the overlay.
|
||||||
|
Do not save data from cache (prefetch) requests in the overlay. This
|
||||||
|
leaves the overlay as small as possible. This is the default.
|
||||||
|
|
||||||
|
+=item B<cow-on-read=true>
|
||||||
|
+
|
||||||
|
+When the client issues a read request, copy the data into the overlay
|
||||||
|
+so that the same data can be served more quickly later.
|
||||||
|
+
|
||||||
|
+=item B<cow-on-read=false>
|
||||||
|
+
|
||||||
|
+Do not save data from read requests in the overlay. This leaves the
|
||||||
|
+overlay as small as possible. This is the default.
|
||||||
|
+
|
||||||
|
+=item B<cow-on-read=/PATH>
|
||||||
|
+
|
||||||
|
+When F</PATH> (which must be an absolute path) exists, this behaves
|
||||||
|
+like C<cow-on-read=true>, and when it does not exist like
|
||||||
|
+C<cow-on-read=false>. This allows you to control the C<cow-on-read>
|
||||||
|
+behaviour while nbdkit is running.
|
||||||
|
+
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 EXAMPLES
|
||||||
|
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
||||||
|
index 51ca913a..edc8d66d 100644
|
||||||
|
--- a/tests/Makefile.am
|
||||||
|
+++ b/tests/Makefile.am
|
||||||
|
@@ -1407,6 +1407,8 @@ TESTS += \
|
||||||
|
test-cow-extents1.sh \
|
||||||
|
test-cow-extents2.sh \
|
||||||
|
test-cow-extents-large.sh \
|
||||||
|
+ test-cow-on-read.sh \
|
||||||
|
+ test-cow-on-read-caches.sh \
|
||||||
|
test-cow-unaligned.sh \
|
||||||
|
$(NULL)
|
||||||
|
endif
|
||||||
|
@@ -1417,6 +1419,8 @@ EXTRA_DIST += \
|
||||||
|
test-cow-extents2.sh \
|
||||||
|
test-cow-extents-large.sh \
|
||||||
|
test-cow-null.sh \
|
||||||
|
+ test-cow-on-read.sh \
|
||||||
|
+ test-cow-on-read-caches.sh \
|
||||||
|
test-cow-unaligned.sh \
|
||||||
|
$(NULL)
|
||||||
|
|
||||||
|
diff --git a/tests/test-cow-on-read-caches.sh b/tests/test-cow-on-read-caches.sh
|
||||||
|
new file mode 100755
|
||||||
|
index 00000000..c5b60198
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/tests/test-cow-on-read-caches.sh
|
||||||
|
@@ -0,0 +1,87 @@
|
||||||
|
+#!/usr/bin/env bash
|
||||||
|
+# nbdkit
|
||||||
|
+# Copyright (C) 2018-2021 Red Hat Inc.
|
||||||
|
+#
|
||||||
|
+# Redistribution and use in source and binary forms, with or without
|
||||||
|
+# modification, are permitted provided that the following conditions are
|
||||||
|
+# met:
|
||||||
|
+#
|
||||||
|
+# * Redistributions of source code must retain the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer.
|
||||||
|
+#
|
||||||
|
+# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer in the
|
||||||
|
+# documentation and/or other materials provided with the distribution.
|
||||||
|
+#
|
||||||
|
+# * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+# used to endorse or promote products derived from this software without
|
||||||
|
+# specific prior written permission.
|
||||||
|
+#
|
||||||
|
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+# SUCH DAMAGE.
|
||||||
|
+
|
||||||
|
+source ./functions.sh
|
||||||
|
+set -e
|
||||||
|
+set -x
|
||||||
|
+
|
||||||
|
+requires_filter cow
|
||||||
|
+requires_filter delay
|
||||||
|
+requires_nbdsh_uri
|
||||||
|
+
|
||||||
|
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
|
||||||
|
+files="$sock cow-on-read-caches.pid"
|
||||||
|
+rm -f $files
|
||||||
|
+cleanup_fn rm -f $files
|
||||||
|
+
|
||||||
|
+# Run nbdkit with the cow filter, cow-on-read and a read delay.
|
||||||
|
+start_nbdkit -P cow-on-read-caches.pid -U $sock \
|
||||||
|
+ --filter=cow --filter=delay \
|
||||||
|
+ memory 64K cow-on-read=true rdelay=10
|
||||||
|
+
|
||||||
|
+nbdsh --connect "nbd+unix://?socket=$sock" \
|
||||||
|
+ -c '
|
||||||
|
+from time import time
|
||||||
|
+
|
||||||
|
+# First read should suffer a penalty. Because we are reading
|
||||||
|
+# a single 64K block (same size as the COW block), we should
|
||||||
|
+# only suffer one penalty of approx. 10 seconds.
|
||||||
|
+st = time()
|
||||||
|
+zb = h.pread(65536, 0)
|
||||||
|
+et = time()
|
||||||
|
+el = et-st
|
||||||
|
+print("elapsed time: %g" % el)
|
||||||
|
+assert et-st >= 10
|
||||||
|
+assert zb == bytearray(65536)
|
||||||
|
+
|
||||||
|
+# Second read should not suffer a penalty.
|
||||||
|
+st = time()
|
||||||
|
+zb = h.pread(65536, 0)
|
||||||
|
+et = time()
|
||||||
|
+el = et-st
|
||||||
|
+print("elapsed time: %g" % el)
|
||||||
|
+assert el < 10
|
||||||
|
+assert zb == bytearray(65536)
|
||||||
|
+
|
||||||
|
+# Write something.
|
||||||
|
+buf = b"abcd" * 16384
|
||||||
|
+h.pwrite(buf, 0)
|
||||||
|
+
|
||||||
|
+# Reading back should be quick since it is stored in the overlay.
|
||||||
|
+st = time()
|
||||||
|
+buf2 = h.pread(65536, 0)
|
||||||
|
+et = time()
|
||||||
|
+el = et-st
|
||||||
|
+print("elapsed time: %g" % el)
|
||||||
|
+assert el < 10
|
||||||
|
+assert buf == buf2
|
||||||
|
+'
|
||||||
|
diff --git a/tests/test-cow-on-read.sh b/tests/test-cow-on-read.sh
|
||||||
|
new file mode 100755
|
||||||
|
index 00000000..4f58b33b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/tests/test-cow-on-read.sh
|
||||||
|
@@ -0,0 +1,59 @@
|
||||||
|
+#!/usr/bin/env bash
|
||||||
|
+# nbdkit
|
||||||
|
+# Copyright (C) 2018-2021 Red Hat Inc.
|
||||||
|
+#
|
||||||
|
+# Redistribution and use in source and binary forms, with or without
|
||||||
|
+# modification, are permitted provided that the following conditions are
|
||||||
|
+# met:
|
||||||
|
+#
|
||||||
|
+# * Redistributions of source code must retain the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer.
|
||||||
|
+#
|
||||||
|
+# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer in the
|
||||||
|
+# documentation and/or other materials provided with the distribution.
|
||||||
|
+#
|
||||||
|
+# * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+# used to endorse or promote products derived from this software without
|
||||||
|
+# specific prior written permission.
|
||||||
|
+#
|
||||||
|
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+# SUCH DAMAGE.
|
||||||
|
+
|
||||||
|
+source ./functions.sh
|
||||||
|
+set -e
|
||||||
|
+set -x
|
||||||
|
+
|
||||||
|
+requires_filter cow
|
||||||
|
+requires_nbdsh_uri
|
||||||
|
+
|
||||||
|
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
|
||||||
|
+files="$sock cow-on-read.pid"
|
||||||
|
+rm -f $files
|
||||||
|
+cleanup_fn rm -f $files
|
||||||
|
+
|
||||||
|
+# Run nbdkit with the cow filter and cow-on-read.
|
||||||
|
+start_nbdkit -P cow-on-read.pid -U $sock \
|
||||||
|
+ --filter=cow \
|
||||||
|
+ memory 128K cow-on-read=true
|
||||||
|
+
|
||||||
|
+nbdsh --connect "nbd+unix://?socket=$sock" \
|
||||||
|
+ -c '
|
||||||
|
+# Write some pattern data to the overlay and check it reads back OK.
|
||||||
|
+buf = b"abcd" * 16384
|
||||||
|
+h.pwrite(buf, 32768)
|
||||||
|
+zero = h.pread(32768, 0)
|
||||||
|
+assert zero == bytearray(32768)
|
||||||
|
+buf2 = h.pread(65536, 32768)
|
||||||
|
+assert buf == buf2
|
||||||
|
+'
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
170
SOURCES/0011-delay-Add-delay-open-and-delay-close.patch
Normal file
170
SOURCES/0011-delay-Add-delay-open-and-delay-close.patch
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
From a7e7af18d64164fac42581452f6dc3c07650fcae Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Fri, 30 Jul 2021 10:19:57 +0100
|
||||||
|
Subject: [PATCH] delay: Add delay-open and delay-close
|
||||||
|
|
||||||
|
Useful for simulating VDDK, which has very slow connections.
|
||||||
|
|
||||||
|
(cherry picked from commit de8dcd3a34a38b088a0f9a6f8ca754702ad1f598)
|
||||||
|
---
|
||||||
|
filters/delay/delay.c | 58 ++++++++++++++++++++++++++-
|
||||||
|
filters/delay/nbdkit-delay-filter.pod | 27 +++++++++++--
|
||||||
|
2 files changed, 80 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/filters/delay/delay.c b/filters/delay/delay.c
|
||||||
|
index 5a925aa4..df3729a7 100644
|
||||||
|
--- a/filters/delay/delay.c
|
||||||
|
+++ b/filters/delay/delay.c
|
||||||
|
@@ -48,6 +48,8 @@ static unsigned delay_zero_ms = 0; /* zero delay (milliseconds) */
|
||||||
|
static unsigned delay_trim_ms = 0; /* trim delay (milliseconds) */
|
||||||
|
static unsigned delay_extents_ms = 0;/* extents delay (milliseconds) */
|
||||||
|
static unsigned delay_cache_ms = 0; /* cache delay (milliseconds) */
|
||||||
|
+static unsigned delay_open_ms = 0; /* open delay (milliseconds) */
|
||||||
|
+static unsigned delay_close_ms = 0; /* close delay (milliseconds) */
|
||||||
|
|
||||||
|
static int delay_fast_zero = 1; /* whether delaying zero includes fast zero */
|
||||||
|
|
||||||
|
@@ -126,6 +128,18 @@ cache_delay (int *err)
|
||||||
|
return delay (delay_cache_ms, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int
|
||||||
|
+open_delay (int *err)
|
||||||
|
+{
|
||||||
|
+ return delay (delay_open_ms, err);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int
|
||||||
|
+close_delay (int *err)
|
||||||
|
+{
|
||||||
|
+ return delay (delay_close_ms, err);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Called for each key=value passed on the command line. */
|
||||||
|
static int
|
||||||
|
delay_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
@@ -182,6 +196,16 @@ delay_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
+ else if (strcmp (key, "delay-open") == 0) {
|
||||||
|
+ if (parse_delay (key, value, &delay_open_ms) == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
+ else if (strcmp (key, "delay-close") == 0) {
|
||||||
|
+ if (parse_delay (key, value, &delay_close_ms) == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
else
|
||||||
|
return next (nxdata, key, value);
|
||||||
|
}
|
||||||
|
@@ -195,7 +219,9 @@ delay_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
"delay-extents=<NN>[ms] Extents delay in seconds/milliseconds.\n" \
|
||||||
|
"delay-cache=<NN>[ms] Cache delay in seconds/milliseconds.\n" \
|
||||||
|
"wdelay=<NN>[ms] Write, zero and trim delay in secs/msecs.\n" \
|
||||||
|
- "delay-fast-zero=<BOOL> Delay fast zero requests (default true).\n"
|
||||||
|
+ "delay-fast-zero=<BOOL> Delay fast zero requests (default true).\n" \
|
||||||
|
+ "delay-open=<NN>[ms] Open delay in seconds/milliseconds.\n" \
|
||||||
|
+ "delay-close=<NN>[ms] Close delay in seconds/milliseconds."
|
||||||
|
|
||||||
|
/* Override the plugin's .can_fast_zero if needed */
|
||||||
|
static int
|
||||||
|
@@ -208,6 +234,34 @@ delay_can_fast_zero (nbdkit_next *next,
|
||||||
|
return next->can_fast_zero (next);
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Open connection. */
|
||||||
|
+static void *
|
||||||
|
+delay_open (nbdkit_next_open *next, nbdkit_context *nxdata,
|
||||||
|
+ int readonly, const char *exportname, int is_tls)
|
||||||
|
+{
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ if (open_delay (&err) == -1) {
|
||||||
|
+ errno = err;
|
||||||
|
+ nbdkit_error ("delay: %m");
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (next (nxdata, readonly, exportname) == -1)
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ return NBDKIT_HANDLE_NOT_NEEDED;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Close connection. */
|
||||||
|
+static void
|
||||||
|
+delay_close (void *handle)
|
||||||
|
+{
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ close_delay (&err);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Read data. */
|
||||||
|
static int
|
||||||
|
delay_pread (nbdkit_next *next,
|
||||||
|
@@ -285,6 +339,8 @@ static struct nbdkit_filter filter = {
|
||||||
|
.config = delay_config,
|
||||||
|
.config_help = delay_config_help,
|
||||||
|
.can_fast_zero = delay_can_fast_zero,
|
||||||
|
+ .open = delay_open,
|
||||||
|
+ .close = delay_close,
|
||||||
|
.pread = delay_pread,
|
||||||
|
.pwrite = delay_pwrite,
|
||||||
|
.zero = delay_zero,
|
||||||
|
diff --git a/filters/delay/nbdkit-delay-filter.pod b/filters/delay/nbdkit-delay-filter.pod
|
||||||
|
index d6961a9e..11ae544b 100644
|
||||||
|
--- a/filters/delay/nbdkit-delay-filter.pod
|
||||||
|
+++ b/filters/delay/nbdkit-delay-filter.pod
|
||||||
|
@@ -9,10 +9,15 @@ nbdkit-delay-filter - nbdkit delay filter
|
||||||
|
nbdkit --filter=delay plugin rdelay=NNms wdelay=NNms [plugin-args...]
|
||||||
|
|
||||||
|
nbdkit --filter=delay plugin [plugin-args ...]
|
||||||
|
- delay-read=(SECS|NNms) delay-write=(SECS|NNms)
|
||||||
|
- delay-zero=(SECS|NNms) delay-trim=(SECS|NNms)
|
||||||
|
- delay-extents=(SECS|NNms) delay-cache=(SECS|NNms)
|
||||||
|
+ delay-read=(SECS|NNms)
|
||||||
|
+ delay-write=(SECS|NNms)
|
||||||
|
+ delay-zero=(SECS|NNms)
|
||||||
|
+ delay-trim=(SECS|NNms)
|
||||||
|
+ delay-extents=(SECS|NNms)
|
||||||
|
+ delay-cache=(SECS|NNms)
|
||||||
|
delay-fast-zero=BOOL
|
||||||
|
+ delay-open=(SECS|NNms)
|
||||||
|
+ delay-close=(SECS|NNms)
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
@@ -108,6 +113,20 @@ delay as any other zero request; but setting this parameter to false
|
||||||
|
instantly fails a fast zero response without waiting for or consulting
|
||||||
|
the plugin.
|
||||||
|
|
||||||
|
+=item B<delay-open=>SECS
|
||||||
|
+
|
||||||
|
+=item B<delay-open=>NNB<ms>
|
||||||
|
+
|
||||||
|
+=item B<delay-close=>SECS
|
||||||
|
+
|
||||||
|
+=item B<delay-close=>NNB<ms>
|
||||||
|
+
|
||||||
|
+(nbdkit E<ge> 1.28)
|
||||||
|
+
|
||||||
|
+Delay open and close operations by C<SECS> seconds or C<NN>
|
||||||
|
+milliseconds. Open corresponds to a client connection. Close may not
|
||||||
|
+be visible to clients if they abruptly disconnect.
|
||||||
|
+
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 FILES
|
||||||
|
@@ -140,4 +159,4 @@ Richard W.M. Jones
|
||||||
|
|
||||||
|
=head1 COPYRIGHT
|
||||||
|
|
||||||
|
-Copyright (C) 2018 Red Hat Inc.
|
||||||
|
+Copyright (C) 2018-2021 Red Hat Inc.
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
90
SOURCES/0012-python-Implement-.cleanup-method.patch
Normal file
90
SOURCES/0012-python-Implement-.cleanup-method.patch
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
From 17a912a449fa75b5c12ac3acab596b476699c671 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Tue, 3 Aug 2021 14:19:38 +0100
|
||||||
|
Subject: [PATCH] python: Implement .cleanup() method
|
||||||
|
|
||||||
|
(cherry picked from commit f2fe99e4b0f54467ab8028eaf2d039cf918b2961)
|
||||||
|
---
|
||||||
|
plugins/python/nbdkit-python-plugin.pod | 20 +++++++++++++++++---
|
||||||
|
plugins/python/plugin.c | 19 +++++++++++++++++++
|
||||||
|
2 files changed, 36 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/plugins/python/nbdkit-python-plugin.pod b/plugins/python/nbdkit-python-plugin.pod
|
||||||
|
index 6f5f2c00..a92a557f 100644
|
||||||
|
--- a/plugins/python/nbdkit-python-plugin.pod
|
||||||
|
+++ b/plugins/python/nbdkit-python-plugin.pod
|
||||||
|
@@ -257,6 +257,12 @@ There are no arguments or return value.
|
||||||
|
|
||||||
|
There are no arguments or return value.
|
||||||
|
|
||||||
|
+=item C<cleanup>
|
||||||
|
+
|
||||||
|
+(Optional, nbdkit E<ge> 1.28)
|
||||||
|
+
|
||||||
|
+There are no arguments or return value.
|
||||||
|
+
|
||||||
|
=item C<list_exports>
|
||||||
|
|
||||||
|
(Optional)
|
||||||
|
@@ -498,10 +504,18 @@ optionally using C<nbdkit.set_error> first.
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
-=item Missing: C<load> and C<unload>
|
||||||
|
+=item Missing: C<load>
|
||||||
|
|
||||||
|
-These are not needed because you can just use ordinary Python
|
||||||
|
-constructs.
|
||||||
|
+This is not needed since you can use regular Python mechanisms like
|
||||||
|
+top level statements to run code when the module is loaded.
|
||||||
|
+
|
||||||
|
+=item Missing: C<unload>
|
||||||
|
+
|
||||||
|
+This is missing, but in nbdkit E<ge> 1.28 you can put code in the
|
||||||
|
+C<cleanup()> function to have it run when nbdkit exits. In earlier
|
||||||
|
+versions of nbdkit, using a Python
|
||||||
|
+L<atexit|https://docs.python.org/3/library/atexit.html> handler is
|
||||||
|
+recommended.
|
||||||
|
|
||||||
|
=item Missing:
|
||||||
|
C<name>,
|
||||||
|
diff --git a/plugins/python/plugin.c b/plugins/python/plugin.c
|
||||||
|
index 64430a1a..f85512b4 100644
|
||||||
|
--- a/plugins/python/plugin.c
|
||||||
|
+++ b/plugins/python/plugin.c
|
||||||
|
@@ -298,6 +298,24 @@ py_after_fork (void)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void
|
||||||
|
+py_cleanup (void)
|
||||||
|
+{
|
||||||
|
+ ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE;
|
||||||
|
+ PyObject *fn;
|
||||||
|
+ PyObject *r;
|
||||||
|
+
|
||||||
|
+ if (callback_defined ("cleanup", &fn)) {
|
||||||
|
+ PyErr_Clear ();
|
||||||
|
+
|
||||||
|
+ r = PyObject_CallObject (fn, NULL);
|
||||||
|
+ Py_DECREF (fn);
|
||||||
|
+ if (check_python_failure ("cleanup") == -1)
|
||||||
|
+ return;
|
||||||
|
+ Py_DECREF (r);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int
|
||||||
|
py_list_exports (int readonly, int is_tls, struct nbdkit_exports *exports)
|
||||||
|
{
|
||||||
|
@@ -1039,6 +1057,7 @@ static struct nbdkit_plugin plugin = {
|
||||||
|
.thread_model = py_thread_model,
|
||||||
|
.get_ready = py_get_ready,
|
||||||
|
.after_fork = py_after_fork,
|
||||||
|
+ .cleanup = py_cleanup,
|
||||||
|
.list_exports = py_list_exports,
|
||||||
|
.default_export = py_default_export,
|
||||||
|
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -0,0 +1,150 @@
|
|||||||
|
From e9abe97b40fef6f9bd9028a2520f45203bba0749 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Thu, 5 Aug 2021 18:18:34 +0100
|
||||||
|
Subject: [PATCH] cow: General revision and updates to the manual
|
||||||
|
|
||||||
|
(cherry picked from commit ba5517b81307c228577cf3c54a651d044ac91a25)
|
||||||
|
---
|
||||||
|
filters/cow/nbdkit-cow-filter.pod | 74 ++++++++++++++++---------------
|
||||||
|
1 file changed, 39 insertions(+), 35 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod
|
||||||
|
index 01261429..7f861140 100644
|
||||||
|
--- a/filters/cow/nbdkit-cow-filter.pod
|
||||||
|
+++ b/filters/cow/nbdkit-cow-filter.pod
|
||||||
|
@@ -5,33 +5,23 @@ nbdkit-cow-filter - nbdkit copy-on-write (COW) filter
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
nbdkit --filter=cow plugin [plugin-args...]
|
||||||
|
+ [cow-on-cache=false|true]
|
||||||
|
+ [cow-on-read=false|true|/PATH]
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
C<nbdkit-cow-filter> is a filter that makes a temporary writable copy
|
||||||
|
-on top of a read-only plugin. It can be used to enable writes for
|
||||||
|
-plugins which only implement read-only access. Note that:
|
||||||
|
+on top of a plugin. It can also be used to enable writes for plugins
|
||||||
|
+which are read-only.
|
||||||
|
|
||||||
|
-=over 4
|
||||||
|
+The underlying plugin is opened read-only. This filter does not pass
|
||||||
|
+any writes or write-like operations (like trim and zero) through to
|
||||||
|
+the underlying plugin.
|
||||||
|
|
||||||
|
-=item *
|
||||||
|
-
|
||||||
|
-B<Anything written is thrown away as soon as nbdkit exits.>
|
||||||
|
-
|
||||||
|
-=item *
|
||||||
|
-
|
||||||
|
-All connections to the nbdkit instance see the same view of the disk.
|
||||||
|
-
|
||||||
|
-This is different from L<nbd-server(1)> where each connection sees its
|
||||||
|
-own copy-on-write overlay and simply disconnecting the client throws
|
||||||
|
-that away. It also allows us to create diffs, see below.
|
||||||
|
-
|
||||||
|
-=item *
|
||||||
|
-
|
||||||
|
-The plugin is opened read-only (as if the I<-r> flag was passed), but
|
||||||
|
-you should B<not> pass the I<-r> flag to nbdkit.
|
||||||
|
-
|
||||||
|
-=back
|
||||||
|
+B<Note that anything written is thrown away as soon as nbdkit exits.>
|
||||||
|
+If you want to save changes, either copy out the whole disk using a
|
||||||
|
+tool like L<nbdcopy(1)>, or use the method described in L</NOTES>
|
||||||
|
+below to create a diff.
|
||||||
|
|
||||||
|
Limitations of the filter include:
|
||||||
|
|
||||||
|
@@ -52,26 +42,26 @@ serve the same data to each client.
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
-=item B<cow-on-cache=true>
|
||||||
|
-
|
||||||
|
-When the client issues a cache (prefetch) request, preemptively save
|
||||||
|
-the data from the plugin into the overlay.
|
||||||
|
-
|
||||||
|
=item B<cow-on-cache=false>
|
||||||
|
|
||||||
|
Do not save data from cache (prefetch) requests in the overlay. This
|
||||||
|
leaves the overlay as small as possible. This is the default.
|
||||||
|
|
||||||
|
-=item B<cow-on-read=true>
|
||||||
|
+=item B<cow-on-cache=true>
|
||||||
|
|
||||||
|
-When the client issues a read request, copy the data into the overlay
|
||||||
|
-so that the same data can be served more quickly later.
|
||||||
|
+When the client issues a cache (prefetch) request, preemptively save
|
||||||
|
+the data from the plugin into the overlay.
|
||||||
|
|
||||||
|
=item B<cow-on-read=false>
|
||||||
|
|
||||||
|
Do not save data from read requests in the overlay. This leaves the
|
||||||
|
overlay as small as possible. This is the default.
|
||||||
|
|
||||||
|
+=item B<cow-on-read=true>
|
||||||
|
+
|
||||||
|
+When the client issues a read request, copy the data into the overlay
|
||||||
|
+so that the same data can be served more quickly later.
|
||||||
|
+
|
||||||
|
=item B<cow-on-read=/PATH>
|
||||||
|
|
||||||
|
When F</PATH> (which must be an absolute path) exists, this behaves
|
||||||
|
@@ -83,18 +73,23 @@ behaviour while nbdkit is running.
|
||||||
|
|
||||||
|
=head1 EXAMPLES
|
||||||
|
|
||||||
|
+=head2 nbdkit --filter=cow file disk.img
|
||||||
|
+
|
||||||
|
Serve the file F<disk.img>, allowing writes, but do not save any
|
||||||
|
-changes into the file:
|
||||||
|
+changes into the file.
|
||||||
|
|
||||||
|
- nbdkit --filter=cow file disk.img
|
||||||
|
+=head2 nbdkit --filter=cow --filter=xz file disk.xz cow-on-read=true
|
||||||
|
|
||||||
|
L<nbdkit-xz-filter(1)> only supports read access, but you can provide
|
||||||
|
-temporary write access by doing (although this does B<not> save
|
||||||
|
-changes to the file):
|
||||||
|
+temporary write access by using the command above. Because xz
|
||||||
|
+decompression is slow, using C<cow-on-read=true> causes reads to be
|
||||||
|
+cached as well as writes, improving performance at the expense of
|
||||||
|
+using more temporary space. Note that writes are thrown away when
|
||||||
|
+nbdkit exits and do not get saved into the file.
|
||||||
|
|
||||||
|
- nbdkit --filter=cow --filter=xz file disk.xz
|
||||||
|
+=head1 NOTES
|
||||||
|
|
||||||
|
-=head1 CREATING A DIFF WITH QEMU-IMG
|
||||||
|
+=head2 Creating a diff with qemu-img
|
||||||
|
|
||||||
|
Although nbdkit-cow-filter itself cannot save the differences, it is
|
||||||
|
possible to do this using an obscure feature of L<qemu-img(1)>.
|
||||||
|
@@ -118,6 +113,14 @@ F<diff.qcow2> now contains the differences between the base
|
||||||
|
(F<disk.img>) and the changes stored in nbdkit-cow-filter. C<nbdkit>
|
||||||
|
can now be killed.
|
||||||
|
|
||||||
|
+=head2 Compared to nbd-server -c option
|
||||||
|
+
|
||||||
|
+All connections to the nbdkit instance see the same view of the disk.
|
||||||
|
+This is different from L<nbd-server(1)> I<-c> option where each
|
||||||
|
+connection sees its own copy-on-write overlay and simply disconnecting
|
||||||
|
+the client throws that away. It also allows us to create diffs as
|
||||||
|
+above.
|
||||||
|
+
|
||||||
|
=head1 ENVIRONMENT VARIABLES
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
@@ -154,6 +157,7 @@ L<nbdkit-cache-filter(1)>,
|
||||||
|
L<nbdkit-cacheextents-filter(1)>,
|
||||||
|
L<nbdkit-xz-filter(1)>,
|
||||||
|
L<nbdkit-filter(3)>,
|
||||||
|
+L<nbdcopy(1)>,
|
||||||
|
L<qemu-img(1)>.
|
||||||
|
|
||||||
|
=head1 AUTHORS
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -0,0 +1,35 @@
|
|||||||
|
From c8c1e74a8c1c112b83646ac09fe7f9bde097a52a Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Thu, 5 Aug 2021 18:20:37 +0100
|
||||||
|
Subject: [PATCH] cache: Move plugin-args in synopsis earlier
|
||||||
|
|
||||||
|
Makes this page consistent with nbdkit-cow-filter.
|
||||||
|
|
||||||
|
(cherry picked from commit f1ddcef468907b0321041b1c4e0a430be46920be)
|
||||||
|
---
|
||||||
|
filters/cache/nbdkit-cache-filter.pod | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
index df9c1f99..d85fef09 100644
|
||||||
|
--- a/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
+++ b/filters/cache/nbdkit-cache-filter.pod
|
||||||
|
@@ -4,13 +4,13 @@ nbdkit-cache-filter - nbdkit caching filter
|
||||||
|
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
- nbdkit --filter=cache plugin [cache=writeback|writethrough|unsafe]
|
||||||
|
+ nbdkit --filter=cache plugin [plugin-args...]
|
||||||
|
+ [cache=writeback|writethrough|unsafe]
|
||||||
|
[cache-min-block-size=SIZE]
|
||||||
|
[cache-max-size=SIZE]
|
||||||
|
[cache-high-threshold=N]
|
||||||
|
[cache-low-threshold=N]
|
||||||
|
[cache-on-read=true|false|/PATH]
|
||||||
|
- [plugin-args...]
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
53
SOURCES/0015-data-Improve-the-example-with-a-diagram.patch
Normal file
53
SOURCES/0015-data-Improve-the-example-with-a-diagram.patch
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
From 0eae7ebf6f714fb339f4a476b65e070b528824ec Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Sun, 8 Aug 2021 16:32:38 +0100
|
||||||
|
Subject: [PATCH] data: Improve the example with a diagram
|
||||||
|
|
||||||
|
And other improvements to readability.
|
||||||
|
|
||||||
|
(cherry picked from commit 4e3a9bda2b7a3d141234e26250c69baa6ed5194d)
|
||||||
|
---
|
||||||
|
plugins/data/nbdkit-data-plugin.pod | 24 +++++++++++++++---------
|
||||||
|
1 file changed, 15 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/plugins/data/nbdkit-data-plugin.pod b/plugins/data/nbdkit-data-plugin.pod
|
||||||
|
index 32a450ab..b69435e9 100644
|
||||||
|
--- a/plugins/data/nbdkit-data-plugin.pod
|
||||||
|
+++ b/plugins/data/nbdkit-data-plugin.pod
|
||||||
|
@@ -172,18 +172,24 @@ compact format. It is a string containing a list of bytes which are
|
||||||
|
written into the disk image sequentially. You can move the virtual
|
||||||
|
offset where bytes are written using C<@offset>.
|
||||||
|
|
||||||
|
-For example:
|
||||||
|
-
|
||||||
|
nbdkit data '0 1 2 3 @0x1fe 0x55 0xaa'
|
||||||
|
|
||||||
|
-creates a 0x200 = 512 byte (1 sector) image containing the four bytes
|
||||||
|
-C<0 1 2 3> at the start, and the two bytes C<0x55 0xaa> at the end of
|
||||||
|
-the sector, with the remaining 506 bytes in the middle being all
|
||||||
|
-zeroes. In this example the size (512 bytes) is implied by the data.
|
||||||
|
-But you could additionally use the C<size> parameter to either
|
||||||
|
-truncate or extend (with zeroes) the disk image.
|
||||||
|
+creates:
|
||||||
|
|
||||||
|
-Whitespace between fields in the string is ignored.
|
||||||
|
+ total size 0x200 = 512 bytes (1 sector)
|
||||||
|
+┌──────┬──────┬──────┬──────┬───────── ── ── ───┬──────┬──────┐
|
||||||
|
+│ 0 │ 1 │ 2 │ 3 │ 0 0 ... 0 │ 0x55 │ 0xaa │
|
||||||
|
+└──────┴──────┴──────┴──────┴───────── ── ── ───┴──────┴──────┘
|
||||||
|
+ ↑
|
||||||
|
+ offset 0x1fe
|
||||||
|
+
|
||||||
|
+In this example the size is implied by the data. But you could also
|
||||||
|
+use the C<size> parameter to either truncate or extend (with zeroes)
|
||||||
|
+the disk image. Another way to write the same disk would be this,
|
||||||
|
+where we align the offset to the end of the sector and move back 2
|
||||||
|
+bytes to write the signature:
|
||||||
|
+
|
||||||
|
+ nbdkit data '0 1 2 3 @^0x200 @-2 le16:0xaa55'
|
||||||
|
|
||||||
|
Fields in the string can be:
|
||||||
|
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -0,0 +1,45 @@
|
|||||||
|
From a22248e3075e782d28542f8f6acd046c9dfa8998 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Mon, 9 Aug 2021 14:09:31 +0100
|
||||||
|
Subject: [PATCH] cow: Add some more debugging especially for blk_read_multiple
|
||||||
|
and cow-on-read
|
||||||
|
|
||||||
|
Only activated when we use -D cow.verbose=1
|
||||||
|
|
||||||
|
(cherry picked from commit 2da1ae0ca966af955d8fcf3feffffc80d07142fd)
|
||||||
|
---
|
||||||
|
filters/cow/blk.c | 11 +++++++++--
|
||||||
|
1 file changed, 9 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
|
||||||
|
index 4f84e092..c22d5886 100644
|
||||||
|
--- a/filters/cow/blk.c
|
||||||
|
+++ b/filters/cow/blk.c
|
||||||
|
@@ -254,8 +254,10 @@ blk_read_multiple (nbdkit_next *next,
|
||||||
|
|
||||||
|
if (cow_debug_verbose)
|
||||||
|
nbdkit_debug ("cow: blk_read_multiple block %" PRIu64
|
||||||
|
- " (offset %" PRIu64 ") is %s",
|
||||||
|
- blknum, (uint64_t) offset, state_to_string (state));
|
||||||
|
+ " (offset %" PRIu64 ") run of length %" PRIu64
|
||||||
|
+ " is %s",
|
||||||
|
+ blknum, (uint64_t) offset, runblocks,
|
||||||
|
+ state_to_string (state));
|
||||||
|
|
||||||
|
if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
|
||||||
|
unsigned n, tail = 0;
|
||||||
|
@@ -281,6 +283,11 @@ blk_read_multiple (nbdkit_next *next,
|
||||||
|
* set them as allocated.
|
||||||
|
*/
|
||||||
|
if (cow_on_read) {
|
||||||
|
+ if (cow_debug_verbose)
|
||||||
|
+ nbdkit_debug ("cow: cow-on-read saving %" PRIu64 " blocks "
|
||||||
|
+ "at offset %" PRIu64 " into the cache",
|
||||||
|
+ runblocks, offset);
|
||||||
|
+
|
||||||
|
if (full_pwrite (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
142
SOURCES/0017-delay-Fix-delay-close.patch
Normal file
142
SOURCES/0017-delay-Fix-delay-close.patch
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
From be7252bada79ee542356dffaf5f3c568a5c7fec3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Tue, 10 Aug 2021 08:39:15 +0100
|
||||||
|
Subject: [PATCH] delay: Fix delay-close
|
||||||
|
|
||||||
|
See comments in the code for how this has been fixed.
|
||||||
|
|
||||||
|
This only delays clients which use NBD_CMD_DISC (libnbd
|
||||||
|
nbd_shutdown(3)). Clients which drop the connection obviously cannot
|
||||||
|
be delayed. For example:
|
||||||
|
|
||||||
|
$ nbdkit --filter=delay null delay-close=3 \
|
||||||
|
--run 'time nbdsh -u $uri -c "h.shutdown()"
|
||||||
|
time nbdsh -u $uri -c "pass"'
|
||||||
|
|
||||||
|
real 0m3.061s # Client used shutdown, was delayed
|
||||||
|
user 0m0.028s
|
||||||
|
sys 0m0.030s
|
||||||
|
|
||||||
|
real 0m0.058s # Client disconnected, was not delayed
|
||||||
|
user 0m0.029s
|
||||||
|
sys 0m0.027s
|
||||||
|
|
||||||
|
Reported-by: Ming Xie
|
||||||
|
Fixes: commit de8dcd3a34a38b088a0f9a6f8ca754702ad1f598
|
||||||
|
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1991652
|
||||||
|
(cherry picked from commit 0cafebdb67d0d557ba1be8ea306b8acc5d9b2203)
|
||||||
|
---
|
||||||
|
filters/delay/delay.c | 42 +++++++++++++++++++--------
|
||||||
|
filters/delay/nbdkit-delay-filter.pod | 14 +++++++--
|
||||||
|
2 files changed, 41 insertions(+), 15 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/filters/delay/delay.c b/filters/delay/delay.c
|
||||||
|
index df3729a7..9252b855 100644
|
||||||
|
--- a/filters/delay/delay.c
|
||||||
|
+++ b/filters/delay/delay.c
|
||||||
|
@@ -39,6 +39,7 @@
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <limits.h>
|
||||||
|
+#include <time.h>
|
||||||
|
|
||||||
|
#include <nbdkit-filter.h>
|
||||||
|
|
||||||
|
@@ -134,12 +135,6 @@ open_delay (int *err)
|
||||||
|
return delay (delay_open_ms, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int
|
||||||
|
-close_delay (int *err)
|
||||||
|
-{
|
||||||
|
- return delay (delay_close_ms, err);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
/* Called for each key=value passed on the command line. */
|
||||||
|
static int
|
||||||
|
delay_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
@@ -253,13 +248,36 @@ delay_open (nbdkit_next_open *next, nbdkit_context *nxdata,
|
||||||
|
return NBDKIT_HANDLE_NOT_NEEDED;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Close connection. */
|
||||||
|
-static void
|
||||||
|
-delay_close (void *handle)
|
||||||
|
+/* Close connection.
|
||||||
|
+ *
|
||||||
|
+ * We cannot call nbdkit_nanosleep here because the socket may have
|
||||||
|
+ * been closed and that function will abort and return immediately.
|
||||||
|
+ * However we want to force a sleep (even if the server is shutting
|
||||||
|
+ * down) so use regular nanosleep instead.
|
||||||
|
+ *
|
||||||
|
+ * We cannot use the .close callback because that happens after the
|
||||||
|
+ * socket has closed, thus not delaying the client. By using
|
||||||
|
+ * .finalize we can delay well-behaved clients (those that use
|
||||||
|
+ * NBD_CMD_DISC). We cannot delay clients that drop the connection.
|
||||||
|
+ */
|
||||||
|
+static int
|
||||||
|
+delay_finalize (nbdkit_next *next, void *handle)
|
||||||
|
{
|
||||||
|
- int err;
|
||||||
|
+ const unsigned ms = delay_close_ms;
|
||||||
|
|
||||||
|
- close_delay (&err);
|
||||||
|
+ if (ms > 0) {
|
||||||
|
+ struct timespec ts;
|
||||||
|
+
|
||||||
|
+ ts.tv_sec = ms / 1000;
|
||||||
|
+ ts.tv_nsec = (ms % 1000) * 1000000;
|
||||||
|
+ /* If nanosleep fails we don't really want to interrupt the chain
|
||||||
|
+ * of finalize calls through the other filters, so ignore any
|
||||||
|
+ * error here.
|
||||||
|
+ */
|
||||||
|
+ nanosleep (&ts, NULL);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return next->finalize (next);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read data. */
|
||||||
|
@@ -340,7 +358,7 @@ static struct nbdkit_filter filter = {
|
||||||
|
.config_help = delay_config_help,
|
||||||
|
.can_fast_zero = delay_can_fast_zero,
|
||||||
|
.open = delay_open,
|
||||||
|
- .close = delay_close,
|
||||||
|
+ .finalize = delay_finalize,
|
||||||
|
.pread = delay_pread,
|
||||||
|
.pwrite = delay_pwrite,
|
||||||
|
.zero = delay_zero,
|
||||||
|
diff --git a/filters/delay/nbdkit-delay-filter.pod b/filters/delay/nbdkit-delay-filter.pod
|
||||||
|
index 11ae544b..76614736 100644
|
||||||
|
--- a/filters/delay/nbdkit-delay-filter.pod
|
||||||
|
+++ b/filters/delay/nbdkit-delay-filter.pod
|
||||||
|
@@ -117,15 +117,23 @@ the plugin.
|
||||||
|
|
||||||
|
=item B<delay-open=>NNB<ms>
|
||||||
|
|
||||||
|
+(nbdkit E<ge> 1.28)
|
||||||
|
+
|
||||||
|
+Delay open (client connection) by C<SECS> seconds or C<NN>
|
||||||
|
+milliseconds.
|
||||||
|
+
|
||||||
|
=item B<delay-close=>SECS
|
||||||
|
|
||||||
|
=item B<delay-close=>NNB<ms>
|
||||||
|
|
||||||
|
(nbdkit E<ge> 1.28)
|
||||||
|
|
||||||
|
-Delay open and close operations by C<SECS> seconds or C<NN>
|
||||||
|
-milliseconds. Open corresponds to client connection. Close may not
|
||||||
|
-be visible to clients if they abruptly disconnect.
|
||||||
|
+Delay close (client disconnection) by C<SECS> seconds or C<NN>
|
||||||
|
+milliseconds. This can also cause server shutdown to be delayed if
|
||||||
|
+clients are connected at the time. This only affects clients that
|
||||||
|
+gracefully disconnect (using C<NBD_CMD_DISC> / libnbd function
|
||||||
|
+L<nbd_shutdown(3)>). Clients that abruptly disconnect from the server
|
||||||
|
+cannot be delayed.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
155
SOURCES/0018-delay-Test-delay-open-and-delay-close.patch
Normal file
155
SOURCES/0018-delay-Test-delay-open-and-delay-close.patch
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
From 838ec052abe63056434c08ea80f4609e697dad0f Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Tue, 10 Aug 2021 09:11:43 +0100
|
||||||
|
Subject: [PATCH] delay: Test delay-open and delay-close
|
||||||
|
|
||||||
|
(cherry picked from commit 3caabaf87ec744b863b50b5bf77a9c1b93a7c3e0)
|
||||||
|
---
|
||||||
|
tests/Makefile.am | 12 +++++++--
|
||||||
|
tests/test-delay-close.sh | 54 +++++++++++++++++++++++++++++++++++++++
|
||||||
|
tests/test-delay-open.sh | 49 +++++++++++++++++++++++++++++++++++
|
||||||
|
3 files changed, 113 insertions(+), 2 deletions(-)
|
||||||
|
create mode 100755 tests/test-delay-close.sh
|
||||||
|
create mode 100755 tests/test-delay-open.sh
|
||||||
|
|
||||||
|
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
||||||
|
index edc8d66d..e61c5829 100644
|
||||||
|
--- a/tests/Makefile.am
|
||||||
|
+++ b/tests/Makefile.am
|
||||||
|
@@ -1425,8 +1425,16 @@ EXTRA_DIST += \
|
||||||
|
$(NULL)
|
||||||
|
|
||||||
|
# delay filter tests.
|
||||||
|
-TESTS += test-delay-shutdown.sh
|
||||||
|
-EXTRA_DIST += test-delay-shutdown.sh
|
||||||
|
+TESTS += \
|
||||||
|
+ test-delay-close.sh \
|
||||||
|
+ test-delay-open.sh \
|
||||||
|
+ test-delay-shutdown.sh \
|
||||||
|
+ $(NULL)
|
||||||
|
+EXTRA_DIST += \
|
||||||
|
+ test-delay-close.sh \
|
||||||
|
+ test-delay-open.sh \
|
||||||
|
+ test-delay-shutdown.sh \
|
||||||
|
+ $(NULL)
|
||||||
|
LIBNBD_TESTS += test-delay
|
||||||
|
|
||||||
|
test_delay_SOURCES = test-delay.c
|
||||||
|
diff --git a/tests/test-delay-close.sh b/tests/test-delay-close.sh
|
||||||
|
new file mode 100755
|
||||||
|
index 00000000..1de305f5
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/tests/test-delay-close.sh
|
||||||
|
@@ -0,0 +1,54 @@
|
||||||
|
+#!/usr/bin/env bash
|
||||||
|
+# nbdkit
|
||||||
|
+# Copyright (C) 2018-2021 Red Hat Inc.
|
||||||
|
+#
|
||||||
|
+# Redistribution and use in source and binary forms, with or without
|
||||||
|
+# modification, are permitted provided that the following conditions are
|
||||||
|
+# met:
|
||||||
|
+#
|
||||||
|
+# * Redistributions of source code must retain the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer.
|
||||||
|
+#
|
||||||
|
+# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer in the
|
||||||
|
+# documentation and/or other materials provided with the distribution.
|
||||||
|
+#
|
||||||
|
+# * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+# used to endorse or promote products derived from this software without
|
||||||
|
+# specific prior written permission.
|
||||||
|
+#
|
||||||
|
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+# SUCH DAMAGE.
|
||||||
|
+
|
||||||
|
+source ./functions.sh
|
||||||
|
+set -e
|
||||||
|
+set -x
|
||||||
|
+
|
||||||
|
+requires_run
|
||||||
|
+requires_plugin null
|
||||||
|
+requires_filter delay
|
||||||
|
+requires nbdsh --version
|
||||||
|
+
|
||||||
|
+# Test delay-close with a well-behaved client.
|
||||||
|
+
|
||||||
|
+nbdkit -U - null --filter=delay delay-close=3 \
|
||||||
|
+ --run '
|
||||||
|
+start_t=$SECONDS
|
||||||
|
+nbdsh -u "$uri" -c "h.shutdown()"
|
||||||
|
+end_t=$SECONDS
|
||||||
|
+
|
||||||
|
+if [ $((end_t - start_t)) -lt 3 ]; then
|
||||||
|
+ echo "$0: delay filter failed: delay-close=3 caused delay < 3 seconds"
|
||||||
|
+ exit 1
|
||||||
|
+fi
|
||||||
|
+'
|
||||||
|
diff --git a/tests/test-delay-open.sh b/tests/test-delay-open.sh
|
||||||
|
new file mode 100755
|
||||||
|
index 00000000..2a74e44c
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/tests/test-delay-open.sh
|
||||||
|
@@ -0,0 +1,49 @@
|
||||||
|
+#!/usr/bin/env bash
|
||||||
|
+# nbdkit
|
||||||
|
+# Copyright (C) 2018-2021 Red Hat Inc.
|
||||||
|
+#
|
||||||
|
+# Redistribution and use in source and binary forms, with or without
|
||||||
|
+# modification, are permitted provided that the following conditions are
|
||||||
|
+# met:
|
||||||
|
+#
|
||||||
|
+# * Redistributions of source code must retain the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer.
|
||||||
|
+#
|
||||||
|
+# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer in the
|
||||||
|
+# documentation and/or other materials provided with the distribution.
|
||||||
|
+#
|
||||||
|
+# * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+# used to endorse or promote products derived from this software without
|
||||||
|
+# specific prior written permission.
|
||||||
|
+#
|
||||||
|
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+# SUCH DAMAGE.
|
||||||
|
+
|
||||||
|
+source ./functions.sh
|
||||||
|
+set -e
|
||||||
|
+set -x
|
||||||
|
+
|
||||||
|
+requires_run
|
||||||
|
+requires_plugin null
|
||||||
|
+requires_filter delay
|
||||||
|
+requires nbdinfo --version
|
||||||
|
+
|
||||||
|
+start_t=$SECONDS
|
||||||
|
+nbdkit -U - null --filter=delay delay-open=3 --run 'nbdinfo "$uri"'
|
||||||
|
+end_t=$SECONDS
|
||||||
|
+
|
||||||
|
+if [ $((end_t - start_t)) -lt 3 ]; then
|
||||||
|
+ echo "$0: delay filter failed: delay-open=3 caused delay < 3 seconds"
|
||||||
|
+ exit 1
|
||||||
|
+fi
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
74
SOURCES/0019-vddk-Implement-can_flush-and-can_fua.patch
Normal file
74
SOURCES/0019-vddk-Implement-can_flush-and-can_fua.patch
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
From 2104686eb708bf87070c21e7af0e70e0317306b6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Mon, 5 Jul 2021 21:36:41 +0100
|
||||||
|
Subject: [PATCH] vddk: Implement can_flush and can_fua
|
||||||
|
|
||||||
|
VDDK < 6.0 doesn't support flush. Previously we advertised flush and
|
||||||
|
FUA but ignored them if VDDK didn't support it. Instead, correctly
|
||||||
|
set these flags in the NBD protocol according to what VDDK supports.
|
||||||
|
|
||||||
|
(cherry picked from commit 04b05274414a8cf4615eb2d6f46d5658814509c1)
|
||||||
|
---
|
||||||
|
plugins/vddk/vddk.c | 28 ++++++++++++++++++++--------
|
||||||
|
1 file changed, 20 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
|
||||||
|
index 76faa768..b5bce9a0 100644
|
||||||
|
--- a/plugins/vddk/vddk.c
|
||||||
|
+++ b/plugins/vddk/vddk.c
|
||||||
|
@@ -772,12 +772,28 @@ vddk_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (fua && vddk_flush (handle, 0) == -1)
|
||||||
|
- return -1;
|
||||||
|
+ if (fua) {
|
||||||
|
+ if (vddk_flush (handle, 0) == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int
|
||||||
|
+vddk_can_fua (void *handle)
|
||||||
|
+{
|
||||||
|
+ /* The Flush call was not available in VDDK < 6.0. */
|
||||||
|
+ return VixDiskLib_Flush != NULL ? NBDKIT_FUA_NATIVE : NBDKIT_FUA_NONE;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int
|
||||||
|
+vddk_can_flush (void *handle)
|
||||||
|
+{
|
||||||
|
+ /* The Flush call was not available in VDDK < 6.0. */
|
||||||
|
+ return VixDiskLib_Flush != NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Flush data to the file. */
|
||||||
|
static int
|
||||||
|
vddk_flush (void *handle, uint32_t flags)
|
||||||
|
@@ -785,12 +801,6 @@ vddk_flush (void *handle, uint32_t flags)
|
||||||
|
struct vddk_handle *h = handle;
|
||||||
|
VixError err;
|
||||||
|
|
||||||
|
- /* The Flush call was not available in VDDK < 6.0 so this is simply
|
||||||
|
- * ignored on earlier versions.
|
||||||
|
- */
|
||||||
|
- if (VixDiskLib_Flush == NULL)
|
||||||
|
- return 0;
|
||||||
|
-
|
||||||
|
DEBUG_CALL ("VixDiskLib_Flush", "handle");
|
||||||
|
err = VixDiskLib_Flush (h->handle);
|
||||||
|
if (err != VIX_OK) {
|
||||||
|
@@ -985,6 +995,8 @@ static struct nbdkit_plugin plugin = {
|
||||||
|
.get_size = vddk_get_size,
|
||||||
|
.pread = vddk_pread,
|
||||||
|
.pwrite = vddk_pwrite,
|
||||||
|
+ .can_fua = vddk_can_fua,
|
||||||
|
+ .can_flush = vddk_can_flush,
|
||||||
|
.flush = vddk_flush,
|
||||||
|
.can_extents = vddk_can_extents,
|
||||||
|
.extents = vddk_extents,
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -0,0 +1,319 @@
|
|||||||
|
From 51713e7702d389fd55d5721c4773fca40e3e89f6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Wed, 18 Aug 2021 14:26:30 +0100
|
||||||
|
Subject: [PATCH] vddk: Replace DEBUG_CALL with bracketed VDDK_CALL_START/END
|
||||||
|
macros
|
||||||
|
|
||||||
|
This is neutral refactoring, but allows us in the next commit to
|
||||||
|
collect statistics about the amount of time spent in these calls.
|
||||||
|
|
||||||
|
(cherry picked from commit 1335ebfb5637bf5a44403d0b152da7272fdd3e54)
|
||||||
|
---
|
||||||
|
plugins/vddk/vddk.c | 175 +++++++++++++++++++++++++-------------------
|
||||||
|
1 file changed, 99 insertions(+), 76 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
|
||||||
|
index b5bce9a0..888009ab 100644
|
||||||
|
--- a/plugins/vddk/vddk.c
|
||||||
|
+++ b/plugins/vddk/vddk.c
|
||||||
|
@@ -104,19 +104,23 @@ static bool is_remote;
|
||||||
|
VixDiskLib_FreeErrorText (vddk_err_msg); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
-#define DEBUG_CALL(fn, fs, ...) \
|
||||||
|
- nbdkit_debug ("VDDK call: %s (" fs ")", fn, ##__VA_ARGS__)
|
||||||
|
-#define DEBUG_CALL_DATAPATH(fn, fs, ...) \
|
||||||
|
- if (vddk_debug_datapath) \
|
||||||
|
- nbdkit_debug ("VDDK call: %s (" fs ")", fn, ##__VA_ARGS__)
|
||||||
|
+#define VDDK_CALL_START(fn, fs, ...) \
|
||||||
|
+ nbdkit_debug ("VDDK call: %s (" fs ")", #fn, ##__VA_ARGS__); \
|
||||||
|
+ do
|
||||||
|
+#define VDDK_CALL_START_DATAPATH(fn, fs, ...) \
|
||||||
|
+ if (vddk_debug_datapath) \
|
||||||
|
+ nbdkit_debug ("VDDK call: %s (" fs ")", #fn, ##__VA_ARGS__); \
|
||||||
|
+ do
|
||||||
|
+#define VDDK_CALL_END(fn) while (0)
|
||||||
|
|
||||||
|
/* Unload the plugin. */
|
||||||
|
static void
|
||||||
|
vddk_unload (void)
|
||||||
|
{
|
||||||
|
if (init_called) {
|
||||||
|
- DEBUG_CALL ("VixDiskLib_Exit", "");
|
||||||
|
- VixDiskLib_Exit ();
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_Exit, "") {
|
||||||
|
+ VixDiskLib_Exit ();
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_Exit);
|
||||||
|
}
|
||||||
|
if (dl)
|
||||||
|
dlclose (dl);
|
||||||
|
@@ -449,15 +453,16 @@ vddk_after_fork (void)
|
||||||
|
VixError err;
|
||||||
|
|
||||||
|
/* Initialize VDDK library. */
|
||||||
|
- DEBUG_CALL ("VixDiskLib_InitEx",
|
||||||
|
- "%d, %d, &debug_fn, &error_fn, &error_fn, %s, %s",
|
||||||
|
- VDDK_MAJOR, VDDK_MINOR,
|
||||||
|
- libdir, config ? : "NULL");
|
||||||
|
- err = VixDiskLib_InitEx (VDDK_MAJOR, VDDK_MINOR,
|
||||||
|
- &debug_function, /* log function */
|
||||||
|
- &error_function, /* warn function */
|
||||||
|
- &error_function, /* panic function */
|
||||||
|
- libdir, config);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_InitEx,
|
||||||
|
+ "%d, %d, &debug_fn, &error_fn, &error_fn, %s, %s",
|
||||||
|
+ VDDK_MAJOR, VDDK_MINOR,
|
||||||
|
+ libdir, config ? : "NULL") {
|
||||||
|
+ err = VixDiskLib_InitEx (VDDK_MAJOR, VDDK_MINOR,
|
||||||
|
+ &debug_function, /* log function */
|
||||||
|
+ &error_function, /* warn function */
|
||||||
|
+ &error_function, /* panic function */
|
||||||
|
+ libdir, config);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_InitEx);
|
||||||
|
if (err != VIX_OK) {
|
||||||
|
VDDK_ERROR (err, "VixDiskLib_InitEx");
|
||||||
|
exit (EXIT_FAILURE);
|
||||||
|
@@ -519,8 +524,9 @@ allocate_connect_params (void)
|
||||||
|
VixDiskLibConnectParams *ret;
|
||||||
|
|
||||||
|
if (VixDiskLib_AllocateConnectParams != NULL) {
|
||||||
|
- DEBUG_CALL ("VixDiskLib_AllocateConnectParams", "");
|
||||||
|
- ret = VixDiskLib_AllocateConnectParams ();
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_AllocateConnectParams, "") {
|
||||||
|
+ ret = VixDiskLib_AllocateConnectParams ();
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_AllocateConnectParams);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
ret = calloc (1, sizeof (VixDiskLibConnectParams));
|
||||||
|
@@ -535,8 +541,9 @@ free_connect_params (VixDiskLibConnectParams *params)
|
||||||
|
* originally called. Otherwise use free.
|
||||||
|
*/
|
||||||
|
if (VixDiskLib_AllocateConnectParams != NULL) {
|
||||||
|
- DEBUG_CALL ("VixDiskLib_FreeConnectParams", "params");
|
||||||
|
- VixDiskLib_FreeConnectParams (params);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params") {
|
||||||
|
+ VixDiskLib_FreeConnectParams (params);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_FreeConnectParams);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
free (params);
|
||||||
|
@@ -589,16 +596,17 @@ vddk_open (int readonly)
|
||||||
|
* either ESXi or vCenter servers.
|
||||||
|
*/
|
||||||
|
|
||||||
|
- DEBUG_CALL ("VixDiskLib_ConnectEx",
|
||||||
|
- "h->params, %d, %s, %s, &connection",
|
||||||
|
- readonly,
|
||||||
|
- snapshot_moref ? : "NULL",
|
||||||
|
- transport_modes ? : "NULL");
|
||||||
|
- err = VixDiskLib_ConnectEx (h->params,
|
||||||
|
- readonly,
|
||||||
|
- snapshot_moref,
|
||||||
|
- transport_modes,
|
||||||
|
- &h->connection);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_ConnectEx,
|
||||||
|
+ "h->params, %d, %s, %s, &connection",
|
||||||
|
+ readonly,
|
||||||
|
+ snapshot_moref ? : "NULL",
|
||||||
|
+ transport_modes ? : "NULL") {
|
||||||
|
+ err = VixDiskLib_ConnectEx (h->params,
|
||||||
|
+ readonly,
|
||||||
|
+ snapshot_moref,
|
||||||
|
+ transport_modes,
|
||||||
|
+ &h->connection);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_ConnectEx);
|
||||||
|
if (err != VIX_OK) {
|
||||||
|
VDDK_ERROR (err, "VixDiskLib_ConnectEx");
|
||||||
|
goto err1;
|
||||||
|
@@ -618,9 +626,10 @@ vddk_open (int readonly)
|
||||||
|
case NONE: break;
|
||||||
|
}
|
||||||
|
|
||||||
|
- DEBUG_CALL ("VixDiskLib_Open",
|
||||||
|
- "connection, %s, %d, &handle", filename, flags);
|
||||||
|
- err = VixDiskLib_Open (h->connection, filename, flags, &h->handle);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_Open,
|
||||||
|
+ "connection, %s, %d, &handle", filename, flags) {
|
||||||
|
+ err = VixDiskLib_Open (h->connection, filename, flags, &h->handle);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_Open);
|
||||||
|
if (err != VIX_OK) {
|
||||||
|
VDDK_ERROR (err, "VixDiskLib_Open: %s", filename);
|
||||||
|
goto err2;
|
||||||
|
@@ -632,8 +641,9 @@ vddk_open (int readonly)
|
||||||
|
return h;
|
||||||
|
|
||||||
|
err2:
|
||||||
|
- DEBUG_CALL ("VixDiskLib_Disconnect", "connection");
|
||||||
|
- VixDiskLib_Disconnect (h->connection);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_Disconnect, "connection") {
|
||||||
|
+ VixDiskLib_Disconnect (h->connection);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_Disconnect);
|
||||||
|
err1:
|
||||||
|
free_connect_params (h->params);
|
||||||
|
err0:
|
||||||
|
@@ -648,10 +658,13 @@ vddk_close (void *handle)
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&open_close_lock);
|
||||||
|
struct vddk_handle *h = handle;
|
||||||
|
|
||||||
|
- DEBUG_CALL ("VixDiskLib_Close", "handle");
|
||||||
|
- VixDiskLib_Close (h->handle);
|
||||||
|
- DEBUG_CALL ("VixDiskLib_Disconnect", "connection");
|
||||||
|
- VixDiskLib_Disconnect (h->connection);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_Close, "handle") {
|
||||||
|
+ VixDiskLib_Close (h->handle);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_Close);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_Disconnect, "connection") {
|
||||||
|
+ VixDiskLib_Disconnect (h->connection);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_Disconnect);
|
||||||
|
+
|
||||||
|
free_connect_params (h->params);
|
||||||
|
free (h);
|
||||||
|
}
|
||||||
|
@@ -665,8 +678,9 @@ vddk_get_size (void *handle)
|
||||||
|
VixError err;
|
||||||
|
uint64_t size;
|
||||||
|
|
||||||
|
- DEBUG_CALL ("VixDiskLib_GetInfo", "handle, &info");
|
||||||
|
- err = VixDiskLib_GetInfo (h->handle, &info);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_GetInfo, "handle, &info") {
|
||||||
|
+ err = VixDiskLib_GetInfo (h->handle, &info);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_GetInfo);
|
||||||
|
if (err != VIX_OK) {
|
||||||
|
VDDK_ERROR (err, "VixDiskLib_GetInfo");
|
||||||
|
return -1;
|
||||||
|
@@ -694,8 +708,9 @@ vddk_get_size (void *handle)
|
||||||
|
info->uuid ? : "NULL");
|
||||||
|
}
|
||||||
|
|
||||||
|
- DEBUG_CALL ("VixDiskLib_FreeInfo", "info");
|
||||||
|
- VixDiskLib_FreeInfo (info);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_FreeInfo, "info") {
|
||||||
|
+ VixDiskLib_FreeInfo (info);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_FreeInfo);
|
||||||
|
|
||||||
|
return (int64_t) size;
|
||||||
|
}
|
||||||
|
@@ -723,11 +738,12 @@ vddk_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
|
||||||
|
offset /= VIXDISKLIB_SECTOR_SIZE;
|
||||||
|
count /= VIXDISKLIB_SECTOR_SIZE;
|
||||||
|
|
||||||
|
- DEBUG_CALL_DATAPATH ("VixDiskLib_Read",
|
||||||
|
- "handle, %" PRIu64 " sectors, "
|
||||||
|
- "%" PRIu32 " sectors, buffer",
|
||||||
|
- offset, count);
|
||||||
|
- err = VixDiskLib_Read (h->handle, offset, count, buf);
|
||||||
|
+ VDDK_CALL_START_DATAPATH (VixDiskLib_Read,
|
||||||
|
+ "handle, %" PRIu64 " sectors, "
|
||||||
|
+ "%" PRIu32 " sectors, buffer",
|
||||||
|
+ offset, count) {
|
||||||
|
+ err = VixDiskLib_Read (h->handle, offset, count, buf);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_Read);
|
||||||
|
if (err != VIX_OK) {
|
||||||
|
VDDK_ERROR (err, "VixDiskLib_Read");
|
||||||
|
return -1;
|
||||||
|
@@ -762,11 +778,12 @@ vddk_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
|
||||||
|
offset /= VIXDISKLIB_SECTOR_SIZE;
|
||||||
|
count /= VIXDISKLIB_SECTOR_SIZE;
|
||||||
|
|
||||||
|
- DEBUG_CALL_DATAPATH ("VixDiskLib_Write",
|
||||||
|
- "handle, %" PRIu64 " sectors, "
|
||||||
|
- "%" PRIu32 " sectors, buffer",
|
||||||
|
- offset, count);
|
||||||
|
- err = VixDiskLib_Write (h->handle, offset, count, buf);
|
||||||
|
+ VDDK_CALL_START_DATAPATH (VixDiskLib_Write,
|
||||||
|
+ "handle, %" PRIu64 " sectors, "
|
||||||
|
+ "%" PRIu32 " sectors, buffer",
|
||||||
|
+ offset, count) {
|
||||||
|
+ err = VixDiskLib_Write (h->handle, offset, count, buf);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_Write);
|
||||||
|
if (err != VIX_OK) {
|
||||||
|
VDDK_ERROR (err, "VixDiskLib_Write");
|
||||||
|
return -1;
|
||||||
|
@@ -801,8 +818,9 @@ vddk_flush (void *handle, uint32_t flags)
|
||||||
|
struct vddk_handle *h = handle;
|
||||||
|
VixError err;
|
||||||
|
|
||||||
|
- DEBUG_CALL ("VixDiskLib_Flush", "handle");
|
||||||
|
- err = VixDiskLib_Flush (h->handle);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_Flush, "handle") {
|
||||||
|
+ err = VixDiskLib_Flush (h->handle);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_Flush);
|
||||||
|
if (err != VIX_OK) {
|
||||||
|
VDDK_ERROR (err, "VixDiskLib_Flush");
|
||||||
|
return -1;
|
||||||
|
@@ -836,17 +854,19 @@ vddk_can_extents (void *handle)
|
||||||
|
* the best thing we can do here is to try the call and if it's
|
||||||
|
* non-functional return false.
|
||||||
|
*/
|
||||||
|
- DEBUG_CALL ("VixDiskLib_QueryAllocatedBlocks",
|
||||||
|
- "handle, 0, %d sectors, %d sectors",
|
||||||
|
- VIXDISKLIB_MIN_CHUNK_SIZE, VIXDISKLIB_MIN_CHUNK_SIZE);
|
||||||
|
- err = VixDiskLib_QueryAllocatedBlocks (h->handle,
|
||||||
|
- 0, VIXDISKLIB_MIN_CHUNK_SIZE,
|
||||||
|
- VIXDISKLIB_MIN_CHUNK_SIZE,
|
||||||
|
- &block_list);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
|
||||||
|
+ "handle, 0, %d sectors, %d sectors",
|
||||||
|
+ VIXDISKLIB_MIN_CHUNK_SIZE, VIXDISKLIB_MIN_CHUNK_SIZE) {
|
||||||
|
+ err = VixDiskLib_QueryAllocatedBlocks (h->handle,
|
||||||
|
+ 0, VIXDISKLIB_MIN_CHUNK_SIZE,
|
||||||
|
+ VIXDISKLIB_MIN_CHUNK_SIZE,
|
||||||
|
+ &block_list);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks);
|
||||||
|
error_suppression = 0;
|
||||||
|
if (err == VIX_OK) {
|
||||||
|
- DEBUG_CALL ("VixDiskLib_FreeBlockList", "block_list");
|
||||||
|
- VixDiskLib_FreeBlockList (block_list);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list") {
|
||||||
|
+ VixDiskLib_FreeBlockList (block_list);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_FreeBlockList);
|
||||||
|
}
|
||||||
|
if (err != VIX_OK) {
|
||||||
|
char *errmsg = VixDiskLib_GetErrorText (err, NULL);
|
||||||
|
@@ -923,14 +943,15 @@ vddk_extents (void *handle, uint32_t count, uint64_t offset, uint32_t flags,
|
||||||
|
nr_chunks = MIN (nr_chunks, VIXDISKLIB_MAX_CHUNK_NUMBER);
|
||||||
|
nr_sectors = nr_chunks * VIXDISKLIB_MIN_CHUNK_SIZE;
|
||||||
|
|
||||||
|
- DEBUG_CALL ("VixDiskLib_QueryAllocatedBlocks",
|
||||||
|
- "handle, %" PRIu64 " sectors, %" PRIu64 " sectors, "
|
||||||
|
- "%d sectors",
|
||||||
|
- start_sector, nr_sectors, VIXDISKLIB_MIN_CHUNK_SIZE);
|
||||||
|
- err = VixDiskLib_QueryAllocatedBlocks (h->handle,
|
||||||
|
- start_sector, nr_sectors,
|
||||||
|
- VIXDISKLIB_MIN_CHUNK_SIZE,
|
||||||
|
- &block_list);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
|
||||||
|
+ "handle, %" PRIu64 " sectors, %" PRIu64 " sectors, "
|
||||||
|
+ "%d sectors",
|
||||||
|
+ start_sector, nr_sectors, VIXDISKLIB_MIN_CHUNK_SIZE) {
|
||||||
|
+ err = VixDiskLib_QueryAllocatedBlocks (h->handle,
|
||||||
|
+ start_sector, nr_sectors,
|
||||||
|
+ VIXDISKLIB_MIN_CHUNK_SIZE,
|
||||||
|
+ &block_list);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks);
|
||||||
|
if (err != VIX_OK) {
|
||||||
|
VDDK_ERROR (err, "VixDiskLib_QueryAllocatedBlocks");
|
||||||
|
return -1;
|
||||||
|
@@ -949,13 +970,15 @@ vddk_extents (void *handle, uint32_t count, uint64_t offset, uint32_t flags,
|
||||||
|
add_extent (extents, &position, blk_offset, true) == -1) ||
|
||||||
|
(add_extent (extents,
|
||||||
|
&position, blk_offset + blk_length, false) == -1)) {
|
||||||
|
- DEBUG_CALL ("VixDiskLib_FreeBlockList", "block_list");
|
||||||
|
- VixDiskLib_FreeBlockList (block_list);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list") {
|
||||||
|
+ VixDiskLib_FreeBlockList (block_list);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_FreeBlockList);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- DEBUG_CALL ("VixDiskLib_FreeBlockList", "block_list");
|
||||||
|
- VixDiskLib_FreeBlockList (block_list);
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list") {
|
||||||
|
+ VixDiskLib_FreeBlockList (block_list);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_FreeBlockList);
|
||||||
|
|
||||||
|
/* There's an implicit hole after the returned list of blocks, up
|
||||||
|
* to the end of the QueryAllocatedBlocks request.
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
103
SOURCES/0021-tests-Add-a-better-test-of-real-VDDK.patch
Normal file
103
SOURCES/0021-tests-Add-a-better-test-of-real-VDDK.patch
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
From bd181ea739ebfafbf7239b5fa89e98becdb8cb72 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Mon, 5 Jul 2021 22:03:10 +0100
|
||||||
|
Subject: [PATCH] tests: Add a better test of real VDDK
|
||||||
|
|
||||||
|
The previous test only tested reading and maybe extents, and used an
|
||||||
|
all-zero disk. I'm fairly convinced the test only worked accidentally
|
||||||
|
since you must use an absolute path when opening a local file and the
|
||||||
|
test did not do that.
|
||||||
|
|
||||||
|
Add a more comprehensive test that tests writing and flush too.
|
||||||
|
|
||||||
|
(cherry picked from commit a6ca4f24593008bb2d8efb177e7f424cff51dfbf)
|
||||||
|
---
|
||||||
|
tests/test-vddk-real.sh | 55 ++++++++++++++++++++++++++++-------------
|
||||||
|
1 file changed, 38 insertions(+), 17 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tests/test-vddk-real.sh b/tests/test-vddk-real.sh
|
||||||
|
index df486ba1..f848db44 100755
|
||||||
|
--- a/tests/test-vddk-real.sh
|
||||||
|
+++ b/tests/test-vddk-real.sh
|
||||||
|
@@ -37,8 +37,12 @@ set -x
|
||||||
|
requires test "x$vddkdir" != "x"
|
||||||
|
requires test -d "$vddkdir"
|
||||||
|
requires test -f "$vddkdir/lib64/libvixDiskLib.so"
|
||||||
|
+requires test -r /dev/urandom
|
||||||
|
+requires cmp --version
|
||||||
|
+requires dd --version
|
||||||
|
requires qemu-img --version
|
||||||
|
requires nbdcopy --version
|
||||||
|
+requires nbdinfo --version
|
||||||
|
requires stat --version
|
||||||
|
|
||||||
|
# VDDK > 5.1.1 only supports x86_64.
|
||||||
|
@@ -47,31 +51,48 @@ if [ `uname -m` != "x86_64" ]; then
|
||||||
|
exit 77
|
||||||
|
fi
|
||||||
|
|
||||||
|
-files="test-vddk-real.vmdk test-vddk-real.out test-vddk-real.log"
|
||||||
|
-rm -f $files
|
||||||
|
-cleanup_fn rm -f $files
|
||||||
|
-
|
||||||
|
-qemu-img create -f vmdk test-vddk-real.vmdk 100M
|
||||||
|
-
|
||||||
|
# Since we are comparing error messages below, let's make sure we're
|
||||||
|
# not translating errors.
|
||||||
|
export LANG=C
|
||||||
|
|
||||||
|
-fail=0
|
||||||
|
-nbdkit -f -v -U - \
|
||||||
|
- --filter=readahead \
|
||||||
|
- vddk libdir="$vddkdir" test-vddk-real.vmdk \
|
||||||
|
- --run 'nbdcopy "$uri" test-vddk-real.out' \
|
||||||
|
- > test-vddk-real.log 2>&1 || fail=1
|
||||||
|
+pid=test-vddk-real.pid
|
||||||
|
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
|
||||||
|
+vmdk=$PWD/test-vddk-real.vmdk ;# note must be an absolute path
|
||||||
|
+raw=test-vddk-real.raw
|
||||||
|
+raw2=test-vddk-real.raw2
|
||||||
|
+log=test-vddk-real.log
|
||||||
|
+files="$pid $sock $vmdk $raw $raw2 $log"
|
||||||
|
+rm -f $files
|
||||||
|
+cleanup_fn rm -f $files
|
||||||
|
+
|
||||||
|
+qemu-img create -f vmdk $vmdk 10M
|
||||||
|
+
|
||||||
|
+# Check first that the VDDK library can be fully loaded. We have to
|
||||||
|
+# check the log file for missing modules since they may not show up as
|
||||||
|
+# errors.
|
||||||
|
+nbdkit -fv -U - vddk libdir="$vddkdir" $vmdk --run 'nbdinfo "$uri"' >$log 2>&1
|
||||||
|
|
||||||
|
# Check the log for missing modules
|
||||||
|
-cat test-vddk-real.log
|
||||||
|
+cat $log
|
||||||
|
if grep 'cannot open shared object file' test-vddk-real.log; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
-# Check the raw output file has exactly the right size.
|
||||||
|
-size="$(stat -c '%s' test-vddk-real.out)"
|
||||||
|
-test "$size" -eq $((100 * 1024 * 1024))
|
||||||
|
+# Now run nbdkit for the test.
|
||||||
|
+start_nbdkit -P $pid -U $sock vddk libdir="$vddkdir" $vmdk
|
||||||
|
+uri="nbd+unix:///?socket=$sock"
|
||||||
|
|
||||||
|
-exit $fail
|
||||||
|
+# VDDK < 6.0 did not support flush, so disable flush test there. Also
|
||||||
|
+# if nbdinfo doesn't support the --can flush syntax (added in libnbd
|
||||||
|
+# 1.10) then this is disabled.
|
||||||
|
+if nbdinfo --can flush "$uri"; then flush="--flush"; else flush=""; fi
|
||||||
|
+
|
||||||
|
+# Copy in and out some data. This should exercise read, write,
|
||||||
|
+# extents and flushing.
|
||||||
|
+dd if=/dev/urandom of=$raw count=5 bs=$((1024*1024))
|
||||||
|
+truncate -s 10M $raw
|
||||||
|
+
|
||||||
|
+nbdcopy $flush $raw "$uri"
|
||||||
|
+nbdcopy "$uri" $raw2
|
||||||
|
+
|
||||||
|
+cmp $raw $raw2
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -0,0 +1,245 @@
|
|||||||
|
From 45db64d72bf03fece8a7fb994887360954905a3b Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Wed, 18 Aug 2021 14:47:58 +0100
|
||||||
|
Subject: [PATCH] vddk: Add stats about the amount of time spent in VDDK calls
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
If you use -D vddk.stats=1 then when the plugin is unloaded it will
|
||||||
|
display the amount of time spent in each VDDK call. The output will
|
||||||
|
look something like this:
|
||||||
|
|
||||||
|
nbdkit: debug: VDDK function stats (-D vddk.stats=1):
|
||||||
|
nbdkit: debug: µs
|
||||||
|
nbdkit: debug: VixDiskLib_Exit 1001154
|
||||||
|
nbdkit: debug: VixDiskLib_InitEx 96008
|
||||||
|
nbdkit: debug: VixDiskLib_Flush 15722
|
||||||
|
nbdkit: debug: VixDiskLib_Write 12081
|
||||||
|
nbdkit: debug: VixDiskLib_Open 6029
|
||||||
|
nbdkit: debug: VixDiskLib_Read 1364
|
||||||
|
nbdkit: debug: VixDiskLib_Close 605
|
||||||
|
nbdkit: debug: VixDiskLib_QueryAllocatedBlocks 191
|
||||||
|
nbdkit: debug: VixDiskLib_ConnectEx 134
|
||||||
|
nbdkit: debug: VixDiskLib_Disconnect 76
|
||||||
|
nbdkit: debug: VixDiskLib_FreeConnectParams 57
|
||||||
|
nbdkit: debug: VixDiskLib_GetInfo 56
|
||||||
|
nbdkit: debug: VixDiskLib_GetTransportMode 43
|
||||||
|
nbdkit: debug: VixDiskLib_FreeInfo 42
|
||||||
|
nbdkit: debug: VixDiskLib_FreeBlockList 32
|
||||||
|
nbdkit: debug: VixDiskLib_AllocateConnectParams 28
|
||||||
|
|
||||||
|
VDDK APIs which are never called are not printed.
|
||||||
|
|
||||||
|
(cherry picked from commit f2dfc7d74ee650bdf2cc930a07b1c5bcb509976c)
|
||||||
|
---
|
||||||
|
plugins/vddk/nbdkit-vddk-plugin.pod | 5 ++
|
||||||
|
plugins/vddk/vddk.c | 107 +++++++++++++++++++++++++---
|
||||||
|
tests/test-vddk-real.sh | 2 +-
|
||||||
|
3 files changed, 103 insertions(+), 11 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
|
||||||
|
index b783f13a..2a1b17dc 100644
|
||||||
|
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
|
||||||
|
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
|
||||||
|
@@ -510,6 +510,11 @@ Debug extents returned by C<QueryAllocatedBlocks>.
|
||||||
|
|
||||||
|
Suppress debugging of datapath calls (C<Read> and C<Write>).
|
||||||
|
|
||||||
|
+=item B<-D vddk.stats=1>
|
||||||
|
+
|
||||||
|
+When the plugin exits print some statistics about the amount of time
|
||||||
|
+spent waiting on each VDDK call.
|
||||||
|
+
|
||||||
|
=back
|
||||||
|
|
||||||
|
=head1 FILES
|
||||||
|
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
|
||||||
|
index 888009ab..fce96d9a 100644
|
||||||
|
--- a/plugins/vddk/vddk.c
|
||||||
|
+++ b/plugins/vddk/vddk.c
|
||||||
|
@@ -42,6 +42,7 @@
|
||||||
|
#include <assert.h>
|
||||||
|
#include <dlfcn.h>
|
||||||
|
#include <libgen.h>
|
||||||
|
+#include <sys/time.h>
|
||||||
|
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
|
@@ -52,6 +53,8 @@
|
||||||
|
#include "isaligned.h"
|
||||||
|
#include "minmax.h"
|
||||||
|
#include "rounding.h"
|
||||||
|
+#include "tvdiff.h"
|
||||||
|
+#include "vector.h"
|
||||||
|
|
||||||
|
#include "vddk.h"
|
||||||
|
#include "vddk-structs.h"
|
||||||
|
@@ -60,6 +63,7 @@
|
||||||
|
NBDKIT_DLL_PUBLIC int vddk_debug_diskinfo;
|
||||||
|
NBDKIT_DLL_PUBLIC int vddk_debug_extents;
|
||||||
|
NBDKIT_DLL_PUBLIC int vddk_debug_datapath = 1;
|
||||||
|
+NBDKIT_DLL_PUBLIC int vddk_debug_stats;
|
||||||
|
|
||||||
|
/* For each VDDK API define a static global variable. These globals
|
||||||
|
* are initialized when the plugin is loaded (by vddk_get_ready).
|
||||||
|
@@ -96,22 +100,52 @@ static const char *username; /* user */
|
||||||
|
static const char *vmx_spec; /* vm */
|
||||||
|
static bool is_remote;
|
||||||
|
|
||||||
|
-#define VDDK_ERROR(err, fs, ...) \
|
||||||
|
- do { \
|
||||||
|
- char *vddk_err_msg; \
|
||||||
|
- vddk_err_msg = VixDiskLib_GetErrorText ((err), NULL); \
|
||||||
|
- nbdkit_error (fs ": %s", ##__VA_ARGS__, vddk_err_msg); \
|
||||||
|
- VixDiskLib_FreeErrorText (vddk_err_msg); \
|
||||||
|
- } while (0)
|
||||||
|
+/* For each VDDK API define a variable to store the time taken (used
|
||||||
|
+ * to implement -D vddk.stats=1).
|
||||||
|
+ */
|
||||||
|
+static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
+static void display_stats (void);
|
||||||
|
+#define STUB(fn,ret,args) static int64_t stats_##fn;
|
||||||
|
+#define OPTIONAL_STUB(fn,ret,args) static int64_t stats_##fn;
|
||||||
|
+#include "vddk-stubs.h"
|
||||||
|
+#undef STUB
|
||||||
|
+#undef OPTIONAL_STUB
|
||||||
|
|
||||||
|
#define VDDK_CALL_START(fn, fs, ...) \
|
||||||
|
+ do { \
|
||||||
|
+ struct timeval start_t, end_t; \
|
||||||
|
+ if (vddk_debug_stats) \
|
||||||
|
+ gettimeofday (&start_t, NULL); \
|
||||||
|
nbdkit_debug ("VDDK call: %s (" fs ")", #fn, ##__VA_ARGS__); \
|
||||||
|
do
|
||||||
|
#define VDDK_CALL_START_DATAPATH(fn, fs, ...) \
|
||||||
|
+ do { \
|
||||||
|
+ struct timeval start_t, end_t; \
|
||||||
|
+ if (vddk_debug_stats) \
|
||||||
|
+ gettimeofday (&start_t, NULL); \
|
||||||
|
if (vddk_debug_datapath) \
|
||||||
|
nbdkit_debug ("VDDK call: %s (" fs ")", #fn, ##__VA_ARGS__); \
|
||||||
|
do
|
||||||
|
-#define VDDK_CALL_END(fn) while (0)
|
||||||
|
+#define VDDK_CALL_END(fn) \
|
||||||
|
+ while (0); \
|
||||||
|
+ if (vddk_debug_stats) { \
|
||||||
|
+ gettimeofday (&end_t, NULL); \
|
||||||
|
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&stats_lock); \
|
||||||
|
+ stats_##fn += tvdiff_usec (&start_t, &end_t); \
|
||||||
|
+ } \
|
||||||
|
+ } while (0)
|
||||||
|
+
|
||||||
|
+#define VDDK_ERROR(err, fs, ...) \
|
||||||
|
+ do { \
|
||||||
|
+ char *vddk_err_msg; \
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_GetErrorText, "%lu", err) { \
|
||||||
|
+ vddk_err_msg = VixDiskLib_GetErrorText ((err), NULL); \
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_GetErrorText); \
|
||||||
|
+ nbdkit_error (fs ": %s", ##__VA_ARGS__, vddk_err_msg); \
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_FreeErrorText, "") { \
|
||||||
|
+ VixDiskLib_FreeErrorText (vddk_err_msg); \
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_FreeErrorText); \
|
||||||
|
+ } while (0)
|
||||||
|
|
||||||
|
/* Unload the plugin. */
|
||||||
|
static void
|
||||||
|
@@ -124,11 +158,61 @@ vddk_unload (void)
|
||||||
|
}
|
||||||
|
if (dl)
|
||||||
|
dlclose (dl);
|
||||||
|
+
|
||||||
|
+ if (vddk_debug_stats)
|
||||||
|
+ display_stats ();
|
||||||
|
+
|
||||||
|
free (config);
|
||||||
|
free (libdir);
|
||||||
|
free (password);
|
||||||
|
}
|
||||||
|
|
||||||
|
+struct vddk_stat {
|
||||||
|
+ const char *fn;
|
||||||
|
+ int64_t usecs;
|
||||||
|
+};
|
||||||
|
+DEFINE_VECTOR_TYPE(statlist, struct vddk_stat)
|
||||||
|
+
|
||||||
|
+static int
|
||||||
|
+stat_compare (const void *vp1, const void *vp2)
|
||||||
|
+{
|
||||||
|
+ const struct vddk_stat *st1 = vp1;
|
||||||
|
+ const struct vddk_stat *st2 = vp2;
|
||||||
|
+
|
||||||
|
+ /* Note: sorts in reverse order. */
|
||||||
|
+ if (st1->usecs < st2->usecs) return 1;
|
||||||
|
+ else if (st1->usecs > st2->usecs) return -1;
|
||||||
|
+ else return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+display_stats (void)
|
||||||
|
+{
|
||||||
|
+ statlist stats = empty_vector;
|
||||||
|
+ struct vddk_stat st;
|
||||||
|
+ size_t i;
|
||||||
|
+
|
||||||
|
+#define ADD_ONE_STAT(fn_, usecs_) \
|
||||||
|
+ st.fn = fn_; \
|
||||||
|
+ st.usecs = usecs_; \
|
||||||
|
+ statlist_append (&stats, st)
|
||||||
|
+#define STUB(fn,ret,args) ADD_ONE_STAT (#fn, stats_##fn);
|
||||||
|
+#define OPTIONAL_STUB(fn,ret,args) ADD_ONE_STAT (#fn, stats_##fn);
|
||||||
|
+#include "vddk-stubs.h"
|
||||||
|
+#undef STUB
|
||||||
|
+#undef OPTIONAL_STUB
|
||||||
|
+#undef ADD_ONE_STAT
|
||||||
|
+
|
||||||
|
+ qsort (stats.ptr, stats.size, sizeof stats.ptr[0], stat_compare);
|
||||||
|
+
|
||||||
|
+ nbdkit_debug ("VDDK function stats (-D vddk.stats=1):");
|
||||||
|
+ nbdkit_debug ("%-40s %9s", "", "µs");
|
||||||
|
+ for (i = 0; i < stats.size; ++i) {
|
||||||
|
+ if (stats.ptr[i].usecs)
|
||||||
|
+ nbdkit_debug ("%-40s %9" PRIi64, stats.ptr[i].fn, stats.ptr[i].usecs);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void
|
||||||
|
trim (char *str)
|
||||||
|
{
|
||||||
|
@@ -557,6 +641,7 @@ vddk_open (int readonly)
|
||||||
|
struct vddk_handle *h;
|
||||||
|
VixError err;
|
||||||
|
uint32_t flags;
|
||||||
|
+ const char *transport_mode;
|
||||||
|
|
||||||
|
h = malloc (sizeof *h);
|
||||||
|
if (h == NULL) {
|
||||||
|
@@ -635,8 +720,10 @@ vddk_open (int readonly)
|
||||||
|
goto err2;
|
||||||
|
}
|
||||||
|
|
||||||
|
- nbdkit_debug ("transport mode: %s",
|
||||||
|
- VixDiskLib_GetTransportMode (h->handle));
|
||||||
|
+ VDDK_CALL_START (VixDiskLib_GetTransportMode, "handle") {
|
||||||
|
+ transport_mode = VixDiskLib_GetTransportMode (h->handle);
|
||||||
|
+ } VDDK_CALL_END (VixDiskLib_GetTransportMode);
|
||||||
|
+ nbdkit_debug ("transport mode: %s", transport_mode);
|
||||||
|
|
||||||
|
return h;
|
||||||
|
|
||||||
|
diff --git a/tests/test-vddk-real.sh b/tests/test-vddk-real.sh
|
||||||
|
index f848db44..3c8b4262 100755
|
||||||
|
--- a/tests/test-vddk-real.sh
|
||||||
|
+++ b/tests/test-vddk-real.sh
|
||||||
|
@@ -79,7 +79,7 @@ if grep 'cannot open shared object file' test-vddk-real.log; then
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Now run nbdkit for the test.
|
||||||
|
-start_nbdkit -P $pid -U $sock vddk libdir="$vddkdir" $vmdk
|
||||||
|
+start_nbdkit -P $pid -U $sock -D vddk.stats=1 vddk libdir="$vddkdir" $vmdk
|
||||||
|
uri="nbd+unix:///?socket=$sock"
|
||||||
|
|
||||||
|
# VDDK < 6.0 did not support flush, so disable flush test there. Also
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
772
SOURCES/0023-cow-Make-the-block-size-configurable.patch
Normal file
772
SOURCES/0023-cow-Make-the-block-size-configurable.patch
Normal file
@ -0,0 +1,772 @@
|
|||||||
|
From 0be4847cdec9effd6128da03ea42a4953e5a6343 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Tue, 17 Aug 2021 22:03:11 +0100
|
||||||
|
Subject: [PATCH] cow: Make the block size configurable
|
||||||
|
|
||||||
|
Commit c1905b0a28 ("cache, cow: Use a 64K block size by default")
|
||||||
|
changed the nbdkit-cow-filter block size to 64K, but it was still a
|
||||||
|
fixed size. In contrast the cache filter allows the block size to be
|
||||||
|
adjusted.
|
||||||
|
|
||||||
|
Allow the block size in this filter to be adjusted up or down with a
|
||||||
|
new cow-block-size=N parameter.
|
||||||
|
|
||||||
|
When using the VDDK plugin, adjusting this setting can make a
|
||||||
|
difference. The following timings come from a modified virt-v2v which
|
||||||
|
sets cow-block-size and was used to convert from a VMware server to
|
||||||
|
-o null (this is also using cow-on-read=true):
|
||||||
|
|
||||||
|
cow-block-size=64K: 18m18
|
||||||
|
cow-block-size=256K: 14m13
|
||||||
|
cow-block-size=1M: 14m19
|
||||||
|
cow-block-size=4M: 37m33
|
||||||
|
|
||||||
|
As you can see it's not obvious how to choose a good block size, but
|
||||||
|
at least by allowing adjustment we can tune things.
|
||||||
|
|
||||||
|
(cherry picked from commit 7182c47d04d2b68005fceadefc0c14bfaa61a533)
|
||||||
|
---
|
||||||
|
filters/cow/blk.c | 35 +++----
|
||||||
|
filters/cow/blk.h | 5 -
|
||||||
|
filters/cow/cow.c | 150 +++++++++++++++++-------------
|
||||||
|
filters/cow/cow.h | 39 ++++++++
|
||||||
|
filters/cow/nbdkit-cow-filter.pod | 5 +
|
||||||
|
tests/Makefile.am | 2 +
|
||||||
|
tests/test-cow-block-size.sh | 72 ++++++++++++++
|
||||||
|
7 files changed, 221 insertions(+), 87 deletions(-)
|
||||||
|
create mode 100644 filters/cow/cow.h
|
||||||
|
create mode 100755 tests/test-cow-block-size.sh
|
||||||
|
|
||||||
|
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
|
||||||
|
index c22d5886..f9341dc1 100644
|
||||||
|
--- a/filters/cow/blk.c
|
||||||
|
+++ b/filters/cow/blk.c
|
||||||
|
@@ -99,6 +99,7 @@
|
||||||
|
#include "pwrite.h"
|
||||||
|
#include "utils.h"
|
||||||
|
|
||||||
|
+#include "cow.h"
|
||||||
|
#include "blk.h"
|
||||||
|
|
||||||
|
/* The temporary overlay. */
|
||||||
|
@@ -137,7 +138,7 @@ blk_init (void)
|
||||||
|
size_t len;
|
||||||
|
char *template;
|
||||||
|
|
||||||
|
- bitmap_init (&bm, BLKSIZE, 2 /* bits per block */);
|
||||||
|
+ bitmap_init (&bm, blksize, 2 /* bits per block */);
|
||||||
|
|
||||||
|
tmpdir = getenv ("TMPDIR");
|
||||||
|
if (!tmpdir)
|
||||||
|
@@ -199,7 +200,7 @@ blk_set_size (uint64_t new_size)
|
||||||
|
if (bitmap_resize (&bm, size) == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
- if (ftruncate (fd, ROUND_UP (size, BLKSIZE)) == -1) {
|
||||||
|
+ if (ftruncate (fd, ROUND_UP (size, blksize)) == -1) {
|
||||||
|
nbdkit_error ("ftruncate: %m");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
@@ -228,7 +229,7 @@ blk_read_multiple (nbdkit_next *next,
|
||||||
|
uint64_t blknum, uint64_t nrblocks,
|
||||||
|
uint8_t *block, bool cow_on_read, int *err)
|
||||||
|
{
|
||||||
|
- off_t offset = blknum * BLKSIZE;
|
||||||
|
+ off_t offset = blknum * blksize;
|
||||||
|
enum bm_entry state;
|
||||||
|
uint64_t b, runblocks;
|
||||||
|
|
||||||
|
@@ -262,8 +263,8 @@ blk_read_multiple (nbdkit_next *next,
|
||||||
|
if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
|
||||||
|
unsigned n, tail = 0;
|
||||||
|
|
||||||
|
- assert (BLKSIZE * runblocks <= UINT_MAX);
|
||||||
|
- n = BLKSIZE * runblocks;
|
||||||
|
+ assert (blksize * runblocks <= UINT_MAX);
|
||||||
|
+ n = blksize * runblocks;
|
||||||
|
|
||||||
|
if (offset + n > size) {
|
||||||
|
tail = offset + n - size;
|
||||||
|
@@ -288,7 +289,7 @@ blk_read_multiple (nbdkit_next *next,
|
||||||
|
"at offset %" PRIu64 " into the cache",
|
||||||
|
runblocks, offset);
|
||||||
|
|
||||||
|
- if (full_pwrite (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
||||||
|
+ if (full_pwrite (fd, block, blksize * runblocks, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
return -1;
|
||||||
|
@@ -298,14 +299,14 @@ blk_read_multiple (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
|
||||||
|
- if (full_pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
||||||
|
+ if (full_pread (fd, block, blksize * runblocks, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pread: %m");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else /* state == BLOCK_TRIMMED */ {
|
||||||
|
- memset (block, 0, BLKSIZE * runblocks);
|
||||||
|
+ memset (block, 0, blksize * runblocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If all done, return. */
|
||||||
|
@@ -316,7 +317,7 @@ blk_read_multiple (nbdkit_next *next,
|
||||||
|
return blk_read_multiple (next,
|
||||||
|
blknum + runblocks,
|
||||||
|
nrblocks - runblocks,
|
||||||
|
- block + BLKSIZE * runblocks,
|
||||||
|
+ block + blksize * runblocks,
|
||||||
|
cow_on_read, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -333,9 +334,9 @@ blk_cache (nbdkit_next *next,
|
||||||
|
{
|
||||||
|
/* XXX Could make this lock more fine-grained with some thought. */
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
||||||
|
- off_t offset = blknum * BLKSIZE;
|
||||||
|
+ off_t offset = blknum * blksize;
|
||||||
|
enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_ALLOCATED);
|
||||||
|
- unsigned n = BLKSIZE, tail = 0;
|
||||||
|
+ unsigned n = blksize, tail = 0;
|
||||||
|
|
||||||
|
if (offset + n > size) {
|
||||||
|
tail = offset + n - size;
|
||||||
|
@@ -348,7 +349,7 @@ blk_cache (nbdkit_next *next,
|
||||||
|
|
||||||
|
if (state == BLOCK_ALLOCATED) {
|
||||||
|
#if HAVE_POSIX_FADVISE
|
||||||
|
- int r = posix_fadvise (fd, offset, BLKSIZE, POSIX_FADV_WILLNEED);
|
||||||
|
+ int r = posix_fadvise (fd, offset, blksize, POSIX_FADV_WILLNEED);
|
||||||
|
if (r) {
|
||||||
|
errno = r;
|
||||||
|
nbdkit_error ("posix_fadvise: %m");
|
||||||
|
@@ -373,7 +374,7 @@ blk_cache (nbdkit_next *next,
|
||||||
|
memset (block + n, 0, tail);
|
||||||
|
|
||||||
|
if (mode == BLK_CACHE_COW) {
|
||||||
|
- if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
|
||||||
|
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
return -1;
|
||||||
|
@@ -386,13 +387,13 @@ blk_cache (nbdkit_next *next,
|
||||||
|
int
|
||||||
|
blk_write (uint64_t blknum, const uint8_t *block, int *err)
|
||||||
|
{
|
||||||
|
- off_t offset = blknum * BLKSIZE;
|
||||||
|
+ off_t offset = blknum * blksize;
|
||||||
|
|
||||||
|
if (cow_debug_verbose)
|
||||||
|
nbdkit_debug ("cow: blk_write block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
- if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
|
||||||
|
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("pwrite: %m");
|
||||||
|
return -1;
|
||||||
|
@@ -407,14 +408,14 @@ blk_write (uint64_t blknum, const uint8_t *block, int *err)
|
||||||
|
int
|
||||||
|
blk_trim (uint64_t blknum, int *err)
|
||||||
|
{
|
||||||
|
- off_t offset = blknum * BLKSIZE;
|
||||||
|
+ off_t offset = blknum * blksize;
|
||||||
|
|
||||||
|
if (cow_debug_verbose)
|
||||||
|
nbdkit_debug ("cow: blk_trim block %" PRIu64 " (offset %" PRIu64 ")",
|
||||||
|
blknum, (uint64_t) offset);
|
||||||
|
|
||||||
|
/* XXX As an optimization we could punch a hole in the overlay
|
||||||
|
- * here. However it's not trivial since BLKSIZE is unrelated to the
|
||||||
|
+ * here. However it's not trivial since blksize is unrelated to the
|
||||||
|
* overlay filesystem block size.
|
||||||
|
*/
|
||||||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
||||||
|
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
|
||||||
|
index b7e6f092..62fb5416 100644
|
||||||
|
--- a/filters/cow/blk.h
|
||||||
|
+++ b/filters/cow/blk.h
|
||||||
|
@@ -33,11 +33,6 @@
|
||||||
|
#ifndef NBDKIT_BLK_H
|
||||||
|
#define NBDKIT_BLK_H
|
||||||
|
|
||||||
|
-/* Size of a block in the overlay. A 4K block size means that we need
|
||||||
|
- * 64 MB of memory to store the bitmap for a 1 TB underlying image.
|
||||||
|
- */
|
||||||
|
-#define BLKSIZE 65536
|
||||||
|
-
|
||||||
|
/* Initialize the overlay and bitmap. */
|
||||||
|
extern int blk_init (void);
|
||||||
|
|
||||||
|
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
|
||||||
|
index 6efb39f2..1c62c857 100644
|
||||||
|
--- a/filters/cow/cow.c
|
||||||
|
+++ b/filters/cow/cow.c
|
||||||
|
@@ -40,6 +40,7 @@
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <errno.h>
|
||||||
|
+#include <limits.h>
|
||||||
|
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
|
@@ -47,9 +48,11 @@
|
||||||
|
|
||||||
|
#include "cleanup.h"
|
||||||
|
#include "isaligned.h"
|
||||||
|
+#include "ispowerof2.h"
|
||||||
|
#include "minmax.h"
|
||||||
|
#include "rounding.h"
|
||||||
|
|
||||||
|
+#include "cow.h"
|
||||||
|
#include "blk.h"
|
||||||
|
|
||||||
|
/* Read-modify-write requests are serialized through this global lock.
|
||||||
|
@@ -58,6 +61,8 @@
|
||||||
|
*/
|
||||||
|
static pthread_mutex_t rmw_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
+unsigned blksize = 65536; /* block size */
|
||||||
|
+
|
||||||
|
static bool cow_on_cache;
|
||||||
|
|
||||||
|
/* Cache on read ("cow-on-read") mode. */
|
||||||
|
@@ -69,13 +74,6 @@ extern enum cor_mode {
|
||||||
|
enum cor_mode cor_mode = COR_OFF;
|
||||||
|
const char *cor_path;
|
||||||
|
|
||||||
|
-static void
|
||||||
|
-cow_load (void)
|
||||||
|
-{
|
||||||
|
- if (blk_init () == -1)
|
||||||
|
- exit (EXIT_FAILURE);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static void
|
||||||
|
cow_unload (void)
|
||||||
|
{
|
||||||
|
@@ -86,7 +84,19 @@ static int
|
||||||
|
cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
const char *key, const char *value)
|
||||||
|
{
|
||||||
|
- if (strcmp (key, "cow-on-cache") == 0) {
|
||||||
|
+ if (strcmp (key, "cow-block-size") == 0) {
|
||||||
|
+ int64_t r = nbdkit_parse_size (value);
|
||||||
|
+ if (r == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ if (r <= 4096 || r > UINT_MAX || !is_power_of_2 (r)) {
|
||||||
|
+ nbdkit_error ("cow-block-size is out of range (4096..2G) "
|
||||||
|
+ "or not a power of 2");
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ blksize = r;
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
+ else if (strcmp (key, "cow-on-cache") == 0) {
|
||||||
|
int r;
|
||||||
|
|
||||||
|
r = nbdkit_parse_bool (value);
|
||||||
|
@@ -114,9 +124,19 @@ cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||||||
|
}
|
||||||
|
|
||||||
|
#define cow_config_help \
|
||||||
|
+ "cow-block-size=<N> Set COW block size.\n" \
|
||||||
|
"cow-on-cache=<BOOL> Copy cache (prefetch) requests to the overlay.\n" \
|
||||||
|
"cow-on-read=<BOOL>|/PATH Copy read requests to the overlay."
|
||||||
|
|
||||||
|
+static int
|
||||||
|
+cow_get_ready (int thread_model)
|
||||||
|
+{
|
||||||
|
+ if (blk_init () == -1)
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Decide if cow-on-read is currently on or off. */
|
||||||
|
bool
|
||||||
|
cow_on_read (void)
|
||||||
|
@@ -249,8 +269,8 @@ cow_pread (nbdkit_next *next,
|
||||||
|
uint64_t blknum, blkoffs, nrblocks;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
- if (!IS_ALIGNED (count | offset, BLKSIZE)) {
|
||||||
|
- block = malloc (BLKSIZE);
|
||||||
|
+ if (!IS_ALIGNED (count | offset, blksize)) {
|
||||||
|
+ block = malloc (blksize);
|
||||||
|
if (block == NULL) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("malloc: %m");
|
||||||
|
@@ -258,12 +278,12 @@ cow_pread (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- blknum = offset / BLKSIZE; /* block number */
|
||||||
|
- blkoffs = offset % BLKSIZE; /* offset within the block */
|
||||||
|
+ blknum = offset / blksize; /* block number */
|
||||||
|
+ blkoffs = offset % blksize; /* offset within the block */
|
||||||
|
|
||||||
|
/* Unaligned head */
|
||||||
|
if (blkoffs) {
|
||||||
|
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
|
||||||
|
+ uint64_t n = MIN (blksize - blkoffs, count);
|
||||||
|
|
||||||
|
assert (block);
|
||||||
|
r = blk_read (next, blknum, block, cow_on_read (), err);
|
||||||
|
@@ -279,15 +299,15 @@ cow_pread (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Aligned body */
|
||||||
|
- nrblocks = count / BLKSIZE;
|
||||||
|
+ nrblocks = count / blksize;
|
||||||
|
if (nrblocks > 0) {
|
||||||
|
r = blk_read_multiple (next, blknum, nrblocks, buf, cow_on_read (), err);
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
- buf += nrblocks * BLKSIZE;
|
||||||
|
- count -= nrblocks * BLKSIZE;
|
||||||
|
- offset += nrblocks * BLKSIZE;
|
||||||
|
+ buf += nrblocks * blksize;
|
||||||
|
+ count -= nrblocks * blksize;
|
||||||
|
+ offset += nrblocks * blksize;
|
||||||
|
blknum += nrblocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -314,8 +334,8 @@ cow_pwrite (nbdkit_next *next,
|
||||||
|
uint64_t blknum, blkoffs;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
- if (!IS_ALIGNED (count | offset, BLKSIZE)) {
|
||||||
|
- block = malloc (BLKSIZE);
|
||||||
|
+ if (!IS_ALIGNED (count | offset, blksize)) {
|
||||||
|
+ block = malloc (blksize);
|
||||||
|
if (block == NULL) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("malloc: %m");
|
||||||
|
@@ -323,12 +343,12 @@ cow_pwrite (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- blknum = offset / BLKSIZE; /* block number */
|
||||||
|
- blkoffs = offset % BLKSIZE; /* offset within the block */
|
||||||
|
+ blknum = offset / blksize; /* block number */
|
||||||
|
+ blkoffs = offset % blksize; /* offset within the block */
|
||||||
|
|
||||||
|
/* Unaligned head */
|
||||||
|
if (blkoffs) {
|
||||||
|
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
|
||||||
|
+ uint64_t n = MIN (blksize - blkoffs, count);
|
||||||
|
|
||||||
|
/* Do a read-modify-write operation on the current block.
|
||||||
|
* Hold the rmw_lock over the whole operation.
|
||||||
|
@@ -350,14 +370,14 @@ cow_pwrite (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Aligned body */
|
||||||
|
- while (count >= BLKSIZE) {
|
||||||
|
+ while (count >= blksize) {
|
||||||
|
r = blk_write (blknum, buf, err);
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
- buf += BLKSIZE;
|
||||||
|
- count -= BLKSIZE;
|
||||||
|
- offset += BLKSIZE;
|
||||||
|
+ buf += blksize;
|
||||||
|
+ count -= blksize;
|
||||||
|
+ offset += blksize;
|
||||||
|
blknum++;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -397,19 +417,19 @@ cow_zero (nbdkit_next *next,
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- block = malloc (BLKSIZE);
|
||||||
|
+ block = malloc (blksize);
|
||||||
|
if (block == NULL) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("malloc: %m");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- blknum = offset / BLKSIZE; /* block number */
|
||||||
|
- blkoffs = offset % BLKSIZE; /* offset within the block */
|
||||||
|
+ blknum = offset / blksize; /* block number */
|
||||||
|
+ blkoffs = offset % blksize; /* offset within the block */
|
||||||
|
|
||||||
|
/* Unaligned head */
|
||||||
|
if (blkoffs) {
|
||||||
|
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
|
||||||
|
+ uint64_t n = MIN (blksize - blkoffs, count);
|
||||||
|
|
||||||
|
/* Do a read-modify-write operation on the current block.
|
||||||
|
* Hold the rmw_lock over the whole operation.
|
||||||
|
@@ -429,9 +449,9 @@ cow_zero (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Aligned body */
|
||||||
|
- if (count >= BLKSIZE)
|
||||||
|
- memset (block, 0, BLKSIZE);
|
||||||
|
- while (count >= BLKSIZE) {
|
||||||
|
+ if (count >= blksize)
|
||||||
|
+ memset (block, 0, blksize);
|
||||||
|
+ while (count >= blksize) {
|
||||||
|
/* XXX There is the possibility of optimizing this: since this loop is
|
||||||
|
* writing a whole, aligned block, we should use FALLOC_FL_ZERO_RANGE.
|
||||||
|
*/
|
||||||
|
@@ -439,8 +459,8 @@ cow_zero (nbdkit_next *next,
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
- count -= BLKSIZE;
|
||||||
|
- offset += BLKSIZE;
|
||||||
|
+ count -= blksize;
|
||||||
|
+ offset += blksize;
|
||||||
|
blknum++;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -471,8 +491,8 @@ cow_trim (nbdkit_next *next,
|
||||||
|
uint64_t blknum, blkoffs;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
- if (!IS_ALIGNED (count | offset, BLKSIZE)) {
|
||||||
|
- block = malloc (BLKSIZE);
|
||||||
|
+ if (!IS_ALIGNED (count | offset, blksize)) {
|
||||||
|
+ block = malloc (blksize);
|
||||||
|
if (block == NULL) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("malloc: %m");
|
||||||
|
@@ -480,12 +500,12 @@ cow_trim (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- blknum = offset / BLKSIZE; /* block number */
|
||||||
|
- blkoffs = offset % BLKSIZE; /* offset within the block */
|
||||||
|
+ blknum = offset / blksize; /* block number */
|
||||||
|
+ blkoffs = offset % blksize; /* offset within the block */
|
||||||
|
|
||||||
|
/* Unaligned head */
|
||||||
|
if (blkoffs) {
|
||||||
|
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
|
||||||
|
+ uint64_t n = MIN (blksize - blkoffs, count);
|
||||||
|
|
||||||
|
/* Do a read-modify-write operation on the current block.
|
||||||
|
* Hold the lock over the whole operation.
|
||||||
|
@@ -505,13 +525,13 @@ cow_trim (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Aligned body */
|
||||||
|
- while (count >= BLKSIZE) {
|
||||||
|
+ while (count >= blksize) {
|
||||||
|
r = blk_trim (blknum, err);
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
- count -= BLKSIZE;
|
||||||
|
- offset += BLKSIZE;
|
||||||
|
+ count -= blksize;
|
||||||
|
+ offset += blksize;
|
||||||
|
blknum++;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -568,22 +588,22 @@ cow_cache (nbdkit_next *next,
|
||||||
|
mode = BLK_CACHE_COW;
|
||||||
|
|
||||||
|
assert (!flags);
|
||||||
|
- block = malloc (BLKSIZE);
|
||||||
|
+ block = malloc (blksize);
|
||||||
|
if (block == NULL) {
|
||||||
|
*err = errno;
|
||||||
|
nbdkit_error ("malloc: %m");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- blknum = offset / BLKSIZE; /* block number */
|
||||||
|
- blkoffs = offset % BLKSIZE; /* offset within the block */
|
||||||
|
+ blknum = offset / blksize; /* block number */
|
||||||
|
+ blkoffs = offset % blksize; /* offset within the block */
|
||||||
|
|
||||||
|
/* Unaligned head */
|
||||||
|
remaining += blkoffs;
|
||||||
|
offset -= blkoffs;
|
||||||
|
|
||||||
|
/* Unaligned tail */
|
||||||
|
- remaining = ROUND_UP (remaining, BLKSIZE);
|
||||||
|
+ remaining = ROUND_UP (remaining, blksize);
|
||||||
|
|
||||||
|
/* Aligned body */
|
||||||
|
while (remaining) {
|
||||||
|
@@ -591,8 +611,8 @@ cow_cache (nbdkit_next *next,
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
- remaining -= BLKSIZE;
|
||||||
|
- offset += BLKSIZE;
|
||||||
|
+ remaining -= blksize;
|
||||||
|
+ offset += blksize;
|
||||||
|
blknum++;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -616,13 +636,13 @@ cow_extents (nbdkit_next *next,
|
||||||
|
* value so rounding up is safe here.
|
||||||
|
*/
|
||||||
|
end = offset + count;
|
||||||
|
- offset = ROUND_DOWN (offset, BLKSIZE);
|
||||||
|
- end = ROUND_UP (end, BLKSIZE);
|
||||||
|
+ offset = ROUND_DOWN (offset, blksize);
|
||||||
|
+ end = ROUND_UP (end, blksize);
|
||||||
|
count = end - offset;
|
||||||
|
- blknum = offset / BLKSIZE;
|
||||||
|
+ blknum = offset / blksize;
|
||||||
|
|
||||||
|
- assert (IS_ALIGNED (offset, BLKSIZE));
|
||||||
|
- assert (IS_ALIGNED (count, BLKSIZE));
|
||||||
|
+ assert (IS_ALIGNED (offset, blksize));
|
||||||
|
+ assert (IS_ALIGNED (count, blksize));
|
||||||
|
assert (count > 0); /* We must make forward progress. */
|
||||||
|
|
||||||
|
while (count > 0) {
|
||||||
|
@@ -634,7 +654,7 @@ cow_extents (nbdkit_next *next,
|
||||||
|
/* Present in the overlay. */
|
||||||
|
if (present) {
|
||||||
|
e.offset = offset;
|
||||||
|
- e.length = BLKSIZE;
|
||||||
|
+ e.length = blksize;
|
||||||
|
|
||||||
|
if (trimmed)
|
||||||
|
e.type = NBDKIT_EXTENT_HOLE|NBDKIT_EXTENT_ZERO;
|
||||||
|
@@ -647,8 +667,8 @@ cow_extents (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
|
||||||
|
blknum++;
|
||||||
|
- offset += BLKSIZE;
|
||||||
|
- count -= BLKSIZE;
|
||||||
|
+ offset += blksize;
|
||||||
|
+ count -= blksize;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Not present in the overlay, but we can ask the plugin. */
|
||||||
|
@@ -667,12 +687,12 @@ cow_extents (nbdkit_next *next,
|
||||||
|
* (range_count), but count is a 64 bit quantity, so don't
|
||||||
|
* overflow range_count here.
|
||||||
|
*/
|
||||||
|
- if (range_count >= UINT32_MAX - BLKSIZE + 1) break;
|
||||||
|
+ if (range_count >= UINT32_MAX - blksize + 1) break;
|
||||||
|
|
||||||
|
blknum++;
|
||||||
|
- offset += BLKSIZE;
|
||||||
|
- count -= BLKSIZE;
|
||||||
|
- range_count += BLKSIZE;
|
||||||
|
+ offset += blksize;
|
||||||
|
+ count -= blksize;
|
||||||
|
+ range_count += blksize;
|
||||||
|
|
||||||
|
if (count == 0) break;
|
||||||
|
blk_status (blknum, &present, &trimmed);
|
||||||
|
@@ -706,7 +726,7 @@ cow_extents (nbdkit_next *next,
|
||||||
|
/* Otherwise assume the block is non-sparse. */
|
||||||
|
else {
|
||||||
|
e.offset = offset;
|
||||||
|
- e.length = BLKSIZE;
|
||||||
|
+ e.length = blksize;
|
||||||
|
e.type = 0;
|
||||||
|
|
||||||
|
if (nbdkit_add_extent (extents, e.offset, e.length, e.type) == -1) {
|
||||||
|
@@ -715,8 +735,8 @@ cow_extents (nbdkit_next *next,
|
||||||
|
}
|
||||||
|
|
||||||
|
blknum++;
|
||||||
|
- offset += BLKSIZE;
|
||||||
|
- count -= BLKSIZE;
|
||||||
|
+ offset += blksize;
|
||||||
|
+ count -= blksize;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the caller only wanted the first extent, and we've managed
|
||||||
|
@@ -734,11 +754,11 @@ cow_extents (nbdkit_next *next,
|
||||||
|
static struct nbdkit_filter filter = {
|
||||||
|
.name = "cow",
|
||||||
|
.longname = "nbdkit copy-on-write (COW) filter",
|
||||||
|
- .load = cow_load,
|
||||||
|
.unload = cow_unload,
|
||||||
|
.open = cow_open,
|
||||||
|
.config = cow_config,
|
||||||
|
.config_help = cow_config_help,
|
||||||
|
+ .get_ready = cow_get_ready,
|
||||||
|
.prepare = cow_prepare,
|
||||||
|
.get_size = cow_get_size,
|
||||||
|
.can_write = cow_can_write,
|
||||||
|
diff --git a/filters/cow/cow.h b/filters/cow/cow.h
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000..d46dbe91
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/filters/cow/cow.h
|
||||||
|
@@ -0,0 +1,39 @@
|
||||||
|
+/* nbdkit
|
||||||
|
+ * Copyright (C) 2018-2021 Red Hat Inc.
|
||||||
|
+ *
|
||||||
|
+ * Redistribution and use in source and binary forms, with or without
|
||||||
|
+ * modification, are permitted provided that the following conditions are
|
||||||
|
+ * met:
|
||||||
|
+ *
|
||||||
|
+ * * Redistributions of source code must retain the above copyright
|
||||||
|
+ * notice, this list of conditions and the following disclaimer.
|
||||||
|
+ *
|
||||||
|
+ * * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+ * notice, this list of conditions and the following disclaimer in the
|
||||||
|
+ * documentation and/or other materials provided with the distribution.
|
||||||
|
+ *
|
||||||
|
+ * * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+ * used to endorse or promote products derived from this software without
|
||||||
|
+ * specific prior written permission.
|
||||||
|
+ *
|
||||||
|
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+ * SUCH DAMAGE.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#ifndef NBDKIT_COW_H
|
||||||
|
+#define NBDKIT_COW_H
|
||||||
|
+
|
||||||
|
+/* Size of a block in the overlay. */
|
||||||
|
+extern unsigned blksize;
|
||||||
|
+
|
||||||
|
+#endif /* NBDKIT_COW_H */
|
||||||
|
diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod
|
||||||
|
index 7f861140..997c9097 100644
|
||||||
|
--- a/filters/cow/nbdkit-cow-filter.pod
|
||||||
|
+++ b/filters/cow/nbdkit-cow-filter.pod
|
||||||
|
@@ -5,6 +5,7 @@ nbdkit-cow-filter - nbdkit copy-on-write (COW) filter
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
nbdkit --filter=cow plugin [plugin-args...]
|
||||||
|
+ [cow-block-size=N]
|
||||||
|
[cow-on-cache=false|true]
|
||||||
|
[cow-on-read=false|true|/PATH]
|
||||||
|
|
||||||
|
@@ -42,6 +43,10 @@ serve the same data to each client.
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
+=item B<cow-block-size=>N
|
||||||
|
+
|
||||||
|
+Set the block size used by the filter. This must be a power of 2. The default is 64K.
|
||||||
|
+
|
||||||
|
=item B<cow-on-cache=false>
|
||||||
|
|
||||||
|
Do not save data from cache (prefetch) requests in the overlay. This
|
||||||
|
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
||||||
|
index e61c5829..d93f848f 100644
|
||||||
|
--- a/tests/Makefile.am
|
||||||
|
+++ b/tests/Makefile.am
|
||||||
|
@@ -1404,6 +1404,7 @@ EXTRA_DIST += \
|
||||||
|
if HAVE_MKE2FS_WITH_D
|
||||||
|
TESTS += \
|
||||||
|
test-cow.sh \
|
||||||
|
+ test-cow-block-size.sh \
|
||||||
|
test-cow-extents1.sh \
|
||||||
|
test-cow-extents2.sh \
|
||||||
|
test-cow-extents-large.sh \
|
||||||
|
@@ -1415,6 +1416,7 @@ endif
|
||||||
|
TESTS += test-cow-null.sh
|
||||||
|
EXTRA_DIST += \
|
||||||
|
test-cow.sh \
|
||||||
|
+ test-cow-block-size.sh \
|
||||||
|
test-cow-extents1.sh \
|
||||||
|
test-cow-extents2.sh \
|
||||||
|
test-cow-extents-large.sh \
|
||||||
|
diff --git a/tests/test-cow-block-size.sh b/tests/test-cow-block-size.sh
|
||||||
|
new file mode 100755
|
||||||
|
index 00000000..6de1c068
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/tests/test-cow-block-size.sh
|
||||||
|
@@ -0,0 +1,72 @@
|
||||||
|
+#!/usr/bin/env bash
|
||||||
|
+# nbdkit
|
||||||
|
+# Copyright (C) 2018-2021 Red Hat Inc.
|
||||||
|
+#
|
||||||
|
+# Redistribution and use in source and binary forms, with or without
|
||||||
|
+# modification, are permitted provided that the following conditions are
|
||||||
|
+# met:
|
||||||
|
+#
|
||||||
|
+# * Redistributions of source code must retain the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer.
|
||||||
|
+#
|
||||||
|
+# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+# notice, this list of conditions and the following disclaimer in the
|
||||||
|
+# documentation and/or other materials provided with the distribution.
|
||||||
|
+#
|
||||||
|
+# * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+# used to endorse or promote products derived from this software without
|
||||||
|
+# specific prior written permission.
|
||||||
|
+#
|
||||||
|
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+# SUCH DAMAGE.
|
||||||
|
+
|
||||||
|
+source ./functions.sh
|
||||||
|
+set -e
|
||||||
|
+set -x
|
||||||
|
+
|
||||||
|
+requires_plugin linuxdisk
|
||||||
|
+requires guestfish --version
|
||||||
|
+requires nbdcopy --version
|
||||||
|
+requires qemu-img --version
|
||||||
|
+
|
||||||
|
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
|
||||||
|
+files="cow-block-size-base.img $sock cow-block-size.pid"
|
||||||
|
+rm -f $files
|
||||||
|
+cleanup_fn rm -f $files
|
||||||
|
+
|
||||||
|
+# Create a base image which is partitioned with an empty filesystem.
|
||||||
|
+rm -rf cow-block-size.d
|
||||||
|
+mkdir cow-block-size.d
|
||||||
|
+cleanup_fn rm -rf cow-block-size.d
|
||||||
|
+nbdkit -fv -U - linuxdisk cow-block-size.d size=100M \
|
||||||
|
+ --run 'nbdcopy "$uri" cow-block-size-base.img'
|
||||||
|
+lastmod="$(stat -c "%y" cow-block-size-base.img)"
|
||||||
|
+
|
||||||
|
+# Run nbdkit with a COW overlay, 4M block size and copy on read.
|
||||||
|
+start_nbdkit -P cow-block-size.pid -U $sock \
|
||||||
|
+ --filter=cow file cow-block-size-base.img \
|
||||||
|
+ cow-block-size=4M cow-on-read=true
|
||||||
|
+
|
||||||
|
+# Write some data into the overlay.
|
||||||
|
+guestfish --format=raw -a "nbd://?socket=$sock" -m /dev/sda1 <<EOF
|
||||||
|
+ fill-pattern "abcde" 128K /large
|
||||||
|
+ write /hello "hello, world"
|
||||||
|
+EOF
|
||||||
|
+
|
||||||
|
+# The original file must not be modified.
|
||||||
|
+currmod="$(stat -c "%y" cow-block-size-base.img)"
|
||||||
|
+
|
||||||
|
+if [ "$lastmod" != "$currmod" ]; then
|
||||||
|
+ echo "$0: FAILED last modified time of base file changed"
|
||||||
|
+ exit 1
|
||||||
|
+fi
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
26
SOURCES/0024-cow-Ship-cow.h-header.patch
Normal file
26
SOURCES/0024-cow-Ship-cow.h-header.patch
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
From 8d2ef02bd4de988e20ad1efba8038d311cd59665 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Blake <eblake@redhat.com>
|
||||||
|
Date: Wed, 18 Aug 2021 19:16:43 -0500
|
||||||
|
Subject: [PATCH] cow: Ship cow.h header
|
||||||
|
|
||||||
|
Fixes: 7182c47d0 (cow: Make the block size configurable)
|
||||||
|
(cherry picked from commit 75ff1b8b1afb3744b21a306c62e4973c90d386be)
|
||||||
|
---
|
||||||
|
filters/cow/Makefile.am | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
|
||||||
|
diff --git a/filters/cow/Makefile.am b/filters/cow/Makefile.am
|
||||||
|
index a80ccd8f..88cda497 100644
|
||||||
|
--- a/filters/cow/Makefile.am
|
||||||
|
+++ b/filters/cow/Makefile.am
|
||||||
|
@@ -39,6 +39,7 @@ nbdkit_cow_filter_la_SOURCES = \
|
||||||
|
blk.c \
|
||||||
|
blk.h \
|
||||||
|
cow.c \
|
||||||
|
+ cow.h \
|
||||||
|
$(top_srcdir)/include/nbdkit-filter.h \
|
||||||
|
$(NULL)
|
||||||
|
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
55
SOURCES/copy-patches.sh
Executable file
55
SOURCES/copy-patches.sh
Executable file
@ -0,0 +1,55 @@
|
|||||||
|
#!/bin/bash -
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Maintainer script to copy patches from the git repo to the current
|
||||||
|
# directory. Use it like this:
|
||||||
|
# ./copy-patches.sh
|
||||||
|
|
||||||
|
rhel_version=9.0
|
||||||
|
|
||||||
|
# Check we're in the right directory.
|
||||||
|
if [ ! -f nbdkit.spec ]; then
|
||||||
|
echo "$0: run this from the directory containing 'nbdkit.spec'"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
git_checkout=$HOME/d/nbdkit-rhel-$rhel_version
|
||||||
|
if [ ! -d $git_checkout ]; then
|
||||||
|
echo "$0: $git_checkout does not exist"
|
||||||
|
echo "This script is only for use by the maintainer when preparing a"
|
||||||
|
echo "nbdkit release on RHEL."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Get the base version of nbdkit.
|
||||||
|
version=`grep '^Version:' nbdkit.spec | awk '{print $2}'`
|
||||||
|
tag="v$version"
|
||||||
|
|
||||||
|
# Remove any existing patches.
|
||||||
|
git rm -f [0-9]*.patch ||:
|
||||||
|
rm -f [0-9]*.patch
|
||||||
|
|
||||||
|
# Get the patches.
|
||||||
|
(cd $git_checkout; rm -f [0-9]*.patch; git format-patch -N $tag)
|
||||||
|
mv $git_checkout/[0-9]*.patch .
|
||||||
|
|
||||||
|
# Remove any not to be applied.
|
||||||
|
rm -f *NOT-FOR-RPM*.patch
|
||||||
|
|
||||||
|
# Add the patches.
|
||||||
|
git add [0-9]*.patch
|
||||||
|
|
||||||
|
# Print out the patch lines.
|
||||||
|
echo
|
||||||
|
echo "--- Copy the following text into nbdkit.spec file"
|
||||||
|
echo
|
||||||
|
|
||||||
|
echo "# Patches."
|
||||||
|
for f in [0-9]*.patch; do
|
||||||
|
n=`echo $f | awk -F- '{print $1}'`
|
||||||
|
echo "Patch$n: $f"
|
||||||
|
done
|
||||||
|
|
||||||
|
echo
|
||||||
|
echo "--- End of text"
|
11
SOURCES/nbdkit-1.26.5.tar.gz.sig
Normal file
11
SOURCES/nbdkit-1.26.5.tar.gz.sig
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
-----BEGIN PGP SIGNATURE-----
|
||||||
|
|
||||||
|
iQEzBAABCAAdFiEEccLMIrHEYCkn0vOqp6FrSiUnQ2oFAmEdoO0ACgkQp6FrSiUn
|
||||||
|
Q2r9AAf/VN10pDdAapr8W5bPNbgMIJ4dGtGl3sbMKg5mUVNkWwReUpiiUInZwnQW
|
||||||
|
I+TUjMkvB2jJGkruldgipuRNbAAGROZV3JugE2fMl8qQC1TvL/RMAOHcfKCswzfv
|
||||||
|
pVik1FmvpI88litCw05csH30TEA1BtFM0TlOR9xoeDkV9e2IUtWcxFJYP6RN5COr
|
||||||
|
NgTMfouxHWuR+FlVpXkvPl4aOuCavpplobcS0OaKNrqFXMhN+qcKjYgKazUKPB9C
|
||||||
|
TEExW8/CKTBVdaNpMbcLW/VBEaE85c3mv0xU26YKcEkj+OPAl4AzJ4Z+0MSDXk7t
|
||||||
|
3nrWSxPX67gBU5XDtKZ1IMUttydvzA==
|
||||||
|
=NyyV
|
||||||
|
-----END PGP SIGNATURE-----
|
2346
SPECS/nbdkit.spec
Normal file
2346
SPECS/nbdkit.spec
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user