import nbdkit-1.26.5-1.el9

This commit is contained in:
CentOS Sources 2021-11-02 16:57:40 -04:00 committed by Stepan Oksanichenko
commit ed82013cf7
29 changed files with 6794 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
SOURCES/libguestfs.keyring
SOURCES/nbdkit-1.26.5.tar.gz

2
.nbdkit.metadata Normal file
View File

@ -0,0 +1,2 @@
cc1b37b9cfafa515aab3eefd345ecc59aac2ce7b SOURCES/libguestfs.keyring
52a221954f374f2a3f1adcc5c5e3e6f7332d906d SOURCES/nbdkit-1.26.5.tar.gz

View File

@ -0,0 +1,39 @@
From 89ef17c90996c0e212e3a17c8d26ff930ab464ea Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Mon, 16 Aug 2021 13:43:29 -0500
Subject: [PATCH] server: reset meta context replies on starttls
Related to CVE-2021-3716, but not as severe. No compliant client will
send NBD_CMD_BLOCK_STATUS unless it first negotiates
NBD_OPT_SET_META_CONTEXT. If an attacker injects a premature
SET_META_CONTEXT, either the client will never notice (because it
never uses BLOCK_STATUS), or the client will overwrite the attacker's
attempt with the client's own SET_META_CONTEXT request after
encryption is enabled. So I don't class this as having the potential
to trigger denial-of-service due to any protocol mismatch between
compliant client and server (I don't care what happens with
non-compliant clients).
Fixes: 26455d45 (server: protocol: Implement Block Status "base:allocation".)
(cherry picked from commit 6c5faac6a37077cf2366388a80862bb00616d0d8)
---
server/protocol-handshake-newstyle.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/server/protocol-handshake-newstyle.c b/server/protocol-handshake-newstyle.c
index 7e6b7b1b..79b2c8ce 100644
--- a/server/protocol-handshake-newstyle.c
+++ b/server/protocol-handshake-newstyle.c
@@ -497,6 +497,9 @@ negotiate_handshake_newstyle_options (void)
debug ("using TLS on this connection");
/* Wipe out any cached state. */
conn->structured_replies = false;
+ free (conn->exportname_from_set_meta_context);
+ conn->exportname_from_set_meta_context = NULL;
+ conn->meta_context_base_allocation = false;
for_each_backend (b) {
free (conn->default_exportname[b->i]);
conn->default_exportname[b->i] = NULL;
--
2.31.1

View File

@ -0,0 +1,124 @@
From 4b576a8e0eb99ec1a79ca432350fb7ac27a5c089 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 26 Jul 2021 11:59:43 +0100
Subject: [PATCH] cache: Reduce verbosity of debugging
The cache filter is very verbose in its debugging. Reduce the default
level. Use -D cache.verbose=1 to restore original debugging.
Compare commit 745a0f13662031c2b9c9b69f62b4ae3a6b2f38f0.
(cherry picked from commit 6be735edf7d5fb3fb8350c72e6d9525badbab14d)
---
filters/cache/blk.c | 53 +++++++++++++++++++++++++++------------------
1 file changed, 32 insertions(+), 21 deletions(-)
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index 12e8407e..f52f30e3 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -93,6 +93,9 @@ enum bm_entry {
BLOCK_DIRTY = 3,
};
+/* Extra debugging (-D cache.verbose=1). */
+NBDKIT_DLL_PUBLIC int cache_debug_verbose = 0;
+
int
blk_init (void)
{
@@ -199,12 +202,14 @@ blk_read (nbdkit_next *next,
reclaim (fd, &bm);
- nbdkit_debug ("cache: blk_read block %" PRIu64 " (offset %" PRIu64 ") is %s",
- blknum, (uint64_t) offset,
- state == BLOCK_NOT_CACHED ? "not cached" :
- state == BLOCK_CLEAN ? "clean" :
- state == BLOCK_DIRTY ? "dirty" :
- "unknown");
+ if (cache_debug_verbose)
+ nbdkit_debug ("cache: blk_read block %" PRIu64
+ " (offset %" PRIu64 ") is %s",
+ blknum, (uint64_t) offset,
+ state == BLOCK_NOT_CACHED ? "not cached" :
+ state == BLOCK_CLEAN ? "clean" :
+ state == BLOCK_DIRTY ? "dirty" :
+ "unknown");
if (state == BLOCK_NOT_CACHED) { /* Read underlying plugin. */
unsigned n = blksize, tail = 0;
@@ -225,9 +230,10 @@ blk_read (nbdkit_next *next,
/* If cache-on-read, copy the block to the cache. */
if (cache_on_read) {
- nbdkit_debug ("cache: cache-on-read block %" PRIu64
- " (offset %" PRIu64 ")",
- blknum, (uint64_t) offset);
+ if (cache_debug_verbose)
+ nbdkit_debug ("cache: cache-on-read block %" PRIu64
+ " (offset %" PRIu64 ")",
+ blknum, (uint64_t) offset);
if (pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
@@ -259,12 +265,14 @@ blk_cache (nbdkit_next *next,
reclaim (fd, &bm);
- nbdkit_debug ("cache: blk_cache block %" PRIu64 " (offset %" PRIu64 ") is %s",
- blknum, (uint64_t) offset,
- state == BLOCK_NOT_CACHED ? "not cached" :
- state == BLOCK_CLEAN ? "clean" :
- state == BLOCK_DIRTY ? "dirty" :
- "unknown");
+ if (cache_debug_verbose)
+ nbdkit_debug ("cache: blk_cache block %" PRIu64
+ " (offset %" PRIu64 ") is %s",
+ blknum, (uint64_t) offset,
+ state == BLOCK_NOT_CACHED ? "not cached" :
+ state == BLOCK_CLEAN ? "clean" :
+ state == BLOCK_DIRTY ? "dirty" :
+ "unknown");
if (state == BLOCK_NOT_CACHED) {
/* Read underlying plugin, copy to cache regardless of cache-on-read. */
@@ -284,8 +292,9 @@ blk_cache (nbdkit_next *next,
*/
memset (block + n, 0, tail);
- nbdkit_debug ("cache: cache block %" PRIu64 " (offset %" PRIu64 ")",
- blknum, (uint64_t) offset);
+ if (cache_debug_verbose)
+ nbdkit_debug ("cache: cache block %" PRIu64 " (offset %" PRIu64 ")",
+ blknum, (uint64_t) offset);
if (pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
@@ -324,8 +333,9 @@ blk_writethrough (nbdkit_next *next,
reclaim (fd, &bm);
- nbdkit_debug ("cache: writethrough block %" PRIu64 " (offset %" PRIu64 ")",
- blknum, (uint64_t) offset);
+ if (cache_debug_verbose)
+ nbdkit_debug ("cache: writethrough block %" PRIu64 " (offset %" PRIu64 ")",
+ blknum, (uint64_t) offset);
if (pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
@@ -357,8 +367,9 @@ blk_write (nbdkit_next *next,
reclaim (fd, &bm);
- nbdkit_debug ("cache: writeback block %" PRIu64 " (offset %" PRIu64 ")",
- blknum, (uint64_t) offset);
+ if (cache_debug_verbose)
+ nbdkit_debug ("cache: writeback block %" PRIu64 " (offset %" PRIu64 ")",
+ blknum, (uint64_t) offset);
if (pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
--
2.31.1

View File

@ -0,0 +1,400 @@
From b5dc8577c5c6d1205e2106b629fad327c3a409ea Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 26 Jul 2021 13:55:21 +0100
Subject: [PATCH] cache, cow: Add blk_read_multiple function
Currently the cache and cow filters break up large requests into many
single block-sized requests to the underlying plugin. For some
plugins (eg. curl) this is very inefficient and causes huge
slow-downs.
For example I tested nbdkit + curl vs nbdkit + cache + curl against a
slow, remote VMware server. A simple run of virt-inspector was at
least 6-7 times slower with the cache filter. (It was so slow that I
didn't actually let it run to completion - I am estimating the
slowdown multiple using interim debug messages).
Implement a new blk_read_multiple function in the cache filter. It
does not break up "runs" of blocks which all have the same cache
state. The cache .pread method uses the new function to read the
block-aligned part of the request.
(cherry picked from commit ab661ccef5b3369fa22c33d0289baddc251b73bf)
---
filters/cache/blk.c | 83 ++++++++++++++++++++++++++++++++-----------
filters/cache/blk.h | 6 ++++
filters/cache/cache.c | 21 +++++------
filters/cow/blk.c | 63 +++++++++++++++++++++++---------
filters/cow/blk.h | 6 ++++
filters/cow/cow.c | 21 +++++------
6 files changed, 138 insertions(+), 62 deletions(-)
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index f52f30e3..f85ada35 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -44,6 +44,7 @@
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
+#include <limits.h>
#include <errno.h>
#ifdef HAVE_SYS_STATVFS_H
@@ -193,26 +194,40 @@ blk_set_size (uint64_t new_size)
return 0;
}
-int
-blk_read (nbdkit_next *next,
- uint64_t blknum, uint8_t *block, int *err)
+static int
+_blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
{
off_t offset = blknum * blksize;
- enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED);
+ bool not_cached =
+ bitmap_get_blk (&bm, blknum, BLOCK_NOT_CACHED) == BLOCK_NOT_CACHED;
+ uint64_t b, runblocks;
- reclaim (fd, &bm);
+ assert (nrblocks > 0);
if (cache_debug_verbose)
- nbdkit_debug ("cache: blk_read block %" PRIu64
+ nbdkit_debug ("cache: blk_read_multiple block %" PRIu64
" (offset %" PRIu64 ") is %s",
blknum, (uint64_t) offset,
- state == BLOCK_NOT_CACHED ? "not cached" :
- state == BLOCK_CLEAN ? "clean" :
- state == BLOCK_DIRTY ? "dirty" :
- "unknown");
+ not_cached ? "not cached" : "cached");
- if (state == BLOCK_NOT_CACHED) { /* Read underlying plugin. */
- unsigned n = blksize, tail = 0;
+ /* Find out how many of the following blocks form a "run" with the
+ * same cached/not-cached state. We can process that many blocks in
+ * one go.
+ */
+ for (b = 1, runblocks = 1; b < nrblocks; ++b, ++runblocks) {
+ bool s =
+ bitmap_get_blk (&bm, blknum + b, BLOCK_NOT_CACHED) == BLOCK_NOT_CACHED;
+ if (not_cached != s)
+ break;
+ }
+
+ if (not_cached) { /* Read underlying plugin. */
+ unsigned n, tail = 0;
+
+ assert (blksize * runblocks <= UINT_MAX);
+ n = blksize * runblocks;
if (offset + n > size) {
tail = offset + n - size;
@@ -228,32 +243,60 @@ blk_read (nbdkit_next *next,
*/
memset (block + n, 0, tail);
- /* If cache-on-read, copy the block to the cache. */
+ /* If cache-on-read, copy the blocks to the cache. */
if (cache_on_read) {
if (cache_debug_verbose)
nbdkit_debug ("cache: cache-on-read block %" PRIu64
" (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, blksize, offset) == -1) {
+ if (pwrite (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
}
- bitmap_set_blk (&bm, blknum, BLOCK_CLEAN);
- lru_set_recently_accessed (blknum);
+ for (b = 0; b < runblocks; ++b) {
+ bitmap_set_blk (&bm, blknum + b, BLOCK_CLEAN);
+ lru_set_recently_accessed (blknum + b);
+ }
}
- return 0;
}
else { /* Read cache. */
- if (pread (fd, block, blksize, offset) == -1) {
+ if (pread (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pread: %m");
return -1;
}
- lru_set_recently_accessed (blknum);
- return 0;
+ for (b = 0; b < runblocks; ++b)
+ lru_set_recently_accessed (blknum + b);
}
+
+ /* If all done, return. */
+ if (runblocks == nrblocks)
+ return 0;
+
+ /* Recurse to read remaining blocks. */
+ return _blk_read_multiple (next,
+ blknum + runblocks,
+ nrblocks - runblocks,
+ block + blksize * runblocks,
+ err);
+}
+
+int
+blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
+{
+ reclaim (fd, &bm);
+ return _blk_read_multiple (next, blknum, nrblocks, block, err);
+}
+
+int
+blk_read (nbdkit_next *next,
+ uint64_t blknum, uint8_t *block, int *err)
+{
+ return blk_read_multiple (next, blknum, 1, block, err);
}
int
diff --git a/filters/cache/blk.h b/filters/cache/blk.h
index 87c753e2..1ee33ed7 100644
--- a/filters/cache/blk.h
+++ b/filters/cache/blk.h
@@ -55,6 +55,12 @@ extern int blk_read (nbdkit_next *next,
uint64_t blknum, uint8_t *block, int *err)
__attribute__((__nonnull__ (1, 3, 4)));
+/* As above, but read multiple blocks. */
+extern int blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
+ __attribute__((__nonnull__ (1, 4, 5)));
+
/* If a single block is not cached, copy it from the plugin. */
extern int blk_cache (nbdkit_next *next,
uint64_t blknum, uint8_t *block, int *err)
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
index 745f552d..14cc03f2 100644
--- a/filters/cache/cache.c
+++ b/filters/cache/cache.c
@@ -313,7 +313,7 @@ cache_pread (nbdkit_next *next,
uint32_t flags, int *err)
{
CLEANUP_FREE uint8_t *block = NULL;
- uint64_t blknum, blkoffs;
+ uint64_t blknum, blkoffs, nrblocks;
int r;
assert (!flags);
@@ -348,22 +348,17 @@ cache_pread (nbdkit_next *next,
}
/* Aligned body */
- /* XXX This breaks up large read requests into smaller ones, which
- * is a problem for plugins which have a large, fixed per-request
- * overhead (hello, curl). We should try to keep large requests
- * together as much as possible, but that requires us to be much
- * smarter here.
- */
- while (count >= blksize) {
+ nrblocks = count / blksize;
+ if (nrblocks > 0) {
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
- r = blk_read (next, blknum, buf, err);
+ r = blk_read_multiple (next, blknum, nrblocks, buf, err);
if (r == -1)
return -1;
- buf += blksize;
- count -= blksize;
- offset += blksize;
- blknum++;
+ buf += nrblocks * blksize;
+ count -= nrblocks * blksize;
+ offset += nrblocks * blksize;
+ blknum += nrblocks;
}
/* Unaligned tail */
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index b7c4d7f1..4ec8d1b8 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -79,6 +79,7 @@
#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
+#include <limits.h>
#include <errno.h>
#include <sys/types.h>
@@ -219,33 +220,48 @@ blk_status (uint64_t blknum, bool *present, bool *trimmed)
*trimmed = state == BLOCK_TRIMMED;
}
-/* These are the block operations. They always read or write a single
- * whole block of size blksize.
+/* These are the block operations. They always read or write whole
+ * blocks of size blksize.
*/
int
-blk_read (nbdkit_next *next,
- uint64_t blknum, uint8_t *block, int *err)
+blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
{
off_t offset = blknum * BLKSIZE;
enum bm_entry state;
+ uint64_t b, runblocks;
- /* The state might be modified from another thread - for example
- * another thread might write (BLOCK_NOT_ALLOCATED ->
- * BLOCK_ALLOCATED) while we are reading from the plugin, returning
- * the old data. However a read issued after the write returns
- * should always return the correct data.
+ /* Find out how many of the following blocks form a "run" with the
+ * same state. We can process that many blocks in one go.
+ *
+ * About the locking: The state might be modified from another
+ * thread - for example another thread might write
+ * (BLOCK_NOT_ALLOCATED -> BLOCK_ALLOCATED) while we are reading
+ * from the plugin, returning the old data. However a read issued
+ * after the write returns should always return the correct data.
*/
{
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_ALLOCATED);
+
+ for (b = 1, runblocks = 1; b < nrblocks; ++b, ++runblocks) {
+ enum bm_entry s = bitmap_get_blk (&bm, blknum + b, BLOCK_NOT_ALLOCATED);
+ if (state != s)
+ break;
+ }
}
if (cow_debug_verbose)
- nbdkit_debug ("cow: blk_read block %" PRIu64 " (offset %" PRIu64 ") is %s",
+ nbdkit_debug ("cow: blk_read_multiple block %" PRIu64
+ " (offset %" PRIu64 ") is %s",
blknum, (uint64_t) offset, state_to_string (state));
if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
- unsigned n = BLKSIZE, tail = 0;
+ unsigned n, tail = 0;
+
+ assert (BLKSIZE * runblocks <= UINT_MAX);
+ n = BLKSIZE * runblocks;
if (offset + n > size) {
tail = offset + n - size;
@@ -260,20 +276,35 @@ blk_read (nbdkit_next *next,
* zeroing the tail.
*/
memset (block + n, 0, tail);
- return 0;
}
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
- if (pread (fd, block, BLKSIZE, offset) == -1) {
+ if (pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pread: %m");
return -1;
}
- return 0;
}
else /* state == BLOCK_TRIMMED */ {
- memset (block, 0, BLKSIZE);
- return 0;
+ memset (block, 0, BLKSIZE * runblocks);
}
+
+ /* If all done, return. */
+ if (runblocks == nrblocks)
+ return 0;
+
+ /* Recurse to read remaining blocks. */
+ return blk_read_multiple (next,
+ blknum + runblocks,
+ nrblocks - runblocks,
+ block + BLKSIZE * runblocks,
+ err);
+}
+
+int
+blk_read (nbdkit_next *next,
+ uint64_t blknum, uint8_t *block, int *err)
+{
+ return blk_read_multiple (next, blknum, 1, block, err);
}
int
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
index e6fd7417..b066c602 100644
--- a/filters/cow/blk.h
+++ b/filters/cow/blk.h
@@ -55,6 +55,12 @@ extern int blk_read (nbdkit_next *next,
uint64_t blknum, uint8_t *block, int *err)
__attribute__((__nonnull__ (1, 3, 4)));
+/* Read multiple blocks from the overlay or plugin. */
+extern int blk_read_multiple (nbdkit_next *next,
+ uint64_t blknum, uint64_t nrblocks,
+ uint8_t *block, int *err)
+ __attribute__((__nonnull__ (1, 4, 5)));
+
/* Cache mode for blocks not already in overlay */
enum cache_mode {
BLK_CACHE_IGNORE, /* Do nothing */
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
index f30b7505..78daca22 100644
--- a/filters/cow/cow.c
+++ b/filters/cow/cow.c
@@ -210,7 +210,7 @@ cow_pread (nbdkit_next *next,
uint32_t flags, int *err)
{
CLEANUP_FREE uint8_t *block = NULL;
- uint64_t blknum, blkoffs;
+ uint64_t blknum, blkoffs, nrblocks;
int r;
if (!IS_ALIGNED (count | offset, BLKSIZE)) {
@@ -243,21 +243,16 @@ cow_pread (nbdkit_next *next,
}
/* Aligned body */
- /* XXX This breaks up large read requests into smaller ones, which
- * is a problem for plugins which have a large, fixed per-request
- * overhead (hello, curl). We should try to keep large requests
- * together as much as possible, but that requires us to be much
- * smarter here.
- */
- while (count >= BLKSIZE) {
- r = blk_read (next, blknum, buf, err);
+ nrblocks = count / BLKSIZE;
+ if (nrblocks > 0) {
+ r = blk_read_multiple (next, blknum, nrblocks, buf, err);
if (r == -1)
return -1;
- buf += BLKSIZE;
- count -= BLKSIZE;
- offset += BLKSIZE;
- blknum++;
+ buf += nrblocks * BLKSIZE;
+ count -= nrblocks * BLKSIZE;
+ offset += nrblocks * BLKSIZE;
+ blknum += nrblocks;
}
/* Unaligned tail */
--
2.31.1

View File

@ -0,0 +1,215 @@
From 5bd332a683811586039f99f31c01d4f2f7181334 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 26 Jul 2021 15:21:18 +0100
Subject: [PATCH] cache, cow: Use full pread/pwrite operations
Although it probably cannot happen on Linux, POSIX allows pread/pwrite
to return or write fewer bytes than requested. The cache and cow
filters didn't handle this situation. Replace the raw
pread(2)/pwrite(2) syscalls with alternate versions which can handle
this.
(cherry picked from commit ce0db9d7736dd28dd0f10951ce65853e50b35e41)
---
common/utils/Makefile.am | 1 +
common/utils/full-rw.c | 81 ++++++++++++++++++++++++++++++++++++++++
common/utils/utils.h | 2 +
filters/cache/blk.c | 10 ++---
filters/cow/blk.c | 6 +--
5 files changed, 92 insertions(+), 8 deletions(-)
create mode 100644 common/utils/full-rw.c
diff --git a/common/utils/Makefile.am b/common/utils/Makefile.am
index 1708a4c8..14e9dfc4 100644
--- a/common/utils/Makefile.am
+++ b/common/utils/Makefile.am
@@ -40,6 +40,7 @@ libutils_la_SOURCES = \
cleanup-nbdkit.c \
cleanup.h \
environ.c \
+ full-rw.c \
quote.c \
utils.c \
utils.h \
diff --git a/common/utils/full-rw.c b/common/utils/full-rw.c
new file mode 100644
index 00000000..55b32cdd
--- /dev/null
+++ b/common/utils/full-rw.c
@@ -0,0 +1,81 @@
+/* nbdkit
+ * Copyright (C) 2021 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* These functions are like pread(2)/pwrite(2) but they always read or
+ * write the full amount, or fail.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+
+ssize_t
+full_pread (int fd, void *buf, size_t count, off_t offset)
+{
+ ssize_t ret = 0, r;
+
+ while (count > 0) {
+ r = pread (fd, buf, count, offset);
+ if (r == -1) return -1;
+ if (r == 0) {
+ /* Presumably the caller wasn't expecting end-of-file here, so
+ * return an error.
+ */
+ errno = EIO;
+ return -1;
+ }
+ ret += r;
+ offset += r;
+ count -= r;
+ }
+
+ return ret;
+}
+
+ssize_t
+full_pwrite (int fd, const void *buf, size_t count, off_t offset)
+{
+ ssize_t ret = 0, r;
+
+ while (count > 0) {
+ r = pwrite (fd, buf, count, offset);
+ if (r == -1) return -1;
+ ret += r;
+ offset += r;
+ count -= r;
+ }
+
+ return ret;
+}
diff --git a/common/utils/utils.h b/common/utils/utils.h
index f8f70212..83397ae1 100644
--- a/common/utils/utils.h
+++ b/common/utils/utils.h
@@ -40,5 +40,7 @@ extern int set_cloexec (int fd);
extern int set_nonblock (int fd);
extern char **copy_environ (char **env, ...) __attribute__((__sentinel__));
extern char *make_temporary_directory (void);
+extern ssize_t full_pread (int fd, void *buf, size_t count, off_t offset);
+extern ssize_t full_pwrite (int fd, const void *buf, size_t count, off_t offset);
#endif /* NBDKIT_UTILS_H */
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index f85ada35..42bd3779 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -250,7 +250,7 @@ _blk_read_multiple (nbdkit_next *next,
" (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, blksize * runblocks, offset) == -1) {
+ if (full_pwrite (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -262,7 +262,7 @@ _blk_read_multiple (nbdkit_next *next,
}
}
else { /* Read cache. */
- if (pread (fd, block, blksize * runblocks, offset) == -1) {
+ if (full_pread (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pread: %m");
return -1;
@@ -339,7 +339,7 @@ blk_cache (nbdkit_next *next,
nbdkit_debug ("cache: cache block %" PRIu64 " (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, blksize, offset) == -1) {
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -380,7 +380,7 @@ blk_writethrough (nbdkit_next *next,
nbdkit_debug ("cache: writethrough block %" PRIu64 " (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, blksize, offset) == -1) {
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -414,7 +414,7 @@ blk_write (nbdkit_next *next,
nbdkit_debug ("cache: writeback block %" PRIu64 " (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, blksize, offset) == -1) {
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 4ec8d1b8..121b0dd4 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -278,7 +278,7 @@ blk_read_multiple (nbdkit_next *next,
memset (block + n, 0, tail);
}
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
- if (pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
+ if (full_pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pread: %m");
return -1;
@@ -353,7 +353,7 @@ blk_cache (nbdkit_next *next,
memset (block + n, 0, tail);
if (mode == BLK_CACHE_COW) {
- if (pwrite (fd, block, BLKSIZE, offset) == -1) {
+ if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -372,7 +372,7 @@ blk_write (uint64_t blknum, const uint8_t *block, int *err)
nbdkit_debug ("cow: blk_write block %" PRIu64 " (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
- if (pwrite (fd, block, BLKSIZE, offset) == -1) {
+ if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
--
2.31.1

View File

@ -0,0 +1,151 @@
From 4db23fd29af0488aa9c7e01577a5be9565a4465e Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 26 Jul 2021 16:16:15 +0100
Subject: [PATCH] cache: Implement cache-on-read=/PATH
For virt-v2v we will need to be able to turn cache-on-read on while
performing inspection and modification of the guest, and off when
doing the bulk copy. To do that allow the cache-on-read parameter to
refer to a path where the existence of the path toggles the feature.
(We could restart nbdkit between these phases, but this change avoids
doing that.)
(cherry picked from commit c8b575241b15b3bf0adaf15313e67e5ed4270b5a)
---
filters/cache/blk.c | 2 +-
filters/cache/cache.c | 33 ++++++++++++++++++++-------
filters/cache/cache.h | 10 ++++++--
filters/cache/nbdkit-cache-filter.pod | 11 ++++++++-
4 files changed, 44 insertions(+), 12 deletions(-)
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index 42bd3779..19f79605 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -244,7 +244,7 @@ _blk_read_multiple (nbdkit_next *next,
memset (block + n, 0, tail);
/* If cache-on-read, copy the blocks to the cache. */
- if (cache_on_read) {
+ if (cache_on_read ()) {
if (cache_debug_verbose)
nbdkit_debug ("cache: cache-on-read block %" PRIu64
" (offset %" PRIu64 ")",
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
index 14cc03f2..44da0008 100644
--- a/filters/cache/cache.c
+++ b/filters/cache/cache.c
@@ -74,7 +74,8 @@ unsigned blksize;
enum cache_mode cache_mode = CACHE_MODE_WRITEBACK;
int64_t max_size = -1;
unsigned hi_thresh = 95, lo_thresh = 80;
-bool cache_on_read = false;
+enum cor_mode cor_mode = COR_OFF;
+const char *cor_path;
static int cache_flush (nbdkit_next *next, void *handle, uint32_t flags,
int *err);
@@ -161,12 +162,16 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
}
#endif /* !HAVE_CACHE_RECLAIM */
else if (strcmp (key, "cache-on-read") == 0) {
- int r;
-
- r = nbdkit_parse_bool (value);
- if (r == -1)
- return -1;
- cache_on_read = r;
+ if (value[0] == '/') {
+ cor_path = value;
+ cor_mode = COR_PATH;
+ }
+ else {
+ int r = nbdkit_parse_bool (value);
+ if (r == -1)
+ return -1;
+ cor_mode = r ? COR_ON : COR_OFF;
+ }
return 0;
}
else {
@@ -177,7 +182,7 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
#define cache_config_help_common \
"cache=MODE Set cache MODE, one of writeback (default),\n" \
" writethrough, or unsafe.\n" \
- "cache-on-read=BOOL Set to true to cache on reads (default false).\n"
+ "cache-on-read=BOOL|/PATH Set to true to cache on reads (default false).\n"
#ifndef HAVE_CACHE_RECLAIM
#define cache_config_help cache_config_help_common
#else
@@ -187,6 +192,18 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
"cache-low-threshold=PCT Percentage of max size where reclaim ends.\n"
#endif
+/* Decide if cache-on-read is currently on or off. */
+bool
+cache_on_read (void)
+{
+ switch (cor_mode) {
+ case COR_ON: return true;
+ case COR_OFF: return false;
+ case COR_PATH: return access (cor_path, F_OK) == 0;
+ default: abort ();
+ }
+}
+
static int
cache_config_complete (nbdkit_next_config_complete *next,
nbdkit_backend *nxdata)
diff --git a/filters/cache/cache.h b/filters/cache/cache.h
index 2b72221f..a559adef 100644
--- a/filters/cache/cache.h
+++ b/filters/cache/cache.h
@@ -49,7 +49,13 @@ extern unsigned blksize;
extern int64_t max_size;
extern unsigned hi_thresh, lo_thresh;
-/* Cache read requests. */
-extern bool cache_on_read;
+/* Cache on read mode. */
+extern enum cor_mode {
+ COR_OFF,
+ COR_ON,
+ COR_PATH,
+} cor_mode;
+extern const char *cor_path;
+extern bool cache_on_read (void);
#endif /* NBDKIT_CACHE_H */
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
index ebcf1d10..f20cb9ce 100644
--- a/filters/cache/nbdkit-cache-filter.pod
+++ b/filters/cache/nbdkit-cache-filter.pod
@@ -8,7 +8,7 @@ nbdkit-cache-filter - nbdkit caching filter
[cache-max-size=SIZE]
[cache-high-threshold=N]
[cache-low-threshold=N]
- [cache-on-read=true|false]
+ [cache-on-read=true|false|/PATH]
[plugin-args...]
=head1 DESCRIPTION
@@ -87,6 +87,15 @@ the plugin.
Do not cache read requests (this is the default).
+=item B<cache-on-read=/PATH>
+
+(nbdkit E<ge> 1.28)
+
+When F</PATH> (which must be an absolute path) exists, this behaves
+like C<cache-on-read=true>, and when it does not exist like
+C<cache-on-read=false>. This allows you to control the cache-on-read
+behaviour while nbdkit is running.
+
=back
=head1 CACHE MAXIMUM SIZE
--
2.31.1

View File

@ -0,0 +1,278 @@
From f7f4b71d559dc6950bc795742f64e8eaeeadf3ec Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 26 Jul 2021 16:30:26 +0100
Subject: [PATCH] cache: Add cache-min-block-size parameter
This allows you to choose a larger block size. I found experimentally
that this improves performance because of locality in access patterns.
The idea came from qcow2 which implicitly does the same thing because
of the relatively large cluster size (32K).
nbdkit + cache-filter with 4K block size + cache-on-read + curl
(to a very slow remote site):
=> virt-inspector took 22 mins
same with 64K block size:
=> virt-inspector took 19 mins
However compared to a qcow2 file using qemu's copy-on-read, backed
with nbdkit + curl we are still a lot slower, possibly because having
the cache inside virt-inspector greatly reduces round trip overhead:
=> virt-inspector took 13 mins
(cherry picked from commit 4ceacb6caa64e12bd78af5f90e86ee591e055944)
---
filters/cache/blk.c | 2 +-
filters/cache/cache.c | 36 ++++++++++----
filters/cache/cache.h | 3 ++
filters/cache/nbdkit-cache-filter.pod | 9 ++++
tests/Makefile.am | 2 +
tests/test-cache-block-size.sh | 70 +++++++++++++++++++++++++++
6 files changed, 112 insertions(+), 10 deletions(-)
create mode 100755 tests/test-cache-block-size.sh
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index 19f79605..6276985f 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -149,7 +149,7 @@ blk_init (void)
nbdkit_error ("fstatvfs: %s: %m", tmpdir);
return -1;
}
- blksize = MAX (4096, statvfs.f_bsize);
+ blksize = MAX (min_block_size, statvfs.f_bsize);
nbdkit_debug ("cache: block size: %u", blksize);
bitmap_init (&bm, blksize, 2 /* bits per block */);
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
index 44da0008..109ac89e 100644
--- a/filters/cache/cache.c
+++ b/filters/cache/cache.c
@@ -40,6 +40,7 @@
#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
+#include <limits.h>
#include <errno.h>
#include <assert.h>
#include <sys/types.h>
@@ -62,6 +63,7 @@
#include "blk.h"
#include "reclaim.h"
#include "isaligned.h"
+#include "ispowerof2.h"
#include "minmax.h"
#include "rounding.h"
@@ -70,7 +72,8 @@
*/
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
-unsigned blksize;
+unsigned blksize; /* actual block size (picked by blk.c) */
+unsigned min_block_size = 4096;
enum cache_mode cache_mode = CACHE_MODE_WRITEBACK;
int64_t max_size = -1;
unsigned hi_thresh = 95, lo_thresh = 80;
@@ -80,13 +83,6 @@ const char *cor_path;
static int cache_flush (nbdkit_next *next, void *handle, uint32_t flags,
int *err);
-static void
-cache_load (void)
-{
- if (blk_init () == -1)
- exit (EXIT_FAILURE);
-}
-
static void
cache_unload (void)
{
@@ -116,6 +112,19 @@ cache_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
return -1;
}
}
+ else if (strcmp (key, "cache-min-block-size") == 0) {
+ int64_t r;
+
+ r = nbdkit_parse_size (value);
+ if (r == -1)
+ return -1;
+ if (r < 4096 || !is_power_of_2 (r) || r > UINT_MAX) {
+ nbdkit_error ("cache-min-block-size is not a power of 2, or is too small or too large");
+ return -1;
+ }
+ min_block_size = r;
+ return 0;
+ }
#ifdef HAVE_CACHE_RECLAIM
else if (strcmp (key, "cache-max-size") == 0) {
int64_t r;
@@ -220,6 +229,15 @@ cache_config_complete (nbdkit_next_config_complete *next,
return next (nxdata);
}
+static int
+cache_get_ready (int thread_model)
+{
+ if (blk_init () == -1)
+ return -1;
+
+ return 0;
+}
+
/* Get the file size, set the cache size. */
static int64_t
cache_get_size (nbdkit_next *next,
@@ -691,11 +709,11 @@ cache_cache (nbdkit_next *next,
static struct nbdkit_filter filter = {
.name = "cache",
.longname = "nbdkit caching filter",
- .load = cache_load,
.unload = cache_unload,
.config = cache_config,
.config_complete = cache_config_complete,
.config_help = cache_config_help,
+ .get_ready = cache_get_ready,
.prepare = cache_prepare,
.get_size = cache_get_size,
.can_cache = cache_can_cache,
diff --git a/filters/cache/cache.h b/filters/cache/cache.h
index a559adef..5c32c37c 100644
--- a/filters/cache/cache.h
+++ b/filters/cache/cache.h
@@ -45,6 +45,9 @@ extern enum cache_mode {
/* Size of a block in the cache. */
extern unsigned blksize;
+/* Minimum block size (cache-min-block-size parameter). */
+extern unsigned min_block_size;
+
/* Maximum size of the cache and high/low thresholds. */
extern int64_t max_size;
extern unsigned hi_thresh, lo_thresh;
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
index f20cb9ce..6cbd1c08 100644
--- a/filters/cache/nbdkit-cache-filter.pod
+++ b/filters/cache/nbdkit-cache-filter.pod
@@ -5,6 +5,7 @@ nbdkit-cache-filter - nbdkit caching filter
=head1 SYNOPSIS
nbdkit --filter=cache plugin [cache=writeback|writethrough|unsafe]
+ [cache-min-block-size=SIZE]
[cache-max-size=SIZE]
[cache-high-threshold=N]
[cache-low-threshold=N]
@@ -59,6 +60,14 @@ This is dangerous and can cause data loss, but this may be acceptable
if you only use it for testing or with data that you don't care about
or can cheaply reconstruct.
+=item B<cache-min-block-size=>SIZE
+
+Set the minimum block size used by the cache. This must be a power of
+2 and E<ge> 4096.
+
+The default is 4096, or the block size of the filesystem which
+contains the temporary file storing the cache (whichever is larger).
+
=item B<cache-max-size=>SIZE
=item B<cache-high-threshold=>N
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9630205d..a038eabc 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1371,12 +1371,14 @@ EXTRA_DIST += test-blocksize.sh test-blocksize-extents.sh
# cache filter test.
TESTS += \
test-cache.sh \
+ test-cache-block-size.sh \
test-cache-on-read.sh \
test-cache-max-size.sh \
test-cache-unaligned.sh \
$(NULL)
EXTRA_DIST += \
test-cache.sh \
+ test-cache-block-size.sh \
test-cache-on-read.sh \
test-cache-max-size.sh \
test-cache-unaligned.sh \
diff --git a/tests/test-cache-block-size.sh b/tests/test-cache-block-size.sh
new file mode 100755
index 00000000..a2a27407
--- /dev/null
+++ b/tests/test-cache-block-size.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_filter cache
+requires_nbdsh_uri
+
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+files="cache-block-size.img $sock cache-block-size.pid"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Create an empty base image.
+truncate -s 128K cache-block-size.img
+
+# Run nbdkit with the caching filter.
+start_nbdkit -P cache-block-size.pid -U $sock --filter=cache \
+ file cache-block-size.img cache-min-block-size=64K
+
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c '
+# Write some pattern data to the overlay and check it reads back OK.
+buf = b"abcd" * 16384
+h.pwrite(buf, 32768)
+zero = h.pread(32768, 0)
+assert zero == bytearray(32768)
+buf2 = h.pread(65536, 32768)
+assert buf == buf2
+
+# Flushing should write through to the underlying file.
+h.flush()
+
+with open("cache-block-size.img", "rb") as file:
+ zero = file.read(32768)
+ assert zero == bytearray(32768)
+ buf2 = file.read(65536)
+ assert buf == buf2
+'
--
2.31.1

View File

@ -0,0 +1,138 @@
From 83e1167e1a350bd08ac6245f47a5877438408492 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 26 Jul 2021 17:39:23 +0100
Subject: [PATCH] cache, cow: Use a 64K block size by default
Based on the results presented in the previous commit, use a 64K block
size by default in both the cache and cow filters. For the cache
filter you could go back to a 4K block size if you wanted by using the
cache-min-block-size=4K parameter. For cow it is compiled in so
cannot be adjusted.
(cherry picked from commit c1905b0a28677d961babdb16d6f30ae61042c825)
---
filters/cache/cache.c | 2 +-
filters/cache/nbdkit-cache-filter.pod | 4 ++--
filters/cow/blk.h | 2 +-
tests/test-cache-block-size.sh | 2 +-
tests/test-cow-extents1.sh | 33 +++++++++++++++------------
5 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
index 109ac89e..c912c5fb 100644
--- a/filters/cache/cache.c
+++ b/filters/cache/cache.c
@@ -73,7 +73,7 @@
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
unsigned blksize; /* actual block size (picked by blk.c) */
-unsigned min_block_size = 4096;
+unsigned min_block_size = 65536;
enum cache_mode cache_mode = CACHE_MODE_WRITEBACK;
int64_t max_size = -1;
unsigned hi_thresh = 95, lo_thresh = 80;
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
index 6cbd1c08..df9c1f99 100644
--- a/filters/cache/nbdkit-cache-filter.pod
+++ b/filters/cache/nbdkit-cache-filter.pod
@@ -65,8 +65,8 @@ or can cheaply reconstruct.
Set the minimum block size used by the cache. This must be a power of
2 and E<ge> 4096.
-The default is 4096, or the block size of the filesystem which
-contains the temporary file storing the cache (whichever is larger).
+The default is 64K, or the block size of the filesystem which contains
+the temporary file storing the cache (whichever is larger).
=item B<cache-max-size=>SIZE
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
index b066c602..1bc85283 100644
--- a/filters/cow/blk.h
+++ b/filters/cow/blk.h
@@ -36,7 +36,7 @@
/* Size of a block in the overlay. A 4K block size means that we need
* 64 MB of memory to store the bitmap for a 1 TB underlying image.
*/
-#define BLKSIZE 4096
+#define BLKSIZE 65536
/* Initialize the overlay and bitmap. */
extern int blk_init (void);
diff --git a/tests/test-cache-block-size.sh b/tests/test-cache-block-size.sh
index a2a27407..d20cc940 100755
--- a/tests/test-cache-block-size.sh
+++ b/tests/test-cache-block-size.sh
@@ -47,7 +47,7 @@ truncate -s 128K cache-block-size.img
# Run nbdkit with the caching filter.
start_nbdkit -P cache-block-size.pid -U $sock --filter=cache \
- file cache-block-size.img cache-min-block-size=64K
+ file cache-block-size.img cache-min-block-size=4K
nbdsh --connect "nbd+unix://?socket=$sock" \
-c '
diff --git a/tests/test-cow-extents1.sh b/tests/test-cow-extents1.sh
index 8e0e0383..ebfd83f6 100755
--- a/tests/test-cow-extents1.sh
+++ b/tests/test-cow-extents1.sh
@@ -65,7 +65,7 @@ cleanup_fn rm -f $files
# Create a base file which is half allocated, half sparse.
dd if=/dev/urandom of=$base count=128 bs=1K
-truncate -s 256K $base
+truncate -s 4M $base
lastmod="$(stat -c "%y" $base)"
# Run nbdkit with a COW overlay.
@@ -76,30 +76,33 @@ uri="nbd+unix:///?socket=$sock"
nbdinfo --map "$uri" > $out
cat $out
if [ "$(tr -s ' ' < $out | cut -d' ' -f 1-4)" != " 0 131072 0
- 131072 131072 3" ]; then
+ 131072 4063232 3" ]; then
echo "$0: unexpected initial file map"
exit 1
fi
# Punch some holes.
nbdsh -u "$uri" \
- -c 'h.trim(4096, 4096)' \
- -c 'h.trim(4098, 16383)' \
- -c 'h.pwrite(b"1"*4096, 65536)' \
- -c 'h.trim(8192, 131072)' \
- -c 'h.pwrite(b"2"*8192, 196608)'
+ -c 'bs = 65536' \
+ -c 'h.trim(bs, bs)' \
+ -c 'h.trim(bs+2, 4*bs-1)' \
+ -c 'h.pwrite(b"1"*bs, 16*bs)' \
+ -c 'h.trim(2*bs, 32*bs)' \
+ -c 'h.pwrite(b"2"*(2*bs), 48*bs)'
# The extents map should be fully allocated.
nbdinfo --map "$uri" > $out
cat $out
-if [ "$(tr -s ' ' < $out | cut -d' ' -f 1-4)" != " 0 4096 0
- 4096 4096 3
- 8192 8192 0
- 16384 4096 3
- 20480 110592 0
- 131072 65536 3
- 196608 8192 0
- 204800 57344 3" ]; then
+if [ "$(tr -s ' ' < $out | cut -d' ' -f 1-4)" != " 0 65536 0
+ 65536 131072 3
+ 196608 65536 0
+ 262144 65536 3
+ 327680 65536 0
+ 393216 655360 3
+ 1048576 65536 0
+ 1114112 2031616 3
+ 3145728 131072 0
+ 3276800 917504 3" ]; then
echo "$0: unexpected trimmed file map"
exit 1
fi
--
2.31.1

View File

@ -0,0 +1,50 @@
From 2592bb42051b3e6d17240badc814b9b16f121c1d Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 27 Jul 2021 21:16:30 +0100
Subject: [PATCH] cache: Refactor printing state into new function
This minor refactoring just makes the cache and cow filters' blk.c a
little bit more similar.
(cherry picked from commit bdb86ea14c00a950f2a2d34071ac1e0799d29132)
---
filters/cache/blk.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index 6276985f..e50a7f24 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -94,6 +94,17 @@ enum bm_entry {
BLOCK_DIRTY = 3,
};
+static const char *
+state_to_string (enum bm_entry state)
+{
+ switch (state) {
+ case BLOCK_NOT_CACHED: return "not cached";
+ case BLOCK_CLEAN: return "clean";
+ case BLOCK_DIRTY: return "dirty";
+ default: abort ();
+ }
+}
+
/* Extra debugging (-D cache.verbose=1). */
NBDKIT_DLL_PUBLIC int cache_debug_verbose = 0;
@@ -312,10 +323,7 @@ blk_cache (nbdkit_next *next,
nbdkit_debug ("cache: blk_cache block %" PRIu64
" (offset %" PRIu64 ") is %s",
blknum, (uint64_t) offset,
- state == BLOCK_NOT_CACHED ? "not cached" :
- state == BLOCK_CLEAN ? "clean" :
- state == BLOCK_DIRTY ? "dirty" :
- "unknown");
+ state_to_string (state));
if (state == BLOCK_NOT_CACHED) {
/* Read underlying plugin, copy to cache regardless of cache-on-read. */
--
2.31.1

View File

@ -0,0 +1,147 @@
From 315948e75e06d038bd8afa319a41e3fde33b4174 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 29 Jul 2021 20:16:43 +0100
Subject: [PATCH] tests: cache: Test cache-on-read option really caches
By making use of the delay filter to add a penalty for hitting the
plugin we can check whether or not the cache-on-read option is
working.
(cherry picked from commit 3ae7aa533bb9322ab6dc6deecb687ded76634ab4)
---
tests/Makefile.am | 2 +
tests/test-cache-on-read-caches.sh | 87 ++++++++++++++++++++++++++++++
tests/test-cache-on-read.sh | 5 --
3 files changed, 89 insertions(+), 5 deletions(-)
create mode 100755 tests/test-cache-on-read-caches.sh
diff --git a/tests/Makefile.am b/tests/Makefile.am
index a038eabc..51ca913a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1373,6 +1373,7 @@ TESTS += \
test-cache.sh \
test-cache-block-size.sh \
test-cache-on-read.sh \
+ test-cache-on-read-caches.sh \
test-cache-max-size.sh \
test-cache-unaligned.sh \
$(NULL)
@@ -1380,6 +1381,7 @@ EXTRA_DIST += \
test-cache.sh \
test-cache-block-size.sh \
test-cache-on-read.sh \
+ test-cache-on-read-caches.sh \
test-cache-max-size.sh \
test-cache-unaligned.sh \
$(NULL)
diff --git a/tests/test-cache-on-read-caches.sh b/tests/test-cache-on-read-caches.sh
new file mode 100755
index 00000000..80b34159
--- /dev/null
+++ b/tests/test-cache-on-read-caches.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_filter cache
+requires_filter delay
+requires_nbdsh_uri
+
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+files="$sock cache-on-read-caches.pid"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Run nbdkit with the cache filter, cache-on-read and a read delay.
+start_nbdkit -P cache-on-read-caches.pid -U $sock \
+ --filter=cache --filter=delay \
+ memory 64K cache-on-read=true rdelay=10
+
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c '
+from time import time
+
+# First read should suffer a penalty. Because we are reading
+# a single 64K block (same size as the cache block), we should
+# only suffer one penalty of approx. 10 seconds.
+st = time()
+zb = h.pread(65536, 0)
+et = time()
+el = et-st
+print("elapsed time: %g" % el)
+assert et-st >= 10
+assert zb == bytearray(65536)
+
+# Second read should not suffer a penalty.
+st = time()
+zb = h.pread(65536, 0)
+et = time()
+el = et-st
+print("elapsed time: %g" % el)
+assert el < 10
+assert zb == bytearray(65536)
+
+# Write something.
+buf = b"abcd" * 16384
+h.pwrite(buf, 0)
+
+# Reading back should be quick since it is stored in the overlay.
+st = time()
+buf2 = h.pread(65536, 0)
+et = time()
+el = et-st
+print("elapsed time: %g" % el)
+assert el < 10
+assert buf == buf2
+'
diff --git a/tests/test-cache-on-read.sh b/tests/test-cache-on-read.sh
index f8584dcd..85ca83d4 100755
--- a/tests/test-cache-on-read.sh
+++ b/tests/test-cache-on-read.sh
@@ -56,9 +56,4 @@ zero = h.pread(32768, 0)
assert zero == bytearray(32768)
buf2 = h.pread(65536, 32768)
assert buf == buf2
-
-# XXX Suggestion to improve this test: Use the delay filter below the
-# cache filter, and time reads to prove that the second read is faster
-# because it is not going through the delay filter and plugin.
-# XXX second h.pread here ...
'
--
2.31.1

View File

@ -0,0 +1,457 @@
From 57f9bd29f9d7432ad5a70620c373b28db768a314 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 27 Jul 2021 23:01:52 +0100
Subject: [PATCH] cow: Implement cow-on-read
This is very similar to the nbdkit-cache-filter cache-on-read flag.
(cherry picked from commit bd93b3f27246f917de48a6cc2525d9c424c07976)
---
filters/cow/blk.c | 21 ++++++--
filters/cow/blk.h | 10 ++--
filters/cow/cow.c | 56 ++++++++++++++++----
filters/cow/nbdkit-cow-filter.pod | 17 ++++++
tests/Makefile.am | 4 ++
tests/test-cow-on-read-caches.sh | 87 +++++++++++++++++++++++++++++++
tests/test-cow-on-read.sh | 59 +++++++++++++++++++++
7 files changed, 236 insertions(+), 18 deletions(-)
create mode 100755 tests/test-cow-on-read-caches.sh
create mode 100755 tests/test-cow-on-read.sh
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 121b0dd4..4f84e092 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -226,7 +226,7 @@ blk_status (uint64_t blknum, bool *present, bool *trimmed)
int
blk_read_multiple (nbdkit_next *next,
uint64_t blknum, uint64_t nrblocks,
- uint8_t *block, int *err)
+ uint8_t *block, bool cow_on_read, int *err)
{
off_t offset = blknum * BLKSIZE;
enum bm_entry state;
@@ -276,6 +276,19 @@ blk_read_multiple (nbdkit_next *next,
* zeroing the tail.
*/
memset (block + n, 0, tail);
+
+ /* If cow-on-read is true then copy the blocks to the cache and
+ * set them as allocated.
+ */
+ if (cow_on_read) {
+ if (full_pwrite (fd, block, BLKSIZE * runblocks, offset) == -1) {
+ *err = errno;
+ nbdkit_error ("pwrite: %m");
+ return -1;
+ }
+ for (b = 0; b < runblocks; ++b)
+ bitmap_set_blk (&bm, blknum+b, BLOCK_ALLOCATED);
+ }
}
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
if (full_pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
@@ -297,14 +310,14 @@ blk_read_multiple (nbdkit_next *next,
blknum + runblocks,
nrblocks - runblocks,
block + BLKSIZE * runblocks,
- err);
+ cow_on_read, err);
}
int
blk_read (nbdkit_next *next,
- uint64_t blknum, uint8_t *block, int *err)
+ uint64_t blknum, uint8_t *block, bool cow_on_read, int *err)
{
- return blk_read_multiple (next, blknum, 1, block, err);
+ return blk_read_multiple (next, blknum, 1, block, cow_on_read, err);
}
int
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
index 1bc85283..b7e6f092 100644
--- a/filters/cow/blk.h
+++ b/filters/cow/blk.h
@@ -52,14 +52,16 @@ extern void blk_status (uint64_t blknum, bool *present, bool *trimmed);
/* Read a single block from the overlay or plugin. */
extern int blk_read (nbdkit_next *next,
- uint64_t blknum, uint8_t *block, int *err)
- __attribute__((__nonnull__ (1, 3, 4)));
+ uint64_t blknum, uint8_t *block,
+ bool cow_on_read, int *err)
+ __attribute__((__nonnull__ (1, 3, 5)));
/* Read multiple blocks from the overlay or plugin. */
extern int blk_read_multiple (nbdkit_next *next,
uint64_t blknum, uint64_t nrblocks,
- uint8_t *block, int *err)
- __attribute__((__nonnull__ (1, 4, 5)));
+ uint8_t *block,
+ bool cow_on_read, int *err)
+ __attribute__((__nonnull__ (1, 4, 6)));
/* Cache mode for blocks not already in overlay */
enum cache_mode {
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
index 78daca22..6efb39f2 100644
--- a/filters/cow/cow.c
+++ b/filters/cow/cow.c
@@ -38,6 +38,7 @@
#include <stdbool.h>
#include <inttypes.h>
#include <string.h>
+#include <unistd.h>
#include <errno.h>
#include <pthread.h>
@@ -59,6 +60,15 @@ static pthread_mutex_t rmw_lock = PTHREAD_MUTEX_INITIALIZER;
static bool cow_on_cache;
+/* Cache on read ("cow-on-read") mode. */
+extern enum cor_mode {
+ COR_OFF,
+ COR_ON,
+ COR_PATH,
+} cor_mode;
+enum cor_mode cor_mode = COR_OFF;
+const char *cor_path;
+
static void
cow_load (void)
{
@@ -85,13 +95,39 @@ cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
cow_on_cache = r;
return 0;
}
+ else if (strcmp (key, "cow-on-read") == 0) {
+ if (value[0] == '/') {
+ cor_path = value;
+ cor_mode = COR_PATH;
+ }
+ else {
+ int r = nbdkit_parse_bool (value);
+ if (r == -1)
+ return -1;
+ cor_mode = r ? COR_ON : COR_OFF;
+ }
+ return 0;
+ }
else {
return next (nxdata, key, value);
}
}
#define cow_config_help \
- "cow-on-cache=<BOOL> Set to true to treat client cache requests as writes.\n"
+ "cow-on-cache=<BOOL> Copy cache (prefetch) requests to the overlay.\n" \
+ "cow-on-read=<BOOL>|/PATH Copy read requests to the overlay."
+
+/* Decide if cow-on-read is currently on or off. */
+bool
+cow_on_read (void)
+{
+ switch (cor_mode) {
+ case COR_ON: return true;
+ case COR_OFF: return false;
+ case COR_PATH: return access (cor_path, F_OK) == 0;
+ default: abort ();
+ }
+}
static void *
cow_open (nbdkit_next_open *next, nbdkit_context *nxdata,
@@ -230,7 +266,7 @@ cow_pread (nbdkit_next *next,
uint64_t n = MIN (BLKSIZE - blkoffs, count);
assert (block);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r == -1)
return -1;
@@ -245,7 +281,7 @@ cow_pread (nbdkit_next *next,
/* Aligned body */
nrblocks = count / BLKSIZE;
if (nrblocks > 0) {
- r = blk_read_multiple (next, blknum, nrblocks, buf, err);
+ r = blk_read_multiple (next, blknum, nrblocks, buf, cow_on_read (), err);
if (r == -1)
return -1;
@@ -258,7 +294,7 @@ cow_pread (nbdkit_next *next,
/* Unaligned tail */
if (count) {
assert (block);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r == -1)
return -1;
@@ -299,7 +335,7 @@ cow_pwrite (nbdkit_next *next,
*/
assert (block);
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memcpy (&block[blkoffs], buf, n);
r = blk_write (blknum, block, err);
@@ -329,7 +365,7 @@ cow_pwrite (nbdkit_next *next,
if (count) {
assert (block);
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memcpy (block, buf, count);
r = blk_write (blknum, block, err);
@@ -379,7 +415,7 @@ cow_zero (nbdkit_next *next,
* Hold the rmw_lock over the whole operation.
*/
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memset (&block[blkoffs], 0, n);
r = blk_write (blknum, block, err);
@@ -411,7 +447,7 @@ cow_zero (nbdkit_next *next,
/* Unaligned tail */
if (count) {
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memset (block, 0, count);
r = blk_write (blknum, block, err);
@@ -455,7 +491,7 @@ cow_trim (nbdkit_next *next,
* Hold the lock over the whole operation.
*/
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memset (&block[blkoffs], 0, n);
r = blk_write (blknum, block, err);
@@ -482,7 +518,7 @@ cow_trim (nbdkit_next *next,
/* Unaligned tail */
if (count) {
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&rmw_lock);
- r = blk_read (next, blknum, block, err);
+ r = blk_read (next, blknum, block, cow_on_read (), err);
if (r != -1) {
memset (block, 0, count);
r = blk_write (blknum, block, err);
diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod
index 571189e7..01261429 100644
--- a/filters/cow/nbdkit-cow-filter.pod
+++ b/filters/cow/nbdkit-cow-filter.pod
@@ -62,6 +62,23 @@ the data from the plugin into the overlay.
Do not save data from cache (prefetch) requests in the overlay. This
leaves the overlay as small as possible. This is the default.
+=item B<cow-on-read=true>
+
+When the client issues a read request, copy the data into the overlay
+so that the same data can be served more quickly later.
+
+=item B<cow-on-read=false>
+
+Do not save data from read requests in the overlay. This leaves the
+overlay as small as possible. This is the default.
+
+=item B<cow-on-read=/PATH>
+
+When F</PATH> (which must be an absolute path) exists, this behaves
+like C<cow-on-read=true>, and when it does not exist like
+C<cow-on-read=false>. This allows you to control the C<cow-on-read>
+behaviour while nbdkit is running.
+
=back
=head1 EXAMPLES
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 51ca913a..edc8d66d 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1407,6 +1407,8 @@ TESTS += \
test-cow-extents1.sh \
test-cow-extents2.sh \
test-cow-extents-large.sh \
+ test-cow-on-read.sh \
+ test-cow-on-read-caches.sh \
test-cow-unaligned.sh \
$(NULL)
endif
@@ -1417,6 +1419,8 @@ EXTRA_DIST += \
test-cow-extents2.sh \
test-cow-extents-large.sh \
test-cow-null.sh \
+ test-cow-on-read.sh \
+ test-cow-on-read-caches.sh \
test-cow-unaligned.sh \
$(NULL)
diff --git a/tests/test-cow-on-read-caches.sh b/tests/test-cow-on-read-caches.sh
new file mode 100755
index 00000000..c5b60198
--- /dev/null
+++ b/tests/test-cow-on-read-caches.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_filter cow
+requires_filter delay
+requires_nbdsh_uri
+
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+files="$sock cow-on-read-caches.pid"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Run nbdkit with the cow filter, cow-on-read and a read delay.
+start_nbdkit -P cow-on-read-caches.pid -U $sock \
+ --filter=cow --filter=delay \
+ memory 64K cow-on-read=true rdelay=10
+
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c '
+from time import time
+
+# First read should suffer a penalty. Because we are reading
+# a single 64K block (same size as the COW block), we should
+# only suffer one penalty of approx. 10 seconds.
+st = time()
+zb = h.pread(65536, 0)
+et = time()
+el = et-st
+print("elapsed time: %g" % el)
+assert et-st >= 10
+assert zb == bytearray(65536)
+
+# Second read should not suffer a penalty.
+st = time()
+zb = h.pread(65536, 0)
+et = time()
+el = et-st
+print("elapsed time: %g" % el)
+assert el < 10
+assert zb == bytearray(65536)
+
+# Write something.
+buf = b"abcd" * 16384
+h.pwrite(buf, 0)
+
+# Reading back should be quick since it is stored in the overlay.
+st = time()
+buf2 = h.pread(65536, 0)
+et = time()
+el = et-st
+print("elapsed time: %g" % el)
+assert el < 10
+assert buf == buf2
+'
diff --git a/tests/test-cow-on-read.sh b/tests/test-cow-on-read.sh
new file mode 100755
index 00000000..4f58b33b
--- /dev/null
+++ b/tests/test-cow-on-read.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_filter cow
+requires_nbdsh_uri
+
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+files="$sock cow-on-read.pid"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Run nbdkit with the cow filter and cow-on-read.
+start_nbdkit -P cow-on-read.pid -U $sock \
+ --filter=cow \
+ memory 128K cow-on-read=true
+
+nbdsh --connect "nbd+unix://?socket=$sock" \
+ -c '
+# Write some pattern data to the overlay and check it reads back OK.
+buf = b"abcd" * 16384
+h.pwrite(buf, 32768)
+zero = h.pread(32768, 0)
+assert zero == bytearray(32768)
+buf2 = h.pread(65536, 32768)
+assert buf == buf2
+'
--
2.31.1

View File

@ -0,0 +1,170 @@
From a7e7af18d64164fac42581452f6dc3c07650fcae Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 Jul 2021 10:19:57 +0100
Subject: [PATCH] delay: Add delay-open and delay-close
Useful for simulating VDDK which has very slow connection.
(cherry picked from commit de8dcd3a34a38b088a0f9a6f8ca754702ad1f598)
---
filters/delay/delay.c | 58 ++++++++++++++++++++++++++-
filters/delay/nbdkit-delay-filter.pod | 27 +++++++++++--
2 files changed, 80 insertions(+), 5 deletions(-)
diff --git a/filters/delay/delay.c b/filters/delay/delay.c
index 5a925aa4..df3729a7 100644
--- a/filters/delay/delay.c
+++ b/filters/delay/delay.c
@@ -48,6 +48,8 @@ static unsigned delay_zero_ms = 0; /* zero delay (milliseconds) */
static unsigned delay_trim_ms = 0; /* trim delay (milliseconds) */
static unsigned delay_extents_ms = 0;/* extents delay (milliseconds) */
static unsigned delay_cache_ms = 0; /* cache delay (milliseconds) */
+static unsigned delay_open_ms = 0; /* open delay (milliseconds) */
+static unsigned delay_close_ms = 0; /* close delay (milliseconds) */
static int delay_fast_zero = 1; /* whether delaying zero includes fast zero */
@@ -126,6 +128,18 @@ cache_delay (int *err)
return delay (delay_cache_ms, err);
}
+static int
+open_delay (int *err)
+{
+ return delay (delay_open_ms, err);
+}
+
+static int
+close_delay (int *err)
+{
+ return delay (delay_close_ms, err);
+}
+
/* Called for each key=value passed on the command line. */
static int
delay_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
@@ -182,6 +196,16 @@ delay_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
return -1;
return 0;
}
+ else if (strcmp (key, "delay-open") == 0) {
+ if (parse_delay (key, value, &delay_open_ms) == -1)
+ return -1;
+ return 0;
+ }
+ else if (strcmp (key, "delay-close") == 0) {
+ if (parse_delay (key, value, &delay_close_ms) == -1)
+ return -1;
+ return 0;
+ }
else
return next (nxdata, key, value);
}
@@ -195,7 +219,9 @@ delay_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
"delay-extents=<NN>[ms] Extents delay in seconds/milliseconds.\n" \
"delay-cache=<NN>[ms] Cache delay in seconds/milliseconds.\n" \
"wdelay=<NN>[ms] Write, zero and trim delay in secs/msecs.\n" \
- "delay-fast-zero=<BOOL> Delay fast zero requests (default true).\n"
+ "delay-fast-zero=<BOOL> Delay fast zero requests (default true).\n" \
+ "delay-open=<NN>[ms] Open delay in seconds/milliseconds.\n" \
+ "delay-close=<NN>[ms] Close delay in seconds/milliseconds."
/* Override the plugin's .can_fast_zero if needed */
static int
@@ -208,6 +234,34 @@ delay_can_fast_zero (nbdkit_next *next,
return next->can_fast_zero (next);
}
+/* Open connection. */
+static void *
+delay_open (nbdkit_next_open *next, nbdkit_context *nxdata,
+ int readonly, const char *exportname, int is_tls)
+{
+ int err;
+
+ if (open_delay (&err) == -1) {
+ errno = err;
+ nbdkit_error ("delay: %m");
+ return NULL;
+ }
+
+ if (next (nxdata, readonly, exportname) == -1)
+ return NULL;
+
+ return NBDKIT_HANDLE_NOT_NEEDED;
+}
+
+/* Close connection. */
+static void
+delay_close (void *handle)
+{
+ int err;
+
+ close_delay (&err);
+}
+
/* Read data. */
static int
delay_pread (nbdkit_next *next,
@@ -285,6 +339,8 @@ static struct nbdkit_filter filter = {
.config = delay_config,
.config_help = delay_config_help,
.can_fast_zero = delay_can_fast_zero,
+ .open = delay_open,
+ .close = delay_close,
.pread = delay_pread,
.pwrite = delay_pwrite,
.zero = delay_zero,
diff --git a/filters/delay/nbdkit-delay-filter.pod b/filters/delay/nbdkit-delay-filter.pod
index d6961a9e..11ae544b 100644
--- a/filters/delay/nbdkit-delay-filter.pod
+++ b/filters/delay/nbdkit-delay-filter.pod
@@ -9,10 +9,15 @@ nbdkit-delay-filter - nbdkit delay filter
nbdkit --filter=delay plugin rdelay=NNms wdelay=NNms [plugin-args...]
nbdkit --filter=delay plugin [plugin-args ...]
- delay-read=(SECS|NNms) delay-write=(SECS|NNms)
- delay-zero=(SECS|NNms) delay-trim=(SECS|NNms)
- delay-extents=(SECS|NNms) delay-cache=(SECS|NNms)
+ delay-read=(SECS|NNms)
+ delay-write=(SECS|NNms)
+ delay-zero=(SECS|NNms)
+ delay-trim=(SECS|NNms)
+ delay-extents=(SECS|NNms)
+ delay-cache=(SECS|NNms)
delay-fast-zero=BOOL
+ delay-open=(SECS|NNms)
+ delay-close=(SECS|NNms)
=head1 DESCRIPTION
@@ -108,6 +113,20 @@ delay as any other zero request; but setting this parameter to false
instantly fails a fast zero response without waiting for or consulting
the plugin.
+=item B<delay-open=>SECS
+
+=item B<delay-open=>NNB<ms>
+
+=item B<delay-close=>SECS
+
+=item B<delay-close=>NNB<ms>
+
+(nbdkit E<ge> 1.28)
+
+Delay open and close operations by C<SECS> seconds or C<NN>
+milliseconds. Open corresponds to client connection. Close may not
+be visible to clients if they abruptly disconnect.
+
=back
=head1 FILES
@@ -140,4 +159,4 @@ Richard W.M. Jones
=head1 COPYRIGHT
-Copyright (C) 2018 Red Hat Inc.
+Copyright (C) 2018-2021 Red Hat Inc.
--
2.31.1

View File

@ -0,0 +1,90 @@
From 17a912a449fa75b5c12ac3acab596b476699c671 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 3 Aug 2021 14:19:38 +0100
Subject: [PATCH] python: Implement .cleanup() method
(cherry picked from commit f2fe99e4b0f54467ab8028eaf2d039cf918b2961)
---
plugins/python/nbdkit-python-plugin.pod | 20 +++++++++++++++++---
plugins/python/plugin.c | 19 +++++++++++++++++++
2 files changed, 36 insertions(+), 3 deletions(-)
diff --git a/plugins/python/nbdkit-python-plugin.pod b/plugins/python/nbdkit-python-plugin.pod
index 6f5f2c00..a92a557f 100644
--- a/plugins/python/nbdkit-python-plugin.pod
+++ b/plugins/python/nbdkit-python-plugin.pod
@@ -257,6 +257,12 @@ There are no arguments or return value.
There are no arguments or return value.
+=item C<cleanup>
+
+(Optional, nbdkit E<ge> 1.28)
+
+There are no arguments or return value.
+
=item C<list_exports>
(Optional)
@@ -498,10 +504,18 @@ optionally using C<nbdkit.set_error> first.
=over 4
-=item Missing: C<load> and C<unload>
+=item Missing: C<load>
-These are not needed because you can just use ordinary Python
-constructs.
+This is not needed since you can use regular Python mechanisms like
+top level statements to run code when the module is loaded.
+
+=item Missing: C<unload>
+
+This is missing, but in nbdkit E<ge> 1.28 you can put code in the
+C<cleanup()> function to have it run when nbdkit exits. In earlier
+versions of nbdkit, using a Python
+L<atexit|https://docs.python.org/3/library/atexit.html> handler is
+recommended.
=item Missing:
C<name>,
diff --git a/plugins/python/plugin.c b/plugins/python/plugin.c
index 64430a1a..f85512b4 100644
--- a/plugins/python/plugin.c
+++ b/plugins/python/plugin.c
@@ -298,6 +298,24 @@ py_after_fork (void)
return 0;
}
+static void
+py_cleanup (void)
+{
+ ACQUIRE_PYTHON_GIL_FOR_CURRENT_SCOPE;
+ PyObject *fn;
+ PyObject *r;
+
+ if (callback_defined ("cleanup", &fn)) {
+ PyErr_Clear ();
+
+ r = PyObject_CallObject (fn, NULL);
+ Py_DECREF (fn);
+ if (check_python_failure ("cleanup") == -1)
+ return;
+ Py_DECREF (r);
+ }
+}
+
static int
py_list_exports (int readonly, int is_tls, struct nbdkit_exports *exports)
{
@@ -1039,6 +1057,7 @@ static struct nbdkit_plugin plugin = {
.thread_model = py_thread_model,
.get_ready = py_get_ready,
.after_fork = py_after_fork,
+ .cleanup = py_cleanup,
.list_exports = py_list_exports,
.default_export = py_default_export,
--
2.31.1

View File

@ -0,0 +1,150 @@
From e9abe97b40fef6f9bd9028a2520f45203bba0749 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 5 Aug 2021 18:18:34 +0100
Subject: [PATCH] cow: General revision and updates to the manual
(cherry picked from commit ba5517b81307c228577cf3c54a651d044ac91a25)
---
filters/cow/nbdkit-cow-filter.pod | 74 ++++++++++++++++---------------
1 file changed, 39 insertions(+), 35 deletions(-)
diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod
index 01261429..7f861140 100644
--- a/filters/cow/nbdkit-cow-filter.pod
+++ b/filters/cow/nbdkit-cow-filter.pod
@@ -5,33 +5,23 @@ nbdkit-cow-filter - nbdkit copy-on-write (COW) filter
=head1 SYNOPSIS
nbdkit --filter=cow plugin [plugin-args...]
+ [cow-on-cache=false|true]
+ [cow-on-read=false|true|/PATH]
=head1 DESCRIPTION
C<nbdkit-cow-filter> is a filter that makes a temporary writable copy
-on top of a read-only plugin. It can be used to enable writes for
-plugins which only implement read-only access. Note that:
+on top of a plugin. It can also be used to enable writes for plugins
+which are read-only.
-=over 4
+The underlying plugin is opened read-only. This filter does not pass
+any writes or write-like operations (like trim and zero) through to
+the underlying plugin.
-=item *
-
-B<Anything written is thrown away as soon as nbdkit exits.>
-
-=item *
-
-All connections to the nbdkit instance see the same view of the disk.
-
-This is different from L<nbd-server(1)> where each connection sees its
-own copy-on-write overlay and simply disconnecting the client throws
-that away. It also allows us to create diffs, see below.
-
-=item *
-
-The plugin is opened read-only (as if the I<-r> flag was passed), but
-you should B<not> pass the I<-r> flag to nbdkit.
-
-=back
+B<Note that anything written is thrown away as soon as nbdkit exits.>
+If you want to save changes, either copy out the whole disk using a
+tool like L<nbdcopy(1)>, or use the method described in L</NOTES>
+below to create a diff.
Limitations of the filter include:
@@ -52,26 +42,26 @@ serve the same data to each client.
=over 4
-=item B<cow-on-cache=true>
-
-When the client issues a cache (prefetch) request, preemptively save
-the data from the plugin into the overlay.
-
=item B<cow-on-cache=false>
Do not save data from cache (prefetch) requests in the overlay. This
leaves the overlay as small as possible. This is the default.
-=item B<cow-on-read=true>
+=item B<cow-on-cache=true>
-When the client issues a read request, copy the data into the overlay
-so that the same data can be served more quickly later.
+When the client issues a cache (prefetch) request, preemptively save
+the data from the plugin into the overlay.
=item B<cow-on-read=false>
Do not save data from read requests in the overlay. This leaves the
overlay as small as possible. This is the default.
+=item B<cow-on-read=true>
+
+When the client issues a read request, copy the data into the overlay
+so that the same data can be served more quickly later.
+
=item B<cow-on-read=/PATH>
When F</PATH> (which must be an absolute path) exists, this behaves
@@ -83,18 +73,23 @@ behaviour while nbdkit is running.
=head1 EXAMPLES
+=head2 nbdkit --filter=cow file disk.img
+
Serve the file F<disk.img>, allowing writes, but do not save any
-changes into the file:
+changes into the file.
- nbdkit --filter=cow file disk.img
+=head2 nbdkit --filter=cow --filter=xz file disk.xz cow-on-read=true
L<nbdkit-xz-filter(1)> only supports read access, but you can provide
-temporary write access by doing (although this does B<not> save
-changes to the file):
+temporary write access by using the command above. Because xz
+decompression is slow, using C<cow-on-read=true> causes reads to be
+cached as well as writes, improving performance at the expense of
+using more temporary space. Note that writes are thrown away when
+nbdkit exits and do not get saved into the file.
- nbdkit --filter=cow --filter=xz file disk.xz
+=head1 NOTES
-=head1 CREATING A DIFF WITH QEMU-IMG
+=head2 Creating a diff with qemu-img
Although nbdkit-cow-filter itself cannot save the differences, it is
possible to do this using an obscure feature of L<qemu-img(1)>.
@@ -118,6 +113,14 @@ F<diff.qcow2> now contains the differences between the base
(F<disk.img>) and the changes stored in nbdkit-cow-filter. C<nbdkit>
can now be killed.
+=head2 Compared to nbd-server -c option
+
+All connections to the nbdkit instance see the same view of the disk.
+This is different from L<nbd-server(1)> I<-c> option where each
+connection sees its own copy-on-write overlay and simply disconnecting
+the client throws that away. It also allows us to create diffs as
+above.
+
=head1 ENVIRONMENT VARIABLES
=over 4
@@ -154,6 +157,7 @@ L<nbdkit-cache-filter(1)>,
L<nbdkit-cacheextents-filter(1)>,
L<nbdkit-xz-filter(1)>,
L<nbdkit-filter(3)>,
+L<nbdcopy(1)>,
L<qemu-img(1)>.
=head1 AUTHORS
--
2.31.1

View File

@ -0,0 +1,35 @@
From c8c1e74a8c1c112b83646ac09fe7f9bde097a52a Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 5 Aug 2021 18:20:37 +0100
Subject: [PATCH] cache: Move plugin-args in synopsis earlier
Makes this page consistent with nbdkit-cow-filter.
(cherry picked from commit f1ddcef468907b0321041b1c4e0a430be46920be)
---
filters/cache/nbdkit-cache-filter.pod | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod
index df9c1f99..d85fef09 100644
--- a/filters/cache/nbdkit-cache-filter.pod
+++ b/filters/cache/nbdkit-cache-filter.pod
@@ -4,13 +4,13 @@ nbdkit-cache-filter - nbdkit caching filter
=head1 SYNOPSIS
- nbdkit --filter=cache plugin [cache=writeback|writethrough|unsafe]
+ nbdkit --filter=cache plugin [plugin-args...]
+ [cache=writeback|writethrough|unsafe]
[cache-min-block-size=SIZE]
[cache-max-size=SIZE]
[cache-high-threshold=N]
[cache-low-threshold=N]
[cache-on-read=true|false|/PATH]
- [plugin-args...]
=head1 DESCRIPTION
--
2.31.1

View File

@ -0,0 +1,53 @@
From 0eae7ebf6f714fb339f4a476b65e070b528824ec Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sun, 8 Aug 2021 16:32:38 +0100
Subject: [PATCH] data: Improve the example with a diagram
And other improvements to readability.
(cherry picked from commit 4e3a9bda2b7a3d141234e26250c69baa6ed5194d)
---
plugins/data/nbdkit-data-plugin.pod | 24 +++++++++++++++---------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/plugins/data/nbdkit-data-plugin.pod b/plugins/data/nbdkit-data-plugin.pod
index 32a450ab..b69435e9 100644
--- a/plugins/data/nbdkit-data-plugin.pod
+++ b/plugins/data/nbdkit-data-plugin.pod
@@ -172,18 +172,24 @@ compact format. It is a string containing a list of bytes which are
written into the disk image sequentially. You can move the virtual
offset where bytes are written using C<@offset>.
-For example:
-
nbdkit data '0 1 2 3 @0x1fe 0x55 0xaa'
-creates a 0x200 = 512 byte (1 sector) image containing the four bytes
-C<0 1 2 3> at the start, and the two bytes C<0x55 0xaa> at the end of
-the sector, with the remaining 506 bytes in the middle being all
-zeroes. In this example the size (512 bytes) is implied by the data.
-But you could additionally use the C<size> parameter to either
-truncate or extend (with zeroes) the disk image.
+creates:
-Whitespace between fields in the string is ignored.
+ total size 0x200 = 512 bytes (1 sector)
+┌──────┬──────┬──────┬──────┬───────── ── ── ───┬──────┬──────┐
+│ 0 │ 1 │ 2 │ 3 │ 0 0 ... 0 │ 0x55 │ 0xaa │
+└──────┴──────┴──────┴──────┴───────── ── ── ───┴──────┴──────┘
+ ↑
+ offset 0x1fe
+
+In this example the size is implied by the data. But you could also
+use the C<size> parameter to either truncate or extend (with zeroes)
+the disk image. Another way to write the same disk would be this,
+where we align the offset to the end of the sector and move back 2
+bytes to write the signature:
+
+ nbdkit data '0 1 2 3 @^0x200 @-2 le16:0xaa55'
Fields in the string can be:
--
2.31.1

View File

@ -0,0 +1,45 @@
From a22248e3075e782d28542f8f6acd046c9dfa8998 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 9 Aug 2021 14:09:31 +0100
Subject: [PATCH] cow: Add some more debugging especially for blk_read_multiple
and cow-on-read
Only activated when we use -D cow.verbose=1
(cherry picked from commit 2da1ae0ca966af955d8fcf3feffffc80d07142fd)
---
filters/cow/blk.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 4f84e092..c22d5886 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -254,8 +254,10 @@ blk_read_multiple (nbdkit_next *next,
if (cow_debug_verbose)
nbdkit_debug ("cow: blk_read_multiple block %" PRIu64
- " (offset %" PRIu64 ") is %s",
- blknum, (uint64_t) offset, state_to_string (state));
+ " (offset %" PRIu64 ") run of length %" PRIu64
+ " is %s",
+ blknum, (uint64_t) offset, runblocks,
+ state_to_string (state));
if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
unsigned n, tail = 0;
@@ -281,6 +283,11 @@ blk_read_multiple (nbdkit_next *next,
* set them as allocated.
*/
if (cow_on_read) {
+ if (cow_debug_verbose)
+ nbdkit_debug ("cow: cow-on-read saving %" PRIu64 " blocks "
+ "at offset %" PRIu64 " into the cache",
+ runblocks, offset);
+
if (full_pwrite (fd, block, BLKSIZE * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
--
2.31.1

View File

@ -0,0 +1,142 @@
From be7252bada79ee542356dffaf5f3c568a5c7fec3 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 10 Aug 2021 08:39:15 +0100
Subject: [PATCH] delay: Fix delay-close
See comments in the code for how this has been fixed.
This only delays clients which use NBD_CMD_DISC (libnbd
nbd_shutdown(3)). Clients which drop the connection obviously cannot
be delayed. For example:
$ nbdkit --filter=delay null delay-close=3 \
--run 'time nbdsh -u $uri -c "h.shutdown()"
time nbdsh -u $uri -c "pass"'
real 0m3.061s # Client used shutdown, was delayed
user 0m0.028s
sys 0m0.030s
real 0m0.058s # Client disconnected, was not delayed
user 0m0.029s
sys 0m0.027s
Reported-by: Ming Xie
Fixes: commit de8dcd3a34a38b088a0f9a6f8ca754702ad1f598
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1991652
(cherry picked from commit 0cafebdb67d0d557ba1be8ea306b8acc5d9b2203)
---
filters/delay/delay.c | 42 +++++++++++++++++++--------
filters/delay/nbdkit-delay-filter.pod | 14 +++++++--
2 files changed, 41 insertions(+), 15 deletions(-)
diff --git a/filters/delay/delay.c b/filters/delay/delay.c
index df3729a7..9252b855 100644
--- a/filters/delay/delay.c
+++ b/filters/delay/delay.c
@@ -39,6 +39,7 @@
#include <stdbool.h>
#include <errno.h>
#include <limits.h>
+#include <time.h>
#include <nbdkit-filter.h>
@@ -134,12 +135,6 @@ open_delay (int *err)
return delay (delay_open_ms, err);
}
-static int
-close_delay (int *err)
-{
- return delay (delay_close_ms, err);
-}
-
/* Called for each key=value passed on the command line. */
static int
delay_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
@@ -253,13 +248,36 @@ delay_open (nbdkit_next_open *next, nbdkit_context *nxdata,
return NBDKIT_HANDLE_NOT_NEEDED;
}
-/* Close connection. */
-static void
-delay_close (void *handle)
+/* Close connection.
+ *
+ * We cannot call nbdkit_nanosleep here because the socket may have
+ * been closed and that function will abort and return immediately.
+ * However we want to force a sleep (even if the server is shutting
+ * down) so use regular nanosleep instead.
+ *
+ * We cannot use the .close callback because that happens after the
+ * socket has closed, thus not delaying the client. By using
+ * .finalize we can delay well-behaved clients (those that use
+ * NBD_CMD_DISC). We cannot delay clients that drop the connection.
+ */
+static int
+delay_finalize (nbdkit_next *next, void *handle)
{
- int err;
+ const unsigned ms = delay_close_ms;
- close_delay (&err);
+ if (ms > 0) {
+ struct timespec ts;
+
+ ts.tv_sec = ms / 1000;
+ ts.tv_nsec = (ms % 1000) * 1000000;
+ /* If nanosleep fails we don't really want to interrupt the chain
+ * of finalize calls through the other filters, so ignore any
+ * error here.
+ */
+ nanosleep (&ts, NULL);
+ }
+
+ return next->finalize (next);
}
/* Read data. */
@@ -340,7 +358,7 @@ static struct nbdkit_filter filter = {
.config_help = delay_config_help,
.can_fast_zero = delay_can_fast_zero,
.open = delay_open,
- .close = delay_close,
+ .finalize = delay_finalize,
.pread = delay_pread,
.pwrite = delay_pwrite,
.zero = delay_zero,
diff --git a/filters/delay/nbdkit-delay-filter.pod b/filters/delay/nbdkit-delay-filter.pod
index 11ae544b..76614736 100644
--- a/filters/delay/nbdkit-delay-filter.pod
+++ b/filters/delay/nbdkit-delay-filter.pod
@@ -117,15 +117,23 @@ the plugin.
=item B<delay-open=>NNB<ms>
+(nbdkit E<ge> 1.28)
+
+Delay open (client connection) by C<SECS> seconds or C<NN>
+milliseconds.
+
=item B<delay-close=>SECS
=item B<delay-close=>NNB<ms>
(nbdkit E<ge> 1.28)
-Delay open and close operations by C<SECS> seconds or C<NN>
-milliseconds. Open corresponds to client connection. Close may not
-be visible to clients if they abruptly disconnect.
+Delay close (client disconnection) by C<SECS> seconds or C<NN>
+milliseconds. This can also cause server shutdown to be delayed if
+clients are connected at the time. This only affects clients that
+gracefully disconnect (using C<NBD_CMD_DISC> / libnbd function
+L<nbd_shutdown(3)>). Clients that abruptly disconnect from the server
+cannot be delayed.
=back
--
2.31.1

View File

@ -0,0 +1,155 @@
From 838ec052abe63056434c08ea80f4609e697dad0f Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 10 Aug 2021 09:11:43 +0100
Subject: [PATCH] delay: Test delay-open and delay-close
(cherry picked from commit 3caabaf87ec744b863b50b5bf77a9c1b93a7c3e0)
---
tests/Makefile.am | 12 +++++++--
tests/test-delay-close.sh | 54 +++++++++++++++++++++++++++++++++++++++
tests/test-delay-open.sh | 49 +++++++++++++++++++++++++++++++++++
3 files changed, 113 insertions(+), 2 deletions(-)
create mode 100755 tests/test-delay-close.sh
create mode 100755 tests/test-delay-open.sh
diff --git a/tests/Makefile.am b/tests/Makefile.am
index edc8d66d..e61c5829 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1425,8 +1425,16 @@ EXTRA_DIST += \
$(NULL)
# delay filter tests.
-TESTS += test-delay-shutdown.sh
-EXTRA_DIST += test-delay-shutdown.sh
+TESTS += \
+ test-delay-close.sh \
+ test-delay-open.sh \
+ test-delay-shutdown.sh \
+ $(NULL)
+EXTRA_DIST += \
+ test-delay-close.sh \
+ test-delay-open.sh \
+ test-delay-shutdown.sh \
+ $(NULL)
LIBNBD_TESTS += test-delay
test_delay_SOURCES = test-delay.c
diff --git a/tests/test-delay-close.sh b/tests/test-delay-close.sh
new file mode 100755
index 00000000..1de305f5
--- /dev/null
+++ b/tests/test-delay-close.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_run
+requires_plugin null
+requires_filter delay
+requires nbdsh --version
+
+# Test delay-close with a well-behaved client.
+
+nbdkit -U - null --filter=delay delay-close=3 \
+ --run '
+start_t=$SECONDS
+nbdsh -u "$uri" -c "h.shutdown()"
+end_t=$SECONDS
+
+if [ $((end_t - start_t)) -lt 3 ]; then
+ echo "$0: delay filter failed: delay-close=3 caused delay < 3 seconds"
+ exit 1
+fi
+'
diff --git a/tests/test-delay-open.sh b/tests/test-delay-open.sh
new file mode 100755
index 00000000..2a74e44c
--- /dev/null
+++ b/tests/test-delay-open.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_run
+requires_plugin null
+requires_filter delay
+requires nbdinfo --version
+
+start_t=$SECONDS
+nbdkit -U - null --filter=delay delay-open=3 --run 'nbdinfo "$uri"'
+end_t=$SECONDS
+
+if [ $((end_t - start_t)) -lt 3 ]; then
+ echo "$0: delay filter failed: delay-open=3 caused delay < 3 seconds"
+ exit 1
+fi
--
2.31.1

View File

@ -0,0 +1,74 @@
From 2104686eb708bf87070c21e7af0e70e0317306b6 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 5 Jul 2021 21:36:41 +0100
Subject: [PATCH] vddk: Implement can_flush and can_fua
VDDK < 6.0 doesn't support flush. Previously we advertised flush and
FUA but ignored them if VDDK didn't support it. Instead, correctly
set these flags in the NBD protocol according to what VDDK supports.
(cherry picked from commit 04b05274414a8cf4615eb2d6f46d5658814509c1)
---
plugins/vddk/vddk.c | 28 ++++++++++++++++++++--------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 76faa768..b5bce9a0 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -772,12 +772,28 @@ vddk_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
return -1;
}
- if (fua && vddk_flush (handle, 0) == -1)
- return -1;
+ if (fua) {
+ if (vddk_flush (handle, 0) == -1)
+ return -1;
+ }
return 0;
}
+static int
+vddk_can_fua (void *handle)
+{
+ /* The Flush call was not available in VDDK < 6.0. */
+ return VixDiskLib_Flush != NULL ? NBDKIT_FUA_NATIVE : NBDKIT_FUA_NONE;
+}
+
+static int
+vddk_can_flush (void *handle)
+{
+ /* The Flush call was not available in VDDK < 6.0. */
+ return VixDiskLib_Flush != NULL;
+}
+
/* Flush data to the file. */
static int
vddk_flush (void *handle, uint32_t flags)
@@ -785,12 +801,6 @@ vddk_flush (void *handle, uint32_t flags)
struct vddk_handle *h = handle;
VixError err;
- /* The Flush call was not available in VDDK < 6.0 so this is simply
- * ignored on earlier versions.
- */
- if (VixDiskLib_Flush == NULL)
- return 0;
-
DEBUG_CALL ("VixDiskLib_Flush", "handle");
err = VixDiskLib_Flush (h->handle);
if (err != VIX_OK) {
@@ -985,6 +995,8 @@ static struct nbdkit_plugin plugin = {
.get_size = vddk_get_size,
.pread = vddk_pread,
.pwrite = vddk_pwrite,
+ .can_fua = vddk_can_fua,
+ .can_flush = vddk_can_flush,
.flush = vddk_flush,
.can_extents = vddk_can_extents,
.extents = vddk_extents,
--
2.31.1

View File

@ -0,0 +1,319 @@
From 51713e7702d389fd55d5721c4773fca40e3e89f6 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Wed, 18 Aug 2021 14:26:30 +0100
Subject: [PATCH] vddk: Replace DEBUG_CALL with bracketed VDDK_CALL_START/END
macros
This is neutral refactoring, but allows us in the next commit to
collect statistics about the amount of time spent in these calls.
(cherry picked from commit 1335ebfb5637bf5a44403d0b152da7272fdd3e54)
---
plugins/vddk/vddk.c | 175 +++++++++++++++++++++++++-------------------
1 file changed, 99 insertions(+), 76 deletions(-)
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index b5bce9a0..888009ab 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -104,19 +104,23 @@ static bool is_remote;
VixDiskLib_FreeErrorText (vddk_err_msg); \
} while (0)
-#define DEBUG_CALL(fn, fs, ...) \
- nbdkit_debug ("VDDK call: %s (" fs ")", fn, ##__VA_ARGS__)
-#define DEBUG_CALL_DATAPATH(fn, fs, ...) \
- if (vddk_debug_datapath) \
- nbdkit_debug ("VDDK call: %s (" fs ")", fn, ##__VA_ARGS__)
+#define VDDK_CALL_START(fn, fs, ...) \
+ nbdkit_debug ("VDDK call: %s (" fs ")", #fn, ##__VA_ARGS__); \
+ do
+#define VDDK_CALL_START_DATAPATH(fn, fs, ...) \
+ if (vddk_debug_datapath) \
+ nbdkit_debug ("VDDK call: %s (" fs ")", #fn, ##__VA_ARGS__); \
+ do
+#define VDDK_CALL_END(fn) while (0)
/* Unload the plugin. */
static void
vddk_unload (void)
{
if (init_called) {
- DEBUG_CALL ("VixDiskLib_Exit", "");
- VixDiskLib_Exit ();
+ VDDK_CALL_START (VixDiskLib_Exit, "") {
+ VixDiskLib_Exit ();
+ } VDDK_CALL_END (VixDiskLib_Exit);
}
if (dl)
dlclose (dl);
@@ -449,15 +453,16 @@ vddk_after_fork (void)
VixError err;
/* Initialize VDDK library. */
- DEBUG_CALL ("VixDiskLib_InitEx",
- "%d, %d, &debug_fn, &error_fn, &error_fn, %s, %s",
- VDDK_MAJOR, VDDK_MINOR,
- libdir, config ? : "NULL");
- err = VixDiskLib_InitEx (VDDK_MAJOR, VDDK_MINOR,
- &debug_function, /* log function */
- &error_function, /* warn function */
- &error_function, /* panic function */
- libdir, config);
+ VDDK_CALL_START (VixDiskLib_InitEx,
+ "%d, %d, &debug_fn, &error_fn, &error_fn, %s, %s",
+ VDDK_MAJOR, VDDK_MINOR,
+ libdir, config ? : "NULL") {
+ err = VixDiskLib_InitEx (VDDK_MAJOR, VDDK_MINOR,
+ &debug_function, /* log function */
+ &error_function, /* warn function */
+ &error_function, /* panic function */
+ libdir, config);
+ } VDDK_CALL_END (VixDiskLib_InitEx);
if (err != VIX_OK) {
VDDK_ERROR (err, "VixDiskLib_InitEx");
exit (EXIT_FAILURE);
@@ -519,8 +524,9 @@ allocate_connect_params (void)
VixDiskLibConnectParams *ret;
if (VixDiskLib_AllocateConnectParams != NULL) {
- DEBUG_CALL ("VixDiskLib_AllocateConnectParams", "");
- ret = VixDiskLib_AllocateConnectParams ();
+ VDDK_CALL_START (VixDiskLib_AllocateConnectParams, "") {
+ ret = VixDiskLib_AllocateConnectParams ();
+ } VDDK_CALL_END (VixDiskLib_AllocateConnectParams);
}
else
ret = calloc (1, sizeof (VixDiskLibConnectParams));
@@ -535,8 +541,9 @@ free_connect_params (VixDiskLibConnectParams *params)
* originally called. Otherwise use free.
*/
if (VixDiskLib_AllocateConnectParams != NULL) {
- DEBUG_CALL ("VixDiskLib_FreeConnectParams", "params");
- VixDiskLib_FreeConnectParams (params);
+ VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params") {
+ VixDiskLib_FreeConnectParams (params);
+ } VDDK_CALL_END (VixDiskLib_FreeConnectParams);
}
else
free (params);
@@ -589,16 +596,17 @@ vddk_open (int readonly)
* either ESXi or vCenter servers.
*/
- DEBUG_CALL ("VixDiskLib_ConnectEx",
- "h->params, %d, %s, %s, &connection",
- readonly,
- snapshot_moref ? : "NULL",
- transport_modes ? : "NULL");
- err = VixDiskLib_ConnectEx (h->params,
- readonly,
- snapshot_moref,
- transport_modes,
- &h->connection);
+ VDDK_CALL_START (VixDiskLib_ConnectEx,
+ "h->params, %d, %s, %s, &connection",
+ readonly,
+ snapshot_moref ? : "NULL",
+ transport_modes ? : "NULL") {
+ err = VixDiskLib_ConnectEx (h->params,
+ readonly,
+ snapshot_moref,
+ transport_modes,
+ &h->connection);
+ } VDDK_CALL_END (VixDiskLib_ConnectEx);
if (err != VIX_OK) {
VDDK_ERROR (err, "VixDiskLib_ConnectEx");
goto err1;
@@ -618,9 +626,10 @@ vddk_open (int readonly)
case NONE: break;
}
- DEBUG_CALL ("VixDiskLib_Open",
- "connection, %s, %d, &handle", filename, flags);
- err = VixDiskLib_Open (h->connection, filename, flags, &h->handle);
+ VDDK_CALL_START (VixDiskLib_Open,
+ "connection, %s, %d, &handle", filename, flags) {
+ err = VixDiskLib_Open (h->connection, filename, flags, &h->handle);
+ } VDDK_CALL_END (VixDiskLib_Open);
if (err != VIX_OK) {
VDDK_ERROR (err, "VixDiskLib_Open: %s", filename);
goto err2;
@@ -632,8 +641,9 @@ vddk_open (int readonly)
return h;
err2:
- DEBUG_CALL ("VixDiskLib_Disconnect", "connection");
- VixDiskLib_Disconnect (h->connection);
+ VDDK_CALL_START (VixDiskLib_Disconnect, "connection") {
+ VixDiskLib_Disconnect (h->connection);
+ } VDDK_CALL_END (VixDiskLib_Disconnect);
err1:
free_connect_params (h->params);
err0:
@@ -648,10 +658,13 @@ vddk_close (void *handle)
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&open_close_lock);
struct vddk_handle *h = handle;
- DEBUG_CALL ("VixDiskLib_Close", "handle");
- VixDiskLib_Close (h->handle);
- DEBUG_CALL ("VixDiskLib_Disconnect", "connection");
- VixDiskLib_Disconnect (h->connection);
+ VDDK_CALL_START (VixDiskLib_Close, "handle") {
+ VixDiskLib_Close (h->handle);
+ } VDDK_CALL_END (VixDiskLib_Close);
+ VDDK_CALL_START (VixDiskLib_Disconnect, "connection") {
+ VixDiskLib_Disconnect (h->connection);
+ } VDDK_CALL_END (VixDiskLib_Disconnect);
+
free_connect_params (h->params);
free (h);
}
@@ -665,8 +678,9 @@ vddk_get_size (void *handle)
VixError err;
uint64_t size;
- DEBUG_CALL ("VixDiskLib_GetInfo", "handle, &info");
- err = VixDiskLib_GetInfo (h->handle, &info);
+ VDDK_CALL_START (VixDiskLib_GetInfo, "handle, &info") {
+ err = VixDiskLib_GetInfo (h->handle, &info);
+ } VDDK_CALL_END (VixDiskLib_GetInfo);
if (err != VIX_OK) {
VDDK_ERROR (err, "VixDiskLib_GetInfo");
return -1;
@@ -694,8 +708,9 @@ vddk_get_size (void *handle)
info->uuid ? : "NULL");
}
- DEBUG_CALL ("VixDiskLib_FreeInfo", "info");
- VixDiskLib_FreeInfo (info);
+ VDDK_CALL_START (VixDiskLib_FreeInfo, "info") {
+ VixDiskLib_FreeInfo (info);
+ } VDDK_CALL_END (VixDiskLib_FreeInfo);
return (int64_t) size;
}
@@ -723,11 +738,12 @@ vddk_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
offset /= VIXDISKLIB_SECTOR_SIZE;
count /= VIXDISKLIB_SECTOR_SIZE;
- DEBUG_CALL_DATAPATH ("VixDiskLib_Read",
- "handle, %" PRIu64 " sectors, "
- "%" PRIu32 " sectors, buffer",
- offset, count);
- err = VixDiskLib_Read (h->handle, offset, count, buf);
+ VDDK_CALL_START_DATAPATH (VixDiskLib_Read,
+ "handle, %" PRIu64 " sectors, "
+ "%" PRIu32 " sectors, buffer",
+ offset, count) {
+ err = VixDiskLib_Read (h->handle, offset, count, buf);
+ } VDDK_CALL_END (VixDiskLib_Read);
if (err != VIX_OK) {
VDDK_ERROR (err, "VixDiskLib_Read");
return -1;
@@ -762,11 +778,12 @@ vddk_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
offset /= VIXDISKLIB_SECTOR_SIZE;
count /= VIXDISKLIB_SECTOR_SIZE;
- DEBUG_CALL_DATAPATH ("VixDiskLib_Write",
- "handle, %" PRIu64 " sectors, "
- "%" PRIu32 " sectors, buffer",
- offset, count);
- err = VixDiskLib_Write (h->handle, offset, count, buf);
+ VDDK_CALL_START_DATAPATH (VixDiskLib_Write,
+ "handle, %" PRIu64 " sectors, "
+ "%" PRIu32 " sectors, buffer",
+ offset, count) {
+ err = VixDiskLib_Write (h->handle, offset, count, buf);
+ } VDDK_CALL_END (VixDiskLib_Write);
if (err != VIX_OK) {
VDDK_ERROR (err, "VixDiskLib_Write");
return -1;
@@ -801,8 +818,9 @@ vddk_flush (void *handle, uint32_t flags)
struct vddk_handle *h = handle;
VixError err;
- DEBUG_CALL ("VixDiskLib_Flush", "handle");
- err = VixDiskLib_Flush (h->handle);
+ VDDK_CALL_START (VixDiskLib_Flush, "handle") {
+ err = VixDiskLib_Flush (h->handle);
+ } VDDK_CALL_END (VixDiskLib_Flush);
if (err != VIX_OK) {
VDDK_ERROR (err, "VixDiskLib_Flush");
return -1;
@@ -836,17 +854,19 @@ vddk_can_extents (void *handle)
* the best thing we can do here is to try the call and if it's
* non-functional return false.
*/
- DEBUG_CALL ("VixDiskLib_QueryAllocatedBlocks",
- "handle, 0, %d sectors, %d sectors",
- VIXDISKLIB_MIN_CHUNK_SIZE, VIXDISKLIB_MIN_CHUNK_SIZE);
- err = VixDiskLib_QueryAllocatedBlocks (h->handle,
- 0, VIXDISKLIB_MIN_CHUNK_SIZE,
- VIXDISKLIB_MIN_CHUNK_SIZE,
- &block_list);
+ VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
+ "handle, 0, %d sectors, %d sectors",
+ VIXDISKLIB_MIN_CHUNK_SIZE, VIXDISKLIB_MIN_CHUNK_SIZE) {
+ err = VixDiskLib_QueryAllocatedBlocks (h->handle,
+ 0, VIXDISKLIB_MIN_CHUNK_SIZE,
+ VIXDISKLIB_MIN_CHUNK_SIZE,
+ &block_list);
+ } VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks);
error_suppression = 0;
if (err == VIX_OK) {
- DEBUG_CALL ("VixDiskLib_FreeBlockList", "block_list");
- VixDiskLib_FreeBlockList (block_list);
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list") {
+ VixDiskLib_FreeBlockList (block_list);
+ } VDDK_CALL_END (VixDiskLib_FreeBlockList);
}
if (err != VIX_OK) {
char *errmsg = VixDiskLib_GetErrorText (err, NULL);
@@ -923,14 +943,15 @@ vddk_extents (void *handle, uint32_t count, uint64_t offset, uint32_t flags,
nr_chunks = MIN (nr_chunks, VIXDISKLIB_MAX_CHUNK_NUMBER);
nr_sectors = nr_chunks * VIXDISKLIB_MIN_CHUNK_SIZE;
- DEBUG_CALL ("VixDiskLib_QueryAllocatedBlocks",
- "handle, %" PRIu64 " sectors, %" PRIu64 " sectors, "
- "%d sectors",
- start_sector, nr_sectors, VIXDISKLIB_MIN_CHUNK_SIZE);
- err = VixDiskLib_QueryAllocatedBlocks (h->handle,
- start_sector, nr_sectors,
- VIXDISKLIB_MIN_CHUNK_SIZE,
- &block_list);
+ VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
+ "handle, %" PRIu64 " sectors, %" PRIu64 " sectors, "
+ "%d sectors",
+ start_sector, nr_sectors, VIXDISKLIB_MIN_CHUNK_SIZE) {
+ err = VixDiskLib_QueryAllocatedBlocks (h->handle,
+ start_sector, nr_sectors,
+ VIXDISKLIB_MIN_CHUNK_SIZE,
+ &block_list);
+ } VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks);
if (err != VIX_OK) {
VDDK_ERROR (err, "VixDiskLib_QueryAllocatedBlocks");
return -1;
@@ -949,13 +970,15 @@ vddk_extents (void *handle, uint32_t count, uint64_t offset, uint32_t flags,
add_extent (extents, &position, blk_offset, true) == -1) ||
(add_extent (extents,
&position, blk_offset + blk_length, false) == -1)) {
- DEBUG_CALL ("VixDiskLib_FreeBlockList", "block_list");
- VixDiskLib_FreeBlockList (block_list);
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list") {
+ VixDiskLib_FreeBlockList (block_list);
+ } VDDK_CALL_END (VixDiskLib_FreeBlockList);
return -1;
}
}
- DEBUG_CALL ("VixDiskLib_FreeBlockList", "block_list");
- VixDiskLib_FreeBlockList (block_list);
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list") {
+ VixDiskLib_FreeBlockList (block_list);
+ } VDDK_CALL_END (VixDiskLib_FreeBlockList);
/* There's an implicit hole after the returned list of blocks, up
* to the end of the QueryAllocatedBlocks request.
--
2.31.1

View File

@ -0,0 +1,103 @@
From bd181ea739ebfafbf7239b5fa89e98becdb8cb72 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 5 Jul 2021 22:03:10 +0100
Subject: [PATCH] tests: Add a better test of real VDDK
The previous test only tested reading and maybe extents, and used an
all-zero disk. I'm fairly convinced the test only worked accidentally
since you must use an absolute path when opening a local file and the
test did not do that.
Add a more comprehensive test that tests writing and flush too.
(cherry picked from commit a6ca4f24593008bb2d8efb177e7f424cff51dfbf)
---
tests/test-vddk-real.sh | 55 ++++++++++++++++++++++++++++-------------
1 file changed, 38 insertions(+), 17 deletions(-)
diff --git a/tests/test-vddk-real.sh b/tests/test-vddk-real.sh
index df486ba1..f848db44 100755
--- a/tests/test-vddk-real.sh
+++ b/tests/test-vddk-real.sh
@@ -37,8 +37,12 @@ set -x
requires test "x$vddkdir" != "x"
requires test -d "$vddkdir"
requires test -f "$vddkdir/lib64/libvixDiskLib.so"
+requires test -r /dev/urandom
+requires cmp --version
+requires dd --version
requires qemu-img --version
requires nbdcopy --version
+requires nbdinfo --version
requires stat --version
# VDDK > 5.1.1 only supports x86_64.
@@ -47,31 +51,48 @@ if [ `uname -m` != "x86_64" ]; then
exit 77
fi
-files="test-vddk-real.vmdk test-vddk-real.out test-vddk-real.log"
-rm -f $files
-cleanup_fn rm -f $files
-
-qemu-img create -f vmdk test-vddk-real.vmdk 100M
-
# Since we are comparing error messages below, let's make sure we're
# not translating errors.
export LANG=C
-fail=0
-nbdkit -f -v -U - \
- --filter=readahead \
- vddk libdir="$vddkdir" test-vddk-real.vmdk \
- --run 'nbdcopy "$uri" test-vddk-real.out' \
- > test-vddk-real.log 2>&1 || fail=1
+pid=test-vddk-real.pid
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+vmdk=$PWD/test-vddk-real.vmdk ;# note must be an absolute path
+raw=test-vddk-real.raw
+raw2=test-vddk-real.raw2
+log=test-vddk-real.log
+files="$pid $sock $vmdk $raw $raw2 $log"
+rm -f $files
+cleanup_fn rm -f $files
+
+qemu-img create -f vmdk $vmdk 10M
+
+# Check first that the VDDK library can be fully loaded. We have to
+# check the log file for missing modules since they may not show up as
+# errors.
+nbdkit -fv -U - vddk libdir="$vddkdir" $vmdk --run 'nbdinfo "$uri"' >$log 2>&1
# Check the log for missing modules
-cat test-vddk-real.log
+cat $log
if grep 'cannot open shared object file' test-vddk-real.log; then
exit 1
fi
-# Check the raw output file has exactly the right size.
-size="$(stat -c '%s' test-vddk-real.out)"
-test "$size" -eq $((100 * 1024 * 1024))
+# Now run nbdkit for the test.
+start_nbdkit -P $pid -U $sock vddk libdir="$vddkdir" $vmdk
+uri="nbd+unix:///?socket=$sock"
-exit $fail
+# VDDK < 6.0 did not support flush, so disable flush test there. Also
+# if nbdinfo doesn't support the --can flush syntax (added in libnbd
+# 1.10) then this is disabled.
+if nbdinfo --can flush "$uri"; then flush="--flush"; else flush=""; fi
+
+# Copy in and out some data. This should exercise read, write,
+# extents and flushing.
+dd if=/dev/urandom of=$raw count=5 bs=$((1024*1024))
+truncate -s 10M $raw
+
+nbdcopy $flush $raw "$uri"
+nbdcopy "$uri" $raw2
+
+cmp $raw $raw2
--
2.31.1

View File

@ -0,0 +1,245 @@
From 45db64d72bf03fece8a7fb994887360954905a3b Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Wed, 18 Aug 2021 14:47:58 +0100
Subject: [PATCH] vddk: Add stats about the amount of time spent in VDDK calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If you use -D vddk.stats=1 then when the plugin is unloaded it will
display the amount of time spent in each VDDK call. The output will
look something like this:
nbdkit: debug: VDDK function stats (-D vddk.stats=1):
nbdkit: debug: µs
nbdkit: debug: VixDiskLib_Exit 1001154
nbdkit: debug: VixDiskLib_InitEx 96008
nbdkit: debug: VixDiskLib_Flush 15722
nbdkit: debug: VixDiskLib_Write 12081
nbdkit: debug: VixDiskLib_Open 6029
nbdkit: debug: VixDiskLib_Read 1364
nbdkit: debug: VixDiskLib_Close 605
nbdkit: debug: VixDiskLib_QueryAllocatedBlocks 191
nbdkit: debug: VixDiskLib_ConnectEx 134
nbdkit: debug: VixDiskLib_Disconnect 76
nbdkit: debug: VixDiskLib_FreeConnectParams 57
nbdkit: debug: VixDiskLib_GetInfo 56
nbdkit: debug: VixDiskLib_GetTransportMode 43
nbdkit: debug: VixDiskLib_FreeInfo 42
nbdkit: debug: VixDiskLib_FreeBlockList 32
nbdkit: debug: VixDiskLib_AllocateConnectParams 28
VDDK APIs which are never called are not printed.
(cherry picked from commit f2dfc7d74ee650bdf2cc930a07b1c5bcb509976c)
---
plugins/vddk/nbdkit-vddk-plugin.pod | 5 ++
plugins/vddk/vddk.c | 107 +++++++++++++++++++++++++---
tests/test-vddk-real.sh | 2 +-
3 files changed, 103 insertions(+), 11 deletions(-)
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
index b783f13a..2a1b17dc 100644
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
@@ -510,6 +510,11 @@ Debug extents returned by C<QueryAllocatedBlocks>.
Suppress debugging of datapath calls (C<Read> and C<Write>).
+=item B<-D vddk.stats=1>
+
+When the plugin exits print some statistics about the amount of time
+spent waiting on each VDDK call.
+
=back
=head1 FILES
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 888009ab..fce96d9a 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -42,6 +42,7 @@
#include <assert.h>
#include <dlfcn.h>
#include <libgen.h>
+#include <sys/time.h>
#include <pthread.h>
@@ -52,6 +53,8 @@
#include "isaligned.h"
#include "minmax.h"
#include "rounding.h"
+#include "tvdiff.h"
+#include "vector.h"
#include "vddk.h"
#include "vddk-structs.h"
@@ -60,6 +63,7 @@
NBDKIT_DLL_PUBLIC int vddk_debug_diskinfo;
NBDKIT_DLL_PUBLIC int vddk_debug_extents;
NBDKIT_DLL_PUBLIC int vddk_debug_datapath = 1;
+NBDKIT_DLL_PUBLIC int vddk_debug_stats;
/* For each VDDK API define a static global variable. These globals
* are initialized when the plugin is loaded (by vddk_get_ready).
@@ -96,22 +100,52 @@ static const char *username; /* user */
static const char *vmx_spec; /* vm */
static bool is_remote;
-#define VDDK_ERROR(err, fs, ...) \
- do { \
- char *vddk_err_msg; \
- vddk_err_msg = VixDiskLib_GetErrorText ((err), NULL); \
- nbdkit_error (fs ": %s", ##__VA_ARGS__, vddk_err_msg); \
- VixDiskLib_FreeErrorText (vddk_err_msg); \
- } while (0)
+/* For each VDDK API define a variable to store the time taken (used
+ * to implement -D vddk.stats=1).
+ */
+static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
+static void display_stats (void);
+#define STUB(fn,ret,args) static int64_t stats_##fn;
+#define OPTIONAL_STUB(fn,ret,args) static int64_t stats_##fn;
+#include "vddk-stubs.h"
+#undef STUB
+#undef OPTIONAL_STUB
#define VDDK_CALL_START(fn, fs, ...) \
+ do { \
+ struct timeval start_t, end_t; \
+ if (vddk_debug_stats) \
+ gettimeofday (&start_t, NULL); \
nbdkit_debug ("VDDK call: %s (" fs ")", #fn, ##__VA_ARGS__); \
do
#define VDDK_CALL_START_DATAPATH(fn, fs, ...) \
+ do { \
+ struct timeval start_t, end_t; \
+ if (vddk_debug_stats) \
+ gettimeofday (&start_t, NULL); \
if (vddk_debug_datapath) \
nbdkit_debug ("VDDK call: %s (" fs ")", #fn, ##__VA_ARGS__); \
do
-#define VDDK_CALL_END(fn) while (0)
+#define VDDK_CALL_END(fn) \
+ while (0); \
+ if (vddk_debug_stats) { \
+ gettimeofday (&end_t, NULL); \
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&stats_lock); \
+ stats_##fn += tvdiff_usec (&start_t, &end_t); \
+ } \
+ } while (0)
+
+#define VDDK_ERROR(err, fs, ...) \
+ do { \
+ char *vddk_err_msg; \
+ VDDK_CALL_START (VixDiskLib_GetErrorText, "%lu", err) { \
+ vddk_err_msg = VixDiskLib_GetErrorText ((err), NULL); \
+ } VDDK_CALL_END (VixDiskLib_GetErrorText); \
+ nbdkit_error (fs ": %s", ##__VA_ARGS__, vddk_err_msg); \
+ VDDK_CALL_START (VixDiskLib_FreeErrorText, "") { \
+ VixDiskLib_FreeErrorText (vddk_err_msg); \
+ } VDDK_CALL_END (VixDiskLib_FreeErrorText); \
+ } while (0)
/* Unload the plugin. */
static void
@@ -124,11 +158,61 @@ vddk_unload (void)
}
if (dl)
dlclose (dl);
+
+ if (vddk_debug_stats)
+ display_stats ();
+
free (config);
free (libdir);
free (password);
}
+struct vddk_stat {
+ const char *fn;
+ int64_t usecs;
+};
+DEFINE_VECTOR_TYPE(statlist, struct vddk_stat)
+
+static int
+stat_compare (const void *vp1, const void *vp2)
+{
+ const struct vddk_stat *st1 = vp1;
+ const struct vddk_stat *st2 = vp2;
+
+ /* Note: sorts in reverse order. */
+ if (st1->usecs < st2->usecs) return 1;
+ else if (st1->usecs > st2->usecs) return -1;
+ else return 0;
+}
+
+static void
+display_stats (void)
+{
+ statlist stats = empty_vector;
+ struct vddk_stat st;
+ size_t i;
+
+#define ADD_ONE_STAT(fn_, usecs_) \
+ st.fn = fn_; \
+ st.usecs = usecs_; \
+ statlist_append (&stats, st)
+#define STUB(fn,ret,args) ADD_ONE_STAT (#fn, stats_##fn);
+#define OPTIONAL_STUB(fn,ret,args) ADD_ONE_STAT (#fn, stats_##fn);
+#include "vddk-stubs.h"
+#undef STUB
+#undef OPTIONAL_STUB
+#undef ADD_ONE_STAT
+
+ qsort (stats.ptr, stats.size, sizeof stats.ptr[0], stat_compare);
+
+ nbdkit_debug ("VDDK function stats (-D vddk.stats=1):");
+ nbdkit_debug ("%-40s %9s", "", "µs");
+ for (i = 0; i < stats.size; ++i) {
+ if (stats.ptr[i].usecs)
+ nbdkit_debug ("%-40s %9" PRIi64, stats.ptr[i].fn, stats.ptr[i].usecs);
+ }
+}
+
static void
trim (char *str)
{
@@ -557,6 +641,7 @@ vddk_open (int readonly)
struct vddk_handle *h;
VixError err;
uint32_t flags;
+ const char *transport_mode;
h = malloc (sizeof *h);
if (h == NULL) {
@@ -635,8 +720,10 @@ vddk_open (int readonly)
goto err2;
}
- nbdkit_debug ("transport mode: %s",
- VixDiskLib_GetTransportMode (h->handle));
+ VDDK_CALL_START (VixDiskLib_GetTransportMode, "handle") {
+ transport_mode = VixDiskLib_GetTransportMode (h->handle);
+ } VDDK_CALL_END (VixDiskLib_GetTransportMode);
+ nbdkit_debug ("transport mode: %s", transport_mode);
return h;
diff --git a/tests/test-vddk-real.sh b/tests/test-vddk-real.sh
index f848db44..3c8b4262 100755
--- a/tests/test-vddk-real.sh
+++ b/tests/test-vddk-real.sh
@@ -79,7 +79,7 @@ if grep 'cannot open shared object file' test-vddk-real.log; then
fi
# Now run nbdkit for the test.
-start_nbdkit -P $pid -U $sock vddk libdir="$vddkdir" $vmdk
+start_nbdkit -P $pid -U $sock -D vddk.stats=1 vddk libdir="$vddkdir" $vmdk
uri="nbd+unix:///?socket=$sock"
# VDDK < 6.0 did not support flush, so disable flush test there. Also
--
2.31.1

View File

@ -0,0 +1,772 @@
From 0be4847cdec9effd6128da03ea42a4953e5a6343 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 17 Aug 2021 22:03:11 +0100
Subject: [PATCH] cow: Make the block size configurable
Commit c1905b0a28 ("cache, cow: Use a 64K block size by default")
changed the nbdkit-cow-filter block size to 64K, but it was still a
fixed size. In contrast the cache filter allows the block size to be
adjusted.
Allow the block size in this filter to be adjusted up or down with a
new cow-block-size=N parameter.
When using the VDDK plugin, adjusting this setting can make a
difference. The following timings come from a modified virt-v2v which
sets cow-block-size and was used to convert from a VMware server to
-o null (this is also using cow-on-read=true):
cow-block-size=64K: 18m18
cow-block-size=256K: 14m13
cow-block-size=1M: 14m19
cow-block-size=4M: 37m33
As you can see it's not obvious how to choose a good block size, but
at least by allowing adjustment we can tune things.
(cherry picked from commit 7182c47d04d2b68005fceadefc0c14bfaa61a533)
---
filters/cow/blk.c | 35 +++----
filters/cow/blk.h | 5 -
filters/cow/cow.c | 150 +++++++++++++++++-------------
filters/cow/cow.h | 39 ++++++++
filters/cow/nbdkit-cow-filter.pod | 5 +
tests/Makefile.am | 2 +
tests/test-cow-block-size.sh | 72 ++++++++++++++
7 files changed, 221 insertions(+), 87 deletions(-)
create mode 100644 filters/cow/cow.h
create mode 100755 tests/test-cow-block-size.sh
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index c22d5886..f9341dc1 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -99,6 +99,7 @@
#include "pwrite.h"
#include "utils.h"
+#include "cow.h"
#include "blk.h"
/* The temporary overlay. */
@@ -137,7 +138,7 @@ blk_init (void)
size_t len;
char *template;
- bitmap_init (&bm, BLKSIZE, 2 /* bits per block */);
+ bitmap_init (&bm, blksize, 2 /* bits per block */);
tmpdir = getenv ("TMPDIR");
if (!tmpdir)
@@ -199,7 +200,7 @@ blk_set_size (uint64_t new_size)
if (bitmap_resize (&bm, size) == -1)
return -1;
- if (ftruncate (fd, ROUND_UP (size, BLKSIZE)) == -1) {
+ if (ftruncate (fd, ROUND_UP (size, blksize)) == -1) {
nbdkit_error ("ftruncate: %m");
return -1;
}
@@ -228,7 +229,7 @@ blk_read_multiple (nbdkit_next *next,
uint64_t blknum, uint64_t nrblocks,
uint8_t *block, bool cow_on_read, int *err)
{
- off_t offset = blknum * BLKSIZE;
+ off_t offset = blknum * blksize;
enum bm_entry state;
uint64_t b, runblocks;
@@ -262,8 +263,8 @@ blk_read_multiple (nbdkit_next *next,
if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
unsigned n, tail = 0;
- assert (BLKSIZE * runblocks <= UINT_MAX);
- n = BLKSIZE * runblocks;
+ assert (blksize * runblocks <= UINT_MAX);
+ n = blksize * runblocks;
if (offset + n > size) {
tail = offset + n - size;
@@ -288,7 +289,7 @@ blk_read_multiple (nbdkit_next *next,
"at offset %" PRIu64 " into the cache",
runblocks, offset);
- if (full_pwrite (fd, block, BLKSIZE * runblocks, offset) == -1) {
+ if (full_pwrite (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -298,14 +299,14 @@ blk_read_multiple (nbdkit_next *next,
}
}
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
- if (full_pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
+ if (full_pread (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
nbdkit_error ("pread: %m");
return -1;
}
}
else /* state == BLOCK_TRIMMED */ {
- memset (block, 0, BLKSIZE * runblocks);
+ memset (block, 0, blksize * runblocks);
}
/* If all done, return. */
@@ -316,7 +317,7 @@ blk_read_multiple (nbdkit_next *next,
return blk_read_multiple (next,
blknum + runblocks,
nrblocks - runblocks,
- block + BLKSIZE * runblocks,
+ block + blksize * runblocks,
cow_on_read, err);
}
@@ -333,9 +334,9 @@ blk_cache (nbdkit_next *next,
{
/* XXX Could make this lock more fine-grained with some thought. */
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
- off_t offset = blknum * BLKSIZE;
+ off_t offset = blknum * blksize;
enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_ALLOCATED);
- unsigned n = BLKSIZE, tail = 0;
+ unsigned n = blksize, tail = 0;
if (offset + n > size) {
tail = offset + n - size;
@@ -348,7 +349,7 @@ blk_cache (nbdkit_next *next,
if (state == BLOCK_ALLOCATED) {
#if HAVE_POSIX_FADVISE
- int r = posix_fadvise (fd, offset, BLKSIZE, POSIX_FADV_WILLNEED);
+ int r = posix_fadvise (fd, offset, blksize, POSIX_FADV_WILLNEED);
if (r) {
errno = r;
nbdkit_error ("posix_fadvise: %m");
@@ -373,7 +374,7 @@ blk_cache (nbdkit_next *next,
memset (block + n, 0, tail);
if (mode == BLK_CACHE_COW) {
- if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -386,13 +387,13 @@ blk_cache (nbdkit_next *next,
int
blk_write (uint64_t blknum, const uint8_t *block, int *err)
{
- off_t offset = blknum * BLKSIZE;
+ off_t offset = blknum * blksize;
if (cow_debug_verbose)
nbdkit_debug ("cow: blk_write block %" PRIu64 " (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
- if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
nbdkit_error ("pwrite: %m");
return -1;
@@ -407,14 +408,14 @@ blk_write (uint64_t blknum, const uint8_t *block, int *err)
int
blk_trim (uint64_t blknum, int *err)
{
- off_t offset = blknum * BLKSIZE;
+ off_t offset = blknum * blksize;
if (cow_debug_verbose)
nbdkit_debug ("cow: blk_trim block %" PRIu64 " (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
/* XXX As an optimization we could punch a whole in the overlay
- * here. However it's not trivial since BLKSIZE is unrelated to the
+ * here. However it's not trivial since blksize is unrelated to the
* overlay filesystem block size.
*/
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
index b7e6f092..62fb5416 100644
--- a/filters/cow/blk.h
+++ b/filters/cow/blk.h
@@ -33,11 +33,6 @@
#ifndef NBDKIT_BLK_H
#define NBDKIT_BLK_H
-/* Size of a block in the overlay. A 4K block size means that we need
- * 64 MB of memory to store the bitmap for a 1 TB underlying image.
- */
-#define BLKSIZE 65536
-
/* Initialize the overlay and bitmap. */
extern int blk_init (void);
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
index 6efb39f2..1c62c857 100644
--- a/filters/cow/cow.c
+++ b/filters/cow/cow.c
@@ -40,6 +40,7 @@
#include <string.h>
#include <unistd.h>
#include <errno.h>
+#include <limits.h>
#include <pthread.h>
@@ -47,9 +48,11 @@
#include "cleanup.h"
#include "isaligned.h"
+#include "ispowerof2.h"
#include "minmax.h"
#include "rounding.h"
+#include "cow.h"
#include "blk.h"
/* Read-modify-write requests are serialized through this global lock.
@@ -58,6 +61,8 @@
*/
static pthread_mutex_t rmw_lock = PTHREAD_MUTEX_INITIALIZER;
+unsigned blksize = 65536; /* block size */
+
static bool cow_on_cache;
/* Cache on read ("cow-on-read") mode. */
@@ -69,13 +74,6 @@ extern enum cor_mode {
enum cor_mode cor_mode = COR_OFF;
const char *cor_path;
-static void
-cow_load (void)
-{
- if (blk_init () == -1)
- exit (EXIT_FAILURE);
-}
-
static void
cow_unload (void)
{
@@ -86,7 +84,19 @@ static int
cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
const char *key, const char *value)
{
- if (strcmp (key, "cow-on-cache") == 0) {
+ if (strcmp (key, "cow-block-size") == 0) {
+ int64_t r = nbdkit_parse_size (value);
+ if (r == -1)
+ return -1;
+ if (r <= 4096 || r > UINT_MAX || !is_power_of_2 (r)) {
+ nbdkit_error ("cow-block-size is out of range (4096..2G) "
+ "or not a power of 2");
+ return -1;
+ }
+ blksize = r;
+ return 0;
+ }
+ else if (strcmp (key, "cow-on-cache") == 0) {
int r;
r = nbdkit_parse_bool (value);
@@ -114,9 +124,19 @@ cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
}
#define cow_config_help \
+ "cow-block-size=<N> Set COW block size.\n" \
"cow-on-cache=<BOOL> Copy cache (prefetch) requests to the overlay.\n" \
"cow-on-read=<BOOL>|/PATH Copy read requests to the overlay."
+static int
+cow_get_ready (int thread_model)
+{
+ if (blk_init () == -1)
+ return -1;
+
+ return 0;
+}
+
/* Decide if cow-on-read is currently on or off. */
bool
cow_on_read (void)
@@ -249,8 +269,8 @@ cow_pread (nbdkit_next *next,
uint64_t blknum, blkoffs, nrblocks;
int r;
- if (!IS_ALIGNED (count | offset, BLKSIZE)) {
- block = malloc (BLKSIZE);
+ if (!IS_ALIGNED (count | offset, blksize)) {
+ block = malloc (blksize);
if (block == NULL) {
*err = errno;
nbdkit_error ("malloc: %m");
@@ -258,12 +278,12 @@ cow_pread (nbdkit_next *next,
}
}
- blknum = offset / BLKSIZE; /* block number */
- blkoffs = offset % BLKSIZE; /* offset within the block */
+ blknum = offset / blksize; /* block number */
+ blkoffs = offset % blksize; /* offset within the block */
/* Unaligned head */
if (blkoffs) {
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
+ uint64_t n = MIN (blksize - blkoffs, count);
assert (block);
r = blk_read (next, blknum, block, cow_on_read (), err);
@@ -279,15 +299,15 @@ cow_pread (nbdkit_next *next,
}
/* Aligned body */
- nrblocks = count / BLKSIZE;
+ nrblocks = count / blksize;
if (nrblocks > 0) {
r = blk_read_multiple (next, blknum, nrblocks, buf, cow_on_read (), err);
if (r == -1)
return -1;
- buf += nrblocks * BLKSIZE;
- count -= nrblocks * BLKSIZE;
- offset += nrblocks * BLKSIZE;
+ buf += nrblocks * blksize;
+ count -= nrblocks * blksize;
+ offset += nrblocks * blksize;
blknum += nrblocks;
}
@@ -314,8 +334,8 @@ cow_pwrite (nbdkit_next *next,
uint64_t blknum, blkoffs;
int r;
- if (!IS_ALIGNED (count | offset, BLKSIZE)) {
- block = malloc (BLKSIZE);
+ if (!IS_ALIGNED (count | offset, blksize)) {
+ block = malloc (blksize);
if (block == NULL) {
*err = errno;
nbdkit_error ("malloc: %m");
@@ -323,12 +343,12 @@ cow_pwrite (nbdkit_next *next,
}
}
- blknum = offset / BLKSIZE; /* block number */
- blkoffs = offset % BLKSIZE; /* offset within the block */
+ blknum = offset / blksize; /* block number */
+ blkoffs = offset % blksize; /* offset within the block */
/* Unaligned head */
if (blkoffs) {
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
+ uint64_t n = MIN (blksize - blkoffs, count);
/* Do a read-modify-write operation on the current block.
* Hold the rmw_lock over the whole operation.
@@ -350,14 +370,14 @@ cow_pwrite (nbdkit_next *next,
}
/* Aligned body */
- while (count >= BLKSIZE) {
+ while (count >= blksize) {
r = blk_write (blknum, buf, err);
if (r == -1)
return -1;
- buf += BLKSIZE;
- count -= BLKSIZE;
- offset += BLKSIZE;
+ buf += blksize;
+ count -= blksize;
+ offset += blksize;
blknum++;
}
@@ -397,19 +417,19 @@ cow_zero (nbdkit_next *next,
return -1;
}
- block = malloc (BLKSIZE);
+ block = malloc (blksize);
if (block == NULL) {
*err = errno;
nbdkit_error ("malloc: %m");
return -1;
}
- blknum = offset / BLKSIZE; /* block number */
- blkoffs = offset % BLKSIZE; /* offset within the block */
+ blknum = offset / blksize; /* block number */
+ blkoffs = offset % blksize; /* offset within the block */
/* Unaligned head */
if (blkoffs) {
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
+ uint64_t n = MIN (blksize - blkoffs, count);
/* Do a read-modify-write operation on the current block.
* Hold the rmw_lock over the whole operation.
@@ -429,9 +449,9 @@ cow_zero (nbdkit_next *next,
}
/* Aligned body */
- if (count >= BLKSIZE)
- memset (block, 0, BLKSIZE);
- while (count >= BLKSIZE) {
+ if (count >= blksize)
+ memset (block, 0, blksize);
+ while (count >= blksize) {
/* XXX There is the possibility of optimizing this: since this loop is
* writing a whole, aligned block, we should use FALLOC_FL_ZERO_RANGE.
*/
@@ -439,8 +459,8 @@ cow_zero (nbdkit_next *next,
if (r == -1)
return -1;
- count -= BLKSIZE;
- offset += BLKSIZE;
+ count -= blksize;
+ offset += blksize;
blknum++;
}
@@ -471,8 +491,8 @@ cow_trim (nbdkit_next *next,
uint64_t blknum, blkoffs;
int r;
- if (!IS_ALIGNED (count | offset, BLKSIZE)) {
- block = malloc (BLKSIZE);
+ if (!IS_ALIGNED (count | offset, blksize)) {
+ block = malloc (blksize);
if (block == NULL) {
*err = errno;
nbdkit_error ("malloc: %m");
@@ -480,12 +500,12 @@ cow_trim (nbdkit_next *next,
}
}
- blknum = offset / BLKSIZE; /* block number */
- blkoffs = offset % BLKSIZE; /* offset within the block */
+ blknum = offset / blksize; /* block number */
+ blkoffs = offset % blksize; /* offset within the block */
/* Unaligned head */
if (blkoffs) {
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
+ uint64_t n = MIN (blksize - blkoffs, count);
/* Do a read-modify-write operation on the current block.
* Hold the lock over the whole operation.
@@ -505,13 +525,13 @@ cow_trim (nbdkit_next *next,
}
/* Aligned body */
- while (count >= BLKSIZE) {
+ while (count >= blksize) {
r = blk_trim (blknum, err);
if (r == -1)
return -1;
- count -= BLKSIZE;
- offset += BLKSIZE;
+ count -= blksize;
+ offset += blksize;
blknum++;
}
@@ -568,22 +588,22 @@ cow_cache (nbdkit_next *next,
mode = BLK_CACHE_COW;
assert (!flags);
- block = malloc (BLKSIZE);
+ block = malloc (blksize);
if (block == NULL) {
*err = errno;
nbdkit_error ("malloc: %m");
return -1;
}
- blknum = offset / BLKSIZE; /* block number */
- blkoffs = offset % BLKSIZE; /* offset within the block */
+ blknum = offset / blksize; /* block number */
+ blkoffs = offset % blksize; /* offset within the block */
/* Unaligned head */
remaining += blkoffs;
offset -= blkoffs;
/* Unaligned tail */
- remaining = ROUND_UP (remaining, BLKSIZE);
+ remaining = ROUND_UP (remaining, blksize);
/* Aligned body */
while (remaining) {
@@ -591,8 +611,8 @@ cow_cache (nbdkit_next *next,
if (r == -1)
return -1;
- remaining -= BLKSIZE;
- offset += BLKSIZE;
+ remaining -= blksize;
+ offset += blksize;
blknum++;
}
@@ -616,13 +636,13 @@ cow_extents (nbdkit_next *next,
* value so rounding up is safe here.
*/
end = offset + count;
- offset = ROUND_DOWN (offset, BLKSIZE);
- end = ROUND_UP (end, BLKSIZE);
+ offset = ROUND_DOWN (offset, blksize);
+ end = ROUND_UP (end, blksize);
count = end - offset;
- blknum = offset / BLKSIZE;
+ blknum = offset / blksize;
- assert (IS_ALIGNED (offset, BLKSIZE));
- assert (IS_ALIGNED (count, BLKSIZE));
+ assert (IS_ALIGNED (offset, blksize));
+ assert (IS_ALIGNED (count, blksize));
assert (count > 0); /* We must make forward progress. */
while (count > 0) {
@@ -634,7 +654,7 @@ cow_extents (nbdkit_next *next,
/* Present in the overlay. */
if (present) {
e.offset = offset;
- e.length = BLKSIZE;
+ e.length = blksize;
if (trimmed)
e.type = NBDKIT_EXTENT_HOLE|NBDKIT_EXTENT_ZERO;
@@ -647,8 +667,8 @@ cow_extents (nbdkit_next *next,
}
blknum++;
- offset += BLKSIZE;
- count -= BLKSIZE;
+ offset += blksize;
+ count -= blksize;
}
/* Not present in the overlay, but we can ask the plugin. */
@@ -667,12 +687,12 @@ cow_extents (nbdkit_next *next,
* (range_count), but count is a 64 bit quantity, so don't
* overflow range_count here.
*/
- if (range_count >= UINT32_MAX - BLKSIZE + 1) break;
+ if (range_count >= UINT32_MAX - blksize + 1) break;
blknum++;
- offset += BLKSIZE;
- count -= BLKSIZE;
- range_count += BLKSIZE;
+ offset += blksize;
+ count -= blksize;
+ range_count += blksize;
if (count == 0) break;
blk_status (blknum, &present, &trimmed);
@@ -706,7 +726,7 @@ cow_extents (nbdkit_next *next,
/* Otherwise assume the block is non-sparse. */
else {
e.offset = offset;
- e.length = BLKSIZE;
+ e.length = blksize;
e.type = 0;
if (nbdkit_add_extent (extents, e.offset, e.length, e.type) == -1) {
@@ -715,8 +735,8 @@ cow_extents (nbdkit_next *next,
}
blknum++;
- offset += BLKSIZE;
- count -= BLKSIZE;
+ offset += blksize;
+ count -= blksize;
}
/* If the caller only wanted the first extent, and we've managed
@@ -734,11 +754,11 @@ cow_extents (nbdkit_next *next,
static struct nbdkit_filter filter = {
.name = "cow",
.longname = "nbdkit copy-on-write (COW) filter",
- .load = cow_load,
.unload = cow_unload,
.open = cow_open,
.config = cow_config,
.config_help = cow_config_help,
+ .get_ready = cow_get_ready,
.prepare = cow_prepare,
.get_size = cow_get_size,
.can_write = cow_can_write,
diff --git a/filters/cow/cow.h b/filters/cow/cow.h
new file mode 100644
index 00000000..d46dbe91
--- /dev/null
+++ b/filters/cow/cow.h
@@ -0,0 +1,39 @@
+/* nbdkit
+ * Copyright (C) 2018-2021 Red Hat Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NBDKIT_COW_H
+#define NBDKIT_COW_H
+
+/* Size of a block in the cache. */
+extern unsigned blksize;
+
+#endif /* NBDKIT_COW_H */
diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod
index 7f861140..997c9097 100644
--- a/filters/cow/nbdkit-cow-filter.pod
+++ b/filters/cow/nbdkit-cow-filter.pod
@@ -5,6 +5,7 @@ nbdkit-cow-filter - nbdkit copy-on-write (COW) filter
=head1 SYNOPSIS
nbdkit --filter=cow plugin [plugin-args...]
+ [cow-block-size=N]
[cow-on-cache=false|true]
[cow-on-read=false|true|/PATH]
@@ -42,6 +43,10 @@ serve the same data to each client.
=over 4
+=item B<cow-block-size=>N
+
+Set the block size used by the filter. The default is 64K.
+
=item B<cow-on-cache=false>
Do not save data from cache (prefetch) requests in the overlay. This
diff --git a/tests/Makefile.am b/tests/Makefile.am
index e61c5829..d93f848f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1404,6 +1404,7 @@ EXTRA_DIST += \
if HAVE_MKE2FS_WITH_D
TESTS += \
test-cow.sh \
+ test-cow-block-size.sh \
test-cow-extents1.sh \
test-cow-extents2.sh \
test-cow-extents-large.sh \
@@ -1415,6 +1416,7 @@ endif
TESTS += test-cow-null.sh
EXTRA_DIST += \
test-cow.sh \
+ test-cow-block-size.sh \
test-cow-extents1.sh \
test-cow-extents2.sh \
test-cow-extents-large.sh \
diff --git a/tests/test-cow-block-size.sh b/tests/test-cow-block-size.sh
new file mode 100755
index 00000000..6de1c068
--- /dev/null
+++ b/tests/test-cow-block-size.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018-2021 Red Hat Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_plugin linuxdisk
+requires guestfish --version
+requires nbdcopy --version
+requires qemu-img --version
+
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
+files="cow-block-size-base.img $sock cow-block-size.pid"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Create a base image which is partitioned with an empty filesystem.
+rm -rf cow-block-size.d
+mkdir cow-block-size.d
+cleanup_fn rm -rf cow-block-size.d
+nbdkit -fv -U - linuxdisk cow-block-size.d size=100M \
+ --run 'nbdcopy "$uri" cow-block-size-base.img'
+lastmod="$(stat -c "%y" cow-block-size-base.img)"
+
+# Run nbdkit with a COW overlay, 4M block size and copy on read.
+start_nbdkit -P cow-block-size.pid -U $sock \
+ --filter=cow file cow-block-size-base.img \
+ cow-block-size=4M cow-on-read=true
+
+# Write some data into the overlay.
+guestfish --format=raw -a "nbd://?socket=$sock" -m /dev/sda1 <<EOF
+ fill-pattern "abcde" 128K /large
+ write /hello "hello, world"
+EOF
+
+# The original file must not be modified.
+currmod="$(stat -c "%y" cow-block-size-base.img)"
+
+if [ "$lastmod" != "$currmod" ]; then
+ echo "$0: FAILED last modified time of base file changed"
+ exit 1
+fi
--
2.31.1

View File

@ -0,0 +1,26 @@
From 8d2ef02bd4de988e20ad1efba8038d311cd59665 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 18 Aug 2021 19:16:43 -0500
Subject: [PATCH] cow: Ship cow.h header
Fixes: 7182c47d0 (cow: Make the block size configurable)
(cherry picked from commit 75ff1b8b1afb3744b21a306c62e4973c90d386be)
---
filters/cow/Makefile.am | 1 +
1 file changed, 1 insertion(+)
diff --git a/filters/cow/Makefile.am b/filters/cow/Makefile.am
index a80ccd8f..88cda497 100644
--- a/filters/cow/Makefile.am
+++ b/filters/cow/Makefile.am
@@ -39,6 +39,7 @@ nbdkit_cow_filter_la_SOURCES = \
blk.c \
blk.h \
cow.c \
+ cow.h \
$(top_srcdir)/include/nbdkit-filter.h \
$(NULL)
--
2.31.1

55
SOURCES/copy-patches.sh Executable file
View File

@ -0,0 +1,55 @@
#!/bin/bash -
set -e
# Maintainer script to copy patches from the git repo to the current
# directory. Use it like this:
# ./copy-patches.sh
rhel_version=9.0
# Check we're in the right directory.
if [ ! -f nbdkit.spec ]; then
echo "$0: run this from the directory containing 'nbdkit.spec'"
exit 1
fi
git_checkout=$HOME/d/nbdkit-rhel-$rhel_version
if [ ! -d $git_checkout ]; then
echo "$0: $git_checkout does not exist"
echo "This script is only for use by the maintainer when preparing a"
echo "nbdkit release on RHEL."
exit 1
fi
# Get the base version of nbdkit.
version=`grep '^Version:' nbdkit.spec | awk '{print $2}'`
tag="v$version"
# Remove any existing patches.
git rm -f [0-9]*.patch ||:
rm -f [0-9]*.patch
# Get the patches.
(cd $git_checkout; rm -f [0-9]*.patch; git format-patch -N $tag)
mv $git_checkout/[0-9]*.patch .
# Remove any not to be applied.
rm -f *NOT-FOR-RPM*.patch
# Add the patches.
git add [0-9]*.patch
# Print out the patch lines.
echo
echo "--- Copy the following text into nbdkit.spec file"
echo
echo "# Patches."
for f in [0-9]*.patch; do
n=`echo $f | awk -F- '{print $1}'`
echo "Patch$n: $f"
done
echo
echo "--- End of text"

View File

@ -0,0 +1,11 @@
-----BEGIN PGP SIGNATURE-----
iQEzBAABCAAdFiEEccLMIrHEYCkn0vOqp6FrSiUnQ2oFAmEdoO0ACgkQp6FrSiUn
Q2r9AAf/VN10pDdAapr8W5bPNbgMIJ4dGtGl3sbMKg5mUVNkWwReUpiiUInZwnQW
I+TUjMkvB2jJGkruldgipuRNbAAGROZV3JugE2fMl8qQC1TvL/RMAOHcfKCswzfv
pVik1FmvpI88litCw05csH30TEA1BtFM0TlOR9xoeDkV9e2IUtWcxFJYP6RN5COr
NgTMfouxHWuR+FlVpXkvPl4aOuCavpplobcS0OaKNrqFXMhN+qcKjYgKazUKPB9C
TEExW8/CKTBVdaNpMbcLW/VBEaE85c3mv0xU26YKcEkj+OPAl4AzJ4Z+0MSDXk7t
3nrWSxPX67gBU5XDtKZ1IMUttydvzA==
=NyyV
-----END PGP SIGNATURE-----

2346
SPECS/nbdkit.spec Normal file

File diff suppressed because it is too large Load Diff