773 lines
24 KiB
Diff
773 lines
24 KiB
Diff
|
From 0be4847cdec9effd6128da03ea42a4953e5a6343 Mon Sep 17 00:00:00 2001
|
||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||
|
Date: Tue, 17 Aug 2021 22:03:11 +0100
|
||
|
Subject: [PATCH] cow: Make the block size configurable
|
||
|
|
||
|
Commit c1905b0a28 ("cache, cow: Use a 64K block size by default")
|
||
|
changed the nbdkit-cow-filter block size to 64K, but it was still a
|
||
|
fixed size. In contrast the cache filter allows the block size to be
|
||
|
adjusted.
|
||
|
|
||
|
Allow the block size in this filter to be adjusted up or down with a
|
||
|
new cow-block-size=N parameter.
|
||
|
|
||
|
When using the VDDK plugin, adjusting this setting can make a
|
||
|
difference. The following timings come from a modified virt-v2v which
|
||
|
sets cow-block-size and was used to convert from a VMware server to
|
||
|
-o null (this is also using cow-on-read=true):
|
||
|
|
||
|
cow-block-size=64K: 18m18
|
||
|
cow-block-size=256K: 14m13
|
||
|
cow-block-size=1M: 14m19
|
||
|
cow-block-size=4M: 37m33
|
||
|
|
||
|
As you can see it's not obvious how to choose a good block size, but
|
||
|
at least by allowing adjustment we can tune things.
|
||
|
|
||
|
(cherry picked from commit 7182c47d04d2b68005fceadefc0c14bfaa61a533)
|
||
|
---
|
||
|
filters/cow/blk.c | 35 +++----
|
||
|
filters/cow/blk.h | 5 -
|
||
|
filters/cow/cow.c | 150 +++++++++++++++++-------------
|
||
|
filters/cow/cow.h | 39 ++++++++
|
||
|
filters/cow/nbdkit-cow-filter.pod | 5 +
|
||
|
tests/Makefile.am | 2 +
|
||
|
tests/test-cow-block-size.sh | 72 ++++++++++++++
|
||
|
7 files changed, 221 insertions(+), 87 deletions(-)
|
||
|
create mode 100644 filters/cow/cow.h
|
||
|
create mode 100755 tests/test-cow-block-size.sh
|
||
|
|
||
|
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
|
||
|
index c22d5886..f9341dc1 100644
|
||
|
--- a/filters/cow/blk.c
|
||
|
+++ b/filters/cow/blk.c
|
||
|
@@ -99,6 +99,7 @@
|
||
|
#include "pwrite.h"
|
||
|
#include "utils.h"
|
||
|
|
||
|
+#include "cow.h"
|
||
|
#include "blk.h"
|
||
|
|
||
|
/* The temporary overlay. */
|
||
|
@@ -137,7 +138,7 @@ blk_init (void)
|
||
|
size_t len;
|
||
|
char *template;
|
||
|
|
||
|
- bitmap_init (&bm, BLKSIZE, 2 /* bits per block */);
|
||
|
+ bitmap_init (&bm, blksize, 2 /* bits per block */);
|
||
|
|
||
|
tmpdir = getenv ("TMPDIR");
|
||
|
if (!tmpdir)
|
||
|
@@ -199,7 +200,7 @@ blk_set_size (uint64_t new_size)
|
||
|
if (bitmap_resize (&bm, size) == -1)
|
||
|
return -1;
|
||
|
|
||
|
- if (ftruncate (fd, ROUND_UP (size, BLKSIZE)) == -1) {
|
||
|
+ if (ftruncate (fd, ROUND_UP (size, blksize)) == -1) {
|
||
|
nbdkit_error ("ftruncate: %m");
|
||
|
return -1;
|
||
|
}
|
||
|
@@ -228,7 +229,7 @@ blk_read_multiple (nbdkit_next *next,
|
||
|
uint64_t blknum, uint64_t nrblocks,
|
||
|
uint8_t *block, bool cow_on_read, int *err)
|
||
|
{
|
||
|
- off_t offset = blknum * BLKSIZE;
|
||
|
+ off_t offset = blknum * blksize;
|
||
|
enum bm_entry state;
|
||
|
uint64_t b, runblocks;
|
||
|
|
||
|
@@ -262,8 +263,8 @@ blk_read_multiple (nbdkit_next *next,
|
||
|
if (state == BLOCK_NOT_ALLOCATED) { /* Read underlying plugin. */
|
||
|
unsigned n, tail = 0;
|
||
|
|
||
|
- assert (BLKSIZE * runblocks <= UINT_MAX);
|
||
|
- n = BLKSIZE * runblocks;
|
||
|
+ assert (blksize * runblocks <= UINT_MAX);
|
||
|
+ n = blksize * runblocks;
|
||
|
|
||
|
if (offset + n > size) {
|
||
|
tail = offset + n - size;
|
||
|
@@ -288,7 +289,7 @@ blk_read_multiple (nbdkit_next *next,
|
||
|
"at offset %" PRIu64 " into the cache",
|
||
|
runblocks, offset);
|
||
|
|
||
|
- if (full_pwrite (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
||
|
+ if (full_pwrite (fd, block, blksize * runblocks, offset) == -1) {
|
||
|
*err = errno;
|
||
|
nbdkit_error ("pwrite: %m");
|
||
|
return -1;
|
||
|
@@ -298,14 +299,14 @@ blk_read_multiple (nbdkit_next *next,
|
||
|
}
|
||
|
}
|
||
|
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
|
||
|
- if (full_pread (fd, block, BLKSIZE * runblocks, offset) == -1) {
|
||
|
+ if (full_pread (fd, block, blksize * runblocks, offset) == -1) {
|
||
|
*err = errno;
|
||
|
nbdkit_error ("pread: %m");
|
||
|
return -1;
|
||
|
}
|
||
|
}
|
||
|
else /* state == BLOCK_TRIMMED */ {
|
||
|
- memset (block, 0, BLKSIZE * runblocks);
|
||
|
+ memset (block, 0, blksize * runblocks);
|
||
|
}
|
||
|
|
||
|
/* If all done, return. */
|
||
|
@@ -316,7 +317,7 @@ blk_read_multiple (nbdkit_next *next,
|
||
|
return blk_read_multiple (next,
|
||
|
blknum + runblocks,
|
||
|
nrblocks - runblocks,
|
||
|
- block + BLKSIZE * runblocks,
|
||
|
+ block + blksize * runblocks,
|
||
|
cow_on_read, err);
|
||
|
}
|
||
|
|
||
|
@@ -333,9 +334,9 @@ blk_cache (nbdkit_next *next,
|
||
|
{
|
||
|
/* XXX Could make this lock more fine-grained with some thought. */
|
||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
||
|
- off_t offset = blknum * BLKSIZE;
|
||
|
+ off_t offset = blknum * blksize;
|
||
|
enum bm_entry state = bitmap_get_blk (&bm, blknum, BLOCK_NOT_ALLOCATED);
|
||
|
- unsigned n = BLKSIZE, tail = 0;
|
||
|
+ unsigned n = blksize, tail = 0;
|
||
|
|
||
|
if (offset + n > size) {
|
||
|
tail = offset + n - size;
|
||
|
@@ -348,7 +349,7 @@ blk_cache (nbdkit_next *next,
|
||
|
|
||
|
if (state == BLOCK_ALLOCATED) {
|
||
|
#if HAVE_POSIX_FADVISE
|
||
|
- int r = posix_fadvise (fd, offset, BLKSIZE, POSIX_FADV_WILLNEED);
|
||
|
+ int r = posix_fadvise (fd, offset, blksize, POSIX_FADV_WILLNEED);
|
||
|
if (r) {
|
||
|
errno = r;
|
||
|
nbdkit_error ("posix_fadvise: %m");
|
||
|
@@ -373,7 +374,7 @@ blk_cache (nbdkit_next *next,
|
||
|
memset (block + n, 0, tail);
|
||
|
|
||
|
if (mode == BLK_CACHE_COW) {
|
||
|
- if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
|
||
|
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
|
||
|
*err = errno;
|
||
|
nbdkit_error ("pwrite: %m");
|
||
|
return -1;
|
||
|
@@ -386,13 +387,13 @@ blk_cache (nbdkit_next *next,
|
||
|
int
|
||
|
blk_write (uint64_t blknum, const uint8_t *block, int *err)
|
||
|
{
|
||
|
- off_t offset = blknum * BLKSIZE;
|
||
|
+ off_t offset = blknum * blksize;
|
||
|
|
||
|
if (cow_debug_verbose)
|
||
|
nbdkit_debug ("cow: blk_write block %" PRIu64 " (offset %" PRIu64 ")",
|
||
|
blknum, (uint64_t) offset);
|
||
|
|
||
|
- if (full_pwrite (fd, block, BLKSIZE, offset) == -1) {
|
||
|
+ if (full_pwrite (fd, block, blksize, offset) == -1) {
|
||
|
*err = errno;
|
||
|
nbdkit_error ("pwrite: %m");
|
||
|
return -1;
|
||
|
@@ -407,14 +408,14 @@ blk_write (uint64_t blknum, const uint8_t *block, int *err)
|
||
|
int
|
||
|
blk_trim (uint64_t blknum, int *err)
|
||
|
{
|
||
|
- off_t offset = blknum * BLKSIZE;
|
||
|
+ off_t offset = blknum * blksize;
|
||
|
|
||
|
if (cow_debug_verbose)
|
||
|
nbdkit_debug ("cow: blk_trim block %" PRIu64 " (offset %" PRIu64 ")",
|
||
|
blknum, (uint64_t) offset);
|
||
|
|
||
|
/* XXX As an optimization we could punch a whole in the overlay
|
||
|
- * here. However it's not trivial since BLKSIZE is unrelated to the
|
||
|
+ * here. However it's not trivial since blksize is unrelated to the
|
||
|
* overlay filesystem block size.
|
||
|
*/
|
||
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
||
|
diff --git a/filters/cow/blk.h b/filters/cow/blk.h
|
||
|
index b7e6f092..62fb5416 100644
|
||
|
--- a/filters/cow/blk.h
|
||
|
+++ b/filters/cow/blk.h
|
||
|
@@ -33,11 +33,6 @@
|
||
|
#ifndef NBDKIT_BLK_H
|
||
|
#define NBDKIT_BLK_H
|
||
|
|
||
|
-/* Size of a block in the overlay. A 4K block size means that we need
|
||
|
- * 64 MB of memory to store the bitmap for a 1 TB underlying image.
|
||
|
- */
|
||
|
-#define BLKSIZE 65536
|
||
|
-
|
||
|
/* Initialize the overlay and bitmap. */
|
||
|
extern int blk_init (void);
|
||
|
|
||
|
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
|
||
|
index 6efb39f2..1c62c857 100644
|
||
|
--- a/filters/cow/cow.c
|
||
|
+++ b/filters/cow/cow.c
|
||
|
@@ -40,6 +40,7 @@
|
||
|
#include <string.h>
|
||
|
#include <unistd.h>
|
||
|
#include <errno.h>
|
||
|
+#include <limits.h>
|
||
|
|
||
|
#include <pthread.h>
|
||
|
|
||
|
@@ -47,9 +48,11 @@
|
||
|
|
||
|
#include "cleanup.h"
|
||
|
#include "isaligned.h"
|
||
|
+#include "ispowerof2.h"
|
||
|
#include "minmax.h"
|
||
|
#include "rounding.h"
|
||
|
|
||
|
+#include "cow.h"
|
||
|
#include "blk.h"
|
||
|
|
||
|
/* Read-modify-write requests are serialized through this global lock.
|
||
|
@@ -58,6 +61,8 @@
|
||
|
*/
|
||
|
static pthread_mutex_t rmw_lock = PTHREAD_MUTEX_INITIALIZER;
|
||
|
|
||
|
+unsigned blksize = 65536; /* block size */
|
||
|
+
|
||
|
static bool cow_on_cache;
|
||
|
|
||
|
/* Cache on read ("cow-on-read") mode. */
|
||
|
@@ -69,13 +74,6 @@ extern enum cor_mode {
|
||
|
enum cor_mode cor_mode = COR_OFF;
|
||
|
const char *cor_path;
|
||
|
|
||
|
-static void
|
||
|
-cow_load (void)
|
||
|
-{
|
||
|
- if (blk_init () == -1)
|
||
|
- exit (EXIT_FAILURE);
|
||
|
-}
|
||
|
-
|
||
|
static void
|
||
|
cow_unload (void)
|
||
|
{
|
||
|
@@ -86,7 +84,19 @@ static int
|
||
|
cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||
|
const char *key, const char *value)
|
||
|
{
|
||
|
- if (strcmp (key, "cow-on-cache") == 0) {
|
||
|
+ if (strcmp (key, "cow-block-size") == 0) {
|
||
|
+ int64_t r = nbdkit_parse_size (value);
|
||
|
+ if (r == -1)
|
||
|
+ return -1;
|
||
|
+ if (r < 4096 || r > UINT_MAX || !is_power_of_2 (r)) {
|
||
|
+ nbdkit_error ("cow-block-size is out of range (4096..2G) "
|
||
|
+ "or not a power of 2");
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
+ blksize = r;
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+ else if (strcmp (key, "cow-on-cache") == 0) {
|
||
|
int r;
|
||
|
|
||
|
r = nbdkit_parse_bool (value);
|
||
|
@@ -114,9 +124,19 @@ cow_config (nbdkit_next_config *next, nbdkit_backend *nxdata,
|
||
|
}
|
||
|
|
||
|
#define cow_config_help \
|
||
|
+ "cow-block-size=<N> Set COW block size.\n" \
|
||
|
"cow-on-cache=<BOOL> Copy cache (prefetch) requests to the overlay.\n" \
|
||
|
"cow-on-read=<BOOL>|/PATH Copy read requests to the overlay."
|
||
|
|
||
|
+static int
|
||
|
+cow_get_ready (int thread_model)
|
||
|
+{
|
||
|
+ if (blk_init () == -1)
|
||
|
+ return -1;
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
/* Decide if cow-on-read is currently on or off. */
|
||
|
bool
|
||
|
cow_on_read (void)
|
||
|
@@ -249,8 +269,8 @@ cow_pread (nbdkit_next *next,
|
||
|
uint64_t blknum, blkoffs, nrblocks;
|
||
|
int r;
|
||
|
|
||
|
- if (!IS_ALIGNED (count | offset, BLKSIZE)) {
|
||
|
- block = malloc (BLKSIZE);
|
||
|
+ if (!IS_ALIGNED (count | offset, blksize)) {
|
||
|
+ block = malloc (blksize);
|
||
|
if (block == NULL) {
|
||
|
*err = errno;
|
||
|
nbdkit_error ("malloc: %m");
|
||
|
@@ -258,12 +278,12 @@ cow_pread (nbdkit_next *next,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
- blknum = offset / BLKSIZE; /* block number */
|
||
|
- blkoffs = offset % BLKSIZE; /* offset within the block */
|
||
|
+ blknum = offset / blksize; /* block number */
|
||
|
+ blkoffs = offset % blksize; /* offset within the block */
|
||
|
|
||
|
/* Unaligned head */
|
||
|
if (blkoffs) {
|
||
|
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
|
||
|
+ uint64_t n = MIN (blksize - blkoffs, count);
|
||
|
|
||
|
assert (block);
|
||
|
r = blk_read (next, blknum, block, cow_on_read (), err);
|
||
|
@@ -279,15 +299,15 @@ cow_pread (nbdkit_next *next,
|
||
|
}
|
||
|
|
||
|
/* Aligned body */
|
||
|
- nrblocks = count / BLKSIZE;
|
||
|
+ nrblocks = count / blksize;
|
||
|
if (nrblocks > 0) {
|
||
|
r = blk_read_multiple (next, blknum, nrblocks, buf, cow_on_read (), err);
|
||
|
if (r == -1)
|
||
|
return -1;
|
||
|
|
||
|
- buf += nrblocks * BLKSIZE;
|
||
|
- count -= nrblocks * BLKSIZE;
|
||
|
- offset += nrblocks * BLKSIZE;
|
||
|
+ buf += nrblocks * blksize;
|
||
|
+ count -= nrblocks * blksize;
|
||
|
+ offset += nrblocks * blksize;
|
||
|
blknum += nrblocks;
|
||
|
}
|
||
|
|
||
|
@@ -314,8 +334,8 @@ cow_pwrite (nbdkit_next *next,
|
||
|
uint64_t blknum, blkoffs;
|
||
|
int r;
|
||
|
|
||
|
- if (!IS_ALIGNED (count | offset, BLKSIZE)) {
|
||
|
- block = malloc (BLKSIZE);
|
||
|
+ if (!IS_ALIGNED (count | offset, blksize)) {
|
||
|
+ block = malloc (blksize);
|
||
|
if (block == NULL) {
|
||
|
*err = errno;
|
||
|
nbdkit_error ("malloc: %m");
|
||
|
@@ -323,12 +343,12 @@ cow_pwrite (nbdkit_next *next,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
- blknum = offset / BLKSIZE; /* block number */
|
||
|
- blkoffs = offset % BLKSIZE; /* offset within the block */
|
||
|
+ blknum = offset / blksize; /* block number */
|
||
|
+ blkoffs = offset % blksize; /* offset within the block */
|
||
|
|
||
|
/* Unaligned head */
|
||
|
if (blkoffs) {
|
||
|
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
|
||
|
+ uint64_t n = MIN (blksize - blkoffs, count);
|
||
|
|
||
|
/* Do a read-modify-write operation on the current block.
|
||
|
* Hold the rmw_lock over the whole operation.
|
||
|
@@ -350,14 +370,14 @@ cow_pwrite (nbdkit_next *next,
|
||
|
}
|
||
|
|
||
|
/* Aligned body */
|
||
|
- while (count >= BLKSIZE) {
|
||
|
+ while (count >= blksize) {
|
||
|
r = blk_write (blknum, buf, err);
|
||
|
if (r == -1)
|
||
|
return -1;
|
||
|
|
||
|
- buf += BLKSIZE;
|
||
|
- count -= BLKSIZE;
|
||
|
- offset += BLKSIZE;
|
||
|
+ buf += blksize;
|
||
|
+ count -= blksize;
|
||
|
+ offset += blksize;
|
||
|
blknum++;
|
||
|
}
|
||
|
|
||
|
@@ -397,19 +417,19 @@ cow_zero (nbdkit_next *next,
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
- block = malloc (BLKSIZE);
|
||
|
+ block = malloc (blksize);
|
||
|
if (block == NULL) {
|
||
|
*err = errno;
|
||
|
nbdkit_error ("malloc: %m");
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
- blknum = offset / BLKSIZE; /* block number */
|
||
|
- blkoffs = offset % BLKSIZE; /* offset within the block */
|
||
|
+ blknum = offset / blksize; /* block number */
|
||
|
+ blkoffs = offset % blksize; /* offset within the block */
|
||
|
|
||
|
/* Unaligned head */
|
||
|
if (blkoffs) {
|
||
|
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
|
||
|
+ uint64_t n = MIN (blksize - blkoffs, count);
|
||
|
|
||
|
/* Do a read-modify-write operation on the current block.
|
||
|
* Hold the rmw_lock over the whole operation.
|
||
|
@@ -429,9 +449,9 @@ cow_zero (nbdkit_next *next,
|
||
|
}
|
||
|
|
||
|
/* Aligned body */
|
||
|
- if (count >= BLKSIZE)
|
||
|
- memset (block, 0, BLKSIZE);
|
||
|
- while (count >= BLKSIZE) {
|
||
|
+ if (count >= blksize)
|
||
|
+ memset (block, 0, blksize);
|
||
|
+ while (count >= blksize) {
|
||
|
/* XXX There is the possibility of optimizing this: since this loop is
|
||
|
* writing a whole, aligned block, we should use FALLOC_FL_ZERO_RANGE.
|
||
|
*/
|
||
|
@@ -439,8 +459,8 @@ cow_zero (nbdkit_next *next,
|
||
|
if (r == -1)
|
||
|
return -1;
|
||
|
|
||
|
- count -= BLKSIZE;
|
||
|
- offset += BLKSIZE;
|
||
|
+ count -= blksize;
|
||
|
+ offset += blksize;
|
||
|
blknum++;
|
||
|
}
|
||
|
|
||
|
@@ -471,8 +491,8 @@ cow_trim (nbdkit_next *next,
|
||
|
uint64_t blknum, blkoffs;
|
||
|
int r;
|
||
|
|
||
|
- if (!IS_ALIGNED (count | offset, BLKSIZE)) {
|
||
|
- block = malloc (BLKSIZE);
|
||
|
+ if (!IS_ALIGNED (count | offset, blksize)) {
|
||
|
+ block = malloc (blksize);
|
||
|
if (block == NULL) {
|
||
|
*err = errno;
|
||
|
nbdkit_error ("malloc: %m");
|
||
|
@@ -480,12 +500,12 @@ cow_trim (nbdkit_next *next,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
- blknum = offset / BLKSIZE; /* block number */
|
||
|
- blkoffs = offset % BLKSIZE; /* offset within the block */
|
||
|
+ blknum = offset / blksize; /* block number */
|
||
|
+ blkoffs = offset % blksize; /* offset within the block */
|
||
|
|
||
|
/* Unaligned head */
|
||
|
if (blkoffs) {
|
||
|
- uint64_t n = MIN (BLKSIZE - blkoffs, count);
|
||
|
+ uint64_t n = MIN (blksize - blkoffs, count);
|
||
|
|
||
|
/* Do a read-modify-write operation on the current block.
|
||
|
* Hold the lock over the whole operation.
|
||
|
@@ -505,13 +525,13 @@ cow_trim (nbdkit_next *next,
|
||
|
}
|
||
|
|
||
|
/* Aligned body */
|
||
|
- while (count >= BLKSIZE) {
|
||
|
+ while (count >= blksize) {
|
||
|
r = blk_trim (blknum, err);
|
||
|
if (r == -1)
|
||
|
return -1;
|
||
|
|
||
|
- count -= BLKSIZE;
|
||
|
- offset += BLKSIZE;
|
||
|
+ count -= blksize;
|
||
|
+ offset += blksize;
|
||
|
blknum++;
|
||
|
}
|
||
|
|
||
|
@@ -568,22 +588,22 @@ cow_cache (nbdkit_next *next,
|
||
|
mode = BLK_CACHE_COW;
|
||
|
|
||
|
assert (!flags);
|
||
|
- block = malloc (BLKSIZE);
|
||
|
+ block = malloc (blksize);
|
||
|
if (block == NULL) {
|
||
|
*err = errno;
|
||
|
nbdkit_error ("malloc: %m");
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
- blknum = offset / BLKSIZE; /* block number */
|
||
|
- blkoffs = offset % BLKSIZE; /* offset within the block */
|
||
|
+ blknum = offset / blksize; /* block number */
|
||
|
+ blkoffs = offset % blksize; /* offset within the block */
|
||
|
|
||
|
/* Unaligned head */
|
||
|
remaining += blkoffs;
|
||
|
offset -= blkoffs;
|
||
|
|
||
|
/* Unaligned tail */
|
||
|
- remaining = ROUND_UP (remaining, BLKSIZE);
|
||
|
+ remaining = ROUND_UP (remaining, blksize);
|
||
|
|
||
|
/* Aligned body */
|
||
|
while (remaining) {
|
||
|
@@ -591,8 +611,8 @@ cow_cache (nbdkit_next *next,
|
||
|
if (r == -1)
|
||
|
return -1;
|
||
|
|
||
|
- remaining -= BLKSIZE;
|
||
|
- offset += BLKSIZE;
|
||
|
+ remaining -= blksize;
|
||
|
+ offset += blksize;
|
||
|
blknum++;
|
||
|
}
|
||
|
|
||
|
@@ -616,13 +636,13 @@ cow_extents (nbdkit_next *next,
|
||
|
* value so rounding up is safe here.
|
||
|
*/
|
||
|
end = offset + count;
|
||
|
- offset = ROUND_DOWN (offset, BLKSIZE);
|
||
|
- end = ROUND_UP (end, BLKSIZE);
|
||
|
+ offset = ROUND_DOWN (offset, blksize);
|
||
|
+ end = ROUND_UP (end, blksize);
|
||
|
count = end - offset;
|
||
|
- blknum = offset / BLKSIZE;
|
||
|
+ blknum = offset / blksize;
|
||
|
|
||
|
- assert (IS_ALIGNED (offset, BLKSIZE));
|
||
|
- assert (IS_ALIGNED (count, BLKSIZE));
|
||
|
+ assert (IS_ALIGNED (offset, blksize));
|
||
|
+ assert (IS_ALIGNED (count, blksize));
|
||
|
assert (count > 0); /* We must make forward progress. */
|
||
|
|
||
|
while (count > 0) {
|
||
|
@@ -634,7 +654,7 @@ cow_extents (nbdkit_next *next,
|
||
|
/* Present in the overlay. */
|
||
|
if (present) {
|
||
|
e.offset = offset;
|
||
|
- e.length = BLKSIZE;
|
||
|
+ e.length = blksize;
|
||
|
|
||
|
if (trimmed)
|
||
|
e.type = NBDKIT_EXTENT_HOLE|NBDKIT_EXTENT_ZERO;
|
||
|
@@ -647,8 +667,8 @@ cow_extents (nbdkit_next *next,
|
||
|
}
|
||
|
|
||
|
blknum++;
|
||
|
- offset += BLKSIZE;
|
||
|
- count -= BLKSIZE;
|
||
|
+ offset += blksize;
|
||
|
+ count -= blksize;
|
||
|
}
|
||
|
|
||
|
/* Not present in the overlay, but we can ask the plugin. */
|
||
|
@@ -667,12 +687,12 @@ cow_extents (nbdkit_next *next,
|
||
|
* (range_count), but count is a 64 bit quantity, so don't
|
||
|
* overflow range_count here.
|
||
|
*/
|
||
|
- if (range_count >= UINT32_MAX - BLKSIZE + 1) break;
|
||
|
+ if (range_count >= UINT32_MAX - blksize + 1) break;
|
||
|
|
||
|
blknum++;
|
||
|
- offset += BLKSIZE;
|
||
|
- count -= BLKSIZE;
|
||
|
- range_count += BLKSIZE;
|
||
|
+ offset += blksize;
|
||
|
+ count -= blksize;
|
||
|
+ range_count += blksize;
|
||
|
|
||
|
if (count == 0) break;
|
||
|
blk_status (blknum, &present, &trimmed);
|
||
|
@@ -706,7 +726,7 @@ cow_extents (nbdkit_next *next,
|
||
|
/* Otherwise assume the block is non-sparse. */
|
||
|
else {
|
||
|
e.offset = offset;
|
||
|
- e.length = BLKSIZE;
|
||
|
+ e.length = blksize;
|
||
|
e.type = 0;
|
||
|
|
||
|
if (nbdkit_add_extent (extents, e.offset, e.length, e.type) == -1) {
|
||
|
@@ -715,8 +735,8 @@ cow_extents (nbdkit_next *next,
|
||
|
}
|
||
|
|
||
|
blknum++;
|
||
|
- offset += BLKSIZE;
|
||
|
- count -= BLKSIZE;
|
||
|
+ offset += blksize;
|
||
|
+ count -= blksize;
|
||
|
}
|
||
|
|
||
|
/* If the caller only wanted the first extent, and we've managed
|
||
|
@@ -734,11 +754,11 @@ cow_extents (nbdkit_next *next,
|
||
|
static struct nbdkit_filter filter = {
|
||
|
.name = "cow",
|
||
|
.longname = "nbdkit copy-on-write (COW) filter",
|
||
|
- .load = cow_load,
|
||
|
.unload = cow_unload,
|
||
|
.open = cow_open,
|
||
|
.config = cow_config,
|
||
|
.config_help = cow_config_help,
|
||
|
+ .get_ready = cow_get_ready,
|
||
|
.prepare = cow_prepare,
|
||
|
.get_size = cow_get_size,
|
||
|
.can_write = cow_can_write,
|
||
|
diff --git a/filters/cow/cow.h b/filters/cow/cow.h
|
||
|
new file mode 100644
|
||
|
index 00000000..d46dbe91
|
||
|
--- /dev/null
|
||
|
+++ b/filters/cow/cow.h
|
||
|
@@ -0,0 +1,39 @@
|
||
|
+/* nbdkit
|
||
|
+ * Copyright (C) 2018-2021 Red Hat Inc.
|
||
|
+ *
|
||
|
+ * Redistribution and use in source and binary forms, with or without
|
||
|
+ * modification, are permitted provided that the following conditions are
|
||
|
+ * met:
|
||
|
+ *
|
||
|
+ * * Redistributions of source code must retain the above copyright
|
||
|
+ * notice, this list of conditions and the following disclaimer.
|
||
|
+ *
|
||
|
+ * * Redistributions in binary form must reproduce the above copyright
|
||
|
+ * notice, this list of conditions and the following disclaimer in the
|
||
|
+ * documentation and/or other materials provided with the distribution.
|
||
|
+ *
|
||
|
+ * * Neither the name of Red Hat nor the names of its contributors may be
|
||
|
+ * used to endorse or promote products derived from this software without
|
||
|
+ * specific prior written permission.
|
||
|
+ *
|
||
|
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||
|
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||
|
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||
|
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||
|
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||
|
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||
|
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||
|
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||
|
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||
|
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||
|
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||
|
+ * SUCH DAMAGE.
|
||
|
+ */
|
||
|
+
|
||
|
+#ifndef NBDKIT_COW_H
|
||
|
+#define NBDKIT_COW_H
|
||
|
+
|
||
|
+/* Size of a block in the overlay. */
|
||
|
+extern unsigned blksize;
|
||
|
+
|
||
|
+#endif /* NBDKIT_COW_H */
|
||
|
diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod
|
||
|
index 7f861140..997c9097 100644
|
||
|
--- a/filters/cow/nbdkit-cow-filter.pod
|
||
|
+++ b/filters/cow/nbdkit-cow-filter.pod
|
||
|
@@ -5,6 +5,7 @@ nbdkit-cow-filter - nbdkit copy-on-write (COW) filter
|
||
|
=head1 SYNOPSIS
|
||
|
|
||
|
nbdkit --filter=cow plugin [plugin-args...]
|
||
|
+ [cow-block-size=N]
|
||
|
[cow-on-cache=false|true]
|
||
|
[cow-on-read=false|true|/PATH]
|
||
|
|
||
|
@@ -42,6 +43,10 @@ serve the same data to each client.
|
||
|
|
||
|
=over 4
|
||
|
|
||
|
+=item B<cow-block-size=>N
|
||
|
+
|
||
|
+Set the block size used by the filter. N must be a power of 2. The default is 64K.
|
||
|
+
|
||
|
=item B<cow-on-cache=false>
|
||
|
|
||
|
Do not save data from cache (prefetch) requests in the overlay. This
|
||
|
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
||
|
index e61c5829..d93f848f 100644
|
||
|
--- a/tests/Makefile.am
|
||
|
+++ b/tests/Makefile.am
|
||
|
@@ -1404,6 +1404,7 @@ EXTRA_DIST += \
|
||
|
if HAVE_MKE2FS_WITH_D
|
||
|
TESTS += \
|
||
|
test-cow.sh \
|
||
|
+ test-cow-block-size.sh \
|
||
|
test-cow-extents1.sh \
|
||
|
test-cow-extents2.sh \
|
||
|
test-cow-extents-large.sh \
|
||
|
@@ -1415,6 +1416,7 @@ endif
|
||
|
TESTS += test-cow-null.sh
|
||
|
EXTRA_DIST += \
|
||
|
test-cow.sh \
|
||
|
+ test-cow-block-size.sh \
|
||
|
test-cow-extents1.sh \
|
||
|
test-cow-extents2.sh \
|
||
|
test-cow-extents-large.sh \
|
||
|
diff --git a/tests/test-cow-block-size.sh b/tests/test-cow-block-size.sh
|
||
|
new file mode 100755
|
||
|
index 00000000..6de1c068
|
||
|
--- /dev/null
|
||
|
+++ b/tests/test-cow-block-size.sh
|
||
|
@@ -0,0 +1,72 @@
|
||
|
+#!/usr/bin/env bash
|
||
|
+# nbdkit
|
||
|
+# Copyright (C) 2018-2021 Red Hat Inc.
|
||
|
+#
|
||
|
+# Redistribution and use in source and binary forms, with or without
|
||
|
+# modification, are permitted provided that the following conditions are
|
||
|
+# met:
|
||
|
+#
|
||
|
+# * Redistributions of source code must retain the above copyright
|
||
|
+# notice, this list of conditions and the following disclaimer.
|
||
|
+#
|
||
|
+# * Redistributions in binary form must reproduce the above copyright
|
||
|
+# notice, this list of conditions and the following disclaimer in the
|
||
|
+# documentation and/or other materials provided with the distribution.
|
||
|
+#
|
||
|
+# * Neither the name of Red Hat nor the names of its contributors may be
|
||
|
+# used to endorse or promote products derived from this software without
|
||
|
+# specific prior written permission.
|
||
|
+#
|
||
|
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||
|
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||
|
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||
|
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||
|
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||
|
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||
|
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||
|
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||
|
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||
|
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||
|
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||
|
+# SUCH DAMAGE.
|
||
|
+
|
||
|
+source ./functions.sh
|
||
|
+set -e
|
||
|
+set -x
|
||
|
+
|
||
|
+requires_plugin linuxdisk
|
||
|
+requires guestfish --version
|
||
|
+requires nbdcopy --version
|
||
|
+requires qemu-img --version
|
||
|
+
|
||
|
+sock=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX)
|
||
|
+files="cow-block-size-base.img $sock cow-block-size.pid"
|
||
|
+rm -f $files
|
||
|
+cleanup_fn rm -f $files
|
||
|
+
|
||
|
+# Create a base image which is partitioned with an empty filesystem.
|
||
|
+rm -rf cow-block-size.d
|
||
|
+mkdir cow-block-size.d
|
||
|
+cleanup_fn rm -rf cow-block-size.d
|
||
|
+nbdkit -fv -U - linuxdisk cow-block-size.d size=100M \
|
||
|
+ --run 'nbdcopy "$uri" cow-block-size-base.img'
|
||
|
+lastmod="$(stat -c "%y" cow-block-size-base.img)"
|
||
|
+
|
||
|
+# Run nbdkit with a COW overlay, 4M block size and copy on read.
|
||
|
+start_nbdkit -P cow-block-size.pid -U $sock \
|
||
|
+ --filter=cow file cow-block-size-base.img \
|
||
|
+ cow-block-size=4M cow-on-read=true
|
||
|
+
|
||
|
+# Write some data into the overlay.
|
||
|
+guestfish --format=raw -a "nbd://?socket=$sock" -m /dev/sda1 <<EOF
|
||
|
+ fill-pattern "abcde" 128K /large
|
||
|
+ write /hello "hello, world"
|
||
|
+EOF
|
||
|
+
|
||
|
+# The original file must not be modified.
|
||
|
+currmod="$(stat -c "%y" cow-block-size-base.img)"
|
||
|
+
|
||
|
+if [ "$lastmod" != "$currmod" ]; then
|
||
|
+ echo "$0: FAILED last modified time of base file changed"
|
||
|
+ exit 1
|
||
|
+fi
|
||
|
--
|
||
|
2.31.1
|
||
|
|