cow: Support overlays larger than 16T on ext4 (further fixes)

resolves: RHEL-163983
This commit is contained in:
Richard W.M. Jones 2026-04-03 14:25:41 +01:00
parent 4976c635f8
commit 3adc8bcddc
5 changed files with 334 additions and 2 deletions

View File

@ -0,0 +1,132 @@
From 25e1d1fdaedfd3c0cdce0600585f42478c93eda7 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 3 Apr 2026 13:41:36 +0100
Subject: [PATCH] cache, cow: Add prefix before more calls
Prefix more error messages with the filter name ("cache: " or "cow: ")
to help diagnose which filter they originate from.
Related: commit 63416a8347f9865cc69e162a1c1e42015d394b24
---
filters/cache/blk.c | 14 +++++++-------
filters/cow/blk.c | 10 +++++-----
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/filters/cache/blk.c b/filters/cache/blk.c
index 9a200dde..02c33633 100644
--- a/filters/cache/blk.c
+++ b/filters/cache/blk.c
@@ -157,7 +157,7 @@ blk_init (void)
* least as large as the filesystem block size.
*/
if (fstatvfs (fd, &statvfs) == -1) {
- nbdkit_error ("fstatvfs: %s: %m", tmpdir);
+ nbdkit_error ("cache: fstatvfs: %s: %m", tmpdir);
return -1;
}
blksize = MAX (min_block_size, statvfs.f_bsize);
@@ -263,7 +263,7 @@ _blk_read_multiple (nbdkit_next *next,
if (full_pwrite (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
- nbdkit_error ("pwrite: %m");
+ nbdkit_error ("cache: pwrite: %m");
return -1;
}
for (b = 0; b < runblocks; ++b) {
@@ -275,7 +275,7 @@ _blk_read_multiple (nbdkit_next *next,
else { /* Read cache. */
if (full_pread (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
- nbdkit_error ("pread: %m");
+ nbdkit_error ("cache: pread: %m");
return -1;
}
for (b = 0; b < runblocks; ++b)
@@ -349,7 +349,7 @@ blk_cache (nbdkit_next *next,
if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
- nbdkit_error ("pwrite: %m");
+ nbdkit_error ("cache: pwrite: %m");
return -1;
}
bitmap_set_blk (&bm, blknum, BLOCK_CLEAN);
@@ -360,7 +360,7 @@ blk_cache (nbdkit_next *next,
int r = posix_fadvise (fd, offset, blksize, POSIX_FADV_WILLNEED);
if (r) {
errno = r;
- nbdkit_error ("posix_fadvise: %m");
+ nbdkit_error ("cache: posix_fadvise: %m");
return -1;
}
#endif
@@ -390,7 +390,7 @@ blk_writethrough (nbdkit_next *next,
if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
- nbdkit_error ("pwrite: %m");
+ nbdkit_error ("cache: pwrite: %m");
return -1;
}
@@ -424,7 +424,7 @@ blk_write (nbdkit_next *next,
if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
- nbdkit_error ("pwrite: %m");
+ nbdkit_error ("cache: pwrite: %m");
return -1;
}
bitmap_set_blk (&bm, blknum, BLOCK_DIRTY);
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index defd0d94..afb48af9 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -418,7 +418,7 @@ blk_read_multiple (struct blk_overlay *blk,
if (full_pwrite (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
- nbdkit_error ("pwrite: %m");
+ nbdkit_error ("cow: pwrite: %m");
return -1;
}
for (b = 0; b < runblocks; ++b)
@@ -428,7 +428,7 @@ blk_read_multiple (struct blk_overlay *blk,
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
if (full_pread (fd, block, blksize * runblocks, offset) == -1) {
*err = errno;
- nbdkit_error ("pread: %m");
+ nbdkit_error ("cow: pread: %m");
return -1;
}
}
@@ -482,7 +482,7 @@ blk_cache (struct blk_overlay *blk,
int r = posix_fadvise (fd, offset, blksize, POSIX_FADV_WILLNEED);
if (r) {
errno = r;
- nbdkit_error ("posix_fadvise: %m");
+ nbdkit_error ("cow: posix_fadvise: %m");
return -1;
}
#endif
@@ -506,7 +506,7 @@ blk_cache (struct blk_overlay *blk,
if (mode == BLK_CACHE_COW) {
if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
- nbdkit_error ("pwrite: %m");
+ nbdkit_error ("cow: pwrite: %m");
return -1;
}
bitmap_set_blk (&blk->bm, blknum, BLOCK_ALLOCATED);
@@ -527,7 +527,7 @@ blk_write (struct blk_overlay *blk,
if (full_pwrite (fd, block, blksize, offset) == -1) {
*err = errno;
- nbdkit_error ("pwrite: %m");
+ nbdkit_error ("cow: pwrite: %m");
return -1;
}
--
2.47.3

View File

@ -0,0 +1,29 @@
From 7180bbf0f3d7b29e7eccf30207980849f3586583 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 3 Apr 2026 14:06:31 +0100
Subject: [PATCH] cow: Fix block -> fd calculation
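The old expression divided the block number, rather than the byte
offset, by MAX_FILE_SIZE. The result was 0 for any realistic block
number, so the other overlay files were never used.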
Fixes: commit eb79a0e0c63ba1884bde02dc884a26aea2fc4324
---
filters/cow/blk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index afb48af9..3a416d55 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -324,7 +324,7 @@ blk_set_size (struct blk_overlay *blk, uint64_t new_size)
static int
get_fd_for_blknum (struct blk_overlay *blk, uint64_t blknum)
{
- size_t i = blknum / MAX_FILE_SIZE / blksize;
+ size_t i = blknum * blksize / MAX_FILE_SIZE;
assert (i < blk->fds.len);
return blk->fds.ptr[i];
}
--
2.47.3

View File

@ -0,0 +1,111 @@
From c495b7f46b60a8e549327b2444591fe9e3173da0 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 3 Apr 2026 13:50:37 +0100
Subject: [PATCH] cow: Fix offsets when overlay is split
Commit eb79a0e0c6 ("cow: Support overlays larger than 16T on ext4")
splits the overlay into 8T chunks. However, we neglected to adjust the
offsets passed to pread, pwrite and posix_fadvise, so they were still
offsets into the whole overlay rather than into the individual chunk
file, and writes beyond 16T still failed. Fix the offsets so they are
relative to the start of each chunk file.
This did not corrupt data: this bug and the one fixed by the previous
commit together meant that only the first overlay file was ever used,
with full (unsplit) offsets. However, it completely defeated the
intent of commit eb79a0e0c6.
The test that was added only tests around the 8T mark, so does not hit
this case even if TMPDIR is ext4. Adjust the test also.
The error seen is:
nbdkit: file.0: error: pwrite: File too large
Reported-by: Ming Xie
Fixes: commit eb79a0e0c63ba1884bde02dc884a26aea2fc4324
---
filters/cow/blk.c | 18 ++++++++++++------
tests/test-cow-huge.sh | 4 ++--
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 3a416d55..7a3f40c1 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -114,7 +114,8 @@ enum bm_entry {
BLOCK_TRIMMED = 3,
};
-#define MAX_FILE_SIZE (UINT64_C (8) * 1024 * 1024 * 1024 * 1024) /* 8T */
+/* Maximum overlay file size, 8T. This must be a power of 2. */
+#define MAX_FILE_SIZE (UINT64_C (8) * 1024 * 1024 * 1024 * 1024)
static const char *
state_to_string (enum bm_entry state)
@@ -416,7 +417,8 @@ blk_read_multiple (struct blk_overlay *blk,
"at offset %" PRIu64 " into the cache",
runblocks, offset);
- if (full_pwrite (fd, block, blksize * runblocks, offset) == -1) {
+ if (full_pwrite (fd, block, blksize * runblocks,
+ offset & (MAX_FILE_SIZE - 1)) == -1) {
*err = errno;
nbdkit_error ("cow: pwrite: %m");
return -1;
@@ -426,7 +428,8 @@ blk_read_multiple (struct blk_overlay *blk,
}
}
else if (state == BLOCK_ALLOCATED) { /* Read overlay. */
- if (full_pread (fd, block, blksize * runblocks, offset) == -1) {
+ if (full_pread (fd, block, blksize * runblocks,
+ offset & (MAX_FILE_SIZE - 1)) == -1) {
*err = errno;
nbdkit_error ("cow: pread: %m");
return -1;
@@ -479,7 +482,8 @@ blk_cache (struct blk_overlay *blk,
if (state == BLOCK_ALLOCATED) {
#if HAVE_POSIX_FADVISE
- int r = posix_fadvise (fd, offset, blksize, POSIX_FADV_WILLNEED);
+ int r = posix_fadvise (fd, offset & (MAX_FILE_SIZE - 1),
+ blksize, POSIX_FADV_WILLNEED);
if (r) {
errno = r;
nbdkit_error ("cow: posix_fadvise: %m");
@@ -504,7 +508,8 @@ blk_cache (struct blk_overlay *blk,
memset (block + n, 0, tail);
if (mode == BLK_CACHE_COW) {
- if (full_pwrite (fd, block, blksize, offset) == -1) {
+ if (full_pwrite (fd, block, blksize,
+ offset & (MAX_FILE_SIZE - 1)) == -1) {
*err = errno;
nbdkit_error ("cow: pwrite: %m");
return -1;
@@ -525,7 +530,8 @@ blk_write (struct blk_overlay *blk,
nbdkit_debug ("cow: blk_write block %" PRIu64 " (offset %" PRIu64 ")",
blknum, (uint64_t) offset);
- if (full_pwrite (fd, block, blksize, offset) == -1) {
+ if (full_pwrite (fd, block, blksize,
+ offset & (MAX_FILE_SIZE - 1)) == -1) {
*err = errno;
nbdkit_error ("cow: pwrite: %m");
return -1;
diff --git a/tests/test-cow-huge.sh b/tests/test-cow-huge.sh
index cad1f6cc..b459b1b8 100755
--- a/tests/test-cow-huge.sh
+++ b/tests/test-cow-huge.sh
@@ -53,9 +53,9 @@ random.seed(None)
M = 1024*1024
T = 1024*1024*1024*1024
-# Test a few megabytes either side of the 8T boundary.
+# Test a few megabytes either side of the 16T boundary.
overlay = bytearray(10 * M)
-overlay_offset = int(8 * T - len(overlay)/2)
+overlay_offset = int(16 * T - len(overlay)/2)
assert h.get_size() > overlay_offset + len(overlay)
for iter in range(1, 200):
--
2.47.3

View File

@ -0,0 +1,53 @@
From 6ed71d8709cd03dd9a3c53cba9dd03e5b884031d Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 3 Apr 2026 14:10:51 +0100
Subject: [PATCH] cow: Make some offset variables const
These variables are not modified so set them to const.
---
filters/cow/blk.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/filters/cow/blk.c b/filters/cow/blk.c
index 7a3f40c1..4c1ee9d5 100644
--- a/filters/cow/blk.c
+++ b/filters/cow/blk.c
@@ -353,7 +353,7 @@ blk_read_multiple (struct blk_overlay *blk,
uint64_t blknum, uint64_t nrblocks,
uint8_t *block, bool cow_on_read, int *err)
{
- off_t offset = blknum * blksize;
+ const off_t offset = blknum * blksize;
enum bm_entry state;
uint64_t b, runblocks;
const int fd = get_fd_for_blknum (blk, blknum);
@@ -466,7 +466,7 @@ blk_cache (struct blk_overlay *blk,
{
/* XXX Could make this lock more fine-grained with some thought. */
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&blk->lock);
- off_t offset = blknum * blksize;
+ const off_t offset = blknum * blksize;
enum bm_entry state = bitmap_get_blk (&blk->bm, blknum, BLOCK_NOT_ALLOCATED);
unsigned n = blksize, tail = 0;
const int fd = get_fd_for_blknum (blk, blknum);
@@ -523,7 +523,7 @@ int
blk_write (struct blk_overlay *blk,
uint64_t blknum, const uint8_t *block, int *err)
{
- off_t offset = blknum * blksize;
+ const off_t offset = blknum * blksize;
const int fd = get_fd_for_blknum (blk, blknum);
if (cow_debug_verbose)
@@ -547,7 +547,7 @@ int
blk_trim (struct blk_overlay *blk,
uint64_t blknum, int *err)
{
- off_t offset = blknum * blksize;
+ const off_t offset = blknum * blksize;
if (cow_debug_verbose)
nbdkit_debug ("cow: blk_trim block %" PRIu64 " (offset %" PRIu64 ")",
--
2.47.3

View File

@ -55,7 +55,7 @@
Name: nbdkit
Version: 1.47.6
Release: 1%{?dist}
Release: 2%{?dist}
Summary: NBD server
License: BSD-3-Clause
@ -80,7 +80,10 @@ Source3: copy-patches.sh
# https://gitlab.com/nbdkit/nbdkit/-/commits/rhel-10.3/
# Patches.
#(nothing)
Patch0001: 0001-cache-cow-Add-prefix-before-more-calls.patch
Patch0002: 0002-cow-Fix-block-fd-calculation.patch
Patch0003: 0003-cow-Fix-offsets-when-overlay-is-split.patch
Patch0004: 0004-cow-Make-some-offset-variables-const.patch
# For automatic RPM Provides generation.
# See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html
@ -1588,6 +1591,10 @@ fi
%changelog
* Fri Apr 03 2026 Richard W.M. Jones <rjones@redhat.com> - 1.47.6-2
- cow: Support overlays larger than 16T on ext4 (further fixes)
resolves: RHEL-163983
* Thu Apr 02 2026 Richard W.M. Jones <rjones@redhat.com> - 1.47.6-1
- Rebase to nbdkit 1.47.6
Synchronize spec file with Fedora