Allow nbdkit-file-plugin to zero and trim block devices

resolves: RHEL-89371
This commit is contained in:
Richard W.M. Jones 2025-05-01 19:05:19 +01:00
parent 3febd5aa52
commit e585cbdf30
8 changed files with 401 additions and 2 deletions

View File

@ -0,0 +1,26 @@
From 4edf62487072d6e61b95bed6ca55750bf3110ae4 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:03:06 +0100
Subject: [PATCH] file: Fix minor typo in debug message
(cherry picked from commit a75db5636b94c9184f8eb02fd51182d935df64a6)
---
plugins/file/file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index b8470c20..8e4acedd 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -989,7 +989,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
if (r == 0) {
if (file_debug_zero)
- nbdkit_debug ("h->can_zero-range: "
+ nbdkit_debug ("h->can_zero_range: "
"zero succeeded using fallocate");
goto out;
}
--
2.47.1

View File

@ -0,0 +1,36 @@
From 9db29d77149a7bed9daef9f35a584d798e8f22f0 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:21:23 +0100
Subject: [PATCH] file: Add more debugging when -D file.zero=1 is used
(cherry picked from commit ecf6b15fa84a02b74ea969f06552c82ee418b9b4)
---
plugins/file/file.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 8e4acedd..a49c11e9 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -944,7 +944,17 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
static int
do_fallocate (int fd, int mode_, off_t offset, off_t len)
{
- int r = fallocate (fd, mode_, offset, len);
+ int r;
+
+ r = fallocate (fd, mode_, offset, len);
+
+ if (file_debug_zero)
+ nbdkit_debug ("fallocate ([%s%s ], %" PRIu64 ", %" PRIu64") => %d (%d)",
+ mode_ & FALLOC_FL_PUNCH_HOLE ? " FALLOC_FL_PUNCH_HOLE" : "",
+ mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "",
+ (uint64_t) offset, (uint64_t) len, r,
+ r == -1 ? errno : 0);
+
if (r == -1 && errno == ENODEV) {
/* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails
with EOPNOTSUPP in this case. Normalize errno to simplify callers. */
--
2.47.1

View File

@ -0,0 +1,43 @@
From 1df73694a75a6dc353bf15a277164f21e6e66227 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:32:17 +0100
Subject: [PATCH] file: Fix comment style in a few places
No actual change here.
(cherry picked from commit 0df4142c4be2b059c4d17aae0ec71f16ffc9ba35)
---
plugins/file/file.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index a49c11e9..c381c1e2 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -957,7 +957,8 @@ do_fallocate (int fd, int mode_, off_t offset, off_t len)
if (r == -1 && errno == ENODEV) {
/* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails
- with EOPNOTSUPP in this case. Normalize errno to simplify callers. */
+ * with EOPNOTSUPP in this case. Normalize errno to simplify callers.
+ */
errno = EOPNOTSUPP;
}
return r;
@@ -1014,9 +1015,10 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
#endif
#ifdef FALLOC_FL_PUNCH_HOLE
- /* If we can punch hole but may not trim, we can combine punching hole and
- * fallocate to zero a range. This is expected to be more efficient than
- * writing zeroes manually. */
+ /* If we can punch hole but may not trim, we can combine punching
+ * hole and fallocate to zero a range. This is expected to be more
+ * efficient than writing zeroes manually.
+ */
if (h->can_punch_hole && h->can_fallocate) {
int r;
--
2.47.1

View File

@ -0,0 +1,60 @@
From 6f64ded91340340ab803abadf0542d953e6bf2d5 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:59:17 +0100
Subject: [PATCH] file: Fix do_fallocate debugging on Alpine
Alpine has some weird/old kernel that doesn't support
FALLOC_FL_ZERO_RANGE but does support FALLOC_FL_PUNCH_HOLE, so the
debugging I added in commit ecf6b15fa8 failed to compile with:
file.c: In function 'do_fallocate':
file.c:958:27: error: 'FALLOC_FL_ZERO_RANGE' undeclared (first use in this function)
958 | mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "",
| ^~~~~~~~~~~~~~~~~~~~
file.c:958:27: note: each undeclared identifier is reported only once for each function it appears in
make[3]: *** [Makefile:666: nbdkit_file_plugin_la-file.lo] Error 1
Fixes: commit ecf6b15fa84a02b74ea969f06552c82ee418b9b4
(cherry picked from commit 419a347054f81c53706637feddbc5008beab77d3)
---
plugins/file/file.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index c381c1e2..00aa5416 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -940,7 +940,7 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
return 0;
}
-#if defined (FALLOC_FL_PUNCH_HOLE) || defined (FALLOC_FL_ZERO_RANGE)
+#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE)
static int
do_fallocate (int fd, int mode_, off_t offset, off_t len)
{
@@ -949,9 +949,20 @@ do_fallocate (int fd, int mode_, off_t offset, off_t len)
r = fallocate (fd, mode_, offset, len);
if (file_debug_zero)
- nbdkit_debug ("fallocate ([%s%s ], %" PRIu64 ", %" PRIu64") => %d (%d)",
+ nbdkit_debug ("fallocate (["
+#if defined(FALLOC_FL_PUNCH_HOLE)
+ "%s"
+#endif
+#if defined(FALLOC_FL_ZERO_RANGE)
+ "%s"
+#endif
+ " ], %" PRIu64 ", %" PRIu64") => %d (%d)",
+#if defined(FALLOC_FL_PUNCH_HOLE)
mode_ & FALLOC_FL_PUNCH_HOLE ? " FALLOC_FL_PUNCH_HOLE" : "",
+#endif
+#if defined(FALLOC_FL_ZERO_RANGE)
mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "",
+#endif
(uint64_t) offset, (uint64_t) len, r,
r == -1 ? errno : 0);
--
2.47.1

View File

@ -0,0 +1,65 @@
From 6653df377b97d817a1984076cb744c01c86ecf97 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:26:41 +0100
Subject: [PATCH] file: Rename h->can_zeroout to h->can_blkzeroout to reflect
ioctl
Since we're calling the blockdev-specific BLKZEROOUT ioctl when this
flag is set, rename the flag.
(cherry picked from commit fba20ce06c2f0e7c4be7e52e8e1934933851dfbc)
---
plugins/file/file.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 00aa5416..a649ccd7 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -506,7 +506,7 @@ struct handle {
bool can_punch_hole;
bool can_zero_range;
bool can_fallocate;
- bool can_zeroout;
+ bool can_blkzeroout;
};
/* Common code for opening a file by name, used by mode_filename and
@@ -746,7 +746,7 @@ file_open (int readonly)
#endif
h->can_fallocate = true;
- h->can_zeroout = h->is_block_device;
+ h->can_blkzeroout = h->is_block_device;
return h;
}
@@ -1063,14 +1063,14 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
#ifdef BLKZEROOUT
/* For aligned range and block device, we can use BLKZEROOUT. */
- if (h->can_zeroout && IS_ALIGNED (offset | count, h->sector_size)) {
+ if (h->can_blkzeroout && IS_ALIGNED (offset | count, h->sector_size)) {
int r;
uint64_t range[2] = {offset, count};
r = ioctl (h->fd, BLKZEROOUT, &range);
if (r == 0) {
if (file_debug_zero)
- nbdkit_debug ("h->can_zeroout && IS_ALIGNED: "
+ nbdkit_debug ("h->can_blkzeroout && IS_ALIGNED: "
"zero succeeded using BLKZEROOUT");
goto out;
}
@@ -1080,7 +1080,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
return -1;
}
- h->can_zeroout = false;
+ h->can_blkzeroout = false;
}
#endif
--
2.47.1

View File

@ -0,0 +1,38 @@
From 661bb729bfcba63842840c32dc8cd0e778e38b21 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:30:41 +0100
Subject: [PATCH] file: zero: Document implicit order that we will try zeroing
methods
There's no substantive change here. I just pulled out the test (flags
& NBDKIT_FLAG_MAY_TRIM) into a boolean variable, and documented that
we (will) try zero-with-trim methods first.
(cherry picked from commit 61fc023f235b17f8a19302885d1613dd0a7a3793)
---
plugins/file/file.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index a649ccd7..d7405f36 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -981,9 +981,14 @@ static int
file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
{
struct handle *h __attribute__ ((unused)) = handle;
+ const bool may_trim __attribute__ ((unused)) = flags & NBDKIT_FLAG_MAY_TRIM;
+ /* These alternate zeroing methods are ordered. Methods which can
+ * trim (if may_trim is set) are tried first. Methods which can
+ * only zero are tried last.
+ */
#ifdef FALLOC_FL_PUNCH_HOLE
- if (h->can_punch_hole && (flags & NBDKIT_FLAG_MAY_TRIM)) {
+ if (may_trim && h->can_punch_hole) {
int r;
r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
--
2.47.1

View File

@ -0,0 +1,122 @@
From cf35037d52c2abf383fc3e8db00283a58ffc53db Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:36:23 +0100
Subject: [PATCH] file: zero: Use BLKDISCARD method if may_trim is set
If we're allowed to trim and we're writing to a block device,
previously we hit the case fallocate(FALLOC_FL_ZERO_RANGE) first.
This succeeds in Linux, zeroing (not trimming) the range.
However it would be better to trim in this case. Linux supports
ioctl(BLKDISCARD) on block devices, so try this method first.
Fixes: https://issues.redhat.com/browse/RHEL-89353
Reported-by: Germano Veit Michel
Thanks: Eric Blake
(cherry picked from commit 7a9ecda24906c64d9f8c7238a96cb3f686e894eb)
---
plugins/file/file.c | 50 +++++++++++++++++++++++++++++
plugins/file/nbdkit-file-plugin.pod | 5 +++
2 files changed, 55 insertions(+)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index d7405f36..dc100bb3 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -404,6 +404,9 @@ file_dump_plugin (void)
#ifdef BLKSSZGET
printf ("file_blksszget=yes\n");
#endif
+#ifdef BLKDISCARD
+ printf ("file_blkdiscard=yes\n");
+#endif
#ifdef BLKZEROOUT
printf ("file_blkzeroout=yes\n");
#endif
@@ -506,6 +509,7 @@ struct handle {
bool can_punch_hole;
bool can_zero_range;
bool can_fallocate;
+ bool can_blkdiscard;
bool can_blkzeroout;
};
@@ -747,6 +751,7 @@ file_open (int readonly)
h->can_fallocate = true;
h->can_blkzeroout = h->is_block_device;
+ h->can_blkdiscard = h->is_block_device;
return h;
}
@@ -1009,6 +1014,51 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
#endif
+#if defined(BLKDISCARD) && defined(FALLOC_FL_ZERO_RANGE)
+ /* For aligned range and block device, we can use BLKDISCARD to
+ * trim. However BLKDISCARD doesn't necessarily zero (eg for local
+ * disk) so we have to zero first and then discard.
+ *
+ * In future all Linux block devices may understand
+ * FALLOC_FL_PUNCH_HOLE which means this case would no longer be
+ * necessary, since the case above will handle it.
+ */
+ if (may_trim && h->can_blkdiscard && h->can_zero_range &&
+ IS_ALIGNED (offset | count, h->sector_size)) {
+ int r;
+ uint64_t range[2] = {offset, count};
+
+ r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
+ if (r == 0) {
+ /* We could use FALLOC_FL_PUNCH_HOLE here instead, but currently
+ * thin LVs do not support it (XXX 2025-04).
+ */
+ r = ioctl (h->fd, BLKDISCARD, &range);
+ if (r == 0) {
+ if (file_debug_zero)
+ nbdkit_debug ("h->can_blkdiscard && may_trim && IS_ALIGNED: "
+ "zero succeeded using BLKDISCARD");
+ goto out;
+ }
+
+ if (!is_enotsup (errno)) {
+ nbdkit_error ("zero: %m");
+ return -1;
+ }
+
+ h->can_blkdiscard = false;
+ }
+ else {
+ if (!is_enotsup (errno)) {
+ nbdkit_error ("zero: %m");
+ return -1;
+ }
+
+ h->can_fallocate = false;
+ }
+ }
+#endif
+
#ifdef FALLOC_FL_ZERO_RANGE
if (h->can_zero_range) {
int r;
diff --git a/plugins/file/nbdkit-file-plugin.pod b/plugins/file/nbdkit-file-plugin.pod
index 43fc23c7..f1b33582 100644
--- a/plugins/file/nbdkit-file-plugin.pod
+++ b/plugins/file/nbdkit-file-plugin.pod
@@ -237,6 +237,11 @@ block devices.
If both set, the plugin may be able to efficiently zero ranges of
block devices, where the driver and block device itself supports this.
+=item C<file_blkdiscard=yes>
+
+If set, the plugin may be able to efficiently trim ranges of block
+devices, where the driver and block device itself supports this.
+
=item C<file_extents=yes>
If set, the plugin can read file extents.
--
2.47.1

View File

@ -55,7 +55,7 @@
Name: nbdkit
Version: 1.42.2
Release: 3%{?dist}
Release: 4%{?dist}
Summary: NBD server
License: BSD-3-Clause
@ -88,6 +88,13 @@ Patch0005: 0005-file-Hard-error-if-sync_file_range-fails.patch
Patch0006: 0006-file-Reduce-the-size-of-the-lock-around-write-evicti.patch
Patch0007: 0007-file-Document-implicit-assumption-about-eviction-win.patch
Patch0008: 0008-server-Turn-flush-into-a-controlpath-message.patch
Patch0009: 0009-file-Fix-minor-typo-in-debug-message.patch
Patch0010: 0010-file-Add-more-debugging-when-D-file.zero-1-is-used.patch
Patch0011: 0011-file-Fix-comment-style-in-a-few-places.patch
Patch0012: 0012-file-Fix-do_fallocate-debugging-on-Alpine.patch
Patch0013: 0013-file-Rename-h-can_zeroout-to-h-can_blkzeroout-to-ref.patch
Patch0014: 0014-file-zero-Document-implicit-order-that-we-will-try-z.patch
Patch0015: 0015-file-zero-Use-BLKDISCARD-method-if-may_trim-is-set.patch
# For automatic RPM Provides generation.
# See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html
@ -1530,7 +1537,7 @@ fi
%changelog
* Mon Apr 07 2025 Richard W.M. Jones <rjones@redhat.com> - 1.42.2-3
* Thu May 01 2025 Richard W.M. Jones <rjones@redhat.com> - 1.42.2-4
- Rebase to nbdkit 1.42.2
- Synch the spec file with Fedora Rawhide.
- nbdkit-ondemand-plugin moves into a new subpackage.
@ -1538,6 +1545,8 @@ fi
resolves: RHEL-78830
- Add extra system call checking and debugging to nbdkit-file-plugin
resolves: RHEL-85515
- Allow nbdkit-file-plugin to zero and trim block devices
resolves: RHEL-89371
* Mon Jan 06 2025 Richard W.M. Jones <rjones@redhat.com> - 1.40.4-3
- vddk: Avoid reading partial chunk beyond the end of the disk