Allow nbdkit-file-plugin to zero and trim block devices

resolves: RHEL-89353
This commit is contained in:
Richard W.M. Jones 2025-05-01 19:05:19 +01:00
parent 8bf36f603d
commit d1e8322fc6
8 changed files with 400 additions and 1 deletions

View File

@ -0,0 +1,26 @@
From a79bf57c8ec805516e8dbe7995aa2bd46b83ade3 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:03:06 +0100
Subject: [PATCH] file: Fix minor typo in debug message
(cherry picked from commit a75db5636b94c9184f8eb02fd51182d935df64a6)
---
plugins/file/file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 6bcc5537..71b349ac 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -924,7 +924,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
if (r == 0) {
if (file_debug_zero)
- nbdkit_debug ("h->can_zero-range: "
+ nbdkit_debug ("h->can_zero_range: "
"zero succeeded using fallocate");
goto out;
}
--
2.47.1

View File

@ -0,0 +1,36 @@
From 1cb341e75c1a17553b69ea8d9889662e6d09ae78 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:21:23 +0100
Subject: [PATCH] file: Add more debugging when -D file.zero=1 is used
(cherry picked from commit ecf6b15fa84a02b74ea969f06552c82ee418b9b4)
---
plugins/file/file.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 71b349ac..378f6988 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -879,7 +879,17 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
static int
do_fallocate (int fd, int mode_, off_t offset, off_t len)
{
- int r = fallocate (fd, mode_, offset, len);
+ int r;
+
+ r = fallocate (fd, mode_, offset, len);
+
+ if (file_debug_zero)
+ nbdkit_debug ("fallocate ([%s%s ], %" PRIu64 ", %" PRIu64") => %d (%d)",
+ mode_ & FALLOC_FL_PUNCH_HOLE ? " FALLOC_FL_PUNCH_HOLE" : "",
+ mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "",
+ (uint64_t) offset, (uint64_t) len, r,
+ r == -1 ? errno : 0);
+
if (r == -1 && errno == ENODEV) {
/* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails
with EOPNOTSUPP in this case. Normalize errno to simplify callers. */
--
2.47.1

View File

@ -0,0 +1,43 @@
From 664e447d858a21304610db3023cc728db0c974bd Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:32:17 +0100
Subject: [PATCH] file: Fix comment style in a few places
No actual change here.
(cherry picked from commit 0df4142c4be2b059c4d17aae0ec71f16ffc9ba35)
---
plugins/file/file.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 378f6988..01ad1ef2 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -892,7 +892,8 @@ do_fallocate (int fd, int mode_, off_t offset, off_t len)
if (r == -1 && errno == ENODEV) {
/* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails
- with EOPNOTSUPP in this case. Normalize errno to simplify callers. */
+ * with EOPNOTSUPP in this case. Normalize errno to simplify callers.
+ */
errno = EOPNOTSUPP;
}
return r;
@@ -949,9 +950,10 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
#endif
#ifdef FALLOC_FL_PUNCH_HOLE
- /* If we can punch hole but may not trim, we can combine punching hole and
- * fallocate to zero a range. This is expected to be more efficient than
- * writing zeroes manually. */
+ /* If we can punch hole but may not trim, we can combine punching
+ * hole and fallocate to zero a range. This is expected to be more
+ * efficient than writing zeroes manually.
+ */
if (h->can_punch_hole && h->can_fallocate) {
int r;
--
2.47.1

View File

@ -0,0 +1,60 @@
From 4c02ff62f40497335da185cc4b45c2ba43fb609b Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:59:17 +0100
Subject: [PATCH] file: Fix do_fallocate debugging on Alpine
Alpine has some weird/old kernel that doesn't support
FALLOC_FL_ZERO_RANGE but does support FALLOC_FL_PUNCH_HOLE, so the
debugging I added in commit ecf6b15fa8 failed to compile with:
file.c: In function 'do_fallocate':
file.c:958:27: error: 'FALLOC_FL_ZERO_RANGE' undeclared (first use in this function)
958 | mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "",
| ^~~~~~~~~~~~~~~~~~~~
file.c:958:27: note: each undeclared identifier is reported only once for each function it appears in
make[3]: *** [Makefile:666: nbdkit_file_plugin_la-file.lo] Error 1
Fixes: commit ecf6b15fa84a02b74ea969f06552c82ee418b9b4
(cherry picked from commit 419a347054f81c53706637feddbc5008beab77d3)
---
plugins/file/file.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 01ad1ef2..32c5e2b7 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -875,7 +875,7 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
return 0;
}
-#if defined (FALLOC_FL_PUNCH_HOLE) || defined (FALLOC_FL_ZERO_RANGE)
+#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE)
static int
do_fallocate (int fd, int mode_, off_t offset, off_t len)
{
@@ -884,9 +884,20 @@ do_fallocate (int fd, int mode_, off_t offset, off_t len)
r = fallocate (fd, mode_, offset, len);
if (file_debug_zero)
- nbdkit_debug ("fallocate ([%s%s ], %" PRIu64 ", %" PRIu64") => %d (%d)",
+ nbdkit_debug ("fallocate (["
+#if defined(FALLOC_FL_PUNCH_HOLE)
+ "%s"
+#endif
+#if defined(FALLOC_FL_ZERO_RANGE)
+ "%s"
+#endif
+ " ], %" PRIu64 ", %" PRIu64") => %d (%d)",
+#if defined(FALLOC_FL_PUNCH_HOLE)
mode_ & FALLOC_FL_PUNCH_HOLE ? " FALLOC_FL_PUNCH_HOLE" : "",
+#endif
+#if defined(FALLOC_FL_ZERO_RANGE)
mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "",
+#endif
(uint64_t) offset, (uint64_t) len, r,
r == -1 ? errno : 0);
--
2.47.1

View File

@ -0,0 +1,65 @@
From bc4598f3d2d1ef2f4ebdf5b365ed08eff14d5654 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:26:41 +0100
Subject: [PATCH] file: Rename h->can_zeroout to h->can_blkzeroout to reflect
ioctl
Since we're calling the blockdev-specific BLKZEROOUT ioctl when this
flag is set, rename the flag.
(cherry picked from commit fba20ce06c2f0e7c4be7e52e8e1934933851dfbc)
---
plugins/file/file.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 32c5e2b7..70805bd7 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -497,7 +497,7 @@ struct handle {
bool can_punch_hole;
bool can_zero_range;
bool can_fallocate;
- bool can_zeroout;
+ bool can_blkzeroout;
};
/* Common code for opening a file by name, used by mode_filename and
@@ -703,7 +703,7 @@ file_open (int readonly)
#endif
h->can_fallocate = true;
- h->can_zeroout = h->is_block_device;
+ h->can_blkzeroout = h->is_block_device;
return h;
}
@@ -998,14 +998,14 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
#ifdef BLKZEROOUT
/* For aligned range and block device, we can use BLKZEROOUT. */
- if (h->can_zeroout && IS_ALIGNED (offset | count, h->sector_size)) {
+ if (h->can_blkzeroout && IS_ALIGNED (offset | count, h->sector_size)) {
int r;
uint64_t range[2] = {offset, count};
r = ioctl (h->fd, BLKZEROOUT, &range);
if (r == 0) {
if (file_debug_zero)
- nbdkit_debug ("h->can_zeroout && IS_ALIGNED: "
+ nbdkit_debug ("h->can_blkzeroout && IS_ALIGNED: "
"zero succeeded using BLKZEROOUT");
goto out;
}
@@ -1015,7 +1015,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
return -1;
}
- h->can_zeroout = false;
+ h->can_blkzeroout = false;
}
#endif
--
2.47.1

View File

@ -0,0 +1,38 @@
From c1984ddcc6497c4446d1bf0e8828d1259852eb74 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:30:41 +0100
Subject: [PATCH] file: zero: Document implicit order that we will try zeroing
methods
There's no substantive change here. I just pulled out the test (flags
& NBDKIT_FLAG_MAY_TRIM) into a boolean variable, and documented that
we (will) try zero-with-trim methods first.
(cherry picked from commit 61fc023f235b17f8a19302885d1613dd0a7a3793)
---
plugins/file/file.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 70805bd7..3b82e02d 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -916,9 +916,14 @@ static int
file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
{
struct handle *h __attribute__ ((unused)) = handle;
+ const bool may_trim __attribute__ ((unused)) = flags & NBDKIT_FLAG_MAY_TRIM;
+ /* These alternate zeroing methods are ordered. Methods which can
+ * trim (if may_trim is set) are tried first. Methods which can
+ * only zero are tried last.
+ */
#ifdef FALLOC_FL_PUNCH_HOLE
- if (h->can_punch_hole && (flags & NBDKIT_FLAG_MAY_TRIM)) {
+ if (may_trim && h->can_punch_hole) {
int r;
r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
--
2.47.1

View File

@ -0,0 +1,122 @@
From 396e8a97835155a620cabbcf1aabaaa1fa4a08f1 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:36:23 +0100
Subject: [PATCH] file: zero: Use BLKDISCARD method if may_trim is set
If we're allowed to trim and we're writing to a block device,
previously we hit the case fallocate(FALLOC_FL_ZERO_RANGE) first.
This succeeds in Linux, zeroing (not trimming) the range.
However it would be better to trim in this case. Linux supports
ioctl(BLKDISCARD) on block devices, so try this method first.
Fixes: https://issues.redhat.com/browse/RHEL-89353
Reported-by: Germano Veit Michel
Thanks: Eric Blake
(cherry picked from commit 7a9ecda24906c64d9f8c7238a96cb3f686e894eb)
---
plugins/file/file.c | 50 +++++++++++++++++++++++++++++
plugins/file/nbdkit-file-plugin.pod | 5 +++
2 files changed, 55 insertions(+)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 3b82e02d..b4dec3c5 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -397,6 +397,9 @@ file_dump_plugin (void)
#ifdef BLKSSZGET
printf ("file_blksszget=yes\n");
#endif
+#ifdef BLKDISCARD
+ printf ("file_blkdiscard=yes\n");
+#endif
#ifdef BLKZEROOUT
printf ("file_blkzeroout=yes\n");
#endif
@@ -497,6 +500,7 @@ struct handle {
bool can_punch_hole;
bool can_zero_range;
bool can_fallocate;
+ bool can_blkdiscard;
bool can_blkzeroout;
};
@@ -704,6 +708,7 @@ file_open (int readonly)
h->can_fallocate = true;
h->can_blkzeroout = h->is_block_device;
+ h->can_blkdiscard = h->is_block_device;
return h;
}
@@ -944,6 +949,51 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
#endif
+#if defined(BLKDISCARD) && defined(FALLOC_FL_ZERO_RANGE)
+  /* Zero-with-trim for block devices.  For a sector-aligned range we
+   * can trim with BLKDISCARD, but BLKDISCARD doesn't necessarily zero
+   * (e.g. for local disk), so zero with FALLOC_FL_ZERO_RANGE first.
+   *
+   * In future all Linux block devices may understand
+   * FALLOC_FL_PUNCH_HOLE which means this case would no longer be
+   * necessary, since the case above will handle it.
+   */
+  if (may_trim && h->can_blkdiscard && h->can_zero_range &&
+      IS_ALIGNED (offset | count, h->sector_size)) {
+    int r;
+    uint64_t range[2] = {offset, count};
+
+    r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
+    if (r == 0) {
+      /* We could use FALLOC_FL_PUNCH_HOLE here instead, but currently
+       * thin LVs do not support it (XXX 2025-04).
+       */
+      r = ioctl (h->fd, BLKDISCARD, &range);
+      if (r == 0) {
+        if (file_debug_zero)
+          nbdkit_debug ("h->can_blkdiscard && may_trim && IS_ALIGNED: "
+                        "zero succeeded using BLKDISCARD");
+        goto out;
+      }
+
+      if (!is_enotsup (errno)) {
+        nbdkit_error ("zero: %m");
+        return -1;
+      }
+      /* BLKDISCARD is not supported here: stop trying it. */
+      h->can_blkdiscard = false;
+    }
+    else {
+      if (!is_enotsup (errno)) {
+        nbdkit_error ("zero: %m");
+        return -1;
+      }
+      /* FALLOC_FL_ZERO_RANGE (not plain fallocate) was rejected. */
+      h->can_zero_range = false;
+    }
+  }
+#endif
+
#ifdef FALLOC_FL_ZERO_RANGE
if (h->can_zero_range) {
int r;
diff --git a/plugins/file/nbdkit-file-plugin.pod b/plugins/file/nbdkit-file-plugin.pod
index a50bef2d..0e260b7f 100644
--- a/plugins/file/nbdkit-file-plugin.pod
+++ b/plugins/file/nbdkit-file-plugin.pod
@@ -227,6 +227,11 @@ future.
If both set, the plugin may be able to efficiently zero ranges of
block devices, where the driver and block device itself supports this.
+=item C<file_blkdiscard=yes>
+
+If set, the plugin may be able to efficiently trim ranges of block
+devices, where the driver and the block device itself support this.
+
=item C<file_extents=yes>
If set, the plugin can read file extents.
--
2.47.1

View File

@ -99,6 +99,13 @@ Patch0015: 0015-file-Hard-error-if-sync_file_range-fails.patch
Patch0016: 0016-file-Reduce-the-size-of-the-lock-around-write-evicti.patch
Patch0017: 0017-file-Document-implicit-assumption-about-eviction-win.patch
Patch0018: 0018-server-Turn-flush-into-a-controlpath-message.patch
Patch0019: 0019-file-Fix-minor-typo-in-debug-message.patch
Patch0020: 0020-file-Add-more-debugging-when-D-file.zero-1-is-used.patch
Patch0021: 0021-file-Fix-comment-style-in-a-few-places.patch
Patch0022: 0022-file-Fix-do_fallocate-debugging-on-Alpine.patch
Patch0023: 0023-file-Rename-h-can_zeroout-to-h-can_blkzeroout-to-ref.patch
Patch0024: 0024-file-zero-Document-implicit-order-that-we-will-try-z.patch
Patch0025: 0025-file-zero-Use-BLKDISCARD-method-if-may_trim-is-set.patch
# For automatic RPM Provides generation.
# See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html
@ -1517,9 +1524,11 @@ fi
%changelog
* Mon Apr 07 2025 Richard W.M. Jones <rjones@redhat.com> - 1.38.5-5
* Thu May 01 2025 Richard W.M. Jones <rjones@redhat.com> - 1.38.5-6
- Add extra system call checking and debugging to nbdkit-file-plugin
resolves: RHEL-85510
- Allow nbdkit-file-plugin to zero and trim block devices
resolves: RHEL-89353
* Mon Jan 06 2025 Richard W.M. Jones <rjones@redhat.com> - 1.38.5-2
- vddk: Avoid reading partial chunk beyond the end of the disk