From e585cbdf30c925117d55d5bde19299254c796b1b Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Thu, 1 May 2025 19:05:19 +0100 Subject: [PATCH] Allow nbdkit-file-plugin to zero and trim block devices resolves: RHEL-89371 --- ...file-Fix-minor-typo-in-debug-message.patch | 26 ++++ ...debugging-when-D-file.zero-1-is-used.patch | 36 ++++++ ...le-Fix-comment-style-in-a-few-places.patch | 43 ++++++ ...Fix-do_fallocate-debugging-on-Alpine.patch | 60 +++++++++ ...n_zeroout-to-h-can_blkzeroout-to-ref.patch | 65 ++++++++++ ...nt-implicit-order-that-we-will-try-z.patch | 38 ++++++ ...BLKDISCARD-method-if-may_trim-is-set.patch | 122 ++++++++++++++++++ nbdkit.spec | 13 +- 8 files changed, 401 insertions(+), 2 deletions(-) create mode 100644 0009-file-Fix-minor-typo-in-debug-message.patch create mode 100644 0010-file-Add-more-debugging-when-D-file.zero-1-is-used.patch create mode 100644 0011-file-Fix-comment-style-in-a-few-places.patch create mode 100644 0012-file-Fix-do_fallocate-debugging-on-Alpine.patch create mode 100644 0013-file-Rename-h-can_zeroout-to-h-can_blkzeroout-to-ref.patch create mode 100644 0014-file-zero-Document-implicit-order-that-we-will-try-z.patch create mode 100644 0015-file-zero-Use-BLKDISCARD-method-if-may_trim-is-set.patch diff --git a/0009-file-Fix-minor-typo-in-debug-message.patch b/0009-file-Fix-minor-typo-in-debug-message.patch new file mode 100644 index 0000000..8f4ede9 --- /dev/null +++ b/0009-file-Fix-minor-typo-in-debug-message.patch @@ -0,0 +1,26 @@ +From 4edf62487072d6e61b95bed6ca55750bf3110ae4 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Thu, 1 May 2025 10:03:06 +0100 +Subject: [PATCH] file: Fix minor typo in debug message + +(cherry picked from commit a75db5636b94c9184f8eb02fd51182d935df64a6) +--- + plugins/file/file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/plugins/file/file.c b/plugins/file/file.c +index b8470c20..8e4acedd 100644 +--- a/plugins/file/file.c ++++ b/plugins/file/file.c +@@ -989,7 +989,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags) + r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count); + if (r == 0) { + if (file_debug_zero) +- nbdkit_debug ("h->can_zero-range: " ++ nbdkit_debug ("h->can_zero_range: " + "zero succeeded using fallocate"); + goto out; + } +-- +2.47.1 + diff --git a/0010-file-Add-more-debugging-when-D-file.zero-1-is-used.patch b/0010-file-Add-more-debugging-when-D-file.zero-1-is-used.patch new file mode 100644 index 0000000..0c69654 --- /dev/null +++ b/0010-file-Add-more-debugging-when-D-file.zero-1-is-used.patch @@ -0,0 +1,36 @@ +From 9db29d77149a7bed9daef9f35a584d798e8f22f0 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Thu, 1 May 2025 10:21:23 +0100 +Subject: [PATCH] file: Add more debugging when -D file.zero=1 is used + +(cherry picked from commit ecf6b15fa84a02b74ea969f06552c82ee418b9b4) +--- + plugins/file/file.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/plugins/file/file.c b/plugins/file/file.c +index 8e4acedd..a49c11e9 100644 +--- a/plugins/file/file.c ++++ b/plugins/file/file.c +@@ -944,7 +944,17 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset, + static int + do_fallocate (int fd, int mode_, off_t offset, off_t len) + { +- int r = fallocate (fd, mode_, offset, len); ++ int r; ++ ++ r = fallocate (fd, mode_, offset, len); ++ ++ if (file_debug_zero) ++ nbdkit_debug ("fallocate ([%s%s ], %" PRIu64 ", %" PRIu64") => %d (%d)", ++ mode_ & FALLOC_FL_PUNCH_HOLE ? " FALLOC_FL_PUNCH_HOLE" : "", ++ mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "", ++ (uint64_t) offset, (uint64_t) len, r, ++ r == -1 ? errno : 0); ++ + if (r == -1 && errno == ENODEV) { + /* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails + with EOPNOTSUPP in this case. Normalize errno to simplify callers. */ +-- +2.47.1 + diff --git a/0011-file-Fix-comment-style-in-a-few-places.patch b/0011-file-Fix-comment-style-in-a-few-places.patch new file mode 100644 index 0000000..7adb5f7 --- /dev/null +++ b/0011-file-Fix-comment-style-in-a-few-places.patch @@ -0,0 +1,43 @@ +From 1df73694a75a6dc353bf15a277164f21e6e66227 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Thu, 1 May 2025 10:32:17 +0100 +Subject: [PATCH] file: Fix comment style in a few places + +No actual change here. + +(cherry picked from commit 0df4142c4be2b059c4d17aae0ec71f16ffc9ba35) +--- + plugins/file/file.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/plugins/file/file.c b/plugins/file/file.c +index a49c11e9..c381c1e2 100644 +--- a/plugins/file/file.c ++++ b/plugins/file/file.c +@@ -957,7 +957,8 @@ do_fallocate (int fd, int mode_, off_t offset, off_t len) + + if (r == -1 && errno == ENODEV) { + /* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails +- with EOPNOTSUPP in this case. Normalize errno to simplify callers. */ ++ * with EOPNOTSUPP in this case. Normalize errno to simplify callers. ++ */ + errno = EOPNOTSUPP; + } + return r; +@@ -1014,9 +1015,10 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags) + #endif + + #ifdef FALLOC_FL_PUNCH_HOLE +- /* If we can punch hole but may not trim, we can combine punching hole and +- * fallocate to zero a range. This is expected to be more efficient than +- * writing zeroes manually. */ ++ /* If we can punch hole but may not trim, we can combine punching ++ * hole and fallocate to zero a range. This is expected to be more ++ * efficient than writing zeroes manually. ++ */ + if (h->can_punch_hole && h->can_fallocate) { + int r; + +-- +2.47.1 + diff --git a/0012-file-Fix-do_fallocate-debugging-on-Alpine.patch b/0012-file-Fix-do_fallocate-debugging-on-Alpine.patch new file mode 100644 index 0000000..d36a62a --- /dev/null +++ b/0012-file-Fix-do_fallocate-debugging-on-Alpine.patch @@ -0,0 +1,60 @@ +From 6f64ded91340340ab803abadf0542d953e6bf2d5 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Thu, 1 May 2025 10:59:17 +0100 +Subject: [PATCH] file: Fix do_fallocate debugging on Alpine + +Alpine has some weird/old kernel that doesn't support +FALLOC_FL_ZERO_RANGE but does support FALLOC_FL_PUNCH_HOLE, so the +debugging I added in commit ecf6b15fa8 failed to compile with: + + file.c: In function 'do_fallocate': + file.c:958:27: error: 'FALLOC_FL_ZERO_RANGE' undeclared (first use in this function) + 958 | mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "", + | ^~~~~~~~~~~~~~~~~~~~ + file.c:958:27: note: each undeclared identifier is reported only once for each function it appears in + make[3]: *** [Makefile:666: nbdkit_file_plugin_la-file.lo] Error 1 + +Fixes: commit ecf6b15fa84a02b74ea969f06552c82ee418b9b4 +(cherry picked from commit 419a347054f81c53706637feddbc5008beab77d3) +--- + plugins/file/file.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +diff --git a/plugins/file/file.c b/plugins/file/file.c +index c381c1e2..00aa5416 100644 +--- a/plugins/file/file.c ++++ b/plugins/file/file.c +@@ -940,7 +940,7 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset, + return 0; + } + +-#if defined (FALLOC_FL_PUNCH_HOLE) || defined (FALLOC_FL_ZERO_RANGE) ++#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE) + static int + do_fallocate (int fd, int mode_, off_t offset, off_t len) + { +@@ -949,9 +949,20 @@ do_fallocate (int fd, int mode_, off_t offset, off_t len) + r = fallocate (fd, mode_, offset, len); + + if (file_debug_zero) +- nbdkit_debug ("fallocate ([%s%s ], %" PRIu64 ", %" PRIu64") => %d (%d)", ++ nbdkit_debug ("fallocate ([" ++#if defined(FALLOC_FL_PUNCH_HOLE) ++ "%s" ++#endif ++#if defined(FALLOC_FL_ZERO_RANGE) ++ "%s" ++#endif ++ " ], %" PRIu64 ", %" PRIu64") => %d (%d)", ++#if defined(FALLOC_FL_PUNCH_HOLE) + mode_ & FALLOC_FL_PUNCH_HOLE ? " FALLOC_FL_PUNCH_HOLE" : "", ++#endif ++#if defined(FALLOC_FL_ZERO_RANGE) + mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "", ++#endif + (uint64_t) offset, (uint64_t) len, r, + r == -1 ? errno : 0); + +-- +2.47.1 + diff --git a/0013-file-Rename-h-can_zeroout-to-h-can_blkzeroout-to-ref.patch b/0013-file-Rename-h-can_zeroout-to-h-can_blkzeroout-to-ref.patch new file mode 100644 index 0000000..64eefa4 --- /dev/null +++ b/0013-file-Rename-h-can_zeroout-to-h-can_blkzeroout-to-ref.patch @@ -0,0 +1,65 @@ +From 6653df377b97d817a1984076cb744c01c86ecf97 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Thu, 1 May 2025 10:26:41 +0100 +Subject: [PATCH] file: Rename h->can_zeroout to h->can_blkzeroout to reflect + ioctl + +Since we're calling the blockdev-specific BLKZEROOUT ioctl when this +flag is set, rename the flag. + +(cherry picked from commit fba20ce06c2f0e7c4be7e52e8e1934933851dfbc) +--- + plugins/file/file.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/plugins/file/file.c b/plugins/file/file.c +index 00aa5416..a649ccd7 100644 +--- a/plugins/file/file.c ++++ b/plugins/file/file.c +@@ -506,7 +506,7 @@ struct handle { + bool can_punch_hole; + bool can_zero_range; + bool can_fallocate; +- bool can_zeroout; ++ bool can_blkzeroout; + }; + + /* Common code for opening a file by name, used by mode_filename and +@@ -746,7 +746,7 @@ file_open (int readonly) + #endif + + h->can_fallocate = true; +- h->can_zeroout = h->is_block_device; ++ h->can_blkzeroout = h->is_block_device; + + return h; + } +@@ -1063,14 +1063,14 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags) + + #ifdef BLKZEROOUT + /* For aligned range and block device, we can use BLKZEROOUT. */ +- if (h->can_zeroout && IS_ALIGNED (offset | count, h->sector_size)) { ++ if (h->can_blkzeroout && IS_ALIGNED (offset | count, h->sector_size)) { + int r; + uint64_t range[2] = {offset, count}; + + r = ioctl (h->fd, BLKZEROOUT, &range); + if (r == 0) { + if (file_debug_zero) +- nbdkit_debug ("h->can_zeroout && IS_ALIGNED: " ++ nbdkit_debug ("h->can_blkzeroout && IS_ALIGNED: " + "zero succeeded using BLKZEROOUT"); + goto out; + } +@@ -1080,7 +1080,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags) + return -1; + } + +- h->can_zeroout = false; ++ h->can_blkzeroout = false; + } + #endif + +-- +2.47.1 + diff --git a/0014-file-zero-Document-implicit-order-that-we-will-try-z.patch b/0014-file-zero-Document-implicit-order-that-we-will-try-z.patch new file mode 100644 index 0000000..ce07026 --- /dev/null +++ b/0014-file-zero-Document-implicit-order-that-we-will-try-z.patch @@ -0,0 +1,38 @@ +From 661bb729bfcba63842840c32dc8cd0e778e38b21 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Thu, 1 May 2025 10:30:41 +0100 +Subject: [PATCH] file: zero: Document implicit order that we will try zeroing + methods + +There's no substantive change here. I just pulled out the test (flags +& NBDKIT_FLAG_MAY_TRIM) into a boolean variable, and documented that +we (will) try zero-with-trim methods first. + +(cherry picked from commit 61fc023f235b17f8a19302885d1613dd0a7a3793) +--- + plugins/file/file.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/plugins/file/file.c b/plugins/file/file.c +index a649ccd7..d7405f36 100644 +--- a/plugins/file/file.c ++++ b/plugins/file/file.c +@@ -981,9 +981,14 @@ static int + file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags) + { + struct handle *h __attribute__ ((unused)) = handle; ++ const bool may_trim __attribute__ ((unused)) = flags & NBDKIT_FLAG_MAY_TRIM; + ++ /* These alternate zeroing methods are ordered. Methods which can ++ * trim (if may_trim is set) are tried first. Methods which can ++ * only zero are tried last. ++ */ + #ifdef FALLOC_FL_PUNCH_HOLE +- if (h->can_punch_hole && (flags & NBDKIT_FLAG_MAY_TRIM)) { ++ if (may_trim && h->can_punch_hole) { + int r; + + r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, +-- +2.47.1 + diff --git a/0015-file-zero-Use-BLKDISCARD-method-if-may_trim-is-set.patch b/0015-file-zero-Use-BLKDISCARD-method-if-may_trim-is-set.patch new file mode 100644 index 0000000..073a9f6 --- /dev/null +++ b/0015-file-zero-Use-BLKDISCARD-method-if-may_trim-is-set.patch @@ -0,0 +1,122 @@ +From cf35037d52c2abf383fc3e8db00283a58ffc53db Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Thu, 1 May 2025 10:36:23 +0100 +Subject: [PATCH] file: zero: Use BLKDISCARD method if may_trim is set + +If we're allowed to trim and we're writing to a block device, +previously we hit the case fallocate(FALLOC_FL_ZERO_RANGE) first. +This succeeds in Linux, zeroing (not trimming) the range. + +However it would be better to trim in this case. Linux supports +ioctl(BLKDISCARD) on block devices, so try this method first. + +Fixes: https://issues.redhat.com/browse/RHEL-89353 +Reported-by: Germano Veit Michel +Thanks: Eric Blake +(cherry picked from commit 7a9ecda24906c64d9f8c7238a96cb3f686e894eb) +--- + plugins/file/file.c | 50 +++++++++++++++++++++++++++++ + plugins/file/nbdkit-file-plugin.pod | 5 +++ + 2 files changed, 55 insertions(+) + +diff --git a/plugins/file/file.c b/plugins/file/file.c +index d7405f36..dc100bb3 100644 +--- a/plugins/file/file.c ++++ b/plugins/file/file.c +@@ -404,6 +404,9 @@ file_dump_plugin (void) + #ifdef BLKSSZGET + printf ("file_blksszget=yes\n"); + #endif ++#ifdef BLKDISCARD ++ printf ("file_blkdiscard=yes\n"); ++#endif + #ifdef BLKZEROOUT + printf ("file_blkzeroout=yes\n"); + #endif +@@ -506,6 +509,7 @@ struct handle { + bool can_punch_hole; + bool can_zero_range; + bool can_fallocate; ++ bool can_blkdiscard; + bool can_blkzeroout; + }; + +@@ -747,6 +751,7 @@ file_open (int readonly) + + h->can_fallocate = true; + h->can_blkzeroout = h->is_block_device; ++ h->can_blkdiscard = h->is_block_device; + + return h; + } +@@ -1009,6 +1014,51 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags) + } + #endif + ++#if defined(BLKDISCARD) && defined(FALLOC_FL_ZERO_RANGE) ++ /* For aligned range and block device, we can use BLKDISCARD to ++ * trim. However BLKDISCARD doesn't necessarily zero (eg for local ++ * disk) so we have to zero first and then discard. ++ * ++ * In future all Linux block devices may understand ++ * FALLOC_FL_PUNCH_HOLE which means this case would no longer be ++ * necessary, since the case above will handle it. ++ */ ++ if (may_trim && h->can_blkdiscard && h->can_zero_range && ++ IS_ALIGNED (offset | count, h->sector_size)) { ++ int r; ++ uint64_t range[2] = {offset, count}; ++ ++ r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count); ++ if (r == 0) { ++ /* We could use FALLOC_FL_PUNCH_HOLE here instead, but currently ++ * thin LVs do not support it (XXX 2025-04). ++ */ ++ r = ioctl (h->fd, BLKDISCARD, &range); ++ if (r == 0) { ++ if (file_debug_zero) ++ nbdkit_debug ("h->can_blkdiscard && may_trim && IS_ALIGNED: " ++ "zero succeeded using BLKDISCARD"); ++ goto out; ++ } ++ ++ if (!is_enotsup (errno)) { ++ nbdkit_error ("zero: %m"); ++ return -1; ++ } ++ ++ h->can_blkdiscard = false; ++ } ++ else { ++ if (!is_enotsup (errno)) { ++ nbdkit_error ("zero: %m"); ++ return -1; ++ } ++ ++ h->can_fallocate = false; ++ } ++ } ++#endif ++ + #ifdef FALLOC_FL_ZERO_RANGE + if (h->can_zero_range) { + int r; +diff --git a/plugins/file/nbdkit-file-plugin.pod b/plugins/file/nbdkit-file-plugin.pod +index 43fc23c7..f1b33582 100644 +--- a/plugins/file/nbdkit-file-plugin.pod ++++ b/plugins/file/nbdkit-file-plugin.pod +@@ -237,6 +237,11 @@ block devices. + If both set, the plugin may be able to efficiently zero ranges of + block devices, where the driver and block device itself supports this. + ++=item C ++ ++If set, the plugin may be able to efficiently trim ranges of block ++devices, where the driver and block device itself supports this. ++ + =item C + + If set, the plugin can read file extents. +-- +2.47.1 + diff --git a/nbdkit.spec b/nbdkit.spec index cc724ce..671fe5a 100644 --- a/nbdkit.spec +++ b/nbdkit.spec @@ -55,7 +55,7 @@ Name: nbdkit Version: 1.42.2 -Release: 3%{?dist} +Release: 4%{?dist} Summary: NBD server License: BSD-3-Clause @@ -88,6 +88,13 @@ Patch0005: 0005-file-Hard-error-if-sync_file_range-fails.patch Patch0006: 0006-file-Reduce-the-size-of-the-lock-around-write-evicti.patch Patch0007: 0007-file-Document-implicit-assumption-about-eviction-win.patch Patch0008: 0008-server-Turn-flush-into-a-controlpath-message.patch +Patch0009: 0009-file-Fix-minor-typo-in-debug-message.patch +Patch0010: 0010-file-Add-more-debugging-when-D-file.zero-1-is-used.patch +Patch0011: 0011-file-Fix-comment-style-in-a-few-places.patch +Patch0012: 0012-file-Fix-do_fallocate-debugging-on-Alpine.patch +Patch0013: 0013-file-Rename-h-can_zeroout-to-h-can_blkzeroout-to-ref.patch +Patch0014: 0014-file-zero-Document-implicit-order-that-we-will-try-z.patch +Patch0015: 0015-file-zero-Use-BLKDISCARD-method-if-may_trim-is-set.patch # For automatic RPM Provides generation. # See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html @@ -1530,7 +1537,7 @@ fi %changelog -* Mon Apr 07 2025 Richard W.M. Jones - 1.42.2-3 +* Thu May 01 2025 Richard W.M. Jones - 1.42.2-4 - Rebase to nbdkit 1.42.2 - Synch the spec file with Fedora Rawhide. - nbdkit-ondemand-plugin moves into a new subpackage. @@ -1538,6 +1545,8 @@ fi resolves: RHEL-78830 - Add extra system call checking and debugging to nbdkit-file-plugin resolves: RHEL-85515 +- Allow nbdkit-file-plugin to zero and trim block devices + resolves: RHEL-89371 * Mon Jan 06 2025 Richard W.M. Jones - 1.40.4-3 - vddk: Avoid reading partial chunk beyond the end of the disk