diff --git a/0014-tests-Add-more-generic-tests-of-file-cache-none.patch b/0014-tests-Add-more-generic-tests-of-file-cache-none.patch new file mode 100644 index 0000000..0f2038e --- /dev/null +++ b/0014-tests-Add-more-generic-tests-of-file-cache-none.patch @@ -0,0 +1,238 @@ +From bdab63138758e287b292140e17f2d0e3b4c40bbe Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Tue, 1 Apr 2025 12:45:43 +0100 +Subject: [PATCH] tests: Add more generic tests of file cache=none + +The tests added in commit 6017ba21ae require cachestats which means +they are almost always skipped in reality. Also we're interested in +whether the windowed eviction could ever corrupt data. Write a +simpler pair of tests for data integrity. + +Updates: commit 6017ba21aeeb3d7ad85925e78dba85a005194dee +(cherry picked from commit 7b45f73c0668e56a9188249eeefc2f67aeb50af3) +--- + tests/Makefile.am | 12 +++-- + tests/test-file-cache-none-read-consistent.sh | 53 ++++++++++++++++++ + ...=> test-file-cache-none-read-effective.sh} | 9 ++-- + .../test-file-cache-none-write-consistent.sh | 54 +++++++++++++++++++ + ...> test-file-cache-none-write-effective.sh} | 11 ++-- + 5 files changed, 126 insertions(+), 13 deletions(-) + create mode 100755 tests/test-file-cache-none-read-consistent.sh + rename tests/{test-file-cache-none-read.sh => test-file-cache-none-read-effective.sh} (93%) + create mode 100755 tests/test-file-cache-none-write-consistent.sh + rename tests/{test-file-cache-none-write.sh => test-file-cache-none-write-effective.sh} (92%) + +diff --git a/tests/Makefile.am b/tests/Makefile.am +index 4340cc38..eed96d28 100644 +--- a/tests/Makefile.am ++++ b/tests/Makefile.am +@@ -846,8 +846,10 @@ TESTS += \ + test-file-extents.sh \ + test-file-dir.sh \ + test-file-dirfd.sh \ +- test-file-cache-none-read.sh \ +- test-file-cache-none-write.sh \ ++ test-file-cache-none-read-consistent.sh \ ++ test-file-cache-none-read-effective.sh \ ++ test-file-cache-none-write-consistent.sh \ ++ test-file-cache-none-write-effective.sh \ + $(NULL) + EXTRA_DIST += \ + test-file.sh \ +@@ -857,8 +859,10 @@ EXTRA_DIST += \ + test-file-extents.sh \ + test-file-dir.sh \ + test-file-dirfd.sh \ +- test-file-cache-none-read.sh \ +- test-file-cache-none-write.sh \ ++ test-file-cache-none-read-consistent.sh \ ++ test-file-cache-none-read-effective.sh \ ++ test-file-cache-none-write-consistent.sh \ ++ test-file-cache-none-write-effective.sh \ + $(NULL) + LIBGUESTFS_TESTS += test-file-block + +diff --git a/tests/test-file-cache-none-read-consistent.sh b/tests/test-file-cache-none-read-consistent.sh +new file mode 100755 +index 00000000..5f5794ee +--- /dev/null ++++ b/tests/test-file-cache-none-read-consistent.sh +@@ -0,0 +1,53 @@ ++#!/usr/bin/env bash ++# nbdkit ++# Copyright Red Hat ++# ++# Redistribution and use in source and binary forms, with or without ++# modification, are permitted provided that the following conditions are ++# met: ++# ++# * Redistributions of source code must retain the above copyright ++# notice, this list of conditions and the following disclaimer. ++# ++# * Redistributions in binary form must reproduce the above copyright ++# notice, this list of conditions and the following disclaimer in the ++# documentation and/or other materials provided with the distribution. ++# ++# * Neither the name of Red Hat nor the names of its contributors may be ++# used to endorse or promote products derived from this software without ++# specific prior written permission. ++# ++# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND ++# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ++# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A ++# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR ++# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF ++# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, ++# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ++# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++# SUCH DAMAGE. ++ ++# Test the file plugin, reading with cache=none. ++# ++# Unlike test-file-cache-none-read-effective.sh, this test doesn't ++# require cachestats. It's more about testing integrity of reading. ++ ++source ./functions.sh ++set -e ++set -x ++ ++requires_plugin file ++requires_run ++requires_nbdcopy ++requires test -f disk ++requires md5sum --version ++ ++out=file-cache-none-read-consistent.out ++cleanup_fn rm -f $out ++ ++export out ++nbdkit file disk cache=none --run 'nbdcopy "$uri" "$out"' ++test "$(md5sum < disk)" = "$(md5sum < $out)" +diff --git a/tests/test-file-cache-none-read.sh b/tests/test-file-cache-none-read-effective.sh +similarity index 93% +rename from tests/test-file-cache-none-read.sh +rename to tests/test-file-cache-none-read-effective.sh +index c9831b43..efead224 100755 +--- a/tests/test-file-cache-none-read.sh ++++ b/tests/test-file-cache-none-read-effective.sh +@@ -30,7 +30,8 @@ + # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + # SUCH DAMAGE. + +-# Test the file plugin, reading with cache=none. ++# Test the file plugin, reading with cache=none, is effective at ++# reducing page cache usage. + + source ./functions.sh + set -e +@@ -52,9 +53,9 @@ requires $SED --version + requires type cachestats + requires type cachedel + +-inp=file-cache-none-read.in +-stats1=file-cache-none-read.s1 +-stats2=file-cache-none-read.s2 ++inp=file-cache-none-read-effective.in ++stats1=file-cache-none-read-effective.s1 ++stats2=file-cache-none-read-effective.s2 + rm -f $inp $stats1 $stats2 + cleanup_fn rm -f $inp $stats1 $stats2 + +diff --git a/tests/test-file-cache-none-write-consistent.sh b/tests/test-file-cache-none-write-consistent.sh +new file mode 100755 +index 00000000..749805f1 +--- /dev/null ++++ b/tests/test-file-cache-none-write-consistent.sh +@@ -0,0 +1,54 @@ ++#!/usr/bin/env bash ++# nbdkit ++# Copyright Red Hat ++# ++# Redistribution and use in source and binary forms, with or without ++# modification, are permitted provided that the following conditions are ++# met: ++# ++# * Redistributions of source code must retain the above copyright ++# notice, this list of conditions and the following disclaimer. ++# ++# * Redistributions in binary form must reproduce the above copyright ++# notice, this list of conditions and the following disclaimer in the ++# documentation and/or other materials provided with the distribution. ++# ++# * Neither the name of Red Hat nor the names of its contributors may be ++# used to endorse or promote products derived from this software without ++# specific prior written permission. ++# ++# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND ++# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ++# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A ++# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR ++# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF ++# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, ++# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ++# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++# SUCH DAMAGE. ++ ++# Test the file plugin, writing with cache=none. ++# ++# Unlike test-file-cache-none-write-effective.sh, this test doesn't ++# require cachestats. It's more about testing integrity of writing. ++ ++source ./functions.sh ++set -e ++set -x ++ ++requires_plugin file ++requires_run ++requires_nbdcopy ++requires test -f disk ++requires md5sum --version ++requires $TRUNCATE --version ++ ++out=file-cache-none-write-consistent.out ++cleanup_fn rm -f $out ++ ++rm -f $out; $TRUNCATE -r disk $out ++nbdkit file $out cache=none --run 'nbdcopy disk "$uri"' ++test "$(md5sum < disk)" = "$(md5sum < $out)" +diff --git a/tests/test-file-cache-none-write.sh b/tests/test-file-cache-none-write-effective.sh +similarity index 92% +rename from tests/test-file-cache-none-write.sh +rename to tests/test-file-cache-none-write-effective.sh +index 2041a5cd..e0159242 100755 +--- a/tests/test-file-cache-none-write.sh ++++ b/tests/test-file-cache-none-write-effective.sh +@@ -30,7 +30,8 @@ + # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + # SUCH DAMAGE. + +-# Test the file plugin, writing with cache=none. ++# Test the file plugin, writing with cache=none, is effective at ++# reducing page cache usage. + + source ./functions.sh + set -e +@@ -51,10 +52,10 @@ requires $SED --version + # It doesn't support --version or --help, so use 'type' instead. + requires type cachestats + +-inp=file-cache-none-write.in +-out=file-cache-none-write.out +-stats1=file-cache-none-write.s1 +-stats2=file-cache-none-write.s2 ++inp=file-cache-none-write-effective.in ++out=file-cache-none-write-effective.out ++stats1=file-cache-none-write-effective.s1 ++stats2=file-cache-none-write-effective.s2 + rm -f $inp $out $stats1 $stats2 + cleanup_fn rm -f $inp $out $stats1 $stats2 + +-- +2.47.1 + diff --git a/0015-file-Hard-error-if-sync_file_range-fails.patch b/0015-file-Hard-error-if-sync_file_range-fails.patch new file mode 100644 index 0000000..7b993a5 --- /dev/null +++ b/0015-file-Hard-error-if-sync_file_range-fails.patch @@ -0,0 +1,101 @@ +From 2772bc3e81cb0c396bde186a2581d010b8d19e5d Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Tue, 1 Apr 2025 11:07:51 +0100 +Subject: [PATCH] file: Hard error if sync_file_range fails + +After discussion with Dave Chinner, it appears that I/O errors can be +returned by sync_file_range, ie. it is not merely a hint to start the +eviction. Therefore return a hard error if any of the sync_file_range +calls fails. + +Thanks: Dave Chinner +Updates: commit d35f0636373e6a58c8f3fcfcf505af248e27c574 +(cherry picked from commit d2ed6a839c3ed50b417f45dc13e5b811ecdc4e74) +--- + plugins/file/file.c | 53 ++++++++++++++++++++++++--------------------- + 1 file changed, 28 insertions(+), 25 deletions(-) + +diff --git a/plugins/file/file.c b/plugins/file/file.c +index 638c960d..f195bcf3 100644 +--- a/plugins/file/file.c ++++ b/plugins/file/file.c +@@ -116,7 +116,7 @@ struct write_window { + static pthread_mutex_t window_lock = PTHREAD_MUTEX_INITIALIZER; + static struct write_window window[NR_WINDOWS]; + +-static void ++static int + evict_writes (int fd, uint64_t offset, size_t len) + { + ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock); +@@ -125,31 +125,32 @@ evict_writes (int fd, uint64_t offset, size_t len) + * (asynchronously). + */ + if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) { +- /* sync_file_range to start the write failed */ +- nbdkit_debug ("sync_file_range: write: (ignored): %m"); ++ nbdkit_error ("sync_file_range: cache=none: starting eviction: %m"); ++ return -1; + } +- else { +- /* sync_file_range to start the write succeeded, so +- * evict the oldest window from the page cache. +- */ +- if (window[0].len > 0) { +- if (sync_file_range (window[0].fd, window[0].offset, window[0].len, +- SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| +- SYNC_FILE_RANGE_WAIT_AFTER) == -1) +- nbdkit_debug ("sync_file_range: wait: (ignored): %m"); +- if (posix_fadvise (window[0].fd, window[0].offset, window[0].len, +- POSIX_FADV_DONTNEED) == -1) +- nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m"); ++ ++ /* Evict the oldest window from the page cache. */ ++ if (window[0].len > 0) { ++ if (sync_file_range (window[0].fd, window[0].offset, window[0].len, ++ SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| ++ SYNC_FILE_RANGE_WAIT_AFTER) == -1) { ++ nbdkit_error ("sync_file_range: cache=none: evicting oldest window: %m"); ++ return -1; + } +- +- /* Move the Nth window to N-1. */ +- memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1)); +- +- /* Add the range to the newest end of the list of windows. */ +- window[NR_WINDOWS-1].fd = fd; +- window[NR_WINDOWS-1].offset = offset; +- window[NR_WINDOWS-1].len = len; ++ if (posix_fadvise (window[0].fd, window[0].offset, window[0].len, ++ POSIX_FADV_DONTNEED) == -1) ++ nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m"); + } ++ ++ /* Move the Nth window to N-1. */ ++ memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1)); ++ ++ /* Add the range to the newest end of the list of windows. */ ++ window[NR_WINDOWS-1].fd = fd; ++ window[NR_WINDOWS-1].offset = offset; ++ window[NR_WINDOWS-1].len = len; ++ ++ return 0; + } + + /* When we close the handle we must remove any windows which are still +@@ -855,8 +856,10 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset, + return -1; + + #if EVICT_WRITES +- if (cache_mode == cache_none) +- evict_writes (h->fd, orig_offset, orig_count); ++ if (cache_mode == cache_none) { ++ if (evict_writes (h->fd, orig_offset, orig_count) == -1) ++ return -1; ++ } + #endif + + return 0; +-- +2.47.1 + diff --git a/0016-file-Reduce-the-size-of-the-lock-around-write-evicti.patch b/0016-file-Reduce-the-size-of-the-lock-around-write-evicti.patch new file mode 100644 index 0000000..09abced --- /dev/null +++ b/0016-file-Reduce-the-size-of-the-lock-around-write-evicti.patch @@ -0,0 +1,90 @@ +From 056a975578b42e8cbfd783b8b919c4ef02e40019 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Tue, 1 Apr 2025 11:56:22 +0100 +Subject: [PATCH] file: Reduce the size of the lock around write eviction + +Previously we held window_lock during the synchronous eviction of the +oldest window. This potentially serializes all writes in the slow +write case. We don't need to hold the lock here. + +(cherry picked from commit d3d2bc45bb59a30669a3d926435cf57e99feb3a2) +--- + plugins/file/file.c | 52 +++++++++++++++++++++++++++------------------ + 1 file changed, 31 insertions(+), 21 deletions(-) + +diff --git a/plugins/file/file.c b/plugins/file/file.c +index f195bcf3..e68c24ee 100644 +--- a/plugins/file/file.c ++++ b/plugins/file/file.c +@@ -119,37 +119,47 @@ static struct write_window window[NR_WINDOWS]; + static int + evict_writes (int fd, uint64_t offset, size_t len) + { +- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock); +- +- /* Tell Linux to start writing the current range out to disk +- * (asynchronously). +- */ +- if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) { +- nbdkit_error ("sync_file_range: cache=none: starting eviction: %m"); +- return -1; ++ struct write_window oldest = { 0 }; ++ ++ { ++ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock); ++ ++ /* Save oldest window[0] for eviction below, and move all windows ++ * down one. Set the newest slot to empty. ++ */ ++ oldest = window[0]; ++ memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1)); ++ window[NR_WINDOWS-1].len = 0; ++ ++ /* Tell Linux to start writing the current range out to disk ++ * (asynchronously). ++ */ ++ if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) { ++ nbdkit_error ("sync_file_range: cache=none: starting eviction: %m"); ++ return -1; ++ } ++ ++ /* Add the range to the newest end of the list of windows. */ ++ window[NR_WINDOWS-1].fd = fd; ++ window[NR_WINDOWS-1].offset = offset; ++ window[NR_WINDOWS-1].len = len; + } ++ /* Release lock here. */ + +- /* Evict the oldest window from the page cache. */ +- if (window[0].len > 0) { +- if (sync_file_range (window[0].fd, window[0].offset, window[0].len, +- SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| ++ /* Evict the oldest window from the page cache (synchronously). */ ++ if (oldest.len > 0) { ++ if (sync_file_range (oldest.fd, oldest.offset, oldest.len, ++ SYNC_FILE_RANGE_WAIT_BEFORE | ++ SYNC_FILE_RANGE_WRITE | + SYNC_FILE_RANGE_WAIT_AFTER) == -1) { + nbdkit_error ("sync_file_range: cache=none: evicting oldest window: %m"); + return -1; + } +- if (posix_fadvise (window[0].fd, window[0].offset, window[0].len, ++ if (posix_fadvise (oldest.fd, oldest.offset, oldest.len, + POSIX_FADV_DONTNEED) == -1) + nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m"); + } + +- /* Move the Nth window to N-1. */ +- memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1)); +- +- /* Add the range to the newest end of the list of windows. */ +- window[NR_WINDOWS-1].fd = fd; +- window[NR_WINDOWS-1].offset = offset; +- window[NR_WINDOWS-1].len = len; +- + return 0; + } + +-- +2.47.1 + diff --git a/0017-file-Document-implicit-assumption-about-eviction-win.patch b/0017-file-Document-implicit-assumption-about-eviction-win.patch new file mode 100644 index 0000000..fcb1bff --- /dev/null +++ b/0017-file-Document-implicit-assumption-about-eviction-win.patch @@ -0,0 +1,29 @@ +From 5f5f74fa0afae78365eb431a589a6fa4938b9287 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Tue, 1 Apr 2025 16:51:24 +0100 +Subject: [PATCH] file: Document implicit assumption about eviction windows + +Add a comment that slots in the window list are only valid when len > 0. +This was true before but only implicit. + +(cherry picked from commit 2022ac667f3c1de81b692f0128dd83a7c4999e38) +--- + plugins/file/file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/plugins/file/file.c b/plugins/file/file.c +index e68c24ee..6bcc5537 100644 +--- a/plugins/file/file.c ++++ b/plugins/file/file.c +@@ -110,7 +110,7 @@ static enum { cache_default, cache_none } cache_mode = cache_default; + struct write_window { + int fd; + uint64_t offset; +- size_t len; ++ size_t len; /* window slot only valid if len > 0 */ + }; + + static pthread_mutex_t window_lock = PTHREAD_MUTEX_INITIALIZER; +-- +2.47.1 + diff --git a/nbdkit.spec b/nbdkit.spec index e28b86f..f040b28 100644 --- a/nbdkit.spec +++ b/nbdkit.spec @@ -56,7 +56,7 @@ Name: nbdkit Version: 1.38.5 -Release: 3%{?dist} +Release: 4%{?dist} Summary: NBD server License: BSD-3-Clause @@ -94,6 +94,10 @@ Patch0010: 0010-vddk-do_extents-Avoid-reading-partial-chunk-beyond-t.patch Patch0011: 0011-file-Add-debugging-if-sync_file_range-posix_fadvise-.patch Patch0012: 0012-file-If-sync_file_range-fails-to-start-don-t-add-win.patch Patch0013: 0013-tests-Add-tests-of-file-plugin-cache-none.patch +Patch0014: 0014-tests-Add-more-generic-tests-of-file-cache-none.patch +Patch0015: 0015-file-Hard-error-if-sync_file_range-fails.patch +Patch0016: 0016-file-Reduce-the-size-of-the-lock-around-write-evicti.patch +Patch0017: 0017-file-Document-implicit-assumption-about-eviction-win.patch # For automatic RPM Provides generation. # See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html @@ -1512,7 +1516,7 @@ fi %changelog -* Mon Mar 31 2025 Richard W.M. Jones - 1.38.5-3 +* Tue Apr 01 2025 Richard W.M. Jones - 1.38.5-4 - Add extra system call checking and debugging to nbdkit-file-plugin resolves: RHEL-85510