Add extra system call checking and debugging to nbdkit-file-plugin

resolves: RHEL-85515
This commit is contained in:
Richard W.M. Jones 2025-04-01 20:23:57 +01:00
parent 9d635982f2
commit 58b0d03046
5 changed files with 464 additions and 2 deletions

View File

@ -0,0 +1,238 @@
From cf595cb02d8241fe01b943bd28cf744a0c7cb852 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 12:45:43 +0100
Subject: [PATCH] tests: Add more generic tests of file cache=none
The tests added in commit 6017ba21ae require cachestats which means
they are almost always skipped in reality. Also we're interested in
whether the windowed eviction could ever corrupt data. Write a
simpler pair of tests for data integrity.
Updates: commit 6017ba21aeeb3d7ad85925e78dba85a005194dee
(cherry picked from commit 7b45f73c0668e56a9188249eeefc2f67aeb50af3)
---
tests/Makefile.am | 12 +++--
tests/test-file-cache-none-read-consistent.sh | 53 ++++++++++++++++++
...=> test-file-cache-none-read-effective.sh} | 9 ++--
.../test-file-cache-none-write-consistent.sh | 54 +++++++++++++++++++
...> test-file-cache-none-write-effective.sh} | 11 ++--
5 files changed, 126 insertions(+), 13 deletions(-)
create mode 100755 tests/test-file-cache-none-read-consistent.sh
rename tests/{test-file-cache-none-read.sh => test-file-cache-none-read-effective.sh} (93%)
create mode 100755 tests/test-file-cache-none-write-consistent.sh
rename tests/{test-file-cache-none-write.sh => test-file-cache-none-write-effective.sh} (92%)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index e8003c1c..2b494b89 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -899,8 +899,10 @@ TESTS += \
test-file-extents.sh \
test-file-dir.sh \
test-file-dirfd.sh \
- test-file-cache-none-read.sh \
- test-file-cache-none-write.sh \
+ test-file-cache-none-read-consistent.sh \
+ test-file-cache-none-read-effective.sh \
+ test-file-cache-none-write-consistent.sh \
+ test-file-cache-none-write-effective.sh \
$(NULL)
EXTRA_DIST += \
test-file.sh \
@@ -910,8 +912,10 @@ EXTRA_DIST += \
test-file-extents.sh \
test-file-dir.sh \
test-file-dirfd.sh \
- test-file-cache-none-read.sh \
- test-file-cache-none-write.sh \
+ test-file-cache-none-read-consistent.sh \
+ test-file-cache-none-read-effective.sh \
+ test-file-cache-none-write-consistent.sh \
+ test-file-cache-none-write-effective.sh \
$(NULL)
LIBGUESTFS_TESTS += test-file-block
LIBNBD_TESTS += test-file-block-nbd
diff --git a/tests/test-file-cache-none-read-consistent.sh b/tests/test-file-cache-none-read-consistent.sh
new file mode 100755
index 00000000..5f5794ee
--- /dev/null
+++ b/tests/test-file-cache-none-read-consistent.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the file plugin, reading with cache=none.
+#
+# Unlike test-file-cache-none-read-effective.sh, this test doesn't
+# require cachestats. It's more about testing integrity of reading.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_plugin file
+requires_run
+requires_nbdcopy
+requires test -f disk
+requires md5sum --version
+
+out=file-cache-none-read-consistent.out
+cleanup_fn rm -f $out
+
+export out
+nbdkit file disk cache=none --run 'nbdcopy "$uri" "$out"'
+test "$(md5sum < disk)" = "$(md5sum < $out)"
diff --git a/tests/test-file-cache-none-read.sh b/tests/test-file-cache-none-read-effective.sh
similarity index 93%
rename from tests/test-file-cache-none-read.sh
rename to tests/test-file-cache-none-read-effective.sh
index c9831b43..efead224 100755
--- a/tests/test-file-cache-none-read.sh
+++ b/tests/test-file-cache-none-read-effective.sh
@@ -30,7 +30,8 @@
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
-# Test the file plugin, reading with cache=none.
+# Test the file plugin, reading with cache=none, is effective at
+# reducing page cache usage.
source ./functions.sh
set -e
@@ -52,9 +53,9 @@ requires $SED --version
requires type cachestats
requires type cachedel
-inp=file-cache-none-read.in
-stats1=file-cache-none-read.s1
-stats2=file-cache-none-read.s2
+inp=file-cache-none-read-effective.in
+stats1=file-cache-none-read-effective.s1
+stats2=file-cache-none-read-effective.s2
rm -f $inp $stats1 $stats2
cleanup_fn rm -f $inp $stats1 $stats2
diff --git a/tests/test-file-cache-none-write-consistent.sh b/tests/test-file-cache-none-write-consistent.sh
new file mode 100755
index 00000000..749805f1
--- /dev/null
+++ b/tests/test-file-cache-none-write-consistent.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the file plugin, writing with cache=none.
+#
+# Unlike test-file-cache-none-write-effective.sh, this test doesn't
+# require cachestats. It's more about testing integrity of writing.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_plugin file
+requires_run
+requires_nbdcopy
+requires test -f disk
+requires md5sum --version
+requires $TRUNCATE --version
+
+out=file-cache-none-write-consistent.out
+cleanup_fn rm -f $out
+
+rm -f $out; $TRUNCATE -r disk $out
+nbdkit file $out cache=none --run 'nbdcopy disk "$uri"'
+test "$(md5sum < disk)" = "$(md5sum < $out)"
diff --git a/tests/test-file-cache-none-write.sh b/tests/test-file-cache-none-write-effective.sh
similarity index 92%
rename from tests/test-file-cache-none-write.sh
rename to tests/test-file-cache-none-write-effective.sh
index 2041a5cd..e0159242 100755
--- a/tests/test-file-cache-none-write.sh
+++ b/tests/test-file-cache-none-write-effective.sh
@@ -30,7 +30,8 @@
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
-# Test the file plugin, writing with cache=none.
+# Test the file plugin, writing with cache=none, is effective at
+# reducing page cache usage.
source ./functions.sh
set -e
@@ -51,10 +52,10 @@ requires $SED --version
# It doesn't support --version or --help, so use 'type' instead.
requires type cachestats
-inp=file-cache-none-write.in
-out=file-cache-none-write.out
-stats1=file-cache-none-write.s1
-stats2=file-cache-none-write.s2
+inp=file-cache-none-write-effective.in
+out=file-cache-none-write-effective.out
+stats1=file-cache-none-write-effective.s1
+stats2=file-cache-none-write-effective.s2
rm -f $inp $out $stats1 $stats2
cleanup_fn rm -f $inp $out $stats1 $stats2
--
2.47.1

View File

@ -0,0 +1,101 @@
From 21ac419d37f307fee4ba88d37c0bf1da0c591bac Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 11:07:51 +0100
Subject: [PATCH] file: Hard error if sync_file_range fails
After discussion with Dave Chinner, it appears that I/O errors can be
returned by sync_file_range, ie. it is not merely a hint to start the
eviction. Therefore return a hard error if any of the sync_file_range
calls fails.
Thanks: Dave Chinner
Updates: commit d35f0636373e6a58c8f3fcfcf505af248e27c574
(cherry picked from commit d2ed6a839c3ed50b417f45dc13e5b811ecdc4e74)
---
plugins/file/file.c | 53 ++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 25 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 0271a56b..aa552037 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -118,7 +118,7 @@ struct write_window {
static pthread_mutex_t window_lock = PTHREAD_MUTEX_INITIALIZER;
static struct write_window window[NR_WINDOWS];
-static void
+static int
evict_writes (int fd, uint64_t offset, size_t len)
{
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
@@ -127,31 +127,32 @@ evict_writes (int fd, uint64_t offset, size_t len)
* (asynchronously).
*/
if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) {
- /* sync_file_range to start the write failed */
- nbdkit_debug ("sync_file_range: write: (ignored): %m");
+ nbdkit_error ("sync_file_range: cache=none: starting eviction: %m");
+ return -1;
}
- else {
- /* sync_file_range to start the write succeeded, so
- * evict the oldest window from the page cache.
- */
- if (window[0].len > 0) {
- if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
- SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
- SYNC_FILE_RANGE_WAIT_AFTER) == -1)
- nbdkit_debug ("sync_file_range: wait: (ignored): %m");
- if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
- POSIX_FADV_DONTNEED) == -1)
- nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
+
+ /* Evict the oldest window from the page cache. */
+ if (window[0].len > 0) {
+ if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
+ SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
+ SYNC_FILE_RANGE_WAIT_AFTER) == -1) {
+ nbdkit_error ("sync_file_range: cache=none: evicting oldest window: %m");
+ return -1;
}
-
- /* Move the Nth window to N-1. */
- memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
-
- /* Add the range to the newest end of the list of windows. */
- window[NR_WINDOWS-1].fd = fd;
- window[NR_WINDOWS-1].offset = offset;
- window[NR_WINDOWS-1].len = len;
+ if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
+ POSIX_FADV_DONTNEED) == -1)
+ nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
}
+
+ /* Move the Nth window to N-1. */
+ memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
+
+ /* Add the range to the newest end of the list of windows. */
+ window[NR_WINDOWS-1].fd = fd;
+ window[NR_WINDOWS-1].offset = offset;
+ window[NR_WINDOWS-1].len = len;
+
+ return 0;
}
/* When we close the handle we must remove any windows which are still
@@ -920,8 +921,10 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
return -1;
#if EVICT_WRITES
- if (cache_mode == cache_none)
- evict_writes (h->fd, orig_offset, orig_count);
+ if (cache_mode == cache_none) {
+ if (evict_writes (h->fd, orig_offset, orig_count) == -1)
+ return -1;
+ }
#endif
return 0;
--
2.47.1

View File

@ -0,0 +1,90 @@
From 15d235587c14dcbc9a98197921994bc6e2d505d8 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 11:56:22 +0100
Subject: [PATCH] file: Reduce the size of the lock around write eviction
Previously we held window_lock during the synchronous eviction of the
oldest window. This potentially serializes all writes in the slow
write case. We don't need to hold the lock here.
(cherry picked from commit d3d2bc45bb59a30669a3d926435cf57e99feb3a2)
---
plugins/file/file.c | 52 +++++++++++++++++++++++++++------------------
1 file changed, 31 insertions(+), 21 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index aa552037..d1e91203 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -121,37 +121,47 @@ static struct write_window window[NR_WINDOWS];
static int
evict_writes (int fd, uint64_t offset, size_t len)
{
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
-
- /* Tell Linux to start writing the current range out to disk
- * (asynchronously).
- */
- if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) {
- nbdkit_error ("sync_file_range: cache=none: starting eviction: %m");
- return -1;
+ struct write_window oldest = { 0 };
+
+ {
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
+
+ /* Save oldest window[0] for eviction below, and move all windows
+ * down one. Set the newest slot to empty.
+ */
+ oldest = window[0];
+ memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
+ window[NR_WINDOWS-1].len = 0;
+
+ /* Tell Linux to start writing the current range out to disk
+ * (asynchronously).
+ */
+ if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) {
+ nbdkit_error ("sync_file_range: cache=none: starting eviction: %m");
+ return -1;
+ }
+
+ /* Add the range to the newest end of the list of windows. */
+ window[NR_WINDOWS-1].fd = fd;
+ window[NR_WINDOWS-1].offset = offset;
+ window[NR_WINDOWS-1].len = len;
}
+ /* Release lock here. */
- /* Evict the oldest window from the page cache. */
- if (window[0].len > 0) {
- if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
- SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
+ /* Evict the oldest window from the page cache (synchronously). */
+ if (oldest.len > 0) {
+ if (sync_file_range (oldest.fd, oldest.offset, oldest.len,
+ SYNC_FILE_RANGE_WAIT_BEFORE |
+ SYNC_FILE_RANGE_WRITE |
SYNC_FILE_RANGE_WAIT_AFTER) == -1) {
nbdkit_error ("sync_file_range: cache=none: evicting oldest window: %m");
return -1;
}
- if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
+ if (posix_fadvise (oldest.fd, oldest.offset, oldest.len,
POSIX_FADV_DONTNEED) == -1)
nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
}
- /* Move the Nth window to N-1. */
- memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
-
- /* Add the range to the newest end of the list of windows. */
- window[NR_WINDOWS-1].fd = fd;
- window[NR_WINDOWS-1].offset = offset;
- window[NR_WINDOWS-1].len = len;
-
return 0;
}
--
2.47.1

View File

@ -0,0 +1,29 @@
From 72d95a4563149dfda0502ef28f06c7f7f972bd75 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 16:51:24 +0100
Subject: [PATCH] file: Document implicit assumption about eviction windows
Add a comment that slots in the window list are only valid when len > 0.
This was true before but only implicit.
(cherry picked from commit 2022ac667f3c1de81b692f0128dd83a7c4999e38)
---
plugins/file/file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index d1e91203..b8470c20 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -112,7 +112,7 @@ static enum { cache_default, cache_none } cache_mode = cache_default;
struct write_window {
int fd;
uint64_t offset;
- size_t len;
+ size_t len; /* window slot only valid if len > 0 */
};
static pthread_mutex_t window_lock = PTHREAD_MUTEX_INITIALIZER;
--
2.47.1

View File

@ -55,7 +55,7 @@
Name: nbdkit
Version: 1.42.2
Release: 1%{?dist}
Release: 2%{?dist}
Summary: NBD server
License: BSD-3-Clause
@ -83,6 +83,10 @@ Source3: copy-patches.sh
Patch0001: 0001-file-Add-debugging-if-sync_file_range-posix_fadvise-.patch
Patch0002: 0002-file-If-sync_file_range-fails-to-start-don-t-add-win.patch
Patch0003: 0003-tests-Add-tests-of-file-plugin-cache-none.patch
Patch0004: 0004-tests-Add-more-generic-tests-of-file-cache-none.patch
Patch0005: 0005-file-Hard-error-if-sync_file_range-fails.patch
Patch0006: 0006-file-Reduce-the-size-of-the-lock-around-write-evicti.patch
Patch0007: 0007-file-Document-implicit-assumption-about-eviction-win.patch
# For automatic RPM Provides generation.
# See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html
@ -1525,7 +1529,7 @@ fi
%changelog
* Mon Mar 31 2025 Richard W.M. Jones <rjones@redhat.com> - 1.42.2-1
* Tue Apr 01 2025 Richard W.M. Jones <rjones@redhat.com> - 1.42.2-2
- Rebase to nbdkit 1.42.2
- Synch the spec file with Fedora Rawhide.
- nbdkit-ondemand-plugin moves into a new subpackage.