Add extra system call checking and debugging to nbdkit-file-plugin

resolves: RHEL-85510
This commit is contained in:
Richard W.M. Jones 2025-04-01 20:23:51 +01:00
parent ebdd780c47
commit 71de522d71
5 changed files with 464 additions and 2 deletions

View File

@ -0,0 +1,238 @@
From bdab63138758e287b292140e17f2d0e3b4c40bbe Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 12:45:43 +0100
Subject: [PATCH] tests: Add more generic tests of file cache=none
The tests added in commit 6017ba21ae require cachestats which means
they are almost always skipped in reality. Also we're interested in
whether the windowed eviction could ever corrupt data. Write a
simpler pair of tests for data integrity.
Updates: commit 6017ba21aeeb3d7ad85925e78dba85a005194dee
(cherry picked from commit 7b45f73c0668e56a9188249eeefc2f67aeb50af3)
---
tests/Makefile.am | 12 +++--
tests/test-file-cache-none-read-consistent.sh | 53 ++++++++++++++++++
...=> test-file-cache-none-read-effective.sh} | 9 ++--
.../test-file-cache-none-write-consistent.sh | 54 +++++++++++++++++++
...> test-file-cache-none-write-effective.sh} | 11 ++--
5 files changed, 126 insertions(+), 13 deletions(-)
create mode 100755 tests/test-file-cache-none-read-consistent.sh
rename tests/{test-file-cache-none-read.sh => test-file-cache-none-read-effective.sh} (93%)
create mode 100755 tests/test-file-cache-none-write-consistent.sh
rename tests/{test-file-cache-none-write.sh => test-file-cache-none-write-effective.sh} (92%)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 4340cc38..eed96d28 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -846,8 +846,10 @@ TESTS += \
test-file-extents.sh \
test-file-dir.sh \
test-file-dirfd.sh \
- test-file-cache-none-read.sh \
- test-file-cache-none-write.sh \
+ test-file-cache-none-read-consistent.sh \
+ test-file-cache-none-read-effective.sh \
+ test-file-cache-none-write-consistent.sh \
+ test-file-cache-none-write-effective.sh \
$(NULL)
EXTRA_DIST += \
test-file.sh \
@@ -857,8 +859,10 @@ EXTRA_DIST += \
test-file-extents.sh \
test-file-dir.sh \
test-file-dirfd.sh \
- test-file-cache-none-read.sh \
- test-file-cache-none-write.sh \
+ test-file-cache-none-read-consistent.sh \
+ test-file-cache-none-read-effective.sh \
+ test-file-cache-none-write-consistent.sh \
+ test-file-cache-none-write-effective.sh \
$(NULL)
LIBGUESTFS_TESTS += test-file-block
diff --git a/tests/test-file-cache-none-read-consistent.sh b/tests/test-file-cache-none-read-consistent.sh
new file mode 100755
index 00000000..5f5794ee
--- /dev/null
+++ b/tests/test-file-cache-none-read-consistent.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the file plugin, reading with cache=none.
+#
+# Unlike test-file-cache-none-read-effective.sh, this test doesn't
+# require cachestats. It's more about testing integrity of reading.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_plugin file
+requires_run
+requires_nbdcopy
+requires test -f disk
+requires md5sum --version
+
+out=file-cache-none-read-consistent.out
+cleanup_fn rm -f $out
+
+export out
+nbdkit file disk cache=none --run 'nbdcopy "$uri" "$out"'
+test "$(md5sum < disk)" = "$(md5sum < $out)"
diff --git a/tests/test-file-cache-none-read.sh b/tests/test-file-cache-none-read-effective.sh
similarity index 93%
rename from tests/test-file-cache-none-read.sh
rename to tests/test-file-cache-none-read-effective.sh
index c9831b43..efead224 100755
--- a/tests/test-file-cache-none-read.sh
+++ b/tests/test-file-cache-none-read-effective.sh
@@ -30,7 +30,8 @@
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
-# Test the file plugin, reading with cache=none.
+# Test the file plugin, reading with cache=none, is effective at
+# reducing page cache usage.
source ./functions.sh
set -e
@@ -52,9 +53,9 @@ requires $SED --version
requires type cachestats
requires type cachedel
-inp=file-cache-none-read.in
-stats1=file-cache-none-read.s1
-stats2=file-cache-none-read.s2
+inp=file-cache-none-read-effective.in
+stats1=file-cache-none-read-effective.s1
+stats2=file-cache-none-read-effective.s2
rm -f $inp $stats1 $stats2
cleanup_fn rm -f $inp $stats1 $stats2
diff --git a/tests/test-file-cache-none-write-consistent.sh b/tests/test-file-cache-none-write-consistent.sh
new file mode 100755
index 00000000..749805f1
--- /dev/null
+++ b/tests/test-file-cache-none-write-consistent.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the file plugin, writing with cache=none.
+#
+# Unlike test-file-cache-none-write-effective.sh, this test doesn't
+# require cachestats. It's more about testing integrity of writing.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_plugin file
+requires_run
+requires_nbdcopy
+requires test -f disk
+requires md5sum --version
+requires $TRUNCATE --version
+
+out=file-cache-none-write-consistent.out
+cleanup_fn rm -f $out
+
+rm -f $out; $TRUNCATE -r disk $out
+nbdkit file $out cache=none --run 'nbdcopy disk "$uri"'
+test "$(md5sum < disk)" = "$(md5sum < $out)"
diff --git a/tests/test-file-cache-none-write.sh b/tests/test-file-cache-none-write-effective.sh
similarity index 92%
rename from tests/test-file-cache-none-write.sh
rename to tests/test-file-cache-none-write-effective.sh
index 2041a5cd..e0159242 100755
--- a/tests/test-file-cache-none-write.sh
+++ b/tests/test-file-cache-none-write-effective.sh
@@ -30,7 +30,8 @@
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
-# Test the file plugin, writing with cache=none.
+# Test the file plugin, writing with cache=none, is effective at
+# reducing page cache usage.
source ./functions.sh
set -e
@@ -51,10 +52,10 @@ requires $SED --version
# It doesn't support --version or --help, so use 'type' instead.
requires type cachestats
-inp=file-cache-none-write.in
-out=file-cache-none-write.out
-stats1=file-cache-none-write.s1
-stats2=file-cache-none-write.s2
+inp=file-cache-none-write-effective.in
+out=file-cache-none-write-effective.out
+stats1=file-cache-none-write-effective.s1
+stats2=file-cache-none-write-effective.s2
rm -f $inp $out $stats1 $stats2
cleanup_fn rm -f $inp $out $stats1 $stats2
--
2.47.1

View File

@ -0,0 +1,101 @@
From 2772bc3e81cb0c396bde186a2581d010b8d19e5d Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 11:07:51 +0100
Subject: [PATCH] file: Hard error if sync_file_range fails
After discussion with Dave Chinner, it appears that I/O errors can be
returned by sync_file_range, ie. it is not merely a hint to start the
eviction. Therefore return a hard error if any of the sync_file_range
calls fails.
Thanks: Dave Chinner
Updates: commit d35f0636373e6a58c8f3fcfcf505af248e27c574
(cherry picked from commit d2ed6a839c3ed50b417f45dc13e5b811ecdc4e74)
---
plugins/file/file.c | 53 ++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 25 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 638c960d..f195bcf3 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -116,7 +116,7 @@ struct write_window {
static pthread_mutex_t window_lock = PTHREAD_MUTEX_INITIALIZER;
static struct write_window window[NR_WINDOWS];
-static void
+static int
evict_writes (int fd, uint64_t offset, size_t len)
{
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
@@ -125,31 +125,32 @@ evict_writes (int fd, uint64_t offset, size_t len)
* (asynchronously).
*/
if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) {
- /* sync_file_range to start the write failed */
- nbdkit_debug ("sync_file_range: write: (ignored): %m");
+ nbdkit_error ("sync_file_range: cache=none: starting eviction: %m");
+ return -1;
}
- else {
- /* sync_file_range to start the write succeeded, so
- * evict the oldest window from the page cache.
- */
- if (window[0].len > 0) {
- if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
- SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
- SYNC_FILE_RANGE_WAIT_AFTER) == -1)
- nbdkit_debug ("sync_file_range: wait: (ignored): %m");
- if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
- POSIX_FADV_DONTNEED) == -1)
- nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
+
+ /* Evict the oldest window from the page cache. */
+ if (window[0].len > 0) {
+ if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
+ SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
+ SYNC_FILE_RANGE_WAIT_AFTER) == -1) {
+ nbdkit_error ("sync_file_range: cache=none: evicting oldest window: %m");
+ return -1;
}
-
- /* Move the Nth window to N-1. */
- memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
-
- /* Add the range to the newest end of the list of windows. */
- window[NR_WINDOWS-1].fd = fd;
- window[NR_WINDOWS-1].offset = offset;
- window[NR_WINDOWS-1].len = len;
+ if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
+ POSIX_FADV_DONTNEED) == -1)
+ nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
}
+
+ /* Move the Nth window to N-1. */
+ memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
+
+ /* Add the range to the newest end of the list of windows. */
+ window[NR_WINDOWS-1].fd = fd;
+ window[NR_WINDOWS-1].offset = offset;
+ window[NR_WINDOWS-1].len = len;
+
+ return 0;
}
/* When we close the handle we must remove any windows which are still
@@ -855,8 +856,10 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
return -1;
#if EVICT_WRITES
- if (cache_mode == cache_none)
- evict_writes (h->fd, orig_offset, orig_count);
+ if (cache_mode == cache_none) {
+ if (evict_writes (h->fd, orig_offset, orig_count) == -1)
+ return -1;
+ }
#endif
return 0;
--
2.47.1

View File

@ -0,0 +1,90 @@
From 056a975578b42e8cbfd783b8b919c4ef02e40019 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 11:56:22 +0100
Subject: [PATCH] file: Reduce the size of the lock around write eviction
Previously we held window_lock during the synchronous eviction of the
oldest window. This potentially serializes all writes in the slow
write case. We don't need to hold the lock here.
(cherry picked from commit d3d2bc45bb59a30669a3d926435cf57e99feb3a2)
---
plugins/file/file.c | 52 +++++++++++++++++++++++++++------------------
1 file changed, 31 insertions(+), 21 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index f195bcf3..e68c24ee 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -119,37 +119,47 @@ static struct write_window window[NR_WINDOWS];
static int
evict_writes (int fd, uint64_t offset, size_t len)
{
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
-
- /* Tell Linux to start writing the current range out to disk
- * (asynchronously).
- */
- if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) {
- nbdkit_error ("sync_file_range: cache=none: starting eviction: %m");
- return -1;
+ struct write_window oldest = { 0 };
+
+ {
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
+
+ /* Save oldest window[0] for eviction below, and move all windows
+ * down one. Set the newest slot to empty.
+ */
+ oldest = window[0];
+ memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
+ window[NR_WINDOWS-1].len = 0;
+
+ /* Tell Linux to start writing the current range out to disk
+ * (asynchronously).
+ */
+ if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) {
+ nbdkit_error ("sync_file_range: cache=none: starting eviction: %m");
+ return -1;
+ }
+
+ /* Add the range to the newest end of the list of windows. */
+ window[NR_WINDOWS-1].fd = fd;
+ window[NR_WINDOWS-1].offset = offset;
+ window[NR_WINDOWS-1].len = len;
}
+ /* Release lock here. */
- /* Evict the oldest window from the page cache. */
- if (window[0].len > 0) {
- if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
- SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
+ /* Evict the oldest window from the page cache (synchronously). */
+ if (oldest.len > 0) {
+ if (sync_file_range (oldest.fd, oldest.offset, oldest.len,
+ SYNC_FILE_RANGE_WAIT_BEFORE |
+ SYNC_FILE_RANGE_WRITE |
SYNC_FILE_RANGE_WAIT_AFTER) == -1) {
nbdkit_error ("sync_file_range: cache=none: evicting oldest window: %m");
return -1;
}
- if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
+ if (posix_fadvise (oldest.fd, oldest.offset, oldest.len,
POSIX_FADV_DONTNEED) == -1)
nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
}
- /* Move the Nth window to N-1. */
- memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
-
- /* Add the range to the newest end of the list of windows. */
- window[NR_WINDOWS-1].fd = fd;
- window[NR_WINDOWS-1].offset = offset;
- window[NR_WINDOWS-1].len = len;
-
return 0;
}
--
2.47.1

View File

@ -0,0 +1,29 @@
From 5f5f74fa0afae78365eb431a589a6fa4938b9287 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 16:51:24 +0100
Subject: [PATCH] file: Document implicit assumption about eviction windows
Add a comment that slots in the window list are only valid when len > 0.
This was true before but only implicit.
(cherry picked from commit 2022ac667f3c1de81b692f0128dd83a7c4999e38)
---
plugins/file/file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index e68c24ee..6bcc5537 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -110,7 +110,7 @@ static enum { cache_default, cache_none } cache_mode = cache_default;
struct write_window {
int fd;
uint64_t offset;
- size_t len;
+ size_t len; /* window slot only valid if len > 0 */
};
static pthread_mutex_t window_lock = PTHREAD_MUTEX_INITIALIZER;
--
2.47.1

View File

@ -56,7 +56,7 @@
Name: nbdkit
Version: 1.38.5
Release: 3%{?dist}
Release: 4%{?dist}
Summary: NBD server
License: BSD-3-Clause
@ -94,6 +94,10 @@ Patch0010: 0010-vddk-do_extents-Avoid-reading-partial-chunk-beyond-t.patch
Patch0011: 0011-file-Add-debugging-if-sync_file_range-posix_fadvise-.patch
Patch0012: 0012-file-If-sync_file_range-fails-to-start-don-t-add-win.patch
Patch0013: 0013-tests-Add-tests-of-file-plugin-cache-none.patch
Patch0014: 0014-tests-Add-more-generic-tests-of-file-cache-none.patch
Patch0015: 0015-file-Hard-error-if-sync_file_range-fails.patch
Patch0016: 0016-file-Reduce-the-size-of-the-lock-around-write-evicti.patch
Patch0017: 0017-file-Document-implicit-assumption-about-eviction-win.patch
# For automatic RPM Provides generation.
# See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html
@ -1512,7 +1516,7 @@ fi
%changelog
* Mon Mar 31 2025 Richard W.M. Jones <rjones@redhat.com> - 1.38.5-3
* Tue Apr 01 2025 Richard W.M. Jones <rjones@redhat.com> - 1.38.5-4
- Add extra system call checking and debugging to nbdkit-file-plugin
resolves: RHEL-85510