Compare commits

..

No commits in common. "c8-stream-rhel" and "c9s" have entirely different histories.

67 changed files with 7547 additions and 2122 deletions

5
.gitignore vendored
View File

@ -1,2 +1,3 @@
SOURCES/libguestfs.keyring
SOURCES/nbdkit-1.24.0.tar.gz
/clog
/nbdkit-*.tar.gz
/nbdkit-*.tar.gz.sig

View File

@ -1,2 +0,0 @@
1bbc40f501a7fef9eef2a39b701a71aee2fea7c4 SOURCES/libguestfs.keyring
069720cc0d1502b007652101d293a57d7b4d7c41 SOURCES/nbdkit-1.24.0.tar.gz

View File

@ -0,0 +1,151 @@
From e97b5ec6e7e7406688f68a5828e66ef46046fd9f Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 23 Jul 2024 14:46:41 +0100
Subject: [PATCH] server: log: Move preserve errno to log_verror function
This neutral code refactoring just moves the place where we preserve
errno out one layer, but should have no other effect.
(cherry picked from commit f2c644d4495d5e75883ff729936102c90489e8d8)
---
server/internal.h | 8 ++++----
server/log-stderr.c | 9 ++-------
server/log-syslog.c | 13 ++++---------
server/log.c | 12 ++++++++----
4 files changed, 18 insertions(+), 24 deletions(-)
diff --git a/server/internal.h b/server/internal.h
index 7eba3bce..57e777e9 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -339,10 +339,10 @@ extern void free_debug_flags (void);
extern void log_verror (const char *fs, va_list args);
/* log-*.c */
-extern void log_stderr_verror (const char *fs, va_list args)
- ATTRIBUTE_FORMAT_PRINTF (1, 0);
-extern void log_syslog_verror (const char *fs, va_list args)
- ATTRIBUTE_FORMAT_PRINTF (1, 0);
+extern void log_stderr_verror (int orig_errno, const char *fs, va_list args)
+ ATTRIBUTE_FORMAT_PRINTF (2, 0);
+extern void log_syslog_verror (int orig_errno, const char *fs, va_list args)
+ ATTRIBUTE_FORMAT_PRINTF (2, 0);
/* vfprintf.c */
#if !HAVE_VFPRINTF_PERCENT_M
diff --git a/server/log-stderr.c b/server/log-stderr.c
index 8a55f5df..4d8b09da 100644
--- a/server/log-stderr.c
+++ b/server/log-stderr.c
@@ -43,12 +43,9 @@
#include "internal.h"
-/* Note: preserves the previous value of errno. */
void
-log_stderr_verror (const char *fs, va_list args)
+log_stderr_verror (int orig_errno, const char *fs, va_list args)
{
- int err = errno; /* must be first line of function */
-
const char *name = threadlocal_get_name ();
size_t instance_num = threadlocal_get_instance_num ();
int tty;
@@ -69,7 +66,7 @@ log_stderr_verror (const char *fs, va_list args)
}
fprintf (stderr, "error: ");
- errno = err; /* must restore in case fs contains %m */
+ errno = orig_errno; /* must restore in case fs contains %m */
vfprintf (stderr, fs, args);
fprintf (stderr, "\n");
@@ -78,6 +75,4 @@ log_stderr_verror (const char *fs, va_list args)
#ifdef HAVE_FUNLOCKFILE
funlockfile (stderr);
#endif
-
- errno = err; /* must be last line of function */
}
diff --git a/server/log-syslog.c b/server/log-syslog.c
index 76c5035b..29a7a825 100644
--- a/server/log-syslog.c
+++ b/server/log-syslog.c
@@ -45,11 +45,9 @@
/* Tempted to use LOG_FTP instead of LOG_DAEMON! */
static const int PRIORITY = LOG_DAEMON|LOG_ERR;
-/* Note: preserves the previous value of errno. */
void
-log_syslog_verror (const char *fs, va_list args)
+log_syslog_verror (int orig_errno, const char *fs, va_list args)
{
- int err = errno;
const char *name = threadlocal_get_name ();
size_t instance_num = threadlocal_get_instance_num ();
CLEANUP_FREE char *msg = NULL;
@@ -59,9 +57,9 @@ log_syslog_verror (const char *fs, va_list args)
fp = open_memstream (&msg, &len);
if (fp == NULL) {
/* Fallback to logging using fs, args directly. */
- errno = err; /* Must restore in case fs contains %m */
+ errno = orig_errno; /* must restore in case fs contains %m */
vsyslog (PRIORITY, fs, args);
- goto out;
+ return;
}
if (name) {
@@ -71,12 +69,9 @@ log_syslog_verror (const char *fs, va_list args)
fprintf (fp, ": ");
}
- errno = err; /* Must restore in case fs contains %m */
+ errno = orig_errno; /* must restore in case fs contains %m */
vfprintf (fp, fs, args);
close_memstream (fp);
syslog (PRIORITY, "%s", msg);
-
- out:
- errno = err;
}
diff --git a/server/log.c b/server/log.c
index 464e4f9a..9c1f667a 100644
--- a/server/log.c
+++ b/server/log.c
@@ -46,23 +46,27 @@
void
log_verror (const char *fs, va_list args)
{
+ int orig_errno = errno;
+
switch (log_to) {
case LOG_TO_DEFAULT:
if (forked_into_background)
- log_syslog_verror (fs, args);
+ log_syslog_verror (orig_errno, fs, args);
else
- log_stderr_verror (fs, args);
+ log_stderr_verror (orig_errno, fs, args);
break;
case LOG_TO_SYSLOG:
- log_syslog_verror (fs, args);
+ log_syslog_verror (orig_errno, fs, args);
break;
case LOG_TO_STDERR:
- log_stderr_verror (fs, args);
+ log_stderr_verror (orig_errno, fs, args);
break;
case LOG_TO_NULL:
/* nothing */
break;
}
+
+ errno = orig_errno; /* Restore errno before leaving the function. */
}
/* Note: preserves the previous value of errno. */
--
2.47.1

View File

@ -0,0 +1,177 @@
From ae28c97079cce7792c5954f67a418402a48ed0cf Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Wed, 24 Jul 2024 10:29:13 +0100
Subject: [PATCH] server: Rename threadlocal_{set,get}_error to .._errno
A simple mechanical change, to avoid confusion with
threadlocal_{set,get}_last_error introduced in the following commit.
(cherry picked from commit 1d7f655726ad3483d0e8086741182aada7ae8595)
---
server/internal.h | 4 ++--
server/plugins.c | 27 +++++++++++++--------------
server/protocol.c | 5 +++--
server/threadlocal.c | 4 ++--
4 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/server/internal.h b/server/internal.h
index 57e777e9..6549c87b 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -568,8 +568,8 @@ extern void threadlocal_set_name (const char *name)
extern const char *threadlocal_get_name (void);
extern void threadlocal_set_instance_num (size_t instance_num);
extern size_t threadlocal_get_instance_num (void);
-extern void threadlocal_set_error (int err);
-extern int threadlocal_get_error (void);
+extern void threadlocal_set_errno (int err);
+extern int threadlocal_get_errno (void);
extern void *threadlocal_buffer (size_t size);
extern void threadlocal_set_conn (struct connection *conn);
extern struct connection *threadlocal_get_conn (void);
diff --git a/server/plugins.c b/server/plugins.c
index ca89ac7a..3c7df0d2 100644
--- a/server/plugins.c
+++ b/server/plugins.c
@@ -633,15 +633,14 @@ plugin_can_cache (struct context *c)
NBDKIT_DLL_PUBLIC void
nbdkit_set_error (int err)
{
- threadlocal_set_error (err);
+ threadlocal_set_errno (err);
}
-/* Grab the appropriate error value.
- */
+/* Grab the appropriate error value. */
static int
-get_error (struct backend_plugin *p)
+get_errno (struct backend_plugin *p)
{
- int ret = threadlocal_get_error ();
+ int ret = threadlocal_get_errno ();
if (!ret && p->plugin.errno_is_preserved != 0)
ret = errno;
@@ -664,7 +663,7 @@ plugin_pread (struct context *c,
else
r = p->plugin._pread_v1 (c->handle, buf, count, offset);
if (r == -1)
- *err = get_error (p);
+ *err = get_errno (p);
return r;
}
@@ -685,7 +684,7 @@ plugin_flush (struct context *c,
return -1;
}
if (r == -1)
- *err = get_error (p);
+ *err = get_errno (p);
return r;
}
@@ -715,7 +714,7 @@ plugin_pwrite (struct context *c,
if (r != -1 && need_flush)
r = plugin_flush (c, 0, err);
if (r == -1 && !*err)
- *err = get_error (p);
+ *err = get_errno (p);
return r;
}
@@ -744,7 +743,7 @@ plugin_trim (struct context *c,
if (r != -1 && need_flush)
r = plugin_flush (c, 0, err);
if (r == -1 && !*err)
- *err = get_error (p);
+ *err = get_errno (p);
return r;
}
@@ -782,7 +781,7 @@ plugin_zero (struct context *c,
else
emulate = true;
if (r == -1)
- *err = emulate ? EOPNOTSUPP : get_error (p);
+ *err = emulate ? EOPNOTSUPP : get_errno (p);
if (r == 0 || (*err != EOPNOTSUPP && *err != ENOTSUP))
goto done;
}
@@ -794,7 +793,7 @@ plugin_zero (struct context *c,
}
flags &= ~NBDKIT_FLAG_MAY_TRIM;
- threadlocal_set_error (0);
+ threadlocal_set_errno (0);
*err = 0;
while (count) {
@@ -814,7 +813,7 @@ plugin_zero (struct context *c,
if (r != -1 && need_flush)
r = plugin_flush (c, 0, err);
if (r == -1 && !*err)
- *err = get_error (p);
+ *err = get_errno (p);
return r;
}
@@ -839,7 +838,7 @@ plugin_extents (struct context *c,
r = -1;
}
if (r == -1)
- *err = get_error (p);
+ *err = get_errno (p);
return r;
}
@@ -859,7 +858,7 @@ plugin_cache (struct context *c,
r = p->plugin.cache (c->handle, count, offset, flags);
if (r == -1)
- *err = get_error (p);
+ *err = get_errno (p);
return r;
}
diff --git a/server/protocol.c b/server/protocol.c
index 9b63f789..677da05c 100644
--- a/server/protocol.c
+++ b/server/protocol.c
@@ -235,8 +235,9 @@ handle_request (uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count,
int err = 0;
/* Clear the error, so that we know if the plugin calls
- * nbdkit_set_error() or relied on errno. */
- threadlocal_set_error (0);
+ * nbdkit_set_error() or relied on errno.
+ */
+ threadlocal_set_errno (0);
switch (cmd) {
case NBD_CMD_READ:
diff --git a/server/threadlocal.c b/server/threadlocal.c
index 088fe55a..9bb656bc 100644
--- a/server/threadlocal.c
+++ b/server/threadlocal.c
@@ -154,7 +154,7 @@ threadlocal_get_instance_num (void)
}
void
-threadlocal_set_error (int err)
+threadlocal_set_errno (int err)
{
struct threadlocal *threadlocal = pthread_getspecific (threadlocal_key);
@@ -167,7 +167,7 @@ threadlocal_set_error (int err)
/* This preserves errno, for convenience.
*/
int
-threadlocal_get_error (void)
+threadlocal_get_errno (void)
{
int err = errno;
struct threadlocal *threadlocal = pthread_getspecific (threadlocal_key);
--
2.47.1

View File

@ -0,0 +1,95 @@
From 00107f9d36fc6a1b33a1bde25e3239c520b82ab1 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Wed, 24 Jul 2024 10:37:58 +0100
Subject: [PATCH] server: Introduce threadlocal_{set,get}_last_error
Plus a function to clear the last_error field.
(cherry picked from commit fa5055ae2b9f96af941d697de39198c96ee2580a)
---
server/internal.h | 3 +++
server/threadlocal.c | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/server/internal.h b/server/internal.h
index 6549c87b..da19fb99 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -570,6 +570,9 @@ extern void threadlocal_set_instance_num (size_t instance_num);
extern size_t threadlocal_get_instance_num (void);
extern void threadlocal_set_errno (int err);
extern int threadlocal_get_errno (void);
+extern void threadlocal_set_last_error (char *msg);
+extern void threadlocal_clear_last_error (void);
+extern const char *threadlocal_get_last_error (void);
extern void *threadlocal_buffer (size_t size);
extern void threadlocal_set_conn (struct connection *conn);
extern struct connection *threadlocal_get_conn (void);
diff --git a/server/threadlocal.c b/server/threadlocal.c
index 9bb656bc..74a3c4e5 100644
--- a/server/threadlocal.c
+++ b/server/threadlocal.c
@@ -56,6 +56,7 @@ struct threadlocal {
char *name; /* Can be NULL. */
size_t instance_num; /* Can be 0. */
int err;
+ char *last_error; /* Can be NULL. */
void *buffer; /* Can be NULL. */
size_t buffer_size;
struct connection *conn; /* Can be NULL. */
@@ -70,6 +71,7 @@ free_threadlocal (void *threadlocalv)
struct threadlocal *threadlocal = threadlocalv;
free (threadlocal->name);
+ free (threadlocal->last_error);
free (threadlocal->buffer);
free (threadlocal);
}
@@ -176,6 +178,44 @@ threadlocal_get_errno (void)
return threadlocal ? threadlocal->err : 0;
}
+/* Set the last_error field. The ownership of the 'msg' string is
+ * passed to the threadlocal and will be freed here.
+ */
+void
+threadlocal_set_last_error (char *msg)
+{
+ struct threadlocal *threadlocal = pthread_getspecific (threadlocal_key);
+
+ if (threadlocal) {
+ free (threadlocal->last_error);
+ threadlocal->last_error = msg;
+ }
+ else {
+ /* ... otherwise throw it away, it's informational. */
+ free (msg);
+ }
+}
+
+void
+threadlocal_clear_last_error (void)
+{
+ threadlocal_set_last_error (NULL);
+}
+
+/* Get the last_error field. If successful, this returns a non-NULL
+ * string. This is valid until something calls nbdkit_error() in the
+ * same thread, so it should be used quickly. Returning NULL is not
+ * necessarily an error. The last_error is informational and may not
+ * be available.
+ */
+const char *
+threadlocal_get_last_error (void)
+{
+ struct threadlocal *threadlocal = pthread_getspecific (threadlocal_key);
+
+ return threadlocal ? threadlocal->last_error : NULL;
+}
+
/* Return the single pread/pwrite buffer for this thread. The buffer
* size is increased to size bytes if required.
*
--
2.47.1

View File

@ -0,0 +1,95 @@
From 4cde9d78c4293e294f80376266cfce420f15a6ff Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 23 Jul 2024 15:28:06 +0100
Subject: [PATCH] server: Take a thread-local copy of the last call to
nbdkit_error
nbdkit_error has traditionally been a "fancy wrapper around fprintf"
(kind of, don't take that literally). It is encouraged that plugins
and filters do something like:
if (error) {
nbdkit_error ("oops, a bad thing happened");
return -1;
}
but we don't enforce this. Plugins might call nbdkit_error more than
once or not at all.
The point where we get to sending an error back over the wire to the
NBD client is long after the plugin returned above, and after
nbdkit_error was called.
Therefore in order to send errors back to the NBD client, we must keep
the last error message around.
This change simply modifies nbdkit_error to make a best-effort attempt
to save the last error message in thread-local storage.
We also clear the last error when a new request starts, to ensure that
we don't leak errors across different callbacks or connections.
(cherry picked from commit bfa6d4064cb74f429149d14ab4025b258fc95ec4)
---
server/log.c | 21 +++++++++++++++++++++
server/protocol.c | 5 +++++
2 files changed, 26 insertions(+)
diff --git a/server/log.c b/server/log.c
index 9c1f667a..acf14d57 100644
--- a/server/log.c
+++ b/server/log.c
@@ -40,6 +40,25 @@
#include "internal.h"
+/* Copy the error message to threadlocal. This is sent to callers
+ * which are using structured replies, but is for extra information
+ * only so don't fail if we are unable to copy it.
+ */
+static void
+copy_error_to_threadlocal (int orig_errno, const char *fs, va_list args)
+{
+ va_list args_copy;
+ char *msg;
+ int r;
+
+ va_copy (args_copy, args);
+ errno = orig_errno; /* must restore in case fs contains %m */
+ r = vasprintf (&msg, fs, args_copy);
+ va_end (args_copy);
+ if (r != -1 && msg)
+ threadlocal_set_last_error (msg); /* ownership passed to threadlocal */
+}
+
/* Call the right log_*_verror function depending on log_sink.
* Note: preserves the previous value of errno.
*/
@@ -48,6 +67,8 @@ log_verror (const char *fs, va_list args)
{
int orig_errno = errno;
+ copy_error_to_threadlocal (orig_errno, fs, args);
+
switch (log_to) {
case LOG_TO_DEFAULT:
if (forked_into_background)
diff --git a/server/protocol.c b/server/protocol.c
index 677da05c..d428bfc8 100644
--- a/server/protocol.c
+++ b/server/protocol.c
@@ -239,6 +239,11 @@ handle_request (uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count,
*/
threadlocal_set_errno (0);
+ /* Also clear the last error in this thread so we will only save
+ * nbdkit_error() from this request.
+ */
+ threadlocal_clear_last_error ();
+
switch (cmd) {
case NBD_CMD_READ:
if (backend_pread (c, buf, count, offset, 0, &err) == -1)
--
2.47.1

View File

@ -0,0 +1,177 @@
From e121d8e1d39605043317cbdf28f60056e6681d56 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 23 Jul 2024 15:45:04 +0100
Subject: [PATCH] server: Send the last error to the NBD client
This sends the last error saved in the connection handle back to the
NBD client. This is informational and best effort.
qemu reports the error already, for example:
$ nbdkit --log=null \
eval open=' echo EPERM Go Away >&2; exit 1 ' get_size=' echo 100 ' \
--run 'qemu-img info "$uri"'
qemu-img: Could not open 'nbd+unix://?socket=/tmp/nbdkitIDl6iy/socket': Requested export not available
server reported: /tmp/nbdkitRDAfXH/open: Go Away
This goes back to at least qemu 2.12.0 (RHEL 7) and possibly earlier,
so we can just assume that qemu does this for the test.
libnbd requires a patch to display this information.
(cherry picked from commit 46484ca8e6a35c45fe96b6c972ceba8984d401e8)
---
server/protocol-handshake-newstyle.c | 43 ++++++++++++++++------
tests/Makefile.am | 2 +
tests/test-last-error.sh | 55 ++++++++++++++++++++++++++++
3 files changed, 88 insertions(+), 12 deletions(-)
create mode 100755 tests/test-last-error.sh
diff --git a/server/protocol-handshake-newstyle.c b/server/protocol-handshake-newstyle.c
index 6b3bc76f..c18d32e5 100644
--- a/server/protocol-handshake-newstyle.c
+++ b/server/protocol-handshake-newstyle.c
@@ -57,28 +57,47 @@ send_newstyle_option_reply (uint32_t option, uint32_t reply)
{
GET_CONN;
struct nbd_fixed_new_option_reply fixed_new_option_reply;
+ const char *last_error = NULL;
+ uint32_t replylen = 0;
+
+ if (NBD_REP_IS_ERR (reply)) {
+ last_error = threadlocal_get_last_error ();
+ /* Note that calling nbdkit_error will invalidate last_error, so
+ * be careful below.
+ */
+ if (last_error) {
+ size_t len = strlen (last_error);
+ if (len <= NBD_MAX_STRING)
+ replylen = len;
+ }
+ }
fixed_new_option_reply.magic = htobe64 (NBD_REP_MAGIC);
fixed_new_option_reply.option = htobe32 (option);
fixed_new_option_reply.reply = htobe32 (reply);
- fixed_new_option_reply.replylen = htobe32 (0);
+ fixed_new_option_reply.replylen = htobe32 (replylen);
debug ("replying to %s with %s", name_of_nbd_opt (option),
name_of_nbd_rep (reply));
if (conn->send (&fixed_new_option_reply,
- sizeof fixed_new_option_reply, 0) == -1) {
- /* The protocol document says that the client is allowed to simply
- * drop the connection after sending NBD_OPT_ABORT, or may read
- * the reply.
- */
- if (option == NBD_OPT_ABORT)
- debug ("write: %s: %m", name_of_nbd_opt (option));
- else
- nbdkit_error ("write: %s: %m", name_of_nbd_opt (option));
- return -1;
- }
+ sizeof fixed_new_option_reply,
+ replylen > 0 ? SEND_MORE : 0) == -1)
+ goto err;
+ if (replylen > 0 && conn->send (last_error, replylen, 0) == -1)
+ goto err;
return 0;
+
+err:
+ /* The protocol document says that the client is allowed to simply
+ * drop the connection after sending NBD_OPT_ABORT, or may read
+ * the reply.
+ */
+ if (option == NBD_OPT_ABORT)
+ debug ("write: %s: %m", name_of_nbd_opt (option));
+ else
+ nbdkit_error ("write: %s: %m", name_of_nbd_opt (option));
+ return -1;
}
/* Reply to NBD_OPT_LIST with the plugin's list of export names.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index b670fbf9..d510807c 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -276,6 +276,7 @@ TESTS += \
test-read-password-interactive.sh \
test-nbd-client.sh \
test-nbd-client-tls.sh \
+ test-last-error.sh \
$(NULL)
if !IS_WINDOWS
TESTS += \
@@ -301,6 +302,7 @@ EXTRA_DIST += \
test-plugin-docs.sh \
test-ipv4-lo.sh \
test-ipv6-lo.sh \
+ test-last-error.sh \
test-long-name.sh \
test-nbd-client.sh \
test-nbd-client-tls.sh \
diff --git a/tests/test-last-error.sh b/tests/test-last-error.sh
new file mode 100755
index 00000000..fc720606
--- /dev/null
+++ b/tests/test-last-error.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+# Test informational error messages sent to the NBD client.
+# qemu-img supports this since at least 2.12.0.
+
+requires_run
+requires_plugin eval
+requires qemu-img --version
+
+out=last-error.out
+rm -f $out
+cleanup_fn rm -f $out
+
+export out
+
+nbdkit eval \
+ open=' echo EPERM Go Away >&2; exit 1 ' get_size=' echo 0 ' \
+ --run ' qemu-img info "$uri" > $out 2>&1 ||: '
+cat $out
+
+grep "Go Away" $out
--
2.47.1

View File

@ -0,0 +1,28 @@
From 47345a7a56c343e2cd559b736df685214ed75a9b Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 6 Jan 2025 15:22:05 +0000
Subject: [PATCH] vddk: Include <stdbool.h>
Since this file uses booleans.
Acked-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit fe855addae44e45e2344a33bd3857c561587f12e)
---
plugins/vddk/worker.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 467d00ca..5982fcea 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -34,6 +34,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
--
2.47.1

View File

@ -0,0 +1,59 @@
From 927cb4063da464aa2605ae87d1b1157146551a47 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 6 Jan 2025 16:47:55 +0000
Subject: [PATCH] vddk: Cache the disk size in the handle
No functional change here, we're just making sure we have the disk
size (in bytes) available in the handle.
Acked-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 2ba76db4a048471e997e508715081a70356f94f3)
---
plugins/vddk/vddk.c | 6 +++---
plugins/vddk/vddk.h | 3 +++
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 27d53bd6..2a787453 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -875,19 +875,19 @@ vddk_get_size (void *handle)
{
struct vddk_handle *h = handle;
VixDiskLibInfo *info;
- int64_t size;
struct command info_cmd = { .type = INFO, .ptr = &info };
if (send_command_and_wait (h, &info_cmd) == -1)
return -1;
- size = info->capacity * (int64_t)VIXDISKLIB_SECTOR_SIZE;
+ /* Compute the size and cache it into the handle. */
+ h->size = info->capacity * VIXDISKLIB_SECTOR_SIZE;
VDDK_CALL_START (VixDiskLib_FreeInfo, "info")
VixDiskLib_FreeInfo (info);
VDDK_CALL_END (VixDiskLib_FreeInfo, 0);
- return size;
+ return h->size;
}
/* Advertise most efficient block sizes. */
diff --git a/plugins/vddk/vddk.h b/plugins/vddk/vddk.h
index fb0c79a8..1d1069cc 100644
--- a/plugins/vddk/vddk.h
+++ b/plugins/vddk/vddk.h
@@ -165,6 +165,9 @@ struct vddk_handle {
command_queue commands; /* command queue */
pthread_cond_t commands_cond; /* condition (queue size 0 -> 1) */
uint64_t id; /* next command ID */
+
+ /* Cached disk size in bytes (set in get_size()). */
+ uint64_t size;
};
/* reexec.c */
--
2.47.1

View File

@ -0,0 +1,34 @@
From 0ba2e0d3c53efd49309d9e274e0cb6c2e6720cbd Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 6 Jan 2025 15:37:54 +0000
Subject: [PATCH] vddk: do_extents: Mark some local variables const
These are never changed in the code (they are fields copied out from
the *cmd struct), so mark them as const.
Acked-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 24fd7df460ae31fe3f72b5100ca3dbe138bbadbe)
---
plugins/vddk/worker.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 5982fcea..bc015d16 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -388,9 +388,9 @@ add_extent (struct nbdkit_extents *extents,
static int
do_extents (struct command *cmd, struct vddk_handle *h)
{
- uint32_t count = cmd->count;
- uint64_t offset = cmd->offset;
- bool req_one = cmd->req_one;
+ const uint32_t count = cmd->count;
+ const uint64_t offset = cmd->offset;
+ const bool req_one = cmd->req_one;
struct nbdkit_extents *extents = cmd->ptr;
uint64_t position, end, start_sector;
--
2.47.1

View File

@ -0,0 +1,31 @@
From 6dd8b5eb14c0723764dd39597d64f62162ca3ab3 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 6 Jan 2025 16:39:51 +0000
Subject: [PATCH] vddk: do_extents: Exit the function if we hit req_one
condition
No change to the functionality, since the code previously called
'return 0' immediately following the loop.
Acked-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 2f4d71f8f704d89d69cd635791c3239d2f44d631)
---
plugins/vddk/worker.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index bc015d16..112111e3 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -471,7 +471,7 @@ do_extents (struct command *cmd, struct vddk_handle *h)
* overlapping the original offset we're done.
*/
if (req_one && position > offset)
- break;
+ return 0;
}
return 0;
--
2.47.1

View File

@ -0,0 +1,202 @@
From fdc2a6a9818aad25ee606118d5f6a32fa0739912 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 6 Jan 2025 15:45:35 +0000
Subject: [PATCH] vddk: do_extents: Avoid reading partial chunk beyond the end
of the disk
The QueryAllocatedBlocks API has (another) frustrating feature. It
can only query whole "chunks" (128 sectors). If the disk size is not
aligned to the chunk size (say the size was 129 sectors) then there's
a bit at the end which cannot be queried. Furthermore, the API gives
an error in this case instead of being helpful:
VixDiskLib_QueryAllocatedBlocks: One of the parameters was invalid
Fixes: https://issues.redhat.com/browse/RHEL-71694
Reported-by: Ming Xie <mxie@redhat.com>
Acked-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit fd918f3d1a185fd996999766c75acb9d6e22395d)
---
plugins/vddk/worker.c | 29 ++++++++-
tests/Makefile.am | 5 +-
tests/test-vddk-real-unaligned-chunk.sh | 82 +++++++++++++++++++++++++
3 files changed, 113 insertions(+), 3 deletions(-)
create mode 100755 tests/test-vddk-real-unaligned-chunk.sh
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 112111e3..8a91250a 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -392,10 +392,9 @@ do_extents (struct command *cmd, struct vddk_handle *h)
const uint64_t offset = cmd->offset;
const bool req_one = cmd->req_one;
struct nbdkit_extents *extents = cmd->ptr;
- uint64_t position, end, start_sector;
+ uint64_t position, start_sector, size_sectors, last_queryable_sector, end;
position = offset;
- end = offset + count;
/* We can only query whole chunks. Therefore start with the
* first chunk before offset.
@@ -403,6 +402,21 @@ do_extents (struct command *cmd, struct vddk_handle *h)
start_sector =
ROUND_DOWN (offset, VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE)
/ VIXDISKLIB_SECTOR_SIZE;
+
+ /* Calculate the end byte + 1 that we're going to query, normally
+ * this is offset + count.
+ *
+ * However since chunks are larger than sectors, for a disk which
+ * has size which is not aligned to the chunk size there is a part
+ * of the disk at the end that we can never query. Reduce 'end' to
+ * the maximum possible queryable part of the disk, and we'll deal
+ * with the unaligned bit after the loop (RHEL-71694).
+ */
+ end = offset + count;
+ size_sectors = h->size / VIXDISKLIB_SECTOR_SIZE;
+ last_queryable_sector = ROUND_DOWN (size_sectors, VIXDISKLIB_MIN_CHUNK_SIZE);
+ end = MIN (end, last_queryable_sector * VIXDISKLIB_SECTOR_SIZE);
+
while (start_sector * VIXDISKLIB_SECTOR_SIZE < end) {
VixError err;
uint32_t i;
@@ -474,6 +488,17 @@ do_extents (struct command *cmd, struct vddk_handle *h)
return 0;
}
+ /* If 'end' spanned beyond the last chunk of the disk, then we
+ * reduced it above to avoid reading a chunk that extends beyond the
+ * end of the underlying disk. We have to synthesize an allocated
+ * block here, which is what VDDK's example code does
+ * (doc/samples/diskLib/vixDiskLibSample.cpp: DoGetAllocatedBlocks).
+ */
+ if (end < offset + count) {
+ if (add_extent (extents, &position, offset + count, false) == -1)
+ return -1;
+ }
+
return 0;
}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index d510807c..0aa36846 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -178,7 +178,8 @@ if HAVE_VDDK
check-vddk:
$(MAKE) check TESTS="test-vddk-real.sh \
test-vddk-real-dump-plugin.sh \
- test-vddk-real-create.sh"
+ test-vddk-real-create.sh \
+ test-vddk-real-unaligned-chunk.sh"
endif HAVE_VDDK
#----------------------------------------------------------------------
@@ -1145,6 +1146,7 @@ TESTS += \
test-vddk-password-interactive.sh \
test-vddk-real-create.sh \
test-vddk-real-dump-plugin.sh \
+ test-vddk-real-unaligned-chunk.sh \
test-vddk-real.sh \
test-vddk-reexec.sh \
test-vddk-run.sh \
@@ -1177,6 +1179,7 @@ EXTRA_DIST += \
test-vddk-password-interactive.sh \
test-vddk-real-create.sh \
test-vddk-real-dump-plugin.sh \
+ test-vddk-real-unaligned-chunk.sh \
test-vddk-real.sh \
test-vddk-reexec.sh \
test-vddk-run.sh \
diff --git a/tests/test-vddk-real-unaligned-chunk.sh b/tests/test-vddk-real-unaligned-chunk.sh
new file mode 100755
index 00000000..28fccd6c
--- /dev/null
+++ b/tests/test-vddk-real-unaligned-chunk.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Regression test for https://issues.redhat.com/browse/RHEL-71694
+
+source ./functions.sh
+set -e
+set -x
+
+requires_run
+requires test "x$vddkdir" != "x"
+requires test -d "$vddkdir"
+requires test -f "$vddkdir/lib64/libvixDiskLib.so"
+requires qemu-img --version
+requires_nbdinfo
+requires $TRUNCATE --version
+requires dd --version
+requires test -r /dev/urandom
+skip_if_valgrind "because setting LD_LIBRARY_PATH breaks valgrind"
+
+# VDDK > 5.1.1 only supports x86_64.
+if [ `uname -m` != "x86_64" ]; then
+ echo "$0: unsupported architecture"
+ exit 77
+fi
+
+d=vddk-real-unaligned-chunk.d
+cleanup_fn rm -rf $d
+rm -rf $d
+mkdir $d
+
+# Create a vmdk disk which is partially sparse and the size is NOT
+# aligned to 128 sectors (chunk size).
+dd if=/dev/urandom of=$d/test.raw bs=512 count=$(( 3*128 ))
+$TRUNCATE -s $(( (4*128 + 3) * 512)) $d/test.raw
+qemu-img convert -f raw $d/test.raw -O vmdk $d/test.vmdk
+
+# Read the map using VDDK.
+export d
+nbdkit -rfv vddk libdir="$vddkdir" \
+ $PWD/$d/test.vmdk \
+ --run 'nbdinfo --map "$uri" > $d/map'
+cat $d/map
+
+# Note a few features of the expected map. The first 3 chunks (3*128
+# sectors) are allocated, followed by a single hole chunk. Then the
+# last 3 unaligned sectors appear allocated (even though they are not)
+# because we could not read them using the QueryAllocatedBlocks API so
+# we had to assume allocated.
+test "$(cat $d/map)" = "\
+ 0 196608 0 data
+ 196608 65536 3 hole,zero
+ 262144 1536 0 data"
--
2.47.1

View File

@ -0,0 +1,41 @@
From 881113e29b45975742ca11a4d0539ed2eb40b717 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 28 Mar 2025 20:32:46 +0000
Subject: [PATCH] file: Add debugging if sync_file_range / posix_fadvise fails
These are advisory hints, but add some debugging so we know if these
calls are failing. The errors are ignored so there is no behaviour
change.
Thanks: Eric Sandeen
(cherry picked from commit feb22cfc85b0750a4e4bfbc5fe56636883599feb)
---
plugins/file/file.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 2bad6480..6d0e77a1 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -123,11 +123,13 @@ evict_writes (int fd, uint64_t offset, size_t len)
/* Evict the oldest window from the page cache. */
if (window[0].len > 0) {
- sync_file_range (window[0].fd, window[0].offset, window[0].len,
- SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
- SYNC_FILE_RANGE_WAIT_AFTER);
- posix_fadvise (window[0].fd, window[0].offset, window[0].len,
- POSIX_FADV_DONTNEED);
+ if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
+ SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
+ SYNC_FILE_RANGE_WAIT_AFTER) == -1)
+ nbdkit_debug ("sync_file_range: wait: (ignored): %m");
+ if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
+ POSIX_FADV_DONTNEED) == -1)
+ nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
}
/* Move the Nth window to N-1. */
--
2.47.1

View File

@ -0,0 +1,82 @@
From 205fb5f8ebd72a37fb624e7d551ead313da2437b Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 28 Mar 2025 20:35:54 +0000
Subject: [PATCH] file: If sync_file_range fails to start, don't add window to
list
When starting the process of writing out the newest range, we
previously didn't check if sync_file_range succeeded or failed. But
if it failed there's no reason to add it to the list of windows. Also
debug any failures.
This shouldn't change any semantics as far as we're aware.
Thanks: Eric Sandeen
(cherry picked from commit d35f0636373e6a58c8f3fcfcf505af248e27c574)
---
plugins/file/file.c | 48 ++++++++++++++++++++++++++-------------------
1 file changed, 28 insertions(+), 20 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 6d0e77a1..638c960d 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -121,27 +121,35 @@ evict_writes (int fd, uint64_t offset, size_t len)
{
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
- /* Evict the oldest window from the page cache. */
- if (window[0].len > 0) {
- if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
- SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
- SYNC_FILE_RANGE_WAIT_AFTER) == -1)
- nbdkit_debug ("sync_file_range: wait: (ignored): %m");
- if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
- POSIX_FADV_DONTNEED) == -1)
- nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
- }
-
- /* Move the Nth window to N-1. */
- memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
-
- /* Set up the current window and tell Linux to start writing it out
- * to disk (asynchronously).
+ /* Tell Linux to start writing the current range out to disk
+ * (asynchronously).
*/
- sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE);
- window[NR_WINDOWS-1].fd = fd;
- window[NR_WINDOWS-1].offset = offset;
- window[NR_WINDOWS-1].len = len;
+ if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) {
+ /* sync_file_range to start the write failed */
+ nbdkit_debug ("sync_file_range: write: (ignored): %m");
+ }
+ else {
+ /* sync_file_range to start the write succeeded, so
+ * evict the oldest window from the page cache.
+ */
+ if (window[0].len > 0) {
+ if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
+ SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
+ SYNC_FILE_RANGE_WAIT_AFTER) == -1)
+ nbdkit_debug ("sync_file_range: wait: (ignored): %m");
+ if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
+ POSIX_FADV_DONTNEED) == -1)
+ nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
+ }
+
+ /* Move the Nth window to N-1. */
+ memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
+
+ /* Add the range to the newest end of the list of windows. */
+ window[NR_WINDOWS-1].fd = fd;
+ window[NR_WINDOWS-1].offset = offset;
+ window[NR_WINDOWS-1].len = len;
+ }
}
/* When we close the handle we must remove any windows which are still
--
2.47.1

View File

@ -0,0 +1,254 @@
From e9c972bc03d42d6fe7f11cd076a5500d39976a61 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sat, 29 Mar 2025 10:01:05 +0000
Subject: [PATCH] tests: Add tests of file plugin cache=none
These are based on the tests which were done in the commit message
when the feature was first added in commit aa5a2183a6, but cleaned up
and modernised a little.
It is likely that these tests would fail under memory pressure (since
they test that the page cache is used vs not used). Making them more
robust left as an exercise for the future.
(cherry picked from commit 6017ba21aeeb3d7ad85925e78dba85a005194dee)
---
README.md | 1 +
tests/Makefile.am | 4 ++
tests/test-file-cache-none-read.sh | 90 ++++++++++++++++++++++++++++
tests/test-file-cache-none-write.sh | 92 +++++++++++++++++++++++++++++
4 files changed, 187 insertions(+)
create mode 100755 tests/test-file-cache-none-read.sh
create mode 100755 tests/test-file-cache-none-write.sh
diff --git a/README.md b/README.md
index fbe403da..86ac6c57 100644
--- a/README.md
+++ b/README.md
@@ -174,6 +174,7 @@ To test for memory leaks (`make check-valgrind`):
For non-essential enhancements to the test suite:
+* cachedel, cachestats (from https://github.com/Feh/nocache)
* expect
* fdisk, sfdisk (from util-linux)
* flake8
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 0aa36846..4340cc38 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -846,6 +846,8 @@ TESTS += \
test-file-extents.sh \
test-file-dir.sh \
test-file-dirfd.sh \
+ test-file-cache-none-read.sh \
+ test-file-cache-none-write.sh \
$(NULL)
EXTRA_DIST += \
test-file.sh \
@@ -855,6 +857,8 @@ EXTRA_DIST += \
test-file-extents.sh \
test-file-dir.sh \
test-file-dirfd.sh \
+ test-file-cache-none-read.sh \
+ test-file-cache-none-write.sh \
$(NULL)
LIBGUESTFS_TESTS += test-file-block
diff --git a/tests/test-file-cache-none-read.sh b/tests/test-file-cache-none-read.sh
new file mode 100755
index 00000000..c9831b43
--- /dev/null
+++ b/tests/test-file-cache-none-read.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the file plugin, reading with cache=none.
+
+source ./functions.sh
+set -e
+set -x
+
+# Makes no sense to run this test under valgrind.
+skip_if_valgrind
+
+requires_plugin file
+requires_run
+requires_nbdcopy
+requires $TRUNCATE --version
+requires test -r /dev/urandom
+requires dd --version
+requires $SED --version
+
+# Requires the cachestats tool from https://github.com/Feh/nocache
+# It doesn't support --version or --help, so use 'type' instead.
+requires type cachestats
+requires type cachedel
+
+inp=file-cache-none-read.in
+stats1=file-cache-none-read.s1
+stats2=file-cache-none-read.s2
+rm -f $inp $stats1 $stats2
+cleanup_fn rm -f $inp $stats1 $stats2
+
+# Create a large random file as input.
+dd if=/dev/urandom of=$inp bs=1024k count=1024
+
+# Drop the input file from the page cache and read it out with nbdkit.
+# We expect to see the input file mostly or completely in cache after.
+cachedel $inp
+nbdkit file $inp --run 'nbdcopy "$uri" null:'
+cachestats $inp > $stats1
+cat $stats1
+
+# The same, with cache=none.
+# We expect to see the input file not cached after.
+cachedel $inp
+nbdkit file $inp --run 'nbdcopy "$uri" null:' cache=none
+cachestats $inp > $stats2
+cat $stats2
+
+# The output of cachestats looks like this:
+# pages in cache: 262144/262144 (100.0%) [filesize=1048576.0K, pagesize=4K]
+# We want to check that % pages in cache using cache=none is much
+# lower than the default case.
+pic1="$($SED 's,pages in cache: [0-9/]* (\([0-9]*\)\.[0-9]*%).*,\1,' \
+ < $stats1)"
+pic2="$($SED 's,pages in cache: [0-9/]* (\([0-9]*\)\.[0-9]*%).*,\1,' \
+ < $stats2)"
+
+# Test before is > 10%
+test "$pic1" -gt 10
+# Test after is < 10%
+test "$pic2" -lt 10
diff --git a/tests/test-file-cache-none-write.sh b/tests/test-file-cache-none-write.sh
new file mode 100755
index 00000000..2041a5cd
--- /dev/null
+++ b/tests/test-file-cache-none-write.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the file plugin, writing with cache=none.
+
+source ./functions.sh
+set -e
+set -x
+
+# Makes no sense to run this test under valgrind.
+skip_if_valgrind
+
+requires_plugin file
+requires_run
+requires_nbdcopy
+requires $TRUNCATE --version
+requires test -r /dev/urandom
+requires dd --version
+requires $SED --version
+
+# Requires the cachestats tool from https://github.com/Feh/nocache
+# It doesn't support --version or --help, so use 'type' instead.
+requires type cachestats
+
+inp=file-cache-none-write.in
+out=file-cache-none-write.out
+stats1=file-cache-none-write.s1
+stats2=file-cache-none-write.s2
+rm -f $inp $out $stats1 $stats2
+cleanup_fn rm -f $inp $out $stats1 $stats2
+
+# Create a large random file as input.
+dd if=/dev/urandom of=$inp bs=1024k count=1024
+
+# Copy to output using cache=default and collect the stats.
+# We expect to see the output file mostly or completely in cache after.
+rm -f $out; truncate -r $inp $out
+export inp
+nbdkit file $out --run 'nbdcopy $inp "$uri"' cache=default
+cachestats $out > $stats1
+cat $stats1
+
+# The same, with cache=none.
+# We expect to see the output file not cached after.
+rm -f $out; truncate -r $inp $out
+export inp
+nbdkit file $out --run 'nbdcopy $inp "$uri"' cache=none
+cachestats $out > $stats2
+cat $stats2
+
+# The output of cachestats looks like this:
+# pages in cache: 262144/262144 (100.0%) [filesize=1048576.0K, pagesize=4K]
+# We want to check that % pages in cache using cache=none is much
+# lower than the default case.
+pic1="$($SED 's,pages in cache: [0-9/]* (\([0-9]*\)\.[0-9]*%).*,\1,' \
+ < $stats1)"
+pic2="$($SED 's,pages in cache: [0-9/]* (\([0-9]*\)\.[0-9]*%).*,\1,' \
+ < $stats2)"
+
+# Test before is > 10%
+test "$pic1" -gt 10
+# Test after is < 10%
+test "$pic2" -lt 10
--
2.47.1

View File

@ -0,0 +1,238 @@
From bdab63138758e287b292140e17f2d0e3b4c40bbe Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 12:45:43 +0100
Subject: [PATCH] tests: Add more generic tests of file cache=none
The tests added in commit 6017ba21ae require cachestats which means
they are almost always skipped in reality. Also we're interested in
whether the windowed eviction could ever corrupt data. Write a
simpler pair of tests for data integrity.
Updates: commit 6017ba21aeeb3d7ad85925e78dba85a005194dee
(cherry picked from commit 7b45f73c0668e56a9188249eeefc2f67aeb50af3)
---
tests/Makefile.am | 12 +++--
tests/test-file-cache-none-read-consistent.sh | 53 ++++++++++++++++++
...=> test-file-cache-none-read-effective.sh} | 9 ++--
.../test-file-cache-none-write-consistent.sh | 54 +++++++++++++++++++
...> test-file-cache-none-write-effective.sh} | 11 ++--
5 files changed, 126 insertions(+), 13 deletions(-)
create mode 100755 tests/test-file-cache-none-read-consistent.sh
rename tests/{test-file-cache-none-read.sh => test-file-cache-none-read-effective.sh} (93%)
create mode 100755 tests/test-file-cache-none-write-consistent.sh
rename tests/{test-file-cache-none-write.sh => test-file-cache-none-write-effective.sh} (92%)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 4340cc38..eed96d28 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -846,8 +846,10 @@ TESTS += \
test-file-extents.sh \
test-file-dir.sh \
test-file-dirfd.sh \
- test-file-cache-none-read.sh \
- test-file-cache-none-write.sh \
+ test-file-cache-none-read-consistent.sh \
+ test-file-cache-none-read-effective.sh \
+ test-file-cache-none-write-consistent.sh \
+ test-file-cache-none-write-effective.sh \
$(NULL)
EXTRA_DIST += \
test-file.sh \
@@ -857,8 +859,10 @@ EXTRA_DIST += \
test-file-extents.sh \
test-file-dir.sh \
test-file-dirfd.sh \
- test-file-cache-none-read.sh \
- test-file-cache-none-write.sh \
+ test-file-cache-none-read-consistent.sh \
+ test-file-cache-none-read-effective.sh \
+ test-file-cache-none-write-consistent.sh \
+ test-file-cache-none-write-effective.sh \
$(NULL)
LIBGUESTFS_TESTS += test-file-block
diff --git a/tests/test-file-cache-none-read-consistent.sh b/tests/test-file-cache-none-read-consistent.sh
new file mode 100755
index 00000000..5f5794ee
--- /dev/null
+++ b/tests/test-file-cache-none-read-consistent.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the file plugin, reading with cache=none.
+#
+# Unlike test-file-cache-none-read-effective.sh, this test doesn't
+# require cachestats. It's more about testing integrity of reading.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_plugin file
+requires_run
+requires_nbdcopy
+requires test -f disk
+requires md5sum --version
+
+out=file-cache-none-read-consistent.out
+cleanup_fn rm -f $out
+
+export out
+nbdkit file disk cache=none --run 'nbdcopy "$uri" "$out"'
+test "$(md5sum < disk)" = "$(md5sum < $out)"
diff --git a/tests/test-file-cache-none-read.sh b/tests/test-file-cache-none-read-effective.sh
similarity index 93%
rename from tests/test-file-cache-none-read.sh
rename to tests/test-file-cache-none-read-effective.sh
index c9831b43..efead224 100755
--- a/tests/test-file-cache-none-read.sh
+++ b/tests/test-file-cache-none-read-effective.sh
@@ -30,7 +30,8 @@
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
-# Test the file plugin, reading with cache=none.
+# Test the file plugin, reading with cache=none, is effective at
+# reducing page cache usage.
source ./functions.sh
set -e
@@ -52,9 +53,9 @@ requires $SED --version
requires type cachestats
requires type cachedel
-inp=file-cache-none-read.in
-stats1=file-cache-none-read.s1
-stats2=file-cache-none-read.s2
+inp=file-cache-none-read-effective.in
+stats1=file-cache-none-read-effective.s1
+stats2=file-cache-none-read-effective.s2
rm -f $inp $stats1 $stats2
cleanup_fn rm -f $inp $stats1 $stats2
diff --git a/tests/test-file-cache-none-write-consistent.sh b/tests/test-file-cache-none-write-consistent.sh
new file mode 100755
index 00000000..749805f1
--- /dev/null
+++ b/tests/test-file-cache-none-write-consistent.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the file plugin, writing with cache=none.
+#
+# Unlike test-file-cache-none-write-effective.sh, this test doesn't
+# require cachestats. It's more about testing integrity of writing.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_plugin file
+requires_run
+requires_nbdcopy
+requires test -f disk
+requires md5sum --version
+requires $TRUNCATE --version
+
+out=file-cache-none-write-consistent.out
+cleanup_fn rm -f $out
+
+rm -f $out; $TRUNCATE -r disk $out
+nbdkit file $out cache=none --run 'nbdcopy disk "$uri"'
+test "$(md5sum < disk)" = "$(md5sum < $out)"
diff --git a/tests/test-file-cache-none-write.sh b/tests/test-file-cache-none-write-effective.sh
similarity index 92%
rename from tests/test-file-cache-none-write.sh
rename to tests/test-file-cache-none-write-effective.sh
index 2041a5cd..e0159242 100755
--- a/tests/test-file-cache-none-write.sh
+++ b/tests/test-file-cache-none-write-effective.sh
@@ -30,7 +30,8 @@
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
-# Test the file plugin, writing with cache=none.
+# Test the file plugin, writing with cache=none, is effective at
+# reducing page cache usage.
source ./functions.sh
set -e
@@ -51,10 +52,10 @@ requires $SED --version
# It doesn't support --version or --help, so use 'type' instead.
requires type cachestats
-inp=file-cache-none-write.in
-out=file-cache-none-write.out
-stats1=file-cache-none-write.s1
-stats2=file-cache-none-write.s2
+inp=file-cache-none-write-effective.in
+out=file-cache-none-write-effective.out
+stats1=file-cache-none-write-effective.s1
+stats2=file-cache-none-write-effective.s2
rm -f $inp $out $stats1 $stats2
cleanup_fn rm -f $inp $out $stats1 $stats2
--
2.47.1

View File

@ -0,0 +1,101 @@
From 2772bc3e81cb0c396bde186a2581d010b8d19e5d Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 11:07:51 +0100
Subject: [PATCH] file: Hard error if sync_file_range fails
After discussion with Dave Chinner, it appears that I/O errors can be
returned by sync_file_range, ie. it is not merely a hint to start the
eviction. Therefore return a hard error if any of the sync_file_range
calls fails.
Thanks: Dave Chinner
Updates: commit d35f0636373e6a58c8f3fcfcf505af248e27c574
(cherry picked from commit d2ed6a839c3ed50b417f45dc13e5b811ecdc4e74)
---
plugins/file/file.c | 53 ++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 25 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 638c960d..f195bcf3 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -116,7 +116,7 @@ struct write_window {
static pthread_mutex_t window_lock = PTHREAD_MUTEX_INITIALIZER;
static struct write_window window[NR_WINDOWS];
-static void
+static int
evict_writes (int fd, uint64_t offset, size_t len)
{
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
@@ -125,31 +125,32 @@ evict_writes (int fd, uint64_t offset, size_t len)
* (asynchronously).
*/
if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) {
- /* sync_file_range to start the write failed */
- nbdkit_debug ("sync_file_range: write: (ignored): %m");
+ nbdkit_error ("sync_file_range: cache=none: starting eviction: %m");
+ return -1;
}
- else {
- /* sync_file_range to start the write succeeded, so
- * evict the oldest window from the page cache.
- */
- if (window[0].len > 0) {
- if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
- SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
- SYNC_FILE_RANGE_WAIT_AFTER) == -1)
- nbdkit_debug ("sync_file_range: wait: (ignored): %m");
- if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
- POSIX_FADV_DONTNEED) == -1)
- nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
+
+ /* Evict the oldest window from the page cache. */
+ if (window[0].len > 0) {
+ if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
+ SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
+ SYNC_FILE_RANGE_WAIT_AFTER) == -1) {
+ nbdkit_error ("sync_file_range: cache=none: evicting oldest window: %m");
+ return -1;
}
-
- /* Move the Nth window to N-1. */
- memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
-
- /* Add the range to the newest end of the list of windows. */
- window[NR_WINDOWS-1].fd = fd;
- window[NR_WINDOWS-1].offset = offset;
- window[NR_WINDOWS-1].len = len;
+ if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
+ POSIX_FADV_DONTNEED) == -1)
+ nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
}
+
+ /* Move the Nth window to N-1. */
+ memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
+
+ /* Add the range to the newest end of the list of windows. */
+ window[NR_WINDOWS-1].fd = fd;
+ window[NR_WINDOWS-1].offset = offset;
+ window[NR_WINDOWS-1].len = len;
+
+ return 0;
}
/* When we close the handle we must remove any windows which are still
@@ -855,8 +856,10 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
return -1;
#if EVICT_WRITES
- if (cache_mode == cache_none)
- evict_writes (h->fd, orig_offset, orig_count);
+ if (cache_mode == cache_none) {
+ if (evict_writes (h->fd, orig_offset, orig_count) == -1)
+ return -1;
+ }
#endif
return 0;
--
2.47.1

View File

@ -0,0 +1,90 @@
From 056a975578b42e8cbfd783b8b919c4ef02e40019 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 11:56:22 +0100
Subject: [PATCH] file: Reduce the size of the lock around write eviction
Previously we held window_lock during the synchronous eviction of the
oldest window. This potentially serializes all writes in the slow
write case. We don't need to hold the lock here.
(cherry picked from commit d3d2bc45bb59a30669a3d926435cf57e99feb3a2)
---
plugins/file/file.c | 52 +++++++++++++++++++++++++++------------------
1 file changed, 31 insertions(+), 21 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index f195bcf3..e68c24ee 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -119,37 +119,47 @@ static struct write_window window[NR_WINDOWS];
static int
evict_writes (int fd, uint64_t offset, size_t len)
{
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
-
- /* Tell Linux to start writing the current range out to disk
- * (asynchronously).
- */
- if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) {
- nbdkit_error ("sync_file_range: cache=none: starting eviction: %m");
- return -1;
+ struct write_window oldest = { 0 };
+
+ {
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
+
+ /* Save oldest window[0] for eviction below, and move all windows
+ * down one. Set the newest slot to empty.
+ */
+ oldest = window[0];
+ memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
+ window[NR_WINDOWS-1].len = 0;
+
+ /* Tell Linux to start writing the current range out to disk
+ * (asynchronously).
+ */
+ if (sync_file_range (fd, offset, len, SYNC_FILE_RANGE_WRITE) == -1) {
+ nbdkit_error ("sync_file_range: cache=none: starting eviction: %m");
+ return -1;
+ }
+
+ /* Add the range to the newest end of the list of windows. */
+ window[NR_WINDOWS-1].fd = fd;
+ window[NR_WINDOWS-1].offset = offset;
+ window[NR_WINDOWS-1].len = len;
}
+ /* Release lock here. */
- /* Evict the oldest window from the page cache. */
- if (window[0].len > 0) {
- if (sync_file_range (window[0].fd, window[0].offset, window[0].len,
- SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|
+ /* Evict the oldest window from the page cache (synchronously). */
+ if (oldest.len > 0) {
+ if (sync_file_range (oldest.fd, oldest.offset, oldest.len,
+ SYNC_FILE_RANGE_WAIT_BEFORE |
+ SYNC_FILE_RANGE_WRITE |
SYNC_FILE_RANGE_WAIT_AFTER) == -1) {
nbdkit_error ("sync_file_range: cache=none: evicting oldest window: %m");
return -1;
}
- if (posix_fadvise (window[0].fd, window[0].offset, window[0].len,
+ if (posix_fadvise (oldest.fd, oldest.offset, oldest.len,
POSIX_FADV_DONTNEED) == -1)
nbdkit_debug ("posix_fadvise: POSIX_FADV_DONTNEED: (ignored): %m");
}
- /* Move the Nth window to N-1. */
- memmove (&window[0], &window[1], sizeof window[0] * (NR_WINDOWS-1));
-
- /* Add the range to the newest end of the list of windows. */
- window[NR_WINDOWS-1].fd = fd;
- window[NR_WINDOWS-1].offset = offset;
- window[NR_WINDOWS-1].len = len;
-
return 0;
}
--
2.47.1

View File

@ -0,0 +1,29 @@
From 5f5f74fa0afae78365eb431a589a6fa4938b9287 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 1 Apr 2025 16:51:24 +0100
Subject: [PATCH] file: Document implicit assumption about eviction windows
Add a comment that slots in the window list are only valid when len > 0.
This was true before but only implicit.
(cherry picked from commit 2022ac667f3c1de81b692f0128dd83a7c4999e38)
---
plugins/file/file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index e68c24ee..6bcc5537 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -110,7 +110,7 @@ static enum { cache_default, cache_none } cache_mode = cache_default;
struct write_window {
int fd;
uint64_t offset;
- size_t len;
+ size_t len; /* window slot only valid if len > 0 */
};
static pthread_mutex_t window_lock = PTHREAD_MUTEX_INITIALIZER;
--
2.47.1

View File

@ -0,0 +1,33 @@
From 23a2ec7a574dda51a5b4bd2ddef3dcc2b2c8b8f2 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sat, 5 Apr 2025 09:02:03 +0100
Subject: [PATCH] server: Turn flush into a controlpath message
The server debug flags -D nbdkit.backend.controlpath=<bool> and
-D nbdkit.backend.datapath=<bool> control the verbosity of messages.
'flush' was categorized as a datapath message, but it's more arguably
a controlpath message, and anyway it is rare and useful to see in
virt-v2v output even when datapath messages are suppressed.
(cherry picked from commit 079c8a91bf5161614916470dcb1f52bee8bfb397)
---
server/backend.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/server/backend.c b/server/backend.c
index 6232b69d..1f1bcfce 100644
--- a/server/backend.c
+++ b/server/backend.c
@@ -693,7 +693,7 @@ backend_flush (struct context *c,
assert (c->handle && (c->state & HANDLE_CONNECTED));
assert (c->can_flush == 1);
assert (flags == 0);
- datapath_debug ("%s: flush", b->name);
+ controlpath_debug ("%s: flush", b->name);
r = b->flush (c, flags, err);
if (r == -1)
--
2.47.1

View File

@ -0,0 +1,26 @@
From a79bf57c8ec805516e8dbe7995aa2bd46b83ade3 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:03:06 +0100
Subject: [PATCH] file: Fix minor typo in debug message
(cherry picked from commit a75db5636b94c9184f8eb02fd51182d935df64a6)
---
plugins/file/file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 6bcc5537..71b349ac 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -924,7 +924,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
if (r == 0) {
if (file_debug_zero)
- nbdkit_debug ("h->can_zero-range: "
+ nbdkit_debug ("h->can_zero_range: "
"zero succeeded using fallocate");
goto out;
}
--
2.47.1

View File

@ -0,0 +1,36 @@
From 1cb341e75c1a17553b69ea8d9889662e6d09ae78 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:21:23 +0100
Subject: [PATCH] file: Add more debugging when -D file.zero=1 is used
(cherry picked from commit ecf6b15fa84a02b74ea969f06552c82ee418b9b4)
---
plugins/file/file.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 71b349ac..378f6988 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -879,7 +879,17 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
static int
do_fallocate (int fd, int mode_, off_t offset, off_t len)
{
- int r = fallocate (fd, mode_, offset, len);
+ int r;
+
+ r = fallocate (fd, mode_, offset, len);
+
+ if (file_debug_zero)
+ nbdkit_debug ("fallocate ([%s%s ], %" PRIu64 ", %" PRIu64") => %d (%d)",
+ mode_ & FALLOC_FL_PUNCH_HOLE ? " FALLOC_FL_PUNCH_HOLE" : "",
+ mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "",
+ (uint64_t) offset, (uint64_t) len, r,
+ r == -1 ? errno : 0);
+
if (r == -1 && errno == ENODEV) {
/* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails
with EOPNOTSUPP in this case. Normalize errno to simplify callers. */
--
2.47.1

View File

@ -0,0 +1,43 @@
From 664e447d858a21304610db3023cc728db0c974bd Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:32:17 +0100
Subject: [PATCH] file: Fix comment style in a few places
No actual change here.
(cherry picked from commit 0df4142c4be2b059c4d17aae0ec71f16ffc9ba35)
---
plugins/file/file.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 378f6988..01ad1ef2 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -892,7 +892,8 @@ do_fallocate (int fd, int mode_, off_t offset, off_t len)
if (r == -1 && errno == ENODEV) {
/* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails
- with EOPNOTSUPP in this case. Normalize errno to simplify callers. */
+ * with EOPNOTSUPP in this case. Normalize errno to simplify callers.
+ */
errno = EOPNOTSUPP;
}
return r;
@@ -949,9 +950,10 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
#endif
#ifdef FALLOC_FL_PUNCH_HOLE
- /* If we can punch hole but may not trim, we can combine punching hole and
- * fallocate to zero a range. This is expected to be more efficient than
- * writing zeroes manually. */
+ /* If we can punch hole but may not trim, we can combine punching
+ * hole and fallocate to zero a range. This is expected to be more
+ * efficient than writing zeroes manually.
+ */
if (h->can_punch_hole && h->can_fallocate) {
int r;
--
2.47.1

View File

@ -0,0 +1,60 @@
From 4c02ff62f40497335da185cc4b45c2ba43fb609b Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:59:17 +0100
Subject: [PATCH] file: Fix do_fallocate debugging on Alpine
Alpine has some weird/old kernel that doesn't support
FALLOC_FL_ZERO_RANGE but does support FALLOC_FL_PUNCH_HOLE, so the
debugging I added in commit ecf6b15fa8 failed to compile with:
file.c: In function 'do_fallocate':
file.c:958:27: error: 'FALLOC_FL_ZERO_RANGE' undeclared (first use in this function)
958 | mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "",
| ^~~~~~~~~~~~~~~~~~~~
file.c:958:27: note: each undeclared identifier is reported only once for each function it appears in
make[3]: *** [Makefile:666: nbdkit_file_plugin_la-file.lo] Error 1
Fixes: commit ecf6b15fa84a02b74ea969f06552c82ee418b9b4
(cherry picked from commit 419a347054f81c53706637feddbc5008beab77d3)
---
plugins/file/file.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 01ad1ef2..32c5e2b7 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -875,7 +875,7 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
return 0;
}
-#if defined (FALLOC_FL_PUNCH_HOLE) || defined (FALLOC_FL_ZERO_RANGE)
+#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE)
static int
do_fallocate (int fd, int mode_, off_t offset, off_t len)
{
@@ -884,9 +884,20 @@ do_fallocate (int fd, int mode_, off_t offset, off_t len)
r = fallocate (fd, mode_, offset, len);
if (file_debug_zero)
- nbdkit_debug ("fallocate ([%s%s ], %" PRIu64 ", %" PRIu64") => %d (%d)",
+ nbdkit_debug ("fallocate (["
+#if defined(FALLOC_FL_PUNCH_HOLE)
+ "%s"
+#endif
+#if defined(FALLOC_FL_ZERO_RANGE)
+ "%s"
+#endif
+ " ], %" PRIu64 ", %" PRIu64") => %d (%d)",
+#if defined(FALLOC_FL_PUNCH_HOLE)
mode_ & FALLOC_FL_PUNCH_HOLE ? " FALLOC_FL_PUNCH_HOLE" : "",
+#endif
+#if defined(FALLOC_FL_ZERO_RANGE)
mode_ & FALLOC_FL_ZERO_RANGE ? " FALLOC_FL_ZERO_RANGE" : "",
+#endif
(uint64_t) offset, (uint64_t) len, r,
r == -1 ? errno : 0);
--
2.47.1

View File

@ -0,0 +1,65 @@
From bc4598f3d2d1ef2f4ebdf5b365ed08eff14d5654 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:26:41 +0100
Subject: [PATCH] file: Rename h->can_zeroout to h->can_blkzeroout to reflect
ioctl
Since we're calling the blockdev-specific BLKZEROOUT ioctl when this
flag is set, rename the flag.
(cherry picked from commit fba20ce06c2f0e7c4be7e52e8e1934933851dfbc)
---
plugins/file/file.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 32c5e2b7..70805bd7 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -497,7 +497,7 @@ struct handle {
bool can_punch_hole;
bool can_zero_range;
bool can_fallocate;
- bool can_zeroout;
+ bool can_blkzeroout;
};
/* Common code for opening a file by name, used by mode_filename and
@@ -703,7 +703,7 @@ file_open (int readonly)
#endif
h->can_fallocate = true;
- h->can_zeroout = h->is_block_device;
+ h->can_blkzeroout = h->is_block_device;
return h;
}
@@ -998,14 +998,14 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
#ifdef BLKZEROOUT
/* For aligned range and block device, we can use BLKZEROOUT. */
- if (h->can_zeroout && IS_ALIGNED (offset | count, h->sector_size)) {
+ if (h->can_blkzeroout && IS_ALIGNED (offset | count, h->sector_size)) {
int r;
uint64_t range[2] = {offset, count};
r = ioctl (h->fd, BLKZEROOUT, &range);
if (r == 0) {
if (file_debug_zero)
- nbdkit_debug ("h->can_zeroout && IS_ALIGNED: "
+ nbdkit_debug ("h->can_blkzeroout && IS_ALIGNED: "
"zero succeeded using BLKZEROOUT");
goto out;
}
@@ -1015,7 +1015,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
return -1;
}
- h->can_zeroout = false;
+ h->can_blkzeroout = false;
}
#endif
--
2.47.1

View File

@ -0,0 +1,38 @@
From c1984ddcc6497c4446d1bf0e8828d1259852eb74 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:30:41 +0100
Subject: [PATCH] file: zero: Document implicit order that we will try zeroing
methods
There's no substantive change here. I just pulled out the test (flags
& NBDKIT_FLAG_MAY_TRIM) into a boolean variable, and documented that
we (will) try zero-with-trim methods first.
(cherry picked from commit 61fc023f235b17f8a19302885d1613dd0a7a3793)
---
plugins/file/file.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 70805bd7..3b82e02d 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -916,9 +916,14 @@ static int
file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
{
struct handle *h __attribute__ ((unused)) = handle;
+ const bool may_trim __attribute__ ((unused)) = flags & NBDKIT_FLAG_MAY_TRIM;
+ /* These alternate zeroing methods are ordered. Methods which can
+ * trim (if may_trim is set) are tried first. Methods which can
+ * only zero are tried last.
+ */
#ifdef FALLOC_FL_PUNCH_HOLE
- if (h->can_punch_hole && (flags & NBDKIT_FLAG_MAY_TRIM)) {
+ if (may_trim && h->can_punch_hole) {
int r;
r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
--
2.47.1

View File

@ -0,0 +1,122 @@
From 396e8a97835155a620cabbcf1aabaaa1fa4a08f1 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 1 May 2025 10:36:23 +0100
Subject: [PATCH] file: zero: Use BLKDISCARD method if may_trim is set
If we're allowed to trim and we're writing to a block device,
previously we hit the case fallocate(FALLOC_FL_ZERO_RANGE) first.
This succeeds in Linux, zeroing (not trimming) the range.
However it would be better to trim in this case. Linux supports
ioctl(BLKDISCARD) on block devices, so try this method first.
Fixes: https://issues.redhat.com/browse/RHEL-89353
Reported-by: Germano Veit Michel
Thanks: Eric Blake
(cherry picked from commit 7a9ecda24906c64d9f8c7238a96cb3f686e894eb)
---
plugins/file/file.c | 50 +++++++++++++++++++++++++++++
plugins/file/nbdkit-file-plugin.pod | 5 +++
2 files changed, 55 insertions(+)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 3b82e02d..b4dec3c5 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -397,6 +397,9 @@ file_dump_plugin (void)
#ifdef BLKSSZGET
printf ("file_blksszget=yes\n");
#endif
+#ifdef BLKDISCARD
+ printf ("file_blkdiscard=yes\n");
+#endif
#ifdef BLKZEROOUT
printf ("file_blkzeroout=yes\n");
#endif
@@ -497,6 +500,7 @@ struct handle {
bool can_punch_hole;
bool can_zero_range;
bool can_fallocate;
+ bool can_blkdiscard;
bool can_blkzeroout;
};
@@ -704,6 +708,7 @@ file_open (int readonly)
h->can_fallocate = true;
h->can_blkzeroout = h->is_block_device;
+ h->can_blkdiscard = h->is_block_device;
return h;
}
@@ -944,6 +949,51 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
#endif
+#if defined(BLKDISCARD) && defined(FALLOC_FL_ZERO_RANGE)
+ /* For aligned range and block device, we can use BLKDISCARD to
+ * trim. However BLKDISCARD doesn't necessarily zero (eg for local
+ * disk) so we have to zero first and then discard.
+ *
+ * In future all Linux block devices may understand
+ * FALLOC_FL_PUNCH_HOLE which means this case would no longer be
+ * necessary, since the case above will handle it.
+ */
+ if (may_trim && h->can_blkdiscard && h->can_zero_range &&
+ IS_ALIGNED (offset | count, h->sector_size)) {
+ int r;
+ uint64_t range[2] = {offset, count};
+
+ r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count);
+ if (r == 0) {
+ /* We could use FALLOC_FL_PUNCH_HOLE here instead, but currently
+ * thin LVs do not support it (XXX 2025-04).
+ */
+ r = ioctl (h->fd, BLKDISCARD, &range);
+ if (r == 0) {
+ if (file_debug_zero)
+ nbdkit_debug ("h->can_blkdiscard && may_trim && IS_ALIGNED: "
+ "zero succeeded using BLKDISCARD");
+ goto out;
+ }
+
+ if (!is_enotsup (errno)) {
+ nbdkit_error ("zero: %m");
+ return -1;
+ }
+
+ h->can_blkdiscard = false;
+ }
+ else {
+ if (!is_enotsup (errno)) {
+ nbdkit_error ("zero: %m");
+ return -1;
+ }
+
+ h->can_fallocate = false;
+ }
+ }
+#endif
+
#ifdef FALLOC_FL_ZERO_RANGE
if (h->can_zero_range) {
int r;
diff --git a/plugins/file/nbdkit-file-plugin.pod b/plugins/file/nbdkit-file-plugin.pod
index a50bef2d..0e260b7f 100644
--- a/plugins/file/nbdkit-file-plugin.pod
+++ b/plugins/file/nbdkit-file-plugin.pod
@@ -227,6 +227,11 @@ future.
If both set, the plugin may be able to efficiently zero ranges of
block devices, where the driver and block device itself supports this.
+=item C<file_blkdiscard=yes>
+
+If set, the plugin may be able to efficiently trim ranges of block
+devices, where the driver and block device itself supports this.
+
=item C<file_extents=yes>
If set, the plugin can read file extents.
--
2.47.1

View File

@ -0,0 +1,30 @@
From 2b4f411ebf4fca9c084e8fc74ed2c53debfb3614 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 29 May 2025 09:15:33 +0000
Subject: [PATCH] vddk: Debug length of extents when using -D vddk.extents=1
(cherry picked from commit a53746d326e08fae9ec1ea782df740abb48d0114)
---
plugins/vddk/worker.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 8a91250a..c61c4d16 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -375,9 +375,10 @@ add_extent (struct nbdkit_extents *extents,
return 0;
if (vddk_debug_extents)
- nbdkit_debug ("adding extent type %s at [%" PRIu64 "...%" PRIu64 "]",
+ nbdkit_debug ("adding extent type %s at [%" PRIu64 "...%" PRIu64 "] "
+ "(length %" PRIu64 ")",
is_hole ? "hole" : "allocated data",
- *position, next_position-1);
+ *position, next_position-1, length);
if (nbdkit_add_extent (extents, *position, length, type) == -1)
return -1;
--
2.47.1

View File

@ -0,0 +1,38 @@
From 70df77c1abd92fccf0c5594f613f3e375c71c2b8 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sat, 31 May 2025 08:12:32 +0100
Subject: [PATCH] cacheextents: Mark this filter as deprecated
We will remove it in nbdkit 1.46.
Its usage was removed from virt-v2v because we found that it did not
do anything:
https://github.com/libguestfs/virt-v2v/commit/48c4ce8e6cf6f1c390a48245ef0f99233f80cfe8
(cherry picked from commit 9717c47999fb2f56c3569cf1cdd4d0c160f866c1)
---
filters/cacheextents/nbdkit-cacheextents-filter.pod | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/filters/cacheextents/nbdkit-cacheextents-filter.pod b/filters/cacheextents/nbdkit-cacheextents-filter.pod
index a2b2aa51..ebf0fbc5 100644
--- a/filters/cacheextents/nbdkit-cacheextents-filter.pod
+++ b/filters/cacheextents/nbdkit-cacheextents-filter.pod
@@ -6,6 +6,14 @@ nbdkit-cacheextents-filter - cache extents
nbdkit --filter=cacheextents plugin
+=head1 DEPRECATED
+
+B<The cacheextents filter is deprecated in S<nbdkit E<ge> 1.43.10> and
+will be removed in S<nbdkit 1.46>>. There is no direct replacement,
+but as the filter only worked for a narrow and unusual range of access
+patterns it is likely that it has no effect and you can just stop
+using it.
+
=head1 DESCRIPTION
C<nbdkit-cacheextents-filter> is a filter that caches the result of last
--
2.47.1

View File

@ -0,0 +1,77 @@
From ea41c6517d0093e60b3acbc7a310758ba75211fc Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sat, 31 May 2025 08:04:41 +0100
Subject: [PATCH] include: Move some extent functions to nbdkit-common.h
No existing plugins use the extent functions nbdkit_extents_new,
nbdkit_extents_free, etc, since they are mainly useful for filters so
they can build and manipulate new lists of extents. Nevertheless
there is nothing that prevents them from being used in plugins, so
move those functions to the common header (so they appear for users of
<nbdkit-plugin.h>)
There are a couple of helper functions which are really
filter-specific so leave those in nbdkit-filter.h.
(cherry picked from commit 03792d04f270f2cffc589dd9703c9de9c3d5a65e)
---
include/nbdkit-common.h | 15 +++++++++++++++
include/nbdkit-filter.h | 15 +--------------
2 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/include/nbdkit-common.h b/include/nbdkit-common.h
index d0a5c854..9d7409e6 100644
--- a/include/nbdkit-common.h
+++ b/include/nbdkit-common.h
@@ -152,7 +152,22 @@ NBDKIT_EXTERN_DECL (const char *, nbdkit_vprintf_intern,
(const char *msg, va_list args)
ATTRIBUTE_FORMAT_PRINTF (1, 0));
+/* Extent functions. */
+struct nbdkit_extent {
+ uint64_t offset;
+ uint64_t length;
+ uint32_t type;
+};
+
struct nbdkit_extents;
+
+NBDKIT_EXTERN_DECL (struct nbdkit_extents *, nbdkit_extents_new,
+ (uint64_t start, uint64_t end));
+NBDKIT_EXTERN_DECL (void, nbdkit_extents_free, (struct nbdkit_extents *));
+NBDKIT_EXTERN_DECL (size_t, nbdkit_extents_count,
+ (const struct nbdkit_extents *));
+NBDKIT_EXTERN_DECL (struct nbdkit_extent, nbdkit_get_extent,
+ (const struct nbdkit_extents *, size_t));
NBDKIT_EXTERN_DECL (int, nbdkit_add_extent,
(struct nbdkit_extents *,
uint64_t offset, uint64_t length, uint32_t type));
diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h
index a4ac09d4..31520bf5 100644
--- a/include/nbdkit-filter.h
+++ b/include/nbdkit-filter.h
@@ -121,20 +121,7 @@ struct nbdkit_next_ops {
*/
};
-/* Extent functions. */
-struct nbdkit_extent {
- uint64_t offset;
- uint64_t length;
- uint32_t type;
-};
-
-NBDKIT_EXTERN_DECL (struct nbdkit_extents *, nbdkit_extents_new,
- (uint64_t start, uint64_t end));
-NBDKIT_EXTERN_DECL (void, nbdkit_extents_free, (struct nbdkit_extents *));
-NBDKIT_EXTERN_DECL (size_t, nbdkit_extents_count,
- (const struct nbdkit_extents *));
-NBDKIT_EXTERN_DECL (struct nbdkit_extent, nbdkit_get_extent,
- (const struct nbdkit_extents *, size_t));
+/* Extent helpers. */
NBDKIT_EXTERN_DECL (struct nbdkit_extents *, nbdkit_extents_full,
(nbdkit_next *next,
uint32_t count, uint64_t offset,
--
2.47.1

View File

@ -0,0 +1,29 @@
From dde9c60307ca0cefcc8109dc1ef71dee8144d931 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 May 2025 13:29:28 +0100
Subject: [PATCH] vddk: Display command type in command completed message
Useful extra debugging.
(cherry picked from commit 81d4d74fecf3c071e144a8ba016f43ba1de1b093)
---
plugins/vddk/worker.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index c61c4d16..3bf1d5c2 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -116,7 +116,8 @@ complete_command (void *vp, VixError result)
struct command *cmd = vp;
if (vddk_debug_datapath)
- nbdkit_debug ("command %" PRIu64 " completed", cmd->id);
+ nbdkit_debug ("command %" PRIu64 " (%s) completed",
+ cmd->id, command_type_string (cmd->type));
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex);
--
2.47.1

View File

@ -0,0 +1,41 @@
From 6bfb320ff03dd89d1b9b584516b7e20830d2cdb5 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 May 2025 13:32:00 +0100
Subject: [PATCH] vddk: Cache the readonly flag from the .open call in the
handle
(cherry picked from commit 0d953ea644f44259edb19c97e3c7863794c0ca1c)
---
plugins/vddk/vddk.c | 1 +
plugins/vddk/vddk.h | 3 +++
2 files changed, 4 insertions(+)
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 2a787453..468971f4 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -699,6 +699,7 @@ vddk_open (int readonly)
nbdkit_error ("calloc: %m");
return NULL;
}
+ h->readonly = readonly;
h->commands = (command_queue) empty_vector;
pthread_mutex_init (&h->commands_lock, NULL);
pthread_cond_init (&h->commands_cond, NULL);
diff --git a/plugins/vddk/vddk.h b/plugins/vddk/vddk.h
index 1d1069cc..3586c5da 100644
--- a/plugins/vddk/vddk.h
+++ b/plugins/vddk/vddk.h
@@ -166,6 +166,9 @@ struct vddk_handle {
pthread_cond_t commands_cond; /* condition (queue size 0 -> 1) */
uint64_t id; /* next command ID */
+ /* Cached readonly flag from the open call. */
+ int readonly;
+
/* Cached disk size in bytes (set in get_size()). */
uint64_t size;
};
--
2.47.1

View File

@ -0,0 +1,250 @@
From ddd507afe350e5c4470c49e4e714914032c535c8 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 May 2025 13:49:10 +0100
Subject: [PATCH] vddk: Move minimum version of VDDK to 6.7
We can now assume that QueryAllocatedBlocks exists, as well as a
couple of other functions, simplifying the code.
This was released in 2018 (7 years ago) so there's been plenty of time
to upgrade.
(cherry picked from commit 7c90116664b1b1a3c3756d6a79d6d483bc6062dd)
---
plugins/vddk/nbdkit-vddk-plugin.pod | 8 ++---
plugins/vddk/vddk-stubs.h | 32 +++++++++++---------
plugins/vddk/vddk.c | 43 ++++++---------------------
plugins/vddk/worker.c | 14 ++-------
tests/dummy-vddk.c | 45 +++++++++++++++++++++++++++--
5 files changed, 76 insertions(+), 66 deletions(-)
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
index 19f25423..e937942c 100644
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
@@ -396,15 +396,13 @@ is I<not> recommended.
=over 4
-=item VDDK E<ge> 6.5 (released Nov 2016)
+=item VDDK E<ge> 6.7 (released Apr 2018)
This is the minimum version of VDDK supported. Older versions will
not work.
-=item VDDK 6.7 (released Apr 2018)
-
-This is the first version that supported the
-C<VixDiskLib_QueryAllocatedBlocks> API. This is required to provide
+This is also the first version that supported the
+C<VixDiskLib_QueryAllocatedBlocks> API. This is used to provide
sparseness (extent) information over NBD.
=item VDDK 8.0.2.1 (released Feb 2024)
diff --git a/plugins/vddk/vddk-stubs.h b/plugins/vddk/vddk-stubs.h
index 8620d11a..658c11f1 100644
--- a/plugins/vddk/vddk-stubs.h
+++ b/plugins/vddk/vddk-stubs.h
@@ -129,16 +129,22 @@ STUB (VixDiskLib_Wait,
VixError,
(VixDiskLibHandle handle));
-/* Added in VDDK 6.7, these will be NULL for earlier versions: */
-OPTIONAL_STUB (VixDiskLib_QueryAllocatedBlocks,
- VixError,
- (VixDiskLibHandle diskHandle,
- uint64_t start_sector, uint64_t nr_sectors,
- uint64_t chunk_size,
- VixDiskLibBlockList **block_list));
-OPTIONAL_STUB (VixDiskLib_FreeBlockList,
- VixError,
- (VixDiskLibBlockList *block_list));
-OPTIONAL_STUB (VixDiskLib_AllocateConnectParams,
- VixDiskLibConnectParams *,
- (void));
+/* Added in VDDK 6.7. */
+STUB (VixDiskLib_QueryAllocatedBlocks,
+ VixError,
+ (VixDiskLibHandle diskHandle,
+ uint64_t start_sector, uint64_t nr_sectors,
+ uint64_t chunk_size,
+ VixDiskLibBlockList **block_list));
+STUB (VixDiskLib_FreeBlockList,
+ VixError,
+ (VixDiskLibBlockList *block_list));
+STUB (VixDiskLib_AllocateConnectParams,
+ VixDiskLibConnectParams *,
+ (void));
+
+/* OPTIONAL_STUB can be used to add new APIs which are not supported
+ * by older versions of VDDK, and to make them NULL if not present.
+ * However VDDK >= 6.7 has everything we need for now so we are no
+ * longer using this macro.
+ */
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 468971f4..9d49203c 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -652,37 +652,6 @@ vddk_dump_plugin (void)
/* Lock protecting open/close calls - see above. */
static pthread_mutex_t open_close_lock = PTHREAD_MUTEX_INITIALIZER;
-static inline VixDiskLibConnectParams *
-allocate_connect_params (void)
-{
- VixDiskLibConnectParams *ret;
-
- if (VixDiskLib_AllocateConnectParams != NULL) {
- VDDK_CALL_START (VixDiskLib_AllocateConnectParams, "")
- ret = VixDiskLib_AllocateConnectParams ();
- VDDK_CALL_END (VixDiskLib_AllocateConnectParams, 0);
- }
- else
- ret = calloc (1, sizeof (VixDiskLibConnectParams));
-
- return ret;
-}
-
-static inline void
-free_connect_params (VixDiskLibConnectParams *params)
-{
- /* Only use FreeConnectParams if AllocateConnectParams was
- * originally called. Otherwise use free.
- */
- if (VixDiskLib_AllocateConnectParams != NULL) {
- VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params")
- VixDiskLib_FreeConnectParams (params);
- VDDK_CALL_END (VixDiskLib_FreeConnectParams, 0);
- }
- else
- free (params);
-}
-
/* Create the per-connection handle. */
static void *
vddk_open (int readonly)
@@ -704,7 +673,9 @@ vddk_open (int readonly)
pthread_mutex_init (&h->commands_lock, NULL);
pthread_cond_init (&h->commands_cond, NULL);
- h->params = allocate_connect_params ();
+ VDDK_CALL_START (VixDiskLib_AllocateConnectParams, "")
+ h->params = VixDiskLib_AllocateConnectParams ();
+ VDDK_CALL_END (VixDiskLib_AllocateConnectParams, 0);
if (h->params == NULL) {
nbdkit_error ("allocate VixDiskLibConnectParams: %m");
goto err0;
@@ -837,7 +808,9 @@ vddk_open (int readonly)
VixDiskLib_Disconnect (h->connection);
VDDK_CALL_END (VixDiskLib_Disconnect, 0);
err1:
- free_connect_params (h->params);
+ VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params")
+ VixDiskLib_FreeConnectParams (h->params);
+ VDDK_CALL_END (VixDiskLib_FreeConnectParams, 0);
err0:
pthread_mutex_destroy (&h->commands_lock);
pthread_cond_destroy (&h->commands_cond);
@@ -863,7 +836,9 @@ vddk_close (void *handle)
VixDiskLib_Disconnect (h->connection);
VDDK_CALL_END (VixDiskLib_Disconnect, 0);
- free_connect_params (h->params);
+ VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params")
+ VixDiskLib_FreeConnectParams (h->params);
+ VDDK_CALL_END (VixDiskLib_FreeConnectParams, 0);
pthread_mutex_destroy (&h->commands_lock);
pthread_cond_destroy (&h->commands_cond);
command_queue_reset (&h->commands);
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 3bf1d5c2..076b2bd7 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -309,23 +309,13 @@ do_can_extents (struct command *cmd, struct vddk_handle *h)
VixError err;
VixDiskLibBlockList *block_list;
- /* This call was added in VDDK 6.7. In earlier versions the
- * function pointer will be NULL and we cannot query extents.
- */
- if (VixDiskLib_QueryAllocatedBlocks == NULL) {
- nbdkit_debug ("can_extents: VixDiskLib_QueryAllocatedBlocks == NULL, "
- "probably this is VDDK < 6.7");
- return 0;
- }
-
/* Suppress errors around this call. See:
* https://bugzilla.redhat.com/show_bug.cgi?id=1709211#c7
*/
error_suppression = 1;
- /* However even when the call is available it rarely works well so
- * the best thing we can do here is to try the call and if it's
- * non-functional return false.
+ /* Try the QueryAllocatedBlocks call and if it's non-functional
+ * return false.
*/
VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
"handle, 0, %d sectors, %d sectors",
diff --git a/tests/dummy-vddk.c b/tests/dummy-vddk.c
index 8b45ea6b..5247173b 100644
--- a/tests/dummy-vddk.c
+++ b/tests/dummy-vddk.c
@@ -110,11 +110,52 @@ VixDiskLib_FreeErrorText (char *text)
free (text);
}
+NBDKIT_DLL_PUBLIC VixError
+VixDiskLib_QueryAllocatedBlocks (VixDiskLibHandle diskHandle,
+ uint64_t start_sector, uint64_t nr_sectors,
+ uint64_t chunk_size,
+ VixDiskLibBlockList **block_list)
+{
+ VixDiskLibBlockList *ret;
+
+ /* This is safe because ret->blocks is a 1-sized array and we only
+ * use 1 entry here.
+ */
+ ret = calloc (1, sizeof *ret);
+ if (ret == NULL)
+ abort ();
+
+ /* Pretend it's all allocated. */
+ ret->numBlocks = 1;
+ ret->blocks[0].offset = start_sector;
+ ret->blocks[0].length = nr_sectors;
+
+ *block_list = ret;
+ return VIX_OK;
+}
+
+NBDKIT_DLL_PUBLIC VixError
+VixDiskLib_FreeBlockList (VixDiskLibBlockList *block_list)
+{
+ free (block_list);
+ return VIX_OK;
+}
+
+NBDKIT_DLL_PUBLIC VixDiskLibConnectParams *
+VixDiskLib_AllocateConnectParams (void)
+{
+ VixDiskLibConnectParams *ret;
+
+ ret = calloc (1, sizeof *ret);
+ if (ret == NULL)
+ abort ();
+ return ret;
+}
+
NBDKIT_DLL_PUBLIC void
VixDiskLib_FreeConnectParams (VixDiskLibConnectParams *params)
{
- /* never called since we don't define optional AllocateConnectParams */
- abort ();
+ free (params);
}
NBDKIT_DLL_PUBLIC VixError
--
2.47.1

View File

@ -0,0 +1,78 @@
From 8d5ab5acd2d111f6cf02dbce4ad034b49b86bc35 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 May 2025 13:59:28 +0100
Subject: [PATCH] vddk: Unconditionally test QueryAllocatedBlocks
Except in rare cases like a suddenly dropped connection, we always
call vddk_can_extents and therefore do this test. We might as well do
it unconditionally when the worker thread starts up.
This simplifies following commits where we may do more work based on
this flag when the worker thread starts up.
(cherry picked from commit 787db3e8ecfd81b683f541065daee75665ab47e0)
---
plugins/vddk/worker.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 076b2bd7..6efcc6f6 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -303,8 +303,13 @@ do_flush (struct command *cmd, struct vddk_handle *h)
return 0;
}
-static int
-do_can_extents (struct command *cmd, struct vddk_handle *h)
+/* Try the QueryAllocatedBlocks call and if it's non-functional return
+ * false. At some point in future, perhaps when we move to baseline
+ * VDDK >= 7, we can just assume it works and remove this test
+ * entirely.
+ */
+static bool
+test_can_extents (struct vddk_handle *h)
{
VixError err;
VixDiskLibBlockList *block_list;
@@ -314,9 +319,6 @@ do_can_extents (struct command *cmd, struct vddk_handle *h)
*/
error_suppression = 1;
- /* Try the QueryAllocatedBlocks call and if it's non-functional
- * return false.
- */
VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
"handle, 0, %d sectors, %d sectors",
VIXDISKLIB_MIN_CHUNK_SIZE, VIXDISKLIB_MIN_CHUNK_SIZE)
@@ -502,6 +504,10 @@ vddk_worker_thread (void *handle)
{
struct vddk_handle *h = handle;
bool stop = false;
+ bool can_extents;
+
+ /* Test if QueryAllocatedBlocks will work. */
+ can_extents = test_can_extents (h);
while (!stop) {
struct command *cmd;
@@ -544,12 +550,13 @@ vddk_worker_thread (void *handle)
break;
case CAN_EXTENTS:
- r = do_can_extents (cmd, h);
- if (r >= 0)
- *(int *)cmd->ptr = r;
+ *(int *)cmd->ptr = can_extents;
+ r = 0;
break;
case EXTENTS:
+ /* If we returned false above, we should never be called here. */
+ assert (can_extents);
r = do_extents (cmd, h);
break;
--
2.47.1

View File

@ -0,0 +1,349 @@
From 9bcda0ca197a20db8675253957fee954f362e689 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 May 2025 15:06:07 +0100
Subject: [PATCH] vddk: Pre-cache the extents for readonly connections
As explained in detail in the code comment, QueryAllocatedBlocks has
very poor performance. We can partially work around this by
pre-caching extents when the first NBD_BLOCK_STATUS request is made.
We only do this for readonly connections, since it would be very
complex to do it for writable connections where the extent information
could change under us. And we only do it on the first
NBD_BLOCK_STATUS request, so we know that the caller is interested in
extents.
Benchmarking
------------
This improves performance, dramatically in some cases:
Size Used% [ni] [nm] [nc] [qi] [qm] [qc] [dd]
before 64G 16% 17 21 354 6 62 180
after " " 18 13 59 6 7 66 57
nc=178 MBytes/s dd=1150 MBytes/sec
before 128G 5.5% 17 29 646 6 50 151
after " " 17 14 68 6 8 71
nc=106 MBytes/s
before 128G 45% 17 30 1073 6 52 578
after " " 17 14 457 6 8 506 143
nc=128 MBytes/s dd=409 MBytes/sec
(all times in seconds)
[ni] nbdinfo $uri
Note: Makes two connections, unlike qemu-img info.
[nm] nbdinfo --map --totals $uri
Note: Slower than it ought to be, needs investigation.
[nc] nbdcopy -p $uri null:
[qi] qemu-img info $uri
[qm] qemu-img map $uri
[qc] qemu-img convert -p -n -f raw $uri \
-O raw 'json:{"file.driver":"null-co","file.size":"1E"}'
Note: Requests the map up front, which is why it performs better
than nbdcopy on the "before" version since reads are not being
serialized by concurrent calls to QueryAllocatedBlocks.
[dd] dd if=*-flat.vmdk of=/dev/null bs=16777216
Note: This command was run on the ESXi server where the storage
is assumed to be local. ESXi has very limited tools available
(eg. no "fio" etc). Also the cp command is from Busybox and is
notably slower than dd. To get the accurate copying rate I
assumed that this command copies all data on disk to /dev/null,
skipping reading holes where thin provisioned. IOW using the "ls
-s" output as the number of blocks read.
It should be noted that the after/[nc] numbers are not very stable.
In the last test where [nc] = 457, I see a deviation of as much as 10%
either side over multiple runs.
The network used in the test is 25 Gbps and clearly we are nowhere
near able to reach that. A more likely upper limit is the speed of
reading from the disks ([dd]). There is also a large gap between our
performance and that number. VMware is thought to impose a
per-connection limit of around 1 Gbps on NFC connections, and there
are other limitations
(https://knowledge.broadcom.com/external/article/307001/nfc-performance-is-slow-resulting-in-mig.html).
Tuning NFC makes no observable difference
-----------------------------------------
Further tuning of NFC is possible
(https://techdocs.broadcom.com/us/en/vmware-cis/vsphere/vsphere-supervisor/7-0/best-practices-for-nbd-transport.html).
Using compression (nbdkit vddk plugin 'compression' option) is
possible but in my test it makes things much slower. This is using
the first VM from the tests above:
[nc]
(no compression) 59 (178 MBytes/sec)
compression=zlib 323 ( 33 MBytes/sec)
VMware documentation also suggests using a configuration file
containing the entries below (the configuration file is placed
somewhere on the client, and referenced using the
config=/path/to/config.ini parameter):
vixDiskLib.nfcAio.Session.BufSizeIn64KB=32
vixDiskLib.nfcAio.Session.BufCount=16
This made no difference for me, at least when testing a single
conversion. Separate tests done by the MTV team suggest it may
improve performance if you are converting multiple disks / VMs in
parallel
(https://docs.redhat.com/en/documentation/migration_toolkit_for_virtualization/2.7/html/installing_and_using_the_migration_toolkit_for_virtualization/mtv-performance-recommendation_mtv#mtv-aio-buffer-key-findings_mtv)
Some VMware documentation also suggests:
config.nfc.useSSL=false
but this also made no difference.
Some VMware documentation suggests using unbuffered I/O
(unbuffered=true) but in my test this caused a large slow down.
Continue to disable multi-conn
------------------------------
We have recommended against using multi-conn with the VDDK plugin,
because we observed some slow down. This commit makes no difference
to this advice. The same amount of slow down is still observed. (In
virt-v2v we use --filter=multi-conn multi-conn-mode=disable to ensure
it is never used.)
(cherry picked from commit 5a882e74cae3dbaa09bf3b942a02f9947b12f6e5)
---
plugins/vddk/vddk.c | 2 +
plugins/vddk/vddk.h | 3 +
plugins/vddk/worker.c | 166 +++++++++++++++++++++++++++++++++++++++++-
3 files changed, 170 insertions(+), 1 deletion(-)
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 9d49203c..bbf0af31 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -829,6 +829,8 @@ vddk_close (void *handle)
send_command_and_wait (h, &stop_cmd);
pthread_join (h->thread, NULL);
+ nbdkit_extents_free (h->extents);
+
VDDK_CALL_START (VixDiskLib_Close, "handle")
VixDiskLib_Close (h->handle);
VDDK_CALL_END (VixDiskLib_Close, 0);
diff --git a/plugins/vddk/vddk.h b/plugins/vddk/vddk.h
index 3586c5da..461fb528 100644
--- a/plugins/vddk/vddk.h
+++ b/plugins/vddk/vddk.h
@@ -171,6 +171,9 @@ struct vddk_handle {
/* Cached disk size in bytes (set in get_size()). */
uint64_t size;
+
+ /* Cached extents for readonly disks. */
+ struct nbdkit_extents *extents;
};
/* reexec.c */
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 6efcc6f6..3925fb91 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -37,6 +37,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
+#include <time.h>
#include <pthread.h>
@@ -380,7 +381,7 @@ add_extent (struct nbdkit_extents *extents,
}
static int
-do_extents (struct command *cmd, struct vddk_handle *h)
+get_extents_slow (struct command *cmd, struct vddk_handle *h)
{
const uint32_t count = cmd->count;
const uint64_t offset = cmd->offset;
@@ -496,6 +497,169 @@ do_extents (struct command *cmd, struct vddk_handle *h)
return 0;
}
+static int
+pre_cache_extents (struct vddk_handle *h)
+{
+ struct nbdkit_extents *extents;
+ uint64_t start_sector = 0;
+ uint64_t nr_chunks_remaining =
+ h->size / VIXDISKLIB_MIN_CHUNK_SIZE / VIXDISKLIB_SECTOR_SIZE;
+ uint64_t position = 0;
+
+ extents = nbdkit_extents_new (0, h->size);
+ if (extents == NULL)
+ return -1;
+
+ /* Scan through the disk reading whole "chunks" (32 GB), the most
+ * efficient way to use QueryAllocatedBlocks.
+ */
+ while (nr_chunks_remaining > 0) {
+ VixError err;
+ uint32_t i;
+ uint64_t nr_chunks, nr_sectors;
+ VixDiskLibBlockList *block_list;
+
+ assert (IS_ALIGNED (start_sector, VIXDISKLIB_MIN_CHUNK_SIZE));
+
+ nr_chunks = MIN (nr_chunks_remaining, VIXDISKLIB_MAX_CHUNK_NUMBER);
+ nr_sectors = nr_chunks * VIXDISKLIB_MIN_CHUNK_SIZE;
+
+ VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
+ "handle, %" PRIu64 " sectors, %" PRIu64 " sectors, "
+ "%d sectors",
+ start_sector, nr_sectors, VIXDISKLIB_MIN_CHUNK_SIZE)
+ err = VixDiskLib_QueryAllocatedBlocks (h->handle,
+ start_sector, nr_sectors,
+ VIXDISKLIB_MIN_CHUNK_SIZE,
+ &block_list);
+ VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
+ if (err != VIX_OK) {
+ VDDK_ERROR (err, "VixDiskLib_QueryAllocatedBlocks");
+ nbdkit_extents_free (extents);
+ return -1;
+ }
+
+ for (i = 0; i < block_list->numBlocks; ++i) {
+ uint64_t blk_offset, blk_length;
+
+ blk_offset = block_list->blocks[i].offset * VIXDISKLIB_SECTOR_SIZE;
+ blk_length = block_list->blocks[i].length * VIXDISKLIB_SECTOR_SIZE;
+
+ /* The query returns allocated blocks. We must insert holes
+ * between the blocks as necessary.
+ */
+ if ((position < blk_offset &&
+ add_extent (extents, &position, blk_offset, true) == -1) ||
+ (add_extent (extents,
+ &position, blk_offset + blk_length, false) == -1)) {
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
+ VixDiskLib_FreeBlockList (block_list);
+ VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
+ nbdkit_extents_free (extents);
+ return -1;
+ }
+ }
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
+ VixDiskLib_FreeBlockList (block_list);
+ VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
+
+ /* There's an implicit hole after the returned list of blocks,
+ * up to the end of the QueryAllocatedBlocks request.
+ */
+ if (add_extent (extents,
+ &position,
+ (start_sector + nr_sectors) * VIXDISKLIB_SECTOR_SIZE,
+ true) == -1) {
+ nbdkit_extents_free (extents);
+ return -1;
+ }
+
+ start_sector += nr_sectors;
+ nr_chunks_remaining -= nr_chunks;
+ }
+
+ /* Add the allocated unaligned bit at the end. */
+ if (position < h->size) {
+ if (add_extent (extents, &position, h->size, false) == -1) {
+ nbdkit_extents_free (extents);
+ return -1;
+ }
+ }
+
+ /* Save the pre-cached extents in the handle. */
+ h->extents = extents;
+ return 0;
+}
+
+static int
+get_extents_from_cache (struct command *cmd, struct vddk_handle *h)
+{
+ struct nbdkit_extents *rextents = cmd->ptr;
+ struct nbdkit_extent e;
+ size_t i;
+
+ /* We can just copy from the pre-cached extents in the handle which
+ * cover the entire disk, into the returned extents, because
+ * nbdkit_add_extent does the right thing.
+ */
+ for (i = 0; i < nbdkit_extents_count (h->extents); ++i) {
+ e = nbdkit_get_extent (h->extents, i);
+ if (nbdkit_add_extent (rextents, e.offset, e.length, e.type) == -1)
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Handle extents.
+ *
+ * Oh QueryAllocatedBlocks, how much I hate you. The API has two
+ * enormous problems: (a) It's slow, taking about 1 second per
+ * invocation regardless of how much or little data you request. (b)
+ * It serialises all other requests to the disk, like concurrent
+ * reads.
+ *
+ * NBD / nbdkit doesn't help much either by having a 4GB - 1 byte
+ * limit on the size of extent requests. This means that for each 4GB
+ * of disk, we will need to run QueryAllocatedBlocks twice. For a 1TB
+ * virtual disk, about 500 seconds would be used directly in the API
+ * calls, and much more time is lost because of serialization.
+ *
+ * To work around these problems, in the readonly case (used by
+ * virt-v2v), when the first NBD_BLOCK_STATUS request is received, we
+ * will read over the whole disk and cache the extents. We will read
+ * in 32 GB chunks (the maximum possible for the underlying
+ * QueryAllocatedBlocks API). For a 1TB disk this will take ~ 30
+ * seconds, but avoids all the overheads above. The cached extents
+ * are stored in the handle, and subsequent NBD_BLOCK_STATUS will use
+ * the cache only.
+ *
+ * For writable disks we can't easily do any caching so don't attempt
+ * it.
+ */
+static int
+do_extents (struct command *cmd, struct vddk_handle *h)
+{
+ if (h->readonly && !h->extents) {
+ time_t start_t, end_t;
+
+ time (&start_t);
+ nbdkit_debug ("vddk: pre-caching extents");
+
+ if (pre_cache_extents (h) == -1)
+ return -1;
+
+ time (&end_t);
+ nbdkit_debug ("vddk: finished pre-caching extents in %d second(s)",
+ (int) (end_t - start_t));
+ }
+
+ if (h->extents)
+ return get_extents_from_cache (cmd, h);
+ else
+ return get_extents_slow (cmd, h);
+}
+
/* Background worker thread, one per connection, which is where the
* VDDK commands are issued.
*/
--
2.47.1

View File

@ -0,0 +1,241 @@
From 0d7f579afc815049a6a35e1ecdd34f1857b2784b Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Wed, 4 Jun 2025 21:00:47 +0100
Subject: [PATCH] file: Save the filename or equivalent in the file handle
This will allow us to improve error messages.
To make this change easier by reducing duplication, I also implemented
a common error path in the file_open function.
(cherry picked from commit 83ad77d28d598e4b85809e7225e754e64314f802)
---
plugins/file/file.c | 108 +++++++++++++++++++++++++-------------------
1 file changed, 62 insertions(+), 46 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index b4dec3c5..f96d8a8c 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -493,6 +493,7 @@ file_list_exports (int readonly, int default_only,
/* The per-connection handle. */
struct handle {
int fd;
+ char *name; /* Filename or equivalent. */
struct stat statbuf;
bool is_block_device;
int sector_size;
@@ -548,7 +549,6 @@ static void *
file_open (int readonly)
{
struct handle *h;
- const char *file;
h = malloc (sizeof *h);
if (h == NULL) {
@@ -557,36 +557,44 @@ file_open (int readonly)
}
h->can_write = !readonly;
h->fd = -1;
+ h->name = NULL;
switch (mode) {
case mode_filename:
- file = filename;
- if (open_file_by_name (h, readonly, -1, file) == -1) {
- free (h);
- return NULL;
+ h->name = strdup (filename);
+ if (h->name == NULL) {
+ nbdkit_error ("strdup: %m");
+ goto err;
}
+ if (open_file_by_name (h, readonly, -1, h->name) == -1)
+ goto err;
break;
case mode_directory: {
int dfd;
+ const char *export_name = nbdkit_export_name ();
- file = nbdkit_export_name ();
- if (strchr (file, '/')) {
- nbdkit_error ("exportname cannot contain /");
- free (h);
+ if (export_name == NULL)
+ goto err;
+
+ h->name = strdup (export_name);
+ if (h->name == NULL) {
+ nbdkit_error ("strdup: %m");
+ goto err;
+ }
+ if (strchr (h->name, '/')) {
+ nbdkit_error ("%s: exportname cannot contain /", h->name);
errno = EINVAL;
- return NULL;
+ goto err;
}
dfd = open (directory, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
if (dfd == -1) {
nbdkit_error ("open %s: %m", directory);
- free (h);
- return NULL;
+ goto err;
}
- if (open_file_by_name (h, readonly, dfd, file) == -1) {
- free (h);
+ if (open_file_by_name (h, readonly, dfd, h->name) == -1) {
close (dfd);
- return NULL;
+ goto err;
}
close (dfd);
break;
@@ -595,14 +603,14 @@ file_open (int readonly)
case mode_fd: {
int r;
- /* This is needed for error messages. */
- file = "<file descriptor>";
-
+ if (asprintf (&h->name, "fd=%d", filedesc) == -1) {
+ nbdkit_error ("asprintf: %m");
+ goto err;
+ }
h->fd = dup (filedesc);
if (h->fd == -1) {
- nbdkit_error ("dup fd=%d: %m", filedesc);
- free (h);
- return NULL;
+ nbdkit_error ("dup: %s: %m", h->name);
+ goto err;
}
/* If the file descriptor is readonly then we should not advertise
@@ -610,10 +618,8 @@ file_open (int readonly)
*/
r = fcntl (h->fd, F_GETFL);
if (r == -1) {
- nbdkit_error ("fcntl: F_GETFL: %m");
- close (h->fd);
- free (h);
- return NULL;
+ nbdkit_error ("fcntl: %s: F_GETFL: %m", h->name);
+ goto err;
}
r &= O_ACCMODE;
if (r == O_RDONLY)
@@ -627,25 +633,30 @@ file_open (int readonly)
case mode_dirfd: {
int dfd;
+ const char *export_name = nbdkit_export_name ();
- file = nbdkit_export_name ();
- if (strchr (file, '/')) {
- nbdkit_error ("exportname cannot contain /");
- free (h);
+ if (export_name == NULL)
+ goto err;
+
+ h->name = strdup (export_name);
+ if (h->name == NULL) {
+ nbdkit_error ("strdup: %m");
+ goto err;
+ }
+ if (strchr (h->name, '/')) {
+ nbdkit_error ("%s: exportname cannot contain /", h->name);
errno = EINVAL;
- return NULL;
+ goto err;
}
/* We don't fork, so no need to worry about FD_CLOEXEC on the directory */
dfd = dup (filedesc);
if (dfd == -1) {
- nbdkit_error ("dup dirfd=%d: %m", filedesc);
- free (h);
- return NULL;
+ nbdkit_error ("dup: dirfd=%d: %m", filedesc);
+ goto err;
}
- if (open_file_by_name (h, readonly, dfd, file) == -1) {
- free (h);
+ if (open_file_by_name (h, readonly, dfd, h->name) == -1) {
close (dfd);
- return NULL;
+ goto err;
}
close (dfd);
break;
@@ -656,12 +667,11 @@ file_open (int readonly)
}
assert (h->fd >= 0);
+ assert (h->name != NULL);
if (fstat (h->fd, &h->statbuf) == -1) {
- nbdkit_error ("fstat: %s: %m", file);
- close (h->fd);
- free (h);
- return NULL;
+ nbdkit_error ("fstat: %s: %m", h->name);
+ goto err;
}
if (fadvise_mode != -1) {
@@ -669,7 +679,7 @@ file_open (int readonly)
#ifdef HAVE_POSIX_FADVISE
int r = posix_fadvise (h->fd, 0, 0, fadvise_mode);
if (r == -1)
- nbdkit_debug ("posix_fadvise: %s: %m (ignored)", file);
+ nbdkit_debug ("posix_fadvise: %s: %m (ignored)", h->name);
#else
nbdkit_debug ("fadvise is not supported");
#endif
@@ -680,17 +690,15 @@ file_open (int readonly)
else if (S_ISREG (h->statbuf.st_mode))
h->is_block_device = false;
else {
- nbdkit_error ("file is not regular or block device: %s", file);
- close (h->fd);
- free (h);
- return NULL;
+ nbdkit_error ("file is not regular or block device: %s", h->name);
+ goto err;
}
h->sector_size = 4096; /* Start with safe guess */
#ifdef BLKSSZGET
if (h->is_block_device) {
if (ioctl (h->fd, BLKSSZGET, &h->sector_size) == -1)
- nbdkit_debug ("cannot get sector size: %s: %m", file);
+ nbdkit_debug ("cannot get sector size: %s: %m", h->name);
}
#endif
@@ -711,6 +719,13 @@ file_open (int readonly)
h->can_blkdiscard = h->is_block_device;
return h;
+
+ err:
+ if (h->fd >= 0)
+ close (h->fd);
+ free (h->name);
+ free (h);
+ return NULL;
}
/* Free up the per-connection handle. */
@@ -723,6 +738,7 @@ file_close (void *handle)
remove_fd_from_window (h->fd);
#endif
close (h->fd);
+ free (h->name);
free (h);
}
--
2.47.1

View File

@ -0,0 +1,143 @@
From 6e5d6e5da19acfa8ba2c9fcf81651d0894f83dda Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Wed, 4 Jun 2025 21:03:58 +0100
Subject: [PATCH] file: Add the filename (or equivalent) to error messages
It helps to have this information when there's an error. It
particularly helps in two cases: (1) You don't have access to the
nbdkit command line. (2) The filename is derived from an export name
passed by the client.
(cherry picked from commit 7273bd829fc0d8e492786f8908d8ddc3d5399e06)
---
plugins/file/file.c | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index f96d8a8c..9ba61187 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -754,7 +754,7 @@ file_get_size (void *handle)
r = device_size (h->fd, &h->statbuf);
if (r == -1) {
- nbdkit_error ("device_size: %m");
+ nbdkit_error ("device_size: %s: %m", h->name);
return -1;
}
return r;
@@ -818,7 +818,7 @@ file_flush (void *handle, uint32_t flags)
struct handle *h = handle;
if (fdatasync (h->fd) == -1) {
- nbdkit_error ("fdatasync: %m");
+ nbdkit_error ("fdatasync: %s: %m", h->name);
return -1;
}
@@ -839,11 +839,11 @@ file_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
while (count > 0) {
ssize_t r = pread (h->fd, buf, count, offset);
if (r == -1) {
- nbdkit_error ("pread: %m");
+ nbdkit_error ("pread: %s: %m", h->name);
return -1;
}
if (r == 0) {
- nbdkit_error ("pread: unexpected end of file");
+ nbdkit_error ("pread: %s: unexpected end of file", h->name);
return -1;
}
buf += r;
@@ -875,7 +875,7 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
while (count > 0) {
ssize_t r = pwrite (h->fd, buf, count, offset);
if (r == -1) {
- nbdkit_error ("pwrite: %m");
+ nbdkit_error ("pwrite: %s: %m", h->name);
return -1;
}
buf += r;
@@ -957,7 +957,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %m");
+ nbdkit_error ("zero: %s: %m", h->name);
return -1;
}
@@ -993,7 +993,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %m");
+ nbdkit_error ("zero: %s: %m", h->name);
return -1;
}
@@ -1001,7 +1001,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
else {
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %m");
+ nbdkit_error ("zero: %s: %m", h->name);
return -1;
}
@@ -1023,7 +1023,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %m");
+ nbdkit_error ("zero: %s: %m", h->name);
return -1;
}
@@ -1051,14 +1051,14 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %m");
+ nbdkit_error ("zero: %s: %m", h->name);
return -1;
}
h->can_fallocate = false;
} else {
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %m");
+ nbdkit_error ("zero: %s: %m", h->name);
return -1;
}
@@ -1082,7 +1082,7 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
if (errno != ENOTTY) {
- nbdkit_error ("zero: %m");
+ nbdkit_error ("zero: %s: %m", h->name);
return -1;
}
@@ -1120,7 +1120,7 @@ file_trim (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
/* Trim is advisory; we don't care if it fails for anything other
* than EIO or EPERM. */
if (errno == EPERM || errno == EIO) {
- nbdkit_error ("fallocate: %m");
+ nbdkit_error ("fallocate: %s: %m", h->name);
return -1;
}
@@ -1241,7 +1241,7 @@ file_cache (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
r = posix_fadvise (h->fd, offset, count, POSIX_FADV_WILLNEED);
if (r) {
errno = r;
- nbdkit_error ("posix_fadvise: %m");
+ nbdkit_error ("posix_fadvise: %s: %m", h->name);
return -1;
}
return 0;
--
2.47.1

View File

@ -0,0 +1,132 @@
From 45f740e3b4a707e5d2db739b16fb33f4d7b466cf Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Wed, 4 Jun 2025 21:15:26 +0100
Subject: [PATCH] file: Add offset/count to error messages
Although the same information is available in debug logs, if verbose
messages (-v) are not used then the information is lost. As it might
be useful occasionally, ensure it is captured in errors.
(cherry picked from commit 253574354252f4a77e2df0769b721e76f1777651)
---
plugins/file/file.c | 35 ++++++++++++++++++++++++-----------
1 file changed, 24 insertions(+), 11 deletions(-)
diff --git a/plugins/file/file.c b/plugins/file/file.c
index 9ba61187..ade6f5ff 100644
--- a/plugins/file/file.c
+++ b/plugins/file/file.c
@@ -839,7 +839,8 @@ file_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
while (count > 0) {
ssize_t r = pread (h->fd, buf, count, offset);
if (r == -1) {
- nbdkit_error ("pread: %s: %m", h->name);
+ nbdkit_error ("pread: %s: offset=%" PRIu64 ", count=%" PRIu32 ": %m",
+ h->name, offset, count);
return -1;
}
if (r == 0) {
@@ -875,7 +876,8 @@ file_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
while (count > 0) {
ssize_t r = pwrite (h->fd, buf, count, offset);
if (r == -1) {
- nbdkit_error ("pwrite: %s: %m", h->name);
+ nbdkit_error ("pwrite: %s: offset=%" PRIu64 ", count=%" PRIu32 ": %m",
+ h->name, offset, count);
return -1;
}
buf += r;
@@ -957,7 +959,8 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %s: %m", h->name);
+ nbdkit_error ("zero: %s: offset=%" PRIu64 ", count=%" PRIu32 ": %m",
+ h->name, offset, count);
return -1;
}
@@ -993,7 +996,8 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %s: %m", h->name);
+ nbdkit_error ("zero: %s: offset=%" PRIu64 ", count=%" PRIu32 ": %m",
+ h->name, offset, count);
return -1;
}
@@ -1001,7 +1005,8 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
else {
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %s: %m", h->name);
+ nbdkit_error ("zero: %s: offset=%" PRIu64 ", count=%" PRIu32 ": %m",
+ h->name, offset, count);
return -1;
}
@@ -1023,7 +1028,8 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %s: %m", h->name);
+ nbdkit_error ("zero: %s: offset=%" PRIu64 ", count=%" PRIu32 ": %m",
+ h->name, offset, count);
return -1;
}
@@ -1051,14 +1057,16 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %s: %m", h->name);
+ nbdkit_error ("zero: %s: offset=%" PRIu64 ", count=%" PRIu32 ": %m",
+ h->name, offset, count);
return -1;
}
h->can_fallocate = false;
} else {
if (!is_enotsup (errno)) {
- nbdkit_error ("zero: %s: %m", h->name);
+ nbdkit_error ("zero: %s: offset=%" PRIu64 ", count=%" PRIu32 ":%m",
+ h->name, offset, count);
return -1;
}
@@ -1082,7 +1090,8 @@ file_zero (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
}
if (errno != ENOTTY) {
- nbdkit_error ("zero: %s: %m", h->name);
+ nbdkit_error ("zero: %s: offset=%" PRIu64 ", count=%" PRIu32 ": %m",
+ h->name, offset, count);
return -1;
}
@@ -1120,7 +1129,9 @@ file_trim (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
/* Trim is advisory; we don't care if it fails for anything other
* than EIO or EPERM. */
if (errno == EPERM || errno == EIO) {
- nbdkit_error ("fallocate: %s: %m", h->name);
+ nbdkit_error ("fallocate: %s: offset=%" PRIu64 ", count=%" PRIu32 ":"
+ " %m",
+ h->name, offset, count);
return -1;
}
@@ -1241,7 +1252,9 @@ file_cache (void *handle, uint32_t count, uint64_t offset, uint32_t flags)
r = posix_fadvise (h->fd, offset, count, POSIX_FADV_WILLNEED);
if (r) {
errno = r;
- nbdkit_error ("posix_fadvise: %s: %m", h->name);
+ nbdkit_error ("posix_fadvise: %s: offset=%" PRIu64 ", count=%" PRIu32 ":"
+ " %m",
+ h->name, offset, count);
return -1;
}
return 0;
--
2.47.1

View File

@ -0,0 +1,34 @@
From bac77fb3da2aa6353b5b5a71a7b86bc83289bfc6 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sun, 8 Jun 2025 10:44:32 +0100
Subject: [PATCH] =?UTF-8?q?vddk:=20stats:=20Use=20"us"=20instead=20of=20(U?=
=?UTF-8?q?nicode)=20"=C2=B5s"=20for=20microseconds?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
nbdkit_debug uses C-style escaping for non-ASCII characters in debug
strings, so in logs what we actually see is "\xc2\xb5s" which messes
up the columns.
(cherry picked from commit 1f09bb4abefe8f3f052e8c0b6b34d314887b3c32)
---
plugins/vddk/stats.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/plugins/vddk/stats.c b/plugins/vddk/stats.c
index 7f63333a..59cfee5a 100644
--- a/plugins/vddk/stats.c
+++ b/plugins/vddk/stats.c
@@ -98,7 +98,7 @@ display_stats (void)
nbdkit_debug ("VDDK function stats (-D vddk.stats=1):");
nbdkit_debug ("%-24s %15s %5s %15s",
- "VixDiskLib_...", "µs", "calls", "bytes");
+ "VixDiskLib_...", "us", "calls", "bytes");
for (i = 0; i < stats.len; ++i) {
if (stats.ptr[i].usecs) {
if (stats.ptr[i].bytes > 0)
--
2.47.1

View File

@ -0,0 +1,26 @@
From c39e0da834bf7fdd23a9be0d391c2666596988be Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sun, 8 Jun 2025 11:04:41 +0100
Subject: [PATCH] vddk: stats: Line up the columns correctly
(cherry picked from commit 7da09b07148cc12c3214b18bc96c65ed45625dde)
---
plugins/vddk/stats.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/plugins/vddk/stats.c b/plugins/vddk/stats.c
index 59cfee5a..b551fc5a 100644
--- a/plugins/vddk/stats.c
+++ b/plugins/vddk/stats.c
@@ -97,7 +97,7 @@ display_stats (void)
qsort (stats.ptr, stats.len, sizeof stats.ptr[0], stat_compare);
nbdkit_debug ("VDDK function stats (-D vddk.stats=1):");
- nbdkit_debug ("%-24s %15s %5s %15s",
+ nbdkit_debug ("%-24s %15s %5s %15s",
"VixDiskLib_...", "us", "calls", "bytes");
for (i = 0; i < stats.len; ++i) {
if (stats.ptr[i].usecs) {
--
2.47.1

View File

@ -0,0 +1,61 @@
From c721bf99917a3f33454ebdf683fa450f6d996202 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sun, 8 Jun 2025 10:40:26 +0100
Subject: [PATCH] vddk: stats: Record the byte count of each
QueryAllocatedBlocks call
In -D vddk.stats=1 output, keep track of the size of each call to
QueryAllocatedBlocks, and display that at the end.
After this change, we see the number of bytes scanned in the "bytes"
column (previously this column was empty for this call):
nbdkit: debug: VixDiskLib_... us calls bytes
nbdkit: debug: Open 27051497 2
nbdkit: debug: GetInfo 12538428 4
nbdkit: debug: ConnectEx 6944819 2
nbdkit: debug: QueryAllocatedBlocks 4563503 3 21474967552
nbdkit: debug: Close 1440271 2
nbdkit: debug: Exit 1001407 1
(cherry picked from commit 2db1ede27bb529b36b0075eab337f0c585d1a7ec)
---
plugins/vddk/worker.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 3925fb91..feb8d96f 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -327,7 +327,8 @@ test_can_extents (struct vddk_handle *h)
0, VIXDISKLIB_MIN_CHUNK_SIZE,
VIXDISKLIB_MIN_CHUNK_SIZE,
&block_list);
- VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
+ VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks,
+ VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE);
error_suppression = 0;
if (err == VIX_OK) {
VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
@@ -435,7 +436,8 @@ get_extents_slow (struct command *cmd, struct vddk_handle *h)
start_sector, nr_sectors,
VIXDISKLIB_MIN_CHUNK_SIZE,
&block_list);
- VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
+ VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks,
+ nr_sectors * VIXDISKLIB_SECTOR_SIZE);
if (err != VIX_OK) {
VDDK_ERROR (err, "VixDiskLib_QueryAllocatedBlocks");
return -1;
@@ -532,7 +534,8 @@ pre_cache_extents (struct vddk_handle *h)
start_sector, nr_sectors,
VIXDISKLIB_MIN_CHUNK_SIZE,
&block_list);
- VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
+ VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks,
+ nr_sectors * VIXDISKLIB_SECTOR_SIZE);
if (err != VIX_OK) {
VDDK_ERROR (err, "VixDiskLib_QueryAllocatedBlocks");
nbdkit_extents_free (extents);
--
2.47.1

View File

@ -0,0 +1,178 @@
From 3bc36ccfafeea14e3df60e44e36731d27ba44585 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sun, 8 Jun 2025 10:59:57 +0100
Subject: [PATCH] vddk: stats: Collect elapsed time for ReadAsync and
WriteAsync
A major existing problem with -D vddk.stats=1 is that it did not
accurately account for the time taken in these asynchronous calls.
This change records the elapsed time. (It's not possible to collect
the actual time spent "working on" these calls, we just add up the
total elapsed time, which is fine.)
In the remote case, here's how the output changes from before this
change:
nbdkit: debug: VixDiskLib_... us calls bytes
nbdkit: debug: ReadAsync 18 1 8192
to after this change:
nbdkit: debug: VixDiskLib_... us calls bytes
nbdkit: debug: ReadAsync 243745 1 8192
An interesting thing about this is that in the local case this
actually reduces the apparent elapsed time, I think because VDDK calls
complete_command directly from the Read/WriteAsync function. It only
increases the apparent elapsed time in the remote case.
(cherry picked from commit a224d7fd8d839287c63cb4a063569283dd974b48)
---
plugins/vddk/nbdkit-vddk-plugin.pod | 3 +--
plugins/vddk/vddk.h | 37 ++++++++++++++++++++++-------
plugins/vddk/worker.c | 13 ++++++++--
3 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
index e937942c..1e376140 100644
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
@@ -715,8 +715,7 @@ Same as above, but for writing and flushing writes.
=item C<WriteAsync>
Same as above, but for asynchronous read and write calls introduced in
-nbdkit 1.30. Unfortunately at the moment the amount of time spent in
-these calls is not accounted for correctly.
+S<nbdkit 1.30>.
=item C<QueryAllocatedBlocks>
diff --git a/plugins/vddk/vddk.h b/plugins/vddk/vddk.h
index 461fb528..5ff02649 100644
--- a/plugins/vddk/vddk.h
+++ b/plugins/vddk/vddk.h
@@ -39,6 +39,7 @@
#include <pthread.h>
+#include "cleanup.h"
#include "isaligned.h"
#include "tvdiff.h"
#include "vector.h"
@@ -91,7 +92,7 @@ extern int vddk_debug_stats;
*/
#define VDDK_CALL_START(fn, fs, ...) \
do { \
- struct timeval start_t, end_t; \
+ struct timeval start_t; \
/* GCC can optimize this away at compile time: */ \
const bool datapath = \
strcmp (#fn, "VixDiskLib_Read") == 0 || \
@@ -105,13 +106,13 @@ extern int vddk_debug_stats;
do
#define VDDK_CALL_END(fn, bytes_) \
while (0); \
- if (vddk_debug_stats) { \
- gettimeofday (&end_t, NULL); \
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&stats_lock); \
- stats_##fn.usecs += tvdiff_usec (&start_t, &end_t); \
- stats_##fn.calls++; \
- stats_##fn.bytes += bytes_; \
- } \
+ update_stats (&start_t, bytes_, &stats_##fn); \
+ } while (0)
+/* Same as VDDK_CALL_END, but for {Read,Write}Async, where we will
+ * update the stats ourself.
+ */
+#define VDDK_CALL_END_ASYNC() \
+ while (0); \
} while (0)
/* Print VDDK errors. */
@@ -141,6 +142,7 @@ struct command {
uint64_t id; /* serial number */
/* These fields are used by the internal implementation. */
+ struct timeval start_t; /* start time */
pthread_mutex_t mutex; /* completion mutex */
pthread_cond_t cond; /* completion condition */
enum { SUBMITTED, SUCCEEDED, FAILED } status;
@@ -197,6 +199,25 @@ extern pthread_mutex_t stats_lock;
#undef OPTIONAL_STUB
extern void display_stats (void);
+static inline void
+update_stats (const struct timeval *start_t, uint64_t bytes,
+ struct vddk_stat *stat)
+{
+ if (vddk_debug_stats) {
+ struct timeval end_t;
+ int64_t usecs;
+
+ gettimeofday (&end_t, NULL);
+ usecs = tvdiff_usec (start_t, &end_t);
+
+ /* Keep this critical section as small as possible. */
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&stats_lock);
+ stat->usecs += usecs;
+ stat->calls++;
+ stat->bytes += bytes;
+ }
+}
+
/* utils.c */
extern void trim (char *str);
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index feb8d96f..9d3a5940 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -120,6 +120,11 @@ complete_command (void *vp, VixError result)
nbdkit_debug ("command %" PRIu64 " (%s) completed",
cmd->id, command_type_string (cmd->type));
+ /* Update the stats for this asynchronous call. */
+ update_stats (&cmd->start_t, cmd->count,
+ cmd->type == READ ? &stats_VixDiskLib_ReadAsync :
+ &stats_VixDiskLib_WriteAsync);
+
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex);
if (result == VIX_OK) {
@@ -219,13 +224,15 @@ do_read (struct command *cmd, struct vddk_handle *h)
offset /= VIXDISKLIB_SECTOR_SIZE;
count /= VIXDISKLIB_SECTOR_SIZE;
+ gettimeofday (&cmd->start_t, NULL);
+
VDDK_CALL_START (VixDiskLib_ReadAsync,
"handle, %" PRIu64 " sectors, "
"%" PRIu32 " sectors, buffer, callback, %" PRIu64,
offset, count, cmd->id)
err = VixDiskLib_ReadAsync (h->handle, offset, count, buf,
complete_command, cmd);
- VDDK_CALL_END (VixDiskLib_ReadAsync, count * VIXDISKLIB_SECTOR_SIZE);
+ VDDK_CALL_END_ASYNC ();
if (err != VIX_ASYNC) {
VDDK_ERROR (err, "VixDiskLib_ReadAsync");
return -1;
@@ -256,13 +263,15 @@ do_write (struct command *cmd, struct vddk_handle *h)
offset /= VIXDISKLIB_SECTOR_SIZE;
count /= VIXDISKLIB_SECTOR_SIZE;
+ gettimeofday (&cmd->start_t, NULL);
+
VDDK_CALL_START (VixDiskLib_WriteAsync,
"handle, %" PRIu64 " sectors, "
"%" PRIu32 " sectors, buffer, callback, %" PRIu64,
offset, count, cmd->id)
err = VixDiskLib_WriteAsync (h->handle, offset, count, buf,
complete_command, cmd);
- VDDK_CALL_END (VixDiskLib_WriteAsync, count * VIXDISKLIB_SECTOR_SIZE);
+ VDDK_CALL_END_ASYNC ();
if (err != VIX_ASYNC) {
VDDK_ERROR (err, "VixDiskLib_WriteAsync");
return -1;
--
2.47.1

View File

@ -0,0 +1,170 @@
From 4d7da0f8b01f70ceddfd3d10345bf08284591938 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 22 Apr 2025 17:01:12 -0500
Subject: [PATCH] server: Fix off-by-one for maximum block_status length
[CVE-2025-47711]
There has been an off-by-one bug in the code for .extents since the
introduction of that callback. Remember, internally the code allows
plugins to report on extents with 64-bit lengths, but the protocol
only supports 32-bit block status calls (nbdkit will need to create
plugin version 3 before it can support NBD's newer 64-bit block
status). As such, the server loop intentionally truncates a plugin's
large extent to 2**32-1 bytes. But in the process of checking whether
the loop should exit early, or if any additional extents should be
reported to the client, the server used 'pos > offset+count' instead
of >=, which is one byte too far. If the client has requested exactly
2**32-1 bytes, and the plugin's first extent has that same length, the
code erroneously proceeds on to the plugin's second extent. Worse, if
the plugin's first extent has 2**32 bytes or more, it was truncated to
2**31-1 bytes, but not completely handled, and the failure to exit the
loop early means that the server then fails the assertion:
nbdkit: ../../server/protocol.c:505: extents_to_block_descriptors:
Assertion `e.length <= length' failed.
The single-byte fix addresses both symptoms, while the added test
demonstrates both when run on older nbdkit (the protocol violation
when the plugin returns 2**32-1 bytes in the first extent, and the
assertion failure when the plugin returns 2**32 or more bytes in the
first extent).
The problem can only be triggered by a client request for 2**32-1
bytes; anything smaller is immune. The problem also does not occur
for plugins that do not return extents information beyond the client's
request, or if the first extent is smaller than the client's request.
The ability to cause the server to die from an assertion failure can
be used as a denial of service attack against other clients.
Mitigations: if you require the use of TLS, then you can ensure that
you only have trusted clients that won't trigger a block status call
of length 2**32-1 bytes. Also, you can use "--filter=blocksize-policy
blocksize-minimum=512" to reject block status attempts from clients
that are not sector-aligned.
Fixes: 26455d45 ('server: protocol: Implement Block Status "base:allocation".', v1.11.10)
Reported-by: Nikolay Ivanets <stenavin@gmail.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20250423211953.GR1450@redhat.com>
Reviewed-by: Richard W.M. Jones <rjones@redhat.com>
(cherry picked from commit e6f96bd1b77c0cc927ce6aeff650b52238304f39)
---
server/protocol.c | 2 +-
tests/Makefile.am | 2 ++
tests/test-eval-extents.sh | 71 ++++++++++++++++++++++++++++++++++++++
3 files changed, 74 insertions(+), 1 deletion(-)
create mode 100755 tests/test-eval-extents.sh
diff --git a/server/protocol.c b/server/protocol.c
index d428bfc8..b4b1c162 100644
--- a/server/protocol.c
+++ b/server/protocol.c
@@ -499,7 +499,7 @@ extents_to_block_descriptors (struct nbdkit_extents *extents,
(*nr_blocks)++;
pos += length;
- if (pos > offset + count) /* this must be the last block */
+ if (pos >= offset + count) /* this must be the last block */
break;
/* If we reach here then we must have consumed this whole
diff --git a/tests/Makefile.am b/tests/Makefile.am
index eed96d28..9f9885b4 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -824,6 +824,7 @@ TESTS += \
test-eval.sh \
test-eval-file.sh \
test-eval-exports.sh \
+ test-eval-extents.sh \
test-eval-cache.sh \
test-eval-dump-plugin.sh \
test-eval-disconnect.sh \
@@ -832,6 +833,7 @@ EXTRA_DIST += \
test-eval.sh \
test-eval-file.sh \
test-eval-exports.sh \
+ test-eval-extents.sh \
test-eval-cache.sh \
test-eval-dump-plugin.sh \
test-eval-disconnect.sh \
diff --git a/tests/test-eval-extents.sh b/tests/test-eval-extents.sh
new file mode 100755
index 00000000..92b503e6
--- /dev/null
+++ b/tests/test-eval-extents.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+source ./functions.sh
+set -e
+set -x
+
+requires_run
+requires_plugin eval
+requires_nbdsh_uri
+requires nbdsh --base-allocation --version
+
+files="eval-extents.out"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Trigger an off-by-one bug introduced in v1.11.10 and fixed in v1.43.7
+export script='
+def f(context, offset, extents, status):
+ print(extents)
+
+# First, probe where the server should return 2 extents.
+h.block_status(2**32-1, 2, f)
+
+# Next, probe where the server has exactly 2**32-1 bytes in its first extent.
+h.block_status(2**32-1, 1, f)
+
+# Now, probe where the first extent has to be truncated.
+h.block_status(2**32-1, 0, f)
+'
+nbdkit eval \
+ get_size='echo 5G' \
+ pread='dd if=/dev/zero count=$3 iflag=count_bytes' \
+ extents='echo 0 4G 1; echo 4G 1G 2' \
+ --run 'nbdsh --base-allocation --uri "$uri" -c "$script"' \
+ > eval-extents.out
+cat eval-extents.out
+diff -u - eval-extents.out <<EOF
+[4294967294, 1, 1073741824, 2]
+[4294967295, 1]
+[4294967295, 1]
+EOF
--
2.47.1

View File

@ -0,0 +1,163 @@
From fbca97afb4139f4ae42113f1a91ba595d332d07c Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 22 Apr 2025 19:53:39 -0500
Subject: [PATCH] blocksize: Fix 32-bit overflow in .extents [CVE-2025-47712]
If the original request is larger than 2**32 - minblock, then we were
calling nbdkit_extents_aligned() with a count that rounded up then
overflowed to 0 instead of the intended 4G because of overflowing a
32-bit type, which in turn causes an assertion failure:
nbdkit: ../../server/backend.c:814: backend_extents: Assertion `backend_valid_range (c, offset, count)' failed.
The fix is to force the rounding to be in a 64-bit type from the
get-go.
The ability for a well-behaved client to cause the server to die from
an assertion failure can be used as a denial of service attack against
other clients. Mitigations: if you requrire the use of TLS, then you
can ensure that you only have trusted clients that won't trigger a
block status call that large. Also, the problem only occurs when
using the blocksize filter, although setting the filter's maxlen
parameter to a smaller value than its default of 2**32-1 does not
help.
Fixes: 2680be00 ('blocksize: Fix .extents when plugin changes type within minblock', v1.21.16)
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20250423210917.1784789-3-eblake@redhat.com>
Reviewed-by: Richard W.M. Jones <rjones@redhat.com>
(cherry picked from commit a486f88d1eea653ea88b0bf8804c4825dab25ec7)
---
filters/blocksize/blocksize.c | 5 +-
tests/Makefile.am | 2 +
tests/test-blocksize-extents-overflow.sh | 83 ++++++++++++++++++++++++
3 files changed, 88 insertions(+), 2 deletions(-)
create mode 100755 tests/test-blocksize-extents-overflow.sh
diff --git a/filters/blocksize/blocksize.c b/filters/blocksize/blocksize.c
index 09195cea..e5c8b744 100644
--- a/filters/blocksize/blocksize.c
+++ b/filters/blocksize/blocksize.c
@@ -482,8 +482,9 @@ blocksize_extents (nbdkit_next *next,
return -1;
}
- if (nbdkit_extents_aligned (next, MIN (ROUND_UP (count, h->minblock),
- h->maxlen),
+ if (nbdkit_extents_aligned (next,
+ MIN (ROUND_UP ((uint64_t) count, h->minblock),
+ h->maxlen),
ROUND_DOWN (offset, h->minblock), flags,
h->minblock, extents2, err) == -1)
return -1;
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9f9885b4..428b65e2 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1592,12 +1592,14 @@ test_layers_filter3_la_LIBADD = $(IMPORT_LIBRARY_ON_WINDOWS)
TESTS += \
test-blocksize.sh \
test-blocksize-extents.sh \
+ test-blocksize-extents-overflow.sh \
test-blocksize-default.sh \
test-blocksize-sharding.sh \
$(NULL)
EXTRA_DIST += \
test-blocksize.sh \
test-blocksize-extents.sh \
+ test-blocksize-extents-overflow.sh \
test-blocksize-default.sh \
test-blocksize-sharding.sh \
$(NULL)
diff --git a/tests/test-blocksize-extents-overflow.sh b/tests/test-blocksize-extents-overflow.sh
new file mode 100755
index 00000000..844c3999
--- /dev/null
+++ b/tests/test-blocksize-extents-overflow.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright Red Hat
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Demonstrate a fix for a bug where blocksize overflowed 32 bits
+
+source ./functions.sh
+set -e
+set -x
+
+requires_run
+requires_plugin eval
+requires_nbdsh_uri
+requires nbdsh --base-allocation --version
+
+# Script a sparse server that requires 512-byte aligned requests.
+exts='
+if test $(( ($3|$4) & 511 )) != 0; then
+ echo "EINVAL request unaligned" 2>&1
+ exit 1
+fi
+echo 0 5G 0
+'
+
+# We also need an nbdsh script to parse all extents, coalescing adjacent
+# types for simplicity.
+# FIXME: Once nbdkit plugin version 3 allows 64-bit block extents, run
+# this test twice, once for each bit size (32-bit needs 2 extents, 64-bit
+# will get the same result with only 1 extent).
+export script='
+size = h.get_size()
+offs = 0
+entries = []
+def f(metacontext, offset, e, err):
+ global entries
+ global offs
+ assert offs == offset
+ for length, flags in zip(*[iter(e)] * 2):
+ if entries and flags == entries[-1][1]:
+ entries[-1] = (entries[-1][0] + length, flags)
+ else:
+ entries.append((length, flags))
+ offs = offs + length
+
+# Test a loop over the entire device
+while offs < size:
+ len = min(size - offs, 2**32-1)
+ h.block_status(len, offs, f)
+assert entries == [(5 * 2**30, 0)]
+'
+
+# Now run everything
+nbdkit --filter=blocksize eval minblock=512 \
+ get_size='echo 5G' pread='exit 1' extents="$exts" \
+ --run 'nbdsh --base-allocation -u "$uri" -c "$script"'
--
2.47.1

View File

@ -0,0 +1,49 @@
From c33178791b9f66cb49082a496b5e65c6027f5ebd Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 23 Jun 2025 13:05:51 +0100
Subject: [PATCH] vddk: Add support for VDDK 9.0.0.0
(cherry picked from commit c966fe7d05ed7e992e1bf725d4625434c74eaf8d)
---
plugins/vddk/nbdkit-vddk-plugin.pod | 2 +-
plugins/vddk/vddk.c | 4 +++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
index 1e376140..a03f688a 100644
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
@@ -405,7 +405,7 @@ This is also the first version that supported the
C<VixDiskLib_QueryAllocatedBlocks> API. This is used to provide
sparseness (extent) information over NBD.
-=item VDDK 8.0.2.1 (released Feb 2024)
+=item VDDK 9.0.0.0 (released Jun 2025)
This is the latest version of VDDK that we have tested at the time of
writing, but the plugin should work with future versions.
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index bbf0af31..f5e22ae3 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -78,7 +78,7 @@ NBDKIT_DLL_PUBLIC int vddk_debug_datapath = 1;
void *dl; /* dlopen handle */
bool init_called; /* was InitEx called */
__thread int error_suppression; /* threadlocal error suppression */
-int library_version; /* VDDK major: 6, 7, 8, ... */
+int library_version; /* VDDK major: 6, 7, 8, 9 */
bool is_remote; /* true if remote connection */
enum compression_type compression; /* compression */
@@ -407,6 +407,8 @@ load_library (bool load_error_is_fatal)
* our testsuite is easier to write if we point libdir directly to
* a stub .so.
*/
+ { "lib64/libvixDiskLib.so.9", 9 },
+ { "libvixDiskLib.so.9", 9 },
{ "lib64/libvixDiskLib.so.8", 8 },
{ "libvixDiskLib.so.8", 8 },
{ "lib64/libvixDiskLib.so.7", 7 },
--
2.47.1

View File

@ -0,0 +1,39 @@
From e947a2a55cb018711b7f4ede5b5cec291ed5cf24 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Wed, 2 Jul 2025 16:18:19 +0100
Subject: [PATCH] server: Fix .zero fallback path
When no efficient zero method is supported, we fall back to writing a
buffer of actual zeroes. However because of an omitted update to
'offset' we would only zero out (up to) the first 64M of each range.
nbdcopy defaults to working on blocks of 128M, leaving the second 64M
unzeroed.
This affects only backing filesystems which do not support fallocate
FALLOC_FL_PUNCH_HOLE or FALLOC_FL_ZERO_RANGE, which turns out to be
rare, but it does include some NFS-mounted filesystems which is where
I saw this problem.
Fixes: commit 19184d3eb6356ae3b14da0fbaa9c9bdc7743a448
Thanks: Alex Kalenyuk
(cherry picked from commit 20b23fc9838faeddfd42664a7f497a9b29dc5921)
(cherry picked from commit 3c8d0812e7a7b6a1f9e5cceab7bee6e25b9cd7cd)
---
server/plugins.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/server/plugins.c b/server/plugins.c
index 3c7df0d2..db36ce42 100644
--- a/server/plugins.c
+++ b/server/plugins.c
@@ -807,6 +807,7 @@ plugin_zero (struct context *c,
if (r == -1)
break;
count -= limit;
+ offset += limit;
}
done:
--
2.47.1

View File

@ -1,82 +0,0 @@
From 99788909d9ec36e3210cf85976fe5b18da690ddd Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Wed, 4 Aug 2021 20:24:59 +0100
Subject: [PATCH] cache, cow: Fix data corruption in zero and trim on unaligned
tail
Commit eb6009b092 ("cache, cow: Reduce use of bounce-buffer") first
introduced in nbdkit 1.14 added an optimization of the
read-modify-write mechanism used for unaligned heads and tails when
zeroing in the cache layer.
Unfortunately the part applied to the tail contained a mistake: It
zeroes the end of the buffer rather than the beginning. This causes
data corruption when you use the zero or trim function with an offset
and count which is not aligned to the block size.
Although the bug has been around for years, a recent change made it
more likely to happen. Commit c1905b0a28 ("cache, cow: Use a 64K
block size by default") increased the default block size from 4K to
64K. Most filesystems use a 4K block size so operations like fstrim
will make 4K-aligned requests, and with a 4K block size also in the
cache or cow filter the unaligned case would never have been hit
before.
We can demonstrate the bug simply by filling a buffer with data
(100000 bytes in the example), and then trimming that data, which
ought to zero it out.
Before this commit there is data visible after the trim:
$ nbdkit --filter=cow data "0x21 * 100000" --run 'nbdsh -u $uri -c "h.trim(100000, 0)" ; nbdcopy $uri - | hexdump -C'
00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00018000 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 |!!!!!!!!!!!!!!!!|
*
000186a0
After this commit the trim completely clears the data:
$ nbdkit --filter=cow data "0x21 * 100000" --run 'nbdsh -u $uri -c "h.trim(100000, 0)" ; nbdcopy $uri - | hexdump -C'
00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
000186a0
Thanks: Ming Xie for finding the bug
Fixes: commit eb6009b092ae642ed25f133d487dd40ef7bf70f8
(cherry picked from commit a0ae7b2158598ce48ac31706319007f716d01c87)
(cherry picked from commit c0b15574647672cb5c48178333acdd07424692ef)
---
filters/cache/cache.c | 2 +-
filters/cow/cow.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/filters/cache/cache.c b/filters/cache/cache.c
index 91dcc43d..0616cc7b 100644
--- a/filters/cache/cache.c
+++ b/filters/cache/cache.c
@@ -493,7 +493,7 @@ cache_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
r = blk_read (next_ops, nxdata, blknum, block, err);
if (r != -1) {
- memset (&block[count], 0, blksize - count);
+ memset (block, 0, count);
r = blk_write (next_ops, nxdata, blknum, block, flags, err);
}
if (r == -1)
diff --git a/filters/cow/cow.c b/filters/cow/cow.c
index 51ca64a4..1cfcc4e7 100644
--- a/filters/cow/cow.c
+++ b/filters/cow/cow.c
@@ -419,7 +419,7 @@ cow_zero (struct nbdkit_next_ops *next_ops, void *nxdata,
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
r = blk_read (next_ops, nxdata, blknum, block, err);
if (r != -1) {
- memset (&block[count], 0, BLKSIZE - count);
+ memset (block, 0, count);
r = blk_write (blknum, block, err);
}
if (r == -1)
--
2.31.1

View File

@ -1,94 +0,0 @@
From 6b9d4380df9bd0be91f49aad8c4f47b4e672adde Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Mon, 16 Aug 2021 13:43:29 -0500
Subject: [PATCH] server: CVE-2021-3716 reset structured replies on starttls
https://nostarttls.secvuln.info/ pointed out a series of CVEs in
common implementation flaw in various SMTP and IMAP clients and
servers, all with a common thread of improperly caching plaintext
state across the STARTTLS encryption boundary; and recommended that
other protocols with a STARTTLS operation perform a similar audit.
It turns out that nbdkit has the same vulnerability in regards to the
NBD protocol: when nbdkit is run in opportunistic TLS mode, an
attacker is able to inject a plaintext NBD_OPT_STRUCTURED_REPLY before
proxying everything else a client sends to the server; if the server
then acts on that plaintext request (as nbdkit did before this patch),
then the server ends up sending structured replies to at least
NBD_CMD_READ, even though the client was assuming that the transition
to TLS has ruled out a MitM attack.
On the bright side, nbdkit's behavior on a second
NBD_OPT_STRUCTURED_REPLY was to still reply with success, so a client
that always requests structured replies after starting TLS sees no
difference in behavior (that is, qemu 2.12 and later are immune) (had
nbdkit given an error to the second request, that may have caused
confusion to more clients). And there is always the mitigation of
using --tls=require, which lets nbdkit reject the MitM message
pre-encryption. However, nbd-client 3.15 to the present do not
understand structured replies, and I have confirmed that a MitM
attacker can thus cause a denial-of-service attack that does not
trigger until the client does its first encrypted NBD_CMD_READ.
The NBD spec has been recently tightened to declare the nbdkit
behavior to be a security hole:
https://github.com/NetworkBlockDevice/nbd/commit/77e55378096aa
Fixes: eaa4c6e9a2c4bd (server: Minimal implementation of NBD Structured Replies.)
(cherry picked from commit 09a13dafb7bb3a38ab52eb5501cba786365ba7fd)
(cherry picked from commit 6185b15a81e6915734d678f0781e31d45a7941a1)
---
docs/nbdkit-security.pod | 11 +++++++++--
server/protocol-handshake-newstyle.c | 3 ++-
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/docs/nbdkit-security.pod b/docs/nbdkit-security.pod
index 3a28e54d..5a4e6da8 100644
--- a/docs/nbdkit-security.pod
+++ b/docs/nbdkit-security.pod
@@ -10,7 +10,7 @@ For how to report new security issues, see the C<SECURITY> file in the
top level source directory, also available online here:
L<https://github.com/libguestfs/nbdkit/blob/master/SECURITY>
-=head2 CVE-2019-14850
+=head2 CVE-2019-14850
denial of service due to premature opening of back-end connection
See the full announcement and links to mitigation, tests and fixes
@@ -26,6 +26,13 @@ See the full announcement and links to mitigation, tests and fixes
here:
https://www.redhat.com/archives/libguestfs/2019-September/msg00272.html
+=head2 CVE-2021-3716
+structured read denial of service attack against starttls
+
+See the full announcement and links to mitigation, tests and fixes
+here:
+https://www.redhat.com/archives/libguestfs/2021-August/msg00083.html
+
=head1 SEE ALSO
L<nbdkit(1)>.
@@ -38,4 +45,4 @@ Richard W.M. Jones
=head1 COPYRIGHT
-Copyright (C) 2013-2020 Red Hat Inc.
+Copyright (C) 2013-2021 Red Hat Inc.
diff --git a/server/protocol-handshake-newstyle.c b/server/protocol-handshake-newstyle.c
index 0a76a814..b94950e2 100644
--- a/server/protocol-handshake-newstyle.c
+++ b/server/protocol-handshake-newstyle.c
@@ -495,7 +495,8 @@ negotiate_handshake_newstyle_options (void)
return -1;
conn->using_tls = true;
debug ("using TLS on this connection");
- /* Wipe out any cached default export name. */
+ /* Wipe out any cached state. */
+ conn->structured_replies = false;
for_each_backend (b) {
struct handle *h = get_handle (conn, b->i);
free (h->default_exportname);
--
2.31.1

View File

@ -1,40 +0,0 @@
From add9b794b9dc697a1b52115c997fcfb6e06bf64c Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Mon, 16 Aug 2021 13:43:29 -0500
Subject: [PATCH] server: reset meta context replies on starttls
Related to CVE-2021-3716, but not as severe. No compliant client will
send NBD_CMD_BLOCK_STATUS unless it first negotiates
NBD_OPT_SET_META_CONTEXT. If an attacker injects a premature
SET_META_CONTEXT, either the client will never notice (because it
never uses BLOCK_STATUS), or the client will overwrite the attacker's
attempt with the client's own SET_META_CONTEXT request after
encryption is enabled. So I don't class this as having the potential
to trigger denial-of-service due to any protocol mismatch between
compliant client and server (I don't care what happens with
non-compliant clients).
Fixes: 26455d45 (server: protocol: Implement Block Status "base:allocation".)
(cherry picked from commit 6c5faac6a37077cf2366388a80862bb00616d0d8)
(cherry picked from commit 814d8103fb4b581dc01dfd25d2cd81596576f211)
---
server/protocol-handshake-newstyle.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/server/protocol-handshake-newstyle.c b/server/protocol-handshake-newstyle.c
index b94950e2..eb0f3961 100644
--- a/server/protocol-handshake-newstyle.c
+++ b/server/protocol-handshake-newstyle.c
@@ -497,6 +497,9 @@ negotiate_handshake_newstyle_options (void)
debug ("using TLS on this connection");
/* Wipe out any cached state. */
conn->structured_replies = false;
+ free (conn->exportname_from_set_meta_context);
+ conn->exportname_from_set_meta_context = NULL;
+ conn->meta_context_base_allocation = false;
for_each_backend (b) {
struct handle *h = get_handle (conn, b->i);
free (h->default_exportname);
--
2.31.1

View File

@ -1,59 +0,0 @@
From 3c2879a38c299b725091cea45329879e3f46fc99 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Tue, 31 Aug 2021 11:23:27 +0100
Subject: [PATCH] cow: Fix for qemu 6.1 which requires backing format
The diffing example in the manual created a qcow2 file with a backing
file but did not specify the backing format. However qemu 6.1 now
requires this and fails with:
qemu-img: cow-diff.qcow2: Backing file specified without backing format
or:
qemu-img: Could not change the backing file to 'cow-base.img': backing format must be specified
Fix the example by adding the -F option to the command line.
Also there was a test of this rebasing sequence which failed, so this
commit updates the test too.
(cherry picked from commit 618290ef33ce13b75c1a79fea1f1ffb327b5ba07)
---
filters/cow/nbdkit-cow-filter.pod | 4 ++--
tests/test-cow.sh | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/filters/cow/nbdkit-cow-filter.pod b/filters/cow/nbdkit-cow-filter.pod
index 4d5ae856..510bdd40 100644
--- a/filters/cow/nbdkit-cow-filter.pod
+++ b/filters/cow/nbdkit-cow-filter.pod
@@ -101,8 +101,8 @@ At the end, disconnect the client.
Run these C<qemu-img> commands to construct a qcow2 file containing
the differences:
- qemu-img create -f qcow2 -b nbd:localhost diff.qcow2
- qemu-img rebase -b disk.img diff.qcow2
+ qemu-img create -F raw -b nbd:localhost -f qcow2 diff.qcow2
+ qemu-img rebase -F raw -b disk.img -f qcow2 diff.qcow2
F<diff.qcow2> now contains the differences between the base
(F<disk.img>) and the changes stored in nbdkit-cow-filter. C<nbdkit>
diff --git a/tests/test-cow.sh b/tests/test-cow.sh
index 8772afd7..edc4c223 100755
--- a/tests/test-cow.sh
+++ b/tests/test-cow.sh
@@ -72,8 +72,8 @@ fi
# If we have qemu-img, try the hairy rebase operation documented
# in the nbdkit-cow-filter manual.
if qemu-img --version >/dev/null 2>&1; then
- qemu-img create -f qcow2 -b nbd:unix:$sock cow-diff.qcow2
- time qemu-img rebase -b cow-base.img cow-diff.qcow2
+ qemu-img create -F raw -b nbd:unix:$sock -f qcow2 cow-diff.qcow2
+ time qemu-img rebase -F raw -b cow-base.img -f qcow2 cow-diff.qcow2
qemu-img info cow-diff.qcow2
# This checks the file we created exists.
--
2.31.1

View File

@ -1,141 +0,0 @@
From 9e20e2696fdb68008c9b4f1c36298f813320e381 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sat, 23 Oct 2021 16:16:39 +0100
Subject: [PATCH] vddk: Include VDDK major library version in --dump-plugin
output
Although it doesn't seem to be possible to get the precise VDDK
version, With a relatively simple change we can at least return the
VDDK major version. Currently this can be 5, 6 or 7.
(cherry picked from commit 8700649d147948897f3b97810a1dff37924bdd6e)
---
plugins/vddk/nbdkit-vddk-plugin.pod | 4 ++++
plugins/vddk/vddk.c | 29 +++++++++++++++++++----------
tests/test-vddk-real-dump-plugin.sh | 2 ++
3 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
index 8b14eda0..822b96be 100644
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
@@ -417,6 +417,10 @@ at runtime.
If this is printed then the C<nfchostport=PORT> parameter is supported
by this build.
+=item C<vddk_library_version=...>
+
+The VDDK major library version: 5, 6, 7, ...
+
=item C<vddk_dll=...>
Prints the full path to the VDDK shared library. Since this requires
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 69193504..291283f4 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -77,6 +77,7 @@ int vddk_debug_datapath = 1;
static void *dl; /* dlopen handle */
static bool init_called; /* was InitEx called */
static __thread int error_suppression; /* threadlocal error suppression */
+static int library_version; /* VDDK major: 5, 6, 7, ... */
static enum { NONE = 0, ZLIB, FASTLZ, SKIPZ } compression; /* compression */
static char *config; /* config */
@@ -297,7 +298,10 @@ vddk_config (const char *key, const char *value)
static void
load_library (bool load_error_is_fatal)
{
- static const char *sonames[] = {
+ static struct {
+ const char *soname;
+ int library_version;
+ } libs[] = {
/* Prefer the newest library in case multiple exist. Check two
* possible directories: the usual VDDK installation puts .so
* files in an arch-specific subdirectory of $libdir (our minimum
@@ -305,12 +309,13 @@ load_library (bool load_error_is_fatal)
* but our testsuite is easier to write if we point libdir
* directly to a stub .so.
*/
- "lib64/libvixDiskLib.so.7",
- "libvixDiskLib.so.7",
- "lib64/libvixDiskLib.so.6",
- "libvixDiskLib.so.6",
- "lib64/libvixDiskLib.so.5",
- "libvixDiskLib.so.5",
+ { "lib64/libvixDiskLib.so.7", 7 },
+ { "libvixDiskLib.so.7", 7 },
+ { "lib64/libvixDiskLib.so.6", 6 },
+ { "libvixDiskLib.so.6", 6 },
+ { "lib64/libvixDiskLib.so.5", 5 },
+ { "libvixDiskLib.so.5", 5 },
+ { NULL }
};
size_t i;
CLEANUP_FREE char *orig_error = NULL;
@@ -323,19 +328,20 @@ load_library (bool load_error_is_fatal)
}
}
- for (i = 0; i < sizeof sonames / sizeof sonames[0]; ++i) {
+ for (i = 0; libs[i].soname != NULL; ++i) {
CLEANUP_FREE char *path;
/* Set the full path so that dlopen will preferentially load the
* system libraries from the same directory.
*/
- if (asprintf (&path, "%s/%s", libdir, sonames[i]) == -1) {
+ if (asprintf (&path, "%s/%s", libdir, libs[i].soname) == -1) {
nbdkit_error ("asprintf: %m");
exit (EXIT_FAILURE);
}
dl = dlopen (path, RTLD_NOW);
if (dl != NULL) {
+ library_version = libs[i].library_version;
/* Now that we found the library, ensure that LD_LIBRARY_PATH
* includes its directory for all future loads. This may modify
* path in-place and/or re-exec nbdkit, but that's okay.
@@ -356,10 +362,12 @@ load_library (bool load_error_is_fatal)
"If '%s' is located on a non-standard path you may need to\n"
"set libdir=/path/to/vmware-vix-disklib-distrib.\n\n"
"See nbdkit-vddk-plugin(1) man page section \"LIBRARY LOCATION\" for details.",
- orig_error ? : "(unknown error)", sonames[0]);
+ orig_error ? : "(unknown error)", libs[0].soname);
exit (EXIT_FAILURE);
}
+ assert (library_version >= 5);
+
/* Load symbols. */
#define STUB(fn,ret,args) \
do { \
@@ -474,6 +482,7 @@ vddk_dump_plugin (void)
printf ("vddk_default_libdir=%s\n", VDDK_LIBDIR);
printf ("vddk_has_nfchostport=1\n");
+ printf ("vddk_library_version=%d\n", library_version);
#if defined(HAVE_DLADDR)
/* It would be nice to print the version of VDDK from the shared
diff --git a/tests/test-vddk-real-dump-plugin.sh b/tests/test-vddk-real-dump-plugin.sh
index 1479e416..59c79693 100755
--- a/tests/test-vddk-real-dump-plugin.sh
+++ b/tests/test-vddk-real-dump-plugin.sh
@@ -51,10 +51,12 @@ rm -f $files
cleanup_fn rm -f $files
nbdkit -f -v vddk libdir="$vddkdir" --dump-plugin > $out
+cat $out
# Check the vddk_* entries are set.
grep ^vddk_default_libdir= $out
grep ^vddk_has_nfchostport= $out
+grep ^vddk_library_version= $out
grep ^vddk_dll= $out
dll="$(grep ^vddk_dll $out | cut -d= -f2)"
--
2.31.1

View File

@ -1,55 +0,0 @@
From b8b376cf39d97c9f523a9867612126088b43c523 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sat, 23 Oct 2021 19:50:52 +0100
Subject: [PATCH] vddk: Only print vddk_library_version when we managed to load
the library
Because --dump-plugin calls load_library (false) it won't fail if we
didn't manage to load the library. This results in library_version
being 0, which we printed incorrectly.
Resolve this problem by not printing the vddk_library_version entry in
this case.
Fixes: commit 8700649d147948897f3b97810a1dff37924bdd6e
(cherry picked from commit a3fba12c3e9c2113009f556360ae0bd04c45f6bb)
---
plugins/vddk/nbdkit-vddk-plugin.pod | 1 +
plugins/vddk/vddk.c | 9 ++++++++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
index 822b96be..c56faddc 100644
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
@@ -420,6 +420,7 @@ by this build.
=item C<vddk_library_version=...>
The VDDK major library version: 5, 6, 7, ...
+If this is omitted it means the library could not be loaded.
=item C<vddk_dll=...>
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 291283f4..96615749 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -482,7 +482,14 @@ vddk_dump_plugin (void)
printf ("vddk_default_libdir=%s\n", VDDK_LIBDIR);
printf ("vddk_has_nfchostport=1\n");
- printf ("vddk_library_version=%d\n", library_version);
+
+ /* Because load_library (false) we might not have loaded VDDK, in
+ * which case we didn't set library_version. Note this cannot
+ * happen in the normal (non-debug-plugin) path because there we use
+ * load_library (true).
+ */
+ if (library_version > 0)
+ printf ("vddk_library_version=%d\n", library_version);
#if defined(HAVE_DLADDR)
/* It would be nice to print the version of VDDK from the shared
--
2.31.1

View File

@ -1,53 +0,0 @@
From e850f65053d89ad54c27280f48506da5eb631a68 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 18 Nov 2022 09:43:19 +0000
Subject: [PATCH] vddk: Add support for VDDK 8.0.0
There are no changes in any of the structures or enums that we rely on.
Reported-by: Ming Xie
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2143889
(cherry picked from commit dbe12ed499baeea94d603db55cad9e971e0ebcf0)
---
plugins/vddk/nbdkit-vddk-plugin.pod | 2 +-
plugins/vddk/vddk.c | 4 +++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
index c56faddc..c94c41eb 100644
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
@@ -419,7 +419,7 @@ by this build.
=item C<vddk_library_version=...>
-The VDDK major library version: 5, 6, 7, ...
+The VDDK major library version: 5, 6, 7, 8, ...
If this is omitted it means the library could not be loaded.
=item C<vddk_dll=...>
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 96615749..2140789a 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -77,7 +77,7 @@ int vddk_debug_datapath = 1;
static void *dl; /* dlopen handle */
static bool init_called; /* was InitEx called */
static __thread int error_suppression; /* threadlocal error suppression */
-static int library_version; /* VDDK major: 5, 6, 7, ... */
+static int library_version; /* VDDK major: 5, 6, 7, 8, ... */
static enum { NONE = 0, ZLIB, FASTLZ, SKIPZ } compression; /* compression */
static char *config; /* config */
@@ -309,6 +309,8 @@ load_library (bool load_error_is_fatal)
* but our testsuite is easier to write if we point libdir
* directly to a stub .so.
*/
+ { "lib64/libvixDiskLib.so.8", 8 },
+ { "libvixDiskLib.so.8", 8 },
{ "lib64/libvixDiskLib.so.7", 7 },
{ "libvixDiskLib.so.7", 7 },
{ "lib64/libvixDiskLib.so.6", 6 },
--
2.31.1

View File

@ -1,17 +0,0 @@
-----BEGIN PGP SIGNATURE-----
iQJFBAABCAAvFiEE93dPsa0HSn6Mh2fqkXOPc+G3aKAFAl/3RBgRHHJpY2hAYW5u
ZXhpYS5vcmcACgkQkXOPc+G3aKBIIRAAmgoGrmJ8aYO7z+kKgNFjd/p0QxRTZhS/
ol59ojG6jIzN2x/C2PFbRmPB6HJTEg4anrDX04WrP6R+lID1RrH9pTFQabv0YDQC
z49oeXAqINYHvAqgFUJCwlymd7BHEYUudLlK3yu7gQKxMM+J/2v0glpxrtLM7KlD
vvSZkVfbvHlCWIbMWLWIaRHeoWZIXNOjsAp3uEWN2YgikDoxbXVKoh07JoQx5tJ5
2U+a/zo4BQuRspjnhmWc252ZF/8d954/L8J+2mKvbRRf2iAmsqPgS+MNi7WKWO4K
w7/urKn0osuOaArs5xYHJnApmJ9U88CzZpoHQkYhcGgnDOipW9ByJRzT41vVQPW5
IluQODpZUuawWtRIwV/Eoi+LaV2gINAL48Afr02UFYj4gmYQ5TeayLP7NKRQO0VL
jwL4Z3a0cDyUX4i1OArn2ll8THfiog38HfLb70AG1l3P1BVoVVBYWCYbs4xgC9IK
LWkjPKuGXvkGVfZi0nCGdPTOoB1CqCXUvKHXm52FCHg12uJMrBQEivodBoCTbtl0
fSjULQcfrovUEb4d/rDAX7EgJbFS+1jDnodaFHsmNToo3CqfkMBdhLkxG3XExwjy
OOR34wZssjTLsLlWH/RPucWD25RDy1vdPBska9QvvO7W0p+aOtFbnttkTh5cqs45
rHg/sDEiaLA=
=OrsS
-----END PGP SIGNATURE-----

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@ set -e
# directory. Use it like this:
# ./copy-patches.sh
rhel_version=8.8
rhel_version=9.7
# Check we're in the right directory.
if [ ! -f nbdkit.spec ]; then

6
gating.yaml Executable file
View File

@ -0,0 +1,6 @@
--- !Policy
product_versions:
- rhel-9
decision_context: osci_compose_gate
rules:
- !PassingTestCaseRule {test_case_name: osci.brew-build.tier0.functional}

BIN
libguestfs.keyring Normal file

Binary file not shown.

23
nbdkit-find-provides Executable file
View File

@ -0,0 +1,23 @@
#!/bin/bash -
# Generate RPM provides automatically for nbdkit packages and filters.
# Copyright (C) 2009-2022 Red Hat Inc.
# To test:
# find /usr/lib64/nbdkit/plugins | ./nbdkit-find-provides VER REL
# find /usr/lib64/nbdkit/filters | ./nbdkit-find-provides VER REL
ver="$1"
rel="$2"
function process_file
{
if [[ $1 =~ /plugins/nbdkit-.*-plugin ]] ||
[[ $1 =~ /filters/nbdkit-.*-filter ]]; then
echo "Provides:" "$(basename $1 .so)" "=" "$ver-$rel"
fi
}
while read line; do
process_file "$line"
done

3
nbdkit.attr Normal file
View File

@ -0,0 +1,3 @@
%__nbdkit_provides %{_rpmconfigdir}/nbdkit-find-provides %{version} %{release}
%__nbdkit_path %{_libdir}/nbdkit/(plugins|filters)/nbdkit-.*-(plugin|filter)(\.so)?$
%__nbdkit_flags exeonly

3
nbdkit.fc Normal file
View File

@ -0,0 +1,3 @@
/usr/sbin/nbdkit -- gen_context(system_u:object_r:nbdkit_exec_t,s0)
/usr/lib/systemd/system/nbdkit.* gen_context(system_u:object_r:nbdkit_unit_file_t,s0)

207
nbdkit.if Normal file
View File

@ -0,0 +1,207 @@
## <summary>policy for nbdkit</summary>
########################################
## <summary>
## Execute nbdkit_exec_t in the nbdkit domain.
## </summary>
## <param name="domain">
## <summary>
## Domain allowed to transition.
## </summary>
## </param>
#
interface(`nbdkit_domtrans',`
gen_require(`
type nbdkit_t, nbdkit_exec_t;
')
corecmd_search_bin($1)
domtrans_pattern($1, nbdkit_exec_t, nbdkit_t)
')
######################################
## <summary>
## Execute nbdkit in the caller domain.
## </summary>
## <param name="domain">
## <summary>
## Domain allowed access.
## </summary>
## </param>
#
interface(`nbdkit_exec',`
gen_require(`
type nbdkit_exec_t;
')
corecmd_search_bin($1)
can_exec($1, nbdkit_exec_t)
')
########################################
## <summary>
## Execute nbdkit in the nbdkit domain, and
## allow the specified role the nbdkit domain.
## </summary>
## <param name="domain">
## <summary>
## Domain allowed to transition
## </summary>
## </param>
## <param name="role">
## <summary>
## The role to be allowed the nbdkit domain.
## </summary>
## </param>
#
interface(`nbdkit_run',`
gen_require(`
type nbdkit_t;
attribute_role nbdkit_roles;
')
nbdkit_domtrans($1)
roleattribute $2 nbdkit_roles;
')
########################################
## <summary>
## Role access for nbdkit
## </summary>
## <param name="role">
## <summary>
## Role allowed access
## </summary>
## </param>
## <param name="domain">
## <summary>
## User domain for the role
## </summary>
## </param>
#
interface(`nbdkit_role',`
gen_require(`
type nbdkit_t;
attribute_role nbdkit_roles;
')
roleattribute $1 nbdkit_roles;
nbdkit_domtrans($2)
ps_process_pattern($2, nbdkit_t)
allow $2 nbdkit_t:process { signull signal sigkill };
')
########################################
## <summary>
## Allow attempts to connect to nbdkit
## with a unix stream socket.
## </summary>
## <param name="domain">
## <summary>
## Domain to not audit.
## </summary>
## </param>
#
interface(`nbdkit_stream_connect',`
gen_require(`
type nbdkit_t;
')
allow $1 nbdkit_t:unix_stream_socket connectto;
')
########################################
## <summary>
## Allow nbdkit_exec_t to be an entrypoint
## of the specified domain
## </summary>
## <param name="domain">
## <summary>
## Domain allowed access.
## </summary>
## </param>
## <rolecap/>
#
interface(`nbdkit_entrypoint',`
gen_require(`
type nbdkit_exec_t;
')
allow $1 nbdkit_exec_t:file entrypoint;
')
# ----------------------------------------------------------------------
# RWMJ: See:
# https://issues.redhat.com/browse/RHEL-5174?focusedId=23387259&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-23387259
# Remove this when virt.if gets updated.
########################################
#
# Interface compatibility blocks
#
# The following definitions ensure compatibility with distribution policy
# versions that do not contain given interfaces (epel, or older Fedora
# releases).
# Each block tests for existence of given interface and defines it if needed.
#
########################################
## <summary>
## Read and write to svirt_image dirs.
## </summary>
## <param name="domain">
## <summary>
## Domain allowed access.
## </summary>
## </param>
#
ifndef(`virt_rw_svirt_image_dirs',`
interface(`virt_rw_svirt_image_dirs',`
gen_require(`
type svirt_image_t;
')
allow $1 svirt_image_t:dir rw_dir_perms;
')
')
########################################
## <summary>
## Create svirt_image sock_files.
## </summary>
## <param name="domain">
## <summary>
## Domain allowed access.
## </summary>
## </param>
#
ifndef(`virt_create_svirt_image_sock_files',`
interface(`virt_create_svirt_image_sock_files',`
gen_require(`
type svirt_image_t;
')
allow $1 svirt_image_t:sock_file create_sock_file_perms;
')
')
########################################
## <summary>
## Read and write virtlogd pipes.
## </summary>
## <param name="domain">
## <summary>
## Domain allowed access.
## </summary>
## </param>
#
ifndef(`virtlogd_rw_pipes',`
interface(`virtlogd_rw_pipes',`
gen_require(`
type virtlogd_t;
')
allow $1 virtlogd_t:fifo_file rw_fifo_file_perms;
')
')

2746
nbdkit.spec Normal file

File diff suppressed because it is too large Load Diff

100
nbdkit.te Normal file
View File

@ -0,0 +1,100 @@
policy_module(nbdkit, 1.0.0)
########################################
#
# Declarations
#
gen_require(`
type unconfined_t;
')
type nbdkit_t;
type nbdkit_exec_t;
application_domain(nbdkit_t, nbdkit_exec_t)
mcs_constrained(nbdkit_t)
role system_r types nbdkit_t;
type nbdkit_home_t;
userdom_user_home_content(nbdkit_home_t)
type nbdkit_tmp_t;
files_tmp_file(nbdkit_tmp_t)
type nbdkit_unit_file_t;
systemd_unit_file(nbdkit_unit_file_t)
permissive nbdkit_t;
########################################
#
# nbdkit local policy
#
allow nbdkit_t self:capability { setgid setuid };
allow nbdkit_t self:fifo_file rw_fifo_file_perms;
allow nbdkit_t self:netlink_route_socket rw_netlink_socket_perms;
allow nbdkit_t self:process { fork setsockcreate signal_perms };
allow nbdkit_t self:tcp_socket create_stream_socket_perms;
allow nbdkit_t self:udp_socket create_socket_perms;
manage_dirs_pattern(nbdkit_t, nbdkit_tmp_t, nbdkit_tmp_t)
manage_files_pattern(nbdkit_t, nbdkit_tmp_t, nbdkit_tmp_t)
userdom_user_tmp_filetrans(nbdkit_t, nbdkit_tmp_t, { dir file })
manage_dirs_pattern(nbdkit_t, nbdkit_home_t, nbdkit_home_t)
manage_files_pattern(nbdkit_t, nbdkit_home_t, nbdkit_home_t)
userdom_user_home_dir_filetrans(nbdkit_t, nbdkit_home_t, { dir file })
corenet_tcp_connect_http_port(nbdkit_t)
corenet_tcp_connect_ssh_port(nbdkit_t)
corenet_tcp_connect_tftp_port(nbdkit_t)
corenet_tcp_bind_generic_port(nbdkit_t)
corenet_tcp_bind_generic_node(nbdkit_t)
domain_use_interactive_fds(nbdkit_t)
files_read_etc_files(nbdkit_t)
init_abstract_socket_activation(nbdkit_t)
init_ioctl_stream_sockets(nbdkit_t)
init_rw_stream_sockets(nbdkit_t)
optional_policy(`
auth_use_nsswitch(nbdkit_t)
')
optional_policy(`
logging_send_syslog_msg(nbdkit_t)
')
optional_policy(`
miscfiles_read_localization(nbdkit_t)
miscfiles_read_generic_certs(nbdkit_t)
')
optional_policy(`
sysnet_dns_name_resolve(nbdkit_t)
sysnet_read_config(nbdkit_t)
')
optional_policy(`
userdom_read_user_home_content_files(nbdkit_t)
userdom_use_inherited_user_ptys(nbdkit_t)
')
optional_policy(`
virt_create_svirt_image_sock_files(nbdkit_t)
virt_read_qemu_pid_files(nbdkit_t)
virtlogd_rw_pipes(nbdkit_t)
virt_rw_svirt_image(nbdkit_t)
virt_rw_svirt_image_dirs(nbdkit_t)
virt_search_lib(nbdkit_t)
virt_stream_connect_svirt(nbdkit_t)
')
# FIXME: It would be nice to allow libvirt to transition nbdkit_exec_t to
# nbdkit_t when libvirtd was started manually from the commandline (i.e. in
# unconfined_t), but we don't want this transition to happen automatically
# when starting directly from the shell. I'm not sure how to achieve this...
#nbdkit_domtrans(unconfined_t, nbdkit_exec_t, nbdkit_t)

2
sources Normal file
View File

@ -0,0 +1,2 @@
SHA512 (nbdkit-1.38.5.tar.gz) = 86e3160e46f8a571e2d18378987066abb3365aea76db4610b478b7d29bf510f1114132a23c149e7a211920be42274606aa05a42e15576a97915582d07077bc4e
SHA512 (nbdkit-1.38.5.tar.gz.sig) = 3be02420dae81472892980ea35087c23838321766cbe3e2b77238fbd9a2be79538c8d92f1ee471c2ee9983b56f19e59385962e6bd79b5c9efc072f7e1c682c49

6
tests/basic-test.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/bash -
set -e
set -x
# Run nbdkit and check that nbdinfo can connect back to it.
nbdkit -U - memory 1G --run 'nbdinfo "$uri"'

12
tests/tests.yml Executable file
View File

@ -0,0 +1,12 @@
- hosts: localhost
roles:
- role: standard-test-basic
tags:
- classic
required_packages:
- libnbd
- nbdkit
tests:
- simple:
dir: .
run: ./basic-test.sh