Rebase to new stable branch version 1.30.4
resolves: rhbz#2059289 Add support for ssh create remote file. Backport new readahead filter from 1.32.
This commit is contained in:
parent
3ebd7154dd
commit
c54cb3f9a4
292
0001-ssh-Allow-the-remote-file-to-be-created.patch
Normal file
292
0001-ssh-Allow-the-remote-file-to-be-created.patch
Normal file
@ -0,0 +1,292 @@
|
|||||||
|
From 04cf7c2aa8aa8ddc446f1571d2f98661ba8a8ca7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Fri, 15 Apr 2022 12:08:37 +0100
|
||||||
|
Subject: [PATCH] ssh: Allow the remote file to be created
|
||||||
|
|
||||||
|
This adds new parameters, create=(true|false), create-size=SIZE and
|
||||||
|
create-mode=MODE to create and truncate the remote file.
|
||||||
|
|
||||||
|
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
|
||||||
|
(cherry picked from commit 0793f30b1071753532362b2ebf9cb8156a88c3c3)
|
||||||
|
---
|
||||||
|
plugins/ssh/nbdkit-ssh-plugin.pod | 34 ++++++++-
|
||||||
|
plugins/ssh/ssh.c | 112 +++++++++++++++++++++++++++---
|
||||||
|
tests/test-ssh.sh | 13 +++-
|
||||||
|
3 files changed, 146 insertions(+), 13 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/plugins/ssh/nbdkit-ssh-plugin.pod b/plugins/ssh/nbdkit-ssh-plugin.pod
|
||||||
|
index 3f401c15..2bc2c4a7 100644
|
||||||
|
--- a/plugins/ssh/nbdkit-ssh-plugin.pod
|
||||||
|
+++ b/plugins/ssh/nbdkit-ssh-plugin.pod
|
||||||
|
@@ -5,8 +5,10 @@ nbdkit-ssh-plugin - access disk images over the SSH protocol
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
nbdkit ssh host=HOST [path=]PATH
|
||||||
|
- [compression=true] [config=CONFIG_FILE] [identity=FILENAME]
|
||||||
|
- [known-hosts=FILENAME] [password=PASSWORD|-|+FILENAME]
|
||||||
|
+ [compression=true] [config=CONFIG_FILE]
|
||||||
|
+ [create=true] [create-mode=MODE] [create-size=SIZE]
|
||||||
|
+ [identity=FILENAME] [known-hosts=FILENAME]
|
||||||
|
+ [password=PASSWORD|-|+FILENAME]
|
||||||
|
[port=PORT] [timeout=SECS] [user=USER]
|
||||||
|
[verify-remote-host=false]
|
||||||
|
|
||||||
|
@@ -62,6 +64,34 @@ The C<config> parameter is optional. If it is I<not> specified at all
|
||||||
|
then F<~/.ssh/config> and F</etc/ssh/ssh_config> are both read.
|
||||||
|
Missing or unreadable files are ignored.
|
||||||
|
|
||||||
|
+=item B<create=true>
|
||||||
|
+
|
||||||
|
+(nbdkit E<ge> 1.32)
|
||||||
|
+
|
||||||
|
+If set, the remote file will be created. The remote file is created
|
||||||
|
+on the first NBD connection to nbdkit, not when nbdkit starts up. If
|
||||||
|
+the file already exists, it will be replaced and any existing content
|
||||||
|
+lost.
|
||||||
|
+
|
||||||
|
+If using this option, you must use C<create-size>. C<create-mode> can
|
||||||
|
+be used to control the permissions of the new file.
|
||||||
|
+
|
||||||
|
+=item B<create-mode=>MODE
|
||||||
|
+
|
||||||
|
+(nbdkit E<ge> 1.32)
|
||||||
|
+
|
||||||
|
+If using C<create=true> specify the default permissions of the new
|
||||||
|
+remote file. You can use octal modes like C<create-mode=0777> or
|
||||||
|
+C<create-mode=0644>. The default is C<0600>, ie. only readable and
|
||||||
|
+writable by the remote user.
|
||||||
|
+
|
||||||
|
+=item B<create-size=>SIZE
|
||||||
|
+
|
||||||
|
+(nbdkit E<ge> 1.32)
|
||||||
|
+
|
||||||
|
+If using C<create=true>, specify the virtual size of the new disk.
|
||||||
|
+C<SIZE> can use modifiers like C<100M> etc.
|
||||||
|
+
|
||||||
|
=item B<host=>HOST
|
||||||
|
|
||||||
|
Specify the name or IP address of the remote host.
|
||||||
|
diff --git a/plugins/ssh/ssh.c b/plugins/ssh/ssh.c
|
||||||
|
index 77cfcf6c..ac9cc230 100644
|
||||||
|
--- a/plugins/ssh/ssh.c
|
||||||
|
+++ b/plugins/ssh/ssh.c
|
||||||
|
@@ -44,12 +44,15 @@
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
|
+#include <pthread.h>
|
||||||
|
+
|
||||||
|
#include <libssh/libssh.h>
|
||||||
|
#include <libssh/sftp.h>
|
||||||
|
#include <libssh/callbacks.h>
|
||||||
|
|
||||||
|
#include <nbdkit-plugin.h>
|
||||||
|
|
||||||
|
+#include "cleanup.h"
|
||||||
|
#include "const-string-vector.h"
|
||||||
|
#include "minmax.h"
|
||||||
|
|
||||||
|
@@ -63,6 +66,9 @@ static const char *known_hosts = NULL;
|
||||||
|
static const_string_vector identities = empty_vector;
|
||||||
|
static uint32_t timeout = 0;
|
||||||
|
static bool compression = false;
|
||||||
|
+static bool create = false;
|
||||||
|
+static int64_t create_size = -1;
|
||||||
|
+static unsigned create_mode = S_IRUSR | S_IWUSR /* 0600 */;
|
||||||
|
|
||||||
|
/* config can be:
|
||||||
|
* NULL => parse options from default file
|
||||||
|
@@ -167,6 +173,27 @@ ssh_config (const char *key, const char *value)
|
||||||
|
return -1;
|
||||||
|
compression = r;
|
||||||
|
}
|
||||||
|
+ else if (strcmp (key, "create") == 0) {
|
||||||
|
+ r = nbdkit_parse_bool (value);
|
||||||
|
+ if (r == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ create = r;
|
||||||
|
+ }
|
||||||
|
+ else if (strcmp (key, "create-size") == 0) {
|
||||||
|
+ create_size = nbdkit_parse_size (value);
|
||||||
|
+ if (create_size == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ else if (strcmp (key, "create-mode") == 0) {
|
||||||
|
+ r = nbdkit_parse_unsigned (key, value, &create_mode);
|
||||||
|
+ if (r == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ /* OpenSSH checks this too. */
|
||||||
|
+ if (create_mode > 0777) {
|
||||||
|
+ nbdkit_error ("create-mode must be <= 0777");
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
|
||||||
|
else {
|
||||||
|
nbdkit_error ("unknown parameter '%s'", key);
|
||||||
|
@@ -186,6 +213,13 @@ ssh_config_complete (void)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /* If create=true, create-size must be supplied. */
|
||||||
|
+ if (create && create_size == -1) {
|
||||||
|
+ nbdkit_error ("if using create=true, you must specify the size "
|
||||||
|
+ "of the new remote file using create-size=SIZE");
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -200,7 +234,10 @@ ssh_config_complete (void)
|
||||||
|
"identity=<FILENAME> Prepend private key (identity) file.\n" \
|
||||||
|
"timeout=SECS Set SSH connection timeout.\n" \
|
||||||
|
"verify-remote-host=false Ignore known_hosts.\n" \
|
||||||
|
- "compression=true Enable compression."
|
||||||
|
+ "compression=true Enable compression.\n" \
|
||||||
|
+ "create=true Create the remote file.\n" \
|
||||||
|
+ "create-mode=MODE Set the permissions of the remote file.\n" \
|
||||||
|
+ "create-size=SIZE Set the size of the remote file."
|
||||||
|
|
||||||
|
/* Since we must simulate atomic pread and pwrite using seek +
|
||||||
|
* read/write, calls on each handle must be serialized.
|
||||||
|
@@ -329,6 +366,65 @@ authenticate (struct ssh_handle *h)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* This function opens or creates the remote file (depending on
|
||||||
|
+ * create=false|true). Parallel connections might call this function
|
||||||
|
+ * at the same time, and so we must hold a lock to ensure that the
|
||||||
|
+ * file is created at most once.
|
||||||
|
+ */
|
||||||
|
+static pthread_mutex_t create_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
+
|
||||||
|
+static sftp_file
|
||||||
|
+open_or_create_path (ssh_session session, sftp_session sftp, int readonly)
|
||||||
|
+{
|
||||||
|
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&create_lock);
|
||||||
|
+ int access_type;
|
||||||
|
+ int r;
|
||||||
|
+ sftp_file file;
|
||||||
|
+
|
||||||
|
+ access_type = readonly ? O_RDONLY : O_RDWR;
|
||||||
|
+ if (create) access_type |= O_CREAT | O_TRUNC;
|
||||||
|
+
|
||||||
|
+ file = sftp_open (sftp, path, access_type, S_IRWXU);
|
||||||
|
+ if (!file) {
|
||||||
|
+ nbdkit_error ("cannot %s file for %s: %s",
|
||||||
|
+ create ? "create" : "open",
|
||||||
|
+ readonly ? "reading" : "writing",
|
||||||
|
+ ssh_get_error (session));
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (create) {
|
||||||
|
+ /* There's no sftp_truncate call. However OpenSSH lets you call
|
||||||
|
+ * SSH_FXP_SETSTAT + SSH_FILEXFER_ATTR_SIZE which invokes
|
||||||
|
+ * truncate(2) on the server. Libssh doesn't provide a binding
|
||||||
|
+ * for SSH_FXP_FSETSTAT so we have to pass the session + path.
|
||||||
|
+ */
|
||||||
|
+ struct sftp_attributes_struct attrs = {
|
||||||
|
+ .flags = SSH_FILEXFER_ATTR_SIZE |
|
||||||
|
+ SSH_FILEXFER_ATTR_PERMISSIONS,
|
||||||
|
+ .size = create_size,
|
||||||
|
+ .permissions = create_mode,
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ r = sftp_setstat (sftp, path, &attrs);
|
||||||
|
+ if (r != SSH_OK) {
|
||||||
|
+ nbdkit_error ("setstat failed: %s", ssh_get_error (session));
|
||||||
|
+
|
||||||
|
+ /* Best-effort attempt to delete the remote file on failure. */
|
||||||
|
+ r = sftp_unlink (sftp, path);
|
||||||
|
+ if (r != SSH_OK)
|
||||||
|
+ nbdkit_debug ("unlink failed: %s", ssh_get_error (session));
|
||||||
|
+
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* On the next connection, don't create or truncate the file. */
|
||||||
|
+ create = false;
|
||||||
|
+
|
||||||
|
+ return file;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Create the per-connection handle. */
|
||||||
|
static void *
|
||||||
|
ssh_open (int readonly)
|
||||||
|
@@ -337,7 +433,6 @@ ssh_open (int readonly)
|
||||||
|
const int set = 1;
|
||||||
|
size_t i;
|
||||||
|
int r;
|
||||||
|
- int access_type;
|
||||||
|
|
||||||
|
h = calloc (1, sizeof *h);
|
||||||
|
if (h == NULL) {
|
||||||
|
@@ -471,7 +566,7 @@ ssh_open (int readonly)
|
||||||
|
if (authenticate (h) == -1)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
- /* Open the SFTP connection and file. */
|
||||||
|
+ /* Open the SFTP connection. */
|
||||||
|
h->sftp = sftp_new (h->session);
|
||||||
|
if (!h->sftp) {
|
||||||
|
nbdkit_error ("failed to allocate sftp session: %s",
|
||||||
|
@@ -484,14 +579,11 @@ ssh_open (int readonly)
|
||||||
|
ssh_get_error (h->session));
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
- access_type = readonly ? O_RDONLY : O_RDWR;
|
||||||
|
- h->file = sftp_open (h->sftp, path, access_type, S_IRWXU);
|
||||||
|
- if (!h->file) {
|
||||||
|
- nbdkit_error ("cannot open file for %s: %s",
|
||||||
|
- readonly ? "reading" : "writing",
|
||||||
|
- ssh_get_error (h->session));
|
||||||
|
+
|
||||||
|
+ /* Open or create the remote file. */
|
||||||
|
+ h->file = open_or_create_path (h->session, h->sftp, readonly);
|
||||||
|
+ if (!h->file)
|
||||||
|
goto err;
|
||||||
|
- }
|
||||||
|
|
||||||
|
nbdkit_debug ("opened libssh handle");
|
||||||
|
|
||||||
|
diff --git a/tests/test-ssh.sh b/tests/test-ssh.sh
|
||||||
|
index 6c0ce410..f04b4488 100755
|
||||||
|
--- a/tests/test-ssh.sh
|
||||||
|
+++ b/tests/test-ssh.sh
|
||||||
|
@@ -36,6 +36,7 @@ set -x
|
||||||
|
|
||||||
|
requires test -f disk
|
||||||
|
requires nbdcopy --version
|
||||||
|
+requires stat --version
|
||||||
|
|
||||||
|
# Check that ssh to localhost will work without any passwords or phrases.
|
||||||
|
#
|
||||||
|
@@ -48,7 +49,7 @@ then
|
||||||
|
exit 77
|
||||||
|
fi
|
||||||
|
|
||||||
|
-files="ssh.img"
|
||||||
|
+files="ssh.img ssh2.img"
|
||||||
|
rm -f $files
|
||||||
|
cleanup_fn rm -f $files
|
||||||
|
|
||||||
|
@@ -59,3 +60,13 @@ nbdkit -v -D ssh.log=2 -U - \
|
||||||
|
|
||||||
|
# The output should be identical.
|
||||||
|
cmp disk ssh.img
|
||||||
|
+
|
||||||
|
+# Copy local file 'ssh.img' to newly created "remote" 'ssh2.img'
|
||||||
|
+size="$(stat -c %s disk)"
|
||||||
|
+nbdkit -v -D ssh.log=2 -U - \
|
||||||
|
+ ssh host=localhost $PWD/ssh2.img \
|
||||||
|
+ create=true create-size=$size \
|
||||||
|
+ --run 'nbdcopy ssh.img "$uri"'
|
||||||
|
+
|
||||||
|
+# The output should be identical.
|
||||||
|
+cmp disk ssh2.img
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
794
0002-readahead-Rewrite-this-filter-so-it-prefetches-using.patch
Normal file
794
0002-readahead-Rewrite-this-filter-so-it-prefetches-using.patch
Normal file
@ -0,0 +1,794 @@
|
|||||||
|
From 160601397d307f70586f1bc70111141855ff68ea Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Sat, 16 Apr 2022 18:39:13 +0100
|
||||||
|
Subject: [PATCH] readahead: Rewrite this filter so it prefetches using .cache
|
||||||
|
|
||||||
|
The previous readahead filter did not work well and we have stopped
|
||||||
|
using it in virt-v2v. However the concept is a good one if we can
|
||||||
|
make it work. This commit completely rethinks the filter.
|
||||||
|
|
||||||
|
Now, in parallel with the ordinary pread command, we issue a prefetch
|
||||||
|
(ie. .cache) to the underlying plugin for the data immediately
|
||||||
|
following the pread. For example a simple sequence of operations:
|
||||||
|
|
||||||
|
t=1 pread (offset=0, count=65536)
|
||||||
|
t=2 pread (offset=65536, count=65536)
|
||||||
|
t=3 pread (offset=131072, count=65536)
|
||||||
|
|
||||||
|
would become:
|
||||||
|
|
||||||
|
t=1 pread (offset=0, count=65536) <--\ issued
|
||||||
|
cache (offset=65536, count=65536) <--/ in parallel
|
||||||
|
t=2 pread (offset=65536, count=65536)
|
||||||
|
cache (offset=131072, count=65536)
|
||||||
|
t=3 pread (offset=131072, count=65536)
|
||||||
|
cache (offset=196608, count=65536)
|
||||||
|
|
||||||
|
This requires that the underlying filter(s) and plugin chain can
|
||||||
|
actually do something with the .cache request. If this is not the
|
||||||
|
case then the filter does nothing (but it will print a warning). For
|
||||||
|
plugins which don't have native support for prefetching, it is
|
||||||
|
sufficient to insert nbdkit-cache-filter after this filter.
|
||||||
|
(nbdkit-cow-filter can also be used with cow-on-cache=true, but that
|
||||||
|
is more useful for advanced users who are already using the cow
|
||||||
|
filter).
|
||||||
|
|
||||||
|
The implementation creates a background thread per connection to issue
|
||||||
|
the parallel .cache requests. This is safer than the alternative (one
|
||||||
|
background thread in total) since we don't have to deal with the
|
||||||
|
problem of cache requests being issued with the wrong export name or
|
||||||
|
stale next pointer. The background thread is controlled by a queue of
|
||||||
|
commands, with the only possible commands being "cache" or "quit".
|
||||||
|
|
||||||
|
Because the background thread issues parallel requests on the same
|
||||||
|
connection, the underlying plugin must support the parallel thread
|
||||||
|
model, otherwise we would be violating the plugin's thread model. It
|
||||||
|
may be possible in future to open a new connection from the background
|
||||||
|
thread (but with the same exportname), which would lift this
|
||||||
|
restriction to at least serialize_requests. Because of the current
|
||||||
|
limitation, nbdkit-curl-plugin cannot use prefetch.
|
||||||
|
|
||||||
|
(cherry picked from commit 2ff548d66ad3eae87868402ec5b3319edd12090f)
|
||||||
|
---
|
||||||
|
TODO | 22 +-
|
||||||
|
filters/readahead/Makefile.am | 2 +
|
||||||
|
filters/readahead/bgthread.c | 76 ++++
|
||||||
|
filters/readahead/nbdkit-readahead-filter.pod | 70 +++-
|
||||||
|
filters/readahead/readahead.c | 338 +++++++++---------
|
||||||
|
filters/readahead/readahead.h | 60 ++++
|
||||||
|
tests/test-readahead.sh | 2 +-
|
||||||
|
7 files changed, 367 insertions(+), 203 deletions(-)
|
||||||
|
create mode 100644 filters/readahead/bgthread.c
|
||||||
|
create mode 100644 filters/readahead/readahead.h
|
||||||
|
|
||||||
|
diff --git a/TODO b/TODO
|
||||||
|
index 5ae21db5..4d2a9796 100644
|
||||||
|
--- a/TODO
|
||||||
|
+++ b/TODO
|
||||||
|
@@ -62,16 +62,6 @@ General ideas for improvements
|
||||||
|
continue to keep their non-standard handshake while utilizing nbdkit
|
||||||
|
to prototype new behaviors in serving the kernel.
|
||||||
|
|
||||||
|
-* Background thread for filters. Some filters (readahead, cache and
|
||||||
|
- proposed scan filter - see below) could be more effective if they
|
||||||
|
- were able to defer work to a background thread. We finally have
|
||||||
|
- nbdkit_next_context_open and friends for allowing a background
|
||||||
|
- thread to have access into the plugin, but still need to worry about
|
||||||
|
- thread-safety (how much must the filter do vs. nbdkit, to avoid
|
||||||
|
- calling into the plugin too many times at once) and cleanup
|
||||||
|
- (spawning the thread during .after_fork is viable, but cleaning it
|
||||||
|
- up during .unload is too late).
|
||||||
|
-
|
||||||
|
* "nbdkit.so": nbdkit as a loadable shared library. The aim of nbdkit
|
||||||
|
is to make it reusable from other programs (see nbdkit-captive(1)).
|
||||||
|
If it was a loadable shared library it would be even more reusable.
|
||||||
|
@@ -228,6 +218,8 @@ Suggestions for filters
|
||||||
|
* nbdkit-cache-filter should handle ENOSPC errors automatically by
|
||||||
|
reclaiming blocks from the cache
|
||||||
|
|
||||||
|
+* nbdkit-cache-filter could use a background thread for reclaiming.
|
||||||
|
+
|
||||||
|
* zstd filter was requested as a way to do what we currently do with
|
||||||
|
xz but saving many hours on compression (at the cost of hundreds of
|
||||||
|
MBs of extra data)
|
||||||
|
@@ -240,6 +232,16 @@ Suggestions for filters
|
||||||
|
could inject a flush after pausing. However this requires that
|
||||||
|
filter background threads have access to the plugin (see above).
|
||||||
|
|
||||||
|
+nbdkit-readahead-filter:
|
||||||
|
+
|
||||||
|
+* The filter should open a new connection to the plugin per background
|
||||||
|
+ thread so it is able to work with plugins that use the
|
||||||
|
+ serialize_requests thread model (like curl). At the moment it makes
|
||||||
|
+ requests on the same connection, so it requires plugins to use the
|
||||||
|
+ parallel thread model.
|
||||||
|
+
|
||||||
|
+* It should combine (or avoid) overlapping cache requests.
|
||||||
|
+
|
||||||
|
nbdkit-rate-filter:
|
||||||
|
|
||||||
|
* allow other kinds of traffic shaping such as VBR
|
||||||
|
diff --git a/filters/readahead/Makefile.am b/filters/readahead/Makefile.am
|
||||||
|
index ee5bb3fb..187993ae 100644
|
||||||
|
--- a/filters/readahead/Makefile.am
|
||||||
|
+++ b/filters/readahead/Makefile.am
|
||||||
|
@@ -37,6 +37,8 @@ filter_LTLIBRARIES = nbdkit-readahead-filter.la
|
||||||
|
|
||||||
|
nbdkit_readahead_filter_la_SOURCES = \
|
||||||
|
readahead.c \
|
||||||
|
+ readahead.h \
|
||||||
|
+ bgthread.c \
|
||||||
|
$(top_srcdir)/include/nbdkit-filter.h \
|
||||||
|
$(NULL)
|
||||||
|
|
||||||
|
diff --git a/filters/readahead/bgthread.c b/filters/readahead/bgthread.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000..5894bb5f
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/filters/readahead/bgthread.c
|
||||||
|
@@ -0,0 +1,76 @@
|
||||||
|
+/* nbdkit
|
||||||
|
+ * Copyright (C) 2019-2022 Red Hat Inc.
|
||||||
|
+ *
|
||||||
|
+ * Redistribution and use in source and binary forms, with or without
|
||||||
|
+ * modification, are permitted provided that the following conditions are
|
||||||
|
+ * met:
|
||||||
|
+ *
|
||||||
|
+ * * Redistributions of source code must retain the above copyright
|
||||||
|
+ * notice, this list of conditions and the following disclaimer.
|
||||||
|
+ *
|
||||||
|
+ * * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+ * notice, this list of conditions and the following disclaimer in the
|
||||||
|
+ * documentation and/or other materials provided with the distribution.
|
||||||
|
+ *
|
||||||
|
+ * * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+ * used to endorse or promote products derived from this software without
|
||||||
|
+ * specific prior written permission.
|
||||||
|
+ *
|
||||||
|
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+ * SUCH DAMAGE.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include <config.h>
|
||||||
|
+
|
||||||
|
+#include <stdio.h>
|
||||||
|
+#include <stdlib.h>
|
||||||
|
+#include <stdint.h>
|
||||||
|
+#include <pthread.h>
|
||||||
|
+
|
||||||
|
+#include <nbdkit-filter.h>
|
||||||
|
+
|
||||||
|
+#include "readahead.h"
|
||||||
|
+
|
||||||
|
+#include "cleanup.h"
|
||||||
|
+
|
||||||
|
+void *
|
||||||
|
+readahead_thread (void *vp)
|
||||||
|
+{
|
||||||
|
+ struct bgthread_ctrl *ctrl = vp;
|
||||||
|
+
|
||||||
|
+ for (;;) {
|
||||||
|
+ struct command cmd;
|
||||||
|
+
|
||||||
|
+ /* Wait until we are sent at least one command. */
|
||||||
|
+ {
|
||||||
|
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&ctrl->lock);
|
||||||
|
+ while (ctrl->cmds.len == 0)
|
||||||
|
+ pthread_cond_wait (&ctrl->cond, &ctrl->lock);
|
||||||
|
+ cmd = ctrl->cmds.ptr[0];
|
||||||
|
+ command_queue_remove (&ctrl->cmds, 0);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ switch (cmd.type) {
|
||||||
|
+ case CMD_QUIT:
|
||||||
|
+ /* Finish processing and exit the thread. */
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ case CMD_CACHE:
|
||||||
|
+ /* Issue .cache (readahead) to underlying plugin. We ignore any
|
||||||
|
+ * errors because there's no way to communicate that back to the
|
||||||
|
+ * client, and readahead is only advisory.
|
||||||
|
+ */
|
||||||
|
+ cmd.next->cache (cmd.next, cmd.count, cmd.offset, 0, NULL);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
diff --git a/filters/readahead/nbdkit-readahead-filter.pod b/filters/readahead/nbdkit-readahead-filter.pod
|
||||||
|
index c220d379..630e5924 100644
|
||||||
|
--- a/filters/readahead/nbdkit-readahead-filter.pod
|
||||||
|
+++ b/filters/readahead/nbdkit-readahead-filter.pod
|
||||||
|
@@ -1,28 +1,66 @@
|
||||||
|
=head1 NAME
|
||||||
|
|
||||||
|
-nbdkit-readahead-filter - prefetch data when reading sequentially
|
||||||
|
+nbdkit-readahead-filter - prefetch data ahead of sequential reads
|
||||||
|
|
||||||
|
=head1 SYNOPSIS
|
||||||
|
|
||||||
|
- nbdkit --filter=readahead plugin
|
||||||
|
+ nbdkit --filter=readahead PLUGIN
|
||||||
|
+
|
||||||
|
+ nbdkit --filter=readahead --filter=cache PLUGIN
|
||||||
|
+
|
||||||
|
+ nbdkit --filter=readahead --filter=cow PLUGIN cow-on-cache=true
|
||||||
|
|
||||||
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
|
C<nbdkit-readahead-filter> is a filter that prefetches data when the
|
||||||
|
-client is reading sequentially.
|
||||||
|
+client is reading.
|
||||||
|
|
||||||
|
-A common use for this filter is to accelerate sequential copy
|
||||||
|
-operations (like S<C<qemu-img convert>>) when plugin requests have a
|
||||||
|
-high overhead (like L<nbdkit-curl-plugin(1)>). For example:
|
||||||
|
-
|
||||||
|
- nbdkit -U - --filter=readahead curl https://example.com/disk.img \
|
||||||
|
- --run 'qemu-img convert $nbd disk.img'
|
||||||
|
+When the client issues a read, this filter issues a parallel prefetch
|
||||||
|
+(C<.cache>) for subsequent data. Plugins which support this command
|
||||||
|
+will prefetch the data, making sequential reads faster. For plugins
|
||||||
|
+which do not support this command, you can inject
|
||||||
|
+L<nbdkit-cache-filter(1)> below (after) this filter, giving
|
||||||
|
+approximately the same effect. L<nbdkit-cow-filter(1)> can be used
|
||||||
|
+instead of nbdkit-cache-filter, if you add the C<cow-on-cache=true>
|
||||||
|
+option.
|
||||||
|
|
||||||
|
The filter uses a simple adaptive algorithm which accelerates
|
||||||
|
-sequential reads, but has a small penalty if the client does random
|
||||||
|
-reads. If the client mixes reads with writes or write-like operations
|
||||||
|
-(trimming, zeroing) then it will work but there can be a large
|
||||||
|
-performance penalty.
|
||||||
|
+sequential reads and requires no further configuration.
|
||||||
|
+
|
||||||
|
+=head2 Limitations
|
||||||
|
+
|
||||||
|
+In a number of significant cases this filter will do nothing. The
|
||||||
|
+filter will print a warning message if this happens.
|
||||||
|
+
|
||||||
|
+=over 4
|
||||||
|
+
|
||||||
|
+=item Thread model must be parallel
|
||||||
|
+
|
||||||
|
+For example L<nbdkit-curl-plugin(1)> only supports
|
||||||
|
+C<serialize_requests>, and so this filter cannot perform prefetches in
|
||||||
|
+parallel with the read requests.
|
||||||
|
+
|
||||||
|
+We may be able to lift this restriction in future.
|
||||||
|
+
|
||||||
|
+=item Underlying filters or plugin must support C<.cache> (prefetch)
|
||||||
|
+
|
||||||
|
+Very many plugins do not have the concept of prefetching and/or
|
||||||
|
+do not implement the C<.cache> callback, and so there is no
|
||||||
|
+way for this filter to issue prefetches.
|
||||||
|
+
|
||||||
|
+You can usually get around this by adding I<--filter=cache> after this
|
||||||
|
+filter as explained above. It may be necessary to limit the total
|
||||||
|
+size of the cache (see L<nbdkit-cache-filter(1)/CACHE MAXIMUM SIZE>).
|
||||||
|
+
|
||||||
|
+=item Clients and kernels may do readahead already
|
||||||
|
+
|
||||||
|
+It may be the case that NBD clients are already issuing
|
||||||
|
+C<NBD_CMD_CACHE> (NBD prefetch) commands. It may also be the case
|
||||||
|
+that your plugin is using local file functions where the kernel is
|
||||||
|
+doing readahead. In such cases this filter is not necessary and may
|
||||||
|
+be pessimal.
|
||||||
|
+
|
||||||
|
+=back
|
||||||
|
|
||||||
|
=head1 PARAMETERS
|
||||||
|
|
||||||
|
@@ -50,9 +88,9 @@ C<nbdkit-readahead-filter> first appeared in nbdkit 1.12.
|
||||||
|
|
||||||
|
L<nbdkit(1)>,
|
||||||
|
L<nbdkit-cache-filter(1)>,
|
||||||
|
-L<nbdkit-curl-plugin(1)>,
|
||||||
|
+L<nbdkit-cow-filter(1)>,
|
||||||
|
+L<nbdkit-file-plugin(1)>,
|
||||||
|
L<nbdkit-retry-filter(1)>,
|
||||||
|
-L<nbdkit-ssh-plugin(1)>,
|
||||||
|
L<nbdkit-torrent-plugin(1)>,
|
||||||
|
L<nbdkit-vddk-plugin(1)>,
|
||||||
|
L<nbdkit-filter(3)>,
|
||||||
|
@@ -64,4 +102,4 @@ Richard W.M. Jones
|
||||||
|
|
||||||
|
=head1 COPYRIGHT
|
||||||
|
|
||||||
|
-Copyright (C) 2019 Red Hat Inc.
|
||||||
|
+Copyright (C) 2019-2022 Red Hat Inc.
|
||||||
|
diff --git a/filters/readahead/readahead.c b/filters/readahead/readahead.c
|
||||||
|
index f5552d4c..1d7ae111 100644
|
||||||
|
--- a/filters/readahead/readahead.c
|
||||||
|
+++ b/filters/readahead/readahead.c
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/* nbdkit
|
||||||
|
- * Copyright (C) 2019-2021 Red Hat Inc.
|
||||||
|
+ * Copyright (C) 2019-2022 Red Hat Inc.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
@@ -34,232 +34,218 @@
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
+#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <errno.h>
|
||||||
|
-
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
|
#include <nbdkit-filter.h>
|
||||||
|
|
||||||
|
+#include "readahead.h"
|
||||||
|
+
|
||||||
|
#include "cleanup.h"
|
||||||
|
#include "minmax.h"
|
||||||
|
-
|
||||||
|
-/* Copied from server/plugins.c. */
|
||||||
|
-#define MAX_REQUEST_SIZE (64 * 1024 * 1024)
|
||||||
|
+#include "vector.h"
|
||||||
|
|
||||||
|
/* These could be made configurable in future. */
|
||||||
|
-#define READAHEAD_MIN 65536
|
||||||
|
-#define READAHEAD_MAX MAX_REQUEST_SIZE
|
||||||
|
-
|
||||||
|
-/* This lock protects the global state. */
|
||||||
|
-static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
-
|
||||||
|
-/* The real size of the underlying plugin. */
|
||||||
|
-static uint64_t size;
|
||||||
|
+#define READAHEAD_MIN 32768
|
||||||
|
+#define READAHEAD_MAX (4*1024*1024)
|
||||||
|
|
||||||
|
/* Size of the readahead window. */
|
||||||
|
+static pthread_mutex_t window_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
static uint64_t window = READAHEAD_MIN;
|
||||||
|
+static uint64_t last_offset = 0, last_readahead = 0;
|
||||||
|
|
||||||
|
-/* The single prefetch buffer shared by all threads, and its virtual
|
||||||
|
- * location in the virtual disk. The prefetch buffer grows
|
||||||
|
- * dynamically as required, but never shrinks.
|
||||||
|
+static int thread_model = -1; /* Thread model of the underlying plugin. */
|
||||||
|
+
|
||||||
|
+/* Per-connection data. */
|
||||||
|
+struct readahead_handle {
|
||||||
|
+ int can_cache; /* Can the underlying plugin cache? */
|
||||||
|
+ pthread_t thread; /* The background thread, one per connection. */
|
||||||
|
+ struct bgthread_ctrl ctrl;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+/* We have various requirements of the underlying filter(s) + plugin:
|
||||||
|
+ * - They must support NBDKIT_CACHE_NATIVE (otherwise our requests
|
||||||
|
+ * would not do anything useful).
|
||||||
|
+ * - They must use the PARALLEL thread model (otherwise we could
|
||||||
|
+ * violate their thread model).
|
||||||
|
+ */
|
||||||
|
+static bool
|
||||||
|
+filter_working (struct readahead_handle *h)
|
||||||
|
+{
|
||||||
|
+ return
|
||||||
|
+ h->can_cache == NBDKIT_CACHE_NATIVE &&
|
||||||
|
+ thread_model == NBDKIT_THREAD_MODEL_PARALLEL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool
|
||||||
|
+suggest_cache_filter (struct readahead_handle *h)
|
||||||
|
+{
|
||||||
|
+ return
|
||||||
|
+ h->can_cache != NBDKIT_CACHE_NATIVE &&
|
||||||
|
+ thread_model == NBDKIT_THREAD_MODEL_PARALLEL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* We need to hook into .get_ready() so we can read the final thread
|
||||||
|
+ * model (of the whole server).
|
||||||
|
*/
|
||||||
|
-static char *buffer = NULL;
|
||||||
|
-static size_t bufsize = 0;
|
||||||
|
-static uint64_t position;
|
||||||
|
-static uint32_t length = 0;
|
||||||
|
+static int
|
||||||
|
+readahead_get_ready (int final_thread_model)
|
||||||
|
+{
|
||||||
|
+ thread_model = final_thread_model;
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int
|
||||||
|
+send_command_to_background_thread (struct bgthread_ctrl *ctrl,
|
||||||
|
+ const struct command cmd)
|
||||||
|
+{
|
||||||
|
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&ctrl->lock);
|
||||||
|
+ if (command_queue_append (&ctrl->cmds, cmd) == -1)
|
||||||
|
+ return -1;
|
||||||
|
+ /* Signal the thread if it could be sleeping on an empty queue. */
|
||||||
|
+ if (ctrl->cmds.len == 1)
|
||||||
|
+ pthread_cond_signal (&ctrl->cond);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void *
|
||||||
|
+readahead_open (nbdkit_next_open *next, nbdkit_context *nxdata,
|
||||||
|
+ int readonly, const char *exportname, int is_tls)
|
||||||
|
+{
|
||||||
|
+ struct readahead_handle *h;
|
||||||
|
+ int err;
|
||||||
|
+
|
||||||
|
+ if (next (nxdata, readonly, exportname) == -1)
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ h = malloc (sizeof *h);
|
||||||
|
+ if (h == NULL) {
|
||||||
|
+ nbdkit_error ("malloc: %m");
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ h->ctrl.cmds = (command_queue) empty_vector;
|
||||||
|
+ pthread_mutex_init (&h->ctrl.lock, NULL);
|
||||||
|
+ pthread_cond_init (&h->ctrl.cond, NULL);
|
||||||
|
+
|
||||||
|
+ /* Create the background thread. */
|
||||||
|
+ err = pthread_create (&h->thread, NULL, readahead_thread, &h->ctrl);
|
||||||
|
+ if (err != 0) {
|
||||||
|
+ errno = err;
|
||||||
|
+ nbdkit_error ("pthread_create: %m");
|
||||||
|
+ pthread_cond_destroy (&h->ctrl.cond);
|
||||||
|
+ pthread_mutex_destroy (&h->ctrl.lock);
|
||||||
|
+ free (h);
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return h;
|
||||||
|
+}
|
||||||
|
|
||||||
|
static void
|
||||||
|
-readahead_unload (void)
|
||||||
|
+readahead_close (void *handle)
|
||||||
|
{
|
||||||
|
- free (buffer);
|
||||||
|
+ struct readahead_handle *h = handle;
|
||||||
|
+ const struct command quit_cmd = { .type = CMD_QUIT };
|
||||||
|
+
|
||||||
|
+ send_command_to_background_thread (&h->ctrl, quit_cmd);
|
||||||
|
+ pthread_join (h->thread, NULL);
|
||||||
|
+ pthread_cond_destroy (&h->ctrl.cond);
|
||||||
|
+ pthread_mutex_destroy (&h->ctrl.lock);
|
||||||
|
+ command_queue_reset (&h->ctrl.cmds);
|
||||||
|
+ free (h);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int64_t readahead_get_size (nbdkit_next *next, void *handle);
|
||||||
|
-
|
||||||
|
-/* In prepare, force a call to get_size which sets the size global. */
|
||||||
|
static int
|
||||||
|
-readahead_prepare (nbdkit_next *next, void *handle, int readonly)
|
||||||
|
+readahead_can_cache (nbdkit_next *next, void *handle)
|
||||||
|
{
|
||||||
|
- int64_t r;
|
||||||
|
+ struct readahead_handle *h = handle;
|
||||||
|
+ int r;
|
||||||
|
|
||||||
|
- r = readahead_get_size (next, handle);
|
||||||
|
- return r >= 0 ? 0 : -1;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-/* Get the size. */
|
||||||
|
-static int64_t
|
||||||
|
-readahead_get_size (nbdkit_next *next, void *handle)
|
||||||
|
-{
|
||||||
|
- int64_t r;
|
||||||
|
-
|
||||||
|
- r = next->get_size (next);
|
||||||
|
+ /* Call next->can_cache to read the underlying 'can_cache'. */
|
||||||
|
+ r = next->can_cache (next);
|
||||||
|
if (r == -1)
|
||||||
|
return -1;
|
||||||
|
+ h->can_cache = r;
|
||||||
|
|
||||||
|
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
||||||
|
- size = r;
|
||||||
|
+ if (!filter_working (h)) {
|
||||||
|
+ nbdkit_error ("readahead: warning: underlying plugin does not support "
|
||||||
|
+ "NBD_CMD_CACHE or PARALLEL thread model, so the filter "
|
||||||
|
+ "won't do anything");
|
||||||
|
+ if (suggest_cache_filter (h))
|
||||||
|
+ nbdkit_error ("readahead: try adding --filter=cache "
|
||||||
|
+ "after this filter");
|
||||||
|
+ /* This is an error, but that's just to ensure that the warning
|
||||||
|
+ * above is seen. We don't need to return -1 here.
|
||||||
|
+ */
|
||||||
|
+ }
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Cache */
|
||||||
|
-static int
|
||||||
|
-readahead_can_cache (nbdkit_next *next, void *handle)
|
||||||
|
-{
|
||||||
|
- /* We are already operating as a cache regardless of the plugin's
|
||||||
|
- * underlying .can_cache, but it's easiest to just rely on nbdkit's
|
||||||
|
- * behavior of calling .pread for caching.
|
||||||
|
- */
|
||||||
|
- return NBDKIT_CACHE_EMULATE;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
/* Read data. */
|
||||||
|
-
|
||||||
|
-static int
|
||||||
|
-fill_readahead (nbdkit_next *next,
|
||||||
|
- uint32_t count, uint64_t offset, uint32_t flags, int *err)
|
||||||
|
-{
|
||||||
|
- position = offset;
|
||||||
|
-
|
||||||
|
- /* Read at least window bytes, but if count is larger read that.
|
||||||
|
- * Note that the count cannot be bigger than the buffer size.
|
||||||
|
- */
|
||||||
|
- length = MAX (count, window);
|
||||||
|
-
|
||||||
|
- /* Don't go beyond the end of the underlying file. */
|
||||||
|
- length = MIN (length, size - position);
|
||||||
|
-
|
||||||
|
- /* Grow the buffer if necessary. */
|
||||||
|
- if (bufsize < length) {
|
||||||
|
- char *new_buffer = realloc (buffer, length);
|
||||||
|
- if (new_buffer == NULL) {
|
||||||
|
- *err = errno;
|
||||||
|
- nbdkit_error ("realloc: %m");
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
- buffer = new_buffer;
|
||||||
|
- bufsize = length;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- if (next->pread (next, buffer, length, offset, flags, err) == -1) {
|
||||||
|
- length = 0; /* failed to fill the prefetch buffer */
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static int
|
||||||
|
readahead_pread (nbdkit_next *next,
|
||||||
|
void *handle, void *buf, uint32_t count, uint64_t offset,
|
||||||
|
uint32_t flags, int *err)
|
||||||
|
{
|
||||||
|
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
||||||
|
+ struct readahead_handle *h = handle;
|
||||||
|
|
||||||
|
- while (count > 0) {
|
||||||
|
- if (length == 0) {
|
||||||
|
- /* We don't have a prefetch buffer at all. This could be the
|
||||||
|
- * first request or reset after a miss.
|
||||||
|
- */
|
||||||
|
- window = READAHEAD_MIN;
|
||||||
|
- if (fill_readahead (next, count, offset, flags, err) == -1)
|
||||||
|
- return -1;
|
||||||
|
- }
|
||||||
|
+ /* If the underlying plugin doesn't support caching then skip that
|
||||||
|
+ * step completely. The filter will do nothing.
|
||||||
|
+ */
|
||||||
|
+ if (filter_working (h)) {
|
||||||
|
+ struct command ra_cmd = { .type = CMD_CACHE, .next = NULL };
|
||||||
|
+ int64_t size;
|
||||||
|
|
||||||
|
- /* Can we satisfy this request partly or entirely from the prefetch
|
||||||
|
- * buffer?
|
||||||
|
- */
|
||||||
|
- else if (position <= offset && offset < position + length) {
|
||||||
|
- uint32_t n = MIN (position - offset + length, count);
|
||||||
|
- memcpy (buf, &buffer[offset-position], n);
|
||||||
|
- buf += n;
|
||||||
|
- offset += n;
|
||||||
|
- count -= n;
|
||||||
|
- }
|
||||||
|
+ size = next->get_size (next);
|
||||||
|
+ if (size >= 0) {
|
||||||
|
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock);
|
||||||
|
|
||||||
|
- /* Does the request start immediately after the prefetch buffer?
|
||||||
|
- * This is a “hit” allowing us to double the window size.
|
||||||
|
- */
|
||||||
|
- else if (offset == position + length) {
|
||||||
|
- window = MIN (window * 2, READAHEAD_MAX);
|
||||||
|
- if (fill_readahead (next, count, offset, flags, err) == -1)
|
||||||
|
- return -1;
|
||||||
|
+ /* Generate the asynchronous (background) cache command for
|
||||||
|
+ * the readahead window.
|
||||||
|
+ */
|
||||||
|
+ ra_cmd.offset = offset + count;
|
||||||
|
+ if (ra_cmd.offset < size) {
|
||||||
|
+ ra_cmd.count = MIN (window, size - ra_cmd.offset);
|
||||||
|
+ ra_cmd.next = next; /* If .next is non-NULL, we'll send it below. */
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Should we change the window size?
|
||||||
|
+ * If the last readahead < current offset, double the window.
|
||||||
|
+ * If not, but we're still making forward progress, keep the window.
|
||||||
|
+ * If we're not making forward progress, reduce the window to minimum.
|
||||||
|
+ */
|
||||||
|
+ if (last_readahead < offset)
|
||||||
|
+ window = MIN (window * 2, READAHEAD_MAX);
|
||||||
|
+ else if (last_offset < offset)
|
||||||
|
+ /* leave window unchanged */ ;
|
||||||
|
+ else
|
||||||
|
+ window = READAHEAD_MIN;
|
||||||
|
+ last_offset = offset;
|
||||||
|
+ last_readahead = ra_cmd.offset + ra_cmd.count;
|
||||||
|
}
|
||||||
|
|
||||||
|
- /* Else it's a “miss”. Reset everything and start again. */
|
||||||
|
- else
|
||||||
|
- length = 0;
|
||||||
|
+ if (ra_cmd.next &&
|
||||||
|
+ send_command_to_background_thread (&h->ctrl, ra_cmd) == -1)
|
||||||
|
+ return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-/* Any writes or write-like operations kill the prefetch buffer.
|
||||||
|
- *
|
||||||
|
- * We could do better here, but for the current use case of this
|
||||||
|
- * filter it doesn't matter. XXX
|
||||||
|
- */
|
||||||
|
-
|
||||||
|
-static void
|
||||||
|
-kill_readahead (void)
|
||||||
|
-{
|
||||||
|
- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock);
|
||||||
|
- window = READAHEAD_MIN;
|
||||||
|
- length = 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int
|
||||||
|
-readahead_pwrite (nbdkit_next *next,
|
||||||
|
- void *handle,
|
||||||
|
- const void *buf, uint32_t count, uint64_t offset,
|
||||||
|
- uint32_t flags, int *err)
|
||||||
|
-{
|
||||||
|
- kill_readahead ();
|
||||||
|
- return next->pwrite (next, buf, count, offset, flags, err);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int
|
||||||
|
-readahead_trim (nbdkit_next *next,
|
||||||
|
- void *handle,
|
||||||
|
- uint32_t count, uint64_t offset, uint32_t flags,
|
||||||
|
- int *err)
|
||||||
|
-{
|
||||||
|
- kill_readahead ();
|
||||||
|
- return next->trim (next, count, offset, flags, err);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int
|
||||||
|
-readahead_zero (nbdkit_next *next,
|
||||||
|
- void *handle,
|
||||||
|
- uint32_t count, uint64_t offset, uint32_t flags,
|
||||||
|
- int *err)
|
||||||
|
-{
|
||||||
|
- kill_readahead ();
|
||||||
|
- return next->zero (next, count, offset, flags, err);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static int
|
||||||
|
-readahead_flush (nbdkit_next *next,
|
||||||
|
- void *handle, uint32_t flags, int *err)
|
||||||
|
-{
|
||||||
|
- kill_readahead ();
|
||||||
|
- return next->flush (next, flags, err);
|
||||||
|
+ /* Issue the synchronous read. */
|
||||||
|
+ return next->pread (next, buf, count, offset, flags, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct nbdkit_filter filter = {
|
||||||
|
.name = "readahead",
|
||||||
|
.longname = "nbdkit readahead filter",
|
||||||
|
- .unload = readahead_unload,
|
||||||
|
- .prepare = readahead_prepare,
|
||||||
|
- .get_size = readahead_get_size,
|
||||||
|
+ .get_ready = readahead_get_ready,
|
||||||
|
+ .open = readahead_open,
|
||||||
|
+ .close = readahead_close,
|
||||||
|
.can_cache = readahead_can_cache,
|
||||||
|
.pread = readahead_pread,
|
||||||
|
- .pwrite = readahead_pwrite,
|
||||||
|
- .trim = readahead_trim,
|
||||||
|
- .zero = readahead_zero,
|
||||||
|
- .flush = readahead_flush,
|
||||||
|
};
|
||||||
|
|
||||||
|
NBDKIT_REGISTER_FILTER(filter)
|
||||||
|
diff --git a/filters/readahead/readahead.h b/filters/readahead/readahead.h
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000..a68204d5
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/filters/readahead/readahead.h
|
||||||
|
@@ -0,0 +1,60 @@
|
||||||
|
+/* nbdkit
|
||||||
|
+ * Copyright (C) 2019-2022 Red Hat Inc.
|
||||||
|
+ *
|
||||||
|
+ * Redistribution and use in source and binary forms, with or without
|
||||||
|
+ * modification, are permitted provided that the following conditions are
|
||||||
|
+ * met:
|
||||||
|
+ *
|
||||||
|
+ * * Redistributions of source code must retain the above copyright
|
||||||
|
+ * notice, this list of conditions and the following disclaimer.
|
||||||
|
+ *
|
||||||
|
+ * * Redistributions in binary form must reproduce the above copyright
|
||||||
|
+ * notice, this list of conditions and the following disclaimer in the
|
||||||
|
+ * documentation and/or other materials provided with the distribution.
|
||||||
|
+ *
|
||||||
|
+ * * Neither the name of Red Hat nor the names of its contributors may be
|
||||||
|
+ * used to endorse or promote products derived from this software without
|
||||||
|
+ * specific prior written permission.
|
||||||
|
+ *
|
||||||
|
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
||||||
|
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
||||||
|
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||||
|
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||||
|
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
+ * SUCH DAMAGE.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#ifndef NBDKIT_READAHEAD_H
|
||||||
|
+#define NBDKIT_READAHEAD_H
|
||||||
|
+
|
||||||
|
+#include <pthread.h>
|
||||||
|
+
|
||||||
|
+#include <nbdkit-filter.h>
|
||||||
|
+
|
||||||
|
+#include "vector.h"
|
||||||
|
+
|
||||||
|
+/* List of commands issued to the background thread. */
|
||||||
|
+struct command {
|
||||||
|
+ enum { CMD_QUIT, CMD_CACHE } type;
|
||||||
|
+ nbdkit_next *next;
|
||||||
|
+ uint64_t offset;
|
||||||
|
+ uint32_t count;
|
||||||
|
+};
|
||||||
|
+DEFINE_VECTOR_TYPE(command_queue, struct command);
|
||||||
|
+
|
||||||
|
+struct bgthread_ctrl {
|
||||||
|
+ command_queue cmds; /* Command queue. */
|
||||||
|
+ pthread_mutex_t lock; /* Lock for queue. */
|
||||||
|
+ pthread_cond_t cond; /* Condition queue size 0 -> 1. */
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+/* Start background thread (one per connection). */
|
||||||
|
+extern void *readahead_thread (void *vp);
|
||||||
|
+
|
||||||
|
+#endif /* NBDKIT_READAHEAD_H */
|
||||||
|
diff --git a/tests/test-readahead.sh b/tests/test-readahead.sh
|
||||||
|
index 7ec7f8e9..17126e5a 100755
|
||||||
|
--- a/tests/test-readahead.sh
|
||||||
|
+++ b/tests/test-readahead.sh
|
||||||
|
@@ -59,7 +59,7 @@ for i in range(0, 512*10, 512):
|
||||||
|
echo $((end_t - start_t))
|
||||||
|
}
|
||||||
|
|
||||||
|
-t1=$(test --filter=readahead)
|
||||||
|
+t1=$(test --filter=readahead --filter=cache)
|
||||||
|
t2=$(test)
|
||||||
|
|
||||||
|
# In the t1 case we should make only 1 request into the plugin,
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
120
0003-readahead-Fix-test.patch
Normal file
120
0003-readahead-Fix-test.patch
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
From a9708db324eb1989680132813da20d5a4c5496a9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||||
|
Date: Thu, 21 Apr 2022 16:14:46 +0100
|
||||||
|
Subject: [PATCH] readahead: Fix test
|
||||||
|
|
||||||
|
The previous test turned out to be pretty bad at testing the new
|
||||||
|
filter. A specific problem is that the filter starts a background
|
||||||
|
thread which issues .cache requests, while on the main connection
|
||||||
|
.pread requests are being passed through. The test used
|
||||||
|
--filter=readahead --filter=cache with the cache filter only caching
|
||||||
|
on .cache requests (since cache-on-read defaults to false), so only
|
||||||
|
caching requests made by the background thread.
|
||||||
|
|
||||||
|
main thread
|
||||||
|
client ---- .pread ----- delay-filter -------> plugin
|
||||||
|
\
|
||||||
|
\ background thread
|
||||||
|
.cache --- cache-filter
|
||||||
|
|
||||||
|
Under very high load, the background thread could be starved. This
|
||||||
|
means no requests were being cached at all, and all requests were
|
||||||
|
passing through the delay filter. It would appear that readahead was
|
||||||
|
failing (which it was, in a way).
|
||||||
|
|
||||||
|
It's not very easy to fix this since readahead is best-effort, but we
|
||||||
|
can go back to using a simpler plugin that logs reads and caches and
|
||||||
|
check that they look valid.
|
||||||
|
|
||||||
|
Update: commit 2ff548d66ad3eae87868402ec5b3319edd12090f
|
||||||
|
(cherry picked from commit db1e3311727c6ecab3264a1811d33db1aa45a4d0)
|
||||||
|
---
|
||||||
|
tests/test-readahead.sh | 61 +++++++++++++++++++++++------------------
|
||||||
|
1 file changed, 35 insertions(+), 26 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tests/test-readahead.sh b/tests/test-readahead.sh
|
||||||
|
index 17126e5a..37f4a06f 100755
|
||||||
|
--- a/tests/test-readahead.sh
|
||||||
|
+++ b/tests/test-readahead.sh
|
||||||
|
@@ -30,43 +30,52 @@
|
||||||
|
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
# SUCH DAMAGE.
|
||||||
|
|
||||||
|
-# Is the readahead filter faster? Copy a blank disk with a custom
|
||||||
|
-# plugin that sleeps on every request. Because the readahead filter
|
||||||
|
-# should result in fewer requests it should run faster.
|
||||||
|
-
|
||||||
|
source ./functions.sh
|
||||||
|
set -e
|
||||||
|
set -x
|
||||||
|
|
||||||
|
-requires_filter delay
|
||||||
|
+requires_plugin sh
|
||||||
|
requires nbdsh --version
|
||||||
|
requires dd iflag=count_bytes </dev/null
|
||||||
|
|
||||||
|
-files="readahead.img"
|
||||||
|
+files="readahead.out"
|
||||||
|
rm -f $files
|
||||||
|
cleanup_fn rm -f $files
|
||||||
|
|
||||||
|
-test ()
|
||||||
|
-{
|
||||||
|
- start_t=$SECONDS
|
||||||
|
- nbdkit -fv -U - "$@" null size=1M --filter=delay rdelay=5 \
|
||||||
|
- --run 'nbdsh --uri "$uri" -c "
|
||||||
|
+nbdkit -fv -U - "$@" sh - \
|
||||||
|
+ --filter=readahead \
|
||||||
|
+ --run 'nbdsh --uri "$uri" -c "
|
||||||
|
for i in range(0, 512*10, 512):
|
||||||
|
h.pread(512, i)
|
||||||
|
-"'
|
||||||
|
+"' <<'EOF'
|
||||||
|
+case "$1" in
|
||||||
|
+ thread_model)
|
||||||
|
+ echo parallel
|
||||||
|
+ ;;
|
||||||
|
+ can_cache)
|
||||||
|
+ echo native
|
||||||
|
+ ;;
|
||||||
|
+ get_size)
|
||||||
|
+ echo 1M
|
||||||
|
+ ;;
|
||||||
|
+ cache)
|
||||||
|
+ echo "$@" >> readahead.out
|
||||||
|
+ ;;
|
||||||
|
+ pread)
|
||||||
|
+ echo "$@" >> readahead.out
|
||||||
|
+ dd if=/dev/zero count=$3 iflag=count_bytes
|
||||||
|
+ ;;
|
||||||
|
+ *)
|
||||||
|
+ exit 2
|
||||||
|
+ ;;
|
||||||
|
+esac
|
||||||
|
+EOF
|
||||||
|
|
||||||
|
- end_t=$SECONDS
|
||||||
|
- echo $((end_t - start_t))
|
||||||
|
-}
|
||||||
|
+cat readahead.out
|
||||||
|
|
||||||
|
-t1=$(test --filter=readahead --filter=cache)
|
||||||
|
-t2=$(test)
|
||||||
|
-
|
||||||
|
-# In the t1 case we should make only 1 request into the plugin,
|
||||||
|
-# resulting in around 1 sleep period (5 seconds). In the t2 case we
|
||||||
|
-# make 10 requests so sleep for around 50 seconds. t1 should be < t2
|
||||||
|
-# is every reasonable scenario.
|
||||||
|
-if [ $t1 -ge $t2 ]; then
|
||||||
|
- echo "$0: readahead filter took longer, should be shorter"
|
||||||
|
- exit 1
|
||||||
|
-fi
|
||||||
|
+# We should see the pread requests, and additional cache requests for
|
||||||
|
+# the 32K region following each pread request.
|
||||||
|
+for i in `seq 0 512 $((512*10 - 512))` ; do
|
||||||
|
+ grep "pread 512 $i" readahead.out
|
||||||
|
+ grep "cache 32768 $((i+512))" readahead.out
|
||||||
|
+done
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
14
nbdkit.spec
14
nbdkit.spec
@ -46,13 +46,13 @@ ExclusiveArch: x86_64
|
|||||||
%global verify_tarball_signature 1
|
%global verify_tarball_signature 1
|
||||||
|
|
||||||
# If there are patches which touch autotools files, set this to 1.
|
# If there are patches which touch autotools files, set this to 1.
|
||||||
%global patches_touch_autotools %{nil}
|
%global patches_touch_autotools 1
|
||||||
|
|
||||||
# The source directory.
|
# The source directory.
|
||||||
%global source_directory 1.30-stable
|
%global source_directory 1.30-stable
|
||||||
|
|
||||||
Name: nbdkit
|
Name: nbdkit
|
||||||
Version: 1.30.2
|
Version: 1.30.4
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: NBD server
|
Summary: NBD server
|
||||||
|
|
||||||
@ -78,7 +78,9 @@ Source3: copy-patches.sh
|
|||||||
# https://gitlab.com/nbdkit/nbdkit/-/commits/rhel-9.1/
|
# https://gitlab.com/nbdkit/nbdkit/-/commits/rhel-9.1/
|
||||||
|
|
||||||
# Patches.
|
# Patches.
|
||||||
# (none)
|
Patch0001: 0001-ssh-Allow-the-remote-file-to-be-created.patch
|
||||||
|
Patch0002: 0002-readahead-Rewrite-this-filter-so-it-prefetches-using.patch
|
||||||
|
Patch0003: 0003-readahead-Fix-test.patch
|
||||||
|
|
||||||
# For automatic RPM Provides generation.
|
# For automatic RPM Provides generation.
|
||||||
# See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html
|
# See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html
|
||||||
@ -1178,8 +1180,8 @@ export LIBGUESTFS_TRACE=1
|
|||||||
|
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
* Mon Apr 04 2022 Richard W.M. Jones <rjones@redhat.com> - 1.30.2-1
|
* Tue Apr 26 2022 Richard W.M. Jones <rjones@redhat.com> - 1.30.4-1
|
||||||
- Rebase to new stable branch version 1.30.2
|
- Rebase to new stable branch version 1.30.4
|
||||||
resolves: rhbz#2059289
|
resolves: rhbz#2059289
|
||||||
- Add automatic provides generator and subpackage nbdkit-srpm-macros
|
- Add automatic provides generator and subpackage nbdkit-srpm-macros
|
||||||
resolves: rhbz#2059291
|
resolves: rhbz#2059291
|
||||||
@ -1188,6 +1190,8 @@ export LIBGUESTFS_TRACE=1
|
|||||||
- vddk: Fix use of uninitialized memory when computing block size
|
- vddk: Fix use of uninitialized memory when computing block size
|
||||||
resolves: rhbz#2066655
|
resolves: rhbz#2066655
|
||||||
- Skip vsock tests unless the vsock_loopback module is loaded (2069558)
|
- Skip vsock tests unless the vsock_loopback module is loaded (2069558)
|
||||||
|
- Add support for ssh create remote file.
|
||||||
|
- Backport new readahead filter from 1.32.
|
||||||
|
|
||||||
* Mon Jan 24 2022 Richard W.M. Jones <rjones@redhat.com> - 1.28.5-1
|
* Mon Jan 24 2022 Richard W.M. Jones <rjones@redhat.com> - 1.28.5-1
|
||||||
- Rebase to new stable branch version 1.28.5
|
- Rebase to new stable branch version 1.28.5
|
||||||
|
4
sources
4
sources
@ -1,2 +1,2 @@
|
|||||||
SHA512 (nbdkit-1.30.2.tar.gz) = 84da92005bbb59a860c9ba5a88082d859c9dd4f9db5c81d8760e9d6925e10d80d27de78be9391113bf715d16de7726594b5b803512cd8b59ce14ec135fc4b09d
|
SHA512 (nbdkit-1.30.4.tar.gz) = b2db6cde29b04fc73831a5f062b7b5d0a4ed2c3785ccefe81add4bc89cd62d5705b904922f53ea1fd4b0987c9aa2a0ef34fc58eefa804b37d424a44112fbf1b1
|
||||||
SHA512 (nbdkit-1.30.2.tar.gz.sig) = c64ccdbc25f56626360aa812af330e33f26e6954415b93976f5a3d58808affdc94efa6e309066b23dbfd354192c4693a408b79aa583a6f5e84ab1c2ef406179e
|
SHA512 (nbdkit-1.30.4.tar.gz.sig) = ed8a60214274d88f418656b1b50d9eb5b73aa0ff18e36446cf21a6fe2c07ca72311fd745c5d99f94cb3676dba2ee3a768ab08a05cf9d0d24d8156c43118d57d6
|
||||||
|
Loading…
Reference in New Issue
Block a user