diff --git a/0001-ssh-Allow-the-remote-file-to-be-created.patch b/0001-ssh-Allow-the-remote-file-to-be-created.patch new file mode 100644 index 0000000..aa28047 --- /dev/null +++ b/0001-ssh-Allow-the-remote-file-to-be-created.patch @@ -0,0 +1,292 @@ +From 04cf7c2aa8aa8ddc446f1571d2f98661ba8a8ca7 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Fri, 15 Apr 2022 12:08:37 +0100 +Subject: [PATCH] ssh: Allow the remote file to be created + +This adds new parameters, create=(true|false), create-size=SIZE and +create-mode=MODE to create and truncate the remote file. + +Reviewed-by: Laszlo Ersek +(cherry picked from commit 0793f30b1071753532362b2ebf9cb8156a88c3c3) +--- + plugins/ssh/nbdkit-ssh-plugin.pod | 34 ++++++++- + plugins/ssh/ssh.c | 112 +++++++++++++++++++++++++++--- + tests/test-ssh.sh | 13 +++- + 3 files changed, 146 insertions(+), 13 deletions(-) + +diff --git a/plugins/ssh/nbdkit-ssh-plugin.pod b/plugins/ssh/nbdkit-ssh-plugin.pod +index 3f401c15..2bc2c4a7 100644 +--- a/plugins/ssh/nbdkit-ssh-plugin.pod ++++ b/plugins/ssh/nbdkit-ssh-plugin.pod +@@ -5,8 +5,10 @@ nbdkit-ssh-plugin - access disk images over the SSH protocol + =head1 SYNOPSIS + + nbdkit ssh host=HOST [path=]PATH +- [compression=true] [config=CONFIG_FILE] [identity=FILENAME] +- [known-hosts=FILENAME] [password=PASSWORD|-|+FILENAME] ++ [compression=true] [config=CONFIG_FILE] ++ [create=true] [create-mode=MODE] [create-size=SIZE] ++ [identity=FILENAME] [known-hosts=FILENAME] ++ [password=PASSWORD|-|+FILENAME] + [port=PORT] [timeout=SECS] [user=USER] + [verify-remote-host=false] + +@@ -62,6 +64,34 @@ The C parameter is optional. If it is I specified at all + then F<~/.ssh/config> and F are both read. + Missing or unreadable files are ignored. + ++=item B ++ ++(nbdkit E 1.32) ++ ++If set, the remote file will be created. The remote file is created ++on the first NBD connection to nbdkit, not when nbdkit starts up. If ++the file already exists, it will be replaced and any existing content ++lost. ++ ++If using this option, you must use C. C can ++be used to control the permissions of the new file. ++ ++=item BMODE ++ ++(nbdkit E 1.32) ++ ++If using C specify the default permissions of the new ++remote file. You can use octal modes like C or ++C. The default is C<0600>, ie. only readable and ++writable by the remote user. ++ ++=item BSIZE ++ ++(nbdkit E 1.32) ++ ++If using C, specify the virtual size of the new disk. ++C can use modifiers like C<100M> etc. ++ + =item BHOST + + Specify the name or IP address of the remote host. +diff --git a/plugins/ssh/ssh.c b/plugins/ssh/ssh.c +index 77cfcf6c..ac9cc230 100644 +--- a/plugins/ssh/ssh.c ++++ b/plugins/ssh/ssh.c +@@ -44,12 +44,15 @@ + #include + #include + ++#include ++ + #include + #include + #include + + #include + ++#include "cleanup.h" + #include "const-string-vector.h" + #include "minmax.h" + +@@ -63,6 +66,9 @@ static const char *known_hosts = NULL; + static const_string_vector identities = empty_vector; + static uint32_t timeout = 0; + static bool compression = false; ++static bool create = false; ++static int64_t create_size = -1; ++static unsigned create_mode = S_IRUSR | S_IWUSR /* 0600 */; + + /* config can be: + * NULL => parse options from default file +@@ -167,6 +173,27 @@ ssh_config (const char *key, const char *value) + return -1; + compression = r; + } ++ else if (strcmp (key, "create") == 0) { ++ r = nbdkit_parse_bool (value); ++ if (r == -1) ++ return -1; ++ create = r; ++ } ++ else if (strcmp (key, "create-size") == 0) { ++ create_size = nbdkit_parse_size (value); ++ if (create_size == -1) ++ return -1; ++ } ++ else if (strcmp (key, "create-mode") == 0) { ++ r = nbdkit_parse_unsigned (key, value, &create_mode); ++ if (r == -1) ++ return -1; ++ /* OpenSSH checks this too. */ ++ if (create_mode > 0777) { ++ nbdkit_error ("create-mode must be <= 0777"); ++ return -1; ++ } ++ } + + else { + nbdkit_error ("unknown parameter '%s'", key); +@@ -186,6 +213,13 @@ ssh_config_complete (void) + return -1; + } + ++ /* If create=true, create-size must be supplied. */ ++ if (create && create_size == -1) { ++ nbdkit_error ("if using create=true, you must specify the size " ++ "of the new remote file using create-size=SIZE"); ++ return -1; ++ } ++ + return 0; + } + +@@ -200,7 +234,10 @@ ssh_config_complete (void) + "identity= Prepend private key (identity) file.\n" \ + "timeout=SECS Set SSH connection timeout.\n" \ + "verify-remote-host=false Ignore known_hosts.\n" \ +- "compression=true Enable compression." ++ "compression=true Enable compression.\n" \ ++ "create=true Create the remote file.\n" \ ++ "create-mode=MODE Set the permissions of the remote file.\n" \ ++ "create-size=SIZE Set the size of the remote file." + + /* Since we must simulate atomic pread and pwrite using seek + + * read/write, calls on each handle must be serialized. +@@ -329,6 +366,65 @@ authenticate (struct ssh_handle *h) + return -1; + } + ++/* This function opens or creates the remote file (depending on ++ * create=false|true). Parallel connections might call this function ++ * at the same time, and so we must hold a lock to ensure that the ++ * file is created at most once. ++ */ ++static pthread_mutex_t create_lock = PTHREAD_MUTEX_INITIALIZER; ++ ++static sftp_file ++open_or_create_path (ssh_session session, sftp_session sftp, int readonly) ++{ ++ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&create_lock); ++ int access_type; ++ int r; ++ sftp_file file; ++ ++ access_type = readonly ? O_RDONLY : O_RDWR; ++ if (create) access_type |= O_CREAT | O_TRUNC; ++ ++ file = sftp_open (sftp, path, access_type, S_IRWXU); ++ if (!file) { ++ nbdkit_error ("cannot %s file for %s: %s", ++ create ? "create" : "open", ++ readonly ? "reading" : "writing", ++ ssh_get_error (session)); ++ return NULL; ++ } ++ ++ if (create) { ++ /* There's no sftp_truncate call. However OpenSSH lets you call ++ * SSH_FXP_SETSTAT + SSH_FILEXFER_ATTR_SIZE which invokes ++ * truncate(2) on the server. Libssh doesn't provide a binding ++ * for SSH_FXP_FSETSTAT so we have to pass the session + path. ++ */ ++ struct sftp_attributes_struct attrs = { ++ .flags = SSH_FILEXFER_ATTR_SIZE | ++ SSH_FILEXFER_ATTR_PERMISSIONS, ++ .size = create_size, ++ .permissions = create_mode, ++ }; ++ ++ r = sftp_setstat (sftp, path, &attrs); ++ if (r != SSH_OK) { ++ nbdkit_error ("setstat failed: %s", ssh_get_error (session)); ++ ++ /* Best-effort attempt to delete the remote file on failure. */ ++ r = sftp_unlink (sftp, path); ++ if (r != SSH_OK) ++ nbdkit_debug ("unlink failed: %s", ssh_get_error (session)); ++ ++ return NULL; ++ } ++ } ++ ++ /* On the next connection, don't create or truncate the file. */ ++ create = false; ++ ++ return file; ++} ++ + /* Create the per-connection handle. */ + static void * + ssh_open (int readonly) +@@ -337,7 +433,6 @@ ssh_open (int readonly) + const int set = 1; + size_t i; + int r; +- int access_type; + + h = calloc (1, sizeof *h); + if (h == NULL) { +@@ -471,7 +566,7 @@ ssh_open (int readonly) + if (authenticate (h) == -1) + goto err; + +- /* Open the SFTP connection and file. */ ++ /* Open the SFTP connection. */ + h->sftp = sftp_new (h->session); + if (!h->sftp) { + nbdkit_error ("failed to allocate sftp session: %s", +@@ -484,14 +579,11 @@ ssh_open (int readonly) + ssh_get_error (h->session)); + goto err; + } +- access_type = readonly ? O_RDONLY : O_RDWR; +- h->file = sftp_open (h->sftp, path, access_type, S_IRWXU); +- if (!h->file) { +- nbdkit_error ("cannot open file for %s: %s", +- readonly ? "reading" : "writing", +- ssh_get_error (h->session)); ++ ++ /* Open or create the remote file. */ ++ h->file = open_or_create_path (h->session, h->sftp, readonly); ++ if (!h->file) + goto err; +- } + + nbdkit_debug ("opened libssh handle"); + +diff --git a/tests/test-ssh.sh b/tests/test-ssh.sh +index 6c0ce410..f04b4488 100755 +--- a/tests/test-ssh.sh ++++ b/tests/test-ssh.sh +@@ -36,6 +36,7 @@ set -x + + requires test -f disk + requires nbdcopy --version ++requires stat --version + + # Check that ssh to localhost will work without any passwords or phrases. + # +@@ -48,7 +49,7 @@ then + exit 77 + fi + +-files="ssh.img" ++files="ssh.img ssh2.img" + rm -f $files + cleanup_fn rm -f $files + +@@ -59,3 +60,13 @@ nbdkit -v -D ssh.log=2 -U - \ + + # The output should be identical. + cmp disk ssh.img ++ ++# Copy local file 'ssh.img' to newly created "remote" 'ssh2.img' ++size="$(stat -c %s disk)" ++nbdkit -v -D ssh.log=2 -U - \ ++ ssh host=localhost $PWD/ssh2.img \ ++ create=true create-size=$size \ ++ --run 'nbdcopy ssh.img "$uri"' ++ ++# The output should be identical. ++cmp disk ssh2.img +-- +2.31.1 + diff --git a/0002-readahead-Rewrite-this-filter-so-it-prefetches-using.patch b/0002-readahead-Rewrite-this-filter-so-it-prefetches-using.patch new file mode 100644 index 0000000..50039d5 --- /dev/null +++ b/0002-readahead-Rewrite-this-filter-so-it-prefetches-using.patch @@ -0,0 +1,794 @@ +From 160601397d307f70586f1bc70111141855ff68ea Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Sat, 16 Apr 2022 18:39:13 +0100 +Subject: [PATCH] readahead: Rewrite this filter so it prefetches using .cache + +The previous readahead filter did not work well and we have stopped +using it in virt-v2v. However the concept is a good one if we can +make it work. This commit completely rethinks the filter. + +Now, in parallel with the ordinary pread command, we issue a prefetch +(ie. .cache) to the underlying plugin for the data immediately +following the pread. For example a simple sequence of operations: + + t=1 pread (offset=0, count=65536) + t=2 pread (offset=65536, count=65536) + t=3 pread (offset=131072, count=65536) + +would become: + + t=1 pread (offset=0, count=65536) <--\ issued + cache (offset=65536, count=65536) <--/ in parallel + t=2 pread (offset=65536, count=65536) + cache (offset=131072, count=65536) + t=3 pread (offset=131072, count=65536) + cache (offset=196608, count=65536) + +This requires that the underlying filter(s) and plugin chain can +actually do something with the .cache request. If this is not the +case then the filter does nothing (but it will print a warning). For +plugins which don't have native support for prefetching, it is +sufficient to insert nbdkit-cache-filter after this filter. +(nbdkit-cow-filter can also be used with cow-on-cache=true, but that +is more useful for advanced users who are already using the cow +filter). + +The implementation creates a background thread per connection to issue +the parallel .cache requests. This is safer than the alternative (one +background thread in total) since we don't have to deal with the +problem of cache requests being issued with the wrong export name or +stale next pointer. The background thread is controlled by a queue of +commands, with the only possible commands being "cache" or "quit". + +Because the background thread issues parallel requests on the same +connection, the underlying plugin must support the parallel thread +model, otherwise we would be violating the plugin's thread model. It +may be possible in future to open a new connection from the background +thread (but with the same exportname), which would lift this +restriction to at least serialize_requests. Because of the current +limitation, nbdkit-curl-plugin cannot use prefetch. + +(cherry picked from commit 2ff548d66ad3eae87868402ec5b3319edd12090f) +--- + TODO | 22 +- + filters/readahead/Makefile.am | 2 + + filters/readahead/bgthread.c | 76 ++++ + filters/readahead/nbdkit-readahead-filter.pod | 70 +++- + filters/readahead/readahead.c | 338 +++++++++--------- + filters/readahead/readahead.h | 60 ++++ + tests/test-readahead.sh | 2 +- + 7 files changed, 367 insertions(+), 203 deletions(-) + create mode 100644 filters/readahead/bgthread.c + create mode 100644 filters/readahead/readahead.h + +diff --git a/TODO b/TODO +index 5ae21db5..4d2a9796 100644 +--- a/TODO ++++ b/TODO +@@ -62,16 +62,6 @@ General ideas for improvements + continue to keep their non-standard handshake while utilizing nbdkit + to prototype new behaviors in serving the kernel. + +-* Background thread for filters. Some filters (readahead, cache and +- proposed scan filter - see below) could be more effective if they +- were able to defer work to a background thread. We finally have +- nbdkit_next_context_open and friends for allowing a background +- thread to have access into the plugin, but still need to worry about +- thread-safety (how much must the filter do vs. nbdkit, to avoid +- calling into the plugin too many times at once) and cleanup +- (spawning the thread during .after_fork is viable, but cleaning it +- up during .unload is too late). +- + * "nbdkit.so": nbdkit as a loadable shared library. The aim of nbdkit + is to make it reusable from other programs (see nbdkit-captive(1)). + If it was a loadable shared library it would be even more reusable. +@@ -228,6 +218,8 @@ Suggestions for filters + * nbdkit-cache-filter should handle ENOSPC errors automatically by + reclaiming blocks from the cache + ++* nbdkit-cache-filter could use a background thread for reclaiming. ++ + * zstd filter was requested as a way to do what we currently do with + xz but saving many hours on compression (at the cost of hundreds of + MBs of extra data) +@@ -240,6 +232,16 @@ Suggestions for filters + could inject a flush after pausing. However this requires that + filter background threads have access to the plugin (see above). + ++nbdkit-readahead-filter: ++ ++* The filter should open a new connection to the plugin per background ++ thread so it is able to work with plugins that use the ++ serialize_requests thread model (like curl). At the moment it makes ++ requests on the same connection, so it requires plugins to use the ++ parallel thread model. ++ ++* It should combine (or avoid) overlapping cache requests. ++ + nbdkit-rate-filter: + + * allow other kinds of traffic shaping such as VBR +diff --git a/filters/readahead/Makefile.am b/filters/readahead/Makefile.am +index ee5bb3fb..187993ae 100644 +--- a/filters/readahead/Makefile.am ++++ b/filters/readahead/Makefile.am +@@ -37,6 +37,8 @@ filter_LTLIBRARIES = nbdkit-readahead-filter.la + + nbdkit_readahead_filter_la_SOURCES = \ + readahead.c \ ++ readahead.h \ ++ bgthread.c \ + $(top_srcdir)/include/nbdkit-filter.h \ + $(NULL) + +diff --git a/filters/readahead/bgthread.c b/filters/readahead/bgthread.c +new file mode 100644 +index 00000000..5894bb5f +--- /dev/null ++++ b/filters/readahead/bgthread.c +@@ -0,0 +1,76 @@ ++/* nbdkit ++ * Copyright (C) 2019-2022 Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * * Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * * Neither the name of Red Hat nor the names of its contributors may be ++ * used to endorse or promote products derived from this software without ++ * specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ++ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A ++ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR ++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF ++ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, ++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "readahead.h" ++ ++#include "cleanup.h" ++ ++void * ++readahead_thread (void *vp) ++{ ++ struct bgthread_ctrl *ctrl = vp; ++ ++ for (;;) { ++ struct command cmd; ++ ++ /* Wait until we are sent at least one command. */ ++ { ++ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&ctrl->lock); ++ while (ctrl->cmds.len == 0) ++ pthread_cond_wait (&ctrl->cond, &ctrl->lock); ++ cmd = ctrl->cmds.ptr[0]; ++ command_queue_remove (&ctrl->cmds, 0); ++ } ++ ++ switch (cmd.type) { ++ case CMD_QUIT: ++ /* Finish processing and exit the thread. */ ++ return NULL; ++ ++ case CMD_CACHE: ++ /* Issue .cache (readahead) to underlying plugin. We ignore any ++ * errors because there's no way to communicate that back to the ++ * client, and readahead is only advisory. ++ */ ++ cmd.next->cache (cmd.next, cmd.count, cmd.offset, 0, NULL); ++ } ++ } ++} +diff --git a/filters/readahead/nbdkit-readahead-filter.pod b/filters/readahead/nbdkit-readahead-filter.pod +index c220d379..630e5924 100644 +--- a/filters/readahead/nbdkit-readahead-filter.pod ++++ b/filters/readahead/nbdkit-readahead-filter.pod +@@ -1,28 +1,66 @@ + =head1 NAME + +-nbdkit-readahead-filter - prefetch data when reading sequentially ++nbdkit-readahead-filter - prefetch data ahead of sequential reads + + =head1 SYNOPSIS + +- nbdkit --filter=readahead plugin ++ nbdkit --filter=readahead PLUGIN ++ ++ nbdkit --filter=readahead --filter=cache PLUGIN ++ ++ nbdkit --filter=readahead --filter=cow PLUGIN cow-on-cache=true + + =head1 DESCRIPTION + + C is a filter that prefetches data when the +-client is reading sequentially. ++client is reading. + +-A common use for this filter is to accelerate sequential copy +-operations (like S>) when plugin requests have a +-high overhead (like L). For example: +- +- nbdkit -U - --filter=readahead curl https://example.com/disk.img \ +- --run 'qemu-img convert $nbd disk.img' ++When the client issues a read, this filter issues a parallel prefetch ++(C<.cache>) for subsequent data. Plugins which support this command ++will prefetch the data, making sequential reads faster. For plugins ++which do not support this command, you can inject ++L below (after) this filter, giving ++approximately the same effect. L can be used ++instead of nbdkit-cache-filter, if you add the C ++option. + + The filter uses a simple adaptive algorithm which accelerates +-sequential reads, but has a small penalty if the client does random +-reads. If the client mixes reads with writes or write-like operations +-(trimming, zeroing) then it will work but there can be a large +-performance penalty. ++sequential reads and requires no further configuration. ++ ++=head2 Limitations ++ ++In a number of significant cases this filter will do nothing. The ++filter will print a warning message if this happens. ++ ++=over 4 ++ ++=item Thread model must be parallel ++ ++For example L only supports ++C, and so this filter cannot perform prefetches in ++parallel with the read requests. ++ ++We may be able to lift this restriction in future. ++ ++=item Underlying filters or plugin must support C<.cache> (prefetch) ++ ++Very many plugins do not have the concept of prefetching and/or ++do not implement the C<.cache> callback, and so there is no ++way for this filter to issue prefetches. ++ ++You can usually get around this by adding I<--filter=cache> after this ++filter as explained above. It may be necessary to limit the total ++size of the cache (see L). ++ ++=item Clients and kernels may do readahead already ++ ++It may be the case that NBD clients are already issuing ++C (NBD prefetch) commands. It may also be the case ++that your plugin is using local file functions where the kernel is ++doing readahead. In such cases this filter is not necessary and may ++be pessimal. ++ ++=back + + =head1 PARAMETERS + +@@ -50,9 +88,9 @@ C first appeared in nbdkit 1.12. + + L, + L, +-L, ++L, ++L, + L, +-L, + L, + L, + L, +@@ -64,4 +102,4 @@ Richard W.M. Jones + + =head1 COPYRIGHT + +-Copyright (C) 2019 Red Hat Inc. ++Copyright (C) 2019-2022 Red Hat Inc. +diff --git a/filters/readahead/readahead.c b/filters/readahead/readahead.c +index f5552d4c..1d7ae111 100644 +--- a/filters/readahead/readahead.c ++++ b/filters/readahead/readahead.c +@@ -1,5 +1,5 @@ + /* nbdkit +- * Copyright (C) 2019-2021 Red Hat Inc. ++ * Copyright (C) 2019-2022 Red Hat Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are +@@ -34,232 +34,218 @@ + + #include + #include ++#include + #include + #include + #include +- + #include + + #include + ++#include "readahead.h" ++ + #include "cleanup.h" + #include "minmax.h" +- +-/* Copied from server/plugins.c. */ +-#define MAX_REQUEST_SIZE (64 * 1024 * 1024) ++#include "vector.h" + + /* These could be made configurable in future. */ +-#define READAHEAD_MIN 65536 +-#define READAHEAD_MAX MAX_REQUEST_SIZE +- +-/* This lock protects the global state. */ +-static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; +- +-/* The real size of the underlying plugin. */ +-static uint64_t size; ++#define READAHEAD_MIN 32768 ++#define READAHEAD_MAX (4*1024*1024) + + /* Size of the readahead window. */ ++static pthread_mutex_t window_lock = PTHREAD_MUTEX_INITIALIZER; + static uint64_t window = READAHEAD_MIN; ++static uint64_t last_offset = 0, last_readahead = 0; + +-/* The single prefetch buffer shared by all threads, and its virtual +- * location in the virtual disk. The prefetch buffer grows +- * dynamically as required, but never shrinks. ++static int thread_model = -1; /* Thread model of the underlying plugin. */ ++ ++/* Per-connection data. */ ++struct readahead_handle { ++ int can_cache; /* Can the underlying plugin cache? */ ++ pthread_t thread; /* The background thread, one per connection. */ ++ struct bgthread_ctrl ctrl; ++}; ++ ++/* We have various requirements of the underlying filter(s) + plugin: ++ * - They must support NBDKIT_CACHE_NATIVE (otherwise our requests ++ * would not do anything useful). ++ * - They must use the PARALLEL thread model (otherwise we could ++ * violate their thread model). ++ */ ++static bool ++filter_working (struct readahead_handle *h) ++{ ++ return ++ h->can_cache == NBDKIT_CACHE_NATIVE && ++ thread_model == NBDKIT_THREAD_MODEL_PARALLEL; ++} ++ ++static bool ++suggest_cache_filter (struct readahead_handle *h) ++{ ++ return ++ h->can_cache != NBDKIT_CACHE_NATIVE && ++ thread_model == NBDKIT_THREAD_MODEL_PARALLEL; ++} ++ ++/* We need to hook into .get_ready() so we can read the final thread ++ * model (of the whole server). + */ +-static char *buffer = NULL; +-static size_t bufsize = 0; +-static uint64_t position; +-static uint32_t length = 0; ++static int ++readahead_get_ready (int final_thread_model) ++{ ++ thread_model = final_thread_model; ++ return 0; ++} ++ ++static int ++send_command_to_background_thread (struct bgthread_ctrl *ctrl, ++ const struct command cmd) ++{ ++ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&ctrl->lock); ++ if (command_queue_append (&ctrl->cmds, cmd) == -1) ++ return -1; ++ /* Signal the thread if it could be sleeping on an empty queue. */ ++ if (ctrl->cmds.len == 1) ++ pthread_cond_signal (&ctrl->cond); ++ return 0; ++} ++ ++static void * ++readahead_open (nbdkit_next_open *next, nbdkit_context *nxdata, ++ int readonly, const char *exportname, int is_tls) ++{ ++ struct readahead_handle *h; ++ int err; ++ ++ if (next (nxdata, readonly, exportname) == -1) ++ return NULL; ++ ++ h = malloc (sizeof *h); ++ if (h == NULL) { ++ nbdkit_error ("malloc: %m"); ++ return NULL; ++ } ++ ++ h->ctrl.cmds = (command_queue) empty_vector; ++ pthread_mutex_init (&h->ctrl.lock, NULL); ++ pthread_cond_init (&h->ctrl.cond, NULL); ++ ++ /* Create the background thread. */ ++ err = pthread_create (&h->thread, NULL, readahead_thread, &h->ctrl); ++ if (err != 0) { ++ errno = err; ++ nbdkit_error ("pthread_create: %m"); ++ pthread_cond_destroy (&h->ctrl.cond); ++ pthread_mutex_destroy (&h->ctrl.lock); ++ free (h); ++ return NULL; ++ } ++ ++ return h; ++} + + static void +-readahead_unload (void) ++readahead_close (void *handle) + { +- free (buffer); ++ struct readahead_handle *h = handle; ++ const struct command quit_cmd = { .type = CMD_QUIT }; ++ ++ send_command_to_background_thread (&h->ctrl, quit_cmd); ++ pthread_join (h->thread, NULL); ++ pthread_cond_destroy (&h->ctrl.cond); ++ pthread_mutex_destroy (&h->ctrl.lock); ++ command_queue_reset (&h->ctrl.cmds); ++ free (h); + } + +-static int64_t readahead_get_size (nbdkit_next *next, void *handle); +- +-/* In prepare, force a call to get_size which sets the size global. */ + static int +-readahead_prepare (nbdkit_next *next, void *handle, int readonly) ++readahead_can_cache (nbdkit_next *next, void *handle) + { +- int64_t r; ++ struct readahead_handle *h = handle; ++ int r; + +- r = readahead_get_size (next, handle); +- return r >= 0 ? 0 : -1; +-} +- +-/* Get the size. */ +-static int64_t +-readahead_get_size (nbdkit_next *next, void *handle) +-{ +- int64_t r; +- +- r = next->get_size (next); ++ /* Call next->can_cache to read the underlying 'can_cache'. */ ++ r = next->can_cache (next); + if (r == -1) + return -1; ++ h->can_cache = r; + +- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); +- size = r; ++ if (!filter_working (h)) { ++ nbdkit_error ("readahead: warning: underlying plugin does not support " ++ "NBD_CMD_CACHE or PARALLEL thread model, so the filter " ++ "won't do anything"); ++ if (suggest_cache_filter (h)) ++ nbdkit_error ("readahead: try adding --filter=cache " ++ "after this filter"); ++ /* This is an error, but that's just to ensure that the warning ++ * above is seen. We don't need to return -1 here. ++ */ ++ } + + return r; + } + +-/* Cache */ +-static int +-readahead_can_cache (nbdkit_next *next, void *handle) +-{ +- /* We are already operating as a cache regardless of the plugin's +- * underlying .can_cache, but it's easiest to just rely on nbdkit's +- * behavior of calling .pread for caching. +- */ +- return NBDKIT_CACHE_EMULATE; +-} +- + /* Read data. */ +- +-static int +-fill_readahead (nbdkit_next *next, +- uint32_t count, uint64_t offset, uint32_t flags, int *err) +-{ +- position = offset; +- +- /* Read at least window bytes, but if count is larger read that. +- * Note that the count cannot be bigger than the buffer size. +- */ +- length = MAX (count, window); +- +- /* Don't go beyond the end of the underlying file. */ +- length = MIN (length, size - position); +- +- /* Grow the buffer if necessary. */ +- if (bufsize < length) { +- char *new_buffer = realloc (buffer, length); +- if (new_buffer == NULL) { +- *err = errno; +- nbdkit_error ("realloc: %m"); +- return -1; +- } +- buffer = new_buffer; +- bufsize = length; +- } +- +- if (next->pread (next, buffer, length, offset, flags, err) == -1) { +- length = 0; /* failed to fill the prefetch buffer */ +- return -1; +- } +- +- return 0; +-} +- + static int + readahead_pread (nbdkit_next *next, + void *handle, void *buf, uint32_t count, uint64_t offset, + uint32_t flags, int *err) + { +- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); ++ struct readahead_handle *h = handle; + +- while (count > 0) { +- if (length == 0) { +- /* We don't have a prefetch buffer at all. This could be the +- * first request or reset after a miss. +- */ +- window = READAHEAD_MIN; +- if (fill_readahead (next, count, offset, flags, err) == -1) +- return -1; +- } ++ /* If the underlying plugin doesn't support caching then skip that ++ * step completely. The filter will do nothing. ++ */ ++ if (filter_working (h)) { ++ struct command ra_cmd = { .type = CMD_CACHE, .next = NULL }; ++ int64_t size; + +- /* Can we satisfy this request partly or entirely from the prefetch +- * buffer? +- */ +- else if (position <= offset && offset < position + length) { +- uint32_t n = MIN (position - offset + length, count); +- memcpy (buf, &buffer[offset-position], n); +- buf += n; +- offset += n; +- count -= n; +- } ++ size = next->get_size (next); ++ if (size >= 0) { ++ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&window_lock); + +- /* Does the request start immediately after the prefetch buffer? +- * This is a “hit” allowing us to double the window size. +- */ +- else if (offset == position + length) { +- window = MIN (window * 2, READAHEAD_MAX); +- if (fill_readahead (next, count, offset, flags, err) == -1) +- return -1; ++ /* Generate the asynchronous (background) cache command for ++ * the readahead window. ++ */ ++ ra_cmd.offset = offset + count; ++ if (ra_cmd.offset < size) { ++ ra_cmd.count = MIN (window, size - ra_cmd.offset); ++ ra_cmd.next = next; /* If .next is non-NULL, we'll send it below. */ ++ } ++ ++ /* Should we change the window size? ++ * If the last readahead < current offset, double the window. ++ * If not, but we're still making forward progress, keep the window. ++ * If we're not making forward progress, reduce the window to minimum. ++ */ ++ if (last_readahead < offset) ++ window = MIN (window * 2, READAHEAD_MAX); ++ else if (last_offset < offset) ++ /* leave window unchanged */ ; ++ else ++ window = READAHEAD_MIN; ++ last_offset = offset; ++ last_readahead = ra_cmd.offset + ra_cmd.count; + } + +- /* Else it's a “miss”. Reset everything and start again. */ +- else +- length = 0; ++ if (ra_cmd.next && ++ send_command_to_background_thread (&h->ctrl, ra_cmd) == -1) ++ return -1; + } + +- return 0; +-} +- +-/* Any writes or write-like operations kill the prefetch buffer. +- * +- * We could do better here, but for the current use case of this +- * filter it doesn't matter. XXX +- */ +- +-static void +-kill_readahead (void) +-{ +- ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); +- window = READAHEAD_MIN; +- length = 0; +-} +- +-static int +-readahead_pwrite (nbdkit_next *next, +- void *handle, +- const void *buf, uint32_t count, uint64_t offset, +- uint32_t flags, int *err) +-{ +- kill_readahead (); +- return next->pwrite (next, buf, count, offset, flags, err); +-} +- +-static int +-readahead_trim (nbdkit_next *next, +- void *handle, +- uint32_t count, uint64_t offset, uint32_t flags, +- int *err) +-{ +- kill_readahead (); +- return next->trim (next, count, offset, flags, err); +-} +- +-static int +-readahead_zero (nbdkit_next *next, +- void *handle, +- uint32_t count, uint64_t offset, uint32_t flags, +- int *err) +-{ +- kill_readahead (); +- return next->zero (next, count, offset, flags, err); +-} +- +-static int +-readahead_flush (nbdkit_next *next, +- void *handle, uint32_t flags, int *err) +-{ +- kill_readahead (); +- return next->flush (next, flags, err); ++ /* Issue the synchronous read. */ ++ return next->pread (next, buf, count, offset, flags, err); + } + + static struct nbdkit_filter filter = { + .name = "readahead", + .longname = "nbdkit readahead filter", +- .unload = readahead_unload, +- .prepare = readahead_prepare, +- .get_size = readahead_get_size, ++ .get_ready = readahead_get_ready, ++ .open = readahead_open, ++ .close = readahead_close, + .can_cache = readahead_can_cache, + .pread = readahead_pread, +- .pwrite = readahead_pwrite, +- .trim = readahead_trim, +- .zero = readahead_zero, +- .flush = readahead_flush, + }; + + NBDKIT_REGISTER_FILTER(filter) +diff --git a/filters/readahead/readahead.h b/filters/readahead/readahead.h +new file mode 100644 +index 00000000..a68204d5 +--- /dev/null ++++ b/filters/readahead/readahead.h +@@ -0,0 +1,60 @@ ++/* nbdkit ++ * Copyright (C) 2019-2022 Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are ++ * met: ++ * ++ * * Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * * Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * * Neither the name of Red Hat nor the names of its contributors may be ++ * used to endorse or promote products derived from this software without ++ * specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ++ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A ++ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR ++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF ++ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, ++ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ++ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ */ ++ ++#ifndef NBDKIT_READAHEAD_H ++#define NBDKIT_READAHEAD_H ++ ++#include ++ ++#include ++ ++#include "vector.h" ++ ++/* List of commands issued to the background thread. */ ++struct command { ++ enum { CMD_QUIT, CMD_CACHE } type; ++ nbdkit_next *next; ++ uint64_t offset; ++ uint32_t count; ++}; ++DEFINE_VECTOR_TYPE(command_queue, struct command); ++ ++struct bgthread_ctrl { ++ command_queue cmds; /* Command queue. */ ++ pthread_mutex_t lock; /* Lock for queue. */ ++ pthread_cond_t cond; /* Condition queue size 0 -> 1. */ ++}; ++ ++/* Start background thread (one per connection). */ ++extern void *readahead_thread (void *vp); ++ ++#endif /* NBDKIT_READAHEAD_H */ +diff --git a/tests/test-readahead.sh b/tests/test-readahead.sh +index 7ec7f8e9..17126e5a 100755 +--- a/tests/test-readahead.sh ++++ b/tests/test-readahead.sh +@@ -59,7 +59,7 @@ for i in range(0, 512*10, 512): + echo $((end_t - start_t)) + } + +-t1=$(test --filter=readahead) ++t1=$(test --filter=readahead --filter=cache) + t2=$(test) + + # In the t1 case we should make only 1 request into the plugin, +-- +2.31.1 + diff --git a/0003-readahead-Fix-test.patch b/0003-readahead-Fix-test.patch new file mode 100644 index 0000000..7f3a8b9 --- /dev/null +++ b/0003-readahead-Fix-test.patch @@ -0,0 +1,120 @@ +From a9708db324eb1989680132813da20d5a4c5496a9 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Thu, 21 Apr 2022 16:14:46 +0100 +Subject: [PATCH] readahead: Fix test + +The previous test turned out to be pretty bad at testing the new +filter. A specific problem is that the filter starts a background +thread which issues .cache requests, while on the main connection +.pread requests are being passed through. The test used +--filter=readahead --filter=cache with the cache filter only caching +on .cache requests (since cache-on-read defaults to false), so only +caching requests made by the background thread. + + main thread + client ---- .pread ----- delay-filter -------> plugin + \ + \ background thread + .cache --- cache-filter + +Under very high load, the background thread could be starved. This +means no requests were being cached at all, and all requests were +passing through the delay filter. It would appear that readahead was +failing (which it was, in a way). + +It's not very easy to fix this since readahead is best-effort, but we +can go back to using a simpler plugin that logs reads and caches and +check that they look valid. + +Update: commit 2ff548d66ad3eae87868402ec5b3319edd12090f +(cherry picked from commit db1e3311727c6ecab3264a1811d33db1aa45a4d0) +--- + tests/test-readahead.sh | 61 +++++++++++++++++++++++------------------ + 1 file changed, 35 insertions(+), 26 deletions(-) + +diff --git a/tests/test-readahead.sh b/tests/test-readahead.sh +index 17126e5a..37f4a06f 100755 +--- a/tests/test-readahead.sh ++++ b/tests/test-readahead.sh +@@ -30,43 +30,52 @@ + # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + # SUCH DAMAGE. + +-# Is the readahead filter faster? Copy a blank disk with a custom +-# plugin that sleeps on every request. Because the readahead filter +-# should result in fewer requests it should run faster. +- + source ./functions.sh + set -e + set -x + +-requires_filter delay ++requires_plugin sh + requires nbdsh --version + requires dd iflag=count_bytes > readahead.out ++ ;; ++ pread) ++ echo "$@" >> readahead.out ++ dd if=/dev/zero count=$3 iflag=count_bytes ++ ;; ++ *) ++ exit 2 ++ ;; ++esac ++EOF + +- end_t=$SECONDS +- echo $((end_t - start_t)) +-} ++cat readahead.out + +-t1=$(test --filter=readahead --filter=cache) +-t2=$(test) +- +-# In the t1 case we should make only 1 request into the plugin, +-# resulting in around 1 sleep period (5 seconds). In the t2 case we +-# make 10 requests so sleep for around 50 seconds. t1 should be < t2 +-# is every reasonable scenario. +-if [ $t1 -ge $t2 ]; then +- echo "$0: readahead filter took longer, should be shorter" +- exit 1 +-fi ++# We should see the pread requests, and additional cache requests for ++# the 32K region following each pread request. ++for i in `seq 0 512 $((512*10 - 512))` ; do ++ grep "pread 512 $i" readahead.out ++ grep "cache 32768 $((i+512))" readahead.out ++done +-- +2.31.1 + diff --git a/nbdkit.spec b/nbdkit.spec index 52277a7..3cb9740 100644 --- a/nbdkit.spec +++ b/nbdkit.spec @@ -46,13 +46,13 @@ ExclusiveArch: x86_64 %global verify_tarball_signature 1 # If there are patches which touch autotools files, set this to 1. -%global patches_touch_autotools %{nil} +%global patches_touch_autotools 1 # The source directory. %global source_directory 1.30-stable Name: nbdkit -Version: 1.30.2 +Version: 1.30.4 Release: 1%{?dist} Summary: NBD server @@ -78,7 +78,9 @@ Source3: copy-patches.sh # https://gitlab.com/nbdkit/nbdkit/-/commits/rhel-9.1/ # Patches. -# (none) +Patch0001: 0001-ssh-Allow-the-remote-file-to-be-created.patch +Patch0002: 0002-readahead-Rewrite-this-filter-so-it-prefetches-using.patch +Patch0003: 0003-readahead-Fix-test.patch # For automatic RPM Provides generation. # See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html @@ -1178,8 +1180,8 @@ export LIBGUESTFS_TRACE=1 %changelog -* Mon Apr 04 2022 Richard W.M. Jones - 1.30.2-1 -- Rebase to new stable branch version 1.30.2 +* Tue Apr 26 2022 Richard W.M. Jones - 1.30.4-1 +- Rebase to new stable branch version 1.30.4 resolves: rhbz#2059289 - Add automatic provides generator and subpackage nbdkit-srpm-macros resolves: rhbz#2059291 @@ -1188,6 +1190,8 @@ export LIBGUESTFS_TRACE=1 - vddk: Fix use of uninitialized memory when computing block size resolves: rhbz#2066655 - Skip vsock tests unless the vsock_loopback module is loaded (2069558) +- Add support for ssh create remote file. +- Backport new readahead filter from 1.32. * Mon Jan 24 2022 Richard W.M. Jones - 1.28.5-1 - Rebase to new stable branch version 1.28.5 diff --git a/sources b/sources index 4009f4c..b012ddd 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -SHA512 (nbdkit-1.30.2.tar.gz) = 84da92005bbb59a860c9ba5a88082d859c9dd4f9db5c81d8760e9d6925e10d80d27de78be9391113bf715d16de7726594b5b803512cd8b59ce14ec135fc4b09d -SHA512 (nbdkit-1.30.2.tar.gz.sig) = c64ccdbc25f56626360aa812af330e33f26e6954415b93976f5a3d58808affdc94efa6e309066b23dbfd354192c4693a408b79aa583a6f5e84ab1c2ef406179e +SHA512 (nbdkit-1.30.4.tar.gz) = b2db6cde29b04fc73831a5f062b7b5d0a4ed2c3785ccefe81add4bc89cd62d5705b904922f53ea1fd4b0987c9aa2a0ef34fc58eefa804b37d424a44112fbf1b1 +SHA512 (nbdkit-1.30.4.tar.gz.sig) = ed8a60214274d88f418656b1b50d9eb5b73aa0ff18e36446cf21a6fe2c07ca72311fd745c5d99f94cb3676dba2ee3a768ab08a05cf9d0d24d8156c43118d57d6