diff --git a/0016-vddk-Debug-length-of-extents-when-using-D-vddk.exten.patch b/0016-vddk-Debug-length-of-extents-when-using-D-vddk.exten.patch new file mode 100644 index 0000000..30161f3 --- /dev/null +++ b/0016-vddk-Debug-length-of-extents-when-using-D-vddk.exten.patch @@ -0,0 +1,30 @@ +From 27736bb9e22f6c86b1a19fc02668dd04f6fc8bd0 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Thu, 29 May 2025 09:15:33 +0000 +Subject: [PATCH] vddk: Debug length of extents when using -D vddk.extents=1 + +(cherry picked from commit a53746d326e08fae9ec1ea782df740abb48d0114) +--- + plugins/vddk/worker.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c +index 8a91250a..c61c4d16 100644 +--- a/plugins/vddk/worker.c ++++ b/plugins/vddk/worker.c +@@ -375,9 +375,10 @@ add_extent (struct nbdkit_extents *extents, + return 0; + + if (vddk_debug_extents) +- nbdkit_debug ("adding extent type %s at [%" PRIu64 "...%" PRIu64 "]", ++ nbdkit_debug ("adding extent type %s at [%" PRIu64 "...%" PRIu64 "] " ++ "(length %" PRIu64 ")", + is_hole ? "hole" : "allocated data", +- *position, next_position-1); ++ *position, next_position-1, length); + if (nbdkit_add_extent (extents, *position, length, type) == -1) + return -1; + +-- +2.47.1 + diff --git a/0017-cacheextents-Mark-this-filter-as-deprecated.patch b/0017-cacheextents-Mark-this-filter-as-deprecated.patch new file mode 100644 index 0000000..b5d05cc --- /dev/null +++ b/0017-cacheextents-Mark-this-filter-as-deprecated.patch @@ -0,0 +1,38 @@ +From 85499cfd9c282d986ee8964d0404ccdf25733a6a Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Sat, 31 May 2025 08:12:32 +0100 +Subject: [PATCH] cacheextents: Mark this filter as deprecated + +We will remove it in nbdkit 1.46. + +Its usage was removed from virt-v2v because we found that it did not +do anything: +https://github.com/libguestfs/virt-v2v/commit/48c4ce8e6cf6f1c390a48245ef0f99233f80cfe8 + +(cherry picked from commit 9717c47999fb2f56c3569cf1cdd4d0c160f866c1) +--- + filters/cacheextents/nbdkit-cacheextents-filter.pod | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/filters/cacheextents/nbdkit-cacheextents-filter.pod b/filters/cacheextents/nbdkit-cacheextents-filter.pod +index c3d89465..0693ca80 100644 +--- a/filters/cacheextents/nbdkit-cacheextents-filter.pod ++++ b/filters/cacheextents/nbdkit-cacheextents-filter.pod +@@ -6,6 +6,14 @@ nbdkit-cacheextents-filter - cache extents + + nbdkit --filter=cacheextents plugin + ++=head1 DEPRECATED ++ ++B 1.43.10> and ++will be removed in S>. There is no direct replacement, ++but as the filter only worked for a narrow and unusual range of access ++patterns it is likely that it has no effect and you can just stop ++using it. ++ + =head1 DESCRIPTION + + C is a filter that caches the result of last +-- +2.47.1 + diff --git a/0018-include-Move-some-extent-functions-to-nbdkit-common..patch b/0018-include-Move-some-extent-functions-to-nbdkit-common..patch new file mode 100644 index 0000000..98a6cd2 --- /dev/null +++ b/0018-include-Move-some-extent-functions-to-nbdkit-common..patch @@ -0,0 +1,77 @@ +From 2282c37faa556339ff6ce0557866c171f2e9ee29 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Sat, 31 May 2025 08:04:41 +0100 +Subject: [PATCH] include: Move some extent functions to nbdkit-common.h + +No existing plugins use the extent functions nbdkit_extents_new, +nbdkit_extents_free, etc, since they are mainly useful for filters so +they can build and manipulate new lists of extents. Nevertheless +there is nothing that prevents them from being used in plugins, so +move those functions to the common header (so they appear for users of +) + +There are a couple of helper functions which are really +filter-specific so leave those in nbdkit-filter.h. + +(cherry picked from commit 03792d04f270f2cffc589dd9703c9de9c3d5a65e) +--- + include/nbdkit-common.h | 15 +++++++++++++++ + include/nbdkit-filter.h | 15 +-------------- + 2 files changed, 16 insertions(+), 14 deletions(-) + +diff --git a/include/nbdkit-common.h b/include/nbdkit-common.h +index 43d674aa..bb5e3e55 100644 +--- a/include/nbdkit-common.h ++++ b/include/nbdkit-common.h +@@ -157,7 +157,22 @@ NBDKIT_EXTERN_DECL (const char *, nbdkit_vprintf_intern, + (const char *msg, va_list args) + ATTRIBUTE_FORMAT_PRINTF (1, 0)); + ++/* Extent functions. */ ++struct nbdkit_extent { ++ uint64_t offset; ++ uint64_t length; ++ uint32_t type; ++}; ++ + struct nbdkit_extents; ++ ++NBDKIT_EXTERN_DECL (struct nbdkit_extents *, nbdkit_extents_new, ++ (uint64_t start, uint64_t end)); ++NBDKIT_EXTERN_DECL (void, nbdkit_extents_free, (struct nbdkit_extents *)); ++NBDKIT_EXTERN_DECL (size_t, nbdkit_extents_count, ++ (const struct nbdkit_extents *)); ++NBDKIT_EXTERN_DECL (struct nbdkit_extent, nbdkit_get_extent, ++ (const struct nbdkit_extents *, size_t)); + NBDKIT_EXTERN_DECL (int, nbdkit_add_extent, + (struct nbdkit_extents *, + uint64_t offset, uint64_t length, uint32_t type)); +diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h +index a4ac09d4..31520bf5 100644 +--- a/include/nbdkit-filter.h ++++ b/include/nbdkit-filter.h +@@ -121,20 +121,7 @@ struct nbdkit_next_ops { + */ + }; + +-/* Extent functions. */ +-struct nbdkit_extent { +- uint64_t offset; +- uint64_t length; +- uint32_t type; +-}; +- +-NBDKIT_EXTERN_DECL (struct nbdkit_extents *, nbdkit_extents_new, +- (uint64_t start, uint64_t end)); +-NBDKIT_EXTERN_DECL (void, nbdkit_extents_free, (struct nbdkit_extents *)); +-NBDKIT_EXTERN_DECL (size_t, nbdkit_extents_count, +- (const struct nbdkit_extents *)); +-NBDKIT_EXTERN_DECL (struct nbdkit_extent, nbdkit_get_extent, +- (const struct nbdkit_extents *, size_t)); ++/* Extent helpers. */ + NBDKIT_EXTERN_DECL (struct nbdkit_extents *, nbdkit_extents_full, + (nbdkit_next *next, + uint32_t count, uint64_t offset, +-- +2.47.1 + diff --git a/0019-vddk-Display-command-type-in-command-completed-messa.patch b/0019-vddk-Display-command-type-in-command-completed-messa.patch new file mode 100644 index 0000000..184d166 --- /dev/null +++ b/0019-vddk-Display-command-type-in-command-completed-messa.patch @@ -0,0 +1,29 @@ +From ae0556bd62bd7578986026321866e122bd228689 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Fri, 30 May 2025 13:29:28 +0100 +Subject: [PATCH] vddk: Display command type in command completed message + +Useful extra debugging. + +(cherry picked from commit 81d4d74fecf3c071e144a8ba016f43ba1de1b093) +--- + plugins/vddk/worker.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c +index c61c4d16..3bf1d5c2 100644 +--- a/plugins/vddk/worker.c ++++ b/plugins/vddk/worker.c +@@ -116,7 +116,8 @@ complete_command (void *vp, VixError result) + struct command *cmd = vp; + + if (vddk_debug_datapath) +- nbdkit_debug ("command %" PRIu64 " completed", cmd->id); ++ nbdkit_debug ("command %" PRIu64 " (%s) completed", ++ cmd->id, command_type_string (cmd->type)); + + ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex); + +-- +2.47.1 + diff --git a/0020-vddk-Cache-the-readonly-flag-from-the-.open-call-in-.patch b/0020-vddk-Cache-the-readonly-flag-from-the-.open-call-in-.patch new file mode 100644 index 0000000..5acc431 --- /dev/null +++ b/0020-vddk-Cache-the-readonly-flag-from-the-.open-call-in-.patch @@ -0,0 +1,41 @@ +From d01437869db201f45c1a7ce1bb1e5f9142c9081c Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Fri, 30 May 2025 13:32:00 +0100 +Subject: [PATCH] vddk: Cache the readonly flag from the .open call in the + handle + +(cherry picked from commit 0d953ea644f44259edb19c97e3c7863794c0ca1c) +--- + plugins/vddk/vddk.c | 1 + + plugins/vddk/vddk.h | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c +index 1de356d8..342a913b 100644 +--- a/plugins/vddk/vddk.c ++++ b/plugins/vddk/vddk.c +@@ -713,6 +713,7 @@ vddk_open (int readonly) + nbdkit_error ("calloc: %m"); + return NULL; + } ++ h->readonly = readonly; + h->commands = (command_queue) empty_vector; + pthread_mutex_init (&h->commands_lock, NULL); + pthread_cond_init (&h->commands_cond, NULL); +diff --git a/plugins/vddk/vddk.h b/plugins/vddk/vddk.h +index 1d1069cc..3586c5da 100644 +--- a/plugins/vddk/vddk.h ++++ b/plugins/vddk/vddk.h +@@ -166,6 +166,9 @@ struct vddk_handle { + pthread_cond_t commands_cond; /* condition (queue size 0 -> 1) */ + uint64_t id; /* next command ID */ + ++ /* Cached readonly flag from the open call. */ ++ int readonly; ++ + /* Cached disk size in bytes (set in get_size()). */ + uint64_t size; + }; +-- +2.47.1 + diff --git a/0021-vddk-Move-minimum-version-of-VDDK-to-6.7.patch b/0021-vddk-Move-minimum-version-of-VDDK-to-6.7.patch new file mode 100644 index 0000000..016a824 --- /dev/null +++ b/0021-vddk-Move-minimum-version-of-VDDK-to-6.7.patch @@ -0,0 +1,250 @@ +From 6df932e5a77250d9946b91a02a143bdd28ee2f77 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Fri, 30 May 2025 13:49:10 +0100 +Subject: [PATCH] vddk: Move minimum version of VDDK to 6.7 + +We can now assume that QueryAllocatedBlocks exists, as well as a +couple of other functions, simplifying the code. + +This was released in 2018 (7 years ago) so there's been plenty of time +to upgrade. + +(cherry picked from commit 7c90116664b1b1a3c3756d6a79d6d483bc6062dd) +--- + plugins/vddk/nbdkit-vddk-plugin.pod | 8 ++--- + plugins/vddk/vddk-stubs.h | 32 +++++++++++--------- + plugins/vddk/vddk.c | 43 ++++++--------------------- + plugins/vddk/worker.c | 14 ++------- + tests/dummy-vddk.c | 45 +++++++++++++++++++++++++++-- + 5 files changed, 76 insertions(+), 66 deletions(-) + +diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod +index da3e9f9a..279da2d0 100644 +--- a/plugins/vddk/nbdkit-vddk-plugin.pod ++++ b/plugins/vddk/nbdkit-vddk-plugin.pod +@@ -397,15 +397,13 @@ is I recommended. + + =over 4 + +-=item VDDK E 6.5 (released Nov 2016) ++=item VDDK E 6.7 (released Apr 2018) + + This is the minimum version of VDDK supported. Older versions will + not work. + +-=item VDDK 6.7 (released Apr 2018) +- +-This is the first version that supported the +-C API. This is required to provide ++This is also the first version that supported the ++C API. This is used to provide + sparseness (extent) information over NBD. + + =item VDDK 8.0.3 (released Jun 2024) +diff --git a/plugins/vddk/vddk-stubs.h b/plugins/vddk/vddk-stubs.h +index 8e5afdb8..d768c51a 100644 +--- a/plugins/vddk/vddk-stubs.h ++++ b/plugins/vddk/vddk-stubs.h +@@ -132,16 +132,22 @@ STUB (VixDiskLib_Wait, + VixError, + (VixDiskLibHandle handle)); + +-/* Added in VDDK 6.7, these will be NULL for earlier versions: */ +-OPTIONAL_STUB (VixDiskLib_QueryAllocatedBlocks, +- VixError, +- (VixDiskLibHandle diskHandle, +- uint64_t start_sector, uint64_t nr_sectors, +- uint64_t chunk_size, +- VixDiskLibBlockList **block_list)); +-OPTIONAL_STUB (VixDiskLib_FreeBlockList, +- VixError, +- (VixDiskLibBlockList *block_list)); +-OPTIONAL_STUB (VixDiskLib_AllocateConnectParams, +- VixDiskLibConnectParams *, +- (void)); ++/* Added in VDDK 6.7. */ ++STUB (VixDiskLib_QueryAllocatedBlocks, ++ VixError, ++ (VixDiskLibHandle diskHandle, ++ uint64_t start_sector, uint64_t nr_sectors, ++ uint64_t chunk_size, ++ VixDiskLibBlockList **block_list)); ++STUB (VixDiskLib_FreeBlockList, ++ VixError, ++ (VixDiskLibBlockList *block_list)); ++STUB (VixDiskLib_AllocateConnectParams, ++ VixDiskLibConnectParams *, ++ (void)); ++ ++/* OPTIONAL_STUB can be used to add new APIs which are not supported ++ * by older versions of VDDK, and to make them NULL if not present. ++ * However VDDK >= 6.7 has everything we need for now so we are no ++ * longer using this macro. ++ */ +diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c +index 342a913b..60f75b51 100644 +--- a/plugins/vddk/vddk.c ++++ b/plugins/vddk/vddk.c +@@ -666,37 +666,6 @@ vddk_dump_plugin (void) + /* Lock protecting open/close calls - see above. */ + static pthread_mutex_t open_close_lock = PTHREAD_MUTEX_INITIALIZER; + +-static inline VixDiskLibConnectParams * +-allocate_connect_params (void) +-{ +- VixDiskLibConnectParams *ret; +- +- if (VixDiskLib_AllocateConnectParams != NULL) { +- VDDK_CALL_START (VixDiskLib_AllocateConnectParams, "") +- ret = VixDiskLib_AllocateConnectParams (); +- VDDK_CALL_END (VixDiskLib_AllocateConnectParams, 0); +- } +- else +- ret = calloc (1, sizeof (VixDiskLibConnectParams)); +- +- return ret; +-} +- +-static inline void +-free_connect_params (VixDiskLibConnectParams *params) +-{ +- /* Only use FreeConnectParams if AllocateConnectParams was +- * originally called. Otherwise use free. +- */ +- if (VixDiskLib_AllocateConnectParams != NULL) { +- VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params") +- VixDiskLib_FreeConnectParams (params); +- VDDK_CALL_END (VixDiskLib_FreeConnectParams, 0); +- } +- else +- free (params); +-} +- + /* Create the per-connection handle. */ + static void * + vddk_open (int readonly) +@@ -718,7 +687,9 @@ vddk_open (int readonly) + pthread_mutex_init (&h->commands_lock, NULL); + pthread_cond_init (&h->commands_cond, NULL); + +- h->params = allocate_connect_params (); ++ VDDK_CALL_START (VixDiskLib_AllocateConnectParams, "") ++ h->params = VixDiskLib_AllocateConnectParams (); ++ VDDK_CALL_END (VixDiskLib_AllocateConnectParams, 0); + if (h->params == NULL) { + nbdkit_error ("allocate VixDiskLibConnectParams: %m"); + goto err0; +@@ -851,7 +822,9 @@ vddk_open (int readonly) + VixDiskLib_Disconnect (h->connection); + VDDK_CALL_END (VixDiskLib_Disconnect, 0); + err1: +- free_connect_params (h->params); ++ VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params") ++ VixDiskLib_FreeConnectParams (h->params); ++ VDDK_CALL_END (VixDiskLib_FreeConnectParams, 0); + err0: + pthread_mutex_destroy (&h->commands_lock); + pthread_cond_destroy (&h->commands_cond); +@@ -877,7 +850,9 @@ vddk_close (void *handle) + VixDiskLib_Disconnect (h->connection); + VDDK_CALL_END (VixDiskLib_Disconnect, 0); + +- free_connect_params (h->params); ++ VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params") ++ VixDiskLib_FreeConnectParams (h->params); ++ VDDK_CALL_END (VixDiskLib_FreeConnectParams, 0); + pthread_mutex_destroy (&h->commands_lock); + pthread_cond_destroy (&h->commands_cond); + command_queue_reset (&h->commands); +diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c +index 3bf1d5c2..076b2bd7 100644 +--- a/plugins/vddk/worker.c ++++ b/plugins/vddk/worker.c +@@ -309,23 +309,13 @@ do_can_extents (struct command *cmd, struct vddk_handle *h) + VixError err; + VixDiskLibBlockList *block_list; + +- /* This call was added in VDDK 6.7. In earlier versions the +- * function pointer will be NULL and we cannot query extents. +- */ +- if (VixDiskLib_QueryAllocatedBlocks == NULL) { +- nbdkit_debug ("can_extents: VixDiskLib_QueryAllocatedBlocks == NULL, " +- "probably this is VDDK < 6.7"); +- return 0; +- } +- + /* Suppress errors around this call. See: + * https://bugzilla.redhat.com/show_bug.cgi?id=1709211#c7 + */ + error_suppression = 1; + +- /* However even when the call is available it rarely works well so +- * the best thing we can do here is to try the call and if it's +- * non-functional return false. ++ /* Try the QueryAllocatedBlocks call and if it's non-functional ++ * return false. + */ + VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks, + "handle, 0, %d sectors, %d sectors", +diff --git a/tests/dummy-vddk.c b/tests/dummy-vddk.c +index 0e69012a..01284f38 100644 +--- a/tests/dummy-vddk.c ++++ b/tests/dummy-vddk.c +@@ -116,11 +116,52 @@ VixDiskLib_FreeErrorText (char *text) + free (text); + } + ++NBDKIT_DLL_PUBLIC VixError ++VixDiskLib_QueryAllocatedBlocks (VixDiskLibHandle diskHandle, ++ uint64_t start_sector, uint64_t nr_sectors, ++ uint64_t chunk_size, ++ VixDiskLibBlockList **block_list) ++{ ++ VixDiskLibBlockList *ret; ++ ++ /* This is safe because ret->blocks is a 1-sized array and we only ++ * use 1 entry here. ++ */ ++ ret = calloc (1, sizeof *ret); ++ if (ret == NULL) ++ abort (); ++ ++ /* Pretend it's all allocated. */ ++ ret->numBlocks = 1; ++ ret->blocks[0].offset = start_sector; ++ ret->blocks[0].length = nr_sectors; ++ ++ *block_list = ret; ++ return VIX_OK; ++} ++ ++NBDKIT_DLL_PUBLIC VixError ++VixDiskLib_FreeBlockList (VixDiskLibBlockList *block_list) ++{ ++ free (block_list); ++ return VIX_OK; ++} ++ ++NBDKIT_DLL_PUBLIC VixDiskLibConnectParams * ++VixDiskLib_AllocateConnectParams (void) ++{ ++ VixDiskLibConnectParams *ret; ++ ++ ret = calloc (1, sizeof *ret); ++ if (ret == NULL) ++ abort (); ++ return ret; ++} ++ + NBDKIT_DLL_PUBLIC void + VixDiskLib_FreeConnectParams (VixDiskLibConnectParams *params) + { +- /* never called since we don't define optional AllocateConnectParams */ +- abort (); ++ free (params); + } + + NBDKIT_DLL_PUBLIC VixError +-- +2.47.1 + diff --git a/0022-vddk-Unconditionally-test-QueryAllocatedBlocks.patch b/0022-vddk-Unconditionally-test-QueryAllocatedBlocks.patch new file mode 100644 index 0000000..9572372 --- /dev/null +++ b/0022-vddk-Unconditionally-test-QueryAllocatedBlocks.patch @@ -0,0 +1,78 @@ +From 0c3394942b857815feebe7586b2cb18e2bf113e5 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Fri, 30 May 2025 13:59:28 +0100 +Subject: [PATCH] vddk: Unconditionally test QueryAllocatedBlocks + +Except in rare cases like a suddenly dropped connection, we always +call vddk_can_extents and therefore do this test. We might as well do +it unconditionally when the worker thread starts up. + +This simplifies following commits where we may do more work based on +this flag when the worker thread starts up. + +(cherry picked from commit 787db3e8ecfd81b683f541065daee75665ab47e0) +--- + plugins/vddk/worker.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c +index 076b2bd7..6efcc6f6 100644 +--- a/plugins/vddk/worker.c ++++ b/plugins/vddk/worker.c +@@ -303,8 +303,13 @@ do_flush (struct command *cmd, struct vddk_handle *h) + return 0; + } + +-static int +-do_can_extents (struct command *cmd, struct vddk_handle *h) ++/* Try the QueryAllocatedBlocks call and if it's non-functional return ++ * false. At some point in future, perhaps when we move to baseline ++ * VDDK >= 7, we can just assume it works and remove this test ++ * entirely. ++ */ ++static bool ++test_can_extents (struct vddk_handle *h) + { + VixError err; + VixDiskLibBlockList *block_list; +@@ -314,9 +319,6 @@ do_can_extents (struct command *cmd, struct vddk_handle *h) + */ + error_suppression = 1; + +- /* Try the QueryAllocatedBlocks call and if it's non-functional +- * return false. +- */ + VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks, + "handle, 0, %d sectors, %d sectors", + VIXDISKLIB_MIN_CHUNK_SIZE, VIXDISKLIB_MIN_CHUNK_SIZE) +@@ -502,6 +504,10 @@ vddk_worker_thread (void *handle) + { + struct vddk_handle *h = handle; + bool stop = false; ++ bool can_extents; ++ ++ /* Test if QueryAllocatedBlocks will work. */ ++ can_extents = test_can_extents (h); + + while (!stop) { + struct command *cmd; +@@ -544,12 +550,13 @@ vddk_worker_thread (void *handle) + break; + + case CAN_EXTENTS: +- r = do_can_extents (cmd, h); +- if (r >= 0) +- *(int *)cmd->ptr = r; ++ *(int *)cmd->ptr = can_extents; ++ r = 0; + break; + + case EXTENTS: ++ /* If we returned false above, we should never be called here. */ ++ assert (can_extents); + r = do_extents (cmd, h); + break; + +-- +2.47.1 + diff --git a/0023-vddk-Pre-cache-the-extents-for-readonly-connections.patch b/0023-vddk-Pre-cache-the-extents-for-readonly-connections.patch new file mode 100644 index 0000000..ef46fc5 --- /dev/null +++ b/0023-vddk-Pre-cache-the-extents-for-readonly-connections.patch @@ -0,0 +1,349 @@ +From 3be0221d8cbf5cc916efb9975b4f6a2d85044991 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Fri, 30 May 2025 15:06:07 +0100 +Subject: [PATCH] vddk: Pre-cache the extents for readonly connections + +As explained in detail in the code comment, QueryAllocatedBlocks has +very poor performance. We can partially work around this by +pre-caching extents when the first NBD_BLOCK_STATUS request is made. +We only do this for readonly connections, since it would be very +complex to do it for writable connections where the extent information +could change under us. And we only do it on the first +NBD_BLOCK_STATUS request, so we know that the caller is interested in +extents. + +Benchmarking +------------ + +This improves performance, dramatically in some cases: + + Size Used% [ni] [nm] [nc] [qi] [qm] [qc] [dd] + +before 64G 16% 17 21 354 6 62 180 +after " " 18 13 59 6 7 66 57 + nc=178 MBytes/s dd=1150 MBytes/sec + +before 128G 5.5% 17 29 646 6 50 151 +after " " 17 14 68 6 8 71 + nc=106 MBytes/s + +before 128G 45% 17 30 1073 6 52 578 +after " " 17 14 457 6 8 506 143 + nc=128 MBytes/s dd=409 MBytes/sec + + (all times in seconds) + +[ni] nbdinfo $uri + Note: Makes two connections, unlike qemu-img info. + +[nm] nbdinfo --map --totals $uri + Note: Slower than it ought to be, needs investigation. + +[nc] nbdcopy -p $uri null: + +[qi] qemu-img info $uri + +[qm] qemu-img map $uri + +[qc] qemu-img convert -p -n -f raw $uri \ + -O raw 'json:{"file.driver":"null-co","file.size":"1E"}' + Note: Requests the map up front, which is why it performs better + than nbdcopy on the "before" version since reads are not being + serialized by concurrent calls to QueryAllocatedBlocks. + +[dd] dd if=*-flat.vmdk of=/dev/null bs=16777216 + Note: This command was run on the ESXi server where the storage + is assumed to be local. ESXi has very limited tools available + (eg. no "fio" etc). Also the cp command is from Busybox and is + notably slower than dd. To get the accurate copying rate I + assumed that this command copies all data on disk to /dev/null, + skipping reading holes where thin provisioned. IOW using the "ls + -s" output as the number of blocks read. + +It should be noted that the after/[nc] numbers are not very stable. +In the last test where [nc] = 457, I see a deviation of as much as 10% +either side over multiple runs. + +The network used in the test is 25 Gbps and clearly we are nowhere +near able to reach that. A more likely upper limit is the speed of +reading from the disks ([dd]). There is also a large gap between our +performance and that number. VMware is thought to impose a +per-connection limit of around 1 Gbps on NFC connections, and there +are other limitations +(https://knowledge.broadcom.com/external/article/307001/nfc-performance-is-slow-resulting-in-mig.html). + +Tuning NFC makes no observable difference +----------------------------------------- + +Further tuning of NFC is possible +(https://techdocs.broadcom.com/us/en/vmware-cis/vsphere/vsphere-supervisor/7-0/best-practices-for-nbd-transport.html). + +Using compression (nbdkit vddk plugin 'compression' option) is +possible but in my test it makes things much slower. This is using +the first VM from the tests above: + + [nc] + + (no compression) 59 (178 MBytes/sec) + compression=zlib 323 ( 33 MBytes/sec) + +VMware documentation also suggests using a configuration file +containing the entries below (the configuration file is placed +somewhere on the client, and referenced using the +config=/path/to/config.ini parameter): + + vixDiskLib.nfcAio.Session.BufSizeIn64KB=32 + vixDiskLib.nfcAio.Session.BufCount=16 + +This made no difference for me, at least when testing a single +conversion. Separate tests done by the MTV team suggest it may +improve performance if you are converting multiple disks / VMs in +parallel +(https://docs.redhat.com/en/documentation/migration_toolkit_for_virtualization/2.7/html/installing_and_using_the_migration_toolkit_for_virtualization/mtv-performance-recommendation_mtv#mtv-aio-buffer-key-findings_mtv) + +Some VMware documentation also suggests: + + config.nfc.useSSL=false + +but this also made no difference. + +Some VMware documentation suggests using unbuffered I/O +(unbuffered=true) but in my test this caused a large slow down. + +Continue to disable multi-conn +------------------------------ + +We have recommended against using multi-conn with the VDDK plugin, +because we observed some slow down. This commit makes no difference +to this advice. The same amount of slow down is still observed. (In +virt-v2v we use --filter=multi-conn multi-conn-mode=disable to ensure +it is never used.) + +(cherry picked from commit 5a882e74cae3dbaa09bf3b942a02f9947b12f6e5) +--- + plugins/vddk/vddk.c | 2 + + plugins/vddk/vddk.h | 3 + + plugins/vddk/worker.c | 166 +++++++++++++++++++++++++++++++++++++++++- + 3 files changed, 170 insertions(+), 1 deletion(-) + +diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c +index 60f75b51..3c63014e 100644 +--- a/plugins/vddk/vddk.c ++++ b/plugins/vddk/vddk.c +@@ -843,6 +843,8 @@ vddk_close (void *handle) + send_command_and_wait (h, &stop_cmd); + pthread_join (h->thread, NULL); + ++ nbdkit_extents_free (h->extents); ++ + VDDK_CALL_START (VixDiskLib_Close, "handle") + VixDiskLib_Close (h->handle); + VDDK_CALL_END (VixDiskLib_Close, 0); +diff --git a/plugins/vddk/vddk.h b/plugins/vddk/vddk.h +index 3586c5da..461fb528 100644 +--- a/plugins/vddk/vddk.h ++++ b/plugins/vddk/vddk.h +@@ -171,6 +171,9 @@ struct vddk_handle { + + /* Cached disk size in bytes (set in get_size()). */ + uint64_t size; ++ ++ /* Cached extents for readonly disks. */ ++ struct nbdkit_extents *extents; + }; + + /* reexec.c */ +diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c +index 6efcc6f6..3925fb91 100644 +--- a/plugins/vddk/worker.c ++++ b/plugins/vddk/worker.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + + #include + +@@ -380,7 +381,7 @@ add_extent (struct nbdkit_extents *extents, + } + + static int +-do_extents (struct command *cmd, struct vddk_handle *h) ++get_extents_slow (struct command *cmd, struct vddk_handle *h) + { + const uint32_t count = cmd->count; + const uint64_t offset = cmd->offset; +@@ -496,6 +497,169 @@ do_extents (struct command *cmd, struct vddk_handle *h) + return 0; + } + ++static int ++pre_cache_extents (struct vddk_handle *h) ++{ ++ struct nbdkit_extents *extents; ++ uint64_t start_sector = 0; ++ uint64_t nr_chunks_remaining = ++ h->size / VIXDISKLIB_MIN_CHUNK_SIZE / VIXDISKLIB_SECTOR_SIZE; ++ uint64_t position = 0; ++ ++ extents = nbdkit_extents_new (0, h->size); ++ if (extents == NULL) ++ return -1; ++ ++ /* Scan through the disk reading whole "chunks" (32 GB), the most ++ * efficient way to use QueryAllocatedBlocks. ++ */ ++ while (nr_chunks_remaining > 0) { ++ VixError err; ++ uint32_t i; ++ uint64_t nr_chunks, nr_sectors; ++ VixDiskLibBlockList *block_list; ++ ++ assert (IS_ALIGNED (start_sector, VIXDISKLIB_MIN_CHUNK_SIZE)); ++ ++ nr_chunks = MIN (nr_chunks_remaining, VIXDISKLIB_MAX_CHUNK_NUMBER); ++ nr_sectors = nr_chunks * VIXDISKLIB_MIN_CHUNK_SIZE; ++ ++ VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks, ++ "handle, %" PRIu64 " sectors, %" PRIu64 " sectors, " ++ "%d sectors", ++ start_sector, nr_sectors, VIXDISKLIB_MIN_CHUNK_SIZE) ++ err = VixDiskLib_QueryAllocatedBlocks (h->handle, ++ start_sector, nr_sectors, ++ VIXDISKLIB_MIN_CHUNK_SIZE, ++ &block_list); ++ VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0); ++ if (err != VIX_OK) { ++ VDDK_ERROR (err, "VixDiskLib_QueryAllocatedBlocks"); ++ nbdkit_extents_free (extents); ++ return -1; ++ } ++ ++ for (i = 0; i < block_list->numBlocks; ++i) { ++ uint64_t blk_offset, blk_length; ++ ++ blk_offset = block_list->blocks[i].offset * VIXDISKLIB_SECTOR_SIZE; ++ blk_length = block_list->blocks[i].length * VIXDISKLIB_SECTOR_SIZE; ++ ++ /* The query returns allocated blocks. We must insert holes ++ * between the blocks as necessary. ++ */ ++ if ((position < blk_offset && ++ add_extent (extents, &position, blk_offset, true) == -1) || ++ (add_extent (extents, ++ &position, blk_offset + blk_length, false) == -1)) { ++ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list") ++ VixDiskLib_FreeBlockList (block_list); ++ VDDK_CALL_END (VixDiskLib_FreeBlockList, 0); ++ nbdkit_extents_free (extents); ++ return -1; ++ } ++ } ++ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list") ++ VixDiskLib_FreeBlockList (block_list); ++ VDDK_CALL_END (VixDiskLib_FreeBlockList, 0); ++ ++ /* There's an implicit hole after the returned list of blocks, ++ * up to the end of the QueryAllocatedBlocks request. ++ */ ++ if (add_extent (extents, ++ &position, ++ (start_sector + nr_sectors) * VIXDISKLIB_SECTOR_SIZE, ++ true) == -1) { ++ nbdkit_extents_free (extents); ++ return -1; ++ } ++ ++ start_sector += nr_sectors; ++ nr_chunks_remaining -= nr_chunks; ++ } ++ ++ /* Add the allocated unaligned bit at the end. */ ++ if (position < h->size) { ++ if (add_extent (extents, &position, h->size, false) == -1) { ++ nbdkit_extents_free (extents); ++ return -1; ++ } ++ } ++ ++ /* Save the pre-cached extents in the handle. */ ++ h->extents = extents; ++ return 0; ++} ++ ++static int ++get_extents_from_cache (struct command *cmd, struct vddk_handle *h) ++{ ++ struct nbdkit_extents *rextents = cmd->ptr; ++ struct nbdkit_extent e; ++ size_t i; ++ ++ /* We can just copy from the pre-cached extents in the handle which ++ * cover the entire disk, into the returned extents, because ++ * nbdkit_add_extent does the right thing. ++ */ ++ for (i = 0; i < nbdkit_extents_count (h->extents); ++i) { ++ e = nbdkit_get_extent (h->extents, i); ++ if (nbdkit_add_extent (rextents, e.offset, e.length, e.type) == -1) ++ return -1; ++ } ++ ++ return 0; ++} ++ ++/* Handle extents. ++ * ++ * Oh QueryAllocatedBlocks, how much I hate you. The API has two ++ * enormous problems: (a) It's slow, taking about 1 second per ++ * invocation regardless of how much or little data you request. (b) ++ * It serialises all other requests to the disk, like concurrent ++ * reads. ++ * ++ * NBD / nbdkit doesn't help much either by having a 4GB - 1 byte ++ * limit on the size of extent requests. This means that for each 4GB ++ * of disk, we will need to run QueryAllocatedBlocks twice. For a 1TB ++ * virtual disk, about 500 seconds would be used directly in the API ++ * calls, and much more time is lost because of serialization. ++ * ++ * To work around these problems, in the readonly case (used by ++ * virt-v2v), when the first NBD_BLOCK_STATUS request is received, we ++ * will read over the whole disk and cache the extents. We will read ++ * in 32 GB chunks (the maximum possible for the underlying ++ * QueryAllocatedBlocks API). For a 1TB disk this will take ~ 30 ++ * seconds, but avoids all the overheads above. The cached extents ++ * are stored in the handle, and subsequent NBD_BLOCK_STATUS will use ++ * the cache only. ++ * ++ * For writable disks we can't easily do any caching so don't attempt ++ * it. ++ */ ++static int ++do_extents (struct command *cmd, struct vddk_handle *h) ++{ ++ if (h->readonly && !h->extents) { ++ time_t start_t, end_t; ++ ++ time (&start_t); ++ nbdkit_debug ("vddk: pre-caching extents"); ++ ++ if (pre_cache_extents (h) == -1) ++ return -1; ++ ++ time (&end_t); ++ nbdkit_debug ("vddk: finished pre-caching extents in %d second(s)", ++ (int) (end_t - start_t)); ++ } ++ ++ if (h->extents) ++ return get_extents_from_cache (cmd, h); ++ else ++ return get_extents_slow (cmd, h); ++} ++ + /* Background worker thread, one per connection, which is where the + * VDDK commands are issued. + */ +-- +2.47.1 + diff --git a/nbdkit.spec b/nbdkit.spec index 671fe5a..c81c1e4 100644 --- a/nbdkit.spec +++ b/nbdkit.spec @@ -55,7 +55,7 @@ Name: nbdkit Version: 1.42.2 -Release: 4%{?dist} +Release: 5%{?dist} Summary: NBD server License: BSD-3-Clause @@ -95,6 +95,14 @@ Patch0012: 0012-file-Fix-do_fallocate-debugging-on-Alpine.patch Patch0013: 0013-file-Rename-h-can_zeroout-to-h-can_blkzeroout-to-ref.patch Patch0014: 0014-file-zero-Document-implicit-order-that-we-will-try-z.patch Patch0015: 0015-file-zero-Use-BLKDISCARD-method-if-may_trim-is-set.patch +Patch0016: 0016-vddk-Debug-length-of-extents-when-using-D-vddk.exten.patch +Patch0017: 0017-cacheextents-Mark-this-filter-as-deprecated.patch +Patch0018: 0018-include-Move-some-extent-functions-to-nbdkit-common..patch +Patch0019: 0019-vddk-Display-command-type-in-command-completed-messa.patch +Patch0020: 0020-vddk-Cache-the-readonly-flag-from-the-.open-call-in-.patch +Patch0021: 0021-vddk-Move-minimum-version-of-VDDK-to-6.7.patch +Patch0022: 0022-vddk-Unconditionally-test-QueryAllocatedBlocks.patch +Patch0023: 0023-vddk-Pre-cache-the-extents-for-readonly-connections.patch # For automatic RPM Provides generation. # See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html @@ -1537,7 +1545,7 @@ fi %changelog -* Thu May 01 2025 Richard W.M. Jones - 1.42.2-4 +* Mon Jun 02 2025 Richard W.M. Jones - 1.42.2-5 - Rebase to nbdkit 1.42.2 - Synch the spec file with Fedora Rawhide. - nbdkit-ondemand-plugin moves into a new subpackage. @@ -1547,6 +1555,8 @@ fi resolves: RHEL-85515 - Allow nbdkit-file-plugin to zero and trim block devices resolves: RHEL-89371 +- vddk: Pre-cache the extents for readonly connections + resolves: RHEL-94825 * Mon Jan 06 2025 Richard W.M. Jones - 1.40.4-3 - vddk: Avoid reading partial chunk beyond the end of the disk