vddk: Pre-cache the extents for readonly connections

resolves: RHEL-94825
This commit is contained in:
Richard W.M. Jones 2025-06-02 15:44:38 +01:00
parent e585cbdf30
commit 8430edda07
9 changed files with 904 additions and 2 deletions

View File

@ -0,0 +1,30 @@
From 27736bb9e22f6c86b1a19fc02668dd04f6fc8bd0 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Thu, 29 May 2025 09:15:33 +0000
Subject: [PATCH] vddk: Debug length of extents when using -D vddk.extents=1
(cherry picked from commit a53746d326e08fae9ec1ea782df740abb48d0114)
---
plugins/vddk/worker.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 8a91250a..c61c4d16 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -375,9 +375,10 @@ add_extent (struct nbdkit_extents *extents,
return 0;
if (vddk_debug_extents)
- nbdkit_debug ("adding extent type %s at [%" PRIu64 "...%" PRIu64 "]",
+ nbdkit_debug ("adding extent type %s at [%" PRIu64 "...%" PRIu64 "] "
+ "(length %" PRIu64 ")",
is_hole ? "hole" : "allocated data",
- *position, next_position-1);
+ *position, next_position-1, length);
if (nbdkit_add_extent (extents, *position, length, type) == -1)
return -1;
--
2.47.1

View File

@ -0,0 +1,38 @@
From 85499cfd9c282d986ee8964d0404ccdf25733a6a Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sat, 31 May 2025 08:12:32 +0100
Subject: [PATCH] cacheextents: Mark this filter as deprecated
We will remove it in nbdkit 1.46.
Its usage was removed from virt-v2v because we found that it did not
do anything:
https://github.com/libguestfs/virt-v2v/commit/48c4ce8e6cf6f1c390a48245ef0f99233f80cfe8
(cherry picked from commit 9717c47999fb2f56c3569cf1cdd4d0c160f866c1)
---
filters/cacheextents/nbdkit-cacheextents-filter.pod | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/filters/cacheextents/nbdkit-cacheextents-filter.pod b/filters/cacheextents/nbdkit-cacheextents-filter.pod
index c3d89465..0693ca80 100644
--- a/filters/cacheextents/nbdkit-cacheextents-filter.pod
+++ b/filters/cacheextents/nbdkit-cacheextents-filter.pod
@@ -6,6 +6,14 @@ nbdkit-cacheextents-filter - cache extents
nbdkit --filter=cacheextents plugin
+=head1 DEPRECATED
+
+B<The cacheextents filter is deprecated in S<nbdkit E<ge> 1.43.10> and
+will be removed in S<nbdkit 1.46>>. There is no direct replacement,
+but as the filter only worked for a narrow and unusual range of access
+patterns it is likely that it has no effect and you can just stop
+using it.
+
=head1 DESCRIPTION
C<nbdkit-cacheextents-filter> is a filter that caches the result of last
--
2.47.1

View File

@ -0,0 +1,77 @@
From 2282c37faa556339ff6ce0557866c171f2e9ee29 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Sat, 31 May 2025 08:04:41 +0100
Subject: [PATCH] include: Move some extent functions to nbdkit-common.h
No existing plugins use the extent functions nbdkit_extents_new,
nbdkit_extents_free, etc, since they are mainly useful for filters so
they can build and manipulate new lists of extents. Nevertheless
there is nothing that prevents them from being used in plugins, so
move those functions to the common header (so they appear for users of
<nbdkit-plugin.h>)
There are a couple of helper functions which are really
filter-specific so leave those in nbdkit-filter.h.
(cherry picked from commit 03792d04f270f2cffc589dd9703c9de9c3d5a65e)
---
include/nbdkit-common.h | 15 +++++++++++++++
include/nbdkit-filter.h | 15 +--------------
2 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/include/nbdkit-common.h b/include/nbdkit-common.h
index 43d674aa..bb5e3e55 100644
--- a/include/nbdkit-common.h
+++ b/include/nbdkit-common.h
@@ -157,7 +157,22 @@ NBDKIT_EXTERN_DECL (const char *, nbdkit_vprintf_intern,
(const char *msg, va_list args)
ATTRIBUTE_FORMAT_PRINTF (1, 0));
+/* Extent functions. */
+struct nbdkit_extent {
+ uint64_t offset;
+ uint64_t length;
+ uint32_t type;
+};
+
struct nbdkit_extents;
+
+NBDKIT_EXTERN_DECL (struct nbdkit_extents *, nbdkit_extents_new,
+ (uint64_t start, uint64_t end));
+NBDKIT_EXTERN_DECL (void, nbdkit_extents_free, (struct nbdkit_extents *));
+NBDKIT_EXTERN_DECL (size_t, nbdkit_extents_count,
+ (const struct nbdkit_extents *));
+NBDKIT_EXTERN_DECL (struct nbdkit_extent, nbdkit_get_extent,
+ (const struct nbdkit_extents *, size_t));
NBDKIT_EXTERN_DECL (int, nbdkit_add_extent,
(struct nbdkit_extents *,
uint64_t offset, uint64_t length, uint32_t type));
diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h
index a4ac09d4..31520bf5 100644
--- a/include/nbdkit-filter.h
+++ b/include/nbdkit-filter.h
@@ -121,20 +121,7 @@ struct nbdkit_next_ops {
*/
};
-/* Extent functions. */
-struct nbdkit_extent {
- uint64_t offset;
- uint64_t length;
- uint32_t type;
-};
-
-NBDKIT_EXTERN_DECL (struct nbdkit_extents *, nbdkit_extents_new,
- (uint64_t start, uint64_t end));
-NBDKIT_EXTERN_DECL (void, nbdkit_extents_free, (struct nbdkit_extents *));
-NBDKIT_EXTERN_DECL (size_t, nbdkit_extents_count,
- (const struct nbdkit_extents *));
-NBDKIT_EXTERN_DECL (struct nbdkit_extent, nbdkit_get_extent,
- (const struct nbdkit_extents *, size_t));
+/* Extent helpers. */
NBDKIT_EXTERN_DECL (struct nbdkit_extents *, nbdkit_extents_full,
(nbdkit_next *next,
uint32_t count, uint64_t offset,
--
2.47.1

View File

@ -0,0 +1,29 @@
From ae0556bd62bd7578986026321866e122bd228689 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 May 2025 13:29:28 +0100
Subject: [PATCH] vddk: Display command type in command completed message
Useful extra debugging.
(cherry picked from commit 81d4d74fecf3c071e144a8ba016f43ba1de1b093)
---
plugins/vddk/worker.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index c61c4d16..3bf1d5c2 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -116,7 +116,8 @@ complete_command (void *vp, VixError result)
struct command *cmd = vp;
if (vddk_debug_datapath)
- nbdkit_debug ("command %" PRIu64 " completed", cmd->id);
+ nbdkit_debug ("command %" PRIu64 " (%s) completed",
+ cmd->id, command_type_string (cmd->type));
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex);
--
2.47.1

View File

@ -0,0 +1,41 @@
From d01437869db201f45c1a7ce1bb1e5f9142c9081c Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 May 2025 13:32:00 +0100
Subject: [PATCH] vddk: Cache the readonly flag from the .open call in the
handle
(cherry picked from commit 0d953ea644f44259edb19c97e3c7863794c0ca1c)
---
plugins/vddk/vddk.c | 1 +
plugins/vddk/vddk.h | 3 +++
2 files changed, 4 insertions(+)
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 1de356d8..342a913b 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -713,6 +713,7 @@ vddk_open (int readonly)
nbdkit_error ("calloc: %m");
return NULL;
}
+ h->readonly = readonly;
h->commands = (command_queue) empty_vector;
pthread_mutex_init (&h->commands_lock, NULL);
pthread_cond_init (&h->commands_cond, NULL);
diff --git a/plugins/vddk/vddk.h b/plugins/vddk/vddk.h
index 1d1069cc..3586c5da 100644
--- a/plugins/vddk/vddk.h
+++ b/plugins/vddk/vddk.h
@@ -166,6 +166,9 @@ struct vddk_handle {
pthread_cond_t commands_cond; /* condition (queue size 0 -> 1) */
uint64_t id; /* next command ID */
+ /* Cached readonly flag from the open call. */
+ int readonly;
+
/* Cached disk size in bytes (set in get_size()). */
uint64_t size;
};
--
2.47.1

View File

@ -0,0 +1,250 @@
From 6df932e5a77250d9946b91a02a143bdd28ee2f77 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 May 2025 13:49:10 +0100
Subject: [PATCH] vddk: Move minimum version of VDDK to 6.7
We can now assume that QueryAllocatedBlocks exists, as well as a
couple of other functions, simplifying the code.
This was released in 2018 (7 years ago) so there's been plenty of time
to upgrade.
(cherry picked from commit 7c90116664b1b1a3c3756d6a79d6d483bc6062dd)
---
plugins/vddk/nbdkit-vddk-plugin.pod | 8 ++---
plugins/vddk/vddk-stubs.h | 32 +++++++++++---------
plugins/vddk/vddk.c | 43 ++++++---------------------
plugins/vddk/worker.c | 14 ++-------
tests/dummy-vddk.c | 45 +++++++++++++++++++++++++++--
5 files changed, 76 insertions(+), 66 deletions(-)
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
index da3e9f9a..279da2d0 100644
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
@@ -397,15 +397,13 @@ is I<not> recommended.
=over 4
-=item VDDK E<ge> 6.5 (released Nov 2016)
+=item VDDK E<ge> 6.7 (released Apr 2018)
This is the minimum version of VDDK supported. Older versions will
not work.
-=item VDDK 6.7 (released Apr 2018)
-
-This is the first version that supported the
-C<VixDiskLib_QueryAllocatedBlocks> API. This is required to provide
+This is also the first version that supported the
+C<VixDiskLib_QueryAllocatedBlocks> API. This is used to provide
sparseness (extent) information over NBD.
=item VDDK 8.0.3 (released Jun 2024)
diff --git a/plugins/vddk/vddk-stubs.h b/plugins/vddk/vddk-stubs.h
index 8e5afdb8..d768c51a 100644
--- a/plugins/vddk/vddk-stubs.h
+++ b/plugins/vddk/vddk-stubs.h
@@ -132,16 +132,22 @@ STUB (VixDiskLib_Wait,
VixError,
(VixDiskLibHandle handle));
-/* Added in VDDK 6.7, these will be NULL for earlier versions: */
-OPTIONAL_STUB (VixDiskLib_QueryAllocatedBlocks,
- VixError,
- (VixDiskLibHandle diskHandle,
- uint64_t start_sector, uint64_t nr_sectors,
- uint64_t chunk_size,
- VixDiskLibBlockList **block_list));
-OPTIONAL_STUB (VixDiskLib_FreeBlockList,
- VixError,
- (VixDiskLibBlockList *block_list));
-OPTIONAL_STUB (VixDiskLib_AllocateConnectParams,
- VixDiskLibConnectParams *,
- (void));
+/* Added in VDDK 6.7. */
+STUB (VixDiskLib_QueryAllocatedBlocks,
+ VixError,
+ (VixDiskLibHandle diskHandle,
+ uint64_t start_sector, uint64_t nr_sectors,
+ uint64_t chunk_size,
+ VixDiskLibBlockList **block_list));
+STUB (VixDiskLib_FreeBlockList,
+ VixError,
+ (VixDiskLibBlockList *block_list));
+STUB (VixDiskLib_AllocateConnectParams,
+ VixDiskLibConnectParams *,
+ (void));
+
+/* OPTIONAL_STUB can be used to add new APIs which are not supported
+ * by older versions of VDDK, and to make them NULL if not present.
+ * However VDDK >= 6.7 has everything we need for now so we are no
+ * longer using this macro.
+ */
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 342a913b..60f75b51 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -666,37 +666,6 @@ vddk_dump_plugin (void)
/* Lock protecting open/close calls - see above. */
static pthread_mutex_t open_close_lock = PTHREAD_MUTEX_INITIALIZER;
-static inline VixDiskLibConnectParams *
-allocate_connect_params (void)
-{
- VixDiskLibConnectParams *ret;
-
- if (VixDiskLib_AllocateConnectParams != NULL) {
- VDDK_CALL_START (VixDiskLib_AllocateConnectParams, "")
- ret = VixDiskLib_AllocateConnectParams ();
- VDDK_CALL_END (VixDiskLib_AllocateConnectParams, 0);
- }
- else
- ret = calloc (1, sizeof (VixDiskLibConnectParams));
-
- return ret;
-}
-
-static inline void
-free_connect_params (VixDiskLibConnectParams *params)
-{
- /* Only use FreeConnectParams if AllocateConnectParams was
- * originally called. Otherwise use free.
- */
- if (VixDiskLib_AllocateConnectParams != NULL) {
- VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params")
- VixDiskLib_FreeConnectParams (params);
- VDDK_CALL_END (VixDiskLib_FreeConnectParams, 0);
- }
- else
- free (params);
-}
-
/* Create the per-connection handle. */
static void *
vddk_open (int readonly)
@@ -718,7 +687,9 @@ vddk_open (int readonly)
pthread_mutex_init (&h->commands_lock, NULL);
pthread_cond_init (&h->commands_cond, NULL);
- h->params = allocate_connect_params ();
+ VDDK_CALL_START (VixDiskLib_AllocateConnectParams, "")
+ h->params = VixDiskLib_AllocateConnectParams ();
+ VDDK_CALL_END (VixDiskLib_AllocateConnectParams, 0);
if (h->params == NULL) {
nbdkit_error ("allocate VixDiskLibConnectParams: %m");
goto err0;
@@ -851,7 +822,9 @@ vddk_open (int readonly)
VixDiskLib_Disconnect (h->connection);
VDDK_CALL_END (VixDiskLib_Disconnect, 0);
err1:
- free_connect_params (h->params);
+ VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params")
+ VixDiskLib_FreeConnectParams (h->params);
+ VDDK_CALL_END (VixDiskLib_FreeConnectParams, 0);
err0:
pthread_mutex_destroy (&h->commands_lock);
pthread_cond_destroy (&h->commands_cond);
@@ -877,7 +850,9 @@ vddk_close (void *handle)
VixDiskLib_Disconnect (h->connection);
VDDK_CALL_END (VixDiskLib_Disconnect, 0);
- free_connect_params (h->params);
+ VDDK_CALL_START (VixDiskLib_FreeConnectParams, "params")
+ VixDiskLib_FreeConnectParams (h->params);
+ VDDK_CALL_END (VixDiskLib_FreeConnectParams, 0);
pthread_mutex_destroy (&h->commands_lock);
pthread_cond_destroy (&h->commands_cond);
command_queue_reset (&h->commands);
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 3bf1d5c2..076b2bd7 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -309,23 +309,13 @@ do_can_extents (struct command *cmd, struct vddk_handle *h)
VixError err;
VixDiskLibBlockList *block_list;
- /* This call was added in VDDK 6.7. In earlier versions the
- * function pointer will be NULL and we cannot query extents.
- */
- if (VixDiskLib_QueryAllocatedBlocks == NULL) {
- nbdkit_debug ("can_extents: VixDiskLib_QueryAllocatedBlocks == NULL, "
- "probably this is VDDK < 6.7");
- return 0;
- }
-
/* Suppress errors around this call. See:
* https://bugzilla.redhat.com/show_bug.cgi?id=1709211#c7
*/
error_suppression = 1;
- /* However even when the call is available it rarely works well so
- * the best thing we can do here is to try the call and if it's
- * non-functional return false.
+ /* Try the QueryAllocatedBlocks call and if it's non-functional
+ * return false.
*/
VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
"handle, 0, %d sectors, %d sectors",
diff --git a/tests/dummy-vddk.c b/tests/dummy-vddk.c
index 0e69012a..01284f38 100644
--- a/tests/dummy-vddk.c
+++ b/tests/dummy-vddk.c
@@ -116,11 +116,52 @@ VixDiskLib_FreeErrorText (char *text)
free (text);
}
+NBDKIT_DLL_PUBLIC VixError
+VixDiskLib_QueryAllocatedBlocks (VixDiskLibHandle diskHandle,
+ uint64_t start_sector, uint64_t nr_sectors,
+ uint64_t chunk_size,
+ VixDiskLibBlockList **block_list)
+{
+ VixDiskLibBlockList *ret;
+
+ /* This is safe because ret->blocks is a 1-sized array and we only
+ * use 1 entry here.
+ */
+ ret = calloc (1, sizeof *ret);
+ if (ret == NULL)
+ abort ();
+
+ /* Pretend it's all allocated. */
+ ret->numBlocks = 1;
+ ret->blocks[0].offset = start_sector;
+ ret->blocks[0].length = nr_sectors;
+
+ *block_list = ret;
+ return VIX_OK;
+}
+
+NBDKIT_DLL_PUBLIC VixError
+VixDiskLib_FreeBlockList (VixDiskLibBlockList *block_list)
+{
+ free (block_list);
+ return VIX_OK;
+}
+
+NBDKIT_DLL_PUBLIC VixDiskLibConnectParams *
+VixDiskLib_AllocateConnectParams (void)
+{
+ VixDiskLibConnectParams *ret;
+
+ ret = calloc (1, sizeof *ret);
+ if (ret == NULL)
+ abort ();
+ return ret;
+}
+
NBDKIT_DLL_PUBLIC void
VixDiskLib_FreeConnectParams (VixDiskLibConnectParams *params)
{
- /* never called since we don't define optional AllocateConnectParams */
- abort ();
+ free (params);
}
NBDKIT_DLL_PUBLIC VixError
--
2.47.1

View File

@ -0,0 +1,78 @@
From 0c3394942b857815feebe7586b2cb18e2bf113e5 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 May 2025 13:59:28 +0100
Subject: [PATCH] vddk: Unconditionally test QueryAllocatedBlocks
Except in rare cases like a suddenly dropped connection, we always
call vddk_can_extents and therefore do this test. We might as well do
it unconditionally when the worker thread starts up.
This simplifies following commits where we may do more work based on
this flag when the worker thread starts up.
(cherry picked from commit 787db3e8ecfd81b683f541065daee75665ab47e0)
---
plugins/vddk/worker.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 076b2bd7..6efcc6f6 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -303,8 +303,13 @@ do_flush (struct command *cmd, struct vddk_handle *h)
return 0;
}
-static int
-do_can_extents (struct command *cmd, struct vddk_handle *h)
+/* Try the QueryAllocatedBlocks call and if it's non-functional return
+ * false. At some point in future, perhaps when we move to baseline
+ * VDDK >= 7, we can just assume it works and remove this test
+ * entirely.
+ */
+static bool
+test_can_extents (struct vddk_handle *h)
{
VixError err;
VixDiskLibBlockList *block_list;
@@ -314,9 +319,6 @@ do_can_extents (struct command *cmd, struct vddk_handle *h)
*/
error_suppression = 1;
- /* Try the QueryAllocatedBlocks call and if it's non-functional
- * return false.
- */
VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
"handle, 0, %d sectors, %d sectors",
VIXDISKLIB_MIN_CHUNK_SIZE, VIXDISKLIB_MIN_CHUNK_SIZE)
@@ -502,6 +504,10 @@ vddk_worker_thread (void *handle)
{
struct vddk_handle *h = handle;
bool stop = false;
+ bool can_extents;
+
+ /* Test if QueryAllocatedBlocks will work. */
+ can_extents = test_can_extents (h);
while (!stop) {
struct command *cmd;
@@ -544,12 +550,13 @@ vddk_worker_thread (void *handle)
break;
case CAN_EXTENTS:
- r = do_can_extents (cmd, h);
- if (r >= 0)
- *(int *)cmd->ptr = r;
+ *(int *)cmd->ptr = can_extents;
+ r = 0;
break;
case EXTENTS:
+ /* If we returned false above, we should never be called here. */
+ assert (can_extents);
r = do_extents (cmd, h);
break;
--
2.47.1

View File

@ -0,0 +1,349 @@
From 3be0221d8cbf5cc916efb9975b4f6a2d85044991 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Fri, 30 May 2025 15:06:07 +0100
Subject: [PATCH] vddk: Pre-cache the extents for readonly connections
As explained in detail in the code comment, QueryAllocatedBlocks has
very poor performance. We can partially work around this by
pre-caching extents when the first NBD_BLOCK_STATUS request is made.
We only do this for readonly connections, since it would be very
complex to do it for writable connections where the extent information
could change under us. And we only do it on the first
NBD_BLOCK_STATUS request, so we know that the caller is interested in
extents.
Benchmarking
------------
This improves performance, dramatically in some cases:
Size Used% [ni] [nm] [nc] [qi] [qm] [qc] [dd]
before 64G 16% 17 21 354 6 62 180
after " " 18 13 59 6 7 66 57
nc=178 MBytes/s dd=1150 MBytes/sec
before 128G 5.5% 17 29 646 6 50 151
after " " 17 14 68 6 8 71
nc=106 MBytes/s
before 128G 45% 17 30 1073 6 52 578
after " " 17 14 457 6 8 506 143
nc=128 MBytes/s dd=409 MBytes/sec
(all times in seconds)
[ni] nbdinfo $uri
Note: Makes two connections, unlike qemu-img info.
[nm] nbdinfo --map --totals $uri
Note: Slower than it ought to be, needs investigation.
[nc] nbdcopy -p $uri null:
[qi] qemu-img info $uri
[qm] qemu-img map $uri
[qc] qemu-img convert -p -n -f raw $uri \
-O raw 'json:{"file.driver":"null-co","file.size":"1E"}'
Note: Requests the map up front, which is why it performs better
than nbdcopy on the "before" version since reads are not being
serialized by concurrent calls to QueryAllocatedBlocks.
[dd] dd if=*-flat.vmdk of=/dev/null bs=16777216
Note: This command was run on the ESXi server where the storage
is assumed to be local. ESXi has very limited tools available
(eg. no "fio" etc). Also the cp command is from Busybox and is
notably slower than dd. To get the accurate copying rate I
assumed that this command copies all data on disk to /dev/null,
skipping reading holes where thin provisioned. IOW using the "ls
-s" output as the number of blocks read.
It should be noted that the after/[nc] numbers are not very stable.
In the last test where [nc] = 457, I see a deviation of as much as 10%
either side over multiple runs.
The network used in the test is 25 Gbps and clearly we are nowhere
near able to reach that. A more likely upper limit is the speed of
reading from the disks ([dd]). There is also a large gap between our
performance and that number. VMware is thought to impose a
per-connection limit of around 1 Gbps on NFC connections, and there
are other limitations
(https://knowledge.broadcom.com/external/article/307001/nfc-performance-is-slow-resulting-in-mig.html).
Tuning NFC makes no observable difference
-----------------------------------------
Further tuning of NFC is possible
(https://techdocs.broadcom.com/us/en/vmware-cis/vsphere/vsphere-supervisor/7-0/best-practices-for-nbd-transport.html).
Using compression (nbdkit vddk plugin 'compression' option) is
possible but in my test it makes things much slower. This is using
the first VM from the tests above:
[nc]
(no compression) 59 (178 MBytes/sec)
compression=zlib 323 ( 33 MBytes/sec)
VMware documentation also suggests using a configuration file
containing the entries below (the configuration file is placed
somewhere on the client, and referenced using the
config=/path/to/config.ini parameter):
vixDiskLib.nfcAio.Session.BufSizeIn64KB=32
vixDiskLib.nfcAio.Session.BufCount=16
This made no difference for me, at least when testing a single
conversion. Separate tests done by the MTV team suggest it may
improve performance if you are converting multiple disks / VMs in
parallel
(https://docs.redhat.com/en/documentation/migration_toolkit_for_virtualization/2.7/html/installing_and_using_the_migration_toolkit_for_virtualization/mtv-performance-recommendation_mtv#mtv-aio-buffer-key-findings_mtv)
Some VMware documentation also suggests:
config.nfc.useSSL=false
but this also made no difference.
Some VMware documentation suggests using unbuffered I/O
(unbuffered=true) but in my test this caused a large slow down.
Continue to disable multi-conn
------------------------------
We have recommended against using multi-conn with the VDDK plugin,
because we observed some slow down. This commit makes no difference
to this advice. The same amount of slow down is still observed. (In
virt-v2v we use --filter=multi-conn multi-conn-mode=disable to ensure
it is never used.)
(cherry picked from commit 5a882e74cae3dbaa09bf3b942a02f9947b12f6e5)
---
plugins/vddk/vddk.c | 2 +
plugins/vddk/vddk.h | 3 +
plugins/vddk/worker.c | 166 +++++++++++++++++++++++++++++++++++++++++-
3 files changed, 170 insertions(+), 1 deletion(-)
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
index 60f75b51..3c63014e 100644
--- a/plugins/vddk/vddk.c
+++ b/plugins/vddk/vddk.c
@@ -843,6 +843,8 @@ vddk_close (void *handle)
send_command_and_wait (h, &stop_cmd);
pthread_join (h->thread, NULL);
+ nbdkit_extents_free (h->extents);
+
VDDK_CALL_START (VixDiskLib_Close, "handle")
VixDiskLib_Close (h->handle);
VDDK_CALL_END (VixDiskLib_Close, 0);
diff --git a/plugins/vddk/vddk.h b/plugins/vddk/vddk.h
index 3586c5da..461fb528 100644
--- a/plugins/vddk/vddk.h
+++ b/plugins/vddk/vddk.h
@@ -171,6 +171,9 @@ struct vddk_handle {
/* Cached disk size in bytes (set in get_size()). */
uint64_t size;
+
+ /* Cached extents for readonly disks. */
+ struct nbdkit_extents *extents;
};
/* reexec.c */
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
index 6efcc6f6..3925fb91 100644
--- a/plugins/vddk/worker.c
+++ b/plugins/vddk/worker.c
@@ -37,6 +37,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
+#include <time.h>
#include <pthread.h>
@@ -380,7 +381,7 @@ add_extent (struct nbdkit_extents *extents,
}
static int
-do_extents (struct command *cmd, struct vddk_handle *h)
+get_extents_slow (struct command *cmd, struct vddk_handle *h)
{
const uint32_t count = cmd->count;
const uint64_t offset = cmd->offset;
@@ -496,6 +497,169 @@ do_extents (struct command *cmd, struct vddk_handle *h)
return 0;
}
+static int
+pre_cache_extents (struct vddk_handle *h)
+{
+ struct nbdkit_extents *extents;
+ uint64_t start_sector = 0;
+ uint64_t nr_chunks_remaining =
+ h->size / VIXDISKLIB_MIN_CHUNK_SIZE / VIXDISKLIB_SECTOR_SIZE;
+ uint64_t position = 0;
+
+ extents = nbdkit_extents_new (0, h->size);
+ if (extents == NULL)
+ return -1;
+
+ /* Scan through the disk reading whole "chunks" (32 GB), the most
+ * efficient way to use QueryAllocatedBlocks.
+ */
+ while (nr_chunks_remaining > 0) {
+ VixError err;
+ uint32_t i;
+ uint64_t nr_chunks, nr_sectors;
+ VixDiskLibBlockList *block_list;
+
+ assert (IS_ALIGNED (start_sector, VIXDISKLIB_MIN_CHUNK_SIZE));
+
+ nr_chunks = MIN (nr_chunks_remaining, VIXDISKLIB_MAX_CHUNK_NUMBER);
+ nr_sectors = nr_chunks * VIXDISKLIB_MIN_CHUNK_SIZE;
+
+ VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
+ "handle, %" PRIu64 " sectors, %" PRIu64 " sectors, "
+ "%d sectors",
+ start_sector, nr_sectors, VIXDISKLIB_MIN_CHUNK_SIZE)
+ err = VixDiskLib_QueryAllocatedBlocks (h->handle,
+ start_sector, nr_sectors,
+ VIXDISKLIB_MIN_CHUNK_SIZE,
+ &block_list);
+ VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
+ if (err != VIX_OK) {
+ VDDK_ERROR (err, "VixDiskLib_QueryAllocatedBlocks");
+ nbdkit_extents_free (extents);
+ return -1;
+ }
+
+ for (i = 0; i < block_list->numBlocks; ++i) {
+ uint64_t blk_offset, blk_length;
+
+ blk_offset = block_list->blocks[i].offset * VIXDISKLIB_SECTOR_SIZE;
+ blk_length = block_list->blocks[i].length * VIXDISKLIB_SECTOR_SIZE;
+
+ /* The query returns allocated blocks. We must insert holes
+ * between the blocks as necessary.
+ */
+ if ((position < blk_offset &&
+ add_extent (extents, &position, blk_offset, true) == -1) ||
+ (add_extent (extents,
+ &position, blk_offset + blk_length, false) == -1)) {
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
+ VixDiskLib_FreeBlockList (block_list);
+ VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
+ nbdkit_extents_free (extents);
+ return -1;
+ }
+ }
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
+ VixDiskLib_FreeBlockList (block_list);
+ VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
+
+ /* There's an implicit hole after the returned list of blocks,
+ * up to the end of the QueryAllocatedBlocks request.
+ */
+ if (add_extent (extents,
+ &position,
+ (start_sector + nr_sectors) * VIXDISKLIB_SECTOR_SIZE,
+ true) == -1) {
+ nbdkit_extents_free (extents);
+ return -1;
+ }
+
+ start_sector += nr_sectors;
+ nr_chunks_remaining -= nr_chunks;
+ }
+
+ /* Add the allocated unaligned bit at the end. */
+ if (position < h->size) {
+ if (add_extent (extents, &position, h->size, false) == -1) {
+ nbdkit_extents_free (extents);
+ return -1;
+ }
+ }
+
+ /* Save the pre-cached extents in the handle. */
+ h->extents = extents;
+ return 0;
+}
+
+static int
+get_extents_from_cache (struct command *cmd, struct vddk_handle *h)
+{
+ struct nbdkit_extents *rextents = cmd->ptr;
+ struct nbdkit_extent e;
+ size_t i;
+
+ /* We can just copy from the pre-cached extents in the handle which
+ * cover the entire disk, into the returned extents, because
+ * nbdkit_add_extent does the right thing.
+ */
+ for (i = 0; i < nbdkit_extents_count (h->extents); ++i) {
+ e = nbdkit_get_extent (h->extents, i);
+ if (nbdkit_add_extent (rextents, e.offset, e.length, e.type) == -1)
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Handle extents.
+ *
+ * Oh QueryAllocatedBlocks, how much I hate you. The API has two
+ * enormous problems: (a) It's slow, taking about 1 second per
+ * invocation regardless of how much or little data you request. (b)
+ * It serialises all other requests to the disk, like concurrent
+ * reads.
+ *
+ * NBD / nbdkit doesn't help much either by having a 4GB - 1 byte
+ * limit on the size of extent requests. This means that for each 4GB
+ * of disk, we will need to run QueryAllocatedBlocks twice. For a 1TB
+ * virtual disk, about 500 seconds would be used directly in the API
+ * calls, and much more time is lost because of serialization.
+ *
+ * To work around these problems, in the readonly case (used by
+ * virt-v2v), when the first NBD_BLOCK_STATUS request is received, we
+ * will read over the whole disk and cache the extents. We will read
+ * in 32 GB chunks (the maximum possible for the underlying
+ * QueryAllocatedBlocks API). For a 1TB disk this will take ~ 30
+ * seconds, but avoids all the overheads above. The cached extents
+ * are stored in the handle, and subsequent NBD_BLOCK_STATUS will use
+ * the cache only.
+ *
+ * For writable disks we can't easily do any caching so don't attempt
+ * it.
+ */
+static int
+do_extents (struct command *cmd, struct vddk_handle *h)
+{
+ if (h->readonly && !h->extents) {
+ time_t start_t, end_t;
+
+ time (&start_t);
+ nbdkit_debug ("vddk: pre-caching extents");
+
+ if (pre_cache_extents (h) == -1)
+ return -1;
+
+ time (&end_t);
+ nbdkit_debug ("vddk: finished pre-caching extents in %d second(s)",
+ (int) (end_t - start_t));
+ }
+
+ if (h->extents)
+ return get_extents_from_cache (cmd, h);
+ else
+ return get_extents_slow (cmd, h);
+}
+
/* Background worker thread, one per connection, which is where the
* VDDK commands are issued.
*/
--
2.47.1

View File

@ -55,7 +55,7 @@
Name: nbdkit
Version: 1.42.2
Release: 4%{?dist}
Release: 5%{?dist}
Summary: NBD server
License: BSD-3-Clause
@ -95,6 +95,14 @@ Patch0012: 0012-file-Fix-do_fallocate-debugging-on-Alpine.patch
Patch0013: 0013-file-Rename-h-can_zeroout-to-h-can_blkzeroout-to-ref.patch
Patch0014: 0014-file-zero-Document-implicit-order-that-we-will-try-z.patch
Patch0015: 0015-file-zero-Use-BLKDISCARD-method-if-may_trim-is-set.patch
Patch0016: 0016-vddk-Debug-length-of-extents-when-using-D-vddk.exten.patch
Patch0017: 0017-cacheextents-Mark-this-filter-as-deprecated.patch
Patch0018: 0018-include-Move-some-extent-functions-to-nbdkit-common..patch
Patch0019: 0019-vddk-Display-command-type-in-command-completed-messa.patch
Patch0020: 0020-vddk-Cache-the-readonly-flag-from-the-.open-call-in-.patch
Patch0021: 0021-vddk-Move-minimum-version-of-VDDK-to-6.7.patch
Patch0022: 0022-vddk-Unconditionally-test-QueryAllocatedBlocks.patch
Patch0023: 0023-vddk-Pre-cache-the-extents-for-readonly-connections.patch
# For automatic RPM Provides generation.
# See: https://rpm-software-management.github.io/rpm/manual/dependency_generators.html
@ -1537,7 +1545,7 @@ fi
%changelog
* Thu May 01 2025 Richard W.M. Jones <rjones@redhat.com> - 1.42.2-4
* Mon Jun 02 2025 Richard W.M. Jones <rjones@redhat.com> - 1.42.2-5
- Rebase to nbdkit 1.42.2
- Synch the spec file with Fedora Rawhide.
- nbdkit-ondemand-plugin moves into a new subpackage.
@ -1547,6 +1555,8 @@ fi
resolves: RHEL-85515
- Allow nbdkit-file-plugin to zero and trim block devices
resolves: RHEL-89371
- vddk: Pre-cache the extents for readonly connections
resolves: RHEL-94825
* Mon Jan 06 2025 Richard W.M. Jones <rjones@redhat.com> - 1.40.4-3
- vddk: Avoid reading partial chunk beyond the end of the disk