752d86cd28
related: rhbz#2011709
1288 lines
41 KiB
Diff
1288 lines
41 KiB
Diff
From 13223e8e3219d0310ce4d94093bbdb7732a891fb Mon Sep 17 00:00:00 2001
|
|
From: "Richard W.M. Jones" <rjones@redhat.com>
|
|
Date: Wed, 27 Oct 2021 10:17:22 +0100
|
|
Subject: [PATCH] vddk: Implement parallel thread model
|
|
|
|
Since VDDK 6.0, asynchronous read and write operations are available.
|
|
This commit makes use of these, allowing us to use the parallel thread
|
|
model for increased performance.
|
|
|
|
Note that at least VDDK 6.5 is required because VDDK 6.0 had a
|
|
different and incompatible signature for VixDiskLibCompletionCB.
|
|
|
|
Also note at least vSphere 6.7 is required for asynchronous calls to make
|
|
any performance difference. In older versions they work
|
|
synchronously.
|
|
|
|
In the parallel thread model, nbdkit will be calling us in parallel
|
|
from multiple nbdkit threads. VDDK does not allow multiple threads to
|
|
simultaneously call VDDK operations on the same handle. So we create
|
|
a background thread per handle (== connection).
|
|
|
|
Only the background thread makes VDDK calls[1]. The background thread
|
|
handles a mix of synchronous (like extents, flush) and asynchronous
|
|
(like read, write) operations, but all from one thread.
|
|
|
|
Parallel nbdkit threads issue commands to the background thread
|
|
associated with each handle, and wait until they are retired.
|
|
|
|
[1] All VDDK calls, that is, except for connecting and disconnecting,
|
|
which for different reasons are protected by a global lock, so I did
|
|
not need to change those.
|
|
|
|
(cherry picked from commit 1eecf15fc3d8ea253ccec4f5883fdbb9aa6f8c2b)
|
|
---
|
|
plugins/vddk/Makefile.am | 1 +
|
|
plugins/vddk/nbdkit-vddk-plugin.pod | 11 +-
|
|
plugins/vddk/vddk.c | 380 +++++--------------
|
|
plugins/vddk/vddk.h | 49 ++-
|
|
plugins/vddk/worker.c | 567 ++++++++++++++++++++++++++++
|
|
tests/dummy-vddk.c | 32 ++
|
|
6 files changed, 745 insertions(+), 295 deletions(-)
|
|
create mode 100644 plugins/vddk/worker.c
|
|
|
|
diff --git a/plugins/vddk/Makefile.am b/plugins/vddk/Makefile.am
|
|
index 4f470ff9..f8382fc9 100644
|
|
--- a/plugins/vddk/Makefile.am
|
|
+++ b/plugins/vddk/Makefile.am
|
|
@@ -49,6 +49,7 @@ nbdkit_vddk_plugin_la_SOURCES = \
|
|
stats.c \
|
|
vddk-structs.h \
|
|
vddk-stubs.h \
|
|
+ worker.c \
|
|
$(top_srcdir)/include/nbdkit-plugin.h \
|
|
$(NULL)
|
|
|
|
diff --git a/plugins/vddk/nbdkit-vddk-plugin.pod b/plugins/vddk/nbdkit-vddk-plugin.pod
|
|
index 1c16d096..ce82a734 100644
|
|
--- a/plugins/vddk/nbdkit-vddk-plugin.pod
|
|
+++ b/plugins/vddk/nbdkit-vddk-plugin.pod
|
|
@@ -523,6 +523,14 @@ read bandwidth to the VMware server.
|
|
|
|
Same as above, but for writing and flushing writes.
|
|
|
|
+=item C<ReadAsync>
|
|
+
|
|
+=item C<WriteAsync>
|
|
+
|
|
+Same as above, but for asynchronous read and write calls introduced in
|
|
+nbdkit 1.30. Unfortunately at the moment the amount of time spent in
|
|
+these calls is not accounted for correctly.
|
|
+
|
|
=item C<QueryAllocatedBlocks>
|
|
|
|
This call is used to query information about the sparseness of the
|
|
@@ -580,7 +588,8 @@ Debug extents returned by C<QueryAllocatedBlocks>.
|
|
|
|
=item B<-D vddk.datapath=0>
|
|
|
|
-Suppress debugging of datapath calls (C<Read> and C<Write>).
|
|
+Suppress debugging of datapath calls (C<Read>, C<ReadAsync>, C<Write>
|
|
+and C<WriteAsync>).
|
|
|
|
=item B<-D vddk.stats=1>
|
|
|
|
diff --git a/plugins/vddk/vddk.c b/plugins/vddk/vddk.c
|
|
index 67ac775c..9f223db0 100644
|
|
--- a/plugins/vddk/vddk.c
|
|
+++ b/plugins/vddk/vddk.c
|
|
@@ -50,9 +50,6 @@
|
|
#include <nbdkit-plugin.h>
|
|
|
|
#include "cleanup.h"
|
|
-#include "minmax.h"
|
|
-#include "rounding.h"
|
|
-#include "tvdiff.h"
|
|
#include "vector.h"
|
|
|
|
#include "vddk.h"
|
|
@@ -522,23 +519,18 @@ vddk_dump_plugin (void)
|
|
/* The rules on threads and VDDK are here:
|
|
* https://code.vmware.com/docs/11750/virtual-disk-development-kit-programming-guide/GUID-6BE903E8-DC70-46D9-98E4-E34A2002C2AD.html
|
|
*
|
|
- * Before nbdkit 1.22 we used SERIALIZE_ALL_REQUESTS. Since nbdkit
|
|
- * 1.22 we changed this to SERIALIZE_REQUESTS and added a mutex around
|
|
- * calls to VixDiskLib_Open and VixDiskLib_Close. This is not quite
|
|
- * within the letter of the rules, but is within the spirit.
|
|
+ * Before nbdkit 1.22 we used SERIALIZE_ALL_REQUESTS. In nbdkit
|
|
+ * 1.22-1.28 we changed this to SERIALIZE_REQUESTS and added a mutex
|
|
+ * around calls to VixDiskLib_Open and VixDiskLib_Close. In nbdkit
|
|
+ * 1.30 and above we assign a background thread per connection to do
|
|
+ * asynch operations and use the PARALLEL model. We still need the
|
|
+ * lock around Open and Close.
|
|
*/
|
|
-#define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS
|
|
+#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
|
|
|
|
/* Lock protecting open/close calls - see above. */
|
|
static pthread_mutex_t open_close_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
|
|
-/* The per-connection handle. */
|
|
-struct vddk_handle {
|
|
- VixDiskLibConnectParams *params; /* connection parameters */
|
|
- VixDiskLibConnection connection; /* connection */
|
|
- VixDiskLibHandle handle; /* disk handle */
|
|
-};
|
|
-
|
|
static inline VixDiskLibConnectParams *
|
|
allocate_connect_params (void)
|
|
{
|
|
@@ -579,12 +571,16 @@ vddk_open (int readonly)
|
|
VixError err;
|
|
uint32_t flags;
|
|
const char *transport_mode;
|
|
+ int pterr;
|
|
|
|
- h = malloc (sizeof *h);
|
|
+ h = calloc (1, sizeof *h);
|
|
if (h == NULL) {
|
|
- nbdkit_error ("malloc: %m");
|
|
+ nbdkit_error ("calloc: %m");
|
|
return NULL;
|
|
}
|
|
+ h->commands = (command_queue) empty_vector;
|
|
+ pthread_mutex_init (&h->commands_lock, NULL);
|
|
+ pthread_cond_init (&h->commands_cond, NULL);
|
|
|
|
h->params = allocate_connect_params ();
|
|
if (h->params == NULL) {
|
|
@@ -661,8 +657,22 @@ vddk_open (int readonly)
|
|
VDDK_CALL_END (VixDiskLib_GetTransportMode, 0);
|
|
nbdkit_debug ("transport mode: %s", transport_mode);
|
|
|
|
+ /* Start the background thread which actually does the asynchronous
|
|
+ * work.
|
|
+ */
|
|
+ pterr = pthread_create (&h->thread, NULL, vddk_worker_thread, h);
|
|
+ if (pterr != 0) {
|
|
+ errno = pterr;
|
|
+ nbdkit_error ("pthread_create: %m");
|
|
+ goto err3;
|
|
+ }
|
|
+
|
|
return h;
|
|
|
|
+ err3:
|
|
+ VDDK_CALL_START (VixDiskLib_Close, "handle")
|
|
+ VixDiskLib_Close (h->handle);
|
|
+ VDDK_CALL_END (VixDiskLib_Close, 0);
|
|
err2:
|
|
VDDK_CALL_START (VixDiskLib_Disconnect, "connection")
|
|
VixDiskLib_Disconnect (h->connection);
|
|
@@ -670,6 +680,8 @@ vddk_open (int readonly)
|
|
err1:
|
|
free_connect_params (h->params);
|
|
err0:
|
|
+ pthread_mutex_destroy (&h->commands_lock);
|
|
+ pthread_cond_destroy (&h->commands_cond);
|
|
free (h);
|
|
return NULL;
|
|
}
|
|
@@ -680,6 +692,10 @@ vddk_close (void *handle)
|
|
{
|
|
ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&open_close_lock);
|
|
struct vddk_handle *h = handle;
|
|
+ struct command stop_cmd = { .type = STOP };
|
|
+
|
|
+ send_command_and_wait (h, &stop_cmd);
|
|
+ pthread_join (h->thread, NULL);
|
|
|
|
VDDK_CALL_START (VixDiskLib_Close, "handle")
|
|
VixDiskLib_Close (h->handle);
|
|
@@ -689,6 +705,9 @@ vddk_close (void *handle)
|
|
VDDK_CALL_END (VixDiskLib_Disconnect, 0);
|
|
|
|
free_connect_params (h->params);
|
|
+ pthread_mutex_destroy (&h->commands_lock);
|
|
+ pthread_cond_destroy (&h->commands_cond);
|
|
+ command_queue_reset (&h->commands);
|
|
free (h);
|
|
}
|
|
|
|
@@ -697,54 +716,29 @@ static int64_t
|
|
vddk_get_size (void *handle)
|
|
{
|
|
struct vddk_handle *h = handle;
|
|
- VixDiskLibInfo *info;
|
|
- VixError err;
|
|
uint64_t size;
|
|
+ struct command get_size_cmd = { .type = GET_SIZE, .ptr = &size };
|
|
|
|
- VDDK_CALL_START (VixDiskLib_GetInfo, "handle, &info")
|
|
- err = VixDiskLib_GetInfo (h->handle, &info);
|
|
- VDDK_CALL_END (VixDiskLib_GetInfo, 0);
|
|
- if (err != VIX_OK) {
|
|
- VDDK_ERROR (err, "VixDiskLib_GetInfo");
|
|
+ if (send_command_and_wait (h, &get_size_cmd) == -1)
|
|
return -1;
|
|
- }
|
|
-
|
|
- size = info->capacity * (uint64_t)VIXDISKLIB_SECTOR_SIZE;
|
|
-
|
|
- if (vddk_debug_diskinfo) {
|
|
- nbdkit_debug ("disk info: capacity: %" PRIu64 " sectors "
|
|
- "(%" PRIi64 " bytes)",
|
|
- info->capacity, size);
|
|
- nbdkit_debug ("disk info: biosGeo: C:%" PRIu32 " H:%" PRIu32 " S:%" PRIu32,
|
|
- info->biosGeo.cylinders,
|
|
- info->biosGeo.heads,
|
|
- info->biosGeo.sectors);
|
|
- nbdkit_debug ("disk info: physGeo: C:%" PRIu32 " H:%" PRIu32 " S:%" PRIu32,
|
|
- info->physGeo.cylinders,
|
|
- info->physGeo.heads,
|
|
- info->physGeo.sectors);
|
|
- nbdkit_debug ("disk info: adapter type: %d",
|
|
- (int) info->adapterType);
|
|
- nbdkit_debug ("disk info: num links: %d", info->numLinks);
|
|
- nbdkit_debug ("disk info: parent filename hint: %s",
|
|
- info->parentFileNameHint ? : "NULL");
|
|
- nbdkit_debug ("disk info: uuid: %s",
|
|
- info->uuid ? : "NULL");
|
|
- if (library_version >= 7) {
|
|
- nbdkit_debug ("disk info: sector size: "
|
|
- "logical %" PRIu32 " physical %" PRIu32,
|
|
- info->logicalSectorSize,
|
|
- info->physicalSectorSize);
|
|
- }
|
|
- }
|
|
-
|
|
- VDDK_CALL_START (VixDiskLib_FreeInfo, "info")
|
|
- VixDiskLib_FreeInfo (info);
|
|
- VDDK_CALL_END (VixDiskLib_FreeInfo, 0);
|
|
|
|
return (int64_t) size;
|
|
}
|
|
|
|
+static int
|
|
+vddk_can_fua (void *handle)
|
|
+{
|
|
+ /* The Flush call was not available in VDDK < 6.0. */
|
|
+ return VixDiskLib_Flush != NULL ? NBDKIT_FUA_NATIVE : NBDKIT_FUA_NONE;
|
|
+}
|
|
+
|
|
+static int
|
|
+vddk_can_flush (void *handle)
|
|
+{
|
|
+ /* The Flush call was not available in VDDK < 6.0. */
|
|
+ return VixDiskLib_Flush != NULL;
|
|
+}
|
|
+
|
|
/* Read data from the file.
|
|
*
|
|
* Note that reads have to be aligned to sectors (XXX).
|
|
@@ -754,32 +748,14 @@ vddk_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
|
|
uint32_t flags)
|
|
{
|
|
struct vddk_handle *h = handle;
|
|
- VixError err;
|
|
+ struct command read_cmd = {
|
|
+ .type = READ,
|
|
+ .ptr = buf,
|
|
+ .count = count,
|
|
+ .offset = offset,
|
|
+ };
|
|
|
|
- /* Align to sectors. */
|
|
- if (!IS_ALIGNED (offset, VIXDISKLIB_SECTOR_SIZE)) {
|
|
- nbdkit_error ("%s is not aligned to sectors", "read");
|
|
- return -1;
|
|
- }
|
|
- if (!IS_ALIGNED (count, VIXDISKLIB_SECTOR_SIZE)) {
|
|
- nbdkit_error ("%s is not aligned to sectors", "read");
|
|
- return -1;
|
|
- }
|
|
- offset /= VIXDISKLIB_SECTOR_SIZE;
|
|
- count /= VIXDISKLIB_SECTOR_SIZE;
|
|
-
|
|
- VDDK_CALL_START (VixDiskLib_Read,
|
|
- "handle, %" PRIu64 " sectors, "
|
|
- "%" PRIu32 " sectors, buffer",
|
|
- offset, count)
|
|
- err = VixDiskLib_Read (h->handle, offset, count, buf);
|
|
- VDDK_CALL_END (VixDiskLib_Read, count * VIXDISKLIB_SECTOR_SIZE);
|
|
- if (err != VIX_OK) {
|
|
- VDDK_ERROR (err, "VixDiskLib_Read");
|
|
- return -1;
|
|
- }
|
|
-
|
|
- return 0;
|
|
+ return send_command_and_wait (h, &read_cmd);
|
|
}
|
|
|
|
static int vddk_flush (void *handle, uint32_t flags);
|
|
@@ -792,32 +768,17 @@ static int
|
|
vddk_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
|
|
uint32_t flags)
|
|
{
|
|
+ struct vddk_handle *h = handle;
|
|
const bool fua = flags & NBDKIT_FLAG_FUA;
|
|
- struct vddk_handle *h = handle;
|
|
- VixError err;
|
|
+ struct command write_cmd = {
|
|
+ .type = WRITE,
|
|
+ .ptr = (void *) buf,
|
|
+ .count = count,
|
|
+ .offset = offset,
|
|
+ };
|
|
|
|
- /* Align to sectors. */
|
|
- if (!IS_ALIGNED (offset, VIXDISKLIB_SECTOR_SIZE)) {
|
|
- nbdkit_error ("%s is not aligned to sectors", "write");
|
|
+ if (send_command_and_wait (h, &write_cmd) == -1)
|
|
return -1;
|
|
- }
|
|
- if (!IS_ALIGNED (count, VIXDISKLIB_SECTOR_SIZE)) {
|
|
- nbdkit_error ("%s is not aligned to sectors", "write");
|
|
- return -1;
|
|
- }
|
|
- offset /= VIXDISKLIB_SECTOR_SIZE;
|
|
- count /= VIXDISKLIB_SECTOR_SIZE;
|
|
-
|
|
- VDDK_CALL_START (VixDiskLib_Write,
|
|
- "handle, %" PRIu64 " sectors, "
|
|
- "%" PRIu32 " sectors, buffer",
|
|
- offset, count)
|
|
- err = VixDiskLib_Write (h->handle, offset, count, buf);
|
|
- VDDK_CALL_END (VixDiskLib_Write, count * VIXDISKLIB_SECTOR_SIZE);
|
|
- if (err != VIX_OK) {
|
|
- VDDK_ERROR (err, "VixDiskLib_Write");
|
|
- return -1;
|
|
- }
|
|
|
|
if (fua) {
|
|
if (vddk_flush (handle, 0) == -1)
|
|
@@ -827,126 +788,32 @@ vddk_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset,
|
|
return 0;
|
|
}
|
|
|
|
-static int
|
|
-vddk_can_fua (void *handle)
|
|
-{
|
|
- /* The Flush call was not available in VDDK < 6.0. */
|
|
- return VixDiskLib_Flush != NULL ? NBDKIT_FUA_NATIVE : NBDKIT_FUA_NONE;
|
|
-}
|
|
-
|
|
-static int
|
|
-vddk_can_flush (void *handle)
|
|
-{
|
|
- /* The Flush call was not available in VDDK < 6.0. */
|
|
- return VixDiskLib_Flush != NULL;
|
|
-}
|
|
-
|
|
/* Flush data to the file. */
|
|
static int
|
|
vddk_flush (void *handle, uint32_t flags)
|
|
{
|
|
struct vddk_handle *h = handle;
|
|
- VixError err;
|
|
+ struct command flush_cmd = {
|
|
+ .type = FLUSH,
|
|
+ };
|
|
|
|
- /* The documentation for Flush is missing, but the comment in the
|
|
- * header file seems to indicate that it waits for WriteAsync
|
|
- * commands to finish. We don't use WriteAsync, and in any case
|
|
- * there's a new function Wait to wait for those. However I
|
|
- * verified using strace that in fact Flush does call fsync on the
|
|
- * file so it appears to be the correct call to use here.
|
|
- */
|
|
-
|
|
- VDDK_CALL_START (VixDiskLib_Flush, "handle")
|
|
- err = VixDiskLib_Flush (h->handle);
|
|
- VDDK_CALL_END (VixDiskLib_Flush, 0);
|
|
- if (err != VIX_OK) {
|
|
- VDDK_ERROR (err, "VixDiskLib_Flush");
|
|
- return -1;
|
|
- }
|
|
-
|
|
- return 0;
|
|
+ return send_command_and_wait (h, &flush_cmd);
|
|
}
|
|
|
|
static int
|
|
vddk_can_extents (void *handle)
|
|
{
|
|
struct vddk_handle *h = handle;
|
|
- VixError err;
|
|
- VixDiskLibBlockList *block_list;
|
|
+ int ret;
|
|
+ struct command can_extents_cmd = {
|
|
+ .type = CAN_EXTENTS,
|
|
+ .ptr = &ret,
|
|
+ };
|
|
|
|
- /* This call was added in VDDK 6.7. In earlier versions the
|
|
- * function pointer will be NULL and we cannot query extents.
|
|
- */
|
|
- if (VixDiskLib_QueryAllocatedBlocks == NULL) {
|
|
- nbdkit_debug ("can_extents: VixDiskLib_QueryAllocatedBlocks == NULL, "
|
|
- "probably this is VDDK < 6.7");
|
|
- return 0;
|
|
- }
|
|
-
|
|
- /* Suppress errors around this call. See:
|
|
- * https://bugzilla.redhat.com/show_bug.cgi?id=1709211#c7
|
|
- */
|
|
- error_suppression = 1;
|
|
-
|
|
- /* However even when the call is available it rarely works well so
|
|
- * the best thing we can do here is to try the call and if it's
|
|
- * non-functional return false.
|
|
- */
|
|
- VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
|
|
- "handle, 0, %d sectors, %d sectors",
|
|
- VIXDISKLIB_MIN_CHUNK_SIZE, VIXDISKLIB_MIN_CHUNK_SIZE)
|
|
- err = VixDiskLib_QueryAllocatedBlocks (h->handle,
|
|
- 0, VIXDISKLIB_MIN_CHUNK_SIZE,
|
|
- VIXDISKLIB_MIN_CHUNK_SIZE,
|
|
- &block_list);
|
|
- VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
|
|
- error_suppression = 0;
|
|
- if (err == VIX_OK) {
|
|
- VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
|
|
- VixDiskLib_FreeBlockList (block_list);
|
|
- VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
|
|
- }
|
|
- if (err != VIX_OK) {
|
|
- char *errmsg = VixDiskLib_GetErrorText (err, NULL);
|
|
- nbdkit_debug ("can_extents: VixDiskLib_QueryAllocatedBlocks test failed, "
|
|
- "extents support will be disabled: "
|
|
- "original error: %s",
|
|
- errmsg);
|
|
- VixDiskLib_FreeErrorText (errmsg);
|
|
- return 0;
|
|
- }
|
|
-
|
|
- return 1;
|
|
-}
|
|
-
|
|
-static int
|
|
-add_extent (struct nbdkit_extents *extents,
|
|
- uint64_t *position, uint64_t next_position, bool is_hole)
|
|
-{
|
|
- uint32_t type = 0;
|
|
- const uint64_t length = next_position - *position;
|
|
-
|
|
- if (is_hole) {
|
|
- type = NBDKIT_EXTENT_HOLE;
|
|
- /* Images opened as single link might be backed by another file in the
|
|
- chain, so the holes are not guaranteed to be zeroes. */
|
|
- if (!single_link)
|
|
- type |= NBDKIT_EXTENT_ZERO;
|
|
- }
|
|
-
|
|
- assert (*position <= next_position);
|
|
- if (*position == next_position)
|
|
- return 0;
|
|
-
|
|
- if (vddk_debug_extents)
|
|
- nbdkit_debug ("adding extent type %s at [%" PRIu64 "...%" PRIu64 "]",
|
|
- is_hole ? "hole" : "allocated data",
|
|
- *position, next_position-1);
|
|
- if (nbdkit_add_extent (extents, *position, length, type) == -1)
|
|
+ if (send_command_and_wait (h, &can_extents_cmd) == -1)
|
|
return -1;
|
|
|
|
- *position = next_position;
|
|
- return 0;
|
|
+ return ret;
|
|
}
|
|
|
|
static int
|
|
@@ -955,88 +822,15 @@ vddk_extents (void *handle, uint32_t count, uint64_t offset, uint32_t flags,
|
|
{
|
|
struct vddk_handle *h = handle;
|
|
bool req_one = flags & NBDKIT_FLAG_REQ_ONE;
|
|
- uint64_t position, end, start_sector;
|
|
-
|
|
- position = offset;
|
|
- end = offset + count;
|
|
-
|
|
- /* We can only query whole chunks. Therefore start with the first
|
|
- * chunk before offset.
|
|
- */
|
|
- start_sector =
|
|
- ROUND_DOWN (offset, VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE)
|
|
- / VIXDISKLIB_SECTOR_SIZE;
|
|
- while (start_sector * VIXDISKLIB_SECTOR_SIZE < end) {
|
|
- VixError err;
|
|
- uint32_t i;
|
|
- uint64_t nr_chunks, nr_sectors;
|
|
- VixDiskLibBlockList *block_list;
|
|
-
|
|
- assert (IS_ALIGNED (start_sector, VIXDISKLIB_MIN_CHUNK_SIZE));
|
|
-
|
|
- nr_chunks =
|
|
- ROUND_UP (end - start_sector * VIXDISKLIB_SECTOR_SIZE,
|
|
- VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE)
|
|
- / (VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE);
|
|
- nr_chunks = MIN (nr_chunks, VIXDISKLIB_MAX_CHUNK_NUMBER);
|
|
- nr_sectors = nr_chunks * VIXDISKLIB_MIN_CHUNK_SIZE;
|
|
-
|
|
- VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
|
|
- "handle, %" PRIu64 " sectors, %" PRIu64 " sectors, "
|
|
- "%d sectors",
|
|
- start_sector, nr_sectors, VIXDISKLIB_MIN_CHUNK_SIZE)
|
|
- err = VixDiskLib_QueryAllocatedBlocks (h->handle,
|
|
- start_sector, nr_sectors,
|
|
- VIXDISKLIB_MIN_CHUNK_SIZE,
|
|
- &block_list);
|
|
- VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
|
|
- if (err != VIX_OK) {
|
|
- VDDK_ERROR (err, "VixDiskLib_QueryAllocatedBlocks");
|
|
- return -1;
|
|
- }
|
|
-
|
|
- for (i = 0; i < block_list->numBlocks; ++i) {
|
|
- uint64_t blk_offset, blk_length;
|
|
-
|
|
- blk_offset = block_list->blocks[i].offset * VIXDISKLIB_SECTOR_SIZE;
|
|
- blk_length = block_list->blocks[i].length * VIXDISKLIB_SECTOR_SIZE;
|
|
-
|
|
- /* The query returns allocated blocks. We must insert holes
|
|
- * between the blocks as necessary.
|
|
- */
|
|
- if ((position < blk_offset &&
|
|
- add_extent (extents, &position, blk_offset, true) == -1) ||
|
|
- (add_extent (extents,
|
|
- &position, blk_offset + blk_length, false) == -1)) {
|
|
- VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
|
|
- VixDiskLib_FreeBlockList (block_list);
|
|
- VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
|
|
- return -1;
|
|
- }
|
|
- }
|
|
- VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
|
|
- VixDiskLib_FreeBlockList (block_list);
|
|
- VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
|
|
-
|
|
- /* There's an implicit hole after the returned list of blocks, up
|
|
- * to the end of the QueryAllocatedBlocks request.
|
|
- */
|
|
- if (add_extent (extents,
|
|
- &position,
|
|
- (start_sector + nr_sectors) * VIXDISKLIB_SECTOR_SIZE,
|
|
- true) == -1)
|
|
- return -1;
|
|
-
|
|
- start_sector += nr_sectors;
|
|
-
|
|
- /* If one extent was requested, as long as we've added an extent
|
|
- * overlapping the original offset we're done.
|
|
- */
|
|
- if (req_one && position > offset)
|
|
- break;
|
|
- }
|
|
-
|
|
- return 0;
|
|
+ struct command extents_cmd = {
|
|
+ .type = EXTENTS,
|
|
+ .ptr = extents,
|
|
+ .count = count,
|
|
+ .offset = offset,
|
|
+ .req_one = req_one,
|
|
+ };
|
|
+
|
|
+ return send_command_and_wait (h, &extents_cmd);
|
|
}
|
|
|
|
static struct nbdkit_plugin plugin = {
|
|
diff --git a/plugins/vddk/vddk.h b/plugins/vddk/vddk.h
|
|
index 1400589d..be0b3492 100644
|
|
--- a/plugins/vddk/vddk.h
|
|
+++ b/plugins/vddk/vddk.h
|
|
@@ -90,7 +90,9 @@ extern int vddk_debug_stats;
|
|
/* GCC can optimize this away at compile time: */ \
|
|
const bool datapath = \
|
|
strcmp (#fn, "VixDiskLib_Read") == 0 || \
|
|
- strcmp (#fn, "VixDiskLib_Write") == 0; \
|
|
+ strcmp (#fn, "VixDiskLib_ReadAsync") == 0 || \
|
|
+ strcmp (#fn, "VixDiskLib_Write") == 0 || \
|
|
+ strcmp (#fn, "VixDiskLib_WriteAsync") == 0; \
|
|
if (vddk_debug_stats) \
|
|
gettimeofday (&start_t, NULL); \
|
|
if (!datapath || vddk_debug_datapath) \
|
|
@@ -120,6 +122,46 @@ extern int vddk_debug_stats;
|
|
VDDK_CALL_END (VixDiskLib_FreeErrorText, 0); \
|
|
} while (0)
|
|
|
|
+/* Queue of asynchronous commands sent to the background thread. */
|
|
+enum command_type { GET_SIZE, READ, WRITE, FLUSH, CAN_EXTENTS, EXTENTS, STOP };
|
|
+struct command {
|
|
+ /* These fields are set by the caller. */
|
|
+ enum command_type type; /* command */
|
|
+ void *ptr; /* buffer, extents list, return values */
|
|
+ uint32_t count; /* READ, WRITE, EXTENTS */
|
|
+ uint64_t offset; /* READ, WRITE, EXTENTS */
|
|
+ bool req_one; /* EXTENTS NBDKIT_FLAG_REQ_ONE */
|
|
+
|
|
+ /* This field is set to a unique value by send_command_and_wait. */
|
|
+ uint64_t id; /* serial number */
|
|
+
|
|
+ /* These fields are used by the internal implementation. */
|
|
+ pthread_mutex_t mutex; /* completion mutex */
|
|
+ pthread_cond_t cond; /* completion condition */
|
|
+ enum { SUBMITTED, SUCCEEDED, FAILED } status;
|
|
+};
|
|
+
|
|
+DEFINE_VECTOR_TYPE(command_queue, struct command *)
|
|
+
|
|
+/* The per-connection handle. */
|
|
+struct vddk_handle {
|
|
+ VixDiskLibConnectParams *params; /* connection parameters */
|
|
+ VixDiskLibConnection connection; /* connection */
|
|
+ VixDiskLibHandle handle; /* disk handle */
|
|
+
|
|
+ pthread_t thread; /* background thread for asynch work */
|
|
+
|
|
+ /* Command queue of commands sent to the background thread. Use
|
|
+ * send_command_and_wait to add a command. Only the background
|
|
+ * thread may make VDDK API calls (apart from opening and closing).
|
|
+ * The lock protects all of these fields.
|
|
+ */
|
|
+ pthread_mutex_t commands_lock; /* lock */
|
|
+ command_queue commands; /* command queue */
|
|
+ pthread_cond_t commands_cond; /* condition (queue size 0 -> 1) */
|
|
+ uint64_t id; /* next command ID */
|
|
+};
|
|
+
|
|
/* reexec.c */
|
|
extern bool noreexec;
|
|
extern char *reexeced;
|
|
@@ -141,4 +183,9 @@ extern pthread_mutex_t stats_lock;
|
|
#undef OPTIONAL_STUB
|
|
extern void display_stats (void);
|
|
|
|
+/* worker.c */
|
|
+extern const char *command_type_string (enum command_type type);
|
|
+extern int send_command_and_wait (struct vddk_handle *h, struct command *cmd);
|
|
+extern void *vddk_worker_thread (void *handle);
|
|
+
|
|
#endif /* NBDKIT_VDDK_H */
|
|
diff --git a/plugins/vddk/worker.c b/plugins/vddk/worker.c
|
|
new file mode 100644
|
|
index 00000000..2a1d4f26
|
|
--- /dev/null
|
|
+++ b/plugins/vddk/worker.c
|
|
@@ -0,0 +1,567 @@
|
|
+/* nbdkit
|
|
+ * Copyright (C) 2013-2021 Red Hat Inc.
|
|
+ *
|
|
+ * Redistribution and use in source and binary forms, with or without
|
|
+ * modification, are permitted provided that the following conditions are
|
|
+ * met:
|
|
+ *
|
|
+ * * Redistributions of source code must retain the above copyright
|
|
+ * notice, this list of conditions and the following disclaimer.
|
|
+ *
|
|
+ * * Redistributions in binary form must reproduce the above copyright
|
|
+ * notice, this list of conditions and the following disclaimer in the
|
|
+ * documentation and/or other materials provided with the distribution.
|
|
+ *
|
|
+ * * Neither the name of Red Hat nor the names of its contributors may be
|
|
+ * used to endorse or promote products derived from this software without
|
|
+ * specific prior written permission.
|
|
+ *
|
|
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
|
|
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
|
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
|
|
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
|
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
+ * SUCH DAMAGE.
|
|
+ */
|
|
+
|
|
+#include <config.h>
|
|
+
|
|
+#include <stdio.h>
|
|
+#include <stdlib.h>
|
|
+#include <stdint.h>
|
|
+#include <inttypes.h>
|
|
+
|
|
+#include <pthread.h>
|
|
+
|
|
+#define NBDKIT_API_VERSION 2
|
|
+#include <nbdkit-plugin.h>
|
|
+
|
|
+#include "cleanup.h"
|
|
+#include "minmax.h"
|
|
+#include "rounding.h"
|
|
+#include "vector.h"
|
|
+
|
|
+#include "vddk.h"
|
|
+
|
|
+const char *
|
|
+command_type_string (enum command_type type)
|
|
+{
|
|
+ switch (type) {
|
|
+ case GET_SIZE: return "get_size";
|
|
+ case READ: return "read";
|
|
+ case WRITE: return "write";
|
|
+ case FLUSH: return "flush";
|
|
+ case CAN_EXTENTS: return "can_extents";
|
|
+ case EXTENTS: return "extents";
|
|
+ case STOP: return "stop";
|
|
+ default: abort ();
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Send command to the background thread and wait for completion.
|
|
+ *
|
|
+ * Returns 0 for OK
|
|
+ * On error, calls nbdkit_error and returns -1.
|
|
+ */
|
|
+int
|
|
+send_command_and_wait (struct vddk_handle *h, struct command *cmd)
|
|
+{
|
|
+ /* Add the command to the command queue. */
|
|
+ {
|
|
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->commands_lock);
|
|
+ cmd->id = h->id++;
|
|
+
|
|
+ if (command_queue_append (&h->commands, cmd) == -1)
|
|
+ /* On error command_queue_append will call nbdkit_error. */
|
|
+ return -1;
|
|
+
|
|
+ /* Signal the background thread if it could be sleeping on an empty queue. */
|
|
+ if (h->commands.size == 1)
|
|
+ pthread_cond_signal (&h->commands_cond);
|
|
+
|
|
+ /* This will be used to signal command completion back to us. */
|
|
+ pthread_mutex_init (&cmd->mutex, NULL);
|
|
+ pthread_cond_init (&cmd->cond, NULL);
|
|
+ }
|
|
+
|
|
+ /* Wait for the command to be completed by the background thread. */
|
|
+ {
|
|
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex);
|
|
+ while (cmd->status == SUBMITTED)
|
|
+ pthread_cond_wait (&cmd->cond, &cmd->mutex);
|
|
+ }
|
|
+
|
|
+ pthread_mutex_destroy (&cmd->mutex);
|
|
+ pthread_cond_destroy (&cmd->cond);
|
|
+
|
|
+ /* On error the background thread will call nbdkit_error. */
|
|
+ switch (cmd->status) {
|
|
+ case SUCCEEDED: return 0;
|
|
+ case FAILED: return -1;
|
|
+ default: abort ();
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Asynchronous commands are completed when this function is called. */
|
|
+static void
|
|
+complete_command (void *vp, VixError result)
|
|
+{
|
|
+ struct command *cmd = vp;
|
|
+
|
|
+ if (vddk_debug_datapath)
|
|
+ nbdkit_debug ("command %" PRIu64 " completed", cmd->id);
|
|
+
|
|
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex);
|
|
+
|
|
+ if (result == VIX_OK) {
|
|
+ cmd->status = SUCCEEDED;
|
|
+ } else {
|
|
+ VDDK_ERROR (result, "command %" PRIu64 ": asynchronous %s failed",
|
|
+ cmd->id, command_type_string (cmd->type));
|
|
+ cmd->status = FAILED;
|
|
+ }
|
|
+
|
|
+ pthread_cond_signal (&cmd->cond);
|
|
+}
|
|
+
|
|
+/* Wait for any asynchronous commands to complete. */
|
|
+static int
|
|
+do_stop (struct command *cmd, struct vddk_handle *h)
|
|
+{
|
|
+ VixError err;
|
|
+
|
|
+ /* Because we assume VDDK >= 6.5, VixDiskLib_Wait must exist. */
|
|
+ VDDK_CALL_START (VixDiskLib_Wait, "handle")
|
|
+ err = VixDiskLib_Wait (h->handle);
|
|
+ VDDK_CALL_END (VixDiskLib_Wait, 0);
|
|
+ if (err != VIX_OK) {
|
|
+ VDDK_ERROR (err, "VixDiskLib_Wait");
|
|
+ /* In the end this error indication is ignored because it only
|
|
+ * happens on the close path when we cannot handle errors.
|
|
+ */
|
|
+ return -1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Get size command. */
|
|
+static int64_t
|
|
+do_get_size (struct command *cmd, struct vddk_handle *h)
|
|
+{
|
|
+ VixError err;
|
|
+ VixDiskLibInfo *info;
|
|
+ uint64_t size;
|
|
+
|
|
+ VDDK_CALL_START (VixDiskLib_GetInfo, "handle, &info")
|
|
+ err = VixDiskLib_GetInfo (h->handle, &info);
|
|
+ VDDK_CALL_END (VixDiskLib_GetInfo, 0);
|
|
+ if (err != VIX_OK) {
|
|
+ VDDK_ERROR (err, "VixDiskLib_GetInfo");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ size = info->capacity * (uint64_t)VIXDISKLIB_SECTOR_SIZE;
|
|
+
|
|
+ if (vddk_debug_diskinfo) {
|
|
+ nbdkit_debug ("disk info: capacity: %" PRIu64 " sectors "
|
|
+ "(%" PRIi64 " bytes)",
|
|
+ info->capacity, size);
|
|
+ nbdkit_debug ("disk info: biosGeo: C:%" PRIu32 " H:%" PRIu32 " S:%" PRIu32,
|
|
+ info->biosGeo.cylinders,
|
|
+ info->biosGeo.heads,
|
|
+ info->biosGeo.sectors);
|
|
+ nbdkit_debug ("disk info: physGeo: C:%" PRIu32 " H:%" PRIu32 " S:%" PRIu32,
|
|
+ info->physGeo.cylinders,
|
|
+ info->physGeo.heads,
|
|
+ info->physGeo.sectors);
|
|
+ nbdkit_debug ("disk info: adapter type: %d",
|
|
+ (int) info->adapterType);
|
|
+ nbdkit_debug ("disk info: num links: %d", info->numLinks);
|
|
+ nbdkit_debug ("disk info: parent filename hint: %s",
|
|
+ info->parentFileNameHint ? : "NULL");
|
|
+ nbdkit_debug ("disk info: uuid: %s",
|
|
+ info->uuid ? : "NULL");
|
|
+ if (library_version >= 7) {
|
|
+ nbdkit_debug ("disk info: sector size: "
|
|
+ "logical %" PRIu32 " physical %" PRIu32,
|
|
+ info->logicalSectorSize,
|
|
+ info->physicalSectorSize);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ VDDK_CALL_START (VixDiskLib_FreeInfo, "info")
|
|
+ VixDiskLib_FreeInfo (info);
|
|
+ VDDK_CALL_END (VixDiskLib_FreeInfo, 0);
|
|
+
|
|
+ return (int64_t) size;
|
|
+}
|
|
+
|
|
+static int
|
|
+do_read (struct command *cmd, struct vddk_handle *h)
|
|
+{
|
|
+ VixError err;
|
|
+ uint32_t count = cmd->count;
|
|
+ uint64_t offset = cmd->offset;
|
|
+ void *buf = cmd->ptr;
|
|
+
|
|
+ /* Align to sectors. */
|
|
+ if (!IS_ALIGNED (offset, VIXDISKLIB_SECTOR_SIZE)) {
|
|
+ nbdkit_error ("%s is not aligned to sectors", "read");
|
|
+ return -1;
|
|
+ }
|
|
+ if (!IS_ALIGNED (count, VIXDISKLIB_SECTOR_SIZE)) {
|
|
+ nbdkit_error ("%s is not aligned to sectors", "read");
|
|
+ return -1;
|
|
+ }
|
|
+ offset /= VIXDISKLIB_SECTOR_SIZE;
|
|
+ count /= VIXDISKLIB_SECTOR_SIZE;
|
|
+
|
|
+ VDDK_CALL_START (VixDiskLib_ReadAsync,
|
|
+ "handle, %" PRIu64 " sectors, "
|
|
+ "%" PRIu32 " sectors, buffer, callback, %" PRIu64,
|
|
+ offset, count, cmd->id)
|
|
+ err = VixDiskLib_ReadAsync (h->handle, offset, count, buf,
|
|
+ complete_command, cmd);
|
|
+ VDDK_CALL_END (VixDiskLib_ReadAsync, count * VIXDISKLIB_SECTOR_SIZE);
|
|
+ if (err != VIX_ASYNC) {
|
|
+ VDDK_ERROR (err, "VixDiskLib_ReadAsync");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+do_write (struct command *cmd, struct vddk_handle *h)
|
|
+{
|
|
+ VixError err;
|
|
+ uint32_t count = cmd->count;
|
|
+ uint64_t offset = cmd->offset;
|
|
+ const void *buf = cmd->ptr;
|
|
+
|
|
+ /* Align to sectors. */
|
|
+ if (!IS_ALIGNED (offset, VIXDISKLIB_SECTOR_SIZE)) {
|
|
+ nbdkit_error ("%s is not aligned to sectors", "write");
|
|
+ return -1;
|
|
+ }
|
|
+ if (!IS_ALIGNED (count, VIXDISKLIB_SECTOR_SIZE)) {
|
|
+ nbdkit_error ("%s is not aligned to sectors", "write");
|
|
+ return -1;
|
|
+ }
|
|
+ offset /= VIXDISKLIB_SECTOR_SIZE;
|
|
+ count /= VIXDISKLIB_SECTOR_SIZE;
|
|
+
|
|
+ VDDK_CALL_START (VixDiskLib_WriteAsync,
|
|
+ "handle, %" PRIu64 " sectors, "
|
|
+ "%" PRIu32 " sectors, buffer, callback, %" PRIu64,
|
|
+ offset, count, cmd->id)
|
|
+ err = VixDiskLib_WriteAsync (h->handle, offset, count, buf,
|
|
+ complete_command, cmd);
|
|
+ VDDK_CALL_END (VixDiskLib_WriteAsync, count * VIXDISKLIB_SECTOR_SIZE);
|
|
+ if (err != VIX_ASYNC) {
|
|
+ VDDK_ERROR (err, "VixDiskLib_WriteAsync");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+do_flush (struct command *cmd, struct vddk_handle *h)
|
|
+{
|
|
+ VixError err;
|
|
+
|
|
+ /* It seems safer to wait for outstanding asynchronous commands to
|
|
+ * complete before doing a flush, so do this but ignore errors
|
|
+ * except to print them.
|
|
+ */
|
|
+ VDDK_CALL_START (VixDiskLib_Wait, "handle")
|
|
+ err = VixDiskLib_Wait (h->handle);
|
|
+ VDDK_CALL_END (VixDiskLib_Wait, 0);
|
|
+ if (err != VIX_OK)
|
|
+ VDDK_ERROR (err, "VixDiskLib_Wait");
|
|
+
|
|
+ /* The documentation for Flush is missing, but the comment in the
|
|
+ * header file seems to indicate that it waits for WriteAsync
|
|
+ * commands to finish. There's a new function Wait to wait for
|
|
+ * those. However I verified using strace that in fact Flush calls
|
|
+ * fsync on the file so it appears to be the correct call to use
|
|
+ * here.
|
|
+ */
|
|
+ VDDK_CALL_START (VixDiskLib_Flush, "handle")
|
|
+ err = VixDiskLib_Flush (h->handle);
|
|
+ VDDK_CALL_END (VixDiskLib_Flush, 0);
|
|
+ if (err != VIX_OK) {
|
|
+ VDDK_ERROR (err, "VixDiskLib_Flush");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+do_can_extents (struct command *cmd, struct vddk_handle *h)
|
|
+{
|
|
+ VixError err;
|
|
+ VixDiskLibBlockList *block_list;
|
|
+
|
|
+ /* This call was added in VDDK 6.7. In earlier versions the
|
|
+ * function pointer will be NULL and we cannot query extents.
|
|
+ */
|
|
+ if (VixDiskLib_QueryAllocatedBlocks == NULL) {
|
|
+ nbdkit_debug ("can_extents: VixDiskLib_QueryAllocatedBlocks == NULL, "
|
|
+ "probably this is VDDK < 6.7");
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ /* Suppress errors around this call. See:
|
|
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1709211#c7
|
|
+ */
|
|
+ error_suppression = 1;
|
|
+
|
|
+ /* However even when the call is available it rarely works well so
|
|
+ * the best thing we can do here is to try the call and if it's
|
|
+ * non-functional return false.
|
|
+ */
|
|
+ VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
|
|
+ "handle, 0, %d sectors, %d sectors",
|
|
+ VIXDISKLIB_MIN_CHUNK_SIZE, VIXDISKLIB_MIN_CHUNK_SIZE)
|
|
+ err = VixDiskLib_QueryAllocatedBlocks (h->handle,
|
|
+ 0, VIXDISKLIB_MIN_CHUNK_SIZE,
|
|
+ VIXDISKLIB_MIN_CHUNK_SIZE,
|
|
+ &block_list);
|
|
+ VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
|
|
+ error_suppression = 0;
|
|
+ if (err == VIX_OK) {
|
|
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
|
|
+ VixDiskLib_FreeBlockList (block_list);
|
|
+ VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
|
|
+ }
|
|
+ if (err != VIX_OK) {
|
|
+ char *errmsg = VixDiskLib_GetErrorText (err, NULL);
|
|
+ nbdkit_debug ("can_extents: "
|
|
+ "VixDiskLib_QueryAllocatedBlocks test failed, "
|
|
+ "extents support will be disabled: "
|
|
+ "original error: %s",
|
|
+ errmsg);
|
|
+ VixDiskLib_FreeErrorText (errmsg);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+/* Add an extent to the list of extents. */
|
|
+static int
|
|
+add_extent (struct nbdkit_extents *extents,
|
|
+ uint64_t *position, uint64_t next_position, bool is_hole)
|
|
+{
|
|
+ uint32_t type = 0;
|
|
+ const uint64_t length = next_position - *position;
|
|
+
|
|
+ if (is_hole) {
|
|
+ type = NBDKIT_EXTENT_HOLE;
|
|
+ /* Images opened as single link might be backed by another file in the
|
|
+ chain, so the holes are not guaranteed to be zeroes. */
|
|
+ if (!single_link)
|
|
+ type |= NBDKIT_EXTENT_ZERO;
|
|
+ }
|
|
+
|
|
+ assert (*position <= next_position);
|
|
+ if (*position == next_position)
|
|
+ return 0;
|
|
+
|
|
+ if (vddk_debug_extents)
|
|
+ nbdkit_debug ("adding extent type %s at [%" PRIu64 "...%" PRIu64 "]",
|
|
+ is_hole ? "hole" : "allocated data",
|
|
+ *position, next_position-1);
|
|
+ if (nbdkit_add_extent (extents, *position, length, type) == -1)
|
|
+ return -1;
|
|
+
|
|
+ *position = next_position;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+do_extents (struct command *cmd, struct vddk_handle *h)
|
|
+{
|
|
+ uint32_t count = cmd->count;
|
|
+ uint64_t offset = cmd->offset;
|
|
+ bool req_one = cmd->req_one;
|
|
+ struct nbdkit_extents *extents = cmd->ptr;
|
|
+ uint64_t position, end, start_sector;
|
|
+
|
|
+ position = offset;
|
|
+ end = offset + count;
|
|
+
|
|
+ /* We can only query whole chunks. Therefore start with the
|
|
+ * first chunk before offset.
|
|
+ */
|
|
+ start_sector =
|
|
+ ROUND_DOWN (offset, VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE)
|
|
+ / VIXDISKLIB_SECTOR_SIZE;
|
|
+ while (start_sector * VIXDISKLIB_SECTOR_SIZE < end) {
|
|
+ VixError err;
|
|
+ uint32_t i;
|
|
+ uint64_t nr_chunks, nr_sectors;
|
|
+ VixDiskLibBlockList *block_list;
|
|
+
|
|
+ assert (IS_ALIGNED (start_sector, VIXDISKLIB_MIN_CHUNK_SIZE));
|
|
+
|
|
+ nr_chunks =
|
|
+ ROUND_UP (end - start_sector * VIXDISKLIB_SECTOR_SIZE,
|
|
+ VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE)
|
|
+ / (VIXDISKLIB_MIN_CHUNK_SIZE * VIXDISKLIB_SECTOR_SIZE);
|
|
+ nr_chunks = MIN (nr_chunks, VIXDISKLIB_MAX_CHUNK_NUMBER);
|
|
+ nr_sectors = nr_chunks * VIXDISKLIB_MIN_CHUNK_SIZE;
|
|
+
|
|
+ VDDK_CALL_START (VixDiskLib_QueryAllocatedBlocks,
|
|
+ "handle, %" PRIu64 " sectors, %" PRIu64 " sectors, "
|
|
+ "%d sectors",
|
|
+ start_sector, nr_sectors, VIXDISKLIB_MIN_CHUNK_SIZE)
|
|
+ err = VixDiskLib_QueryAllocatedBlocks (h->handle,
|
|
+ start_sector, nr_sectors,
|
|
+ VIXDISKLIB_MIN_CHUNK_SIZE,
|
|
+ &block_list);
|
|
+ VDDK_CALL_END (VixDiskLib_QueryAllocatedBlocks, 0);
|
|
+ if (err != VIX_OK) {
|
|
+ VDDK_ERROR (err, "VixDiskLib_QueryAllocatedBlocks");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < block_list->numBlocks; ++i) {
|
|
+ uint64_t blk_offset, blk_length;
|
|
+
|
|
+ blk_offset = block_list->blocks[i].offset * VIXDISKLIB_SECTOR_SIZE;
|
|
+ blk_length = block_list->blocks[i].length * VIXDISKLIB_SECTOR_SIZE;
|
|
+
|
|
+ /* The query returns allocated blocks. We must insert holes
|
|
+ * between the blocks as necessary.
|
|
+ */
|
|
+ if ((position < blk_offset &&
|
|
+ add_extent (extents, &position, blk_offset, true) == -1) ||
|
|
+ (add_extent (extents,
|
|
+ &position, blk_offset + blk_length, false) == -1)) {
|
|
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
|
|
+ VixDiskLib_FreeBlockList (block_list);
|
|
+ VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
|
|
+ return -1;
|
|
+ }
|
|
+ }
|
|
+ VDDK_CALL_START (VixDiskLib_FreeBlockList, "block_list")
|
|
+ VixDiskLib_FreeBlockList (block_list);
|
|
+ VDDK_CALL_END (VixDiskLib_FreeBlockList, 0);
|
|
+
|
|
+ /* There's an implicit hole after the returned list of blocks,
|
|
+ * up to the end of the QueryAllocatedBlocks request.
|
|
+ */
|
|
+ if (add_extent (extents,
|
|
+ &position,
|
|
+ (start_sector + nr_sectors) * VIXDISKLIB_SECTOR_SIZE,
|
|
+ true) == -1) {
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ start_sector += nr_sectors;
|
|
+
|
|
+ /* If one extent was requested, as long as we've added an extent
|
|
+ * overlapping the original offset we're done.
|
|
+ */
|
|
+ if (req_one && position > offset)
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Background worker thread, one per connection, which is where the
|
|
+ * VDDK commands are issued.
|
|
+ */
|
|
+void *
|
|
+vddk_worker_thread (void *handle)
|
|
+{
|
|
+ struct vddk_handle *h = handle;
|
|
+ bool stop = false;
|
|
+
|
|
+ while (!stop) {
|
|
+ struct command *cmd;
|
|
+ int r;
|
|
+ bool async = false;
|
|
+
|
|
+ /* Wait until we are sent at least one command. */
|
|
+ {
|
|
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&h->commands_lock);
|
|
+ while (h->commands.size == 0)
|
|
+ pthread_cond_wait (&h->commands_cond, &h->commands_lock);
|
|
+ cmd = h->commands.ptr[0];
|
|
+ command_queue_remove (&h->commands, 0);
|
|
+ }
|
|
+
|
|
+ switch (cmd->type) {
|
|
+ case STOP:
|
|
+ r = do_stop (cmd, h);
|
|
+ stop = true;
|
|
+ break;
|
|
+
|
|
+ case GET_SIZE: {
|
|
+ int64_t size = do_get_size (cmd, h);
|
|
+ if (size == -1)
|
|
+ r = -1;
|
|
+ else {
|
|
+ r = 0;
|
|
+ *(uint64_t *)cmd->ptr = size;
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ case READ:
|
|
+ r = do_read (cmd, h);
|
|
+ /* If async is true, don't retire this command now. */
|
|
+ async = r == 0;
|
|
+ break;
|
|
+
|
|
+ case WRITE:
|
|
+ r = do_write (cmd, h);
|
|
+ /* If async is true, don't retire this command now. */
|
|
+ async = r == 0;
|
|
+ break;
|
|
+
|
|
+ case FLUSH:
|
|
+ r = do_flush (cmd, h);
|
|
+ break;
|
|
+
|
|
+ case CAN_EXTENTS:
|
|
+ r = do_can_extents (cmd, h);
|
|
+ if (r >= 0)
|
|
+ *(int *)cmd->ptr = r;
|
|
+ break;
|
|
+
|
|
+ case EXTENTS:
|
|
+ r = do_extents (cmd, h);
|
|
+ break;
|
|
+
|
|
+ default: abort (); /* impossible, but keeps GCC happy */
|
|
+ } /* switch */
|
|
+
|
|
+ if (!async) {
|
|
+ /* Update the command status. */
|
|
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex);
|
|
+ cmd->status = r >= 0 ? SUCCEEDED : FAILED;
|
|
+
|
|
+ /* For synchronous commands signal the caller thread that the
|
|
+ * command has completed. (Asynchronous commands are completed in
|
|
+ * the callback handler).
|
|
+ */
|
|
+ pthread_cond_signal (&cmd->cond);
|
|
+ }
|
|
+ } /* while (!stop) */
|
|
+
|
|
+ /* Exit the worker thread. */
|
|
+ return NULL;
|
|
+}
|
|
diff --git a/tests/dummy-vddk.c b/tests/dummy-vddk.c
|
|
index cb88380c..b6f12042 100644
|
|
--- a/tests/dummy-vddk.c
|
|
+++ b/tests/dummy-vddk.c
|
|
@@ -188,6 +188,19 @@ VixDiskLib_Read (VixDiskLibHandle handle,
|
|
return VIX_OK;
|
|
}
|
|
|
|
+NBDKIT_DLL_PUBLIC VixError
|
|
+VixDiskLib_ReadAsync (VixDiskLibHandle handle,
|
|
+ uint64_t start_sector, uint64_t nr_sectors,
|
|
+ unsigned char *buf,
|
|
+ VixDiskLibCompletionCB callback, void *data)
|
|
+{
|
|
+ size_t offset = start_sector * VIXDISKLIB_SECTOR_SIZE;
|
|
+
|
|
+ memcpy (buf, disk + offset, nr_sectors * VIXDISKLIB_SECTOR_SIZE);
|
|
+ callback (data, VIX_OK);
|
|
+ return VIX_ASYNC;
|
|
+}
|
|
+
|
|
NBDKIT_DLL_PUBLIC VixError
|
|
VixDiskLib_Write (VixDiskLibHandle handle,
|
|
uint64_t start_sector, uint64_t nr_sectors,
|
|
@@ -199,6 +212,25 @@ VixDiskLib_Write (VixDiskLibHandle handle,
|
|
return VIX_OK;
|
|
}
|
|
|
|
+NBDKIT_DLL_PUBLIC VixError
|
|
+VixDiskLib_WriteAsync (VixDiskLibHandle handle,
|
|
+ uint64_t start_sector, uint64_t nr_sectors,
|
|
+ const unsigned char *buf,
|
|
+ VixDiskLibCompletionCB callback, void *data)
|
|
+{
|
|
+ size_t offset = start_sector * VIXDISKLIB_SECTOR_SIZE;
|
|
+
|
|
+ memcpy (disk + offset, buf, nr_sectors * VIXDISKLIB_SECTOR_SIZE);
|
|
+ callback (data, VIX_OK);
|
|
+ return VIX_ASYNC;
|
|
+}
|
|
+
|
|
+NBDKIT_DLL_PUBLIC VixError
|
|
+VixDiskLib_Flush (VixDiskLibHandle handle)
|
|
+{
|
|
+ return VIX_OK;
|
|
+}
|
|
+
|
|
NBDKIT_DLL_PUBLIC VixError
|
|
VixDiskLib_Wait (VixDiskLibHandle handle)
|
|
{
|
|
--
|
|
2.31.1
|
|
|