diff --git a/SOURCES/0001-generator-Print-full-error-in-handle_reply_error.patch b/SOURCES/0001-generator-Print-full-error-in-handle_reply_error.patch index 3f6a11f..53dbfb6 100644 --- a/SOURCES/0001-generator-Print-full-error-in-handle_reply_error.patch +++ b/SOURCES/0001-generator-Print-full-error-in-handle_reply_error.patch @@ -187,5 +187,5 @@ index 45893a8b..6c7cc45c 100644 return 0; } -- -2.43.0 +2.47.1 diff --git a/SOURCES/0002-lib-Don-t-overwrite-error-in-nbd_opt_-go-info.patch b/SOURCES/0002-lib-Don-t-overwrite-error-in-nbd_opt_-go-info.patch index 51ef83d..0ba1b6f 100644 --- a/SOURCES/0002-lib-Don-t-overwrite-error-in-nbd_opt_-go-info.patch +++ b/SOURCES/0002-lib-Don-t-overwrite-error-in-nbd_opt_-go-info.patch @@ -34,5 +34,5 @@ index 600265a0..5872dd54 100644 } return r; -- -2.43.0 +2.47.1 diff --git a/SOURCES/0003-generator-Restore-assignment-to-local-err.patch b/SOURCES/0003-generator-Restore-assignment-to-local-err.patch index 337e5c5..b28a00c 100644 --- a/SOURCES/0003-generator-Restore-assignment-to-local-err.patch +++ b/SOURCES/0003-generator-Restore-assignment-to-local-err.patch @@ -39,5 +39,5 @@ index 3945411e..699e24aa 100644 nbd_internal_free_option (h); SET_NEXT_STATE (%.NEGOTIATING); -- -2.43.0 +2.47.1 diff --git a/SOURCES/0004-generator-states-newstyle.c-Quote-untrusted-string-f.patch b/SOURCES/0004-generator-states-newstyle.c-Quote-untrusted-string-f.patch index d3dcdb7..91297bd 100644 --- a/SOURCES/0004-generator-states-newstyle.c-Quote-untrusted-string-f.patch +++ b/SOURCES/0004-generator-states-newstyle.c-Quote-untrusted-string-f.patch @@ -171,5 +171,5 @@ index 6c7cc45c..8c483bd2 100644 return 0; } -- -2.43.0 +2.47.1 diff --git a/SOURCES/0005-generator-states-newstyle.c-Don-t-sign-extend-escape.patch b/SOURCES/0005-generator-states-newstyle.c-Don-t-sign-extend-escape.patch index e311c10..efa8db2 100644 --- a/SOURCES/0005-generator-states-newstyle.c-Don-t-sign-extend-escape.patch +++ b/SOURCES/0005-generator-states-newstyle.c-Don-t-sign-extend-escape.patch @@ -23,5 +23,5 @@ index 8c483bd2..1e026a8a 100644 fprintf (fp, ": "); for (i = 0; i < replylen; ++i) { -- -2.43.0 +2.47.1 diff --git a/SOURCES/0006-copy-Set-the-total-size-in-bytes-copied.patch b/SOURCES/0006-copy-Set-the-total-size-in-bytes-copied.patch new file mode 100644 index 0000000..6dbf62b --- /dev/null +++ b/SOURCES/0006-copy-Set-the-total-size-in-bytes-copied.patch @@ -0,0 +1,49 @@ +From 1c9d65c6100edfd6050b34dcf29a7a1bbdf5e89a Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Sat, 29 Mar 2025 14:00:39 +0000 +Subject: [PATCH] copy: Set the total size in bytes copied + +Ensure that src->size contains the total size in bytes copied. There +is (only) one place where this is not known in advance, which is when +we are reading from a pipe. + +(cherry picked from commit afe4f390a65a0d1b9f3625bf90c73726866e0a64) +--- + copy/main.c | 3 +++ + copy/synch-copying.c | 6 ++++++ + 2 files changed, 9 insertions(+) + +diff --git a/copy/main.c b/copy/main.c +index ae9840ae..cd7b2fd2 100644 +--- a/copy/main.c ++++ b/copy/main.c +@@ -479,6 +479,9 @@ main (int argc, char *argv[]) + /* Always set the progress bar to 100% at the end of the copy. */ + progress_bar (1, 1); + ++ /* We should always know the total size copied here. */ ++ assert (src->size >= 0); ++ + /* Shut down the source side. */ + src->ops->close (src); + +diff --git a/copy/synch-copying.c b/copy/synch-copying.c +index 2f6627bf..200c97f6 100644 +--- a/copy/synch-copying.c ++++ b/copy/synch-copying.c +@@ -53,6 +53,12 @@ synch_copying (void) + offset += r; + progress_bar (offset, src->size); + } ++ ++ /* Record the total amount of data that was copied. In all other ++ * cases, src->size will already be set to the true size, so here ++ * is the only place we have to set this. ++ */ ++ src->size = offset; + } + + /* Otherwise we know how much we're copying, so we can copy in whole +-- +2.47.1 + diff --git a/SOURCES/0007-copy-Add-blkhash-option.patch b/SOURCES/0007-copy-Add-blkhash-option.patch new file mode 100644 index 0000000..cb53553 --- /dev/null +++ b/SOURCES/0007-copy-Add-blkhash-option.patch @@ -0,0 +1,1111 @@ +From bdbc77b131043cba3e6db511f09cc59a1872f809 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Sat, 29 Mar 2025 11:46:52 +0000 +Subject: [PATCH] copy: Add --blkhash option + +This option calculates the blkhash (similar to checksum) of the file +as it is copied. Blkhash is described here: + + https://gitlab.com/nirs/blkhash + +and in more detail in this paper: + + Soffer, N. and Waisbard, E. (2024). An Efficient Hash Function + Construction for Sparse Data. In Proceedings of the 21st + International Conference on Security and Cryptography - SECRYPT; + ISBN 978-989-758-709-2; ISSN 2184-7711, SciTePress, pages + 698-703. DOI: 10.5220/0012764500003767. + +Thanks: Nir Soffer +(cherry picked from commit c6ed852f71fb25e1de8093631c5cfc1c7135d571) +--- + copy/Makefile.am | 12 + + copy/blkhash.c | 490 ++++++++++++++++++++++++++++++++++ + copy/copy-blkhash-known.sh | 83 ++++++ + copy/copy-blkhash-pattern.sh | 49 ++++ + copy/copy-blkhash-randfile.sh | 45 ++++ + copy/main.c | 81 +++++- + copy/multi-thread-copying.c | 12 +- + copy/nbdcopy.h | 12 + + copy/nbdcopy.pod | 55 +++- + copy/synch-copying.c | 3 + + 10 files changed, 836 insertions(+), 6 deletions(-) + create mode 100644 copy/blkhash.c + create mode 100755 copy/copy-blkhash-known.sh + create mode 100755 copy/copy-blkhash-pattern.sh + create mode 100755 copy/copy-blkhash-randfile.sh + +diff --git a/copy/Makefile.am b/copy/Makefile.am +index c42accab..403f98ba 100644 +--- a/copy/Makefile.am ++++ b/copy/Makefile.am +@@ -18,6 +18,9 @@ + include $(top_srcdir)/subdir-rules.mk + + EXTRA_DIST = \ ++ copy-blkhash-known.sh \ ++ copy-blkhash-pattern.sh \ ++ copy-blkhash-randfile.sh \ + copy-block-to-nbd.sh \ + copy-file-to-file.sh \ + copy-file-to-nbd.sh \ +@@ -65,6 +68,7 @@ TESTS = + + nbdcopy_SOURCES = \ + nbdcopy.h \ ++ blkhash.c \ + file-ops.c \ + main.c \ + multi-thread-copying.c \ +@@ -82,8 +86,10 @@ nbdcopy_CPPFLAGS = \ + nbdcopy_CFLAGS = \ + $(WARNINGS_CFLAGS) \ + $(PTHREAD_CFLAGS) \ ++ $(GNUTLS_CFLAGS) \ + $(NULL) + nbdcopy_LDADD = \ ++ $(GNUTLS_LIBS) \ + $(PTHREAD_LIBS) \ + $(top_builddir)/common/utils/libutils.la \ + $(top_builddir)/lib/libnbd.la \ +@@ -150,6 +156,12 @@ TESTS += \ + endif + + if HAVE_GNUTLS ++TESTS += \ ++ copy-blkhash-known.sh \ ++ copy-blkhash-pattern.sh \ ++ copy-blkhash-randfile.sh \ ++ $(NULL) ++ + if HAVE_PSKTOOL + TESTS += copy-tls.sh + endif +diff --git a/copy/blkhash.c b/copy/blkhash.c +new file mode 100644 +index 00000000..622d8a39 +--- /dev/null ++++ b/copy/blkhash.c +@@ -0,0 +1,490 @@ ++/* NBD client library in userspace. ++ * Copyright Red Hat ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef HAVE_GNUTLS ++#include ++#include ++#endif ++ ++#include ++ ++#include "byte-swapping.h" ++#include "ispowerof2.h" ++#include "iszero.h" ++#include "minmax.h" ++#include "rounding.h" ++#include "vector.h" ++ ++#include "nbdcopy.h" ++ ++#ifdef HAVE_GNUTLS ++ ++/* We will have one of these structs per blkhash block. */ ++struct block { ++ /* unknown => We haven't seen this block yet. 'ptr' is NULL. ++ * ++ * zero => The block is all zeroes. 'ptr' is NULL. ++ * ++ * data => The block is all data, and we have seen the whole block, ++ * and the hash has been computed. 'ptr' points to the computed ++ * hash. 'n' is unused. ++ * ++ * incomplete => Part of the block was seen. 'ptr' points to the ++ * data block, waiting to be completed. 'n' is the number of bytes ++ * seen so far. We will compute the hash and turn this into a ++ * 'data' or 'zero' block, either when we have seen all bytes of ++ * this block, or at the end. ++ * ++ * Note that this code assumes that we are called exactly once for a ++ * range in the disk image. ++ */ ++ enum { block_unknown = 0, block_zero, block_data, block_incomplete } type; ++ void *ptr; ++ size_t n; ++}; ++ ++DEFINE_VECTOR_TYPE(blocks, struct block); ++static blocks block_vec; ++ ++static void ++free_struct_block (struct block b) ++{ ++ free (b.ptr); ++} ++ ++/* Since nbdcopy is multi-threaded, we need to use locks to protect ++ * access to shared resources. But also because computing digests is ++ * very compute intensive, we must allow those to run in parallel as ++ * much as possible. Therefore the locking is carefully chosen to ++ * protect critical resources while allowing (most) hashing to happen ++ * in parallel. ++ * ++ * 'bv_lock' protects access to 'block_vec', and is needed whenever ++ * the vector might be extended. ++ * ++ * It's safe to hash complete blocks without acquiring any lock (since ++ * we should only be called once per complete block). However ++ * 'incomplete_lock' must be acquired whenever we deal with incomplete ++ * blocks as we might be called in parallel for those. ++ */ ++static pthread_mutex_t bv_lock = PTHREAD_MUTEX_INITIALIZER; ++static pthread_mutex_t incomplete_lock = PTHREAD_MUTEX_INITIALIZER; ++ ++/* Length of the digests of this algorithm in bytes. */ ++static size_t alg_len; ++ ++void ++init_blkhash (void) ++{ ++ if (blkhash_alg == GNUTLS_DIG_UNKNOWN) return; ++ ++ assert (is_power_of_2 (blkhash_size)); ++ ++ alg_len = gnutls_hash_get_len (blkhash_alg); ++ ++ /* If we know the source size in advance, reserve the block vector. ++ * We don't always know this (src->size == -1), eg. if reading from ++ * a pipe. If the size is exactly zero we don't need to reserve ++ * anything. ++ */ ++ if (src->size > 0) { ++ if (blocks_reserve_exactly (&block_vec, ++ DIV_ROUND_UP (src->size, blkhash_size)) == -1) { ++ perror ("nbdcopy: realloc"); ++ exit (EXIT_FAILURE); ++ } ++ } ++} ++ ++/* Single block update functions. */ ++static struct block ++get_block (uint64_t blknum) ++{ ++ struct block b; ++ ++ pthread_mutex_lock (&bv_lock); ++ ++ /* Grow the underlying storage if needed. */ ++ if (block_vec.cap <= blknum) { ++ if (blocks_reserve (&block_vec, blknum - block_vec.cap + 1) == -1) { ++ perror ("nbdcopy: realloc"); ++ exit (EXIT_FAILURE); ++ } ++ } ++ ++ /* Initialize new blocks if needed. */ ++ if (block_vec.len <= blknum) { ++ size_t i; ++ for (i = block_vec.len; i <= blknum; ++i) { ++ block_vec.ptr[i].type = block_unknown; ++ block_vec.ptr[i].ptr = NULL; ++ block_vec.ptr[i].n = 0; ++ } ++ block_vec.len = blknum+1; ++ } ++ ++ b = block_vec.ptr[blknum]; ++ ++ pthread_mutex_unlock (&bv_lock); ++ ++ return b; ++} ++ ++static void ++put_block (uint64_t blknum, struct block b) ++{ ++ pthread_mutex_lock (&bv_lock); ++ block_vec.ptr[blknum] = b; ++ pthread_mutex_unlock (&bv_lock); ++} ++ ++/* Compute the hash of a single block of data and return it. This is ++ * normally a full block of size blkhash_size, but may be a smaller ++ * block at the end of the file. ++ */ ++static void * ++compute_one_block_hash (const void *buf, size_t len) ++{ ++ gnutls_hash_hd_t dig; ++ int r; ++ void *digest; ++ ++ /* Create the digest handle. */ ++ r = gnutls_hash_init (&dig, blkhash_alg); ++ if (r < 0) { ++ fprintf (stderr, "nbdcopy: gnutls_hash_init: %s\n", gnutls_strerror (r)); ++ exit (EXIT_FAILURE); ++ } ++ ++ /* Allocate space for the result. */ ++ digest = malloc (alg_len); ++ if (digest == NULL) { ++ perror ("nbdcopy: malloc"); ++ exit (EXIT_FAILURE); ++ } ++ ++ r = gnutls_hash (dig, buf, len); ++ if (r < 0) { ++ fprintf (stderr, "nbdcopy: gnutls_hash: %s\n", gnutls_strerror (r)); ++ exit (EXIT_FAILURE); ++ } ++ ++ gnutls_hash_deinit (dig, digest); ++ return digest; /* caller must free */ ++} ++ ++/* We have received a complete block. Compute the hash for this ++ * block. If buf == NULL, sets the block to zero. Note this function ++ * assumes we can only be called once per complete block, so locking ++ * is unnecessary (apart from inside the calls to get/put_block). ++ */ ++static void ++set_complete_block (uint64_t blknum, const char *buf) ++{ ++ struct block b = get_block (blknum); ++ void *p; ++ ++ /* Assert that we haven't seen this block before. */ ++ assert (b.type == block_unknown); ++ ++ if (buf) { ++ b.type = block_data; ++ ++ /* Compute the hash of the whole block now. */ ++ p = compute_one_block_hash (buf, blkhash_size); ++ b.ptr = p; ++ } ++ else { ++ b.type = block_zero; ++ /* Hash is computed for all zero blocks in one go at the end. */ ++ } ++ ++ put_block (blknum, b); ++} ++ ++static void finish_block (struct block *b); ++ ++/* We have received a partial block. Store or update what we have. ++ * If this completes the block, then do what is needed. If buf == ++ * NULL, this is a partial zero instead. ++ */ ++static void ++set_incomplete_block (uint64_t blknum, ++ uint64_t blkoffs, uint64_t len, ++ const char *buf) ++{ ++ /* We must acquire the incomplete_lock here, see locking comment above. */ ++ pthread_mutex_lock (&incomplete_lock); ++ ++ struct block b = get_block (blknum); ++ ++ switch (b.type) { ++ case block_data: ++ case block_zero: ++ /* We shouldn't have seen the complete block before. */ ++ abort (); ++ ++ case block_unknown: ++ /* Allocate the block. */ ++ b.ptr = calloc (1, blkhash_size); ++ if (b.ptr == NULL) { ++ perror ("nbdcopy: calloc"); ++ exit (EXIT_FAILURE); ++ } ++ b.n = 0; ++ b.type = block_incomplete; ++ ++ /*FALLTHROUGH*/ ++ case block_incomplete: ++ if (buf) ++ /* Add the partial data to the block. */ ++ memcpy ((char *)b.ptr + blkoffs, buf, len); ++ else ++ /* Add the partial zeroes to the block. */ ++ memset ((char *)b.ptr + blkoffs, 0, len); ++ b.n += len; ++ ++ /* If the block is now complete, finish it off. */ ++ if (b.n == blkhash_size) ++ finish_block (&b); ++ ++ put_block (blknum, b); ++ } ++ ++ pthread_mutex_unlock (&incomplete_lock); ++} ++ ++static void ++finish_block (struct block *b) ++{ ++ void *p; ++ ++ assert (b->type == block_incomplete); ++ ++ if (b->n == blkhash_size && is_zero (b->ptr, blkhash_size)) { ++ b->type = block_zero; ++ free (b->ptr); ++ b->ptr = NULL; ++ } ++ else { ++ b->type = block_data; ++ /* Compute the hash of the block. */ ++ p = compute_one_block_hash (b->ptr, b->n); ++ free (b->ptr); ++ b->ptr = p; ++ } ++} ++ ++/* Called from either synch-copying.c or multi-thread-copying.c to ++ * update the hash with some data (or zero if buf == NULL). ++ */ ++void ++update_blkhash (const char *buf, uint64_t offset, size_t len) ++{ ++ uint64_t blknum, blkoffs; ++ ++ if (blkhash_alg == GNUTLS_DIG_UNKNOWN) return; ++ ++ if (verbose) { ++ fprintf (stderr, "blkhash: %s " ++ "[0x%" PRIx64 " - 0x%" PRIx64 "] (length %zu)\n", ++ buf ? "data" : "zero", ++ offset, offset+len, len); ++ } ++ ++ /* Iterate over the blocks. */ ++ blknum = offset / blkhash_size; ++ blkoffs = offset % blkhash_size; ++ ++ /* Unaligned head */ ++ if (blkoffs) { ++ uint64_t n = MIN (blkhash_size - blkoffs, len); ++ set_incomplete_block (blknum, blkoffs, n, buf); ++ if (buf) buf += n; ++ len -= n; ++ offset += n; ++ blknum++; ++ } ++ ++ /* Aligned body */ ++ while (len >= blkhash_size) { ++ set_complete_block (blknum, buf); ++ if (buf) buf += blkhash_size; ++ len -= blkhash_size; ++ offset += blkhash_size; ++ blknum++; ++ } ++ ++ /* Unaligned tail */ ++ if (len) { ++ set_incomplete_block (blknum, 0, len, buf); ++ } ++} ++ ++/* Called after copying to finish and print the resulting blkhash. */ ++void ++finish_blkhash (uint64_t total_size) ++{ ++ gnutls_hash_hd_t dig; ++ size_t i; ++ struct block *b; ++ void *zero_block; ++ void *zero_digest; ++ int r; ++ const uint64_t total_size_le = htole64 (total_size); ++ unsigned char *final_digest; ++ FILE *fp; ++ ++ if (blkhash_alg == GNUTLS_DIG_UNKNOWN) return; ++ ++ if (verbose) { ++ fprintf (stderr, "blkhash: total size 0x%" PRIx64 "\n", total_size); ++ fprintf (stderr, "blkhash: number of blocks %zu\n", block_vec.len); ++ } ++ ++ /* If the last block is incomplete, finish it. */ ++ if (block_vec.len > 0) { ++ b = &block_vec.ptr[block_vec.len-1]; ++ if (b->type == block_incomplete) ++ finish_block (b); ++ } ++ ++ /* There must be no other unknown or incomplete blocks left. */ ++ for (i = 0; i < block_vec.len; ++i) { ++ b = &block_vec.ptr[i]; ++ assert (b->type != block_unknown); ++ assert (b->type != block_incomplete); ++ } ++ ++ /* Calculate the hash of a zero block. */ ++ zero_block = calloc (1, blkhash_size); ++ if (zero_block == NULL) { ++ perror ("nbdcopy: calloc"); ++ exit (EXIT_FAILURE); ++ } ++ zero_digest = compute_one_block_hash (zero_block, blkhash_size); ++ free (zero_block); ++ ++ /* Now compute the blkhash. */ ++ r = gnutls_hash_init (&dig, blkhash_alg); ++ if (r < 0) { ++ fprintf (stderr, "nbdcopy: gnutls_hash_init: %s\n", gnutls_strerror (r)); ++ exit (EXIT_FAILURE); ++ } ++ ++ for (i = 0; i < block_vec.len; ++i) { ++ b = &block_vec.ptr[i]; ++ ++ switch (b->type) { ++ case block_unknown: ++ case block_incomplete: ++ abort (); /* see assertion above */ ++ ++ case block_data: ++ /* Mix in the block digest. */ ++ r = gnutls_hash (dig, b->ptr, alg_len); ++ if (r < 0) { ++ fprintf (stderr, "nbdcopy: gnutls_hash: %s\n", gnutls_strerror (r)); ++ exit (EXIT_FAILURE); ++ } ++ break; ++ ++ case block_zero: ++ /* Block is zero, mix in the zero digest. */ ++ r = gnutls_hash (dig, zero_digest, alg_len); ++ if (r < 0) { ++ fprintf (stderr, "nbdcopy: gnutls_hash: %s\n", gnutls_strerror (r)); ++ exit (EXIT_FAILURE); ++ } ++ break; ++ } ++ } ++ ++ free (zero_digest); ++ ++ /* Append the length at the end. */ ++ r = gnutls_hash (dig, &total_size_le, sizeof total_size_le); ++ if (r < 0) { ++ fprintf (stderr, "nbdcopy: gnutls_hash: %s\n", gnutls_strerror (r)); ++ exit (EXIT_FAILURE); ++ } ++ ++ /* Get the final digest. */ ++ final_digest = malloc (alg_len); ++ if (final_digest == NULL) { ++ perror ("nbdcopy: malloc"); ++ exit (EXIT_FAILURE); ++ } ++ ++ gnutls_hash_deinit (dig, final_digest); ++ ++ /* Print the final digest. */ ++ if (blkhash_file != NULL) { ++ fp = fopen (blkhash_file, "w"); ++ if (fp == NULL) { ++ perror (blkhash_file); ++ exit (EXIT_FAILURE); ++ } ++ } ++ else { ++ fp = stdout; ++ } ++ for (i = 0; i < alg_len; ++i) ++ fprintf (fp, "%02x", final_digest[i]); ++ fprintf (fp, "\n"); ++ fflush (fp); ++ if (blkhash_file != NULL) ++ fclose (fp); ++ ++ free (final_digest); ++ ++ /* Free the hashes and vector. */ ++ blocks_iter (&block_vec, free_struct_block); ++ blocks_reset (&block_vec); ++} ++ ++#else /* !HAVE_GNUTLS */ ++ ++void ++init_blkhash (void) ++{ ++ /* nothing */ ++} ++ ++void ++update_blkhash (const char *buf, uint64_t offset, size_t len) ++{ ++ /* nothing */ ++} ++ ++void ++finish_blkhash (uint64_t total_size) ++{ ++ /* nothing */ ++} ++ ++#endif /* !HAVE_GNUTLS */ +diff --git a/copy/copy-blkhash-known.sh b/copy/copy-blkhash-known.sh +new file mode 100755 +index 00000000..ca398eac +--- /dev/null ++++ b/copy/copy-blkhash-known.sh +@@ -0,0 +1,83 @@ ++#!/usr/bin/env bash ++# nbd client library in userspace ++# Copyright Red Hat ++# ++# This library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2 of the License, or (at your option) any later version. ++# ++# This library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with this library; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ ++# Test --blkhash option. ++ ++. ../tests/functions.sh ++ ++set -e ++set -x ++ ++requires $NBDKIT --exit-with-parent --version ++requires $NBDKIT --exit-with-parent data --version ++ ++hashfile=copy-blkhash-known.hash ++cleanup_fn rm -f $hashfile ++rm -f $hashfile ++ ++do_test () { ++ data="$1" ++ hash="$2" ++ expected="$3" ++ ++ export hash hashfile ++ $NBDKIT -U - data "$data" \ ++ --run 'nbdcopy --blkhash=$hash --blkhash-file=$hashfile \ ++ "$uri" null:' ++ cat $hashfile ++ test "$expected" = "$(cat $hashfile)" ++} ++ ++# Instances of the data plugin and the corresponding hash that we ++# previously cross-checked against blkhash's test/blkhash.py ++ ++do_test "" \ ++ sha256 \ ++ af5570f5a1810b7af78caf4bc70a660f0df51e42baf91d4de5b2328de0e83dfc ++ ++do_test '"hello"' \ ++ md5 \ ++ f741ac9ce55f5325906bb14e9c05d467 ++ ++do_test '"hello"' \ ++ sha256 \ ++ 337355feb53a5309d5aba92796223c2c84ffab930e706c01fef573a2722545e6 ++ ++do_test '"hello"' \ ++ sha512 \ ++ eca04a593cf12ec4132993da709048e25a2f1be3526e132fb521ec9d41f023ec4018b3fd07b014a33e36bb5fa145b36991f431e62f9e1a93bebea6c9565682c1 ++ ++do_test '"hello"' \ ++ md5/4 \ ++ 8262896de34125dec173722c920e8bd0 ++ ++do_test '"hello" @1048576 "goodbye"' \ ++ sha256 \ ++ 61b8f3a8cea76e16eeff7ce27f1b7711c1f1e437f5038cec17773772a4bded28 ++ ++do_test '"12345678"*512*256' \ ++ md5 \ ++ 84fc21ac2f49ac283ff399378d834d1a ++ ++do_test '"12345678"*512*256' \ ++ sha256 \ ++ cbb388edd25e567b85f504c7b345497f9fb4f6bbf4e39768809184b9f9e678f8 ++ ++do_test '"12345678"*512*256' \ ++ sha512/512k \ ++ 379f7eb1628058c7abbc4c96941ac972074815ea9ef4aca95eefb2b4f9c29f64023fff8d966e9fddf08d07bdba548e75298917f10268fdf9ba636c2321a2214e +diff --git a/copy/copy-blkhash-pattern.sh b/copy/copy-blkhash-pattern.sh +new file mode 100755 +index 00000000..f135f54d +--- /dev/null ++++ b/copy/copy-blkhash-pattern.sh +@@ -0,0 +1,49 @@ ++#!/usr/bin/env bash ++# nbd client library in userspace ++# Copyright Red Hat ++# ++# This library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2 of the License, or (at your option) any later version. ++# ++# This library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with this library; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ ++# Test --blkhash option against a large plugin with known content. ++ ++. ../tests/functions.sh ++ ++set -e ++set -x ++ ++requires $NBDKIT --exit-with-parent --version ++requires $NBDKIT --exit-with-parent pattern --version ++ ++hashfile_sha256=copy-blkhash-pattern.hash256 ++hashfile_sha512=copy-blkhash-pattern.hash512 ++cleanup_fn rm -f $hashfile_sha256 $hashfile_sha512 ++rm -f $hashfile_sha256 $hashfile_sha512 ++ ++export hashfile_sha256 hashfile_sha512 ++ ++expected_sha256=6750a1c3d78e46eaffb0d094624825dea88f0c7098b2424fce776c0748442649 ++expected_sha512=aef2905a223b2b9b565374ce9671bcb434fc944b0a108c8b5b98769d830b6c61b9567de177791a092514675c3a3e0740758c6a5a171ae71d844c60315f07e334 ++ ++$NBDKIT -U - pattern 1G \ ++ --run ' ++ nbdcopy --blkhash --blkhash-file=$hashfile_sha256 "$uri" null: && ++ nbdcopy --blkhash=sha512/512k --blkhash-file=$hashfile_sha512 \ ++ "$uri" null: ++' ++cat $hashfile_sha256 ++test "$expected_sha256" = "$(cat $hashfile_sha256)" ++ ++cat $hashfile_sha512 ++test "$expected_sha512" = "$(cat $hashfile_sha512)" +diff --git a/copy/copy-blkhash-randfile.sh b/copy/copy-blkhash-randfile.sh +new file mode 100755 +index 00000000..029237c4 +--- /dev/null ++++ b/copy/copy-blkhash-randfile.sh +@@ -0,0 +1,45 @@ ++#!/usr/bin/env bash ++# nbd client library in userspace ++# Copyright Red Hat ++# ++# This library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2 of the License, or (at your option) any later version. ++# ++# This library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with this library; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ ++# Test --blkhash option. ++ ++. ../tests/functions.sh ++ ++set -e ++set -x ++ ++requires $DD --version ++requires $DD oflag=seek_bytes + #include + #include ++#include + + #ifdef HAVE_SYS_IOCTL_H + #include + #endif + +-#include ++#ifdef HAVE_GNUTLS ++#include ++#endif + + #include + +@@ -48,6 +51,11 @@ + #include "nbdcopy.h" + + bool allocated; /* --allocated flag */ ++#ifdef HAVE_GNUTLS /* --blkhash */ ++gnutls_digest_algorithm_t blkhash_alg = GNUTLS_DIG_UNKNOWN; ++#endif ++unsigned blkhash_size = 65536; ++const char *blkhash_file; /* --blkhash-file (NULL = stdout) */ + unsigned connections = 4; /* --connections */ + bool target_is_zero; /* --target-is-zero flag */ + bool extents = true; /* ! --no-extents flag */ +@@ -76,7 +84,8 @@ usage (FILE *fp, int exitcode) + "\n" + "Copy to and from an NBD server:\n" + "\n" +-" nbdcopy [--allocated] [-C N|--connections=N]\n" ++" nbdcopy [--allocated] [--blkhash=DIGEST] [--blkhash-file=FILENAME]\n" ++" [-C N|--connections=N]\n" + " [--destination-is-zero|--target-is-zero] [--flush]\n" + " [--no-extents] [-p|--progress|--progress=FD]\n" + " [--queue-size=N] [--request-size=N] [-R N|--requests=N]\n" +@@ -113,6 +122,8 @@ main (int argc, char *argv[]) + LONG_OPTIONS, + SHORT_OPTIONS, + ALLOCATED_OPTION, ++ BLKHASH_OPTION, ++ BLKHASH_FILE_OPTION, + TARGET_IS_ZERO_OPTION, + FLUSH_OPTION, + NO_EXTENTS_OPTION, +@@ -125,6 +136,8 @@ main (int argc, char *argv[]) + { "help", no_argument, NULL, HELP_OPTION }, + { "long-options", no_argument, NULL, LONG_OPTIONS }, + { "allocated", no_argument, NULL, ALLOCATED_OPTION }, ++ { "blkhash", optional_argument, NULL, BLKHASH_OPTION }, ++ { "blkhash-file", required_argument, NULL, BLKHASH_FILE_OPTION }, + { "connections", required_argument, NULL, 'C' }, + { "destination-is-zero", no_argument, NULL, TARGET_IS_ZERO_OPTION }, + { "flush", no_argument, NULL, FLUSH_OPTION }, +@@ -179,6 +192,64 @@ main (int argc, char *argv[]) + allocated = true; + break; + ++ case BLKHASH_OPTION: ++#ifdef HAVE_GNUTLS ++ if (optarg == NULL || optarg[0] == '\0') { ++ blkhash_alg = GNUTLS_DIG_SHA256; ++ blkhash_size = 65536; ++ } ++ else { ++ i = strcspn (optarg, "/"); ++ if (i == 3 && strncasecmp (optarg, "md5", i) == 0) ++ blkhash_alg = GNUTLS_DIG_MD5; ++ else if (i == 4 && strncasecmp (optarg, "sha1", i) == 0) ++ blkhash_alg = GNUTLS_DIG_SHA1; ++ else if (i == 6 && strncasecmp (optarg, "sha256", i) == 0) ++ blkhash_alg = GNUTLS_DIG_SHA256; ++ else if (i == 6 && strncasecmp (optarg, "sha512", i) == 0) ++ blkhash_alg = GNUTLS_DIG_SHA512; ++ else { ++ fprintf (stderr, "%s: %s: unknown digest algorithm '%s'\n", ++ prog, "--blkhash", optarg); ++ exit (EXIT_FAILURE); ++ } ++ if (optarg[i] == '/') { ++ i64 = human_size_parse (&optarg[i+1], &error, &pstr); ++ if (i64 == -1) { ++ fprintf (stderr, "%s: %s: %s: %s\n", ++ prog, "--blkhash", error, pstr); ++ exit (EXIT_FAILURE); ++ } ++ if (! is_power_of_2 (blkhash_size)) { ++ fprintf (stderr, "%s: %s is not a power of two: %s\n", ++ prog, "--blkhash", &optarg[i+1]); ++ exit (EXIT_FAILURE); ++ } ++ if (i64 > UINT_MAX) { ++ fprintf (stderr, "%s: %s is too large: %s\n", ++ prog, "--blkhash", &optarg[i+1]); ++ exit (EXIT_FAILURE); ++ } ++ blkhash_size = i64; ++ } ++ } ++ break; ++#else ++ fprintf (stderr, "%s: %s: option not supported in this build\n", ++ prog, "--blkhash"); ++ exit (EXIT_FAILURE); ++#endif ++ ++ case BLKHASH_FILE_OPTION: ++#ifdef HAVE_GNUTLS ++ blkhash_file = optarg; ++ break; ++#else ++ fprintf (stderr, "%s: %s: option not supported in this build\n", ++ prog, "--blkhash-file"); ++ exit (EXIT_FAILURE); ++#endif ++ + case TARGET_IS_ZERO_OPTION: + target_is_zero = true; + break; +@@ -369,6 +440,9 @@ main (int argc, char *argv[]) + exit (EXIT_FAILURE); + } + ++ /* Initialize the blkhash function (if used). */ ++ init_blkhash (); ++ + /* If multi-conn is not supported, force connections to 1. */ + if (! src->ops->can_multi_conn (src) || ! dst->ops->can_multi_conn (dst)) + connections = 1; +@@ -482,6 +556,9 @@ main (int argc, char *argv[]) + /* We should always know the total size copied here. */ + assert (src->size >= 0); + ++ /* Finish and print the blkhash. */ ++ finish_blkhash (src->size); ++ + /* Shut down the source side. */ + src->ops->close (src); + +diff --git a/copy/multi-thread-copying.c b/copy/multi-thread-copying.c +index a75fb265..89588e6e 100644 +--- a/copy/multi-thread-copying.c ++++ b/copy/multi-thread-copying.c +@@ -265,8 +265,10 @@ worker_thread (void *wp) + * THREAD_WORK_SIZE, so there is no danger of overflowing + * size_t. + */ +- command = create_command (zeroing_start, offset-zeroing_start, +- true, w); ++ uint64_t zeroing_len = offset - zeroing_start; ++ ++ update_blkhash (NULL, zeroing_start, zeroing_len); ++ command = create_command (zeroing_start, zeroing_len, true, w); + fill_dst_range_with_zeroes (command); + is_zeroing = false; + } +@@ -297,6 +299,9 @@ worker_thread (void *wp) + * THREAD_WORK_SIZE, so there is no danger of overflowing + * size_t. + */ ++ uint64_t zeroing_len = offset - zeroing_start; ++ ++ update_blkhash (NULL, zeroing_start, zeroing_len); + command = create_command (zeroing_start, offset - zeroing_start, + true, w); + fill_dst_range_with_zeroes (command); +@@ -505,6 +510,9 @@ finished_read (void *vp, int *error) + exit (EXIT_FAILURE); + } + ++ update_blkhash (slice_ptr (command->slice), command->offset, ++ command->slice.len); ++ + if (allocated || sparse_size == 0) { + /* If sparseness detection (see below) is turned off then we write + * the whole command. +diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h +index e223191c..297978e5 100644 +--- a/copy/nbdcopy.h ++++ b/copy/nbdcopy.h +@@ -24,6 +24,10 @@ + #include + #include + ++#ifdef HAVE_GNUTLS ++#include ++#endif ++ + #include + + #include "vector.h" +@@ -227,6 +231,11 @@ extern void asynch_notify_read_write_not_supported (struct rw *rw, + size_t index); + + extern bool allocated; ++#ifdef HAVE_GNUTLS ++extern gnutls_digest_algorithm_t blkhash_alg; ++#endif ++extern unsigned blkhash_size; ++extern const char *blkhash_file; + extern unsigned connections; + extern bool target_is_zero; + extern bool extents; +@@ -246,5 +255,8 @@ extern const char *prog; + extern void progress_bar (off_t pos, int64_t size); + extern void synch_copying (void); + extern void multi_thread_copying (void); ++extern void init_blkhash (void); ++extern void update_blkhash (const char *buf, uint64_t offset, size_t len); ++extern void finish_blkhash (uint64_t total_size); + + #endif /* NBDCOPY_H */ +diff --git a/copy/nbdcopy.pod b/copy/nbdcopy.pod +index 940e37ad..3efe2b1b 100644 +--- a/copy/nbdcopy.pod ++++ b/copy/nbdcopy.pod +@@ -4,7 +4,8 @@ nbdcopy - copy to and from an NBD server + + =head1 SYNOPSIS + +- nbdcopy [--allocated] [-C N|--connections=N] ++ nbdcopy [--allocated] [--blkhash=DIGEST] [--blkhash-file=FILE] ++ [-C N|--connections=N] + [--destination-is-zero|--target-is-zero] [--flush] + [--no-extents] [-p|--progress|--progress=FD] + [--queue-size=N] [--request-size=N] [-R N|--requests=N] +@@ -50,6 +51,11 @@ option this will print a progress bar. + + Copy a full disk from one NBD server to another. + ++=head2 nbdcopy nbd://server1 nbd://server2 --blkhash ++ ++Copy a full disk from one NBD server to another, computing the blkhash ++(similar to a checksum) of the disk and printing that. ++ + =head2 nbdcopy -- [ qemu-nbd -r -f qcow2 https://example.com/disk.qcow2 ] - + + Run L as a subprocess to open URL +@@ -106,6 +112,49 @@ I<--no-extents>), or by detecting runs of zeroes (see I<-S>). If you + use I<--allocated> then nbdcopy creates a fully allocated, non-sparse + output on the destination. + ++=item B<--blkhash> ++ ++=item B<--blkhash=md5> ++ ++=item B<--blkhash=md5/>SIZE ++ ++=item B<--blkhash=sha1> ++ ++=item B<--blkhash=sha1/>SIZE ++ ++=item B<--blkhash=sha256> ++ ++=item B<--blkhash=sha256/>SIZE ++ ++=item B<--blkhash=sha512> ++ ++=item B<--blkhash=sha512/>SIZE ++ ++Compute the blkhash of the disk image during the copy and print it at ++the end. Blkhash (L) is an algorithm ++similar to a checksum except that it can be computed in parallel. ++Note that it is not compatible with programs like L or ++L. Using this option will make nbdcopy slower. ++ ++You can choose the digest function from C, C, C ++(recommended), or C. You can also choose the block size, the ++default being C<64k> (recommended). ++ ++The I<--blkhash> option without parameters selects sha256/64k. ++ ++To compute the blkhash of a file without copying it, you can do: ++ ++ nbdcopy --blkhash -- disk.raw null: ++ ++or if the format is qcow2: ++ ++ nbdcopy --blkhash -- [ qemu-nbd -f qcow2 disk.qcow2 ] null: ++ ++=item B<--blkhash-file=>FILE ++ ++If I<--blkhash> is selected, choose where to print the blkhash to. ++The default is stdout. ++ + =item B<-C> N + + =item B<--connections=>N +@@ -306,7 +355,9 @@ L, + L, + L, + L, +-L. ++L, ++L, ++L. + + =head1 AUTHORS + +diff --git a/copy/synch-copying.c b/copy/synch-copying.c +index 200c97f6..4c65c86d 100644 +--- a/copy/synch-copying.c ++++ b/copy/synch-copying.c +@@ -49,6 +49,7 @@ synch_copying (void) + size_t r; + + while ((r = src->ops->synch_read (src, buf, request_size, offset)) > 0) { ++ update_blkhash ((const char *) buf, offset, request_size); + dst->ops->synch_write (dst, buf, r, offset); + offset += r; + progress_bar (offset, src->size); +@@ -82,6 +83,7 @@ synch_copying (void) + assert (exts.ptr[i].length <= count); + + if (exts.ptr[i].zero) { ++ update_blkhash (NULL, offset, exts.ptr[i].length); + if (!dst->ops->synch_zero (dst, offset, exts.ptr[i].length, false) && + !dst->ops->synch_zero (dst, offset, exts.ptr[i].length, true)) { + /* If efficient zeroing (punching a hole or allocating +@@ -103,6 +105,7 @@ synch_copying (void) + exit (EXIT_FAILURE); + } + ++ update_blkhash ((const char *) buf, offset, r); + dst->ops->synch_write (dst, buf, r, offset); + offset += r; + progress_bar (offset, src->size); +-- +2.47.1 + diff --git a/SOURCES/0008-copy-Fix-crash-when-blkhash-size-is-not-a-power-of-2.patch b/SOURCES/0008-copy-Fix-crash-when-blkhash-size-is-not-a-power-of-2.patch new file mode 100644 index 0000000..9936408 --- /dev/null +++ b/SOURCES/0008-copy-Fix-crash-when-blkhash-size-is-not-a-power-of-2.patch @@ -0,0 +1,33 @@ +From 7c92a9c782970c168f12107eb5cf7816d4741710 Mon Sep 17 00:00:00 2001 +From: "Richard W.M. Jones" +Date: Mon, 7 Apr 2025 11:35:25 +0100 +Subject: [PATCH] copy: Fix crash when blkhash size is not a power of 2 + +nbdcopy: blkhash.c:105: init_blkhash: Assertion `is_power_of_2 (blkhash_size)' failed. + +The check for this was wrong, resulting in a later assertion failure +instead of an error message. + +Reported-by: Vera Wu +Fixes: https://issues.redhat.com/browse/RHEL-85513 +(cherry picked from commit 6c6e0822c854e423d79bef87caf1c20c5bdb5eb5) +--- + copy/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/copy/main.c b/copy/main.c +index 76b09ded..613b9ede 100644 +--- a/copy/main.c ++++ b/copy/main.c +@@ -220,7 +220,7 @@ main (int argc, char *argv[]) + prog, "--blkhash", error, pstr); + exit (EXIT_FAILURE); + } +- if (! is_power_of_2 (blkhash_size)) { ++ if (! is_power_of_2 (i64)) { + fprintf (stderr, "%s: %s is not a power of two: %s\n", + prog, "--blkhash", &optarg[i+1]); + exit (EXIT_FAILURE); +-- +2.47.1 + diff --git a/SOURCES/0009-copy-Define-block_type-outside-of-block-struct.patch b/SOURCES/0009-copy-Define-block_type-outside-of-block-struct.patch new file mode 100644 index 0000000..37e4203 --- /dev/null +++ b/SOURCES/0009-copy-Define-block_type-outside-of-block-struct.patch @@ -0,0 +1,66 @@ +From b7cd0e53f61fc72be3025f0e1969507279af8842 Mon Sep 17 00:00:00 2001 +From: Nir Soffer +Date: Sun, 13 Apr 2025 14:51:09 +0000 +Subject: [PATCH] copy: Define block_type outside of block struct + +This make the code easier to follow and maintain. + +(cherry picked from commit dc5f0e6c79e7aa03ba634b71d4780f6d7d039cdd) +--- + copy/blkhash.c | 38 ++++++++++++++++++++------------------ + 1 file changed, 20 insertions(+), 18 deletions(-) + +diff --git a/copy/blkhash.c b/copy/blkhash.c +index 622d8a39..526db4d2 100644 +--- a/copy/blkhash.c ++++ b/copy/blkhash.c +@@ -43,26 +43,28 @@ + + #ifdef HAVE_GNUTLS + ++/* unknown => We haven't seen this block yet. 'ptr' is NULL. ++ * ++ * zero => The block is all zeroes. 'ptr' is NULL. ++ * ++ * data => The block is all data, and we have seen the whole block, ++ * and the hash has been computed. 'ptr' points to the computed ++ * hash. 'n' is unused. ++ * ++ * incomplete => Part of the block was seen. 'ptr' points to the ++ * data block, waiting to be completed. 'n' is the number of bytes ++ * seen so far. We will compute the hash and turn this into a ++ * 'data' or 'zero' block, either when we have seen all bytes of ++ * this block, or at the end. ++ * ++ * Note that this code assumes that we are called exactly once for a ++ * range in the disk image. ++ */ ++enum block_type { block_unknown = 0, block_zero, block_data, block_incomplete }; ++ + /* We will have one of these structs per blkhash block. */ + struct block { +- /* unknown => We haven't seen this block yet. 'ptr' is NULL. +- * +- * zero => The block is all zeroes. 'ptr' is NULL. +- * +- * data => The block is all data, and we have seen the whole block, +- * and the hash has been computed. 'ptr' points to the computed +- * hash. 'n' is unused. +- * +- * incomplete => Part of the block was seen. 'ptr' points to the +- * data block, waiting to be completed. 'n' is the number of bytes +- * seen so far. We will compute the hash and turn this into a +- * 'data' or 'zero' block, either when we have seen all bytes of +- * this block, or at the end. +- * +- * Note that this code assumes that we are called exactly once for a +- * range in the disk image. +- */ +- enum { block_unknown = 0, block_zero, block_data, block_incomplete } type; ++ enum block_type type; + void *ptr; + size_t n; + }; +-- +2.47.1 + diff --git a/SOURCES/0010-copy-Shrink-struct-block.patch b/SOURCES/0010-copy-Shrink-struct-block.patch new file mode 100644 index 0000000..6fa0dd3 --- /dev/null +++ b/SOURCES/0010-copy-Shrink-struct-block.patch @@ -0,0 +1,78 @@ +From 298297a2ac28dc443b64cf0610b53e3c72bf4d39 Mon Sep 17 00:00:00 2001 +From: Nir Soffer +Date: Sun, 13 Apr 2025 14:54:31 +0000 +Subject: [PATCH] copy: Shrink struct block + +Change n to uint32_t since block size bigger than 4g does not make +sense. Move the type field to the end to shrink struct size from 24 +bytes to 16. + +This minimizes memory usage and improves locality. For example we can +have 4 blocks in a single cache line instead of 2.5. + +Testing shows up to 8% improvement in time and 33% in maximum resident +set size with 1000g empty image. With images full of zeros or images +full of non-zero bytes we see lower memory usage but no difference in +time. + +| size | content | tool | source | version | memory | time | +|--------|---------|------------|--------|---------|----------|----------| +| 1000g | hole | nbdcopy | file | before | 644716k | 3.33s | +| 1000g | hole | nbdcopy | file | after | 516716k | 3.10s | +| 1000g | hole | nbdcopy | nbd | before | 388844k | 1.13s | +| 1000g | hole | nbdcopy | nbd | after | 260716k | 1.04s | +| 1000g | hole | blksum | nbd | - | 10792k | 0.29s | +| 1000g | hole | sha256sum | file | - | *2796k | *445.00s | +|--------|---------|------------|--------|---------|----------|----------| +| 10g | zero | nbdcopy | file | before | 20236k | 1.33s | +| 10g | zero | nbdcopy | file | after | 18796k | 1.32s | +| 10g | zero | nbdcopy | nbd | before | 32648k | 8.21s | +| 10g | zero | nbdcopy | nbd | after | 31416k | 8.23s | +| 10g | zero | nbdcopy | pipe | before | 19052k | 4.56s | +| 10g | zero | nbdcopy | pipe | after | 17772k | 4.56s | +| 10g | zero | blksum | nbd | - | 13948k | 3.90s | +| 10g | zero | blksum | pipe | - | 10340k | 0.55s | +| 10g | zero | sha256sum | file | - | 2796k | 4.45s | +|--------|---------|------------|--------|---------|----------|----------| +| 10g | data | nbdcopy | file | before | 20224k | 1.28s | +| 10g | data | nbdcopy | file | after | 19036k | 1.26s | +| 10g | data | nbdcopy | nbd | before | 32792k | 8.02s | +| 10g | data | nbdcopy | nbd | after | 31512k | 8.02s | +| 10g | data | nbdcopy | pipe | before | 19052k | 4.56s | +| 10g | data | nbdcopy | pipe | after | 17772k | 4.57s | +| 10g | data | blksum | nbd | - | 13888k | 3.88s | +| 10g | data | blksum | pipe | - | 12512k | 1.10s | +| 10g | data | sha256sum | file | - | 2788k | 4.49s | + +* estimated based on 10g image + +Measured using: + + /usr/bin/time -f "memory=%Mk time=%es" ./nbdcopy --blkhash ... + +Tested on Fedora 41 VM on MacBook Pro M2 Max. + +(cherry picked from commit f3e1b5fe8423558b49a2b829c0fe13f601b475f2) +--- + copy/blkhash.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/copy/blkhash.c b/copy/blkhash.c +index 526db4d2..41253ec8 100644 +--- a/copy/blkhash.c ++++ b/copy/blkhash.c +@@ -64,9 +64,9 @@ enum block_type { block_unknown = 0, block_zero, block_data, block_incomplete }; + + /* We will have one of these structs per blkhash block. */ + struct block { +- enum block_type type; + void *ptr; +- size_t n; ++ uint32_t n; ++ enum block_type type; + }; + + DEFINE_VECTOR_TYPE(blocks, struct block); +-- +2.47.1 + diff --git a/SOURCES/0011-copy-Enable-zero-optimization-for-allocated-extents.patch b/SOURCES/0011-copy-Enable-zero-optimization-for-allocated-extents.patch new file mode 100644 index 0000000..7caac8f --- /dev/null +++ b/SOURCES/0011-copy-Enable-zero-optimization-for-allocated-extents.patch @@ -0,0 +1,65 @@ +From fa6a07a8fd5cc3216eb53cd2ad54e9e0dea42036 Mon Sep 17 00:00:00 2001 +From: Nir Soffer +Date: Sun, 13 Apr 2025 23:39:15 +0000 +Subject: [PATCH] copy: Enable zero optimization for allocated extents + +We optimized zero extents but computed the hash for all data blocks, +including data blocks full of zeros. Detecting a zero block is 20-100 +times faster than computing a hash, depending on the machine and the +hash algorithm. + +When adding a completed block, detect zero blocks and mark the block as +zero block, saving the computation of the hash and the allocation of the +digest buffer. + +This optimization is already implemented for incomplete blocks. + +Testing shows that computing a hash for image full of zeros is up to 7.4 +times faster, and memory usage is up to 40% lower. + +| size | content | tool | source | version | memory | time | +|--------|---------|------------|--------|---------|----------|----------| +| 10g | zero | nbdcopy | file | before | 20236k | 1.33s | +| 10g | zero | nbdcopy | file | after | 13212k | 0.33s | +| 10g | zero | nbdcopy | nbd | before | 32648k | 8.21s | +| 10g | zero | nbdcopy | nbd | after | 24996k | 3.32s | +| 10g | zero | nbdcopy | pipe | before | 19052k | 4.56s | +| 10g | zero | nbdcopy | pipe | after | 11244k | 0.61s | +| 10g | zero | blksum | nbd | - | 13948k | 3.90s | +| 10g | zero | blksum | pipe | - | 10340k | 0.55s | +| 10g | zero | sha256sum | file | - | 2796k | 4.45s | +|--------|---------|------------|--------|---------|----------|----------| +| 10g | data | nbdcopy | file | before | 20224k | 1.28s | +| 10g | data | nbdcopy | file | after | 20400k | 1.28s | +| 10g | data | nbdcopy | nbd | before | 32792k | 8.02s | +| 10g | data | nbdcopy | nbd | after | 32536k | 8.01s | +| 10g | data | nbdcopy | pipe | before | 19052k | 4.56s | +| 10g | data | nbdcopy | pipe | after | 19048k | 4.55s | +| 10g | data | blksum | nbd | - | 13888k | 3.88s | +| 10g | data | blksum | pipe | - | 12512k | 1.10s | +| 10g | data | sha256sum | file | - | 2788k | 4.49s | + +(cherry picked from commit efbe283f9fcfc8b4e57370f71356b1bfe7ffd0a4) +--- + copy/blkhash.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/copy/blkhash.c b/copy/blkhash.c +index 41253ec8..92ffafbd 100644 +--- a/copy/blkhash.c ++++ b/copy/blkhash.c +@@ -213,7 +213,10 @@ set_complete_block (uint64_t blknum, const char *buf) + /* Assert that we haven't seen this block before. */ + assert (b.type == block_unknown); + +- if (buf) { ++ /* Detecting a zero block is 20-100 times faster than computing a hash ++ * depending on the machine and the algorithm. ++ */ ++ if (buf && !is_zero (buf, blkhash_size)) { + b.type = block_data; + + /* Compute the hash of the whole block now. */ +-- +2.47.1 + diff --git a/SOURCES/0012-copy-Fix-corrupted-hash-on-incomplete-read.patch b/SOURCES/0012-copy-Fix-corrupted-hash-on-incomplete-read.patch new file mode 100644 index 0000000..bbf44fa --- /dev/null +++ b/SOURCES/0012-copy-Fix-corrupted-hash-on-incomplete-read.patch @@ -0,0 +1,39 @@ +From fcac97261f26ad486e45dedfdfa6da3ee04fe6ca Mon Sep 17 00:00:00 2001 +From: Nir Soffer +Date: Mon, 14 Apr 2025 21:40:16 +0000 +Subject: [PATCH] copy: Fix corrupted hash on incomplete read + +When using synchronous read with unknown file size, if the read was +shorter than request size, we updated the hash with the complete buffer, +inserting leftover bytes from the previous read into the hash. + +I'm not sure if there is validation for source size and number of blocks +in the blocks vector, so this can generate a corrupted hash silently. + +We probably need to validate later that the image size matches the size +of the hashed data. + +I could not reproduce a corrupted hash, the issue discovered by reading +the code. + +(cherry picked from commit 49cd9fbc0022c0ae5bc5d0b9dd48219dfb92b2f7) +--- + copy/synch-copying.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/copy/synch-copying.c b/copy/synch-copying.c +index 4c65c86d..b030a85a 100644 +--- a/copy/synch-copying.c ++++ b/copy/synch-copying.c +@@ -49,7 +49,7 @@ synch_copying (void) + size_t r; + + while ((r = src->ops->synch_read (src, buf, request_size, offset)) > 0) { +- update_blkhash ((const char *) buf, offset, request_size); ++ update_blkhash ((const char *) buf, offset, r); + dst->ops->synch_write (dst, buf, r, offset); + offset += r; + progress_bar (offset, src->size); +-- +2.47.1 + diff --git a/SOURCES/copy-patches.sh b/SOURCES/copy-patches.sh index 297e5b2..0333bd5 100755 --- a/SOURCES/copy-patches.sh +++ b/SOURCES/copy-patches.sh @@ -6,7 +6,7 @@ set -e # directory. Use it like this: # ./copy-patches.sh -rhel_version=9.6 +rhel_version=9.7 # Check we're in the right directory. if [ ! -f libnbd.spec ]; then diff --git a/SPECS/libnbd.spec b/SPECS/libnbd.spec index 153bf98..cea566c 100644 --- a/SPECS/libnbd.spec +++ b/SPECS/libnbd.spec @@ -21,7 +21,7 @@ Name: libnbd Version: 1.20.3 -Release: 1%{?dist} +Release: 4%{?dist} Summary: NBD client library in userspace License: LGPL-2.0-or-later AND BSD-3-Clause @@ -46,6 +46,13 @@ Patch0002: 0002-lib-Don-t-overwrite-error-in-nbd_opt_-go-info.patch Patch0003: 0003-generator-Restore-assignment-to-local-err.patch Patch0004: 0004-generator-states-newstyle.c-Quote-untrusted-string-f.patch Patch0005: 0005-generator-states-newstyle.c-Don-t-sign-extend-escape.patch +Patch0006: 0006-copy-Set-the-total-size-in-bytes-copied.patch +Patch0007: 0007-copy-Add-blkhash-option.patch +Patch0008: 0008-copy-Fix-crash-when-blkhash-size-is-not-a-power-of-2.patch +Patch0009: 0009-copy-Define-block_type-outside-of-block-struct.patch +Patch0010: 0010-copy-Shrink-struct-block.patch +Patch0011: 0011-copy-Enable-zero-optimization-for-allocated-extents.patch +Patch0012: 0012-copy-Fix-corrupted-hash-on-incomplete-read.patch %if 0%{verify_tarball_signature} BuildRequires: gnupg2 @@ -390,6 +397,10 @@ make %{?_smp_mflags} check || { %changelog +* Tue Apr 15 2025 Richard W.M. Jones - 1.20.3-4 +- Add nbdcopy --blkhash option + resolves: RHEL-85509 + * Sat Sep 28 2024 Richard W.M. Jones - 1.20.3-1 - Rebase to libnbd 1.20.3