From 303b319488b652efc68472e1580f5ec0df3e8eba Mon Sep 17 00:00:00 2001
Message-ID: <303b319488b652efc68472e1580f5ec0df3e8eba.1778441857.git.khanicov@redhat.com>
From: Ondrej Kozina <okozina@redhat.com>
Date: Wed, 10 Dec 2025 16:37:20 +0100
Subject: [PATCH] reencrypt: add more graceful reencryption error path.

This adds a proper reencryption error path for non-critical
failures that do not require full LUKS2 reencryption recovery.

While non-critical reencryption failures were properly identified
in the former code, the graceful recovery was never implemented.
This affected the state of live device mappings after a failed
reencryption. For example, a read error on the data device did not
trigger the LUKS2 recovery scenario (correctly), but the overlay
reencryption device stack for online reencryption remained stuck
with the hotzone layer suspended.

This patch addresses the issue.
---
 lib/luks2/luks2_reencrypt.c | 86 +++++++++++++++++++++++++++++++++----
 1 file changed, 78 insertions(+), 8 deletions(-)

diff --git a/lib/luks2/luks2_reencrypt.c b/lib/luks2/luks2_reencrypt.c
index 21bd6674..1845a782 100644
--- a/lib/luks2/luks2_reencrypt.c
+++ b/lib/luks2/luks2_reencrypt.c
@@ -2374,11 +2374,6 @@ err:
 	return r;
 }
 
-/* TODO:
- * 	1) audit error path. any error in this routine is fatal and should be unlikely.
- * 	   usually it would hint some collision with another userspace process touching
- * 	   dm devices directly.
- */
 static reenc_status_t reenc_refresh_helper_devices(struct crypt_device *cd, const char *overlay,
 		const char *hotzone)
 {
@@ -4125,14 +4120,24 @@ static reenc_status_t reencrypt_step(struct crypt_device *cd,
 	/* metadata commit point */
 	r = reencrypt_hotzone_protect_final(cd, hdr, rh->reenc_keyslot, rp, rh->reenc_buffer, rh->read);
 	if (r < 0) {
-		/* severity normal */
+		/*
+		 * Nothing was written in hotzone area yet. Even if metadata write failed the previous
+		 * state is still valid. If the metadata write passed and there was another
+		 * error it's harmless to do recovery. Recovery may be run several times with no
+		 * negative side effect.
+		 */
 		log_err(cd, _("Failed to write reencryption resilience metadata."));
 		return REENC_ERR_ROLLBACK_MEMORY;
 	}
 
 	r = crypt_storage_wrapper_decrypt(rh->cw1, rh->offset, rh->reenc_buffer, rh->read);
 	if (r) {
-		/* severity normal */
+		/*
+		 * Ideally, this would be a specific error (REENC_ERR_ROLLBACK_METADATA) case where
+		 * it would rollback on-disk metadata to the last valid state (still no write in
+		 * hotzone area). But it's not worth the effort. This will trigger full LUKS2
+		 * reencryption recovery despite not being necessary.
+		 */
 		log_err(cd, _("Decryption failed."));
 		return REENC_ERR_ROLLBACK_MEMORY;
 	}
@@ -4156,7 +4161,6 @@ static reenc_status_t reencrypt_step(struct crypt_device *cd,
 	}
 
 	if (online) {
-		/* severity normal */
 		log_dbg(cd, "Resuming device %s", rh->hotzone_name);
 		r = dm_resume_device(cd, rh->hotzone_name, DM_RESUME_PRIVATE);
 		if (r) {
@@ -4246,6 +4250,7 @@ static int replace_hotzone_device_with_error(struct crypt_device *cd, struct luk
 
 static int teardown_overlay_devices(struct crypt_device *cd, struct luks2_reencrypt *rh)
 {
+	bool overlay_suspended, hotzone_suspended;
 	int r;
 
 	/* Reload device with current LUKS2 segments */
@@ -4255,6 +4260,44 @@ static int teardown_overlay_devices(struct crypt_device *cd, struct luks2_reencr
 		return r;
 	}
 
+	overlay_suspended = dm_status_suspended(cd, rh->overlay_name) > 0;
+	hotzone_suspended = dm_status_suspended(cd, rh->hotzone_name) > 0;
+
+	/*
+	 * The overlay (if suspended) may hold already queued I/Os.
+	 * Reload the overlay device with the table identical to the one
+	 * loaded to the top level device. The overlay device will be dropped
+	 * shortly after successful top level device resume.
+	 */
+	if (overlay_suspended) {
+		log_dbg(cd, "Reverting suspended device %s to previous metadata segments", rh->overlay_name);
+		r = LUKS2_reload(cd, rh->overlay_name, rh->vks, rh->device_size, rh->flags);
+		if (r) {
+			log_err(cd, _("Failed to reload device %s."), rh->overlay_name);
+			return r;
+		}
+	}
+
+	/*
+	 * If the hotzone is suspended we must error all pending I/O waiting in the device. The
+	 * reencryption step was not completed and the pending I/O would corrupt the data on data
+	 * device.
+	 *
+	 * If the hotzone table replacement fails we must abort!
+	 */
+	if (hotzone_suspended && (r = replace_hotzone_device_with_error(cd, rh)))
+		return r;
+
+	if (overlay_suspended) {
+		/* Resume will pass since the hotzone (if previously suspended) is now
+		 * replaced with live dm-error table */
+		r = dm_resume_device(cd, rh->overlay_name, DM_RESUME_PRIVATE);
+		if (r) {
+			log_err(cd, _("Failed to resume device %s."), rh->overlay_name);
+			return r;
+		}
+	}
+
 	/* Now we can switch original top level device away from overlay device */
 	r = dm_resume_device(cd, rh->device_name, DM_SUSPEND_SKIP_LOCKFS | DM_SUSPEND_NOFLUSH);
 	if (r) {
@@ -4322,6 +4365,29 @@ static int reencrypt_teardown_ok(struct crypt_device *cd, struct luks2_hdr *hdr,
 	return 0;
 }
 
+static void reencrypt_teardown_rollback(struct crypt_device *cd, struct luks2_hdr *hdr,
+		struct luks2_reencrypt *rh)
+{
+	/*
+	 * We cannot rollback for REENC_PROTECTION_NONE. It does not commit metadata as
+	 * it progresses. In this case, the device stack is intentionally left as-is.
+	 */
+	if (rh->rp.type <= REENC_PROTECTION_NONE)
+		return;
+
+	/*
+	 * If metadata rollback fails, we cannot proceed with device teardown
+	 * as we do not have proper metadata snapshot for LUKS2_reload().
+	 */
+	if (LUKS2_hdr_rollback(cd, hdr))
+		return;
+
+	if (!rh->online)
+		return;
+
+	teardown_overlay_devices(cd, rh);
+}
+
 static void reencrypt_teardown_fatal(struct crypt_device *cd, struct luks2_reencrypt *rh)
 {
 	log_err(cd, _("Fatal error while reencrypting chunk starting at %" PRIu64 ", %" PRIu64 " sectors long."),
@@ -4347,6 +4413,10 @@ static int reencrypt_teardown(struct crypt_device *cd, struct luks2_hdr *hdr,
 			progress(rh->device_size, rh->progress, usrptr);
 		r = reencrypt_teardown_ok(cd, hdr, rh);
 		break;
+	case REENC_ERR_ROLLBACK_MEMORY:
+		reencrypt_teardown_rollback(cd, hdr, rh);
+		r = -EINVAL;
+		break;
 	case REENC_ERR_FATAL:
 		reencrypt_teardown_fatal(cd, rh);
 		/* fall-through */
-- 
2.53.0