91 lines
3.4 KiB
Diff
91 lines
3.4 KiB
Diff
|
From 21cadc8ed200ad9af13b217b441b9f12cd2163ee Mon Sep 17 00:00:00 2001
|
||
|
From: Kevin Wolf <kwolf@redhat.com>
|
||
|
Date: Wed, 13 Mar 2024 16:30:00 +0100
|
||
|
Subject: [PATCH 1/4] mirror: Don't call job_pause_point() under graph lock
|
||
|
|
||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||
|
RH-MergeRequest: 231: Fix deadlock and crash during storage migration
|
||
|
RH-Jira: RHEL-28125
|
||
|
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||
|
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||
|
RH-Commit: [1/3] 093910fcd5ea1516d0ed48a9cd73dad6170590f3 (kmwolf/centos-qemu-kvm)
|
||
|
|
||
|
Calling job_pause_point() while holding the graph reader lock
|
||
|
potentially results in a deadlock: bdrv_graph_wrlock() first drains
|
||
|
everything, including the mirror job, which pauses it. The job is only
|
||
|
unpaused at the end of the drain section, which is when the graph writer
|
||
|
lock has been successfully taken. However, if the job happens to be
|
||
|
paused at a pause point where it still holds the reader lock, the writer
|
||
|
lock can't be taken as long as the job is still paused.
|
||
|
|
||
|
Mark job_pause_point() as GRAPH_UNLOCKED and fix mirror accordingly.
|
||
|
|
||
|
Cc: qemu-stable@nongnu.org
|
||
|
Buglink: https://issues.redhat.com/browse/RHEL-28125
|
||
|
Fixes: 004915a96a7a ("block: Protect bs->backing with graph_lock")
|
||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||
|
Message-ID: <20240313153000.33121-1-kwolf@redhat.com>
|
||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||
|
(cherry picked from commit ae5a40e8581185654a667fbbf7e4adbc2a2a3e45)
|
||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||
|
---
|
||
|
block/mirror.c | 10 ++++++----
|
||
|
include/qemu/job.h | 2 +-
|
||
|
2 files changed, 7 insertions(+), 5 deletions(-)
|
||
|
|
||
|
diff --git a/block/mirror.c b/block/mirror.c
|
||
|
index 5145eb53e1..1bdce3b657 100644
|
||
|
--- a/block/mirror.c
|
||
|
+++ b/block/mirror.c
|
||
|
@@ -479,9 +479,9 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
|
||
|
return bytes_handled;
|
||
|
}
|
||
|
|
||
|
-static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s)
|
||
|
+static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s)
|
||
|
{
|
||
|
- BlockDriverState *source = s->mirror_top_bs->backing->bs;
|
||
|
+ BlockDriverState *source;
|
||
|
MirrorOp *pseudo_op;
|
||
|
int64_t offset;
|
||
|
/* At least the first dirty chunk is mirrored in one iteration. */
|
||
|
@@ -489,6 +489,10 @@ static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s)
|
||
|
bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
|
||
|
int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);
|
||
|
|
||
|
+ bdrv_graph_co_rdlock();
|
||
|
+ source = s->mirror_top_bs->backing->bs;
|
||
|
+ bdrv_graph_co_rdunlock();
|
||
|
+
|
||
|
bdrv_dirty_bitmap_lock(s->dirty_bitmap);
|
||
|
offset = bdrv_dirty_iter_next(s->dbi);
|
||
|
if (offset < 0) {
|
||
|
@@ -1066,9 +1070,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
|
||
|
mirror_wait_for_free_in_flight_slot(s);
|
||
|
continue;
|
||
|
} else if (cnt != 0) {
|
||
|
- bdrv_graph_co_rdlock();
|
||
|
mirror_iteration(s);
|
||
|
- bdrv_graph_co_rdunlock();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
diff --git a/include/qemu/job.h b/include/qemu/job.h
|
||
|
index 9ea98b5927..2b873f2576 100644
|
||
|
--- a/include/qemu/job.h
|
||
|
+++ b/include/qemu/job.h
|
||
|
@@ -483,7 +483,7 @@ void job_enter(Job *job);
|
||
|
*
|
||
|
* Called with job_mutex *not* held.
|
||
|
*/
|
||
|
-void coroutine_fn job_pause_point(Job *job);
|
||
|
+void coroutine_fn GRAPH_UNLOCKED job_pause_point(Job *job);
|
||
|
|
||
|
/**
|
||
|
* @job: The job that calls the function.
|
||
|
--
|
||
|
2.39.3
|
||
|
|