qemu-kvm/kvm-block-move-drain-outside-of-bdrv_attach_child_common.patch
Miroslav Rezanina ebd0a23ab5 * Mon Jul 28 2025 Miroslav Rezanina <mrezanin@redhat.com> - 10.0.0-8
- kvm-migration-multifd-move-macros-to-multifd-header.patch [RHEL-59697]
- kvm-migration-refactor-channel-discovery-mechanism.patch [RHEL-59697]
- kvm-migration-Add-save_postcopy_prepare-savevm-handler.patch [RHEL-59697]
- kvm-migration-ram-Implement-save_postcopy_prepare.patch [RHEL-59697]
- kvm-tests-qtest-migration-consolidate-set-capabilities.patch [RHEL-59697]
- kvm-migration-write-zero-pages-when-postcopy-enabled.patch [RHEL-59697]
- kvm-migration-enable-multifd-and-postcopy-together.patch [RHEL-59697]
- kvm-migration-Add-qtest-for-migration-over-RDMA.patch [RHEL-59697]
- kvm-qtest-migration-rdma-Enforce-RLIMIT_MEMLOCK-128MB-re.patch [RHEL-59697]
- kvm-qtest-migration-rdma-Add-test-for-rdma-migration-wit.patch [RHEL-59697]
- kvm-tests-qtest-migration-add-postcopy-tests-with-multif.patch [RHEL-59697]
- kvm-file-posix-Fix-aio-threads-performance-regression-af.patch [RHEL-96854]
- kvm-block-remove-outdated-comments-about-AioContext-lock.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-read-locked-bdrv_reopen_.patch [RHEL-88561]
- kvm-block-snapshot-move-drain-outside-of-read-locked-bdr.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-read-locked-bdrv_inactiv.patch [RHEL-88561]
- kvm-block-mark-bdrv_parent_change_aio_context-GRAPH_RDLO.patch [RHEL-88561]
- kvm-block-mark-change_aio_ctx-callback-and-instances-as-.patch [RHEL-88561]
- kvm-block-mark-bdrv_child_change_aio_context-GRAPH_RDLOC.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_change_aio_context-.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_try_change_aio_cont.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_attach_child_common.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_set_backing_hd_drai.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_root_attach_child.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_attach_child.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-quorum_add_child.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_root_unref_child.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-quorum_del_child.patch [RHEL-88561]
- kvm-blockdev-drain-while-unlocked-in-internal_snapshot_a.patch [RHEL-88561]
- kvm-blockdev-drain-while-unlocked-in-external_snapshot_a.patch [RHEL-88561]
- kvm-block-mark-bdrv_drained_begin-and-friends-as-GRAPH_U.patch [RHEL-88561]
- kvm-iotests-graph-changes-while-io-remove-image-file-aft.patch [RHEL-88561]
- kvm-iotests-graph-changes-while-io-add-test-case-with-re.patch [RHEL-88561]
- Resolves: RHEL-59697
  (Allow multifd+postcopy features being enabled together, but only use multifd during precopy )
- Resolves: RHEL-96854
  (Performance Degradation(aio=threads) between Upstream Commit b75c5f9 and 984a32f)
- Resolves: RHEL-88561
  (qemu graph deadlock during job-dismiss)
2025-07-28 02:29:30 -04:00

261 lines
9.7 KiB
Diff

From faa96d060a393458e2e1f9ba77c53adf7d52bc85 Mon Sep 17 00:00:00 2001
From: Fiona Ebner <f.ebner@proxmox.com>
Date: Fri, 30 May 2025 17:10:47 +0200
Subject: [PATCH 22/33] block: move drain outside of
bdrv_attach_child_common(_abort)()
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 393: block: do not drain while holding the graph lock
RH-Jira: RHEL-88561
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Commit: [10/21] 0b8f078c657c094dbd6f967eaaccafd8112a9bda (kmwolf/centos-qemu-kvm)
This is part of resolving the deadlock mentioned in commit "block:
move draining out of bdrv_change_aio_context() and mark GRAPH_RDLOCK".
The function bdrv_attach_child_common_abort() is used only as the
abort callback in bdrv_attach_child_common_drv transactions, so the
tran_finalize() calls of such transactions need to be in drained
sections too.
All code paths are covered:
The bdrv_attach_child_common_drv transactions are only used in
bdrv_attach_child_common(), so it is enough to check callers of
bdrv_attach_child_common() following the transactions.
bdrv_attach_child_common() is called by:
1. bdrv_attach_child_noperm(), which does not finalize the
transaction yet.
2. bdrv_root_attach_child(), where a drained section is introduced.
bdrv_attach_child_noperm() is called by:
1. bdrv_attach_child(), where a drained section is introduced.
2. bdrv_set_file_or_backing_noperm(), which does not finalize the
transaction yet.
3. bdrv_append(), where a drained section is introduced.
bdrv_set_file_or_backing_noperm() is called by:
1. bdrv_set_backing_hd_drained(), where a drained section is
introduced.
2. bdrv_reopen_parse_file_or_backing(), which does not finalize the
transaction yet. Draining the old child bs currently happens under
the graph lock there. This is replaced with an assertion, because
the drain will be moved further up to the caller.
bdrv_reopen_parse_file_or_backing() is called by:
1. bdrv_reopen_prepare(), which does not finalize the transaction yet.
bdrv_reopen_prepare() is called by:
1. bdrv_reopen_multiple(), which does finalize the transaction. It is
called after bdrv_reopen_queue(), which starts a drained section.
The drained section ends, when bdrv_reopen_queue_free() is called
at the end of bdrv_reopen_multiple().
This resolves all code paths.
The functions bdrv_set_backing_hd_drained(), bdrv_attach_child() and
bdrv_root_attach_child() run under the graph lock, so they are not
actually allowed to drain. This will be addressed in the following
commits.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Message-ID: <20250530151125.955508-11-f.ebner@proxmox.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 2b833595aa21679145cfe67ba720113b165c19ef)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 40 ++++++++++++++++++++++++----------------
1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/block.c b/block.c
index 3c2e8c5592..2c18e0a4fa 100644
--- a/block.c
+++ b/block.c
@@ -3028,10 +3028,8 @@ static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque)
bdrv_replace_child_noperm(s->child, NULL);
if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
- bdrv_drain_all_begin();
bdrv_try_change_aio_context_locked(bs, s->old_child_ctx, NULL,
&error_abort);
- bdrv_drain_all_end();
}
if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) {
@@ -3043,10 +3041,8 @@ static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque)
/* No need to visit `child`, because it has been detached already */
visited = g_hash_table_new(NULL, NULL);
- bdrv_drain_all_begin();
ret = s->child->klass->change_aio_ctx(s->child, s->old_parent_ctx,
visited, tran, &error_abort);
- bdrv_drain_all_end();
g_hash_table_destroy(visited);
/* transaction is supposed to always succeed */
@@ -3075,6 +3071,9 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
*
* Both @parent_bs and @child_bs can move to a different AioContext in this
* function.
+ *
+ * All block nodes must be drained before this function is called until after
+ * the transaction is finalized.
*/
static BdrvChild * GRAPH_WRLOCK
bdrv_attach_child_common(BlockDriverState *child_bs,
@@ -3118,10 +3117,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
parent_ctx = bdrv_child_get_parent_aio_context(new_child);
if (child_ctx != parent_ctx) {
Error *local_err = NULL;
- bdrv_drain_all_begin();
int ret = bdrv_try_change_aio_context_locked(child_bs, parent_ctx, NULL,
&local_err);
- bdrv_drain_all_end();
if (ret < 0 && child_class->change_aio_ctx) {
Transaction *aio_ctx_tran = tran_new();
@@ -3129,11 +3126,9 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
bool ret_child;
g_hash_table_add(visited, new_child);
- bdrv_drain_all_begin();
ret_child = child_class->change_aio_ctx(new_child, child_ctx,
visited, aio_ctx_tran,
NULL);
- bdrv_drain_all_end();
if (ret_child == true) {
error_free(local_err);
ret = 0;
@@ -3189,6 +3184,9 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
*
* After calling this function, the transaction @tran may only be completed
* while holding a writer lock for the graph.
+ *
+ * All block nodes must be drained before this function is called until after
+ * the transaction is finalized.
*/
static BdrvChild * GRAPH_WRLOCK
bdrv_attach_child_noperm(BlockDriverState *parent_bs,
@@ -3244,6 +3242,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
GLOBAL_STATE_CODE();
+ bdrv_drain_all_begin();
child = bdrv_attach_child_common(child_bs, child_name, child_class,
child_role, perm, shared_perm, opaque,
tran, errp);
@@ -3256,6 +3255,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
out:
tran_finalize(tran, ret);
+ bdrv_drain_all_end();
bdrv_schedule_unref(child_bs);
@@ -3283,6 +3283,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
GLOBAL_STATE_CODE();
+ bdrv_drain_all_begin();
child = bdrv_attach_child_noperm(parent_bs, child_bs, child_name,
child_class, child_role, tran, errp);
if (!child) {
@@ -3297,6 +3298,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
out:
tran_finalize(tran, ret);
+ bdrv_drain_all_end();
bdrv_schedule_unref(child_bs);
@@ -3465,6 +3467,9 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
*
* After calling this function, the transaction @tran may only be completed
* while holding a writer lock for the graph.
+ *
+ * All block nodes must be drained before this function is called until after
+ * the transaction is finalized.
*/
static int GRAPH_WRLOCK
bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
@@ -3573,6 +3578,7 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs,
assert(bs->backing->bs->quiesce_counter > 0);
}
+ bdrv_drain_all_begin();
ret = bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
if (ret < 0) {
goto out;
@@ -3581,6 +3587,7 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs,
ret = bdrv_refresh_perms(bs, tran, errp);
out:
tran_finalize(tran, ret);
+ bdrv_drain_all_end();
return ret;
}
@@ -4721,6 +4728,9 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
* Return 0 on success, otherwise return < 0 and set @errp.
*
* @reopen_state->bs can move to a different AioContext in this function.
+ *
+ * All block nodes must be drained before this function is called until after
+ * the transaction is finalized.
*/
static int GRAPH_UNLOCKED
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
@@ -4814,7 +4824,7 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
if (old_child_bs) {
bdrv_ref(old_child_bs);
- bdrv_drained_begin(old_child_bs);
+ assert(old_child_bs->quiesce_counter > 0);
}
bdrv_graph_rdunlock_main_loop();
@@ -4826,7 +4836,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
bdrv_graph_wrunlock();
if (old_child_bs) {
- bdrv_drained_end(old_child_bs);
bdrv_unref(old_child_bs);
}
@@ -4855,6 +4864,9 @@ out_rdlock:
*
* After calling this function, the transaction @change_child_tran may only be
* completed while holding a writer lock for the graph.
+ *
+ * All block nodes must be drained before this function is called until after
+ * the transaction is finalized.
*/
static int GRAPH_UNLOCKED
bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
@@ -5501,9 +5513,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
assert(!bs_new->backing);
bdrv_graph_rdunlock_main_loop();
- bdrv_drained_begin(bs_top);
- bdrv_drained_begin(bs_new);
-
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
@@ -5525,9 +5535,7 @@ out:
bdrv_refresh_limits(bs_top, NULL, NULL);
bdrv_graph_wrunlock();
-
- bdrv_drained_end(bs_top);
- bdrv_drained_end(bs_new);
+ bdrv_drain_all_end();
return ret;
}
--
2.39.3