119 lines
6.0 KiB
Diff
119 lines
6.0 KiB
Diff
From 808d2c94f53b9b29f44c2c5d9baea66d63ceddfc Mon Sep 17 00:00:00 2001
|
|
From: Kevin Wolf <kwolf@redhat.com>
|
|
Date: Wed, 14 Aug 2019 09:20:43 +0100
|
|
Subject: [PATCH 07/10] block: Fix AioContext switch for bs->drv == NULL
|
|
|
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
|
Message-id: <20190814092043.15421-2-kwolf@redhat.com>
|
|
Patchwork-id: 89976
|
|
O-Subject: [RHEL-8.1.0 qemu-kvm PATCH 1/1] block: Fix AioContext switch for bs->drv == NULL
|
|
Bugzilla: 1716347
|
|
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
|
RH-Acked-by: John Snow <jsnow@redhat.com>
|
|
|
|
Even for block nodes with bs->drv == NULL, we can't just ignore a
|
|
bdrv_set_aio_context() call. Leaving the node in its old context can
|
|
mean that it's still in an iothread context in bdrv_close_all() during
|
|
shutdown, resulting in an attempted unlock of the AioContext lock which
|
|
we don't hold.
|
|
|
|
This is an example stack trace of a related crash:
|
|
|
|
#0 0x00007ffff59da57f in raise () at /lib64/libc.so.6
|
|
#1 0x00007ffff59c4895 in abort () at /lib64/libc.so.6
|
|
#2 0x0000555555b97b1e in error_exit (err=<optimized out>, msg=msg@entry=0x555555d386d0 <__func__.19059> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36
|
|
#3 0x0000555555b97f7f in qemu_mutex_unlock_impl (mutex=mutex@entry=0x5555568002f0, file=file@entry=0x555555d378df "util/async.c", line=line@entry=507) at util/qemu-thread-posix.c:97
|
|
#4 0x0000555555b92f55 in aio_context_release (ctx=ctx@entry=0x555556800290) at util/async.c:507
|
|
#5 0x0000555555b05cf8 in bdrv_prwv_co (child=child@entry=0x7fffc80012f0, offset=offset@entry=131072, qiov=qiov@entry=0x7fffffffd4f0, is_write=is_write@entry=true, flags=flags@entry=0)
|
|
at block/io.c:833
|
|
#6 0x0000555555b060a9 in bdrv_pwritev (qiov=0x7fffffffd4f0, offset=131072, child=0x7fffc80012f0) at block/io.c:990
|
|
#7 0x0000555555b060a9 in bdrv_pwrite (child=0x7fffc80012f0, offset=131072, buf=<optimized out>, bytes=<optimized out>) at block/io.c:990
|
|
#8 0x0000555555ae172b in qcow2_cache_entry_flush (bs=bs@entry=0x555556810680, c=c@entry=0x5555568cc740, i=i@entry=0) at block/qcow2-cache.c:51
|
|
#9 0x0000555555ae18dd in qcow2_cache_write (bs=bs@entry=0x555556810680, c=0x5555568cc740) at block/qcow2-cache.c:248
|
|
#10 0x0000555555ae15de in qcow2_cache_flush (bs=0x555556810680, c=<optimized out>) at block/qcow2-cache.c:259
|
|
#11 0x0000555555ae16b1 in qcow2_cache_flush_dependency (c=0x5555568a1700, c=0x5555568a1700, bs=0x555556810680) at block/qcow2-cache.c:194
|
|
#12 0x0000555555ae16b1 in qcow2_cache_entry_flush (bs=bs@entry=0x555556810680, c=c@entry=0x5555568a1700, i=i@entry=0) at block/qcow2-cache.c:194
|
|
#13 0x0000555555ae18dd in qcow2_cache_write (bs=bs@entry=0x555556810680, c=0x5555568a1700) at block/qcow2-cache.c:248
|
|
#14 0x0000555555ae15de in qcow2_cache_flush (bs=bs@entry=0x555556810680, c=<optimized out>) at block/qcow2-cache.c:259
|
|
#15 0x0000555555ad242c in qcow2_inactivate (bs=bs@entry=0x555556810680) at block/qcow2.c:2124
|
|
#16 0x0000555555ad2590 in qcow2_close (bs=0x555556810680) at block/qcow2.c:2153
|
|
#17 0x0000555555ab0c62 in bdrv_close (bs=0x555556810680) at block.c:3358
|
|
#18 0x0000555555ab0c62 in bdrv_delete (bs=0x555556810680) at block.c:3542
|
|
#19 0x0000555555ab0c62 in bdrv_unref (bs=0x555556810680) at block.c:4598
|
|
#20 0x0000555555af4d72 in blk_remove_bs (blk=blk@entry=0x5555568103d0) at block/block-backend.c:785
|
|
#21 0x0000555555af4dbb in blk_remove_all_bs () at block/block-backend.c:483
|
|
#22 0x0000555555aae02f in bdrv_close_all () at block.c:3412
|
|
#23 0x00005555557f9796 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4776
|
|
|
|
The reproducer I used is a qcow2 image on gluster volume, where the
|
|
virtual disk size (4 GB) is larger than the gluster volume size (64M),
|
|
so we can easily trigger an ENOSPC. This backend is assigned to a
|
|
virtio-blk device using an iothread, and then from the guest a
|
|
'dd if=/dev/zero of=/dev/vda bs=1G count=1' causes the VM to stop
|
|
because of an I/O error. qemu_gluster_co_flush_to_disk() sets
|
|
bs->drv = NULL on error, so when virtio-blk stops the dataplane, the
|
|
block nodes stay in the iothread AioContext. A 'quit' monitor command
|
|
issued from this paused state crashes the process.
|
|
|
|
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1631227
|
|
Cc: qemu-stable@nongnu.org
|
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
|
Reviewed-by: Max Reitz <mreitz@redhat.com>
|
|
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
|
|
(cherry picked from commit 1bffe1ae7a7b707c3a14ea2ccd00d3609d3ce4d8)
|
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
|
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
---
|
|
block.c | 12 ++----------
|
|
1 file changed, 2 insertions(+), 10 deletions(-)
|
|
|
|
diff --git a/block.c b/block.c
|
|
index 8f3ceea..37af100 100644
|
|
--- a/block.c
|
|
+++ b/block.c
|
|
@@ -4923,10 +4923,6 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
|
|
BdrvAioNotifier *baf, *baf_tmp;
|
|
BdrvChild *child;
|
|
|
|
- if (!bs->drv) {
|
|
- return;
|
|
- }
|
|
-
|
|
assert(!bs->walking_aio_notifiers);
|
|
bs->walking_aio_notifiers = true;
|
|
QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
|
|
@@ -4941,7 +4937,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
|
|
*/
|
|
bs->walking_aio_notifiers = false;
|
|
|
|
- if (bs->drv->bdrv_detach_aio_context) {
|
|
+ if (bs->drv && bs->drv->bdrv_detach_aio_context) {
|
|
bs->drv->bdrv_detach_aio_context(bs);
|
|
}
|
|
QLIST_FOREACH(child, &bs->children, next) {
|
|
@@ -4960,10 +4956,6 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
|
|
BdrvAioNotifier *ban, *ban_tmp;
|
|
BdrvChild *child;
|
|
|
|
- if (!bs->drv) {
|
|
- return;
|
|
- }
|
|
-
|
|
if (bs->quiesce_counter) {
|
|
aio_disable_external(new_context);
|
|
}
|
|
@@ -4973,7 +4965,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
|
|
QLIST_FOREACH(child, &bs->children, next) {
|
|
bdrv_attach_aio_context(child->bs, new_context);
|
|
}
|
|
- if (bs->drv->bdrv_attach_aio_context) {
|
|
+ if (bs->drv && bs->drv->bdrv_attach_aio_context) {
|
|
bs->drv->bdrv_attach_aio_context(bs, new_context);
|
|
}
|
|
|
|
--
|
|
1.8.3.1
|
|
|