126 lines
6.0 KiB
Diff
126 lines
6.0 KiB
Diff
|
From 23d161ad92d783275ad56f3acb663f7a21b809f4 Mon Sep 17 00:00:00 2001
|
||
|
From: Eric Blake <eblake@redhat.com>
|
||
|
Date: Mon, 8 Feb 2021 22:56:59 -0300
|
||
|
Subject: [PATCH 01/54] block/nbd: only detach existing iochannel from
|
||
|
aio_context
|
||
|
|
||
|
RH-Author: Eric Blake <eblake@redhat.com>
|
||
|
Message-id: <20210208225701.110110-2-eblake@redhat.com>
|
||
|
Patchwork-id: 101005
|
||
|
O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v4 1/3] block/nbd: only detach existing iochannel from aio_context
|
||
|
Bugzilla: 1887883
|
||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||
|
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
|
||
|
RH-Acked-by: Max Reitz <mreitz@redhat.com>
|
||
|
|
||
|
From: Roman Kagan <rvkagan@yandex-team.ru>
|
||
|
|
||
|
When a reconnect in the NBD client is in progress, the iochannel used for
|
||
|
the NBD connection doesn't exist. Therefore, an attempt to detach it from
|
||
|
the aio_context of the parent BlockDriverState results in a NULL pointer
|
||
|
dereference.
|
||
|
|
||
|
The problem can be triggered, in particular, when an outgoing migration is
|
||
|
about to finish, and stopping the dataplane tries to move the
|
||
|
BlockDriverState from the iothread aio_context to the main loop. If the
|
||
|
NBD connection is lost before this point, and the NBD client has entered
|
||
|
the reconnect procedure, QEMU crashes:
|
||
|
|
||
|
#0 qemu_aio_coroutine_enter (ctx=0x5618056c7580, co=0x0)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/util/qemu-coroutine.c:109
|
||
|
#1 0x00005618034b1b68 in nbd_client_attach_aio_context_bh (
|
||
|
opaque=0x561805ed4c00) at /build/qemu-6MF7tq/qemu-5.0.1/block/nbd.c:164
|
||
|
#2 0x000056180353116b in aio_wait_bh (opaque=0x7f60e1e63700)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-wait.c:55
|
||
|
#3 0x0000561803530633 in aio_bh_call (bh=0x7f60d40a7e80)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/util/async.c:136
|
||
|
#4 aio_bh_poll (ctx=ctx@entry=0x5618056c7580)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/util/async.c:164
|
||
|
#5 0x0000561803533e5a in aio_poll (ctx=ctx@entry=0x5618056c7580,
|
||
|
blocking=blocking@entry=true)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-posix.c:650
|
||
|
#6 0x000056180353128d in aio_wait_bh_oneshot (ctx=0x5618056c7580,
|
||
|
cb=<optimized out>, opaque=<optimized out>)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-wait.c:71
|
||
|
#7 0x000056180345c50a in bdrv_attach_aio_context (new_context=0x5618056c7580,
|
||
|
bs=0x561805ed4c00) at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6172
|
||
|
#8 bdrv_set_aio_context_ignore (bs=bs@entry=0x561805ed4c00,
|
||
|
new_context=new_context@entry=0x5618056c7580,
|
||
|
ignore=ignore@entry=0x7f60e1e63780)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6237
|
||
|
#9 0x000056180345c969 in bdrv_child_try_set_aio_context (
|
||
|
bs=bs@entry=0x561805ed4c00, ctx=0x5618056c7580,
|
||
|
ignore_child=<optimized out>, errp=<optimized out>)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6332
|
||
|
#10 0x00005618034957db in blk_do_set_aio_context (blk=0x56180695b3f0,
|
||
|
new_context=0x5618056c7580, update_root_node=update_root_node@entry=true,
|
||
|
errp=errp@entry=0x0)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/block/block-backend.c:1989
|
||
|
#11 0x00005618034980bd in blk_set_aio_context (blk=<optimized out>,
|
||
|
new_context=<optimized out>, errp=errp@entry=0x0)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/block/block-backend.c:2010
|
||
|
#12 0x0000561803197953 in virtio_blk_data_plane_stop (vdev=<optimized out>)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/hw/block/dataplane/virtio-blk.c:292
|
||
|
#13 0x00005618033d67bf in virtio_bus_stop_ioeventfd (bus=0x5618056d9f08)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/hw/virtio/virtio-bus.c:245
|
||
|
#14 0x00005618031c9b2e in virtio_vmstate_change (opaque=0x5618056d9f90,
|
||
|
running=0, state=<optimized out>)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/hw/virtio/virtio.c:3220
|
||
|
#15 0x0000561803208bfd in vm_state_notify (running=running@entry=0,
|
||
|
state=state@entry=RUN_STATE_FINISH_MIGRATE)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/softmmu/vl.c:1275
|
||
|
#16 0x0000561803155c02 in do_vm_stop (state=RUN_STATE_FINISH_MIGRATE,
|
||
|
send_stop=<optimized out>) at /build/qemu-6MF7tq/qemu-5.0.1/cpus.c:1032
|
||
|
#17 0x00005618033e3765 in migration_completion (s=0x5618056e6960)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:2914
|
||
|
#18 migration_iteration_run (s=0x5618056e6960)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:3275
|
||
|
#19 migration_thread (opaque=opaque@entry=0x5618056e6960)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:3439
|
||
|
#20 0x0000561803536ad6 in qemu_thread_start (args=<optimized out>)
|
||
|
at /build/qemu-6MF7tq/qemu-5.0.1/util/qemu-thread-posix.c:519
|
||
|
#21 0x00007f61085d06ba in start_thread ()
|
||
|
from /lib/x86_64-linux-gnu/libpthread.so.0
|
||
|
#22 0x00007f610830641d in sysctl () from /lib/x86_64-linux-gnu/libc.so.6
|
||
|
#23 0x0000000000000000 in ?? ()
|
||
|
|
||
|
Fix it by checking that the iochannel is non-null before trying to
|
||
|
detach it from the aio_context. If it is null, no detaching is needed,
|
||
|
and it will get reattached in the proper aio_context once the connection
|
||
|
is reestablished.
|
||
|
|
||
|
Signed-off-by: Roman Kagan <rvkagan@yandex-team.ru>
|
||
|
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
|
||
|
Message-Id: <20210129073859.683063-2-rvkagan@yandex-team.ru>
|
||
|
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||
|
(cherry picked from commit 3b5e4db6734d30e551101c0941b2a6140862ba40)
|
||
|
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||
|
Signed-off-by: Eduardo Lima (Etrunko) <etrunko@redhat.com>
|
||
|
---
|
||
|
block/nbd.c | 9 ++++++++-
|
||
|
1 file changed, 8 insertions(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/block/nbd.c b/block/nbd.c
|
||
|
index 42536702b6..ed7b6df10b 100644
|
||
|
--- a/block/nbd.c
|
||
|
+++ b/block/nbd.c
|
||
|
@@ -234,7 +234,14 @@ static void nbd_client_detach_aio_context(BlockDriverState *bs)
|
||
|
|
||
|
/* Timer is deleted in nbd_client_co_drain_begin() */
|
||
|
assert(!s->reconnect_delay_timer);
|
||
|
- qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
|
||
|
+ /*
|
||
|
+ * If reconnect is in progress we may have no ->ioc. It will be
|
||
|
+ * re-instantiated in the proper aio context once the connection is
|
||
|
+ * reestablished.
|
||
|
+ */
|
||
|
+ if (s->ioc) {
|
||
|
+ qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
static void nbd_client_attach_aio_context_bh(void *opaque)
|
||
|
--
|
||
|
2.27.0
|
||
|
|