From fdd1f3bf672ad8bb0a6db896ec8cbc797c31da1f Mon Sep 17 00:00:00 2001
From: Sergio Lopez Pascual <slp@redhat.com>
Date: Wed, 24 Jun 2020 13:24:53 -0400
Subject: [PATCH 11/12] virtio-blk: On restart, process queued requests in the
 proper context

RH-Author: Sergio Lopez Pascual <slp@redhat.com>
Message-id: <20200624132453.111276-3-slp@redhat.com>
Patchwork-id: 97798
O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] virtio-blk: On restart, process queued requests in the proper context
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1812765
RH-Acked-by: John Snow <jsnow@redhat.com>
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>

On restart, we were scheduling a BH to process queued requests, which
would run before starting up the data plane, leading to those requests
being assigned and started on coroutines on the main context.

This could cause requests to be wrongly processed in parallel from
different threads (the main thread and the iothread managing the data
plane), potentially leading to multiple issues.
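
Condensed, the pre-patch restart path looked like this (reconstructed
from this patch's context lines; QEMU-internal APIs, so this is a
sketch for illustration, not a standalone program):

    static void virtio_blk_dma_restart_cb(void *opaque, int running,
                                          RunState state)
    {
        VirtIOBlock *s = opaque;

        if (!running) {
            return;
        }

        if (!s->bh) {
            /* The BH is queued unconditionally, even with ioeventfd
             * enabled, so it runs on the main context before
             * virtio_blk_data_plane_start() has handed the queues over
             * to the iothread. */
            s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk),
                               virtio_blk_dma_restart_bh, s);
            blk_inc_in_flight(s->conf.conf.blk);
            qemu_bh_schedule(s->bh);
        }
    }

(The qemu_bh_schedule() call is not visible in the hunk below; it is
assumed from the surrounding code.)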

For example, stopping and resuming a VM multiple times while the guest
is generating I/O on a virtio_blk device can trigger a crash with a
stack trace looking like this one:

<------>
Thread 2 (Thread 0x7ff736765700 (LWP 1062503)):
#0  0x00005567a13b99d6 in iov_memset
    (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803)
    at util/iov.c:69
#1  0x00005567a13bab73 in qemu_iovec_memset
    (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530
#2  0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86
#3  0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217
#4  0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323
#5  0x00005567a12f43d9 in qemu_laio_process_completions_and_submit (s=0x7ff7182e8420)
    at block/linux-aio.c:236
#6  0x00005567a12f44c2 in qemu_laio_poll_cb (opaque=0x7ff7182e8430) at block/linux-aio.c:267
#7  0x00005567a13aed83 in run_poll_handlers_once (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8)
    at util/aio-posix.c:520
#8  0x00005567a13aee9f in run_poll_handlers (ctx=0x5567a2b58c70, max_ns=16000, timeout=0x7ff7367645f8)
    at util/aio-posix.c:562
#9  0x00005567a13aefde in try_poll_mode (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8)
    at util/aio-posix.c:597
#10 0x00005567a13af115 in aio_poll (ctx=0x5567a2b58c70, blocking=true) at util/aio-posix.c:639
#11 0x00005567a109acca in iothread_run (opaque=0x5567a2b29760) at iothread.c:75
#12 0x00005567a13b2790 in qemu_thread_start (args=0x5567a2b694c0) at util/qemu-thread-posix.c:519
#13 0x00007ff73eedf2de in start_thread () at /lib64/libpthread.so.0
#14 0x00007ff73ec10e83 in clone () at /lib64/libc.so.6

Thread 1 (Thread 0x7ff743986f00 (LWP 1062500)):
#0  0x00005567a13b99d6 in iov_memset
    (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803)
    at util/iov.c:69
#1  0x00005567a13bab73 in qemu_iovec_memset
    (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530
#2  0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86
#3  0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217
#4  0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323
#5  0x00005567a12f4a2f in laio_do_submit (fd=19, laiocb=0x7ff5f4ff9ae0, offset=472363008, type=2)
    at block/linux-aio.c:375
#6  0x00005567a12f4af2 in laio_co_submit
    (bs=0x5567a2b8c460, s=0x7ff7182e8420, fd=19, offset=472363008, qiov=0x7ff5f4ff9ca0, type=2)
    at block/linux-aio.c:394
#7  0x00005567a12f1803 in raw_co_prw
    (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, type=2)
    at block/file-posix.c:1892
#8  0x00005567a12f1941 in raw_co_pwritev
    (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, flags=0)
    at block/file-posix.c:1925
#9  0x00005567a12fe3e1 in bdrv_driver_pwritev
    (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, qiov_offset=0, flags=0)
    at block/io.c:1183
#10 0x00005567a1300340 in bdrv_aligned_pwritev
    (child=0x5567a2b5b070, req=0x7ff5f4ff9db0, offset=472363008, bytes=20480, align=512, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:1980
#11 0x00005567a1300b29 in bdrv_co_pwritev_part
    (child=0x5567a2b5b070, offset=472363008, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0)
    at block/io.c:2137
#12 0x00005567a12baba1 in qcow2_co_pwritev_task
    (bs=0x5567a2b92740, file_cluster_offset=472317952, offset=487305216, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, l2meta=0x0) at block/qcow2.c:2444
#13 0x00005567a12bacdb in qcow2_co_pwritev_task_entry (task=0x5567a2b48540) at block/qcow2.c:2475
#14 0x00005567a13167d8 in aio_task_co (opaque=0x5567a2b48540) at block/aio_task.c:45
#15 0x00005567a13cf00c in coroutine_trampoline (i0=738245600, i1=32759) at util/coroutine-ucontext.c:115
#16 0x00007ff73eb622e0 in __start_context () at /lib64/libc.so.6
#17 0x00007ff6626f1350 in ()
#18 0x0000000000000000 in ()
<------>

This is also known to cause crashes with this message (assertion
failed):

aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'
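
That message comes from the double-schedule guard at the top of
aio_co_schedule() in util/async.c; approximately, as quoted from memory
of the code base of this era:

    void aio_co_schedule(AioContext *ctx, Coroutine *co)
    {
        /* Atomically claim the coroutine: a second scheduling attempt
         * finds co->scheduled already set and aborts. */
        const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
                                               __func__);
        if (scheduled) {
            fprintf(stderr,
                    "%s: Co-routine was already scheduled in '%s'\n",
                    __func__, scheduled);
            abort();
        }
        /* ... queue the coroutine on ctx ... */
    }

With the main thread and the iothread both trying to resume the same
queued request coroutine, one of them loses this cmpxchg and QEMU
aborts.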

RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1812765
Signed-off-by: Sergio Lopez <slp@redhat.com>
Message-Id: <20200603093240.40489-3-slp@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 49b44549ace7890fffdf027fd3695218ee7f1121)
Signed-off-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
---
 hw/block/dataplane/virtio-blk.c |  8 ++++++++
 hw/block/virtio-blk.c           | 18 ++++++++++++------
 include/hw/virtio/virtio-blk.h  |  2 +-
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 119906a5fe..ac495fd72a 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -220,6 +220,9 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
         goto fail_guest_notifiers;
     }
 
+    /* Process queued requests before the ones in vring */
+    virtio_blk_process_queued_requests(vblk, false);
+
     /* Kick right away to begin processing requests already in vring */
     for (i = 0; i < nvqs; i++) {
         VirtQueue *vq = virtio_get_queue(s->vdev, i);
@@ -239,6 +242,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
     return 0;
 
   fail_guest_notifiers:
+    /*
+     * If we failed to set up the guest notifiers queued requests will be
+     * processed on the main context.
+     */
+    virtio_blk_process_queued_requests(vblk, false);
     vblk->dataplane_disabled = true;
     s->starting = false;
     vblk->dataplane_started = true;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 6ff29a05d6..493a263fa6 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -819,7 +819,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
     virtio_blk_handle_output_do(s, vq);
 }
 
-void virtio_blk_process_queued_requests(VirtIOBlock *s)
+void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh)
 {
     VirtIOBlockReq *req = s->rq;
     MultiReqBuffer mrb = {};
@@ -847,7 +847,9 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s)
     if (mrb.num_reqs) {
         virtio_blk_submit_multireq(s->blk, &mrb);
     }
-    blk_dec_in_flight(s->conf.conf.blk);
+    if (is_bh) {
+        blk_dec_in_flight(s->conf.conf.blk);
+    }
     aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
@@ -858,21 +860,25 @@ static void virtio_blk_dma_restart_bh(void *opaque)
     qemu_bh_delete(s->bh);
     s->bh = NULL;
 
-    virtio_blk_process_queued_requests(s);
+    virtio_blk_process_queued_requests(s, true);
 }
 
 static void virtio_blk_dma_restart_cb(void *opaque, int running,
                                       RunState state)
 {
     VirtIOBlock *s = opaque;
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s)));
+    VirtioBusState *bus = VIRTIO_BUS(qbus);
 
     if (!running) {
         return;
     }
 
-    if (!s->bh) {
-        /* FIXME The data plane is not started yet, so these requests are
-         * processed in the main thread. */
+    /*
+     * If ioeventfd is enabled, don't schedule the BH here as queued
+     * requests will be processed while starting the data plane.
+     */
+    if (!s->bh && !virtio_bus_ioeventfd_enabled(bus)) {
         s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk),
                            virtio_blk_dma_restart_bh, s);
         blk_inc_in_flight(s->conf.conf.blk);
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index cf8eea2f58..e77f0db3b0 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -84,6 +84,6 @@ typedef struct MultiReqBuffer {
 } MultiReqBuffer;
 
 bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq);
-void virtio_blk_process_queued_requests(VirtIOBlock *s);
+void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh);
 
 #endif
-- 
2.27.0