* Sun Mar 12 2023 Miroslav Rezanina <mrezanin@redhat.com> - 7.2.0-12
- kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch [bz#2155748]
- kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch [bz#2155748]
- kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch [bz#2155748]
- kvm-qatomic-add-smp_mb__before-after_rmw.patch [bz#2175660]
- kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch [bz#2175660]
- kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch [bz#2175660]
- kvm-edu-add-smp_mb__after_rmw.patch [bz#2175660]
- kvm-aio-wait-switch-to-smp_mb__after_rmw.patch [bz#2175660]
- kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch [bz#2175660]
- kvm-physmem-add-missing-memory-barrier.patch [bz#2175660]
- kvm-async-update-documentation-of-the-memory-barriers.patch [bz#2175660]
- kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch [bz#2175660]
- Resolves: bz#2155748
  (qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed)
- Resolves: bz#2175660
  (Guest hangs when starting or rebooting)
This commit is contained in:
parent cbc169813b
commit de1852f087

kvm-aio-wait-switch-to-smp_mb__after_rmw.patch (new file, 50 lines)
@@ -0,0 +1,50 @@
|
||||
From e9a9c0b023ae0dcbb14543b74063cca931d8230f Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 08/12] aio-wait: switch to smp_mb__after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [5/9] a90c96d148fdbec340a45dc6cedf3660d8be2aab (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit b532526a07ef3b903ead2e055fe6cc87b41057a3
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri Mar 3 11:03:52 2023 +0100
|
||||
|
||||
aio-wait: switch to smp_mb__after_rmw()
|
||||
|
||||
The barrier comes after an atomic increment, so it is enough to use
|
||||
smp_mb__after_rmw(); this avoids a double barrier on x86 systems.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
include/block/aio-wait.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
|
||||
index dd9a7f6461..da13357bb8 100644
|
||||
--- a/include/block/aio-wait.h
|
||||
+++ b/include/block/aio-wait.h
|
||||
@@ -85,7 +85,7 @@ extern AioWait global_aio_wait;
|
||||
/* Increment wait_->num_waiters before evaluating cond. */ \
|
||||
qatomic_inc(&wait_->num_waiters); \
|
||||
/* Paired with smp_mb in aio_wait_kick(). */ \
|
||||
- smp_mb(); \
|
||||
+ smp_mb__after_rmw(); \
|
||||
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
|
||||
while ((cond)) { \
|
||||
aio_poll(ctx_, true); \
|
||||
--
|
||||
2.39.1
|
||||
|
kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch (new file, 66 lines)
@@ -0,0 +1,66 @@
|
||||
From 3d823dda6832b76fd3d776131008107b0b0f7166 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 12/12] async: clarify usage of barriers in the polling case
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [9/9] b4ea298d75a75bb61e07a27d1296e0095fbc2bbf (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 6229438cca037d42f44a96d38feb15cb102a444f
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon Mar 6 10:43:52 2023 +0100
|
||||
|
||||
async: clarify usage of barriers in the polling case
|
||||
|
||||
Explain that aio_context_notifier_poll() relies on
|
||||
aio_notify_accept() to catch all the memory writes that were
|
||||
done before ctx->notified was set to true.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/async.c | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 37d3e6036d..e0846baf93 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -472,8 +472,9 @@ void aio_notify_accept(AioContext *ctx)
|
||||
qatomic_set(&ctx->notified, false);
|
||||
|
||||
/*
|
||||
- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb
|
||||
- * in aio_notify.
|
||||
+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
|
||||
+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
|
||||
+ * with smp_wmb() in aio_notify.
|
||||
*/
|
||||
smp_mb();
|
||||
}
|
||||
@@ -496,6 +497,11 @@ static bool aio_context_notifier_poll(void *opaque)
|
||||
EventNotifier *e = opaque;
|
||||
AioContext *ctx = container_of(e, AioContext, notifier);
|
||||
|
||||
+ /*
|
||||
+ * No need for load-acquire because we just want to kick the
|
||||
+ * event loop. aio_notify_accept() takes care of synchronizing
|
||||
+ * the event loop with the producers.
|
||||
+ */
|
||||
return qatomic_read(&ctx->notified);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
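A minimal standalone illustration of the pairing the patch above documents, written with plain C11 atomics instead of QEMU's qatomic layer; the names (notified, payload) and values are illustrative only. The release fence plays the role of smp_wmb() in aio_notify(), the relaxed load is the cheap check in aio_context_notifier_poll(), and the seq-cst fence after clearing the flag is the smp_mb() in aio_notify_accept() that makes the producer's earlier writes visible.

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_int notified;
static int payload;                      /* stands in for bh->flags etc. */

static void *producer(void *arg)         /* role of aio_notify() */
{
    (void)arg;
    payload = 42;                                   /* write data first ... */
    atomic_thread_fence(memory_order_release);      /* ... like smp_wmb() */
    atomic_store_explicit(&notified, 1, memory_order_relaxed);
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, producer, NULL);

    /* Polling side: a relaxed read is enough just to kick the event loop. */
    while (!atomic_load_explicit(&notified, memory_order_relaxed)) {
        /* spin */
    }

    /* "Accept": clear the flag, then a full fence before reading the data,
     * like smp_mb() in aio_notify_accept(). */
    atomic_store_explicit(&notified, 0, memory_order_relaxed);
    atomic_thread_fence(memory_order_seq_cst);

    printf("payload=%d\n", payload);     /* guaranteed to print 42 */
    pthread_join(t, NULL);
    return 0;
}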
kvm-async-update-documentation-of-the-memory-barriers.patch (new file, 111 lines)
@@ -0,0 +1,111 @@
|
||||
From 29bcf843d796ffc2a0906dea947e4cdfe9f7ec60 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 11/12] async: update documentation of the memory barriers
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [8/9] 5ca20e4c8983e0bc1ecee66bead3472777abe4d1 (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 8dd48650b43dfde4ebea34191ac267e474bcc29e
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon Mar 6 10:15:06 2023 +0100
|
||||
|
||||
async: update documentation of the memory barriers
|
||||
|
||||
Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)",
|
||||
2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll()
|
||||
is happening when the bottom half is enqueued in the bh_list; not
|
||||
when the flags are set. Update the documentation to match.
|
||||
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/async.c | 33 +++++++++++++++++++--------------
|
||||
1 file changed, 19 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 63434ddae4..37d3e6036d 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -73,14 +73,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
|
||||
unsigned old_flags;
|
||||
|
||||
/*
|
||||
- * The memory barrier implicit in qatomic_fetch_or makes sure that:
|
||||
- * 1. idle & any writes needed by the callback are done before the
|
||||
- * locations are read in the aio_bh_poll.
|
||||
- * 2. ctx is loaded before the callback has a chance to execute and bh
|
||||
- * could be freed.
|
||||
+ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that
|
||||
+ * insertion starts after BH_PENDING is set.
|
||||
*/
|
||||
old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
|
||||
+
|
||||
if (!(old_flags & BH_PENDING)) {
|
||||
+ /*
|
||||
+ * At this point the bottom half becomes visible to aio_bh_poll().
|
||||
+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
|
||||
+ * aio_bh_poll(), ensuring that:
|
||||
+ * 1. any writes needed by the callback are visible from the callback
|
||||
+ * after aio_bh_dequeue() returns bh.
|
||||
+ * 2. ctx is loaded before the callback has a chance to execute and bh
|
||||
+ * could be freed.
|
||||
+ */
|
||||
QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
|
||||
}
|
||||
|
||||
@@ -106,11 +113,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
|
||||
QSLIST_REMOVE_HEAD(head, next);
|
||||
|
||||
/*
|
||||
- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory
|
||||
- * barrier ensures that the callback sees all writes done by the scheduling
|
||||
- * thread. It also ensures that the scheduling thread sees the cleared
|
||||
- * flag before bh->cb has run, and thus will call aio_notify again if
|
||||
- * necessary.
|
||||
+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
|
||||
+ * the removal finishes before BH_PENDING is reset.
|
||||
*/
|
||||
*flags = qatomic_fetch_and(&bh->flags,
|
||||
~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
|
||||
@@ -157,6 +161,7 @@ int aio_bh_poll(AioContext *ctx)
|
||||
BHListSlice *s;
|
||||
int ret = 0;
|
||||
|
||||
+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
|
||||
QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
|
||||
QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
|
||||
|
||||
@@ -446,15 +451,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
|
||||
void aio_notify(AioContext *ctx)
|
||||
{
|
||||
/*
|
||||
- * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in
|
||||
- * aio_notify_accept.
|
||||
+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
|
||||
+ * smp_mb() in aio_notify_accept().
|
||||
*/
|
||||
smp_wmb();
|
||||
qatomic_set(&ctx->notified, true);
|
||||
|
||||
/*
|
||||
- * Write ctx->notified before reading ctx->notify_me. Pairs
|
||||
- * with smp_mb in aio_ctx_prepare or aio_poll.
|
||||
+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
|
||||
+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
|
||||
*/
|
||||
smp_mb();
|
||||
if (qatomic_read(&ctx->notify_me)) {
|
||||
--
|
||||
2.39.1
|
||||
|
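The comments reworked in the patch above rely on the ordering that the seq-cst read-modify-write operations already provide: the scheduler's writes happen before it sets BH_PENDING, and the poller's clearing of BH_PENDING happens before the callback runs. A minimal standalone sketch of that fetch_or/fetch_and pairing in plain C11; PENDING, payload and schedule() are illustrative stand-ins, not QEMU code.

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

#define PENDING 1u

static atomic_uint flags;
static int payload;                        /* data the "callback" will read */

static void *schedule(void *arg)           /* role of qemu_bh_schedule() */
{
    (void)arg;
    payload = 7;                           /* writes needed by the callback */
    atomic_fetch_or(&flags, PENDING);      /* seq-cst RMW orders them first */
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, schedule, NULL);

    for (;;) {                             /* role of aio_bh_poll()/dequeue() */
        unsigned old = atomic_fetch_and(&flags, ~PENDING);
        if (old & PENDING) {
            /* pairs with the fetch_or above: payload is guaranteed visible */
            printf("callback ran, payload=%d\n", payload);
            break;
        }
    }
    pthread_join(t, NULL);
    return 0;
}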
kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch (new file, 127 lines)
@@ -0,0 +1,127 @@
|
||||
From b886411a682b56bfe674f0a35d40c67c8e9dc87a Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 21 Feb 2023 16:22:17 -0500
|
||||
Subject: [PATCH 02/12] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel()
|
||||
race
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread
|
||||
RH-Bugzilla: 2155748
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
||||
RH-Commit: [2/3] eeeea43c25d8f4fa84591b05547fb77e4058abff (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
dma_blk_cb() only takes the AioContext lock around ->io_func(). That
|
||||
means the rest of dma_blk_cb() is not protected. In particular, the
|
||||
DMAAIOCB field accesses happen outside the lock.
|
||||
|
||||
There is a race when the main loop thread holds the AioContext lock and
|
||||
invokes scsi_device_purge_requests() -> bdrv_aio_cancel() ->
|
||||
dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb
|
||||
field determines how cancellation proceeds. If dma_aio_cancel() sees
|
||||
dbs->acb == NULL while dma_blk_cb() is still running, the request can be
|
||||
completed twice (-ECANCELED and the actual return value).
|
||||
|
||||
The following assertion can occur with virtio-scsi when an IOThread is
|
||||
used:
|
||||
|
||||
../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed.
|
||||
|
||||
Fix the race by holding the AioContext across dma_blk_cb(). Now
|
||||
dma_aio_cancel() under the AioContext lock will not see
|
||||
inconsistent/intermediate states.
|
||||
|
||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230221212218.1378734-3-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
hw/scsi/scsi-disk.c | 4 +---
|
||||
softmmu/dma-helpers.c | 12 +++++++-----
|
||||
2 files changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
|
||||
index 5327f93f4c..b12d8b0816 100644
|
||||
--- a/hw/scsi/scsi-disk.c
|
||||
+++ b/hw/scsi/scsi-disk.c
|
||||
@@ -354,13 +354,12 @@ done:
|
||||
scsi_req_unref(&r->req);
|
||||
}
|
||||
|
||||
+/* Called with AioContext lock held */
|
||||
static void scsi_dma_complete(void *opaque, int ret)
|
||||
{
|
||||
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
-
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret)
|
||||
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||
}
|
||||
scsi_dma_complete_noio(r, ret);
|
||||
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||
}
|
||||
|
||||
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
|
||||
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
|
||||
index 7820fec54c..2463964805 100644
|
||||
--- a/softmmu/dma-helpers.c
|
||||
+++ b/softmmu/dma-helpers.c
|
||||
@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
|
||||
static void dma_blk_cb(void *opaque, int ret)
|
||||
{
|
||||
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
|
||||
+ AioContext *ctx = dbs->ctx;
|
||||
dma_addr_t cur_addr, cur_len;
|
||||
void *mem;
|
||||
|
||||
trace_dma_blk_cb(dbs, ret);
|
||||
|
||||
+ aio_context_acquire(ctx);
|
||||
dbs->acb = NULL;
|
||||
dbs->offset += dbs->iov.size;
|
||||
|
||||
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
|
||||
dma_complete(dbs, ret);
|
||||
- return;
|
||||
+ goto out;
|
||||
}
|
||||
dma_blk_unmap(dbs);
|
||||
|
||||
@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
|
||||
if (dbs->iov.size == 0) {
|
||||
trace_dma_map_wait(dbs);
|
||||
- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
|
||||
+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
|
||||
cpu_register_map_client(dbs->bh);
|
||||
- return;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
|
||||
@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
|
||||
}
|
||||
|
||||
- aio_context_acquire(dbs->ctx);
|
||||
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
|
||||
dma_blk_cb, dbs, dbs->io_func_opaque);
|
||||
- aio_context_release(dbs->ctx);
|
||||
assert(dbs->acb);
|
||||
+out:
|
||||
+ aio_context_release(ctx);
|
||||
}
|
||||
|
||||
static void dma_aio_cancel(BlockAIOCB *acb)
|
||||
--
|
||||
2.39.1
|
||||
|
kvm-edu-add-smp_mb__after_rmw.patch (new file, 61 lines)
@@ -0,0 +1,61 @@
|
||||
From 67bbeb056f75adc6c964468d876531ab68366fe0 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 07/12] edu: add smp_mb__after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [4/9] 2ad6fd6cb33fde39d2d017d94c0dde2152ad70c4 (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu Mar 2 11:16:13 2023 +0100
|
||||
|
||||
edu: add smp_mb__after_rmw()
|
||||
|
||||
Ensure ordering between clearing the COMPUTING flag and checking
|
||||
IRQFACT, and between setting the IRQFACT flag and checking
|
||||
COMPUTING. This ensures that no wakeups are lost.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
hw/misc/edu.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
|
||||
index e935c418d4..a1f8bc77e7 100644
|
||||
--- a/hw/misc/edu.c
|
||||
+++ b/hw/misc/edu.c
|
||||
@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
|
||||
case 0x20:
|
||||
if (val & EDU_STATUS_IRQFACT) {
|
||||
qatomic_or(&edu->status, EDU_STATUS_IRQFACT);
|
||||
+ /* Order check of the COMPUTING flag after setting IRQFACT. */
|
||||
+ smp_mb__after_rmw();
|
||||
} else {
|
||||
qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT);
|
||||
}
|
||||
@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque)
|
||||
qemu_mutex_unlock(&edu->thr_mutex);
|
||||
qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING);
|
||||
|
||||
+ /* Clear COMPUTING flag before checking IRQFACT. */
|
||||
+ smp_mb__after_rmw();
|
||||
+
|
||||
if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) {
|
||||
qemu_mutex_lock_iothread();
|
||||
edu_raise_irq(edu, FACT_IRQ);
|
||||
--
|
||||
2.39.1
|
||||
|
kvm-physmem-add-missing-memory-barrier.patch (new file, 55 lines)
@@ -0,0 +1,55 @@
|
||||
From 0dd4be411e35f00d006d89a15d9161f5d8783c1d Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 10/12] physmem: add missing memory barrier
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [7/9] ee4875cb8c564f0510e48b00a5d95c0e6ea6301b (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 33828ca11da08436e1b32f3e79dabce3061a0427
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri Mar 3 14:36:32 2023 +0100
|
||||
|
||||
physmem: add missing memory barrier
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
softmmu/physmem.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
|
||||
index 1b606a3002..772c9896cd 100644
|
||||
--- a/softmmu/physmem.c
|
||||
+++ b/softmmu/physmem.c
|
||||
@@ -3117,6 +3117,8 @@ void cpu_register_map_client(QEMUBH *bh)
|
||||
qemu_mutex_lock(&map_client_list_lock);
|
||||
client->bh = bh;
|
||||
QLIST_INSERT_HEAD(&map_client_list, client, link);
|
||||
+ /* Write map_client_list before reading in_use. */
|
||||
+ smp_mb();
|
||||
if (!qatomic_read(&bounce.in_use)) {
|
||||
cpu_notify_map_clients_locked();
|
||||
}
|
||||
@@ -3309,6 +3311,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
|
||||
qemu_vfree(bounce.buffer);
|
||||
bounce.buffer = NULL;
|
||||
memory_region_unref(bounce.mr);
|
||||
+ /* Clear in_use before reading map_client_list. */
|
||||
qatomic_mb_set(&bounce.in_use, false);
|
||||
cpu_notify_map_clients();
|
||||
}
|
||||
--
|
||||
2.39.1
|
||||
|
kvm-qatomic-add-smp_mb__before-after_rmw.patch (new file, 177 lines)
@@ -0,0 +1,177 @@
|
||||
From 1fdc864f9ac927f3ea407f35f6771a4b2e8f509f Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 04/12] qatomic: add smp_mb__before/after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [1/9] e8d0b64670bff778d275b1fb477dcee0c109251a (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit ff00bed1897c3d27adc5b0cec6f6eeb5a7d13176
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu Mar 2 11:10:56 2023 +0100
|
||||
|
||||
qatomic: add smp_mb__before/after_rmw()
|
||||
|
||||
On ARM, seqcst loads and stores (which QEMU does not use) are compiled
|
||||
respectively as LDAR and STLR instructions. Even though LDAR is
|
||||
also used for load-acquire operations, it also waits for all STLRs to
|
||||
leave the store buffer. Thus, LDAR and STLR alone are load-acquire
|
||||
and store-release operations, but LDAR also provides store-against-load
|
||||
ordering as long as the previous store is a STLR.
|
||||
|
||||
Compare this to ARMv7, where store-release is DMB+STR and load-acquire
|
||||
is LDR+DMB, but an additional DMB is needed between store-seqcst and
|
||||
load-seqcst (e.g. DMB+STR+DMB+LDR+DMB); or with x86, where MOV provides
|
||||
load-acquire and store-release semantics and the two can be reordered.
|
||||
|
||||
Likewise, on ARM sequentially consistent read-modify-write operations only
|
||||
need to use LDAXR and STLXR respectively for the load and the store, while
|
||||
on x86 they need to use the stronger LOCK prefix.
|
||||
|
||||
In a strange twist of events, however, the _stronger_ semantics
|
||||
of the ARM instructions can end up causing bugs on ARM, not on x86.
|
||||
The problems occur when seqcst atomics are mixed with relaxed atomics.
|
||||
|
||||
QEMU's atomics try to bridge the Linux API (that most of the developers
|
||||
are familiar with) and the C11 API, and the two have a substantial
|
||||
difference:
|
||||
|
||||
- in Linux, strongly-ordered atomics such as atomic_add_return() affect
|
||||
the global ordering of _all_ memory operations, including for example
|
||||
READ_ONCE()/WRITE_ONCE()
|
||||
|
||||
- in C11, sequentially consistent atomics (except for seq-cst fences)
|
||||
only affect the ordering of sequentially consistent operations.
|
||||
In particular, since relaxed loads are done with LDR on ARM, they are
|
||||
not ordered against seqcst stores (which are done with STLR).
|
||||
|
||||
QEMU implements high-level synchronization primitives with the idea that
|
||||
the primitives contain the necessary memory barriers, and the callers can
|
||||
use relaxed atomics (qatomic_read/qatomic_set) or even regular accesses.
|
||||
This is very much incompatible with the C11 view that seqcst accesses
|
||||
are only ordered against other seqcst accesses, and requires using seqcst
|
||||
fences as in the following example:
|
||||
|
||||
qatomic_set(&y, 1); qatomic_set(&x, 1);
|
||||
smp_mb(); smp_mb();
|
||||
... qatomic_read(&x) ... ... qatomic_read(&y) ...
|
||||
|
||||
When a qatomic_*() read-modify write operation is used instead of one
|
||||
or both stores, developers that are more familiar with the Linux API may
|
||||
be tempted to omit the smp_mb(), which will work on x86 but not on ARM.
|
||||
|
||||
This nasty difference between Linux and C11 read-modify-write operations
|
||||
has already caused issues in util/async.c and more are being found.
|
||||
Provide something similar to Linux smp_mb__before/after_atomic(); this
|
||||
has the double function of documenting clearly why there is a memory
|
||||
barrier, and avoiding a double barrier on x86 and s390x systems.
|
||||
|
||||
The new macro can already be put to use in qatomic_mb_set().
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
docs/devel/atomics.rst | 26 +++++++++++++++++++++-----
|
||||
include/qemu/atomic.h | 17 ++++++++++++++++-
|
||||
2 files changed, 37 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst
|
||||
index 52baa0736d..10fbfc58bb 100644
|
||||
--- a/docs/devel/atomics.rst
|
||||
+++ b/docs/devel/atomics.rst
|
||||
@@ -25,7 +25,8 @@ provides macros that fall in three camps:
|
||||
|
||||
- weak atomic access and manual memory barriers: ``qatomic_read()``,
|
||||
``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``,
|
||||
- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``;
|
||||
+ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``,
|
||||
+ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``;
|
||||
|
||||
- sequentially consistent atomic access: everything else.
|
||||
|
||||
@@ -470,7 +471,7 @@ and memory barriers, and the equivalents in QEMU:
|
||||
sequential consistency.
|
||||
|
||||
- in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in
|
||||
- the total ordering enforced by sequentially-consistent operations.
|
||||
+ the ordering enforced by read-modify-write operations.
|
||||
This is because QEMU uses the C11 memory model. The following example
|
||||
is correct in Linux but not in QEMU:
|
||||
|
||||
@@ -486,9 +487,24 @@ and memory barriers, and the equivalents in QEMU:
|
||||
because the read of ``y`` can be moved (by either the processor or the
|
||||
compiler) before the write of ``x``.
|
||||
|
||||
- Fixing this requires an ``smp_mb()`` memory barrier between the write
|
||||
- of ``x`` and the read of ``y``. In the common case where only one thread
|
||||
- writes ``x``, it is also possible to write it like this:
|
||||
+ Fixing this requires a full memory barrier between the write of ``x`` and
|
||||
+ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and
|
||||
+ ``smp_mb__after_rmw()``; they act both as an optimization,
|
||||
+ avoiding the memory barrier on processors where it is unnecessary,
|
||||
+ and as a clarification of this corner case of the C11 memory model:
|
||||
+
|
||||
+ +--------------------------------+
|
||||
+ | QEMU (correct) |
|
||||
+ +================================+
|
||||
+ | :: |
|
||||
+ | |
|
||||
+ | a = qatomic_fetch_add(&x, 2);|
|
||||
+ | smp_mb__after_rmw(); |
|
||||
+ | b = qatomic_read(&y); |
|
||||
+ +--------------------------------+
|
||||
+
|
||||
+ In the common case where only one thread writes ``x``, it is also possible
|
||||
+ to write it like this:
|
||||
|
||||
+--------------------------------+
|
||||
| QEMU (correct) |
|
||||
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
|
||||
index 874134fd19..f85834ee8b 100644
|
||||
--- a/include/qemu/atomic.h
|
||||
+++ b/include/qemu/atomic.h
|
||||
@@ -245,6 +245,20 @@
|
||||
#define smp_wmb() smp_mb_release()
|
||||
#define smp_rmb() smp_mb_acquire()
|
||||
|
||||
+/*
|
||||
+ * SEQ_CST is weaker than the older __sync_* builtins and Linux
|
||||
+ * kernel read-modify-write atomics. Provide a macro to obtain
|
||||
+ * the same semantics.
|
||||
+ */
|
||||
+#if !defined(QEMU_SANITIZE_THREAD) && \
|
||||
+ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
|
||||
+# define smp_mb__before_rmw() signal_barrier()
|
||||
+# define smp_mb__after_rmw() signal_barrier()
|
||||
+#else
|
||||
+# define smp_mb__before_rmw() smp_mb()
|
||||
+# define smp_mb__after_rmw() smp_mb()
|
||||
+#endif
|
||||
+
|
||||
/* qatomic_mb_read/set semantics map Java volatile variables. They are
|
||||
* less expensive on some platforms (notably POWER) than fully
|
||||
* sequentially consistent operations.
|
||||
@@ -259,7 +273,8 @@
|
||||
#if !defined(QEMU_SANITIZE_THREAD) && \
|
||||
(defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
|
||||
/* This is more efficient than a store plus a fence. */
|
||||
-# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i))
|
||||
+# define qatomic_mb_set(ptr, i) \
|
||||
+ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); })
|
||||
#else
|
||||
# define qatomic_mb_set(ptr, i) \
|
||||
({ qatomic_store_release(ptr, i); smp_mb(); })
|
||||
--
|
||||
2.39.1
|
||||
|
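The commit message above reduces to a store-buffering pattern: each side performs a read-modify-write and then needs a full barrier before reading what the other side wrote. A minimal standalone sketch with plain C11 atomics, assuming nothing from QEMU: atomic_thread_fence(memory_order_seq_cst) stands in for smp_mb__after_rmw(), which the patch turns into a mere compiler barrier on x86/s390x because the locked RMW already orders the following load there.

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_int x, y;
static int r1, r2;

static void *thread_a(void *arg)
{
    (void)arg;
    atomic_fetch_add(&x, 1);                       /* announce (seq-cst RMW) */
    atomic_thread_fence(memory_order_seq_cst);     /* "smp_mb__after_rmw()" */
    r1 = atomic_load_explicit(&y, memory_order_relaxed);  /* check the peer */
    return NULL;
}

static void *thread_b(void *arg)
{
    (void)arg;
    atomic_fetch_add(&y, 1);
    atomic_thread_fence(memory_order_seq_cst);
    r2 = atomic_load_explicit(&x, memory_order_relaxed);
    return NULL;
}

int main(void)
{
    pthread_t a, b;
    pthread_create(&a, NULL, thread_a, NULL);
    pthread_create(&b, NULL, thread_b, NULL);
    pthread_join(a, NULL);
    pthread_join(b, NULL);
    /* With the fences, r1 == 0 && r2 == 0 is forbidden: at least one thread
     * observes the other's increment.  Dropping the fences makes that
     * outcome possible on ARM, which is exactly the class of bug this
     * series fixes. */
    printf("r1=%d r2=%d\n", r1, r2);
    return 0;
}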
kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch (new file, 75 lines)
@@ -0,0 +1,75 @@
|
||||
From 7a9907c65e3e2bbb0c119acdbbeb4381e7f1d902 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 09/12] qemu-coroutine-lock: add smp_mb__after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [6/9] 4b1723b1ad670ec4c85240390b4fc15ff361154f (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit e3a3b6ec8169eab2feb241b4982585001512cd55
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri Mar 3 10:52:59 2023 +0100
|
||||
|
||||
qemu-coroutine-lock: add smp_mb__after_rmw()
|
||||
|
||||
mutex->from_push and mutex->handoff in qemu-coroutine-lock implement
|
||||
the familiar pattern:
|
||||
|
||||
write a write b
|
||||
smp_mb() smp_mb()
|
||||
read b read a
|
||||
|
||||
The memory barrier is required by the C memory model even after a
|
||||
SEQ_CST read-modify-write operation such as QSLIST_INSERT_HEAD_ATOMIC.
|
||||
Add it and avoid the unclear qatomic_mb_read() operation.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/qemu-coroutine-lock.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
|
||||
index 45c6b57374..c5897bd963 100644
|
||||
--- a/util/qemu-coroutine-lock.c
|
||||
+++ b/util/qemu-coroutine-lock.c
|
||||
@@ -202,10 +202,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
|
||||
trace_qemu_co_mutex_lock_entry(mutex, self);
|
||||
push_waiter(mutex, &w);
|
||||
|
||||
+ /*
|
||||
+ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set
|
||||
+ * in qemu_co_mutex_unlock.
|
||||
+ */
|
||||
+ smp_mb__after_rmw();
|
||||
+
|
||||
/* This is the "Responsibility Hand-Off" protocol; a lock() picks from
|
||||
* a concurrent unlock() the responsibility of waking somebody up.
|
||||
*/
|
||||
- old_handoff = qatomic_mb_read(&mutex->handoff);
|
||||
+ old_handoff = qatomic_read(&mutex->handoff);
|
||||
if (old_handoff &&
|
||||
has_waiters(mutex) &&
|
||||
qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) {
|
||||
@@ -304,6 +310,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
|
||||
}
|
||||
|
||||
our_handoff = mutex->sequence;
|
||||
+ /* Set handoff before checking for waiters. */
|
||||
qatomic_mb_set(&mutex->handoff, our_handoff);
|
||||
if (!has_waiters(mutex)) {
|
||||
/* The concurrent lock has not added itself yet, so it
|
||||
--
|
||||
2.39.1
|
||||
|
kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch (new file, 146 lines)
@@ -0,0 +1,146 @@
|
||||
From aa61e4c437d29a791ea09a01f7230231f1e53356 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 05/12] qemu-thread-posix: cleanup, fix, document QemuEvent
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [2/9] c3bdf75f884e137c667316aaac96bb4a0b9ec2d9 (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 9586a1329f5dce6c1d7f4de53cf0536644d7e593
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu Mar 2 11:19:52 2023 +0100
|
||||
|
||||
qemu-thread-posix: cleanup, fix, document QemuEvent
|
||||
|
||||
QemuEvent is currently broken on ARM due to missing memory barriers
|
||||
after qatomic_*(). Apart from adding the memory barrier, a closer look
|
||||
reveals some unpaired memory barriers too. Document more clearly what
|
||||
is going on.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/qemu-thread-posix.c | 69 ++++++++++++++++++++++++++++------------
|
||||
1 file changed, 49 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
|
||||
index bae938c670..cc74f4ede0 100644
|
||||
--- a/util/qemu-thread-posix.c
|
||||
+++ b/util/qemu-thread-posix.c
|
||||
@@ -379,13 +379,21 @@ void qemu_event_destroy(QemuEvent *ev)
|
||||
|
||||
void qemu_event_set(QemuEvent *ev)
|
||||
{
|
||||
- /* qemu_event_set has release semantics, but because it *loads*
|
||||
+ assert(ev->initialized);
|
||||
+
|
||||
+ /*
|
||||
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
|
||||
+ *
|
||||
+ * qemu_event_set has release semantics, but because it *loads*
|
||||
* ev->value we need a full memory barrier here.
|
||||
*/
|
||||
- assert(ev->initialized);
|
||||
smp_mb();
|
||||
if (qatomic_read(&ev->value) != EV_SET) {
|
||||
- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
|
||||
+ int old = qatomic_xchg(&ev->value, EV_SET);
|
||||
+
|
||||
+ /* Pairs with memory barrier in kernel futex_wait system call. */
|
||||
+ smp_mb__after_rmw();
|
||||
+ if (old == EV_BUSY) {
|
||||
/* There were waiters, wake them up. */
|
||||
qemu_futex_wake(ev, INT_MAX);
|
||||
}
|
||||
@@ -394,18 +402,19 @@ void qemu_event_set(QemuEvent *ev)
|
||||
|
||||
void qemu_event_reset(QemuEvent *ev)
|
||||
{
|
||||
- unsigned value;
|
||||
-
|
||||
assert(ev->initialized);
|
||||
- value = qatomic_read(&ev->value);
|
||||
- smp_mb_acquire();
|
||||
- if (value == EV_SET) {
|
||||
- /*
|
||||
- * If there was a concurrent reset (or even reset+wait),
|
||||
- * do nothing. Otherwise change EV_SET->EV_FREE.
|
||||
- */
|
||||
- qatomic_or(&ev->value, EV_FREE);
|
||||
- }
|
||||
+
|
||||
+ /*
|
||||
+ * If there was a concurrent reset (or even reset+wait),
|
||||
+ * do nothing. Otherwise change EV_SET->EV_FREE.
|
||||
+ */
|
||||
+ qatomic_or(&ev->value, EV_FREE);
|
||||
+
|
||||
+ /*
|
||||
+ * Order reset before checking the condition in the caller.
|
||||
+ * Pairs with the first memory barrier in qemu_event_set().
|
||||
+ */
|
||||
+ smp_mb__after_rmw();
|
||||
}
|
||||
|
||||
void qemu_event_wait(QemuEvent *ev)
|
||||
@@ -413,20 +422,40 @@ void qemu_event_wait(QemuEvent *ev)
|
||||
unsigned value;
|
||||
|
||||
assert(ev->initialized);
|
||||
- value = qatomic_read(&ev->value);
|
||||
- smp_mb_acquire();
|
||||
+
|
||||
+ /*
|
||||
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
|
||||
+ * not go down the slow path, so this load-acquire is needed that
|
||||
+ * synchronizes with the first memory barrier in qemu_event_set().
|
||||
+ *
|
||||
+ * If we do go down the slow path, there is no requirement at all: we
|
||||
+ * might miss a qemu_event_set() here but ultimately the memory barrier in
|
||||
+ * qemu_futex_wait() will ensure the check is done correctly.
|
||||
+ */
|
||||
+ value = qatomic_load_acquire(&ev->value);
|
||||
if (value != EV_SET) {
|
||||
if (value == EV_FREE) {
|
||||
/*
|
||||
- * Leave the event reset and tell qemu_event_set that there
|
||||
- * are waiters. No need to retry, because there cannot be
|
||||
- * a concurrent busy->free transition. After the CAS, the
|
||||
- * event will be either set or busy.
|
||||
+ * Leave the event reset and tell qemu_event_set that there are
|
||||
+ * waiters. No need to retry, because there cannot be a concurrent
|
||||
+ * busy->free transition. After the CAS, the event will be either
|
||||
+ * set or busy.
|
||||
+ *
|
||||
+ * This cmpxchg doesn't have particular ordering requirements if it
|
||||
+ * succeeds (moving the store earlier can only cause qemu_event_set()
|
||||
+ * to issue _more_ wakeups), the failing case needs acquire semantics
|
||||
+ * like the load above.
|
||||
*/
|
||||
if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ /*
|
||||
+ * This is the final check for a concurrent set, so it does need
|
||||
+ * a smp_mb() pairing with the second barrier of qemu_event_set().
|
||||
+ * The barrier is inside the FUTEX_WAIT system call.
|
||||
+ */
|
||||
qemu_futex_wait(ev, EV_BUSY);
|
||||
}
|
||||
}
|
||||
--
|
||||
2.39.1
|
||||
|
kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch (new file, 162 lines)
@@ -0,0 +1,162 @@
|
||||
From 02347869410fe53d814487501fb586f7dc614375 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:24:36 -0500
|
||||
Subject: [PATCH 06/12] qemu-thread-win32: cleanup, fix, document QemuEvent
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2175660
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Commit: [3/9] d228e9d6a4a75dd1f0a23a6dceaf4fea23d69192 (eesposit/qemu-kvm)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
|
||||
|
||||
commit 6c5df4b48f0c52a61342ecb307a43f4c2a3565c4
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu Mar 2 11:22:50 2023 +0100
|
||||
|
||||
qemu-thread-win32: cleanup, fix, document QemuEvent
|
||||
|
||||
QemuEvent is currently broken on ARM due to missing memory barriers
|
||||
after qatomic_*(). Apart from adding the memory barrier, a closer look
|
||||
reveals some unpaired memory barriers that are not really needed and
|
||||
complicated the functions unnecessarily. Also, it is relying on
|
||||
a memory barrier in ResetEvent(); the barrier _ought_ to be there
|
||||
but there is really no documentation about it, so make it explicit.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/qemu-thread-win32.c | 82 +++++++++++++++++++++++++++-------------
|
||||
1 file changed, 56 insertions(+), 26 deletions(-)
|
||||
|
||||
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
|
||||
index 69db254ac7..a7fe3cc345 100644
|
||||
--- a/util/qemu-thread-win32.c
|
||||
+++ b/util/qemu-thread-win32.c
|
||||
@@ -272,12 +272,20 @@ void qemu_event_destroy(QemuEvent *ev)
|
||||
void qemu_event_set(QemuEvent *ev)
|
||||
{
|
||||
assert(ev->initialized);
|
||||
- /* qemu_event_set has release semantics, but because it *loads*
|
||||
+
|
||||
+ /*
|
||||
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
|
||||
+ *
|
||||
+ * qemu_event_set has release semantics, but because it *loads*
|
||||
* ev->value we need a full memory barrier here.
|
||||
*/
|
||||
smp_mb();
|
||||
if (qatomic_read(&ev->value) != EV_SET) {
|
||||
- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
|
||||
+ int old = qatomic_xchg(&ev->value, EV_SET);
|
||||
+
|
||||
+ /* Pairs with memory barrier after ResetEvent. */
|
||||
+ smp_mb__after_rmw();
|
||||
+ if (old == EV_BUSY) {
|
||||
/* There were waiters, wake them up. */
|
||||
SetEvent(ev->event);
|
||||
}
|
||||
@@ -286,17 +294,19 @@ void qemu_event_set(QemuEvent *ev)
|
||||
|
||||
void qemu_event_reset(QemuEvent *ev)
|
||||
{
|
||||
- unsigned value;
|
||||
-
|
||||
assert(ev->initialized);
|
||||
- value = qatomic_read(&ev->value);
|
||||
- smp_mb_acquire();
|
||||
- if (value == EV_SET) {
|
||||
- /* If there was a concurrent reset (or even reset+wait),
|
||||
- * do nothing. Otherwise change EV_SET->EV_FREE.
|
||||
- */
|
||||
- qatomic_or(&ev->value, EV_FREE);
|
||||
- }
|
||||
+
|
||||
+ /*
|
||||
+ * If there was a concurrent reset (or even reset+wait),
|
||||
+ * do nothing. Otherwise change EV_SET->EV_FREE.
|
||||
+ */
|
||||
+ qatomic_or(&ev->value, EV_FREE);
|
||||
+
|
||||
+ /*
|
||||
+ * Order reset before checking the condition in the caller.
|
||||
+ * Pairs with the first memory barrier in qemu_event_set().
|
||||
+ */
|
||||
+ smp_mb__after_rmw();
|
||||
}
|
||||
|
||||
void qemu_event_wait(QemuEvent *ev)
|
||||
@@ -304,29 +314,49 @@ void qemu_event_wait(QemuEvent *ev)
|
||||
unsigned value;
|
||||
|
||||
assert(ev->initialized);
|
||||
- value = qatomic_read(&ev->value);
|
||||
- smp_mb_acquire();
|
||||
+
|
||||
+ /*
|
||||
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
|
||||
+ * not go down the slow path, so this load-acquire is needed that
|
||||
+ * synchronizes with the first memory barrier in qemu_event_set().
|
||||
+ *
|
||||
+ * If we do go down the slow path, there is no requirement at all: we
|
||||
+ * might miss a qemu_event_set() here but ultimately the memory barrier in
|
||||
+ * qemu_futex_wait() will ensure the check is done correctly.
|
||||
+ */
|
||||
+ value = qatomic_load_acquire(&ev->value);
|
||||
if (value != EV_SET) {
|
||||
if (value == EV_FREE) {
|
||||
- /* qemu_event_set is not yet going to call SetEvent, but we are
|
||||
- * going to do another check for EV_SET below when setting EV_BUSY.
|
||||
- * At that point it is safe to call WaitForSingleObject.
|
||||
+ /*
|
||||
+ * Here the underlying kernel event is reset, but qemu_event_set is
|
||||
+ * not yet going to call SetEvent. However, there will be another
|
||||
+ * check for EV_SET below when setting EV_BUSY. At that point it
|
||||
+ * is safe to call WaitForSingleObject.
|
||||
*/
|
||||
ResetEvent(ev->event);
|
||||
|
||||
- /* Tell qemu_event_set that there are waiters. No need to retry
|
||||
- * because there cannot be a concurrent busy->free transition.
|
||||
- * After the CAS, the event will be either set or busy.
|
||||
+ /*
|
||||
+ * It is not clear whether ResetEvent provides this barrier; kernel
|
||||
+ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry!
|
||||
+ */
|
||||
+ smp_mb();
|
||||
+
|
||||
+ /*
|
||||
+ * Leave the event reset and tell qemu_event_set that there are
|
||||
+ * waiters. No need to retry, because there cannot be a concurrent
|
||||
+ * busy->free transition. After the CAS, the event will be either
|
||||
+ * set or busy.
|
||||
*/
|
||||
if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
|
||||
- value = EV_SET;
|
||||
- } else {
|
||||
- value = EV_BUSY;
|
||||
+ return;
|
||||
}
|
||||
}
|
||||
- if (value == EV_BUSY) {
|
||||
- WaitForSingleObject(ev->event, INFINITE);
|
||||
- }
|
||||
+
|
||||
+ /*
|
||||
+ * ev->value is now EV_BUSY. Since we didn't observe EV_SET,
|
||||
+ * qemu_event_set() must observe EV_BUSY and call SetEvent().
|
||||
+ */
|
||||
+ WaitForSingleObject(ev->event, INFINITE);
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.39.1
|
||||
|
kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch (new file, 176 lines)
@@ -0,0 +1,176 @@
|
||||
From 0a4f5bcc2a6f8ac31431e971c1dce9e6ab2191c2 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 21 Feb 2023 16:22:16 -0500
|
||||
Subject: [PATCH 01/12] scsi: protect req->aiocb with AioContext lock
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread
|
||||
RH-Bugzilla: 2155748
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
||||
RH-Commit: [1/3] 61727297bd31dfe18220b61f1d265ced0649c60d (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
If requests are being processed in the IOThread when a SCSIDevice is
|
||||
unplugged, scsi_device_purge_requests() -> scsi_req_cancel_async() races
|
||||
with I/O completion callbacks. Both threads load and store req->aiocb.
|
||||
This can lead to assert(r->req.aiocb == NULL) failures and undefined
|
||||
behavior.
|
||||
|
||||
Protect r->req.aiocb with the AioContext lock to prevent the race.
|
||||
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230221212218.1378734-2-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 7b7fc3d0102dafe8eb44802493036a526e921a71)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
hw/scsi/scsi-disk.c | 23 ++++++++++++++++-------
|
||||
hw/scsi/scsi-generic.c | 11 ++++++-----
|
||||
2 files changed, 22 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
|
||||
index e493c28814..5327f93f4c 100644
|
||||
--- a/hw/scsi/scsi-disk.c
|
||||
+++ b/hw/scsi/scsi-disk.c
|
||||
@@ -273,9 +273,11 @@ static void scsi_aio_complete(void *opaque, int ret)
|
||||
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
+
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
+
|
||||
if (scsi_disk_req_check_error(r, ret, true)) {
|
||||
goto done;
|
||||
}
|
||||
@@ -357,10 +359,11 @@ static void scsi_dma_complete(void *opaque, int ret)
|
||||
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
+
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
if (ret < 0) {
|
||||
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||
} else {
|
||||
@@ -393,10 +396,11 @@ static void scsi_read_complete(void *opaque, int ret)
|
||||
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
+
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
if (ret < 0) {
|
||||
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||
} else {
|
||||
@@ -446,10 +450,11 @@ static void scsi_do_read_cb(void *opaque, int ret)
|
||||
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
+
|
||||
assert (r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
if (ret < 0) {
|
||||
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||
} else {
|
||||
@@ -530,10 +535,11 @@ static void scsi_write_complete(void * opaque, int ret)
|
||||
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
+
|
||||
assert (r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
if (ret < 0) {
|
||||
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||
} else {
|
||||
@@ -1737,10 +1743,11 @@ static void scsi_unmap_complete(void *opaque, int ret)
|
||||
SCSIDiskReq *r = data->r;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
+
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
if (scsi_disk_req_check_error(r, ret, true)) {
|
||||
scsi_req_unref(&r->req);
|
||||
g_free(data);
|
||||
@@ -1816,9 +1823,11 @@ static void scsi_write_same_complete(void *opaque, int ret)
|
||||
SCSIDiskReq *r = data->r;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
+
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
+
|
||||
if (scsi_disk_req_check_error(r, ret, true)) {
|
||||
goto done;
|
||||
}
|
||||
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
|
||||
index 92cce20a4d..ac9fa662b4 100644
|
||||
--- a/hw/scsi/scsi-generic.c
|
||||
+++ b/hw/scsi/scsi-generic.c
|
||||
@@ -111,10 +111,11 @@ static void scsi_command_complete(void *opaque, int ret)
|
||||
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
|
||||
SCSIDevice *s = r->req.dev;
|
||||
|
||||
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||
+
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||
scsi_command_complete_noio(r, ret);
|
||||
aio_context_release(blk_get_aio_context(s->conf.blk));
|
||||
}
|
||||
@@ -269,11 +270,11 @@ static void scsi_read_complete(void * opaque, int ret)
|
||||
SCSIDevice *s = r->req.dev;
|
||||
int len;
|
||||
|
||||
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||
+
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||
-
|
||||
if (ret || r->req.io_canceled) {
|
||||
scsi_command_complete_noio(r, ret);
|
||||
goto done;
|
||||
@@ -386,11 +387,11 @@ static void scsi_write_complete(void * opaque, int ret)
|
||||
|
||||
trace_scsi_generic_write_complete(ret);
|
||||
|
||||
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||
+
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||
-
|
||||
if (ret || r->req.io_canceled) {
|
||||
scsi_command_complete_noio(r, ret);
|
||||
goto done;
|
||||
--
|
||||
2.39.1
|
||||
|
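The fix above is structural: every completion callback now takes the AioContext lock before it touches r->req.aiocb, so the cancellation path running in another thread can never observe a half-updated request. A stand-in sketch of that locking discipline with a pthread mutex in place of the AioContext lock; the Req type and field names are illustrative, not QEMU's.

#include <pthread.h>
#include <stdio.h>

typedef struct {
    pthread_mutex_t ctx_lock;      /* stands in for the AioContext lock */
    void *aiocb;                   /* in-flight request; NULL when idle */
} Req;

/* Completion callback: take the lock before touching req->aiocb. */
static void complete_cb(Req *r, int ret)
{
    pthread_mutex_lock(&r->ctx_lock);
    if (r->aiocb) {
        r->aiocb = NULL;
        printf("completed, ret=%d\n", ret);
    }
    pthread_mutex_unlock(&r->ctx_lock);
}

/* Cancellation path: same lock, so it never races with the callback. */
static void cancel(Req *r)
{
    pthread_mutex_lock(&r->ctx_lock);
    if (r->aiocb) {
        r->aiocb = NULL;
        printf("cancelled\n");
    }
    pthread_mutex_unlock(&r->ctx_lock);
}

int main(void)
{
    Req r = { PTHREAD_MUTEX_INITIALIZER, &r };   /* dummy non-NULL aiocb */
    cancel(&r);
    complete_cb(&r, 0);            /* finds the request already cancelled */
    return 0;
}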
kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch (new file, 325 lines)
@@ -0,0 +1,325 @@
|
||||
From c64027b1ff9856031c01009f4b5c3560d92cc998 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 21 Feb 2023 16:22:18 -0500
|
||||
Subject: [PATCH 03/12] virtio-scsi: reset SCSI devices from main loop thread
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread
|
||||
RH-Bugzilla: 2155748
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
||||
RH-Commit: [3/3] 2a29cb9600709a799daadb4addb58a747ed2e3a3 (stefanha/centos-stream-qemu-kvm)
|
||||
|
||||
When an IOThread is configured, the ctrl virtqueue is processed in the
|
||||
IOThread. TMFs that reset SCSI devices are currently called directly
|
||||
from the IOThread and trigger an assertion failure in blk_drain() from
|
||||
the following call stack:
|
||||
|
||||
virtio_scsi_handle_ctrl_req -> virtio_scsi_do_tmf -> device_code_reset
|
||||
-> scsi_disk_reset -> scsi_device_purge_requests -> blk_drain
|
||||
|
||||
../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed.
|
||||
|
||||
The blk_drain() function is not designed to be called from an IOThread
|
||||
because it needs the Big QEMU Lock (BQL).
|
||||
|
||||
This patch defers TMFs that reset SCSI devices to a Bottom Half (BH)
|
||||
that runs in the main loop thread under the BQL. This way it's safe to
|
||||
call blk_drain() and the assertion failure is avoided.
|
||||
|
||||
Introduce s->tmf_bh_list for tracking TMF requests that have been
|
||||
deferred to the BH. When the BH runs it will grab the entire list and
|
||||
process all requests. Care must be taken to clear the list when the
|
||||
virtio-scsi device is reset or unrealized. Otherwise deferred TMF
|
||||
requests could execute later and lead to use-after-free or other
|
||||
undefined behavior.
|
||||
|
||||
The s->resetting counter that's used by TMFs that reset SCSI devices is
|
||||
accessed from multiple threads. This patch makes that explicit by using
|
||||
atomic accessor functions. With this patch applied the counter is only
|
||||
modified by the main loop thread under the BQL but can be read by any
|
||||
thread.
|
||||
|
||||
Reported-by: Qing Wang <qinwang@redhat.com>
|
||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230221212218.1378734-4-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit be2c42b97c3a3a395b2f05bad1b6c7de20ecf2a5)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
hw/scsi/virtio-scsi.c | 169 +++++++++++++++++++++++++-------
|
||||
include/hw/virtio/virtio-scsi.h | 11 ++-
|
||||
2 files changed, 143 insertions(+), 37 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 6f6e2e32ba..7d27e4c2a1 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -42,13 +42,11 @@ typedef struct VirtIOSCSIReq {
     QEMUSGList qsgl;
     QEMUIOVector resp_iov;
 
-    union {
-        /* Used for two-stage request submission */
-        QTAILQ_ENTRY(VirtIOSCSIReq) next;
+    /* Used for two-stage request submission and TMFs deferred to BH */
+    QTAILQ_ENTRY(VirtIOSCSIReq) next;
 
-        /* Used for cancellation of request during TMFs */
-        int remaining;
-    };
+    /* Used for cancellation of request during TMFs */
+    int remaining;
 
     SCSIRequest *sreq;
     size_t resp_size;
@@ -293,6 +291,122 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d)
     }
 }
 
+static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
+{
+    VirtIOSCSI *s = req->dev;
+    SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
+    BusChild *kid;
+    int target;
+
+    switch (req->req.tmf.subtype) {
+    case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
+        if (!d) {
+            req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
+            goto out;
+        }
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+            req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN;
+            goto out;
+        }
+        qatomic_inc(&s->resetting);
+        device_cold_reset(&d->qdev);
+        qatomic_dec(&s->resetting);
+        break;
+
+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
+        target = req->req.tmf.lun[1];
+        qatomic_inc(&s->resetting);
+
+        rcu_read_lock();
+        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
+            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
+            if (d1->channel == 0 && d1->id == target) {
+                device_cold_reset(&d1->qdev);
+            }
+        }
+        rcu_read_unlock();
+
+        qatomic_dec(&s->resetting);
+        break;
+
+    default:
+        g_assert_not_reached();
+        break;
+    }
+
+out:
+    object_unref(OBJECT(d));
+
+    virtio_scsi_acquire(s);
+    virtio_scsi_complete_req(req);
+    virtio_scsi_release(s);
+}
+
+/* Some TMFs must be processed from the main loop thread */
+static void virtio_scsi_do_tmf_bh(void *opaque)
+{
+    VirtIOSCSI *s = opaque;
+    QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
+    VirtIOSCSIReq *req;
+    VirtIOSCSIReq *tmp;
+
+    GLOBAL_STATE_CODE();
+
+    virtio_scsi_acquire(s);
+
+    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
+        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
+        QTAILQ_INSERT_TAIL(&reqs, req, next);
+    }
+
+    qemu_bh_delete(s->tmf_bh);
+    s->tmf_bh = NULL;
+
+    virtio_scsi_release(s);
+
+    QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
+        QTAILQ_REMOVE(&reqs, req, next);
+        virtio_scsi_do_one_tmf_bh(req);
+    }
+}
+
+static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
+{
+    VirtIOSCSIReq *req;
+    VirtIOSCSIReq *tmp;
+
+    GLOBAL_STATE_CODE();
+
+    virtio_scsi_acquire(s);
+
+    if (s->tmf_bh) {
+        qemu_bh_delete(s->tmf_bh);
+        s->tmf_bh = NULL;
+    }
+
+    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
+        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
+
+        /* SAM-6 6.3.2 Hard reset */
+        req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
+        virtio_scsi_complete_req(req);
+    }
+
+    virtio_scsi_release(s);
+}
+
+static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
+{
+    VirtIOSCSI *s = req->dev;
+
+    QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
+
+    if (!s->tmf_bh) {
+        s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
+        qemu_bh_schedule(s->tmf_bh);
+    }
+}
+
 /* Return 0 if the request is ready to be completed and return to guest;
  * -EINPROGRESS if the request is submitted and will be completed later, in the
  * case of async cancellation. */
@@ -300,8 +414,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
 {
     SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
     SCSIRequest *r, *next;
-    BusChild *kid;
-    int target;
     int ret = 0;
 
     virtio_scsi_ctx_check(s, d);
@@ -358,15 +470,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
         break;
 
     case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
-        if (!d) {
-            goto fail;
-        }
-        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
-            goto incorrect_lun;
-        }
-        s->resetting++;
-        device_cold_reset(&d->qdev);
-        s->resetting--;
+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
+        virtio_scsi_defer_tmf_to_bh(req);
+        ret = -EINPROGRESS;
         break;
 
     case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
@@ -409,22 +515,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
         }
         break;
 
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
-        target = req->req.tmf.lun[1];
-        s->resetting++;
-
-        rcu_read_lock();
-        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
-            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
-            if (d1->channel == 0 && d1->id == target) {
-                device_cold_reset(&d1->qdev);
-            }
-        }
-        rcu_read_unlock();
-
-        s->resetting--;
-        break;
-
     case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
     default:
         req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
@@ -654,7 +744,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r)
     if (!req) {
         return;
     }
-    if (req->dev->resetting) {
+    if (qatomic_read(&req->dev->resetting)) {
         req->resp.cmd.response = VIRTIO_SCSI_S_RESET;
     } else {
         req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED;
@@ -830,9 +920,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev)
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
 
     assert(!s->dataplane_started);
-    s->resetting++;
+
+    virtio_scsi_reset_tmf_bh(s);
+
+    qatomic_inc(&s->resetting);
     bus_cold_reset(BUS(&s->bus));
-    s->resetting--;
+    qatomic_dec(&s->resetting);
 
     vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
     vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
@@ -1052,6 +1145,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
     Error *err = NULL;
 
+    QTAILQ_INIT(&s->tmf_bh_list);
+
     virtio_scsi_common_realize(dev,
                                virtio_scsi_handle_ctrl,
                                virtio_scsi_handle_event,
@@ -1089,6 +1184,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
 {
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
 
+    virtio_scsi_reset_tmf_bh(s);
+
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
     virtio_scsi_common_unrealize(dev);
 }
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index a36aad9c86..1c1cd77d6e 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -75,13 +75,22 @@ struct VirtIOSCSICommon {
     VirtQueue **cmd_vqs;
 };
 
+struct VirtIOSCSIReq;
+
 struct VirtIOSCSI {
     VirtIOSCSICommon parent_obj;
 
     SCSIBus bus;
-    int resetting;
+    int resetting; /* written from main loop thread, read from any thread */
     bool events_dropped;
 
+    /*
+     * TMFs deferred to main loop BH. These fields are protected by
+     * virtio_scsi_acquire().
+     */
+    QEMUBH *tmf_bh;
+    QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
+
     /* Fields for dataplane below */
     AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
 
-- 
2.39.1

qemu-kvm.spec
@@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \
 Summary: QEMU is a machine emulator and virtualizer
 Name: qemu-kvm
 Version: 7.2.0
-Release: 11%{?rcrel}%{?dist}%{?cc_suffix}
+Release: 12%{?rcrel}%{?dist}%{?cc_suffix}
 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
 # Epoch 15 used for RHEL 8
 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@@ -376,6 +376,30 @@ Patch113: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch
 Patch114: kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch
 # For bz#2169904 - [SVVP] job 'Check SMBIOS Table Specific Requirements' failed on win2022
 Patch115: kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch
+# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed
+Patch116: kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch
+# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed
+Patch117: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch
+# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed
+Patch118: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch
+# For bz#2175660 - Guest hangs when starting or rebooting
+Patch119: kvm-qatomic-add-smp_mb__before-after_rmw.patch
+# For bz#2175660 - Guest hangs when starting or rebooting
+Patch120: kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch
+# For bz#2175660 - Guest hangs when starting or rebooting
+Patch121: kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch
+# For bz#2175660 - Guest hangs when starting or rebooting
+Patch122: kvm-edu-add-smp_mb__after_rmw.patch
+# For bz#2175660 - Guest hangs when starting or rebooting
+Patch123: kvm-aio-wait-switch-to-smp_mb__after_rmw.patch
+# For bz#2175660 - Guest hangs when starting or rebooting
+Patch124: kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch
+# For bz#2175660 - Guest hangs when starting or rebooting
+Patch125: kvm-physmem-add-missing-memory-barrier.patch
+# For bz#2175660 - Guest hangs when starting or rebooting
+Patch126: kvm-async-update-documentation-of-the-memory-barriers.patch
+# For bz#2175660 - Guest hangs when starting or rebooting
+Patch127: kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch
 
 %if %{have_clang}
 BuildRequires: clang
@@ -1406,6 +1430,24 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \
 %endif
 
 %changelog
+* Sun Mar 12 2023 Miroslav Rezanina <mrezanin@redhat.com> - 7.2.0-12
+- kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch [bz#2155748]
+- kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch [bz#2155748]
+- kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch [bz#2155748]
+- kvm-qatomic-add-smp_mb__before-after_rmw.patch [bz#2175660]
+- kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch [bz#2175660]
+- kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch [bz#2175660]
+- kvm-edu-add-smp_mb__after_rmw.patch [bz#2175660]
+- kvm-aio-wait-switch-to-smp_mb__after_rmw.patch [bz#2175660]
+- kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch [bz#2175660]
+- kvm-physmem-add-missing-memory-barrier.patch [bz#2175660]
+- kvm-async-update-documentation-of-the-memory-barriers.patch [bz#2175660]
+- kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch [bz#2175660]
+- Resolves: bz#2155748
+  (qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed)
+- Resolves: bz#2175660
+  (Guest hangs when starting or rebooting)
+
 * Mon Mar 06 2023 Miroslav Rezanina <mrezanin@redhat.com> - 7.2.0-11
 - kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch [bz#2169904]
 - Resolves: bz#2169904