From 82a02aec3a8b3c2ac925d0b71ea4c35aa5d6463b Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 4 Aug 2021 03:27:24 -0400
Subject: [PATCH 2/2] async: use explicit memory barriers

RH-Author: Andrew Jones <drjones@redhat.com>
Message-id: <20210729134448.4995-3-drjones@redhat.com>
Patchwork-id: 101937
O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 2/2] async: use explicit memory barriers
Bugzilla: 1969848
RH-Acked-by: Gavin Shan <gshan@redhat.com>
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>

From: Paolo Bonzini <pbonzini@redhat.com>

When using C11 atomics, non-seqcst reads and writes do not participate
in the total order of seqcst operations. In util/async.c and
util/aio-posix.c, in particular, the pattern that we use

          write ctx->notify_me                 write bh->scheduled
          read bh->scheduled                   read ctx->notify_me
          if !bh->scheduled, sleep             if ctx->notify_me, notify

needs to use seqcst operations for both the write and the read. In
general this is something that we do not want, because there can be
many sources that are polled in addition to bottom halves. The
alternative is to place a seqcst memory barrier between the write
and the read. This also comes with a disadvantage, in that the
memory barrier is implicit on strongly-ordered architectures and
it wastes a few dozen clock cycles.
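
As a minimal illustration of the pattern (not part of this patch), the
handshake above can be sketched in portable C11; waiter() and notifier()
below are hypothetical stand-ins for aio_poll() and aio_notify(), and
the seq_cst fences play the role of QEMU's smp_mb():

    #include <stdatomic.h>
    #include <stdbool.h>

    atomic_int  notify_me;   /* stand-in for ctx->notify_me */
    atomic_bool scheduled;   /* stand-in for bh->scheduled  */

    /* Waiter side: advertise that we may sleep, then check for work. */
    static bool waiter_may_sleep(void)
    {
        atomic_store_explicit(&notify_me, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);   /* like smp_mb() */
        return !atomic_load_explicit(&scheduled, memory_order_relaxed);
    }

    /* Notifier side: publish work, then check if a wakeup is needed. */
    static bool notifier_must_notify(void)
    {
        atomic_store_explicit(&scheduled, true, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);   /* like smp_mb() */
        return atomic_load_explicit(&notify_me, memory_order_relaxed) != 0;
    }

With both fences in place it is impossible for the waiter to read
scheduled == false while the notifier reads notify_me == 0, so the
waiter can never sleep through a pending notification.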

Fortunately, ctx->notify_me is never written concurrently by two
threads, so we can assert that and relax the writes to ctx->notify_me.
The resulting solution works and performs well on both aarch64 and x86.

Note that the atomic_set/atomic_read combination is not an atomic
read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED;
on x86, ATOMIC_RELAXED compiles to a locked operation.
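
To illustrate that last point (again, a hypothetical sketch rather than
part of this patch): an atomic read-modify-write becomes a locked
instruction on x86 even at memory_order_relaxed, whereas a separate
relaxed load and store compile to plain mov/add. The latter is correct
here only because a single thread ever writes ctx->notify_me, which is
what the new assertions document:

    #include <stdatomic.h>

    atomic_int counter;

    /* Atomic RMW: a locked instruction (lock xadd) on x86, even when
     * relaxed.
     */
    static void add_rmw(void)
    {
        atomic_fetch_add_explicit(&counter, 2, memory_order_relaxed);
    }

    /* Separate load and store: plain mov/add on x86.  Not atomic as a
     * whole, so correct only with a single writer, which is what
     * assert(in_aio_context_home_thread(ctx)) guarantees in aio_poll().
     */
    static void add_single_writer(void)
    {
        int v = atomic_load_explicit(&counter, memory_order_relaxed);
        atomic_store_explicit(&counter, v + 2, memory_order_relaxed);
    }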

Analyzed-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Ying Fang <fangying1@huawei.com>
Message-Id: <20200407140746.8041-6-pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 5710a3e09f9b85801e5ce70797a4a511e5fc9e2c)
Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
 util/aio-posix.c | 16 ++++++++++++++--
 util/aio-win32.c | 17 ++++++++++++++---
 util/async.c     | 16 ++++++++++++----
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index abc396d030..8cfb25650d 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -624,6 +624,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int64_t timeout;
     int64_t start = 0;
 
+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
     assert(in_aio_context_home_thread(ctx));
 
     /* aio_notify can avoid the expensive event_notifier_set if
@@ -634,7 +639,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.).  Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
     }
 
     qemu_lockcnt_inc(&ctx->list_lock);
@@ -679,7 +690,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }
 
     if (blocking) {
-        atomic_sub(&ctx->notify_me, 2);
+        /* Finish the poll before clearing the flag.  */
+        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
         aio_notify_accept(ctx);
     }
 
diff --git a/util/aio-win32.c b/util/aio-win32.c
index a23b9c364d..729d533faf 100644
--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int count;
     int timeout;
 
+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
+    assert(in_aio_context_home_thread(ctx));
     progress = false;
 
     /* aio_notify can avoid the expensive event_notifier_set if
@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
      * so disable the optimization now.
      */
     if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.).  Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
     }
 
     qemu_lockcnt_inc(&ctx->list_lock);
@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
         if (blocking) {
             assert(first);
-            assert(in_aio_context_home_thread(ctx));
-            atomic_sub(&ctx->notify_me, 2);
+            atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
             aio_notify_accept(ctx);
         }
 
diff --git a/util/async.c b/util/async.c
index b1fa5319e5..c65c58bbc9 100644
--- a/util/async.c
+++ b/util/async.c
@@ -220,7 +220,14 @@ aio_ctx_prepare(GSource *source, gint *timeout)
 {
     AioContext *ctx = (AioContext *) source;
 
-    atomic_or(&ctx->notify_me, 1);
+    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
+
+    /*
+     * Write ctx->notify_me before computing the timeout
+     * (reading bottom half flags, etc.).  Pairs with
+     * smp_mb in aio_notify().
+     */
+    smp_mb();
 
     /* We assume there is no timeout already supplied */
     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
@@ -238,7 +245,8 @@ aio_ctx_check(GSource *source)
     AioContext *ctx = (AioContext *) source;
     QEMUBH *bh;
 
-    atomic_and(&ctx->notify_me, ~1);
+    /* Finish computing the timeout before clearing the flag.  */
+    atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
     aio_notify_accept(ctx);
 
     for (bh = ctx->first_bh; bh; bh = bh->next) {
@@ -343,10 +351,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
 void aio_notify(AioContext *ctx)
 {
     /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
-     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
+     * with smp_mb in aio_ctx_prepare or aio_poll.
      */
     smp_mb();
-    if (ctx->notify_me) {
+    if (atomic_read(&ctx->notify_me)) {
         event_notifier_set(&ctx->notifier);
         atomic_mb_set(&ctx->notified, true);
     }
-- 
2.27.0