2fe1fc7b2d
- kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch [bz#1905805] - kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch [bz#1905805] - kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch [bz#1905805] - kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch [bz#1905805] - kvm-vhost-vdpa-add-support-for-config-interrupt.patch [bz#1905805] - kvm-virtio-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-vhost-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-virtio-net-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-virtio-mmio-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-virtio-pci-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch [bz#2159408] - kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch [bz#2124856] - kvm-block-drop-bdrv_remove_filter_or_cow_child.patch [bz#2155112] - kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch [bz#2155112] - kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch [bz#2155112] - kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch [bz#2155112] - kvm-block-Remove-drained_end_counter.patch [bz#2155112] - kvm-block-Inline-bdrv_drain_invoke.patch [bz#2155112] - kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch [bz#2155112] - kvm-block-Drain-individual-nodes-during-reopen.patch [bz#2155112] - kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch [bz#2155112] - kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch [bz#2155112] - kvm-block-Remove-subtree-drains.patch [bz#2155112] - kvm-block-Call-drain-callbacks-only-once.patch [bz#2155112] - kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch [bz#2155112] - kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch [bz#2155112] - kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch [bz#2155112] - kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch [bz#2155112] - kvm-accel-introduce-accelerator-blocker-API.patch [bz#1979276] - kvm-KVM-keep-track-of-running-ioctls.patch [bz#1979276] - kvm-kvm-Atomic-memslot-updates.patch [bz#1979276] - Resolves: bz#1905805 (support config interrupt in vhost-vdpa qemu) - Resolves: bz#2159408 ([s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8) - Resolves: bz#2124856 (VM with virtio interface and iommu=on will crash when try to migrate) - Resolves: bz#2155112 (Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)) - Resolves: bz#1979276 (SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on)
349 lines
11 KiB
Diff
349 lines
11 KiB
Diff
From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001
|
|
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
|
Date: Mon, 16 Jan 2023 07:16:41 -0500
|
|
Subject: [PATCH 29/31] accel: introduce accelerator blocker API
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
|
RH-MergeRequest: 138: accel: introduce accelerator blocker API
|
|
RH-Bugzilla: 1979276
|
|
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm)
|
|
|
|
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
|
|
|
|
commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1
|
|
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
|
Date: Fri Nov 11 10:47:56 2022 -0500
|
|
|
|
accel: introduce accelerator blocker API
|
|
|
|
This API allows the accelerators to prevent vcpus from issuing
|
|
new ioctls while execting a critical section marked with the
|
|
accel_ioctl_inhibit_begin/end functions.
|
|
|
|
Note that all functions submitting ioctls must mark where the
|
|
ioctl is being called with accel_{cpu_}ioctl_begin/end().
|
|
|
|
This API requires the caller to always hold the BQL.
|
|
API documentation is in sysemu/accel-blocker.h
|
|
|
|
Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
|
|
(to minimize cache line bouncing) to keep avoid that new ioctls
|
|
run when the critical section starts, and a QemuEvent to wait
|
|
that all running ioctls finish.
|
|
|
|
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
|
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
|
Message-Id: <20221111154758.1372674-2-eesposit@redhat.com>
|
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
|
Conflicts:
|
|
util/meson.build: "interval-tree.c" does not exist
|
|
|
|
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
|
---
|
|
accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++
|
|
accel/meson.build | 2 +-
|
|
hw/core/cpu-common.c | 2 +
|
|
include/hw/core/cpu.h | 3 +
|
|
include/sysemu/accel-blocker.h | 56 ++++++++++++
|
|
util/meson.build | 2 +-
|
|
6 files changed, 217 insertions(+), 2 deletions(-)
|
|
create mode 100644 accel/accel-blocker.c
|
|
create mode 100644 include/sysemu/accel-blocker.h
|
|
|
|
diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
|
|
new file mode 100644
|
|
index 0000000000..1e7f423462
|
|
--- /dev/null
|
|
+++ b/accel/accel-blocker.c
|
|
@@ -0,0 +1,154 @@
|
|
+/*
|
|
+ * Lock to inhibit accelerator ioctls
|
|
+ *
|
|
+ * Copyright (c) 2022 Red Hat Inc.
|
|
+ *
|
|
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to deal
|
|
+ * in the Software without restriction, including without limitation the rights
|
|
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
+ * copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
+ * THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include "qemu/osdep.h"
|
|
+#include "qemu/thread.h"
|
|
+#include "qemu/main-loop.h"
|
|
+#include "hw/core/cpu.h"
|
|
+#include "sysemu/accel-blocker.h"
|
|
+
|
|
+static QemuLockCnt accel_in_ioctl_lock;
|
|
+static QemuEvent accel_in_ioctl_event;
|
|
+
|
|
+void accel_blocker_init(void)
|
|
+{
|
|
+ qemu_lockcnt_init(&accel_in_ioctl_lock);
|
|
+ qemu_event_init(&accel_in_ioctl_event, false);
|
|
+}
|
|
+
|
|
+void accel_ioctl_begin(void)
|
|
+{
|
|
+ if (likely(qemu_mutex_iothread_locked())) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */
|
|
+ qemu_lockcnt_inc(&accel_in_ioctl_lock);
|
|
+}
|
|
+
|
|
+void accel_ioctl_end(void)
|
|
+{
|
|
+ if (likely(qemu_mutex_iothread_locked())) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ qemu_lockcnt_dec(&accel_in_ioctl_lock);
|
|
+ /* change event to SET. If event was BUSY, wake up all waiters */
|
|
+ qemu_event_set(&accel_in_ioctl_event);
|
|
+}
|
|
+
|
|
+void accel_cpu_ioctl_begin(CPUState *cpu)
|
|
+{
|
|
+ if (unlikely(qemu_mutex_iothread_locked())) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */
|
|
+ qemu_lockcnt_inc(&cpu->in_ioctl_lock);
|
|
+}
|
|
+
|
|
+void accel_cpu_ioctl_end(CPUState *cpu)
|
|
+{
|
|
+ if (unlikely(qemu_mutex_iothread_locked())) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ qemu_lockcnt_dec(&cpu->in_ioctl_lock);
|
|
+ /* change event to SET. If event was BUSY, wake up all waiters */
|
|
+ qemu_event_set(&accel_in_ioctl_event);
|
|
+}
|
|
+
|
|
+static bool accel_has_to_wait(void)
|
|
+{
|
|
+ CPUState *cpu;
|
|
+ bool needs_to_wait = false;
|
|
+
|
|
+ CPU_FOREACH(cpu) {
|
|
+ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) {
|
|
+ /* exit the ioctl, if vcpu is running it */
|
|
+ qemu_cpu_kick(cpu);
|
|
+ needs_to_wait = true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock);
|
|
+}
|
|
+
|
|
+void accel_ioctl_inhibit_begin(void)
|
|
+{
|
|
+ CPUState *cpu;
|
|
+
|
|
+ /*
|
|
+ * We allow to inhibit only when holding the BQL, so we can identify
|
|
+ * when an inhibitor wants to issue an ioctl easily.
|
|
+ */
|
|
+ g_assert(qemu_mutex_iothread_locked());
|
|
+
|
|
+ /* Block further invocations of the ioctls outside the BQL. */
|
|
+ CPU_FOREACH(cpu) {
|
|
+ qemu_lockcnt_lock(&cpu->in_ioctl_lock);
|
|
+ }
|
|
+ qemu_lockcnt_lock(&accel_in_ioctl_lock);
|
|
+
|
|
+ /* Keep waiting until there are running ioctls */
|
|
+ while (true) {
|
|
+
|
|
+ /* Reset event to FREE. */
|
|
+ qemu_event_reset(&accel_in_ioctl_event);
|
|
+
|
|
+ if (accel_has_to_wait()) {
|
|
+ /*
|
|
+ * If event is still FREE, and there are ioctls still in progress,
|
|
+ * wait.
|
|
+ *
|
|
+ * If an ioctl finishes before qemu_event_wait(), it will change
|
|
+ * the event state to SET. This will prevent qemu_event_wait() from
|
|
+ * blocking, but it's not a problem because if other ioctls are
|
|
+ * still running the loop will iterate once more and reset the event
|
|
+ * status to FREE so that it can wait properly.
|
|
+ *
|
|
+ * If an ioctls finishes while qemu_event_wait() is blocking, then
|
|
+ * it will be waken up, but also here the while loop makes sure
|
|
+ * to re-enter the wait if there are other running ioctls.
|
|
+ */
|
|
+ qemu_event_wait(&accel_in_ioctl_event);
|
|
+ } else {
|
|
+ /* No ioctl is running */
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void accel_ioctl_inhibit_end(void)
|
|
+{
|
|
+ CPUState *cpu;
|
|
+
|
|
+ qemu_lockcnt_unlock(&accel_in_ioctl_lock);
|
|
+ CPU_FOREACH(cpu) {
|
|
+ qemu_lockcnt_unlock(&cpu->in_ioctl_lock);
|
|
+ }
|
|
+}
|
|
+
|
|
diff --git a/accel/meson.build b/accel/meson.build
|
|
index 259c35c4c8..061332610f 100644
|
|
--- a/accel/meson.build
|
|
+++ b/accel/meson.build
|
|
@@ -1,4 +1,4 @@
|
|
-specific_ss.add(files('accel-common.c'))
|
|
+specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
|
|
softmmu_ss.add(files('accel-softmmu.c'))
|
|
user_ss.add(files('accel-user.c'))
|
|
|
|
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
|
|
index f9fdd46b9d..8d6a4b1b65 100644
|
|
--- a/hw/core/cpu-common.c
|
|
+++ b/hw/core/cpu-common.c
|
|
@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj)
|
|
cpu->nr_threads = 1;
|
|
|
|
qemu_mutex_init(&cpu->work_mutex);
|
|
+ qemu_lockcnt_init(&cpu->in_ioctl_lock);
|
|
QSIMPLEQ_INIT(&cpu->work_list);
|
|
QTAILQ_INIT(&cpu->breakpoints);
|
|
QTAILQ_INIT(&cpu->watchpoints);
|
|
@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj)
|
|
{
|
|
CPUState *cpu = CPU(obj);
|
|
|
|
+ qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
|
|
qemu_mutex_destroy(&cpu->work_mutex);
|
|
}
|
|
|
|
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
|
|
index 8830546121..2417597236 100644
|
|
--- a/include/hw/core/cpu.h
|
|
+++ b/include/hw/core/cpu.h
|
|
@@ -398,6 +398,9 @@ struct CPUState {
|
|
uint32_t kvm_fetch_index;
|
|
uint64_t dirty_pages;
|
|
|
|
+ /* Use by accel-block: CPU is executing an ioctl() */
|
|
+ QemuLockCnt in_ioctl_lock;
|
|
+
|
|
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
|
|
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
|
|
DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
|
|
diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
|
|
new file mode 100644
|
|
index 0000000000..72020529ef
|
|
--- /dev/null
|
|
+++ b/include/sysemu/accel-blocker.h
|
|
@@ -0,0 +1,56 @@
|
|
+/*
|
|
+ * Accelerator blocking API, to prevent new ioctls from starting and wait the
|
|
+ * running ones finish.
|
|
+ * This mechanism differs from pause/resume_all_vcpus() in that it does not
|
|
+ * release the BQL.
|
|
+ *
|
|
+ * Copyright (c) 2022 Red Hat Inc.
|
|
+ *
|
|
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
|
+ *
|
|
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
+ * See the COPYING file in the top-level directory.
|
|
+ */
|
|
+#ifndef ACCEL_BLOCKER_H
|
|
+#define ACCEL_BLOCKER_H
|
|
+
|
|
+#include "qemu/osdep.h"
|
|
+#include "sysemu/cpus.h"
|
|
+
|
|
+extern void accel_blocker_init(void);
|
|
+
|
|
+/*
|
|
+ * accel_{cpu_}ioctl_begin/end:
|
|
+ * Mark when ioctl is about to run or just finished.
|
|
+ *
|
|
+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
|
|
+ * called, preventing new ioctls to run. They will continue only after
|
|
+ * accel_ioctl_inibith_end().
|
|
+ */
|
|
+extern void accel_ioctl_begin(void);
|
|
+extern void accel_ioctl_end(void);
|
|
+extern void accel_cpu_ioctl_begin(CPUState *cpu);
|
|
+extern void accel_cpu_ioctl_end(CPUState *cpu);
|
|
+
|
|
+/*
|
|
+ * accel_ioctl_inhibit_begin: start critical section
|
|
+ *
|
|
+ * This function makes sure that:
|
|
+ * 1) incoming accel_{cpu_}ioctl_begin() calls block
|
|
+ * 2) wait that all ioctls that were already running reach
|
|
+ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary.
|
|
+ *
|
|
+ * This allows the caller to access shared data or perform operations without
|
|
+ * worrying of concurrent vcpus accesses.
|
|
+ */
|
|
+extern void accel_ioctl_inhibit_begin(void);
|
|
+
|
|
+/*
|
|
+ * accel_ioctl_inhibit_end: end critical section started by
|
|
+ * accel_ioctl_inhibit_begin()
|
|
+ *
|
|
+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
|
|
+ */
|
|
+extern void accel_ioctl_inhibit_end(void);
|
|
+
|
|
+#endif /* ACCEL_BLOCKER_H */
|
|
diff --git a/util/meson.build b/util/meson.build
|
|
index 25b9b61f98..85a5504c4d 100644
|
|
--- a/util/meson.build
|
|
+++ b/util/meson.build
|
|
@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c'))
|
|
util_ss.add(files('yank.c'))
|
|
util_ss.add(files('int128.c'))
|
|
util_ss.add(files('memalign.c'))
|
|
+util_ss.add(files('lockcnt.c'))
|
|
|
|
if have_user
|
|
util_ss.add(files('selfmap.c'))
|
|
@@ -71,7 +72,6 @@ endif
|
|
if have_block or have_ga
|
|
util_ss.add(files('aiocb.c', 'async.c'))
|
|
util_ss.add(files('base64.c'))
|
|
- util_ss.add(files('lockcnt.c'))
|
|
util_ss.add(files('main-loop.c'))
|
|
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
|
|
util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND'])))
|
|
--
|
|
2.31.1
|
|
|