550d33ded2
- kvm-qapi-machine.json-Add-cluster-id.patch [bz#2041823] - kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch [bz#2041823] - kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch [bz#2041823] - kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch [bz#2041823] - kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch [bz#2041823] - kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch [bz#2041823] - kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch [bz#2079938] - kvm-coroutine-Revert-to-constant-batch-size.patch [bz#2079938] - kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch [bz#2079347] - kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch [bz#2079347] - kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch [bz#2079347] - kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch [bz#2079347] - kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch [bz#2079347] - kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch [bz#2079347] - kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch [bz#1995710] - kvm-migration-Fix-operator-type.patch [bz#2064530] - Resolves: bz#2041823 ([aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken') - Resolves: bz#2079938 (qemu coredump when boot with multi disks (qemu) failed to set up stack guard page: Cannot allocate memory) - Resolves: bz#2079347 (Guest boot blocked when scsi disks using same iothread and 100% CPU consumption) - Resolves: bz#1995710 (RFE: Allow virtio-scsi CD-ROM media change with IOThreads) - Resolves: bz#2064530 (Rebuild qemu-kvm with clang-14)
139 lines
6.0 KiB
Diff
From 345107bfd5537b51f34aaeb97d6161858bb6feee Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 10 May 2022 17:10:20 +0200
Subject: [PATCH 08/16] coroutine: Revert to constant batch size

RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size
RH-Commit: [2/2] 8a8a39af873854cdc8333d1a70f3479a97c3ec7a (kmwolf/centos-qemu-kvm)
RH-Bugzilla: 2079938
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>

Commit 4c41c69e changed the way the coroutine pool is sized because for
virtio-blk devices with a large queue size and heavy I/O, it was just
too small and caused coroutines to be deleted and reallocated soon
afterwards. The change made the size dynamic based on the number of
queues and the queue size of virtio-blk devices.

There are two important numbers here: Slightly simplified, when a
coroutine terminates, it is generally stored in the global release pool
up to a certain pool size, and if the pool is full, it is freed.
Conversely, when allocating a new coroutine, the coroutines in the
release pool are reused if the pool already has reached a certain
minimum size (the batch size), otherwise we allocate new coroutines.

The problem after commit 4c41c69e is that it not only increases the
maximum pool size (which is the intended effect), but also the batch
size for reusing coroutines (which is a bug). It means that in cases
with many devices and/or a large queue size (which defaults to the
number of vcpus for virtio-blk-pci), many thousand coroutines could be
sitting in the release pool without being reused.

This is not only a waste of memory and allocations, but it actually
makes the QEMU process likely to hit the vm.max_map_count limit on Linux
because each coroutine requires two mappings (its stack and the guard
page for the stack), causing it to abort() in qemu_alloc_stack() because
when the limit is hit, mprotect() starts to fail with ENOMEM.

In order to fix the problem, change the batch size back to 64 to avoid
uselessly accumulating coroutines in the release pool, but keep the
dynamic maximum pool size so that coroutines aren't freed too early
in heavy I/O scenarios.

Note that this fix doesn't strictly make it impossible to hit the limit,
but this would only happen if most of the coroutines are actually in use
at the same time, not just sitting in a pool. This is the same behaviour
as we already had before commit 4c41c69e. Fully preventing this would
require allowing qemu_coroutine_create() to return an error, but it
doesn't seem to be a scenario that people hit in practice.

Cc: qemu-stable@nongnu.org
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938
Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20220510151020.105528-3-kwolf@redhat.com>
Tested-by: Hiroki Narukawa <hnarukaw@yahoo-corp.jp>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52)

Conflicts:
	util/qemu-coroutine.c

Trivial merge conflict because we don't have commit ac387a08 downstream.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 util/qemu-coroutine.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index faca0ca97c..804f672e0a 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -20,14 +20,20 @@
 #include "qemu/coroutine_int.h"
 #include "block/aio.h"
 
-/** Initial batch size is 64, and is increased on demand */
+/**
+ * The minimal batch size is always 64, coroutines from the release_pool are
+ * reused as soon as there are 64 coroutines in it. The maximum pool size starts
+ * with 64 and is increased on demand so that coroutines are not deleted even if
+ * they are not immediately reused.
+ */
 enum {
-    POOL_INITIAL_BATCH_SIZE = 64,
+    POOL_MIN_BATCH_SIZE = 64,
+    POOL_INITIAL_MAX_SIZE = 64,
 };
 
 /** Free list to speed up creation */
 static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
 static unsigned int release_pool_size;
 static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
 static __thread unsigned int alloc_pool_size;
@@ -51,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
     if (CONFIG_COROUTINE_POOL) {
         co = QSLIST_FIRST(&alloc_pool);
         if (!co) {
-            if (release_pool_size > qatomic_read(&pool_batch_size)) {
+            if (release_pool_size > POOL_MIN_BATCH_SIZE) {
                 /* Slow path; a good place to register the destructor, too.  */
                 if (!coroutine_pool_cleanup_notifier.notify) {
                     coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
@@ -88,12 +94,12 @@ static void coroutine_delete(Coroutine *co)
     co->caller = NULL;
 
     if (CONFIG_COROUTINE_POOL) {
-        if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
+        if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
             QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
             qatomic_inc(&release_pool_size);
             return;
         }
-        if (alloc_pool_size < qatomic_read(&pool_batch_size)) {
+        if (alloc_pool_size < qatomic_read(&pool_max_size)) {
             QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
             alloc_pool_size++;
             return;
@@ -207,10 +213,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
 
 void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
 {
-    qatomic_add(&pool_batch_size, additional_pool_size);
+    qatomic_add(&pool_max_size, additional_pool_size);
 }
 
 void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
 {
-    qatomic_sub(&pool_batch_size, removing_pool_size);
+    qatomic_sub(&pool_max_size, removing_pool_size);
 }
-- 
2.31.1