From a799a516c56f2f4aac0d32f4bcc0efcc04857229 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 13 Jun 2023 05:19:43 -0400 Subject: [PATCH] * Tue Jun 13 2023 Miroslav Rezanina - 8.0.0-5 - kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch [bz#2186725] - kvm-graph-lock-Disable-locking-for-now.patch [bz#2186725] - kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch [bz#2186725] - kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch [bz#2186725] - kvm-memory-prevent-dma-reentracy-issues.patch [RHEL-516] - kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch [RHEL-516] - kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch [RHEL-516] - kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch [RHEL-516] - kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch [RHEL-516] - kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch [RHEL-516] - kvm-raven-disable-reentrancy-detection-for-iomem.patch [RHEL-516] - kvm-apic-disable-reentrancy-detection-for-apic-msi.patch [RHEL-516] - kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch [RHEL-516] - kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch [RHEL-516] - kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch [RHEL-516] - kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch [RHEL-516] - kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch [RHEL-516] - kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch [bz#2189423] - kvm-multifd-Fix-the-number-of-channels-ready.patch [bz#2196289] - kvm-util-async-teardown-wire-up-query-command-line-optio.patch [bz#2168500] - kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch [bz#2168500] - Resolves: bz#2186725 (Qemu hang when commit during fio running(iothread enable)) - Resolves: RHEL-516 (CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9]) - Resolves: bz#2189423 (Failed to migrate VM from rhel 
9.3 to rhel 9.2) - Resolves: bz#2196289 (Fix number of ready channels on multifd) - Resolves: bz#2168500 ([IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part) --- ...le-reentrancy-detection-for-apic-msi.patch | 55 ++ ...ional-reentrancy-guard-to-the-BH-API.patch | 231 +++++++++ ...-use-after-free-on-re-entrancy-guard.patch | 70 +++ ...-disable-reentrancy-detection-for-io.patch | 57 +++ ...t-assert_bdrv_graph_readable-by-defa.patch | 121 +++++ ...ch-add-qemu_bh_new-aio_bh_new-checks.patch | 55 ++ kvm-graph-lock-Disable-locking-for-now.patch | 153 ++++++ ...CI_ERR_UNCOR_MASK-register-for-machi.patch | 118 +++++ ...qemu_bh_new-calls-with-qemu_bh_new_g.patch | 470 ++++++++++++++++++ ...5a-Fix-reentrancy-issues-in-the-LSI-.patch | 141 ++++++ ...ommit-with-iothreads-and-ongoing-I-O.patch | 144 ++++++ ...loongarch_ipi_iocsr-re-entrnacy-safe.patch | 53 ++ ...le-reentrancy-detection-for-MMIO-reg.patch | 70 +++ ...le-reentrancy-detection-for-script-R.patch | 58 +++ kvm-memory-prevent-dma-reentracy-issues.patch | 150 ++++++ ...checks-prior-to-unsetting-engaged_in.patch | 67 +++ ...ifd-Fix-the-number-of-channels-ready.patch | 58 +++ ...rained_poll-to-wake-coroutine-in-rig.patch | 159 ++++++ ...sable-reentrancy-detection-for-iomem.patch | 54 ++ ...rious-warning-with-asynchronous-tear.patch | 129 +++++ ...own-wire-up-query-command-line-optio.patch | 180 +++++++ qemu-kvm.spec | 77 ++- 22 files changed, 2669 insertions(+), 1 deletion(-) create mode 100644 kvm-apic-disable-reentrancy-detection-for-apic-msi.patch create mode 100644 kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch create mode 100644 kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch create mode 100644 kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch create mode 100644 kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch create mode 100644 kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch create mode 100644 
kvm-graph-lock-Disable-locking-for-now.patch create mode 100644 kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch create mode 100644 kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch create mode 100644 kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch create mode 100644 kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch create mode 100644 kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch create mode 100644 kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch create mode 100644 kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch create mode 100644 kvm-memory-prevent-dma-reentracy-issues.patch create mode 100644 kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch create mode 100644 kvm-multifd-Fix-the-number-of-channels-ready.patch create mode 100644 kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch create mode 100644 kvm-raven-disable-reentrancy-detection-for-iomem.patch create mode 100644 kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch create mode 100644 kvm-util-async-teardown-wire-up-query-command-line-optio.patch diff --git a/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch b/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch new file mode 100644 index 0000000..69505f8 --- /dev/null +++ b/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch @@ -0,0 +1,55 @@ +From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 50795ee051a342c681a9b45671c552fbd6274db8 +Author: Alexander 
Bulekov +Date: Thu Apr 27 17:10:13 2023 -0400 + + apic: disable reentrancy detection for apic-msi + + As the code is designed for re-entrant calls to apic-msi, mark apic-msi + as reentrancy-safe. + + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-9-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/intc/apic.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/intc/apic.c b/hw/intc/apic.c +index 20b5a94073..ac3d47d231 100644 +--- a/hw/intc/apic.c ++++ b/hw/intc/apic.c +@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp) + memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", + APIC_SPACE_SIZE); + ++ /* ++ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can ++ * write back to apic-msi. As such mark the apic-msi region re-entrancy ++ * safe. ++ */ ++ s->io_memory.disable_reentrancy_guard = true; ++ + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); + local_apics[s->id] = s; + +-- +2.39.3 + diff --git a/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch b/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch new file mode 100644 index 0000000..65ba3be --- /dev/null +++ b/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch @@ -0,0 +1,231 @@ +From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 9c86c97f12c060bf7484dd931f38634e166a81f0 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:07 2023 -0400 + 
+ async: Add an optional reentrancy guard to the BH API + + Devices can pass their MemoryReentrancyGuard (from their DeviceState), + when creating new BHes. Then, the async API will toggle the guard + before/after calling the BH call-back. This prevents bh->mmio reentrancy + issues. + + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-3-alxndr@bu.edu> + [thuth: Fix "line over 90 characters" checkpatch.pl error] + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + docs/devel/multiple-iothreads.txt | 7 +++++++ + include/block/aio.h | 18 ++++++++++++++++-- + include/qemu/main-loop.h | 7 +++++-- + tests/unit/ptimer-test-stubs.c | 3 ++- + util/async.c | 18 +++++++++++++++++- + util/main-loop.c | 6 ++++-- + util/trace-events | 1 + + 7 files changed, 52 insertions(+), 8 deletions(-) + +diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt +index 343120f2ef..a3e949f6b3 100644 +--- a/docs/devel/multiple-iothreads.txt ++++ b/docs/devel/multiple-iothreads.txt +@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext: + * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier + * LEGACY timer_new_ms() - create a timer + * LEGACY qemu_bh_new() - create a BH ++ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard + * LEGACY qemu_aio_wait() - run an event loop iteration + + Since they implicitly work on the main loop they cannot be used in code that +@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h): + * aio_set_event_notifier() - monitor an event notifier + * aio_timer_new() - create a timer + * aio_bh_new() - create a BH ++ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard + * aio_poll() - run an event loop iteration + ++The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard" ++argument, which is used to check for and prevent re-entrancy problems. 
For ++BHs associated with devices, the reentrancy-guard is contained in the ++corresponding DeviceState and named "mem_reentrancy_guard". ++ + The AioContext can be obtained from the IOThread using + iothread_get_aio_context() or for the main loop using qemu_get_aio_context(). + Code that takes an AioContext argument works both in IOThreads or the main +diff --git a/include/block/aio.h b/include/block/aio.h +index 543717f294..db6f23c619 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -23,6 +23,8 @@ + #include "qemu/thread.h" + #include "qemu/timer.h" + #include "block/graph-lock.h" ++#include "hw/qdev-core.h" ++ + + typedef struct BlockAIOCB BlockAIOCB; + typedef void BlockCompletionFunc(void *opaque, int ret); +@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + * is opaque and must be allocated prior to its use. + * + * @name: A human-readable identifier for debugging purposes. ++ * @reentrancy_guard: A guard set when entering a cb to prevent ++ * device-reentrancy issues + */ + QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, +- const char *name); ++ const char *name, MemReentrancyGuard *reentrancy_guard); + + /** + * aio_bh_new: Allocate a new bottom half structure +@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + * string. + */ + #define aio_bh_new(ctx, cb, opaque) \ +- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) ++ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL) ++ ++/** ++ * aio_bh_new_guarded: Allocate a new bottom half structure with a ++ * reentrancy_guard ++ * ++ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name ++ * string. ++ */ ++#define aio_bh_new_guarded(ctx, cb, opaque, guard) \ ++ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard) + + /** + * aio_notify: Force processing of pending events. 
+diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h +index b3e54e00bc..68e70e61aa 100644 +--- a/include/qemu/main-loop.h ++++ b/include/qemu/main-loop.h +@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); + + /* internal interfaces */ + ++#define qemu_bh_new_guarded(cb, opaque, guard) \ ++ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard) + #define qemu_bh_new(cb, opaque) \ +- qemu_bh_new_full((cb), (opaque), (stringify(cb))) +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); ++ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard); + void qemu_bh_schedule_idle(QEMUBH *bh); + + enum { +diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c +index f2bfcede93..8c9407c560 100644 +--- a/tests/unit/ptimer-test-stubs.c ++++ b/tests/unit/ptimer-test-stubs.c +@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) + return deadline; + } + +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard) + { + QEMUBH *bh = g_new(QEMUBH, 1); + +diff --git a/util/async.c b/util/async.c +index 21016a1ac7..a9b528c370 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -65,6 +65,7 @@ struct QEMUBH { + void *opaque; + QSLIST_ENTRY(QEMUBH) next; + unsigned flags; ++ MemReentrancyGuard *reentrancy_guard; + }; + + /* Called concurrently from any thread */ +@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, + } + + QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, +- const char *name) ++ const char *name, MemReentrancyGuard *reentrancy_guard) + { + QEMUBH *bh; + bh = g_new(QEMUBH, 1); +@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc 
*cb, void *opaque, + .cb = cb, + .opaque = opaque, + .name = name, ++ .reentrancy_guard = reentrancy_guard, + }; + return bh; + } + + void aio_bh_call(QEMUBH *bh) + { ++ bool last_engaged_in_io = false; ++ ++ if (bh->reentrancy_guard) { ++ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; ++ if (bh->reentrancy_guard->engaged_in_io) { ++ trace_reentrant_aio(bh->ctx, bh->name); ++ } ++ bh->reentrancy_guard->engaged_in_io = true; ++ } ++ + bh->cb(bh->opaque); ++ ++ if (bh->reentrancy_guard) { ++ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; ++ } + } + + /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */ +diff --git a/util/main-loop.c b/util/main-loop.c +index e180c85145..7022f02ef8 100644 +--- a/util/main-loop.c ++++ b/util/main-loop.c +@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking) + + /* Functions to operate on the main QEMU AioContext. */ + +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard) + { +- return aio_bh_new_full(qemu_aio_context, cb, opaque, name); ++ return aio_bh_new_full(qemu_aio_context, cb, opaque, name, ++ reentrancy_guard); + } + + /* +diff --git a/util/trace-events b/util/trace-events +index 16f78d8fe5..3f7e766683 100644 +--- a/util/trace-events ++++ b/util/trace-events +@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d" + # async.c + aio_co_schedule(void *ctx, void *co) "ctx %p co %p" + aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" ++reentrant_aio(void *ctx, const char *name) "ctx %p name %s" + + # thread-pool.c + thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" +-- +2.39.3 + diff --git a/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch b/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch new file mode 100644 index 0000000..df71fa2 --- /dev/null +++ 
b/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch @@ -0,0 +1,70 @@ +From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 7915bd06f25e1803778081161bf6fa10c42dc7cd +Author: Alexander Bulekov +Date: Mon May 1 10:19:56 2023 -0400 + + async: avoid use-after-free on re-entrancy guard + + A BH callback can free the BH, causing a use-after-free in aio_bh_call. + Fix that by keeping a local copy of the re-entrancy guard pointer. + + Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513 + Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API") + Signed-off-by: Alexander Bulekov + Message-Id: <20230501141956.3444868-1-alxndr@bu.edu> + Reviewed-by: Thomas Huth + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + util/async.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/util/async.c b/util/async.c +index a9b528c370..cd1a1815f9 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh) + { + bool last_engaged_in_io = false; + +- if (bh->reentrancy_guard) { +- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; +- if (bh->reentrancy_guard->engaged_in_io) { ++ /* Make a copy of the guard-pointer as cb may free the bh */ ++ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard; ++ if (reentrancy_guard) { ++ last_engaged_in_io = reentrancy_guard->engaged_in_io; ++ if (reentrancy_guard->engaged_in_io) { + trace_reentrant_aio(bh->ctx, bh->name); + } +- 
bh->reentrancy_guard->engaged_in_io = true; ++ reentrancy_guard->engaged_in_io = true; + } + + bh->cb(bh->opaque); + +- if (bh->reentrancy_guard) { +- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; ++ if (reentrancy_guard) { ++ reentrancy_guard->engaged_in_io = last_engaged_in_io; + } + } + +-- +2.39.3 + diff --git a/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch b/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch new file mode 100644 index 0000000..6d9abb8 --- /dev/null +++ b/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch @@ -0,0 +1,57 @@ +From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for + iomem + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:11 2023 -0400 + + bcm2835_property: disable reentrancy detection for iomem + + As the code is designed for re-entrant calls from bcm2835_property to + bcm2835_mbox and back into bcm2835_property, mark iomem as + reentrancy-safe. 
+ + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-7-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/misc/bcm2835_property.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c +index 890ae7bae5..de056ea2df 100644 +--- a/hw/misc/bcm2835_property.c ++++ b/hw/misc/bcm2835_property.c +@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj) + + memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s, + TYPE_BCM2835_PROPERTY, 0x10); ++ ++ /* ++ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from ++ * iomem. As such, mark iomem as re-entracy safe. ++ */ ++ s->iomem.disable_reentrancy_guard = true; ++ + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); + sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq); + } +-- +2.39.3 + diff --git a/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch b/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch new file mode 100644 index 0000000..b6eebf3 --- /dev/null +++ b/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch @@ -0,0 +1,121 @@ +From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 1 May 2023 13:34:43 -0400 +Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by + default + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm) + +reader_count() is a performance bottleneck because the global +aio_context_list_lock mutex causes thread contention. Put this debugging +assertion behind a new ./configure --enable-debug-graph-lock option and +disable it by default. 
+ +The --enable-debug-graph-lock option is also enabled by the more general +--enable-debug option. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230501173443.153062-1-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf) +Signed-off-by: Kevin Wolf +--- + block/graph-lock.c | 3 +++ + configure | 1 + + meson.build | 2 ++ + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 4 ++++ + 5 files changed, 12 insertions(+) + +diff --git a/block/graph-lock.c b/block/graph-lock.c +index 454c31e691..259a7a0bde 100644 +--- a/block/graph-lock.c ++++ b/block/graph-lock.c +@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void) + + void assert_bdrv_graph_readable(void) + { ++ /* reader_count() is slow due to aio_context_list_lock lock contention */ ++#ifdef CONFIG_DEBUG_GRAPH_LOCK + assert(qemu_in_main_thread() || reader_count()); ++#endif + } + + void assert_bdrv_graph_writable(void) +diff --git a/configure b/configure +index 800b5850f4..a62a3e6be9 100755 +--- a/configure ++++ b/configure +@@ -806,6 +806,7 @@ for opt do + --enable-debug) + # Enable debugging options that aren't excessively noisy + debug_tcg="yes" ++ meson_option_parse --enable-debug-graph-lock "" + meson_option_parse --enable-debug-mutex "" + meson_option_add -Doptimization=0 + fortify_source="no" +diff --git a/meson.build b/meson.build +index c44d05a13f..d964e741e7 100644 +--- a/meson.build ++++ b/meson.build +@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool + have_coroutine_pool = false + endif + config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool) ++config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock')) + config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex')) + config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage')) + config_host_data.set('CONFIG_GPROF', get_option('gprof')) +@@ -3833,6 
+3834,7 @@ summary_info += {'PIE': get_option('b_pie')} + summary_info += {'static build': config_host.has_key('CONFIG_STATIC')} + summary_info += {'malloc trim support': has_malloc_trim} + summary_info += {'membarrier': have_membarrier} ++summary_info += {'debug graph lock': get_option('debug_graph_lock')} + summary_info += {'debug stack usage': get_option('debug_stack_usage')} + summary_info += {'mutex debugging': get_option('debug_mutex')} + summary_info += {'memory allocator': get_option('malloc')} +diff --git a/meson_options.txt b/meson_options.txt +index fc9447d267..bc857fe68b 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false, + description: 'dummy RNG, avoid using /dev/(u)random and getrandom()') + option('coroutine_pool', type: 'boolean', value: true, + description: 'coroutine freelist (better performance)') ++option('debug_graph_lock', type: 'boolean', value: false, ++ description: 'graph lock debugging support') + option('debug_mutex', type: 'boolean', value: false, + description: 'mutex debugging support') + option('debug_stack_usage', type: 'boolean', value: false, +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 009fab1515..30e1f25259 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -21,6 +21,8 @@ meson_options_help() { + printf "%s\n" ' QEMU' + printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' + printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' ++ printf "%s\n" ' --enable-debug-graph-lock' ++ printf "%s\n" ' graph lock debugging support' + printf "%s\n" ' --enable-debug-mutex mutex debugging support' + printf "%s\n" ' --enable-debug-stack-usage' + printf "%s\n" ' measure coroutine stack usage' +@@ -249,6 +251,8 @@ _meson_option_parse() { + --datadir=*) quote_sh "-Ddatadir=$2" ;; + --enable-dbus-display) printf "%s" -Ddbus_display=enabled ;; + --disable-dbus-display) printf 
"%s" -Ddbus_display=disabled ;; ++ --enable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=true ;; ++ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;; + --enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;; + --disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;; + --enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;; +-- +2.39.3 + diff --git a/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch b/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch new file mode 100644 index 0000000..4173648 --- /dev/null +++ b/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch @@ -0,0 +1,55 @@ +From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit ef56ffbdd6b0605dc1e305611287b948c970e236 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:08 2023 -0400 + + checkpatch: add qemu_bh_new/aio_bh_new checks + + Advise authors to use the _guarded versions of the APIs, instead. + + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-4-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + scripts/checkpatch.pl | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl +index d768171dcf..eeaec436eb 100755 +--- a/scripts/checkpatch.pl ++++ b/scripts/checkpatch.pl +@@ -2865,6 +2865,14 @@ sub process { + if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) { + ERROR("use sigaction to establish signal handlers; signal is not portable\n" . 
$herecurr); + } ++# recommend qemu_bh_new_guarded instead of qemu_bh_new ++ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) { ++ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr); ++ } ++# recommend aio_bh_new_guarded instead of aio_bh_new ++ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) { ++ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr); ++ } + # check for module_init(), use category-specific init macros explicitly please + if ($line =~ /^module_init\s*\(/) { + ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr); +-- +2.39.3 + diff --git a/kvm-graph-lock-Disable-locking-for-now.patch b/kvm-graph-lock-Disable-locking-for-now.patch new file mode 100644 index 0000000..77086e5 --- /dev/null +++ b/kvm-graph-lock-Disable-locking-for-now.patch @@ -0,0 +1,153 @@ +From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:32 +0200 +Subject: [PATCH 02/21] graph-lock: Disable locking for now + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm) + +In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They +come from callers that hold an AioContext lock, which is not allowed +during polling. In theory, we could temporarily release the lock, but +callers are inconsistent about whether they hold a lock, and if they do, +some are also confused about which one they hold. While all of this is +fixable, it's not trivial, and the best course of action for 8.0.1 is +probably just disabling the graph locking code temporarily. + +We don't currently rely on graph locking yet. 
It is supposed to replace +the AioContext lock eventually to enable multiqueue support, but as long +as we still have the AioContext lock, it is sufficient without the graph +lock. Once the AioContext lock goes away, the deadlock doesn't exist any +more either and this commit can be reverted. (Of course, it can also be +reverted while the AioContext lock still exists if the callers have been +fixed.) + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-2-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d) +Signed-off-by: Kevin Wolf +--- + block/graph-lock.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/block/graph-lock.c b/block/graph-lock.c +index 259a7a0bde..2490926c90 100644 +--- a/block/graph-lock.c ++++ b/block/graph-lock.c +@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock; + /* Protects the list of aiocontext and orphaned_reader_count */ + static QemuMutex aio_context_list_lock; + ++#if 0 + /* Written and read with atomic operations. */ + static int has_writer; ++#endif + + /* + * A reader coroutine could move from an AioContext to another. +@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx) + g_free(ctx->bdrv_graph); + } + ++#if 0 + static uint32_t reader_count(void) + { + BdrvGraphRWlock *brdv_graph; +@@ -105,10 +108,17 @@ static uint32_t reader_count(void) + assert((int32_t)rd >= 0); + return rd; + } ++#endif + + void bdrv_graph_wrlock(void) + { + GLOBAL_STATE_CODE(); ++ /* ++ * TODO Some callers hold an AioContext lock when this is called, which ++ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or ++ * AioContext locks are gone). 
++ */ ++#if 0 + assert(!qatomic_read(&has_writer)); + + /* Make sure that constantly arriving new I/O doesn't cause starvation */ +@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void) + } while (reader_count() >= 1); + + bdrv_drain_all_end(); ++#endif + } + + void bdrv_graph_wrunlock(void) + { + GLOBAL_STATE_CODE(); ++#if 0 + QEMU_LOCK_GUARD(&aio_context_list_lock); + assert(qatomic_read(&has_writer)); + +@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void) + + /* Wake up all coroutine that are waiting to read the graph */ + qemu_co_enter_all(&reader_queue, &aio_context_list_lock); ++#endif + } + + void coroutine_fn bdrv_graph_co_rdlock(void) + { ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + BdrvGraphRWlock *bdrv_graph; + bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; + +@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void) + qemu_co_queue_wait(&reader_queue, &aio_context_list_lock); + } + } ++#endif + } + + void coroutine_fn bdrv_graph_co_rdunlock(void) + { ++#if 0 + BdrvGraphRWlock *bdrv_graph; + bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; + +@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void) + if (qatomic_read(&has_writer)) { + aio_wait_kick(); + } ++#endif + } + + void bdrv_graph_rdlock_main_loop(void) +@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void) + void assert_bdrv_graph_readable(void) + { + /* reader_count() is slow due to aio_context_list_lock lock contention */ ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + #ifdef CONFIG_DEBUG_GRAPH_LOCK + assert(qemu_in_main_thread() || reader_count()); + #endif ++#endif + } + + void assert_bdrv_graph_writable(void) + { + assert(qemu_in_main_thread()); ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + assert(qatomic_read(&has_writer)); ++#endif + } +-- +2.39.3 + diff --git a/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch b/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch new file mode 100644 
index 0000000..164bea7 --- /dev/null +++ b/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch @@ -0,0 +1,118 @@ +From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 2 May 2023 21:27:02 -0300 +Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine + type < 8.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 +RH-Bugzilla: 2189423 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm) + +Since its implementation in v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK +set for machine types < 8.0 will cause migration to fail if the target +QEMU version is < 8.0.0 : + +qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0 +qemu-system-x86_64: Failed to load PCIDevice:config +qemu-system-x86_64: Failed to load e1000e:parent_obj +qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e' +qemu-system-x86_64: load of migration failed: Invalid argument + +The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0, +with this cmdline: + +./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX] + +In order to fix this, property x-pcie-err-unc-mask was introduced to +control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by +default, but is disabled if machine type <= 7.2. + +Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register") +Suggested-by: Michael S.
Tsirkin +Signed-off-by: Leonardo Bras +Message-Id: <20230503002701.854329-1-leobras@redhat.com> +Reviewed-by: Jonathan Cameron +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576 +Tested-by: Fiona Ebner +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f) +Signed-off-by: Leonardo Bras +--- + hw/core/machine.c | 1 + + hw/pci/pci.c | 2 ++ + hw/pci/pcie_aer.c | 11 +++++++---- + include/hw/pci/pci.h | 2 ++ + 4 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 0e0120b7f2..c28702b690 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = { + { "e1000e", "migrate-timadj", "off" }, + { "virtio-mem", "x-early-migration", "false" }, + { "migration", "x-preempt-pre-7-2", "true" }, ++ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, + }; + const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index def5000e7b..8ad4349e96 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -79,6 +79,8 @@ static Property pci_props[] = { + DEFINE_PROP_STRING("failover_pair_id", PCIDevice, + failover_pair_id), + DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), ++ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, ++ QEMU_PCIE_ERR_UNC_MASK_BITNR, true), + DEFINE_PROP_END_OF_LIST() + }; + +diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c +index 103667c368..374d593ead 100644 +--- a/hw/pci/pcie_aer.c ++++ b/hw/pci/pcie_aer.c +@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, + + pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, + PCI_ERR_UNC_SUPPORTED); +- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, +- PCI_ERR_UNC_MASK_DEFAULT); +- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, +- PCI_ERR_UNC_SUPPORTED); 
++ ++ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { ++ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, ++ PCI_ERR_UNC_MASK_DEFAULT); ++ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, ++ PCI_ERR_UNC_SUPPORTED); ++ } + + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, + PCI_ERR_UNC_SEVERITY_DEFAULT); +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index d5a40cd058..6dc6742fc4 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -207,6 +207,8 @@ enum { + QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), + #define QEMU_PCIE_CXL_BITNR 10 + QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), ++#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11 ++ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), + }; + + typedef struct PCIINTxRoute { +-- +2.39.3 + diff --git a/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch b/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch new file mode 100644 index 0000000..08ee94f --- /dev/null +++ b/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch @@ -0,0 +1,470 @@ +From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with + qemu_bh_new_guarded + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit f63192b0544af5d3e4d5edfd85ab520fcf671377 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:09 2023 -0400 + + hw: replace most qemu_bh_new calls with qemu_bh_new_guarded + + This protects devices from bh->mmio reentrancy issues. + + Thanks: Thomas Huth for diagnosing OS X test failure. 
+ Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Michael S. Tsirkin + Reviewed-by: Paul Durrant + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-5-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/9pfs/xen-9p-backend.c | 5 ++++- + hw/block/dataplane/virtio-blk.c | 3 ++- + hw/block/dataplane/xen-block.c | 5 +++-- + hw/char/virtio-serial-bus.c | 3 ++- + hw/display/qxl.c | 9 ++++++--- + hw/display/virtio-gpu.c | 6 ++++-- + hw/ide/ahci.c | 3 ++- + hw/ide/ahci_internal.h | 1 + + hw/ide/core.c | 4 +++- + hw/misc/imx_rngc.c | 6 ++++-- + hw/misc/macio/mac_dbdma.c | 2 +- + hw/net/virtio-net.c | 3 ++- + hw/nvme/ctrl.c | 6 ++++-- + hw/scsi/mptsas.c | 3 ++- + hw/scsi/scsi-bus.c | 3 ++- + hw/scsi/vmw_pvscsi.c | 3 ++- + hw/usb/dev-uas.c | 3 ++- + hw/usb/hcd-dwc2.c | 3 ++- + hw/usb/hcd-ehci.c | 3 ++- + hw/usb/hcd-uhci.c | 2 +- + hw/usb/host-libusb.c | 6 ++++-- + hw/usb/redirect.c | 6 ++++-- + hw/usb/xen-usb.c | 3 ++- + hw/virtio/virtio-balloon.c | 5 +++-- + hw/virtio/virtio-crypto.c | 3 ++- + 25 files changed, 66 insertions(+), 33 deletions(-) + +diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c +index 74f3a05f88..0e266c552b 100644 +--- a/hw/9pfs/xen-9p-backend.c ++++ b/hw/9pfs/xen-9p-backend.c +@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev { + + int num_rings; + Xen9pfsRing *rings; ++ MemReentrancyGuard mem_reentrancy_guard; + } Xen9pfsDev; + + static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev); +@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) + xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data + + XEN_FLEX_RING_SIZE(ring_order); + +- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]); ++ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh, ++ &xen_9pdev->rings[i], ++ &xen_9pdev->mem_reentrancy_guard); + xen_9pdev->rings[i].out_cons = 0; + xen_9pdev->rings[i].out_size = 0; + 
xen_9pdev->rings[i].inprogress = false; +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index b28d81737e..a6202997ee 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + } else { + s->ctx = qemu_get_aio_context(); + } +- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); ++ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s, ++ &DEVICE(vdev)->mem_reentrancy_guard); + s->batch_notify_vqs = bitmap_new(conf->num_queues); + + *dataplane = s; +diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c +index 734da42ea7..d8bc39d359 100644 +--- a/hw/block/dataplane/xen-block.c ++++ b/hw/block/dataplane/xen-block.c +@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, + } else { + dataplane->ctx = qemu_get_aio_context(); + } +- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, +- dataplane); ++ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh, ++ dataplane, ++ &DEVICE(xendev)->mem_reentrancy_guard); + + return dataplane; + } +diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c +index 7d4601cb5d..dd619f0731 100644 +--- a/hw/char/virtio-serial-bus.c ++++ b/hw/char/virtio-serial-bus.c +@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) + return; + } + +- port->bh = qemu_bh_new(flush_queued_data_bh, port); ++ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, ++ &dev->mem_reentrancy_guard); + port->elem = NULL; + } + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index 80ce1e9a93..f1c0eb7dfc 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp) + + qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); + +- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, 
qxl); ++ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl, ++ &DEVICE(qxl)->mem_reentrancy_guard); + qxl_reset_state(qxl); + +- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl); +- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd); ++ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl, ++ &DEVICE(qxl)->mem_reentrancy_guard); ++ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd, ++ &DEVICE(qxl)->mem_reentrancy_guard); + } + + static void qxl_realize_primary(PCIDevice *dev, Error **errp) +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index 5e15c79b94..66ac9b6cc5 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) + + g->ctrl_vq = virtio_get_queue(vdev, 0); + g->cursor_vq = virtio_get_queue(vdev, 1); +- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g); +- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g); ++ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, ++ &qdev->mem_reentrancy_guard); ++ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, ++ &qdev->mem_reentrancy_guard); + QTAILQ_INIT(&g->reslist); + QTAILQ_INIT(&g->cmdq); + QTAILQ_INIT(&g->fenceq); +diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c +index 55902e1df7..4e76d6b191 100644 +--- a/hw/ide/ahci.c ++++ b/hw/ide/ahci.c +@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma) + ahci_write_fis_d2h(ad); + + if (ad->port_regs.cmd_issue && !ad->check_bh) { +- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); ++ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad, ++ &ad->mem_reentrancy_guard); + qemu_bh_schedule(ad->check_bh); + } + } +diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h +index 303fcd7235..2480455372 100644 +--- a/hw/ide/ahci_internal.h ++++ b/hw/ide/ahci_internal.h +@@ -321,6 +321,7 @@ struct AHCIDevice { + bool init_d2h_sent; + 
AHCICmdHdr *cur_cmd; + NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; ++ MemReentrancyGuard mem_reentrancy_guard; + }; + + struct AHCIPCIState { +diff --git a/hw/ide/core.c b/hw/ide/core.c +index 45d14a25e9..de48ff9f86 100644 +--- a/hw/ide/core.c ++++ b/hw/ide/core.c +@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim( + BlockCompletionFunc *cb, void *cb_opaque, void *opaque) + { + IDEState *s = opaque; ++ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; + TrimAIOCB *iocb; + + /* Paired with a decrement in ide_trim_bh_cb() */ +@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim( + + iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); + iocb->s = s; +- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); ++ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, ++ &DEVICE(dev)->mem_reentrancy_guard); + iocb->ret = 0; + iocb->qiov = qiov; + iocb->i = -1; +diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c +index 632c03779c..082c6980ad 100644 +--- a/hw/misc/imx_rngc.c ++++ b/hw/misc/imx_rngc.c +@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp) + sysbus_init_mmio(sbd, &s->iomem); + + sysbus_init_irq(sbd, &s->irq); +- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s); +- s->seed_bh = qemu_bh_new(imx_rngc_seed, s); ++ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s, ++ &dev->mem_reentrancy_guard); ++ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s, ++ &dev->mem_reentrancy_guard); + } + + static void imx_rngc_reset(DeviceState *dev) +diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c +index 43bb1f56ba..80a789f32b 100644 +--- a/hw/misc/macio/mac_dbdma.c ++++ b/hw/misc/macio/mac_dbdma.c +@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp) + { + DBDMAState *s = MAC_DBDMA(dev); + +- s->bh = qemu_bh_new(DBDMA_run_bh, s); ++ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard); + } + + static void mac_dbdma_class_init(ObjectClass *oc, void *data) +diff --git 
a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 53e1c32643..447f669921 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) + n->vqs[index].tx_vq = + virtio_add_queue(vdev, n->net_conf.tx_queue_size, + virtio_net_handle_tx_bh); +- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); ++ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], ++ &DEVICE(vdev)->mem_reentrancy_guard); + } + + n->vqs[index].tx_waiting = 0; +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index ac24eeb5ed..e5a468975e 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, + QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); + } + +- sq->bh = qemu_bh_new(nvme_process_sq, sq); ++ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq, ++ &DEVICE(sq->ctrl)->mem_reentrancy_guard); + + if (n->dbbuf_enabled) { + sq->db_addr = n->dbbuf_dbs + (sqid << 3); +@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, + } + } + n->cq[cqid] = cq; +- cq->bh = qemu_bh_new(nvme_post_cqes, cq); ++ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq, ++ &DEVICE(cq->ctrl)->mem_reentrancy_guard); + } + + static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) +diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c +index c485da792c..3de288b454 100644 +--- a/hw/scsi/mptsas.c ++++ b/hw/scsi/mptsas.c +@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) + } + s->max_devices = MPTSAS_NUM_PORTS; + +- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); ++ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s, ++ &DEVICE(dev)->mem_reentrancy_guard); + + scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); + } +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index c97176110c..3c20b47ad0 100644 +--- 
a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state) + AioContext *ctx = blk_get_aio_context(s->conf.blk); + /* The reference is dropped in scsi_dma_restart_bh.*/ + object_ref(OBJECT(s)); +- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); ++ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, ++ &DEVICE(s)->mem_reentrancy_guard); + qemu_bh_schedule(s->bh); + } + } +diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c +index fa76696855..4de34536e9 100644 +--- a/hw/scsi/vmw_pvscsi.c ++++ b/hw/scsi/vmw_pvscsi.c +@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) + pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET); + } + +- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); ++ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s, ++ &DEVICE(pci_dev)->mem_reentrancy_guard); + + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); + /* override default SCSI bus hotplug-handler, with pvscsi's one */ +diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c +index 88f99c05d5..f013ded91e 100644 +--- a/hw/usb/dev-uas.c ++++ b/hw/usb/dev-uas.c +@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) + + QTAILQ_INIT(&uas->results); + QTAILQ_INIT(&uas->requests); +- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); ++ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas, ++ &d->mem_reentrancy_guard); + + dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); + scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); +diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c +index 8755e9cbb0..a0c4e782b2 100644 +--- a/hw/usb/hcd-dwc2.c ++++ b/hw/usb/hcd-dwc2.c +@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp) + s->fi = USB_FRMINTVL - 1; + s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); + 
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); +- s->async_bh = qemu_bh_new(dwc2_work_bh, s); ++ s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s, ++ &dev->mem_reentrancy_guard); + + sysbus_init_irq(sbd, &s->irq); + } +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index d4da8dcb8d..c930c60921 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) + } + + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); +- s->async_bh = qemu_bh_new(ehci_work_bh, s); ++ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s, ++ &dev->mem_reentrancy_guard); + s->device = dev; + + s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 8ac1175ad2..77baaa7a6b 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL); + } + } +- s->bh = qemu_bh_new(uhci_bh, s); ++ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); + s->num_ports_vmstate = NB_PORTS; + QTAILQ_INIT(&s->queues); +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index 176868d345..f500db85ab 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque) + static void usb_host_nodev(USBHostDevice *s) + { + if (!s->bh_nodev) { +- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s); ++ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s, ++ &DEVICE(s)->mem_reentrancy_guard); + } + qemu_bh_schedule(s->bh_nodev); + } +@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id) + USBHostDevice *dev = opaque; + + if (!dev->bh_postld) { +- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev); ++ 
dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); + } + qemu_bh_schedule(dev->bh_postld); + dev->bh_postld_pending = true; +diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c +index fd7df599bc..39fbaaab16 100644 +--- a/hw/usb/redirect.c ++++ b/hw/usb/redirect.c +@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp) + } + } + +- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); +- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev); ++ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); ++ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); + dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev); + + packet_id_queue_init(&dev->cancelled, dev, "cancelled"); +diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c +index 66cb3f7c24..38ee660a30 100644 +--- a/hw/usb/xen-usb.c ++++ b/hw/usb/xen-usb.c +@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev) + + QTAILQ_INIT(&usbif->req_free_q); + QSIMPLEQ_INIT(&usbif->hotplug_q); +- usbif->bh = qemu_bh_new(usbback_bh, usbif); ++ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif, ++ &DEVICE(xendev)->mem_reentrancy_guard); + } + + static int usbback_free(struct XenLegacyDevice *xendev) +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index 43092aa634..5186e831dd 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) + precopy_add_notifier(&s->free_page_hint_notify); + + object_ref(OBJECT(s->iothread)); +- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), +- virtio_ballloon_get_free_page_hints, s); ++ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread), ++ 
virtio_ballloon_get_free_page_hints, s, ++ &dev->mem_reentrancy_guard); + } + + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) { +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index 802e1b9659..2fe804510f 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) + vcrypto->vqs[i].dataq = + virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); + vcrypto->vqs[i].dataq_bh = +- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); ++ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], ++ &dev->mem_reentrancy_guard); + vcrypto->vqs[i].vcrypto = vcrypto; + } + +-- +2.39.3 + diff --git a/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch b/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch new file mode 100644 index 0000000..efa966e --- /dev/null +++ b/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch @@ -0,0 +1,141 @@ +From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 29 May 2023 14:21:08 -0400 +Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI + controller (CVE-2023-0330) + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit b987718bbb1d0eabf95499b976212dd5f0120d75 +Author: Thomas Huth +Date: Mon May 22 11:10:11 2023 +0200 + + hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330) + + We cannot use the generic reentrancy guard in the LSI code, so + we have to manually prevent endless reentrancy here. 
The problematic + lsi_execute_script() function has already a way to detect whether + too many instructions have been executed - we just have to slightly + change the logic here that it also takes into account if the function + has been called too often in a reentrant way. + + The code in fuzz-lsi53c895a-test.c has been taken from an earlier + patch by Mauro Matteo Cascella. + + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563 + Message-Id: <20230522091011.1082574-1-thuth@redhat.com> + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Alexander Bulekov + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 23 +++++++++++++++------ + tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++ + 2 files changed, 50 insertions(+), 6 deletions(-) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index 048436352b..f7d45b0b20 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s) + uint32_t addr, addr_high; + int opcode; + int insn_processed = 0; ++ static int reentrancy_level; ++ ++ reentrancy_level++; + + s->istat1 |= LSI_ISTAT1_SRUN; + again: +- if (++insn_processed > LSI_MAX_INSN) { +- /* Some windows drivers make the device spin waiting for a memory +- location to change. If we have been executed a lot of code then +- assume this is the case and force an unexpected device disconnect. +- This is apparently sufficient to beat the drivers into submission. +- */ ++ /* ++ * Some windows drivers make the device spin waiting for a memory location ++ * to change. If we have executed more than LSI_MAX_INSN instructions then ++ * assume this is the case and force an unexpected device disconnect. This ++ * is apparently sufficient to beat the drivers into submission. ++ * ++ * Another issue (CVE-2023-0330) can occur if the script is programmed to ++ * trigger itself again and again. 
Avoid this problem by stopping after ++ * being called multiple times in a reentrant way (8 is an arbitrary value ++ * which should be enough for all valid use cases). ++ */ ++ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) { + if (!(s->sien0 & LSI_SIST0_UDC)) { + qemu_log_mask(LOG_GUEST_ERROR, + "lsi_scsi: inf. loop with UDC masked"); +@@ -1596,6 +1605,8 @@ again: + } + } + trace_lsi_execute_script_stop(); ++ ++ reentrancy_level--; + } + + static uint8_t lsi_reg_readb(LSIState *s, int offset) +diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c +index 2012bd54b7..1b55928b9f 100644 +--- a/tests/qtest/fuzz-lsi53c895a-test.c ++++ b/tests/qtest/fuzz-lsi53c895a-test.c +@@ -8,6 +8,36 @@ + #include "qemu/osdep.h" + #include "libqtest.h" + ++/* ++ * This used to trigger a DMA reentrancy issue ++ * leading to memory corruption bugs like stack ++ * overflow or use-after-free ++ * https://gitlab.com/qemu-project/qemu/-/issues/1563 ++ */ ++static void test_lsi_dma_reentrancy(void) ++{ ++ QTestState *s; ++ ++ s = qtest_init("-M q35 -m 512M -nodefaults " ++ "-blockdev driver=null-co,node-name=null0 " ++ "-device lsi53c810 -device scsi-cd,drive=null0"); ++ ++ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */ ++ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */ ++ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */ ++ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/ ++ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */ ++ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/ ++ qtest_writel(s, 0xff000000, 0xc0000024); ++ qtest_writel(s, 0xff000114, 0x00000080); ++ qtest_writel(s, 0xff00012c, 0xff000000); ++ qtest_writel(s, 0xff000004, 0xff000114); ++ qtest_writel(s, 0xff000008, 0xff100014); ++ qtest_writel(s, 0xff10002f, 0x000000ff); ++ ++ qtest_quit(s); ++} ++ + /* + * This used to trigger a UAF in lsi_do_msgout() + * https://gitlab.com/qemu-project/qemu/-/issues/972 +@@ -124,5 +154,8 @@ int main(int argc, char 
**argv) + qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req", + test_lsi_do_msgout_cancel_req); + ++ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy", ++ test_lsi_dma_reentrancy); ++ + return g_test_run(); + } +-- +2.39.3 + diff --git a/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch b/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch new file mode 100644 index 0000000..1fc5697 --- /dev/null +++ b/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch @@ -0,0 +1,144 @@ +From 399bfc04fb8352af6d2f4c984e68c334d2043368 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:34 +0200 +Subject: [PATCH 04/21] iotests: Test commit with iothreads and ongoing I/O + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] 1e42fde5951ae12bddc4eea2320f066f7079878f (kmwolf/centos-qemu-kvm) + +This test exercises graph locking, draining, and graph modifications +with AioContext switches a lot. Amongst others, it serves as a +regression test for bdrv_graph_wrlock() deadlocking because it is called +with a locked AioContext and for AioContext handling in the NBD server.
+ +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-4-kwolf@redhat.com> +Tested-by: Eric Blake +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 95fdd8db61848d31fde1d9b32da7f3f76babfa25) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/iotests.py | 4 ++ + .../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++-- + .../tests/graph-changes-while-io.out | 4 +- + 3 files changed, 58 insertions(+), 6 deletions(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 3e82c634cf..7073579a7d 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ + assert self._qmp is not None + return self._qmp.cmd(cmd, args) + ++ def get_qmp(self) -> QEMUMonitorProtocol: ++ assert self._qmp is not None ++ return self._qmp ++ + def stop(self, kill_signal=15): + self._p.send_signal(kill_signal) + self._p.wait() +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io +index 7664f33689..750e7d4d38 100755 +--- a/tests/qemu-iotests/tests/graph-changes-while-io ++++ b/tests/qemu-iotests/tests/graph-changes-while-io +@@ -22,19 +22,19 @@ + import os + from threading import Thread + import iotests +-from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ +- QemuStorageDaemon ++from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ ++ QMPTestCase, QemuStorageDaemon + + + top = os.path.join(iotests.test_dir, 'top.img') + nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') + + +-def do_qemu_img_bench() -> None: ++def do_qemu_img_bench(count: int = 2000000) -> None: + """ + Do some I/O requests on `nbd_sock`. 
+ """ +- qemu_img('bench', '-f', 'raw', '-c', '2000000', ++ qemu_img('bench', '-f', 'raw', '-c', str(count), + f'nbd+unix:///node0?socket={nbd_sock}') + + +@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase): + + bench_thr.join() + ++ def test_commit_while_io(self) -> None: ++ # Run qemu-img bench in the background ++ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, )) ++ bench_thr.start() ++ ++ qemu_io('-c', 'write 0 64k', top) ++ qemu_io('-c', 'write 128k 64k', top) ++ ++ result = self.qsd.qmp('blockdev-add', { ++ 'driver': imgfmt, ++ 'node-name': 'overlay', ++ 'backing': None, ++ 'file': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.qsd.qmp('blockdev-snapshot', { ++ 'node': 'node0', ++ 'overlay': 'overlay', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ # While qemu-img bench is running, repeatedly commit overlay to node0 ++ while bench_thr.is_alive(): ++ result = self.qsd.qmp('block-commit', { ++ 'job-id': 'job0', ++ 'device': 'overlay', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.qsd.qmp('block-job-cancel', { ++ 'device': 'job0', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ cancelled = False ++ while not cancelled: ++ for event in self.qsd.get_qmp().get_events(wait=10.0): ++ if event['event'] != 'JOB_STATUS_CHANGE': ++ continue ++ if event['data']['status'] == 'null': ++ cancelled = True ++ ++ bench_thr.join() ++ + if __name__ == '__main__': + # Format must support raw backing files + iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out +index ae1213e6f8..fbc63e62f8 100644 +--- a/tests/qemu-iotests/tests/graph-changes-while-io.out ++++ b/tests/qemu-iotests/tests/graph-changes-while-io.out +@@ -1,5 +1,5 @@ +-. ++.. 
+ ---------------------------------------------------------------------- +-Ran 1 tests ++Ran 2 tests + + OK +-- +2.39.3 + diff --git a/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch b/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch new file mode 100644 index 0000000..c1100a5 --- /dev/null +++ b/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch @@ -0,0 +1,53 @@ +From 6de2f37d9a5db6578554929227377e4fd6d2feb3 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 14/21] loongarch: mark loongarch_ipi_iocsr re-entrnacy safe + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/13] 02435b9148b906960137de32eb5a3c4961e44a57 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 6d0589e0e6c64b888864a2bf980537be20389264 +Author: Alexander Bulekov +Date: Sat May 6 07:21:45 2023 -0400 + + loongarch: mark loongarch_ipi_iocsr re-entrnacy safe + + loongarch_ipi_iocsr MRs rely on re-entrant IO through the ipi_send + function. As such, mark these MRs re-entrancy-safe. 
+ + Fixes: a2e1753b80 ("memory: prevent dma-reentracy issues") + Signed-off-by: Alexander Bulekov + Reviewed-by: Song Gao + Message-Id: <20230506112145.3563708-1-alxndr@bu.edu> + Signed-off-by: Song Gao + +Signed-off-by: Jon Maloy +--- + hw/intc/loongarch_ipi.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c +index aa4bf9eb74..40e98af2ce 100644 +--- a/hw/intc/loongarch_ipi.c ++++ b/hw/intc/loongarch_ipi.c +@@ -215,6 +215,10 @@ static void loongarch_ipi_init(Object *obj) + for (cpu = 0; cpu < MAX_IPI_CORE_NUM; cpu++) { + memory_region_init_io(&s->ipi_iocsr_mem[cpu], obj, &loongarch_ipi_ops, + &lams->ipi_core[cpu], "loongarch_ipi_iocsr", 0x48); ++ ++ /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ ++ s->ipi_iocsr_mem[cpu].disable_reentrancy_guard = true; ++ + sysbus_init_mmio(sbd, &s->ipi_iocsr_mem[cpu]); + + memory_region_init_io(&s->ipi64_iocsr_mem[cpu], obj, &loongarch_ipi64_ops, +-- +2.39.3 + diff --git a/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch b/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch new file mode 100644 index 0000000..359d53f --- /dev/null +++ b/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch @@ -0,0 +1,70 @@ +From 0660a7a6994db0db9f6d0b84f6345aa06dc61761 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 29 May 2023 14:21:08 -0400 +Subject: [PATCH 16/21] lsi53c895a: disable reentrancy detection for MMIO + region, too + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [12/13] fb9da8b68cdf0dc0b0bd8fb8540849c944d0bf20 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit d139fe9ad8a27bcc50b4ead77d2f97d191a0e95e +Author: Thomas Huth +Date: Tue May 16 11:05:56 2023 +0200 + + lsi53c895a: disable reentrancy detection for MMIO region, too + + While trying to use 
a SCSI disk on the LSI controller with an + older version of Fedora (25), I'm getting: + + qemu: warning: Blocked re-entrant IO on MemoryRegion: lsi-mmio at addr: 0x34 + + and the SCSI controller is not usable. Seems like we have to + disable the reentrancy checker for the MMIO region, too, to + get this working again. + + The problem can be reproduced like this: + + ./qemu-system-x86_64 -accel kvm -m 2G -machine q35 \ + -device lsi53c810,id=lsi1 -device scsi-hd,drive=d0 \ + -drive if=none,id=d0,file=.../somedisk.qcow2 \ + -cdrom Fedora-Everything-netinst-i386-25-1.3.iso + + Where somedisk.qcow2 is an image that contains already some partitions + and file systems. + + In the boot menu of Fedora, go to + "Troubleshooting" -> "Rescue a Fedora system" -> "3) Skip to shell" + + Then check "dmesg | grep -i 53c" for failure messages, and try to mount + a partition from somedisk.qcow2. + + Message-Id: <20230516090556.553813-1-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index db27872963..048436352b 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -2307,6 +2307,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) + * re-entrancy guard.
+ */ + s->ram_io.disable_reentrancy_guard = true; ++ s->mmio_io.disable_reentrancy_guard = true; + + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); + qdev_init_gpio_out(d, &s->ext_irq, 1); +-- +2.39.3 + diff --git a/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch b/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch new file mode 100644 index 0000000..e671c92 --- /dev/null +++ b/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch @@ -0,0 +1,58 @@ +From 621808c6c4da3adcc073231493d487d6360386c9 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 09/21] lsi53c895a: disable reentrancy detection for script RAM + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/13] 765d65fc3fb735eb4b52a408ccff91b538ad32b6 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:10 2023 -0400 + + lsi53c895a: disable reentrancy detection for script RAM + + As the code is designed to use the memory APIs to access the script ram, + disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion. + + In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion. 
+ + Reported-by: Fiona Ebner + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-6-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index af93557a9a..db27872963 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) + memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, + "lsi-io", 256); + ++ /* ++ * Since we use the address-space API to interact with ram_io, disable the ++ * re-entrancy guard. ++ */ ++ s->ram_io.disable_reentrancy_guard = true; ++ + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); + qdev_init_gpio_out(d, &s->ext_irq, 1); + +-- +2.39.3 + diff --git a/kvm-memory-prevent-dma-reentracy-issues.patch b/kvm-memory-prevent-dma-reentracy-issues.patch new file mode 100644 index 0000000..d3697dc --- /dev/null +++ b/kvm-memory-prevent-dma-reentracy-issues.patch @@ -0,0 +1,150 @@ +From 0bc9295be331781491e993b6f1b0dca959194f13 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 05/21] memory: prevent dma-reentracy issues + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/13] d4a762d3b156200a65d09cde58cd6d77b229071e (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 +CVE: CVE-2023-0330 + +commit a2e1753b8054344f32cf94f31c6399a58794a380 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:06 2023 -0400 + + memory: prevent dma-reentracy issues + + Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. 
+ This flag is set/checked prior to calling a device's MemoryRegion + handlers, and set when device code initiates DMA. The purpose of this + flag is to prevent two types of DMA-based reentrancy issues: + + 1.) mmio -> dma -> mmio case + 2.) bh -> dma write -> mmio case + + These issues have led to problems such as stack-exhaustion and + use-after-frees. + + Summary of the problem from Peter Maydell: + https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com + + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 + Resolves: CVE-2023-0330 + + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-2-alxndr@bu.edu> + [thuth: Replace warn_report() with warn_report_once()] + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + include/exec/memory.h | 5 +++++ + include/hw/qdev-core.h | 7 +++++++ + softmmu/memory.c | 16 ++++++++++++++++ + 3 files changed, 28 insertions(+) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 15ade918ba..e45ce6061f 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -767,6 +767,8 @@ struct MemoryRegion { + bool is_iommu; + RAMBlock *ram_block; + Object *owner; ++ /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ ++ DeviceState *dev; + + const MemoryRegionOps *ops; + void *opaque; +@@ -791,6 +793,9 @@ struct MemoryRegion { + unsigned ioeventfd_nb; + MemoryRegionIoeventfd *ioeventfds; + RamDiscardManager *rdm; /* Only for RAM */ ++ ++ /* For devices designed to perform re-entrant IO into their own IO MRs */ ++ bool disable_reentrancy_guard; + }; + + struct IOMMUMemoryRegion { +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index bd50ad5ee1..7623703943 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -162,6 +162,10 @@ struct NamedClockList { + QLIST_ENTRY(NamedClockList) node; + }; + ++typedef struct { ++ bool engaged_in_io; ++} MemReentrancyGuard; ++ + /** + * DeviceState: + * @realized: Indicates whether the device has been fully constructed. +@@ -194,6 +198,9 @@ struct DeviceState { + int alias_required_for_version; + ResettableState reset; + GSList *unplug_blockers; ++ ++ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ ++ MemReentrancyGuard mem_reentrancy_guard; + }; + + struct DeviceListener { +diff --git a/softmmu/memory.c b/softmmu/memory.c +index b1a6cae6f5..b7b3386e9d 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_size_max = 4; + } + ++ /* Do not allow more than one simultaneous access to a device's IO Regions */ ++ if (mr->dev && !mr->disable_reentrancy_guard && ++ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { ++ if (mr->dev->mem_reentrancy_guard.engaged_in_io) { ++ warn_report_once("Blocked re-entrant IO on MemoryRegion: " ++ "%s at addr: 0x%" HWADDR_PRIX, ++ memory_region_name(mr), addr); ++ return MEMTX_ACCESS_ERROR; ++ } ++ mr->dev->mem_reentrancy_guard.engaged_in_io = true; ++ } ++ + /* FIXME: support unaligned access? 
*/ + access_size = MAX(MIN(size, access_size_max), access_size_min); + access_mask = MAKE_64BIT_MASK(0, access_size * 8); +@@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_mask, attrs); + } + } ++ if (mr->dev) { ++ mr->dev->mem_reentrancy_guard.engaged_in_io = false; ++ } + return r; + } + +@@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, + } + mr->name = g_strdup(name); + mr->owner = owner; ++ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); + mr->ram_block = NULL; + + if (name) { +-- +2.39.3 + diff --git a/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch b/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch new file mode 100644 index 0000000..f45abea --- /dev/null +++ b/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch @@ -0,0 +1,67 @@ +From 3f2042e33acb6db91594e12ebd63b9abd9e753cc Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 7 Jun 2023 11:45:09 -0400 +Subject: [PATCH 15/21] memory: stricter checks prior to unsetting + engaged_in_io + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [11/13] b8e1a4b49dd7fa3b7948d32f46dfe1d7f7a4c1cf (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 3884bf6468ac6bbb58c2b3feaa74e87f821b52f3 +Author: Alexander Bulekov +Date: Tue May 16 04:40:02 2023 -0400 + + memory: stricter checks prior to unsetting engaged_in_io + + engaged_in_io could be unset by an MR with re-entrancy checks disabled. + Ensure that only MRs that can set the engaged_in_io flag can unset it. 
+ + Signed-off-by: Alexander Bulekov + Message-Id: <20230516084002.3813836-1-alxndr@bu.edu> + Reviewed-by: Darren Kenny + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + softmmu/memory.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/softmmu/memory.c b/softmmu/memory.c +index b7b3386e9d..26424f1d78 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -534,6 +534,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + unsigned access_size; + unsigned i; + MemTxResult r = MEMTX_OK; ++ bool reentrancy_guard_applied = false; + + if (!access_size_min) { + access_size_min = 1; +@@ -552,6 +553,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + return MEMTX_ACCESS_ERROR; + } + mr->dev->mem_reentrancy_guard.engaged_in_io = true; ++ reentrancy_guard_applied = true; + } + + /* FIXME: support unaligned access? */ +@@ -568,7 +570,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_mask, attrs); + } + } +- if (mr->dev) { ++ if (mr->dev && reentrancy_guard_applied) { + mr->dev->mem_reentrancy_guard.engaged_in_io = false; + } + return r; +-- +2.39.3 + diff --git a/kvm-multifd-Fix-the-number-of-channels-ready.patch b/kvm-multifd-Fix-the-number-of-channels-ready.patch new file mode 100644 index 0000000..abf21e6 --- /dev/null +++ b/kvm-multifd-Fix-the-number-of-channels-ready.patch @@ -0,0 +1,58 @@ +From af6f2a543c7db6d67d33fd12615a50e57fc3fe66 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 26 Apr 2023 12:20:36 +0200 +Subject: [PATCH 19/21] multifd: Fix the number of channels ready +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 171: multifd: Fix the number of channels ready +RH-Bugzilla: 2196289 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] a5e271ba249d85b27a68d3cff10480ca3a112c5d (LeoBras/centos-qemu-kvm) + +We don't wait in the sem when we are doing a sync_main. 
Make it wait +there. To make things clearer, we mark the channel ready at the +beginning of the thread loop. + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit d2026ee117147893f8d80f060cede6d872ecbd7f) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cce3ad6988..6a59c03dd2 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -635,6 +635,7 @@ int multifd_send_sync_main(QEMUFile *f) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + ++ qemu_sem_wait(&multifd_send_state->channels_ready); + trace_multifd_send_sync_main_wait(p->id); + qemu_sem_wait(&p->sem_sync); + +@@ -668,6 +669,7 @@ static void *multifd_send_thread(void *opaque) + p->num_packets = 1; + + while (true) { ++ qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_wait(&p->sem); + + if (qatomic_read(&multifd_send_state->exiting)) { +@@ -736,7 +738,6 @@ static void *multifd_send_thread(void *opaque) + if (flags & MULTIFD_FLAG_SYNC) { + qemu_sem_post(&p->sem_sync); + } +- qemu_sem_post(&multifd_send_state->channels_ready); + } else if (p->quit) { + qemu_mutex_unlock(&p->mutex); + break; +-- +2.39.3 + diff --git a/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch b/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch new file mode 100644 index 0000000..214b6dd --- /dev/null +++ b/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch @@ -0,0 +1,159 @@ +From 639f65d2cd4c6627a1d22c4b418b41400fe40154 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:33 +0200 +Subject: [PATCH 03/21] nbd/server: Fix drained_poll to wake coroutine in right + AioContext + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe
Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 177092e61360c2feb04377890b32fdeb2d1cfefc (kmwolf/centos-qemu-kvm) + +nbd_drained_poll() generally runs in the main thread, not whatever +iothread the NBD server coroutine is meant to run in, so it can't +directly reenter the coroutines to wake them up. + +The code seems to have the right intention, it specifies the correct +AioContext when it calls qemu_aio_coroutine_enter(). However, this +function doesn't schedule the coroutine to run in that AioContext, but +it assumes it is already called in the home thread of the AioContext. + +To fix this, add a new thread-safe qio_channel_wake_read() that can be +called in the main thread to wake up the coroutine in its AioContext, +and use this in nbd_drained_poll(). + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-3-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 7c1f51bf38de8cea4ed5030467646c37b46edeb7) +Signed-off-by: Kevin Wolf +--- + include/io/channel.h | 10 ++++++++++ + io/channel.c | 33 +++++++++++++++++++++++++++------ + nbd/server.c | 3 +-- + 3 files changed, 38 insertions(+), 8 deletions(-) + +diff --git a/include/io/channel.h b/include/io/channel.h +index 153fbd2904..2b905423a9 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -757,6 +757,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc); + void coroutine_fn qio_channel_yield(QIOChannel *ioc, + GIOCondition condition); + ++/** ++ * qio_channel_wake_read: ++ * @ioc: the channel object ++ * ++ * If qio_channel_yield() is currently waiting for the channel to become ++ * readable, interrupt it and reenter immediately. This function is safe to call ++ * from any thread.
++ */ ++void qio_channel_wake_read(QIOChannel *ioc); ++ + /** + * qio_channel_wait: + * @ioc: the channel object +diff --git a/io/channel.c b/io/channel.c +index a8c7f11649..3c9b7beb65 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include "block/aio-wait.h" + #include "io/channel.h" + #include "qapi/error.h" + #include "qemu/main-loop.h" +@@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc, + static void qio_channel_restart_read(void *opaque) + { + QIOChannel *ioc = opaque; +- Coroutine *co = ioc->read_coroutine; ++ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); ++ ++ if (!co) { ++ return; ++ } + + /* Assert that aio_co_wake() reenters the coroutine directly */ + assert(qemu_get_current_aio_context() == +@@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque) + static void qio_channel_restart_write(void *opaque) + { + QIOChannel *ioc = opaque; +- Coroutine *co = ioc->write_coroutine; ++ Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL); ++ ++ if (!co) { ++ return; ++ } + + /* Assert that aio_co_wake() reenters the coroutine directly */ + assert(qemu_get_current_aio_context() == +@@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc) + void coroutine_fn qio_channel_yield(QIOChannel *ioc, + GIOCondition condition) + { ++ AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context(); ++ + assert(qemu_in_coroutine()); ++ assert(in_aio_context_home_thread(ioc_ctx)); ++ + if (condition == G_IO_IN) { + assert(!ioc->read_coroutine); + ioc->read_coroutine = qemu_coroutine_self(); +@@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, + } + qio_channel_set_aio_fd_handlers(ioc); + qemu_coroutine_yield(); ++ assert(in_aio_context_home_thread(ioc_ctx)); + + /* Allow interrupting the operation by reentering the coroutine other than + * through the aio_fd_handlers. 
*/ +- if (condition == G_IO_IN && ioc->read_coroutine) { +- ioc->read_coroutine = NULL; ++ if (condition == G_IO_IN) { ++ assert(ioc->read_coroutine == NULL); + qio_channel_set_aio_fd_handlers(ioc); +- } else if (condition == G_IO_OUT && ioc->write_coroutine) { +- ioc->write_coroutine = NULL; ++ } else if (condition == G_IO_OUT) { ++ assert(ioc->write_coroutine == NULL); + qio_channel_set_aio_fd_handlers(ioc); + } + } + ++void qio_channel_wake_read(QIOChannel *ioc) ++{ ++ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); ++ if (co) { ++ aio_co_wake(co); ++ } ++} + + static gboolean qio_channel_wait_complete(QIOChannel *ioc, + GIOCondition condition, +diff --git a/nbd/server.c b/nbd/server.c +index 3d8d0d81df..ea47522e8f 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque) + * enter it here so we don't depend on the client to wake it up. + */ + if (client->recv_coroutine != NULL && client->read_yielding) { +- qemu_aio_coroutine_enter(exp->common.ctx, +- client->recv_coroutine); ++ qio_channel_wake_read(client->ioc); + } + + return true; +-- +2.39.3 + diff --git a/kvm-raven-disable-reentrancy-detection-for-iomem.patch b/kvm-raven-disable-reentrancy-detection-for-iomem.patch new file mode 100644 index 0000000..4a4a2cc --- /dev/null +++ b/kvm-raven-disable-reentrancy-detection-for-iomem.patch @@ -0,0 +1,54 @@ +From 936e21428a04524ccffeb36110d1aa61de9f44e5 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 11/21] raven: disable reentrancy detection for iomem + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/13] 48278583aa1ab08b912f49cd8b3a79d1bb3abf5f (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 6dad5a6810d9c60ca320d01276f6133bbcfa1fc7 +Author: Alexander Bulekov +Date: Thu Apr 27 
17:10:12 2023 -0400 + + raven: disable reentrancy detection for iomem + + As the code is designed for re-entrant calls from raven_io_ops to + pci-conf, mark raven_io_ops as reentrancy-safe. + + Signed-off-by: Alexander Bulekov + Message-Id: <20230427211013.2994127-8-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/pci-host/raven.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c +index 072ffe3c5e..9a11ac4b2b 100644 +--- a/hw/pci-host/raven.c ++++ b/hw/pci-host/raven.c +@@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj) + memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); + address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); + ++ /* ++ * Raven's raven_io_ops use the address-space API to access pci-conf-idx ++ * (which is also owned by the raven device). As such, mark the ++ * pci_io_non_contiguous as re-entrancy safe. ++ */ ++ s->pci_io_non_contiguous.disable_reentrancy_guard = true; ++ + /* CPU address space */ + memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, + &s->pci_io); +-- +2.39.3 + diff --git a/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch b/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch new file mode 100644 index 0000000..ecf1353 --- /dev/null +++ b/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch @@ -0,0 +1,129 @@ +From 3cab2a638a10ece2b76d9f33a3c5dc6f64f1bbaa Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Wed, 10 May 2023 12:55:31 +0200 +Subject: [PATCH 21/21] s390x/pv: Fix spurious warning with asynchronous + teardown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests +RH-Bugzilla: 2168500 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cédric Le Goater +RH-Commit: [2/2] 
cb690d3155ea22c6df00a4d75b72f501515e5556 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 + +Kernel commit 292a7d6fca33 ("KVM: s390: pv: fix asynchronous teardown +for small VMs") causes the KVM_PV_ASYNC_CLEANUP_PREPARE ioctl to fail +if the VM is not larger than 2GiB. QEMU would attempt it and fail, +print an error message, and then proceed with a normal teardown. + +Avoid attempting to use asynchronous teardown altogether when the VM is +not larger than 2 GiB. This will avoid triggering the error message and +also avoid pointless overhead; normal teardown is fast enough for small +VMs. + +Reported-by: Marc Hartmayer +Fixes: c3a073c610 ("s390x/pv: Add support for asynchronous teardown for reboot") +Link: https://lore.kernel.org/all/20230421085036.52511-2-imbrenda@linux.ibm.com/ +Signed-off-by: Claudio Imbrenda +Message-Id: <20230510105531.30623-2-imbrenda@linux.ibm.com> +Reviewed-by: Thomas Huth +[thuth: Fix inline function parameter in pv.h] +Signed-off-by: Thomas Huth +(cherry picked from commit 88693ab2a53f2f3d25cb39a7b5034ab391bc5a81) +--- + hw/s390x/pv.c | 10 ++++++++-- + hw/s390x/s390-virtio-ccw.c | 2 +- + include/hw/s390x/pv.h | 6 +++--- + 3 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 49ea38236c..b63f3784c6 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -13,6 +13,7 @@ + + #include + ++#include "qemu/units.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/kvm.h" +@@ -115,7 +116,7 @@ static void *s390_pv_do_unprot_async_fn(void *p) + return NULL; + } + +-bool s390_pv_vm_try_disable_async(void) ++bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) + { + /* + * t is only needed to create the thread; once qemu_thread_create +@@ -123,7 +124,12 @@ bool s390_pv_vm_try_disable_async(void) + */ + QemuThread t; + +- if (!kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { ++ /* ++ * If the feature is not present 
or if the VM is not larger than 2 GiB, ++ * KVM_PV_ASYNC_CLEANUP_PREPARE will fail; no point in attempting it. ++ */ ++ if ((MACHINE(ms)->maxram_size <= 2 * GiB) || ++ !kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { + return false; + } + if (s390_pv_cmd(KVM_PV_ASYNC_CLEANUP_PREPARE, NULL) != 0) { +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 6a0b93c63d..d95c595f88 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -330,7 +330,7 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) + + static void s390_machine_unprotect(S390CcwMachineState *ms) + { +- if (!s390_pv_vm_try_disable_async()) { ++ if (!s390_pv_vm_try_disable_async(ms)) { + s390_pv_vm_disable(); + } + ms->pv = false; +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index 966306a9db..7b935e2246 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -14,10 +14,10 @@ + + #include "qapi/error.h" + #include "sysemu/kvm.h" ++#include "hw/s390x/s390-virtio-ccw.h" + + #ifdef CONFIG_KVM + #include "cpu.h" +-#include "hw/s390x/s390-virtio-ccw.h" + + static inline bool s390_is_pv(void) + { +@@ -41,7 +41,7 @@ static inline bool s390_is_pv(void) + int s390_pv_query_info(void); + int s390_pv_vm_enable(void); + void s390_pv_vm_disable(void); +-bool s390_pv_vm_try_disable_async(void); ++bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); + int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); + int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); + void s390_pv_prep_reset(void); +@@ -61,7 +61,7 @@ static inline bool s390_is_pv(void) { return false; } + static inline int s390_pv_query_info(void) { return 0; } + static inline int s390_pv_vm_enable(void) { return 0; } + static inline void s390_pv_vm_disable(void) {} +-static inline bool s390_pv_vm_try_disable_async(void) { return false; } ++static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) {
return false; } + static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } + static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } + static inline void s390_pv_prep_reset(void) {} +-- +2.39.3 + diff --git a/kvm-util-async-teardown-wire-up-query-command-line-optio.patch b/kvm-util-async-teardown-wire-up-query-command-line-optio.patch new file mode 100644 index 0000000..8c468d8 --- /dev/null +++ b/kvm-util-async-teardown-wire-up-query-command-line-optio.patch @@ -0,0 +1,180 @@ +From c1502b0cd16378d6d5bd4259b90bf81a5fb5aad3 Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Fri, 5 May 2023 14:00:51 +0200 +Subject: [PATCH 20/21] util/async-teardown: wire up query-command-line-options +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests +RH-Bugzilla: 2168500 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/2] 76e5f25df2c02721f5a29f552ee3061be589abb2 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 + +Add new -run-with option with an async-teardown=on|off parameter. It is +visible in the output of query-command-line-options QMP command, so it +can be discovered and used by libvirt. + +The option -async-teardown is now redundant, deprecate it. 
+ +Reported-by: Boris Fiuczynski +Fixes: c891c24b1a ("os-posix: asynchronous teardown for shutdown on Linux") +Signed-off-by: Claudio Imbrenda +Message-Id: <20230505120051.36605-2-imbrenda@linux.ibm.com> +[thuth: Add curly braces to fix error with GCC 8.5, fix bug in deprecated.rst] +Signed-off-by: Thomas Huth + +(cherry picked from commit 80bd81cadd127c1e2fc784612a52abe392670ba4) +Conflicts: + docs/about/deprecated.rst (missing context from other patches) +Signed-off-by: Thomas Huth +--- + docs/about/deprecated.rst | 5 +++++ + os-posix.c | 14 ++++++++++++++ + qemu-options.hx | 34 +++++++++++++++++++++++----------- + util/async-teardown.c | 21 +++++++++++++++++++++ + 4 files changed, 63 insertions(+), 11 deletions(-) + +diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst +index 1ca9dc33d6..52893fcf38 100644 +--- a/docs/about/deprecated.rst ++++ b/docs/about/deprecated.rst +@@ -111,6 +111,11 @@ Use ``-machine acpi=off`` instead. + The HAXM project has been retired (see https://github.com/intel/haxm#status). + Use "whpx" (on Windows) or "hvf" (on macOS) instead. + ++``-async-teardown`` (since 8.1) ++''''''''''''''''''''''''''''''' ++ ++Use ``-run-with async-teardown=on`` instead. 
++ + + QEMU Machine Protocol (QMP) commands + ------------------------------------ +diff --git a/os-posix.c b/os-posix.c +index 5adc69f560..90ea71725f 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -36,6 +36,8 @@ + #include "qemu/log.h" + #include "sysemu/runstate.h" + #include "qemu/cutils.h" ++#include "qemu/config-file.h" ++#include "qemu/option.h" + + #ifdef CONFIG_LINUX + #include +@@ -152,9 +154,21 @@ int os_parse_cmd_args(int index, const char *optarg) + daemonize = 1; + break; + #if defined(CONFIG_LINUX) ++ /* deprecated */ + case QEMU_OPTION_asyncteardown: + init_async_teardown(); + break; ++ case QEMU_OPTION_run_with: { ++ QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"), ++ optarg, false); ++ if (!opts) { ++ exit(1); ++ } ++ if (qemu_opt_get_bool(opts, "async-teardown", false)) { ++ init_async_teardown(); ++ } ++ break; ++ } + #endif + default: + return -1; +diff --git a/qemu-options.hx b/qemu-options.hx +index 52b49f1f6a..b18f933703 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -4766,20 +4766,32 @@ DEF("qtest-log", HAS_ARG, QEMU_OPTION_qtest_log, "", QEMU_ARCH_ALL) + DEF("async-teardown", 0, QEMU_OPTION_asyncteardown, + "-async-teardown enable asynchronous teardown\n", + QEMU_ARCH_ALL) +-#endif + SRST + ``-async-teardown`` +- Enable asynchronous teardown. A new process called "cleanup/" +- will be created at startup sharing the address space with the main qemu +- process, using clone. It will wait for the main qemu process to +- terminate completely, and then exit. +- This allows qemu to terminate very quickly even if the guest was +- huge, leaving the teardown of the address space to the cleanup +- process. Since the cleanup process shares the same cgroups as the +- main qemu process, accounting is performed correctly. This only +- works if the cleanup process is not forcefully killed with SIGKILL +- before the main qemu process has terminated completely. ++ This option is deprecated and should no longer be used. 
The new option ++ ``-run-with async-teardown=on`` is a replacement. + ERST ++DEF("run-with", HAS_ARG, QEMU_OPTION_run_with, ++ "-run-with async-teardown[=on|off]\n" ++ " misc QEMU process lifecycle options\n" ++ " async-teardown=on enables asynchronous teardown\n", ++ QEMU_ARCH_ALL) ++SRST ++``-run-with`` ++ Set QEMU process lifecycle options. ++ ++ ``async-teardown=on`` enables asynchronous teardown. A new process called ++ "cleanup/" will be created at startup sharing the address ++ space with the main QEMU process, using clone. It will wait for the ++ main QEMU process to terminate completely, and then exit. This allows ++ QEMU to terminate very quickly even if the guest was huge, leaving the ++ teardown of the address space to the cleanup process. Since the cleanup ++ process shares the same cgroups as the main QEMU process, accounting is ++ performed correctly. This only works if the cleanup process is not ++ forcefully killed with SIGKILL before the main QEMU process has ++ terminated completely. 
++ERST ++#endif + + DEF("msg", HAS_ARG, QEMU_OPTION_msg, + "-msg [timestamp[=on|off]][,guest-name=[on|off]]\n" +diff --git a/util/async-teardown.c b/util/async-teardown.c +index 62cdeb0f20..3ab19c8740 100644 +--- a/util/async-teardown.c ++++ b/util/async-teardown.c +@@ -12,6 +12,9 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/config-file.h" ++#include "qemu/option.h" ++#include "qemu/module.h" + #include + #include + #include +@@ -144,3 +147,21 @@ void init_async_teardown(void) + clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL); + sigprocmask(SIG_SETMASK, &old_signals, NULL); + } ++ ++static QemuOptsList qemu_run_with_opts = { ++ .name = "run-with", ++ .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head), ++ .desc = { ++ { ++ .name = "async-teardown", ++ .type = QEMU_OPT_BOOL, ++ }, ++ { /* end of list */ } ++ }, ++}; ++ ++static void register_teardown(void) ++{ ++ qemu_add_opts(&qemu_run_with_opts); ++} ++opts_init(register_teardown); +-- +2.39.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e7c5444..073081a 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 4%{?rcrel}%{?dist}%{?cc_suffix} +Release: 5%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -309,6 +309,48 @@ Patch77: kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch Patch78: kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch # For bz#2185688 - [qemu-kvm] no response with QMP command block_resize Patch79: kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch80: kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch81: kvm-graph-lock-Disable-locking-for-now.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch82: kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch83: kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch84: kvm-memory-prevent-dma-reentracy-issues.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch85: kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch86: kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch87: kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch88: kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch +# For RHEL-516 - 
CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch89: kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch90: kvm-raven-disable-reentrancy-detection-for-iomem.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch91: kvm-apic-disable-reentrancy-detection-for-apic-msi.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch92: kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch93: kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch94: kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch95: kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch96: kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch +# For bz#2189423 - Failed to migrate VM from rhel 9.3 to rhel 9.2 +Patch97: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch +# For bz#2196289 - Fix number of ready channels on multifd +Patch98: kvm-multifd-Fix-the-number-of-channels-ready.patch +# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part +Patch99: kvm-util-async-teardown-wire-up-query-command-line-optio.patch +# For bz#2168500 - [IBM 
9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part +Patch100: kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch %if %{have_clang} BuildRequires: clang @@ -1333,6 +1375,39 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Jun 13 2023 Miroslav Rezanina - 8.0.0-5 +- kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch [bz#2186725] +- kvm-graph-lock-Disable-locking-for-now.patch [bz#2186725] +- kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch [bz#2186725] +- kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch [bz#2186725] +- kvm-memory-prevent-dma-reentracy-issues.patch [RHEL-516] +- kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch [RHEL-516] +- kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch [RHEL-516] +- kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch [RHEL-516] +- kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch [RHEL-516] +- kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch [RHEL-516] +- kvm-raven-disable-reentrancy-detection-for-iomem.patch [RHEL-516] +- kvm-apic-disable-reentrancy-detection-for-apic-msi.patch [RHEL-516] +- kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch [RHEL-516] +- kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch [RHEL-516] +- kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch [RHEL-516] +- kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch [RHEL-516] +- kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch [RHEL-516] +- kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch [bz#2189423] +- kvm-multifd-Fix-the-number-of-channels-ready.patch [bz#2196289] +- kvm-util-async-teardown-wire-up-query-command-line-optio.patch [bz#2168500] +- kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch [bz#2168500] +- Resolves: bz#2186725 + (Qemu hang when commit during fio running(iothread enable)) +- Resolves: 
RHEL-516 + (CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9]) +- Resolves: bz#2189423 + (Failed to migrate VM from rhel 9.3 to rhel 9.2) +- Resolves: bz#2196289 + (Fix number of ready channels on multifd) +- Resolves: bz#2168500 + ([IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part) + * Mon May 22 2023 Miroslav Rezanina - 8.0.0-4 - kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch [bz#2058982] - kvm-util-mmap-alloc-qemu_fd_getfs.patch [bz#2057267]