Compare commits
No commits in common. "c8-stream-rhel" and "c8s-stream-rhel" have entirely different histories.
c8-stream-
...
c8s-stream
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,5 +1 @@
|
||||
SOURCES/qemu-6.2.0.tar.xz
|
||||
SOURCES/tests_data_acpi_pc_SSDT.dimmpxm
|
||||
SOURCES/tests_data_acpi_q35_FACP.slic
|
||||
SOURCES/tests_data_acpi_q35_SSDT.dimmpxm
|
||||
SOURCES/tests_data_acpi_virt_SSDT.memhp
|
@ -1,5 +1 @@
|
||||
68cd61a466170115b88817e2d52db2cd7a92f43a SOURCES/qemu-6.2.0.tar.xz
|
||||
c4b34092bc5af1ba7febfca1477320fb024e8acd SOURCES/tests_data_acpi_pc_SSDT.dimmpxm
|
||||
19349e3517143bd1af56a5444e927ba37a111f72 SOURCES/tests_data_acpi_q35_FACP.slic
|
||||
4632d10ae8cedad4d5d760ed211f83f0dc81005d SOURCES/tests_data_acpi_q35_SSDT.dimmpxm
|
||||
ef12eed43cc357fb134db6fa3c7ffc83e222a97d SOURCES/tests_data_acpi_virt_SSDT.memhp
|
||||
|
@ -1,36 +0,0 @@
|
||||
From a707eff49800045d07afbcd8a74617c50b960151 Mon Sep 17 00:00:00 2001
|
||||
From: German Maglione <gmaglione@redhat.com>
|
||||
Date: Thu, 10 Oct 2024 13:23:25 +0200
|
||||
Subject: [PATCH] Fix thread-pool-size default value in the man page
|
||||
|
||||
RH-Author: German Maglione <None>
|
||||
RH-MergeRequest: 417: Fix thread-pool-size default value in the man page
|
||||
RH-Jira: RHEL-26197
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-Commit: [1/1] bdf22ed4600ac7f02a4b08c54f162b1f89c44a99
|
||||
|
||||
The current --thread-pool-size default value is 0, let's reflect it
|
||||
in the man page.
|
||||
|
||||
Signed-off-by: German Maglione <gmaglione@redhat.com>
|
||||
---
|
||||
docs/tools/virtiofsd.rst | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst
|
||||
index 07ac0be551..fb3d59c449 100644
|
||||
--- a/docs/tools/virtiofsd.rst
|
||||
+++ b/docs/tools/virtiofsd.rst
|
||||
@@ -120,7 +120,7 @@ Options
|
||||
.. option:: --thread-pool-size=NUM
|
||||
|
||||
Restrict the number of worker threads per request queue to NUM. The default
|
||||
- is 64.
|
||||
+ is 0.
|
||||
|
||||
.. option:: --cache=none|auto|always
|
||||
|
||||
--
|
||||
2.45.2
|
||||
|
@ -1,82 +0,0 @@
|
||||
From 9bacf8c4104ff3cff2e0e2c2179ec4fda633167f Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Mon, 16 Jan 2023 07:51:08 -0500
|
||||
Subject: [PATCH 05/11] KVM: keep track of running ioctls
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 247: accel: introduce accelerator blocker API
|
||||
RH-Bugzilla: 2161188
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/3] 357508389e2a0fd996206b406e9e235e50b5f0b6
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188
|
||||
|
||||
commit a27dd2de68f37ba96fe164a42121daa5f0750afc
|
||||
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Fri Nov 11 10:47:57 2022 -0500
|
||||
|
||||
KVM: keep track of running ioctls
|
||||
|
||||
Using the new accel-blocker API, mark where ioctls are being called
|
||||
in KVM. Next, we will implement the critical section that will take
|
||||
care of performing memslots modifications atomically, therefore
|
||||
preventing any new ioctl from running and allowing the running ones
|
||||
to finish.
|
||||
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Message-Id: <20221111154758.1372674-3-eesposit@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index 8f2a53438f..221aadfda7 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -2337,6 +2337,7 @@ static int kvm_init(MachineState *ms)
|
||||
assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size);
|
||||
|
||||
s->sigmask_len = 8;
|
||||
+ accel_blocker_init();
|
||||
|
||||
#ifdef KVM_CAP_SET_GUEST_DEBUG
|
||||
QTAILQ_INIT(&s->kvm_sw_breakpoints);
|
||||
@@ -3018,7 +3019,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
|
||||
va_end(ap);
|
||||
|
||||
trace_kvm_vm_ioctl(type, arg);
|
||||
+ accel_ioctl_begin();
|
||||
ret = ioctl(s->vmfd, type, arg);
|
||||
+ accel_ioctl_end();
|
||||
if (ret == -1) {
|
||||
ret = -errno;
|
||||
}
|
||||
@@ -3036,7 +3039,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
|
||||
va_end(ap);
|
||||
|
||||
trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
|
||||
+ accel_cpu_ioctl_begin(cpu);
|
||||
ret = ioctl(cpu->kvm_fd, type, arg);
|
||||
+ accel_cpu_ioctl_end(cpu);
|
||||
if (ret == -1) {
|
||||
ret = -errno;
|
||||
}
|
||||
@@ -3054,7 +3059,9 @@ int kvm_device_ioctl(int fd, int type, ...)
|
||||
va_end(ap);
|
||||
|
||||
trace_kvm_device_ioctl(fd, type, arg);
|
||||
+ accel_ioctl_begin();
|
||||
ret = ioctl(fd, type, arg);
|
||||
+ accel_ioctl_end();
|
||||
if (ret == -1) {
|
||||
ret = -errno;
|
||||
}
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,181 +0,0 @@
|
||||
From 440ee491240f2f02f9a6082d8aad98d88c1039dd Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon, 15 Jan 2024 14:00:04 +0100
|
||||
Subject: [PATCH 1/5] MAINTAINERS: split out s390x sections
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 348: s390x: Provide some more useful information if decryption of a PV image fails
|
||||
RH-Jira: RHEL-18214
|
||||
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Commit: [1/5] a71a3c11922481f97c36570e361088d17474e481
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-18214
|
||||
|
||||
commit 56e34834029c7c6862cb0095d95ad83c50485f88
|
||||
Author: Cornelia Huck <cohuck@redhat.com>
|
||||
Date: Wed Dec 22 11:55:48 2021 +0100
|
||||
|
||||
MAINTAINERS: split out s390x sections
|
||||
|
||||
Split out some more specialized devices etc., so that we can build
|
||||
smarter lists of people to be put on cc: in the future.
|
||||
|
||||
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Acked-by: David Hildenbrand <david@redhat.com>
|
||||
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
|
||||
Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
Acked-by: Halil Pasic <pasic@linux.ibm.com>
|
||||
Acked-by: Eric Farman <farman@linux.ibm.com>
|
||||
Message-Id: <20211222105548.356852-1-cohuck@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
MAINTAINERS | 85 ++++++++++++++++++++++++++++++++++++++++++++++-------
|
||||
1 file changed, 74 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index 7543eb4d59..b893206fc3 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -297,7 +297,6 @@ M: David Hildenbrand <david@redhat.com>
|
||||
S: Maintained
|
||||
F: target/s390x/
|
||||
F: target/s390x/tcg
|
||||
-F: target/s390x/cpu_models_*.[ch]
|
||||
F: hw/s390x/
|
||||
F: disas/s390.c
|
||||
F: tests/tcg/s390x/
|
||||
@@ -396,16 +395,10 @@ M: Halil Pasic <pasic@linux.ibm.com>
|
||||
M: Christian Borntraeger <borntraeger@de.ibm.com>
|
||||
S: Supported
|
||||
F: target/s390x/kvm/
|
||||
-F: target/s390x/ioinst.[ch]
|
||||
F: target/s390x/machine.c
|
||||
F: target/s390x/sigp.c
|
||||
-F: target/s390x/cpu_features*.[ch]
|
||||
-F: target/s390x/cpu_models.[ch]
|
||||
F: hw/s390x/pv.c
|
||||
F: include/hw/s390x/pv.h
|
||||
-F: hw/intc/s390_flic.c
|
||||
-F: hw/intc/s390_flic_kvm.c
|
||||
-F: include/hw/s390x/s390_flic.h
|
||||
F: gdb-xml/s390*.xml
|
||||
T: git https://github.com/borntraeger/qemu.git s390-next
|
||||
L: qemu-s390x@nongnu.org
|
||||
@@ -1529,12 +1522,8 @@ S390 Virtio-ccw
|
||||
M: Halil Pasic <pasic@linux.ibm.com>
|
||||
M: Christian Borntraeger <borntraeger@de.ibm.com>
|
||||
S: Supported
|
||||
-F: hw/char/sclp*.[hc]
|
||||
-F: hw/char/terminal3270.c
|
||||
F: hw/s390x/
|
||||
F: include/hw/s390x/
|
||||
-F: hw/watchdog/wdt_diag288.c
|
||||
-F: include/hw/watchdog/wdt_diag288.h
|
||||
F: configs/devices/s390x-softmmu/default.mak
|
||||
F: tests/avocado/machine_s390_ccw_virtio.py
|
||||
T: git https://github.com/borntraeger/qemu.git s390-next
|
||||
@@ -1559,6 +1548,37 @@ F: hw/s390x/s390-pci*
|
||||
F: include/hw/s390x/s390-pci*
|
||||
L: qemu-s390x@nongnu.org
|
||||
|
||||
+S390 channel subsystem
|
||||
+M: Halil Pasic <pasic@linux.ibm.com>
|
||||
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
|
||||
+S: Supported
|
||||
+F: hw/s390x/ccw-device.[ch]
|
||||
+F: hw/s390x/css.c
|
||||
+F: hw/s390x/css-bridge.c
|
||||
+F: include/hw/s390x/css.h
|
||||
+F: include/hw/s390x/css-bridge.h
|
||||
+F: include/hw/s390x/ioinst.h
|
||||
+F: target/s390x/ioinst.c
|
||||
+L: qemu-s390x@nongnu.org
|
||||
+
|
||||
+S390 CPU models
|
||||
+M: David Hildenbrand <david@redhat.com>
|
||||
+S: Maintained
|
||||
+F: target/s390x/cpu_features*.[ch]
|
||||
+F: target/s390x/cpu_models.[ch]
|
||||
+L: qemu-s390x@nongnu.org
|
||||
+
|
||||
+S390 SCLP-backed devices
|
||||
+M: Halil Pasic <pasic@linux.ibm.com>
|
||||
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
|
||||
+S: Supported
|
||||
+F: include/hw/s390x/event-facility.h
|
||||
+F: include/hw/s390x/sclp.h
|
||||
+F: hw/char/sclp*.[hc]
|
||||
+F: hw/s390x/event-facility.c
|
||||
+F: hw/s390x/sclp*.c
|
||||
+L: qemu-s390x@nongnu.org
|
||||
+
|
||||
X86 Machines
|
||||
------------
|
||||
PC
|
||||
@@ -1956,6 +1976,7 @@ M: Halil Pasic <pasic@linux.ibm.com>
|
||||
S: Supported
|
||||
F: hw/s390x/virtio-ccw*.[hc]
|
||||
F: hw/s390x/vhost-vsock-ccw.c
|
||||
+F: hw/s390x/vhost-user-fs-ccw.c
|
||||
T: git https://gitlab.com/cohuck/qemu.git s390-next
|
||||
T: git https://github.com/borntraeger/qemu.git s390-next
|
||||
L: qemu-s390x@nongnu.org
|
||||
@@ -2294,6 +2315,48 @@ F: hw/timer/mips_gictimer.c
|
||||
F: include/hw/intc/mips_gic.h
|
||||
F: include/hw/timer/mips_gictimer.h
|
||||
|
||||
+S390 3270 device
|
||||
+M: Halil Pasic <pasic@linux.ibm.com>
|
||||
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
|
||||
+S: Odd fixes
|
||||
+F: include/hw/s390x/3270-ccw.h
|
||||
+F: hw/char/terminal3270.c
|
||||
+F: hw/s390x/3270-ccw.c
|
||||
+L: qemu-s390x@nongnu.org
|
||||
+
|
||||
+S390 diag 288 watchdog
|
||||
+M: Halil Pasic <pasic@linux.ibm.com>
|
||||
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
|
||||
+S: Supported
|
||||
+F: hw/watchdog/wdt_diag288.c
|
||||
+F: include/hw/watchdog/wdt_diag288.h
|
||||
+L: qemu-s390x@nongnu.org
|
||||
+
|
||||
+S390 storage key device
|
||||
+M: Halil Pasic <pasic@linux.ibm.com>
|
||||
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
|
||||
+S: Supported
|
||||
+F: hw/s390x/storage-keys.h
|
||||
+F: hw/s390x/s390-skeys*.c
|
||||
+L: qemu-s390x@nongnu.org
|
||||
+
|
||||
+S390 storage attribute device
|
||||
+M: Halil Pasic <pasic@linux.ibm.com>
|
||||
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
|
||||
+S: Supported
|
||||
+F: hw/s390x/storage-attributes.h
|
||||
+F: hw/s390x/s390-stattrib*.c
|
||||
+L: qemu-s390x@nongnu.org
|
||||
+
|
||||
+S390 floating interrupt controller
|
||||
+M: Halil Pasic <pasic@linux.ibm.com>
|
||||
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
|
||||
+M: David Hildenbrand <david@redhat.com>
|
||||
+S: Supported
|
||||
+F: hw/intc/s390_flic*.c
|
||||
+F: include/hw/s390x/s390_flic.h
|
||||
+L: qemu-s390x@nongnu.org
|
||||
+
|
||||
Subsystems
|
||||
----------
|
||||
Overall Audio backends
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,43 +0,0 @@
|
||||
From f1480fe9a4054113ddacd218961e29f31c33d329 Mon Sep 17 00:00:00 2001
|
||||
From: Peter Xu <peterx@redhat.com>
|
||||
Date: Wed, 6 Sep 2023 16:29:23 -0400
|
||||
Subject: [PATCH 2/3] RHEL: Enable "x-not-migrate-acpi-index" for all pre-RHEL8
|
||||
guests
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Peter Xu <peterx@redhat.com>
|
||||
RH-MergeRequest: 343: acpi: fix acpi_index migration
|
||||
RH-Jira: RHEL-20189
|
||||
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: Prasad Pandit <None>
|
||||
RH-Commit: [2/2] 0a26a71236e68dd7feb5d2063254090e3852d6ba
|
||||
|
||||
The acpi index migration is simply broken before for all pre-RHEL8
|
||||
branches. Don't migrate it for all of them.
|
||||
|
||||
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||
---
|
||||
hw/core/machine.c | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 2724f6848a..6650a3d7b7 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -44,6 +44,10 @@ GlobalProperty hw_compat_rhel_8_6[] = {
|
||||
* we need do disable it downstream on the latest hw_compat_rhel_8.
|
||||
*/
|
||||
{ "vhost-vsock-device", "seqpacket", "off" },
|
||||
+ /*
|
||||
+ * RHEL-2186: all rhel8 machines should not migrate acpi index.
|
||||
+ */
|
||||
+ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"},
|
||||
};
|
||||
const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6);
|
||||
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,349 +0,0 @@
|
||||
From a5e7bb1f7a88efb5574266a76e80fd7604d19921 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Mon, 16 Jan 2023 07:49:59 -0500
|
||||
Subject: [PATCH 04/11] accel: introduce accelerator blocker API
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 247: accel: introduce accelerator blocker API
|
||||
RH-Bugzilla: 2161188
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/3] 9d3d7f9554974a79042c915763288cce07aef135
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188
|
||||
|
||||
commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1
|
||||
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Fri Nov 11 10:47:56 2022 -0500
|
||||
|
||||
accel: introduce accelerator blocker API
|
||||
|
||||
This API allows the accelerators to prevent vcpus from issuing
|
||||
new ioctls while executing a critical section marked with the
|
||||
accel_ioctl_inhibit_begin/end functions.
|
||||
|
||||
Note that all functions submitting ioctls must mark where the
|
||||
ioctl is being called with accel_{cpu_}ioctl_begin/end().
|
||||
|
||||
This API requires the caller to always hold the BQL.
|
||||
API documentation is in sysemu/accel-blocker.h
|
||||
|
||||
Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
|
||||
(to minimize cache line bouncing) to ensure that no new ioctls
|
||||
run when the critical section starts, and a QemuEvent to wait
|
||||
that all running ioctls finish.
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-Id: <20221111154758.1372674-2-eesposit@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Conflicts:
|
||||
util/meson.build: files are missing in rhel 8.8.0
|
||||
namely int128.c, memalign.c and interval-tree.c
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++
|
||||
accel/meson.build | 2 +-
|
||||
hw/core/cpu-common.c | 2 +
|
||||
include/hw/core/cpu.h | 3 +
|
||||
include/sysemu/accel-blocker.h | 56 ++++++++++++
|
||||
util/meson.build | 2 +-
|
||||
6 files changed, 217 insertions(+), 2 deletions(-)
|
||||
create mode 100644 accel/accel-blocker.c
|
||||
create mode 100644 include/sysemu/accel-blocker.h
|
||||
|
||||
diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
|
||||
new file mode 100644
|
||||
index 0000000000..1e7f423462
|
||||
--- /dev/null
|
||||
+++ b/accel/accel-blocker.c
|
||||
@@ -0,0 +1,154 @@
|
||||
+/*
|
||||
+ * Lock to inhibit accelerator ioctls
|
||||
+ *
|
||||
+ * Copyright (c) 2022 Red Hat Inc.
|
||||
+ *
|
||||
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
+ *
|
||||
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
+ * of this software and associated documentation files (the "Software"), to deal
|
||||
+ * in the Software without restriction, including without limitation the rights
|
||||
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
+ * copies of the Software, and to permit persons to whom the Software is
|
||||
+ * furnished to do so, subject to the following conditions:
|
||||
+ *
|
||||
+ * The above copyright notice and this permission notice shall be included in
|
||||
+ * all copies or substantial portions of the Software.
|
||||
+ *
|
||||
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
+ * THE SOFTWARE.
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "qemu/thread.h"
|
||||
+#include "qemu/main-loop.h"
|
||||
+#include "hw/core/cpu.h"
|
||||
+#include "sysemu/accel-blocker.h"
|
||||
+
|
||||
+static QemuLockCnt accel_in_ioctl_lock;
|
||||
+static QemuEvent accel_in_ioctl_event;
|
||||
+
|
||||
+void accel_blocker_init(void)
|
||||
+{
|
||||
+ qemu_lockcnt_init(&accel_in_ioctl_lock);
|
||||
+ qemu_event_init(&accel_in_ioctl_event, false);
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_begin(void)
|
||||
+{
|
||||
+ if (likely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* block if lock is taken in accel_ioctl_inhibit_begin() */
|
||||
+ qemu_lockcnt_inc(&accel_in_ioctl_lock);
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_end(void)
|
||||
+{
|
||||
+ if (likely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ qemu_lockcnt_dec(&accel_in_ioctl_lock);
|
||||
+ /* change event to SET. If event was BUSY, wake up all waiters */
|
||||
+ qemu_event_set(&accel_in_ioctl_event);
|
||||
+}
|
||||
+
|
||||
+void accel_cpu_ioctl_begin(CPUState *cpu)
|
||||
+{
|
||||
+ if (unlikely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* block if lock is taken in accel_ioctl_inhibit_begin() */
|
||||
+ qemu_lockcnt_inc(&cpu->in_ioctl_lock);
|
||||
+}
|
||||
+
|
||||
+void accel_cpu_ioctl_end(CPUState *cpu)
|
||||
+{
|
||||
+ if (unlikely(qemu_mutex_iothread_locked())) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ qemu_lockcnt_dec(&cpu->in_ioctl_lock);
|
||||
+ /* change event to SET. If event was BUSY, wake up all waiters */
|
||||
+ qemu_event_set(&accel_in_ioctl_event);
|
||||
+}
|
||||
+
|
||||
+static bool accel_has_to_wait(void)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+ bool needs_to_wait = false;
|
||||
+
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) {
|
||||
+ /* exit the ioctl, if vcpu is running it */
|
||||
+ qemu_cpu_kick(cpu);
|
||||
+ needs_to_wait = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock);
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_inhibit_begin(void)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+
|
||||
+ /*
|
||||
+ * We allow to inhibit only when holding the BQL, so we can identify
|
||||
+ * when an inhibitor wants to issue an ioctl easily.
|
||||
+ */
|
||||
+ g_assert(qemu_mutex_iothread_locked());
|
||||
+
|
||||
+ /* Block further invocations of the ioctls outside the BQL. */
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ qemu_lockcnt_lock(&cpu->in_ioctl_lock);
|
||||
+ }
|
||||
+ qemu_lockcnt_lock(&accel_in_ioctl_lock);
|
||||
+
|
||||
+ /* Keep waiting while there are running ioctls */
|
||||
+ while (true) {
|
||||
+
|
||||
+ /* Reset event to FREE. */
|
||||
+ qemu_event_reset(&accel_in_ioctl_event);
|
||||
+
|
||||
+ if (accel_has_to_wait()) {
|
||||
+ /*
|
||||
+ * If event is still FREE, and there are ioctls still in progress,
|
||||
+ * wait.
|
||||
+ *
|
||||
+ * If an ioctl finishes before qemu_event_wait(), it will change
|
||||
+ * the event state to SET. This will prevent qemu_event_wait() from
|
||||
+ * blocking, but it's not a problem because if other ioctls are
|
||||
+ * still running the loop will iterate once more and reset the event
|
||||
+ * status to FREE so that it can wait properly.
|
||||
+ *
|
||||
+ * If an ioctl finishes while qemu_event_wait() is blocking, then
|
||||
+ * it will be woken up, but also here the while loop makes sure
|
||||
+ * to re-enter the wait if there are other running ioctls.
|
||||
+ */
|
||||
+ qemu_event_wait(&accel_in_ioctl_event);
|
||||
+ } else {
|
||||
+ /* No ioctl is running */
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void accel_ioctl_inhibit_end(void)
|
||||
+{
|
||||
+ CPUState *cpu;
|
||||
+
|
||||
+ qemu_lockcnt_unlock(&accel_in_ioctl_lock);
|
||||
+ CPU_FOREACH(cpu) {
|
||||
+ qemu_lockcnt_unlock(&cpu->in_ioctl_lock);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
diff --git a/accel/meson.build b/accel/meson.build
|
||||
index dfd808d2c8..801b4d44e8 100644
|
||||
--- a/accel/meson.build
|
||||
+++ b/accel/meson.build
|
||||
@@ -1,4 +1,4 @@
|
||||
-specific_ss.add(files('accel-common.c'))
|
||||
+specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
|
||||
softmmu_ss.add(files('accel-softmmu.c'))
|
||||
user_ss.add(files('accel-user.c'))
|
||||
|
||||
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
|
||||
index 9e3241b430..b6e83acf0a 100644
|
||||
--- a/hw/core/cpu-common.c
|
||||
+++ b/hw/core/cpu-common.c
|
||||
@@ -238,6 +238,7 @@ static void cpu_common_initfn(Object *obj)
|
||||
cpu->nr_threads = 1;
|
||||
|
||||
qemu_mutex_init(&cpu->work_mutex);
|
||||
+ qemu_lockcnt_init(&cpu->in_ioctl_lock);
|
||||
QSIMPLEQ_INIT(&cpu->work_list);
|
||||
QTAILQ_INIT(&cpu->breakpoints);
|
||||
QTAILQ_INIT(&cpu->watchpoints);
|
||||
@@ -249,6 +250,7 @@ static void cpu_common_finalize(Object *obj)
|
||||
{
|
||||
CPUState *cpu = CPU(obj);
|
||||
|
||||
+ qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
|
||||
qemu_mutex_destroy(&cpu->work_mutex);
|
||||
}
|
||||
|
||||
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
|
||||
index e948e81f1a..49d9c73f97 100644
|
||||
--- a/include/hw/core/cpu.h
|
||||
+++ b/include/hw/core/cpu.h
|
||||
@@ -383,6 +383,9 @@ struct CPUState {
|
||||
uint32_t kvm_fetch_index;
|
||||
uint64_t dirty_pages;
|
||||
|
||||
+ /* Used by accel-blocker: CPU is executing an ioctl() */
|
||||
+ QemuLockCnt in_ioctl_lock;
|
||||
+
|
||||
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
|
||||
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
|
||||
DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
|
||||
diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
|
||||
new file mode 100644
|
||||
index 0000000000..72020529ef
|
||||
--- /dev/null
|
||||
+++ b/include/sysemu/accel-blocker.h
|
||||
@@ -0,0 +1,56 @@
|
||||
+/*
|
||||
+ * Accelerator blocking API, to prevent new ioctls from starting and wait the
|
||||
+ * running ones finish.
|
||||
+ * This mechanism differs from pause/resume_all_vcpus() in that it does not
|
||||
+ * release the BQL.
|
||||
+ *
|
||||
+ * Copyright (c) 2022 Red Hat Inc.
|
||||
+ *
|
||||
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
+ *
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ */
|
||||
+#ifndef ACCEL_BLOCKER_H
|
||||
+#define ACCEL_BLOCKER_H
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "sysemu/cpus.h"
|
||||
+
|
||||
+extern void accel_blocker_init(void);
|
||||
+
|
||||
+/*
|
||||
+ * accel_{cpu_}ioctl_begin/end:
|
||||
+ * Mark when ioctl is about to run or just finished.
|
||||
+ *
|
||||
+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
|
||||
+ * called, preventing new ioctls to run. They will continue only after
|
||||
+ * accel_ioctl_inhibit_end().
|
||||
+ */
|
||||
+extern void accel_ioctl_begin(void);
|
||||
+extern void accel_ioctl_end(void);
|
||||
+extern void accel_cpu_ioctl_begin(CPUState *cpu);
|
||||
+extern void accel_cpu_ioctl_end(CPUState *cpu);
|
||||
+
|
||||
+/*
|
||||
+ * accel_ioctl_inhibit_begin: start critical section
|
||||
+ *
|
||||
+ * This function makes sure that:
|
||||
+ * 1) incoming accel_{cpu_}ioctl_begin() calls block
|
||||
+ * 2) wait that all ioctls that were already running reach
|
||||
+ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary.
|
||||
+ *
|
||||
+ * This allows the caller to access shared data or perform operations without
|
||||
+ * worrying of concurrent vcpus accesses.
|
||||
+ */
|
||||
+extern void accel_ioctl_inhibit_begin(void);
|
||||
+
|
||||
+/*
|
||||
+ * accel_ioctl_inhibit_end: end critical section started by
|
||||
+ * accel_ioctl_inhibit_begin()
|
||||
+ *
|
||||
+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
|
||||
+ */
|
||||
+extern void accel_ioctl_inhibit_end(void);
|
||||
+
|
||||
+#endif /* ACCEL_BLOCKER_H */
|
||||
diff --git a/util/meson.build b/util/meson.build
|
||||
index 05b593055a..b5f153b0e8 100644
|
||||
--- a/util/meson.build
|
||||
+++ b/util/meson.build
|
||||
@@ -48,6 +48,7 @@ util_ss.add(files('transactions.c'))
|
||||
util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c'))
|
||||
util_ss.add(files('guest-random.c'))
|
||||
util_ss.add(files('yank.c'))
|
||||
+util_ss.add(files('lockcnt.c'))
|
||||
|
||||
if have_user
|
||||
util_ss.add(files('selfmap.c'))
|
||||
@@ -69,7 +70,6 @@ if have_block
|
||||
util_ss.add(files('hexdump.c'))
|
||||
util_ss.add(files('iova-tree.c'))
|
||||
util_ss.add(files('iov.c', 'qemu-sockets.c', 'uri.c'))
|
||||
- util_ss.add(files('lockcnt.c'))
|
||||
util_ss.add(files('main-loop.c'))
|
||||
util_ss.add(files('nvdimm-utils.c'))
|
||||
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,165 +0,0 @@
|
||||
From 3deffc03c2e9b0053eec5aeb5b5d633dfe29f499 Mon Sep 17 00:00:00 2001
|
||||
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||
Date: Wed, 6 Apr 2022 14:58:12 -0400
|
||||
Subject: [PATCH 1/3] acpi: fix acpi_index migration
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Peter Xu <peterx@redhat.com>
|
||||
RH-MergeRequest: 343: acpi: fix acpi_index migration
|
||||
RH-Jira: RHEL-20189
|
||||
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
|
||||
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
||||
RH-Acked-by: Prasad Pandit <None>
|
||||
RH-Commit: [1/2] c5b9cdf5791cd856207b7df7e2ef5df360ec8de4
|
||||
|
||||
vmstate_acpi_pcihp_use_acpi_index() was expecting AcpiPciHpState
|
||||
as state but it actually received PIIX4PMState, because
|
||||
VMSTATE_PCI_HOTPLUG is a macro and not another struct.
|
||||
So it ended up accessing random pointer, which resulted
|
||||
in 'false' return value and acpi_index field wasn't ever
|
||||
sent.
|
||||
|
||||
However in 7.0 that pointer de-references to value > 0, and
|
||||
destination QEMU starts to expect the field which isn't
|
||||
sent in migration stream from older QEMU (6.2 and older).
|
||||
As result migration fails with:
|
||||
qemu-system-x86_64: Missing section footer for 0000:00:01.3/piix4_pm
|
||||
qemu-system-x86_64: load of migration failed: Invalid argument
|
||||
|
||||
In addition with QEMU-6.2, destination due to not expected
|
||||
state, also never expects the acpi_index field in migration
|
||||
stream.
|
||||
|
||||
Q35 is not affected as it always sends/expects the field as
|
||||
long as acpi based PCI hotplug is enabled.
|
||||
|
||||
Fix issue by introducing compat knob to never send/expect
|
||||
acpi_index in migration stream for 6.2 and older PC machine
|
||||
types and always send it for 7.0 and newer PC machine types.
|
||||
|
||||
Diagnosed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Fixes: b32bd76 ("pci: introduce acpi-index property for PCI device")
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/932
|
||||
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit a83c2844903c45aa7d32cdd17305f23ce2c56ab9)
|
||||
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||
---
|
||||
hw/acpi/acpi-pci-hotplug-stub.c | 4 ----
|
||||
hw/acpi/pcihp.c | 6 ------
|
||||
hw/acpi/piix4.c | 15 ++++++++++++++-
|
||||
hw/core/machine.c | 5 +++++
|
||||
include/hw/acpi/pcihp.h | 2 --
|
||||
5 files changed, 19 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c
|
||||
index 734e4c5986..a43f6dafc9 100644
|
||||
--- a/hw/acpi/acpi-pci-hotplug-stub.c
|
||||
+++ b/hw/acpi/acpi-pci-hotplug-stub.c
|
||||
@@ -41,7 +41,3 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off)
|
||||
return;
|
||||
}
|
||||
|
||||
-bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id)
|
||||
-{
|
||||
- return false;
|
||||
-}
|
||||
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
|
||||
index be0e846b34..ec861661c3 100644
|
||||
--- a/hw/acpi/pcihp.c
|
||||
+++ b/hw/acpi/pcihp.c
|
||||
@@ -559,12 +559,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
|
||||
OBJ_PROP_FLAG_READ);
|
||||
}
|
||||
|
||||
-bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id)
|
||||
-{
|
||||
- AcpiPciHpState *s = opaque;
|
||||
- return s->acpi_index;
|
||||
-}
|
||||
-
|
||||
const VMStateDescription vmstate_acpi_pcihp_pci_status = {
|
||||
.name = "acpi_pcihp_pci_status",
|
||||
.version_id = 1,
|
||||
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
|
||||
index 8d6011c0a3..033e75ce5b 100644
|
||||
--- a/hw/acpi/piix4.c
|
||||
+++ b/hw/acpi/piix4.c
|
||||
@@ -82,6 +82,7 @@ struct PIIX4PMState {
|
||||
AcpiPciHpState acpi_pci_hotplug;
|
||||
bool use_acpi_hotplug_bridge;
|
||||
bool use_acpi_root_pci_hotplug;
|
||||
+ bool not_migrate_acpi_index;
|
||||
|
||||
uint8_t disable_s3;
|
||||
uint8_t disable_s4;
|
||||
@@ -269,6 +270,16 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id)
|
||||
return pm_smbus_vmstate_needed();
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * This is a fudge to turn off the acpi_index field,
|
||||
+ * whose test was always broken on piix4 with 6.2 and older machine types.
|
||||
+ */
|
||||
+static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
|
||||
+{
|
||||
+ PIIX4PMState *s = PIIX4_PM(opaque);
|
||||
+ return s->use_acpi_hotplug_bridge && !s->not_migrate_acpi_index;
|
||||
+}
|
||||
+
|
||||
/* qemu-kvm 1.2 uses version 3 but advertised as 2
|
||||
* To support incoming qemu-kvm 1.2 migration, change version_id
|
||||
* and minimum_version_id to 2 below (which breaks migration from
|
||||
@@ -299,7 +310,7 @@ static const VMStateDescription vmstate_acpi = {
|
||||
struct AcpiPciHpPciStatus),
|
||||
VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, PIIX4PMState,
|
||||
vmstate_test_use_acpi_hotplug_bridge,
|
||||
- vmstate_acpi_pcihp_use_acpi_index),
|
||||
+ vmstate_test_migrate_acpi_index),
|
||||
VMSTATE_END_OF_LIST()
|
||||
},
|
||||
.subsections = (const VMStateDescription*[]) {
|
||||
@@ -654,6 +665,8 @@ static Property piix4_pm_properties[] = {
|
||||
DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState,
|
||||
acpi_memory_hotplug.is_enabled, true),
|
||||
DEFINE_PROP_BOOL("smm-compat", PIIX4PMState, smm_compat, false),
|
||||
+ DEFINE_PROP_BOOL("x-not-migrate-acpi-index", PIIX4PMState,
|
||||
+ not_migrate_acpi_index, false),
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
diff --git a/hw/core/machine.c b/hw/core/machine.c
|
||||
index 76fcabec7a..2724f6848a 100644
|
||||
--- a/hw/core/machine.c
|
||||
+++ b/hw/core/machine.c
|
||||
@@ -331,6 +331,11 @@ GlobalProperty hw_compat_rhel_7_1[] = {
|
||||
};
|
||||
const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1);
|
||||
|
||||
+GlobalProperty hw_compat_6_2[] = {
|
||||
+ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"},
|
||||
+};
|
||||
+const size_t hw_compat_6_2_len = G_N_ELEMENTS(hw_compat_6_2);
|
||||
+
|
||||
GlobalProperty hw_compat_6_1[] = {
|
||||
{ "vhost-user-vsock-device", "seqpacket", "off" },
|
||||
{ "nvme-ns", "shared", "off" },
|
||||
diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h
|
||||
index af1a169fc3..7e268c2c9c 100644
|
||||
--- a/include/hw/acpi/pcihp.h
|
||||
+++ b/include/hw/acpi/pcihp.h
|
||||
@@ -73,8 +73,6 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off);
|
||||
|
||||
extern const VMStateDescription vmstate_acpi_pcihp_pci_status;
|
||||
|
||||
-bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id);
|
||||
-
|
||||
#define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp, test_acpi_index) \
|
||||
VMSTATE_UINT32_TEST(pcihp.hotplug_select, state, \
|
||||
test_pcihp), \
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,50 +0,0 @@
|
||||
From 953c5c0982b61b0a3f8f03452844b5487eb22fc7 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:13:17 -0500
|
||||
Subject: [PATCH 06/13] aio-wait: switch to smp_mb__after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2168472
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Commit: [6/10] 9f30f97754139ffd18d36b2350f9ed4e59ac496e
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
|
||||
|
||||
commit b532526a07ef3b903ead2e055fe6cc87b41057a3
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri Mar 3 11:03:52 2023 +0100
|
||||
|
||||
aio-wait: switch to smp_mb__after_rmw()
|
||||
|
||||
The barrier comes after an atomic increment, so it is enough to use
|
||||
smp_mb__after_rmw(); this avoids a double barrier on x86 systems.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
include/block/aio-wait.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
|
||||
index 54840f8622..03b6394c78 100644
|
||||
--- a/include/block/aio-wait.h
|
||||
+++ b/include/block/aio-wait.h
|
||||
@@ -82,7 +82,7 @@ extern AioWait global_aio_wait;
|
||||
/* Increment wait_->num_waiters before evaluating cond. */ \
|
||||
qatomic_inc(&wait_->num_waiters); \
|
||||
/* Paired with smp_mb in aio_wait_kick(). */ \
|
||||
- smp_mb(); \
|
||||
+ smp_mb__after_rmw(); \
|
||||
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
|
||||
while ((cond)) { \
|
||||
aio_poll(ctx_, true); \
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,86 +0,0 @@
|
||||
From d7eae0ff4c7f7f7bf10f10272adf7c6971c0db9b Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 09:26:35 -0500
|
||||
Subject: [PATCH 01/13] aio_wait_kick: add missing memory barrier
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2168472
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Commit: [1/10] eb774aee79864052e14e706d931e52e7bd1162c8
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
|
||||
|
||||
commit 7455ff1aa01564cc175db5b2373e610503ad4411
|
||||
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Tue May 24 13:30:54 2022 -0400
|
||||
|
||||
aio_wait_kick: add missing memory barrier
|
||||
|
||||
It seems that aio_wait_kick always required a memory barrier
|
||||
or atomic operation in the caller, but nobody actually
|
||||
took care of doing it.
|
||||
|
||||
Let's put the barrier in the function instead, and pair it
|
||||
with another one in AIO_WAIT_WHILE. Read aio_wait_kick()
|
||||
comment for further explanation.
|
||||
|
||||
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Message-Id: <20220524173054.12651-1-eesposit@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
include/block/aio-wait.h | 2 ++
|
||||
util/aio-wait.c | 16 +++++++++++++++-
|
||||
2 files changed, 17 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
|
||||
index b39eefb38d..54840f8622 100644
|
||||
--- a/include/block/aio-wait.h
|
||||
+++ b/include/block/aio-wait.h
|
||||
@@ -81,6 +81,8 @@ extern AioWait global_aio_wait;
|
||||
AioContext *ctx_ = (ctx); \
|
||||
/* Increment wait_->num_waiters before evaluating cond. */ \
|
||||
qatomic_inc(&wait_->num_waiters); \
|
||||
+ /* Paired with smp_mb in aio_wait_kick(). */ \
|
||||
+ smp_mb(); \
|
||||
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
|
||||
while ((cond)) { \
|
||||
aio_poll(ctx_, true); \
|
||||
diff --git a/util/aio-wait.c b/util/aio-wait.c
|
||||
index bdb3d3af22..98c5accd29 100644
|
||||
--- a/util/aio-wait.c
|
||||
+++ b/util/aio-wait.c
|
||||
@@ -35,7 +35,21 @@ static void dummy_bh_cb(void *opaque)
|
||||
|
||||
void aio_wait_kick(void)
|
||||
{
|
||||
- /* The barrier (or an atomic op) is in the caller. */
|
||||
+ /*
|
||||
+ * Paired with smp_mb in AIO_WAIT_WHILE. Here we have:
|
||||
+ * write(condition);
|
||||
+ * aio_wait_kick() {
|
||||
+ * smp_mb();
|
||||
+ * read(num_waiters);
|
||||
+ * }
|
||||
+ *
|
||||
+ * And in AIO_WAIT_WHILE:
|
||||
+ * write(num_waiters);
|
||||
+ * smp_mb();
|
||||
+ * read(condition);
|
||||
+ */
|
||||
+ smp_mb();
|
||||
+
|
||||
if (qatomic_read(&global_aio_wait.num_waiters)) {
|
||||
aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
|
||||
}
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,56 +0,0 @@
|
||||
From 47d027147694fde94dd73305ee53b6a136cbeced Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 08/15] apic: disable reentrancy detection for apic-msi
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [8/12] 25c3cf99b00cd9adc10d6e7afa9c3e3b7da08de2 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
|
||||
commit 50795ee051a342c681a9b45671c552fbd6274db8
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:13 2023 -0400
|
||||
|
||||
apic: disable reentrancy detection for apic-msi
|
||||
|
||||
As the code is designed for re-entrant calls to apic-msi, mark apic-msi
|
||||
as reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-9-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/intc/apic.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
|
||||
index 3df11c34d6..a7c2b301a8 100644
|
||||
--- a/hw/intc/apic.c
|
||||
+++ b/hw/intc/apic.c
|
||||
@@ -883,6 +883,13 @@ static void apic_realize(DeviceState *dev, Error **errp)
|
||||
memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi",
|
||||
APIC_SPACE_SIZE);
|
||||
|
||||
+ /*
|
||||
+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can
|
||||
+ * write back to apic-msi. As such mark the apic-msi region re-entrancy
|
||||
+ * safe.
|
||||
+ */
|
||||
+ s->io_memory.disable_reentrancy_guard = true;
|
||||
+
|
||||
s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s);
|
||||
local_apics[s->id] = s;
|
||||
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,235 +0,0 @@
|
||||
From 8996ac4369de7e0cb6f911db6f47c3e4ae88c8aa Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 02/15] async: Add an optional reentrancy guard to the BH API
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/12] b03f247e242a6cdb3eebec36477234ac77dcd20c (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
Conflict: The file block/graph-lock.h, included from include/block/aio.h,
|
||||
doesn't exist in this code version. The code compiles without
|
||||
issues if this include is just omitted, so we do that.
|
||||
|
||||
commit 9c86c97f12c060bf7484dd931f38634e166a81f0
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:07 2023 -0400
|
||||
|
||||
async: Add an optional reentrancy guard to the BH API
|
||||
|
||||
Devices can pass their MemoryReentrancyGuard (from their DeviceState),
|
||||
when creating new BHes. Then, the async API will toggle the guard
|
||||
before/after calling the BH call-back. This prevents bh->mmio reentrancy
|
||||
issues.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-3-alxndr@bu.edu>
|
||||
[thuth: Fix "line over 90 characters" checkpatch.pl error]
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
docs/devel/multiple-iothreads.txt | 7 +++++++
|
||||
include/block/aio.h | 18 ++++++++++++++++--
|
||||
include/qemu/main-loop.h | 7 +++++--
|
||||
tests/unit/ptimer-test-stubs.c | 3 ++-
|
||||
util/async.c | 18 +++++++++++++++++-
|
||||
util/main-loop.c | 6 ++++--
|
||||
util/trace-events | 1 +
|
||||
7 files changed, 52 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
|
||||
index aeb997bed5..a11576bc74 100644
|
||||
--- a/docs/devel/multiple-iothreads.txt
|
||||
+++ b/docs/devel/multiple-iothreads.txt
|
||||
@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext:
|
||||
* LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
|
||||
* LEGACY timer_new_ms() - create a timer
|
||||
* LEGACY qemu_bh_new() - create a BH
|
||||
+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard
|
||||
* LEGACY qemu_aio_wait() - run an event loop iteration
|
||||
|
||||
Since they implicitly work on the main loop they cannot be used in code that
|
||||
@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h):
|
||||
* aio_set_event_notifier() - monitor an event notifier
|
||||
* aio_timer_new() - create a timer
|
||||
* aio_bh_new() - create a BH
|
||||
+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard
|
||||
* aio_poll() - run an event loop iteration
|
||||
|
||||
+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard"
|
||||
+argument, which is used to check for and prevent re-entrancy problems. For
|
||||
+BHs associated with devices, the reentrancy-guard is contained in the
|
||||
+corresponding DeviceState and named "mem_reentrancy_guard".
|
||||
+
|
||||
The AioContext can be obtained from the IOThread using
|
||||
iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
|
||||
Code that takes an AioContext argument works both in IOThreads or the main
|
||||
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||
index 47fbe9d81f..c7da152985 100644
|
||||
--- a/include/block/aio.h
|
||||
+++ b/include/block/aio.h
|
||||
@@ -22,6 +22,8 @@
|
||||
#include "qemu/event_notifier.h"
|
||||
#include "qemu/thread.h"
|
||||
#include "qemu/timer.h"
|
||||
+#include "hw/qdev-core.h"
|
||||
+
|
||||
|
||||
typedef struct BlockAIOCB BlockAIOCB;
|
||||
typedef void BlockCompletionFunc(void *opaque, int ret);
|
||||
@@ -321,9 +323,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
* is opaque and must be allocated prior to its use.
|
||||
*
|
||||
* @name: A human-readable identifier for debugging purposes.
|
||||
+ * @reentrancy_guard: A guard set when entering a cb to prevent
|
||||
+ * device-reentrancy issues
|
||||
*/
|
||||
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
- const char *name);
|
||||
+ const char *name, MemReentrancyGuard *reentrancy_guard);
|
||||
|
||||
/**
|
||||
* aio_bh_new: Allocate a new bottom half structure
|
||||
@@ -332,7 +336,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
* string.
|
||||
*/
|
||||
#define aio_bh_new(ctx, cb, opaque) \
|
||||
- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
|
||||
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)
|
||||
+
|
||||
+/**
|
||||
+ * aio_bh_new_guarded: Allocate a new bottom half structure with a
|
||||
+ * reentrancy_guard
|
||||
+ *
|
||||
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
|
||||
+ * string.
|
||||
+ */
|
||||
+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
|
||||
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)
|
||||
|
||||
/**
|
||||
* aio_notify: Force processing of pending events.
|
||||
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
|
||||
index 8dbc6fcb89..85dd5ada9e 100644
|
||||
--- a/include/qemu/main-loop.h
|
||||
+++ b/include/qemu/main-loop.h
|
||||
@@ -294,9 +294,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
|
||||
|
||||
void qemu_fd_register(int fd);
|
||||
|
||||
+#define qemu_bh_new_guarded(cb, opaque, guard) \
|
||||
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard)
|
||||
#define qemu_bh_new(cb, opaque) \
|
||||
- qemu_bh_new_full((cb), (opaque), (stringify(cb)))
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
|
||||
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard);
|
||||
void qemu_bh_schedule_idle(QEMUBH *bh);
|
||||
|
||||
enum {
|
||||
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
|
||||
index 2a3ef58799..a7a2d08e7e 100644
|
||||
--- a/tests/unit/ptimer-test-stubs.c
|
||||
+++ b/tests/unit/ptimer-test-stubs.c
|
||||
@@ -108,7 +108,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
|
||||
return deadline;
|
||||
}
|
||||
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
QEMUBH *bh = g_new(QEMUBH, 1);
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 2a63bf90f2..1fff02e7fc 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -62,6 +62,7 @@ struct QEMUBH {
|
||||
void *opaque;
|
||||
QSLIST_ENTRY(QEMUBH) next;
|
||||
unsigned flags;
|
||||
+ MemReentrancyGuard *reentrancy_guard;
|
||||
};
|
||||
|
||||
/* Called concurrently from any thread */
|
||||
@@ -127,7 +128,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
|
||||
}
|
||||
|
||||
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
- const char *name)
|
||||
+ const char *name, MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
QEMUBH *bh;
|
||||
bh = g_new(QEMUBH, 1);
|
||||
@@ -136,13 +137,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
|
||||
.cb = cb,
|
||||
.opaque = opaque,
|
||||
.name = name,
|
||||
+ .reentrancy_guard = reentrancy_guard,
|
||||
};
|
||||
return bh;
|
||||
}
|
||||
|
||||
void aio_bh_call(QEMUBH *bh)
|
||||
{
|
||||
+ bool last_engaged_in_io = false;
|
||||
+
|
||||
+ if (bh->reentrancy_guard) {
|
||||
+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
|
||||
+ if (bh->reentrancy_guard->engaged_in_io) {
|
||||
+ trace_reentrant_aio(bh->ctx, bh->name);
|
||||
+ }
|
||||
+ bh->reentrancy_guard->engaged_in_io = true;
|
||||
+ }
|
||||
+
|
||||
bh->cb(bh->opaque);
|
||||
+
|
||||
+ if (bh->reentrancy_guard) {
|
||||
+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
|
||||
diff --git a/util/main-loop.c b/util/main-loop.c
|
||||
index 06b18b195c..1eacf04691 100644
|
||||
--- a/util/main-loop.c
|
||||
+++ b/util/main-loop.c
|
||||
@@ -544,9 +544,11 @@ void main_loop_wait(int nonblocking)
|
||||
|
||||
/* Functions to operate on the main QEMU AioContext. */
|
||||
|
||||
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
|
||||
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard)
|
||||
{
|
||||
- return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
|
||||
+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
|
||||
+ reentrancy_guard);
|
||||
}
|
||||
|
||||
/*
|
||||
diff --git a/util/trace-events b/util/trace-events
|
||||
index c8f53d7d9f..dc3b1eb3bf 100644
|
||||
--- a/util/trace-events
|
||||
+++ b/util/trace-events
|
||||
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
|
||||
# async.c
|
||||
aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
|
||||
aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
|
||||
+reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
|
||||
|
||||
# thread-pool.c
|
||||
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,71 +0,0 @@
|
||||
From d754050d260e2ad890cecd975df6e163c531b40e Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 09/15] async: avoid use-after-free on re-entrancy guard
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [9/12] d357650e581c3921bbfe3e2fde5e3f55853b5fab (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
|
||||
commit 7915bd06f25e1803778081161bf6fa10c42dc7cd
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Mon May 1 10:19:56 2023 -0400
|
||||
|
||||
async: avoid use-after-free on re-entrancy guard
|
||||
|
||||
A BH callback can free the BH, causing a use-after-free in aio_bh_call.
|
||||
Fix that by keeping a local copy of the re-entrancy guard pointer.
|
||||
|
||||
Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513
|
||||
Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API")
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Message-Id: <20230501141956.3444868-1-alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
util/async.c | 14 ++++++++------
|
||||
1 file changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 1fff02e7fc..ffe0541c3b 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -146,18 +146,20 @@ void aio_bh_call(QEMUBH *bh)
|
||||
{
|
||||
bool last_engaged_in_io = false;
|
||||
|
||||
- if (bh->reentrancy_guard) {
|
||||
- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
|
||||
- if (bh->reentrancy_guard->engaged_in_io) {
|
||||
+ /* Make a copy of the guard-pointer as cb may free the bh */
|
||||
+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
|
||||
+ if (reentrancy_guard) {
|
||||
+ last_engaged_in_io = reentrancy_guard->engaged_in_io;
|
||||
+ if (reentrancy_guard->engaged_in_io) {
|
||||
trace_reentrant_aio(bh->ctx, bh->name);
|
||||
}
|
||||
- bh->reentrancy_guard->engaged_in_io = true;
|
||||
+ reentrancy_guard->engaged_in_io = true;
|
||||
}
|
||||
|
||||
bh->cb(bh->opaque);
|
||||
|
||||
- if (bh->reentrancy_guard) {
|
||||
- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
+ if (reentrancy_guard) {
|
||||
+ reentrancy_guard->engaged_in_io = last_engaged_in_io;
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,66 +0,0 @@
|
||||
From 187eb7a418af93375e42298d06e231e2bec3cf00 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:15:42 -0500
|
||||
Subject: [PATCH 10/13] async: clarify usage of barriers in the polling case
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2168472
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Commit: [10/10] 3be07ccc6137a0336becfe63a818d9cbadb38e9c
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
|
||||
|
||||
commit 6229438cca037d42f44a96d38feb15cb102a444f
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon Mar 6 10:43:52 2023 +0100
|
||||
|
||||
async: clarify usage of barriers in the polling case
|
||||
|
||||
Explain that aio_context_notifier_poll() relies on
|
||||
aio_notify_accept() to catch all the memory writes that were
|
||||
done before ctx->notified was set to true.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/async.c | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 795fe699b6..2a63bf90f2 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -463,8 +463,9 @@ void aio_notify_accept(AioContext *ctx)
|
||||
qatomic_set(&ctx->notified, false);
|
||||
|
||||
/*
|
||||
- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb
|
||||
- * in aio_notify.
|
||||
+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
|
||||
+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
|
||||
+ * with smp_wmb() in aio_notify.
|
||||
*/
|
||||
smp_mb();
|
||||
}
|
||||
@@ -487,6 +488,11 @@ static bool aio_context_notifier_poll(void *opaque)
|
||||
EventNotifier *e = opaque;
|
||||
AioContext *ctx = container_of(e, AioContext, notifier);
|
||||
|
||||
+ /*
|
||||
+ * No need for load-acquire because we just want to kick the
|
||||
+ * event loop. aio_notify_accept() takes care of synchronizing
|
||||
+ * the event loop with the producers.
|
||||
+ */
|
||||
return qatomic_read(&ctx->notified);
|
||||
}
|
||||
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,111 +0,0 @@
|
||||
From ea3856bb545d19499602830cdc3076d83a981e7a Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:15:36 -0500
|
||||
Subject: [PATCH 09/13] async: update documentation of the memory barriers
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2168472
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Commit: [9/10] d471da2acf7a107cf75f3327c5e8d7456307160e
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
|
||||
|
||||
commit 8dd48650b43dfde4ebea34191ac267e474bcc29e
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon Mar 6 10:15:06 2023 +0100
|
||||
|
||||
async: update documentation of the memory barriers
|
||||
|
||||
Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)",
|
||||
2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll()
|
||||
is happening when the bottom half is enqueued in the bh_list; not
|
||||
when the flags are set. Update the documentation to match.
|
||||
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/async.c | 33 +++++++++++++++++++--------------
|
||||
1 file changed, 19 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/util/async.c b/util/async.c
|
||||
index 6f6717a34b..795fe699b6 100644
|
||||
--- a/util/async.c
|
||||
+++ b/util/async.c
|
||||
@@ -71,14 +71,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
|
||||
unsigned old_flags;
|
||||
|
||||
/*
|
||||
- * The memory barrier implicit in qatomic_fetch_or makes sure that:
|
||||
- * 1. idle & any writes needed by the callback are done before the
|
||||
- * locations are read in the aio_bh_poll.
|
||||
- * 2. ctx is loaded before the callback has a chance to execute and bh
|
||||
- * could be freed.
|
||||
+ * Synchronizes with qatomic_fetch_and() in aio_bh_dequeue(), ensuring that
|
||||
+ * insertion starts after BH_PENDING is set.
|
||||
*/
|
||||
old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
|
||||
+
|
||||
if (!(old_flags & BH_PENDING)) {
|
||||
+ /*
|
||||
+ * At this point the bottom half becomes visible to aio_bh_poll().
|
||||
+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
|
||||
+ * aio_bh_poll(), ensuring that:
|
||||
+ * 1. any writes needed by the callback are visible from the callback
|
||||
+ * after aio_bh_dequeue() returns bh.
|
||||
+ * 2. ctx is loaded before the callback has a chance to execute and bh
|
||||
+ * could be freed.
|
||||
+ */
|
||||
QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
|
||||
}
|
||||
|
||||
@@ -97,11 +104,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
|
||||
QSLIST_REMOVE_HEAD(head, next);
|
||||
|
||||
/*
|
||||
- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory
|
||||
- * barrier ensures that the callback sees all writes done by the scheduling
|
||||
- * thread. It also ensures that the scheduling thread sees the cleared
|
||||
- * flag before bh->cb has run, and thus will call aio_notify again if
|
||||
- * necessary.
|
||||
+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
|
||||
+ * the removal finishes before BH_PENDING is reset.
|
||||
*/
|
||||
*flags = qatomic_fetch_and(&bh->flags,
|
||||
~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
|
||||
@@ -148,6 +152,7 @@ int aio_bh_poll(AioContext *ctx)
|
||||
BHListSlice *s;
|
||||
int ret = 0;
|
||||
|
||||
+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
|
||||
QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
|
||||
QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
|
||||
|
||||
@@ -437,15 +442,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
|
||||
void aio_notify(AioContext *ctx)
|
||||
{
|
||||
/*
|
||||
- * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in
|
||||
- * aio_notify_accept.
|
||||
+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
|
||||
+ * smp_mb() in aio_notify_accept().
|
||||
*/
|
||||
smp_wmb();
|
||||
qatomic_set(&ctx->notified, true);
|
||||
|
||||
/*
|
||||
- * Write ctx->notified before reading ctx->notify_me. Pairs
|
||||
- * with smp_mb in aio_ctx_prepare or aio_poll.
|
||||
+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
|
||||
+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
|
||||
*/
|
||||
smp_mb();
|
||||
if (qatomic_read(&ctx->notify_me)) {
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,58 +0,0 @@
|
||||
From 7715635d018351e0a5c4c25aec2c71a2fe3b9e69 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 06/15] bcm2835_property: disable reentrancy detection for
|
||||
iomem
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [6/12] 4d6187430ca1c4309a36824c0c6815d2a763db1a (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
|
||||
commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:11 2023 -0400
|
||||
|
||||
bcm2835_property: disable reentrancy detection for iomem
|
||||
|
||||
As the code is designed for re-entrant calls from bcm2835_property to
|
||||
bcm2835_mbox and back into bcm2835_property, mark iomem as
|
||||
reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-7-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/misc/bcm2835_property.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c
|
||||
index 73941bdae9..022b5a849c 100644
|
||||
--- a/hw/misc/bcm2835_property.c
|
||||
+++ b/hw/misc/bcm2835_property.c
|
||||
@@ -377,6 +377,13 @@ static void bcm2835_property_init(Object *obj)
|
||||
|
||||
memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s,
|
||||
TYPE_BCM2835_PROPERTY, 0x10);
|
||||
+
|
||||
+ /*
|
||||
+ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from
|
||||
+ * iomem. As such, mark iomem as re-entrancy safe.
|
||||
+ */
|
||||
+ s->iomem.disable_reentrancy_guard = true;
|
||||
+
|
||||
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
|
||||
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq);
|
||||
}
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,359 +0,0 @@
|
||||
From 1f7520baa6f0bf02ccba2ebfe7d1d5bf6520f95a Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Tue, 11 Apr 2023 19:34:16 +0200
|
||||
Subject: [PATCH 2/5] block: Collapse padded I/O vecs exceeding IOV_MAX
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 291: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2141964
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/5] 1d86ce8398e4ab66e308a686f9855c963e52b0a9
|
||||
|
||||
When processing vectored guest requests that are not aligned to the
|
||||
storage request alignment, we pad them by adding head and/or tail
|
||||
buffers for a read-modify-write cycle.
|
||||
|
||||
The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
|
||||
with this padding, the vector can exceed that limit. As of
|
||||
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
|
||||
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
|
||||
limit, instead returning an error to the guest.
|
||||
|
||||
To the guest, this appears as a random I/O error. We should not return
|
||||
an I/O error to the guest when it issued a perfectly valid request.
|
||||
|
||||
Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
|
||||
longer than IOV_MAX, which generally seems to work (because the guest
|
||||
assumes a smaller alignment than we really have, file-posix's
|
||||
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
|
||||
so emulate the request, so that the IOV_MAX does not matter). However,
|
||||
that does not seem exactly great.
|
||||
|
||||
I see two ways to fix this problem:
|
||||
1. We split such long requests into two requests.
|
||||
2. We join some elements of the vector into new buffers to make it
|
||||
shorter.
|
||||
|
||||
I am wary of (1), because it seems like it may have unintended side
|
||||
effects.
|
||||
|
||||
(2) on the other hand seems relatively simple to implement, with
|
||||
hopefully few side effects, so this patch does that.
|
||||
|
||||
To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
|
||||
is effectively replaced by the new function bdrv_create_padded_qiov(),
|
||||
which not only wraps the request IOV with padding head/tail, but also
|
||||
ensures that the resulting vector will not have more than IOV_MAX
|
||||
elements. Putting that functionality into qemu_iovec_init_extended() is
|
||||
infeasible because it requires allocating a bounce buffer; doing so
|
||||
would require many more parameters (buffer alignment, how to initialize
|
||||
the buffer, and out parameters like the buffer, its length, and the
|
||||
original elements), which is not reasonable.
|
||||
|
||||
Conversely, it is not difficult to move qemu_iovec_init_extended()'s
|
||||
functionality into bdrv_create_padded_qiov() by using public
|
||||
qemu_iovec_* functions, so that is what this patch does.
|
||||
|
||||
Because bdrv_pad_request() was the only "serious" user of
|
||||
qemu_iovec_init_extended(), the next patch will remove the latter
|
||||
function, so the functionality is not implemented twice.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a)
|
||||
|
||||
Conflicts:
|
||||
block/io.c: Downstream bdrv_pad_request() has no @flags
|
||||
parameter.
|
||||
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
|
||||
1 file changed, 151 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index c3e7301613..0fe8f0dd40 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1624,6 +1624,14 @@ out:
|
||||
* @merge_reads is true for small requests,
|
||||
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
|
||||
* head and tail exist but @buf_len == align and @tail_buf == @buf.
|
||||
+ *
|
||||
+ * @write is true for write requests, false for read requests.
|
||||
+ *
|
||||
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
|
||||
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
|
||||
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
|
||||
+ * I/O vector elements so for read requests, the data can be copied back after
|
||||
+ * the read is done.
|
||||
*/
|
||||
typedef struct BdrvRequestPadding {
|
||||
uint8_t *buf;
|
||||
@@ -1632,11 +1640,17 @@ typedef struct BdrvRequestPadding {
|
||||
size_t head;
|
||||
size_t tail;
|
||||
bool merge_reads;
|
||||
+ bool write;
|
||||
QEMUIOVector local_qiov;
|
||||
+
|
||||
+ uint8_t *collapse_bounce_buf;
|
||||
+ size_t collapse_len;
|
||||
+ QEMUIOVector pre_collapse_qiov;
|
||||
} BdrvRequestPadding;
|
||||
|
||||
static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
int64_t offset, int64_t bytes,
|
||||
+ bool write,
|
||||
BdrvRequestPadding *pad)
|
||||
{
|
||||
int64_t align = bs->bl.request_alignment;
|
||||
@@ -1668,6 +1682,8 @@ static bool bdrv_init_padding(BlockDriverState *bs,
|
||||
pad->tail_buf = pad->buf + pad->buf_len - align;
|
||||
}
|
||||
|
||||
+ pad->write = write;
|
||||
+
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1733,8 +1749,23 @@ zero_mem:
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
+/**
|
||||
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
|
||||
+ */
|
||||
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
|
||||
{
|
||||
+ if (pad->collapse_bounce_buf) {
|
||||
+ if (!pad->write) {
|
||||
+ /*
|
||||
+ * If padding required elements in the vector to be collapsed into a
|
||||
+ * bounce buffer, copy the bounce buffer content back
|
||||
+ */
|
||||
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+ qemu_vfree(pad->collapse_bounce_buf);
|
||||
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
|
||||
+ }
|
||||
if (pad->buf) {
|
||||
qemu_vfree(pad->buf);
|
||||
qemu_iovec_destroy(&pad->local_qiov);
|
||||
@@ -1742,6 +1773,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
memset(pad, 0, sizeof(*pad));
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
|
||||
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
|
||||
+ *
|
||||
+ * To ensure this, when necessary, the first two or three elements of @iov are
|
||||
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
|
||||
+ * bounce buffer in pad->local_qiov.
|
||||
+ *
|
||||
+ * After performing a read request, the data from the bounce buffer must be
|
||||
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
|
||||
+ */
|
||||
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
|
||||
+ BdrvRequestPadding *pad,
|
||||
+ struct iovec *iov, int niov,
|
||||
+ size_t iov_offset, size_t bytes)
|
||||
+{
|
||||
+ int padded_niov, surplus_count, collapse_count;
|
||||
+
|
||||
+ /* Assert this invariant */
|
||||
+ assert(niov <= IOV_MAX);
|
||||
+
|
||||
+ /*
|
||||
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
|
||||
+ * to the guest is not ideal, but there is little else we can do. At least
|
||||
+ * this will practically never happen on 64-bit systems.
|
||||
+ */
|
||||
+ if (SIZE_MAX - pad->head < bytes ||
|
||||
+ SIZE_MAX - pad->head - bytes < pad->tail)
|
||||
+ {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ /* Length of the resulting IOV if we just concatenated everything */
|
||||
+ padded_niov = !!pad->head + niov + !!pad->tail;
|
||||
+
|
||||
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
|
||||
+
|
||||
+ if (pad->head) {
|
||||
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
|
||||
+ * Instead, merge the first two or three elements of @iov to reduce the
|
||||
+ * number of vector elements as necessary.
|
||||
+ */
|
||||
+ if (padded_niov > IOV_MAX) {
|
||||
+ /*
|
||||
+ * Only head and tail can have led to the number of entries exceeding
|
||||
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
|
||||
+ * to reduce the number of elements by `surplus_count`, so we merge that
|
||||
+ * many elements plus one into one element.
|
||||
+ */
|
||||
+ surplus_count = padded_niov - IOV_MAX;
|
||||
+ assert(surplus_count <= !!pad->head + !!pad->tail);
|
||||
+ collapse_count = surplus_count + 1;
|
||||
+
|
||||
+ /*
|
||||
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
|
||||
+ * advance `iov` (and associated variables) by those elements.
|
||||
+ */
|
||||
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
|
||||
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
|
||||
+ collapse_count, iov_offset, SIZE_MAX);
|
||||
+ iov += collapse_count;
|
||||
+ iov_offset = 0;
|
||||
+ niov -= collapse_count;
|
||||
+ bytes -= pad->pre_collapse_qiov.size;
|
||||
+
|
||||
+ /*
|
||||
+ * Construct the bounce buffer to match the length of the to-collapse
|
||||
+ * vector elements, and for write requests, initialize it with the data
|
||||
+ * from those elements. Then add it to `pad->local_qiov`.
|
||||
+ */
|
||||
+ pad->collapse_len = pad->pre_collapse_qiov.size;
|
||||
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
|
||||
+ if (pad->write) {
|
||||
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+ qemu_iovec_add(&pad->local_qiov,
|
||||
+ pad->collapse_bounce_buf, pad->collapse_len);
|
||||
+ }
|
||||
+
|
||||
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
|
||||
+
|
||||
+ if (pad->tail) {
|
||||
+ qemu_iovec_add(&pad->local_qiov,
|
||||
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
|
||||
+ }
|
||||
+
|
||||
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* bdrv_pad_request
|
||||
*
|
||||
@@ -1749,6 +1875,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
* read of padding, bdrv_padding_rmw_read() should be called separately if
|
||||
* needed.
|
||||
*
|
||||
+ * @write is true for write requests, false for read requests.
|
||||
+ *
|
||||
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
|
||||
* - on function start they represent original request
|
||||
* - on failure or when padding is not needed they are unchanged
|
||||
@@ -1757,25 +1885,33 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
|
||||
static int bdrv_pad_request(BlockDriverState *bs,
|
||||
QEMUIOVector **qiov, size_t *qiov_offset,
|
||||
int64_t *offset, int64_t *bytes,
|
||||
+ bool write,
|
||||
BdrvRequestPadding *pad, bool *padded)
|
||||
{
|
||||
int ret;
|
||||
+ struct iovec *sliced_iov;
|
||||
+ int sliced_niov;
|
||||
+ size_t sliced_head, sliced_tail;
|
||||
|
||||
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
|
||||
|
||||
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
|
||||
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
|
||||
if (padded) {
|
||||
*padded = false;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
|
||||
- *qiov, *qiov_offset, *bytes,
|
||||
- pad->buf + pad->buf_len - pad->tail,
|
||||
- pad->tail);
|
||||
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
|
||||
+ &sliced_head, &sliced_tail,
|
||||
+ &sliced_niov);
|
||||
+
|
||||
+ /* Guaranteed by bdrv_check_qiov_request() */
|
||||
+ assert(*bytes <= SIZE_MAX);
|
||||
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
|
||||
+ sliced_head, *bytes);
|
||||
if (ret < 0) {
|
||||
- bdrv_padding_destroy(pad);
|
||||
+ bdrv_padding_finalize(pad);
|
||||
return ret;
|
||||
}
|
||||
*bytes += pad->head + pad->tail;
|
||||
@@ -1836,8 +1972,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
flags |= BDRV_REQ_COPY_ON_READ;
|
||||
}
|
||||
|
||||
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
- NULL);
|
||||
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
|
||||
+ &pad, NULL);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -1847,7 +1983,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
|
||||
bs->bl.request_alignment,
|
||||
qiov, qiov_offset, flags);
|
||||
tracked_request_end(&req);
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
fail:
|
||||
bdrv_dec_in_flight(bs);
|
||||
@@ -2167,7 +2303,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
|
||||
bool padding;
|
||||
BdrvRequestPadding pad;
|
||||
|
||||
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
|
||||
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
|
||||
if (padding) {
|
||||
bdrv_make_request_serialising(req, align);
|
||||
|
||||
@@ -2214,7 +2350,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
|
||||
}
|
||||
|
||||
out:
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -2280,8 +2416,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
|
||||
* alignment only if there is no ZERO flag.
|
||||
*/
|
||||
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
|
||||
- &padded);
|
||||
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
|
||||
+ &pad, &padded);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
@@ -2310,7 +2446,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
|
||||
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
|
||||
qiov, qiov_offset, flags);
|
||||
|
||||
- bdrv_padding_destroy(&pad);
|
||||
+ bdrv_padding_finalize(&pad);
|
||||
|
||||
out:
|
||||
tracked_request_end(&req);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,75 +0,0 @@
|
||||
From b9866279996ee065cb524bf30bc70e22efbab303 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Fri, 14 Jul 2023 10:59:38 +0200
|
||||
Subject: [PATCH 5/5] block: Fix pad_request's request restriction
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 291: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2141964
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [5/5] f9188bd089d6c67185ea1accde20d491a2ed3193
|
||||
|
||||
bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
|
||||
which bdrv_check_qiov_request() does not guarantee.
|
||||
|
||||
bdrv_check_request32() however will guarantee this, and both of
|
||||
bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
|
||||
bdrv_co_pwritev_part()) already run it before calling
|
||||
bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call
|
||||
bdrv_check_request32() without expecting error, too.
|
||||
|
||||
In effect, this patch will not change guest-visible behavior. It is a
|
||||
clean-up to tighten a condition to match what is guaranteed by our
|
||||
callers, and which exists purely to show clearly why the subsequent
|
||||
assertion (`assert(*bytes <= SIZE_MAX)`) is always true.
|
||||
|
||||
Note there is a difference between the interfaces of
|
||||
bdrv_check_qiov_request() and bdrv_check_request32(): The former takes
|
||||
an errp, the latter does not, so we can no longer just pass
|
||||
&error_abort. Instead, we need to check the returned value. While we
|
||||
do expect success (because the callers have already run this function),
|
||||
an assert(ret == 0) is not much simpler than just to return an error if
|
||||
it occurs, so let us handle errors by returning them up the stack now.
|
||||
|
||||
Reported-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-id: 20230714085938.202730-1-hreitz@redhat.com
|
||||
Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a
|
||||
("block: Collapse padded I/O vecs exceeding IOV_MAX")
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/io.c | 8 ++++++--
|
||||
1 file changed, 6 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 0fe8f0dd40..8ae57728a6 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -1893,7 +1893,11 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
int sliced_niov;
|
||||
size_t sliced_head, sliced_tail;
|
||||
|
||||
- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
|
||||
+ /* Should have been checked by the caller already */
|
||||
+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
+ }
|
||||
|
||||
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
|
||||
if (padded) {
|
||||
@@ -1906,7 +1910,7 @@ static int bdrv_pad_request(BlockDriverState *bs,
|
||||
&sliced_head, &sliced_tail,
|
||||
&sliced_niov);
|
||||
|
||||
- /* Guaranteed by bdrv_check_qiov_request() */
|
||||
+ /* Guaranteed by bdrv_check_request32() */
|
||||
assert(*bytes <= SIZE_MAX);
|
||||
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
|
||||
sliced_head, *bytes);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,260 +0,0 @@
|
||||
From c4ba1f1755031a0ac2f600ed8c17e7dcb6b2b857 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Wed, 5 Jun 2024 19:56:51 -0400
|
||||
Subject: [PATCH 5/5] block: Parse filenames only when explicitly requested
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 5: EMBARGOED CVE-2024-4467 for rhel-8.10.z (PRDSC)
|
||||
RH-Jira: RHEL-35616
|
||||
RH-CVE: CVE-2024-4467
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [5/5] a3e197add64fc6950c4ac576e34d833dfae7ee34
|
||||
|
||||
Conflicts: - bdrv_open_child_common(): bdrv_graph_wrlock/unlock()
|
||||
don't exist in this code version. We ignore them.
|
||||
bdrv_open_inherit(): no_coroutine_fn/GRAPH_UNLOCKED
|
||||
doesn't exist. We ignore it.
|
||||
- Changes to bdrv_open_file_child() didn't apply cleanly,
|
||||
but fixing it is straight-forward.
|
||||
- GLOBAL_STATE_CODE() not present in this code. Ignoring it.
|
||||
- bdrv_open_file_child(): Need to continue setting of
|
||||
parent->file.
|
||||
|
||||
commit f44c2941d4419e60f16dea3e9adca164e75aa78d
|
||||
Author: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu Apr 25 14:56:02 2024 +0200
|
||||
|
||||
block: Parse filenames only when explicitly requested
|
||||
|
||||
When handling image filenames from legacy options such as -drive or from
|
||||
tools, these filenames are parsed for protocol prefixes, including for
|
||||
the json:{} pseudo-protocol.
|
||||
|
||||
This behaviour is intended for filenames that come directly from the
|
||||
command line and for backing files, which may come from the image file
|
||||
itself. Higher level management tools generally take care to verify that
|
||||
untrusted images don't contain a bad (or any) backing file reference;
|
||||
'qemu-img info' is a suitable tool for this.
|
||||
|
||||
However, for other files that can be referenced in images, such as
|
||||
qcow2 data files or VMDK extents, the string from the image file is
|
||||
usually not verified by management tools - and 'qemu-img info' wouldn't
|
||||
be suitable because in contrast to backing files, it already opens these
|
||||
other referenced files. So here the string should be interpreted as a
|
||||
literal local filename. More complex configurations need to be specified
|
||||
explicitly on the command line or in QMP.
|
||||
|
||||
This patch changes bdrv_open_inherit() so that it only parses filenames
|
||||
if a new parameter parse_filename is true. It is set for the top level
|
||||
in bdrv_open(), for the file child and for the backing file child. All
|
||||
other callers pass false and disable filename parsing this way.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Upstream: N/A, embargoed
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
block.c | 81 +++++++++++++++++++++++++++++++++++++++------------------
|
||||
1 file changed, 56 insertions(+), 25 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 889f878565..ddebf50efa 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -82,6 +82,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
|
||||
BlockDriverState *parent,
|
||||
const BdrvChildClass *child_class,
|
||||
BdrvChildRole child_role,
|
||||
+ bool parse_filename,
|
||||
Error **errp);
|
||||
|
||||
static bool bdrv_recurse_has_child(BlockDriverState *bs,
|
||||
@@ -1926,7 +1927,8 @@ static void parse_json_protocol(QDict *options, const char **pfilename,
|
||||
* block driver has been specified explicitly.
|
||||
*/
|
||||
static int bdrv_fill_options(QDict **options, const char *filename,
|
||||
- int *flags, Error **errp)
|
||||
+ int *flags, bool allow_parse_filename,
|
||||
+ Error **errp)
|
||||
{
|
||||
const char *drvname;
|
||||
bool protocol = *flags & BDRV_O_PROTOCOL;
|
||||
@@ -1966,7 +1968,7 @@ static int bdrv_fill_options(QDict **options, const char *filename,
|
||||
if (protocol && filename) {
|
||||
if (!qdict_haskey(*options, "filename")) {
|
||||
qdict_put_str(*options, "filename", filename);
|
||||
- parse_filename = true;
|
||||
+ parse_filename = allow_parse_filename;
|
||||
} else {
|
||||
error_setg(errp, "Can't specify 'file' and 'filename' options at "
|
||||
"the same time");
|
||||
@@ -3439,7 +3441,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
|
||||
}
|
||||
|
||||
backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs,
|
||||
- &child_of_bds, bdrv_backing_role(bs), errp);
|
||||
+ &child_of_bds, bdrv_backing_role(bs), true,
|
||||
+ errp);
|
||||
if (!backing_hd) {
|
||||
bs->open_flags |= BDRV_O_NO_BACKING;
|
||||
error_prepend(errp, "Could not open backing file: ");
|
||||
@@ -3472,7 +3475,8 @@ free_exit:
|
||||
static BlockDriverState *
|
||||
bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
|
||||
BlockDriverState *parent, const BdrvChildClass *child_class,
|
||||
- BdrvChildRole child_role, bool allow_none, Error **errp)
|
||||
+ BdrvChildRole child_role, bool allow_none,
|
||||
+ bool parse_filename, Error **errp)
|
||||
{
|
||||
BlockDriverState *bs = NULL;
|
||||
QDict *image_options;
|
||||
@@ -3503,7 +3507,8 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
|
||||
}
|
||||
|
||||
bs = bdrv_open_inherit(filename, reference, image_options, 0,
|
||||
- parent, child_class, child_role, errp);
|
||||
+ parent, child_class, child_role, parse_filename,
|
||||
+ errp);
|
||||
if (!bs) {
|
||||
goto done;
|
||||
}
|
||||
@@ -3513,6 +3518,29 @@ done:
|
||||
return bs;
|
||||
}
|
||||
|
||||
+static BdrvChild *bdrv_open_child_common(const char *filename,
|
||||
+ QDict *options, const char *bdref_key,
|
||||
+ BlockDriverState *parent,
|
||||
+ const BdrvChildClass *child_class,
|
||||
+ BdrvChildRole child_role,
|
||||
+ bool allow_none, bool parse_filename,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ BlockDriverState *bs;
|
||||
+ BdrvChild *child;
|
||||
+
|
||||
+ bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
|
||||
+ child_role, allow_none, parse_filename, errp);
|
||||
+ if (bs == NULL) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
+ child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
|
||||
+ errp);
|
||||
+
|
||||
+ return child;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Opens a disk image whose options are given as BlockdevRef in another block
|
||||
* device's options.
|
||||
@@ -3534,20 +3562,17 @@ BdrvChild *bdrv_open_child(const char *filename,
|
||||
BdrvChildRole child_role,
|
||||
bool allow_none, Error **errp)
|
||||
{
|
||||
- BlockDriverState *bs;
|
||||
-
|
||||
- bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
|
||||
- child_role, allow_none, errp);
|
||||
- if (bs == NULL) {
|
||||
- return NULL;
|
||||
- }
|
||||
-
|
||||
- return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
|
||||
- errp);
|
||||
+ return bdrv_open_child_common(filename, options, bdref_key, parent,
|
||||
+ child_class, child_role, allow_none, false,
|
||||
+ errp);
|
||||
}
|
||||
|
||||
/*
|
||||
- * Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
|
||||
+ * This does mostly the same as bdrv_open_child(), but for opening the primary
|
||||
+ * child of a node. A notable difference from bdrv_open_child() is that it
|
||||
+ * enables filename parsing for protocol names (including json:).
|
||||
+ *
|
||||
+ * @parent can move to a different AioContext in this function.
|
||||
*/
|
||||
int bdrv_open_file_child(const char *filename,
|
||||
QDict *options, const char *bdref_key,
|
||||
@@ -3558,8 +3583,9 @@ int bdrv_open_file_child(const char *filename,
|
||||
role = parent->drv->is_filter ?
|
||||
(BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE;
|
||||
|
||||
- parent->file = bdrv_open_child(filename, options, bdref_key, parent,
|
||||
- &child_of_bds, role, false, errp);
|
||||
+ parent->file = bdrv_open_child_common(filename, options, bdref_key, parent,
|
||||
+ &child_of_bds, role, false, true,
|
||||
+ errp);
|
||||
|
||||
return parent->file ? 0 : -EINVAL;
|
||||
}
|
||||
@@ -3599,7 +3625,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
|
||||
|
||||
}
|
||||
|
||||
- bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp);
|
||||
+ bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, false,
|
||||
+ errp);
|
||||
obj = NULL;
|
||||
qobject_unref(obj);
|
||||
visit_free(v);
|
||||
@@ -3690,6 +3717,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
|
||||
BlockDriverState *parent,
|
||||
const BdrvChildClass *child_class,
|
||||
BdrvChildRole child_role,
|
||||
+ bool parse_filename,
|
||||
Error **errp)
|
||||
{
|
||||
int ret;
|
||||
@@ -3733,9 +3761,11 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
|
||||
}
|
||||
|
||||
/* json: syntax counts as explicit options, as if in the QDict */
|
||||
- parse_json_protocol(options, &filename, &local_err);
|
||||
- if (local_err) {
|
||||
- goto fail;
|
||||
+ if (parse_filename) {
|
||||
+ parse_json_protocol(options, &filename, &local_err);
|
||||
+ if (local_err) {
|
||||
+ goto fail;
|
||||
+ }
|
||||
}
|
||||
|
||||
bs->explicit_options = qdict_clone_shallow(options);
|
||||
@@ -3760,7 +3790,8 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
|
||||
parent->open_flags, parent->options);
|
||||
}
|
||||
|
||||
- ret = bdrv_fill_options(&options, filename, &flags, &local_err);
|
||||
+ ret = bdrv_fill_options(&options, filename, &flags, parse_filename,
|
||||
+ &local_err);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -3829,7 +3860,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
|
||||
|
||||
file_bs = bdrv_open_child_bs(filename, options, "file", bs,
|
||||
&child_of_bds, BDRV_CHILD_IMAGE,
|
||||
- true, &local_err);
|
||||
+ true, true, &local_err);
|
||||
if (local_err) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -3974,7 +4005,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
|
||||
QDict *options, int flags, Error **errp)
|
||||
{
|
||||
return bdrv_open_inherit(filename, reference, options, flags, NULL,
|
||||
- NULL, 0, errp);
|
||||
+ NULL, 0, true, errp);
|
||||
}
|
||||
|
||||
/* Return true if the NULL-terminated @list contains @str */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,566 +0,0 @@
|
||||
From 996680dd6d5afd51918e600126dbfed4dfe89e05 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Sun, 9 Jun 2024 23:08:39 -0400
|
||||
Subject: [PATCH 4/5] block: introduce bdrv_open_file_child() helper
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 5: EMBARGOED CVE-2024-4467 for rhel-8.10.z (PRDSC)
|
||||
RH-Jira: RHEL-35616
|
||||
RH-CVE: CVE-2024-4467
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [4/5] 9f582a9aff740eb9ec6f64bfec94854038d8545f
|
||||
|
||||
Conflicts: - copy-before-write.c::cbw_copy() is an older version than
|
||||
upstream, but introduction of the new function is
|
||||
straight-forward.
|
||||
- include/block/block-global-state.h doesn't exist in this
|
||||
code version. Adding the prototype to
|
||||
include/block/block.h instead.
|
||||
- struct BlockDriver has no field 'filtered_child_is_backing'
|
||||
We remove the corresponding assert() in the new function.
|
||||
|
||||
commit 83930780325b144a5908c45b3957b9b6457b3831
|
||||
Author: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Date: Tue Jul 26 23:11:21 2022 +0300
|
||||
|
||||
block: introduce bdrv_open_file_child() helper
|
||||
|
||||
Almost all drivers call bdrv_open_child() similarly. Let's create a
|
||||
helper for this.
|
||||
|
||||
The only not updated drivers that call bdrv_open_child() to set
|
||||
bs->file are raw-format and snapshot-access:
|
||||
raw-format sometimes wants to have a filtered child but
|
||||
don't set drv->is_filter to true.
|
||||
snapshot-access wants only DATA | PRIMARY
|
||||
|
||||
Possibly we should implement drv->is_filter_func() handler, to consider
|
||||
raw-format as filter when it works as filter. But it's another story.
|
||||
|
||||
Note also, that we decrease assignments to bs->file in code: it helps
|
||||
us restrict modifying this field in further commit.
|
||||
|
||||
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20220726201134.924743-3-vsementsov@yandex-team.ru>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
block.c | 18 ++++++++++++++++++
|
||||
block/blkdebug.c | 9 +++------
|
||||
block/blklogwrites.c | 7 ++-----
|
||||
block/blkreplay.c | 7 ++-----
|
||||
block/blkverify.c | 9 +++------
|
||||
block/bochs.c | 7 +++----
|
||||
block/cloop.c | 7 +++----
|
||||
block/copy-before-write.c | 9 ++++-----
|
||||
block/copy-on-read.c | 9 ++++-----
|
||||
block/crypto.c | 11 ++++++-----
|
||||
block/dmg.c | 7 +++----
|
||||
block/filter-compress.c | 8 +++-----
|
||||
block/parallels.c | 7 +++----
|
||||
block/preallocate.c | 9 ++++-----
|
||||
block/qcow.c | 6 ++----
|
||||
block/qcow2.c | 8 ++++----
|
||||
block/qed.c | 8 ++++----
|
||||
block/replication.c | 8 +++-----
|
||||
block/throttle.c | 8 +++-----
|
||||
block/vdi.c | 7 +++----
|
||||
block/vhdx.c | 7 +++----
|
||||
block/vmdk.c | 7 +++----
|
||||
block/vpc.c | 7 +++----
|
||||
include/block/block.h | 3 +++
|
||||
24 files changed, 92 insertions(+), 101 deletions(-)
|
||||
|
||||
diff --git a/block.c b/block.c
|
||||
index 0ac5b163d2..889f878565 100644
|
||||
--- a/block.c
|
||||
+++ b/block.c
|
||||
@@ -3546,6 +3546,24 @@ BdrvChild *bdrv_open_child(const char *filename,
|
||||
errp);
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
|
||||
+ */
|
||||
+int bdrv_open_file_child(const char *filename,
|
||||
+ QDict *options, const char *bdref_key,
|
||||
+ BlockDriverState *parent, Error **errp)
|
||||
+{
|
||||
+ BdrvChildRole role;
|
||||
+
|
||||
+ role = parent->drv->is_filter ?
|
||||
+ (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE;
|
||||
+
|
||||
+ parent->file = bdrv_open_child(filename, options, bdref_key, parent,
|
||||
+ &child_of_bds, role, false, errp);
|
||||
+
|
||||
+ return parent->file ? 0 : -EINVAL;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* TODO Future callers may need to specify parent/child_class in order for
|
||||
* option inheritance to work. Existing callers use it for the root node.
|
||||
diff --git a/block/blkdebug.c b/block/blkdebug.c
|
||||
index bbf2948703..5fcfc8ac6f 100644
|
||||
--- a/block/blkdebug.c
|
||||
+++ b/block/blkdebug.c
|
||||
@@ -503,12 +503,9 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
|
||||
/* Open the image file */
|
||||
- bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
|
||||
- bs, &child_of_bds,
|
||||
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
|
||||
- false, errp);
|
||||
- if (!bs->file) {
|
||||
- ret = -EINVAL;
|
||||
+ ret = bdrv_open_file_child(qemu_opt_get(opts, "x-image"), options, "image",
|
||||
+ bs, errp);
|
||||
+ if (ret < 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
|
||||
index f7a251e91f..f66a617eb3 100644
|
||||
--- a/block/blklogwrites.c
|
||||
+++ b/block/blklogwrites.c
|
||||
@@ -155,11 +155,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
|
||||
/* Open the file */
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, false,
|
||||
- errp);
|
||||
- if (!bs->file) {
|
||||
- ret = -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
diff --git a/block/blkreplay.c b/block/blkreplay.c
|
||||
index dcbe780ddb..76a0b8d12a 100644
|
||||
--- a/block/blkreplay.c
|
||||
+++ b/block/blkreplay.c
|
||||
@@ -26,11 +26,8 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
int ret;
|
||||
|
||||
/* Open the image file */
|
||||
- bs->file = bdrv_open_child(NULL, options, "image", bs, &child_of_bds,
|
||||
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
|
||||
- false, errp);
|
||||
- if (!bs->file) {
|
||||
- ret = -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "image", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
diff --git a/block/blkverify.c b/block/blkverify.c
|
||||
index d1facf5ba9..920e891684 100644
|
||||
--- a/block/blkverify.c
|
||||
+++ b/block/blkverify.c
|
||||
@@ -121,12 +121,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
|
||||
/* Open the raw file */
|
||||
- bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw",
|
||||
- bs, &child_of_bds,
|
||||
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
|
||||
- false, errp);
|
||||
- if (!bs->file) {
|
||||
- ret = -EINVAL;
|
||||
+ ret = bdrv_open_file_child(qemu_opt_get(opts, "x-raw"), options, "raw",
|
||||
+ bs, errp);
|
||||
+ if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
diff --git a/block/bochs.c b/block/bochs.c
|
||||
index 4d68658087..b2dc06bbfd 100644
|
||||
--- a/block/bochs.c
|
||||
+++ b/block/bochs.c
|
||||
@@ -110,10 +110,9 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
|
||||
diff --git a/block/cloop.c b/block/cloop.c
|
||||
index b8c6d0eccd..bee87da173 100644
|
||||
--- a/block/cloop.c
|
||||
+++ b/block/cloop.c
|
||||
@@ -71,10 +71,9 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
/* read header */
|
||||
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
|
||||
index c30a5ff8de..8aa2cb6a85 100644
|
||||
--- a/block/copy-before-write.c
|
||||
+++ b/block/copy-before-write.c
|
||||
@@ -150,12 +150,11 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
{
|
||||
BDRVCopyBeforeWriteState *s = bs->opaque;
|
||||
BdrvDirtyBitmap *copy_bitmap;
|
||||
+ int ret;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
|
||||
- false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds,
|
||||
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
|
||||
index 1fc7fb3333..815ac1d835 100644
|
||||
--- a/block/copy-on-read.c
|
||||
+++ b/block/copy-on-read.c
|
||||
@@ -41,12 +41,11 @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
BDRVStateCOR *state = bs->opaque;
|
||||
/* Find a bottom node name, if any */
|
||||
const char *bottom_node = qdict_get_try_str(options, "bottom");
|
||||
+ int ret;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
|
||||
- false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
bs->supported_read_flags = BDRV_REQ_PREFETCH;
|
||||
diff --git a/block/crypto.c b/block/crypto.c
|
||||
index c8ba4681e2..abfce39230 100644
|
||||
--- a/block/crypto.c
|
||||
+++ b/block/crypto.c
|
||||
@@ -260,15 +260,14 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
|
||||
{
|
||||
BlockCrypto *crypto = bs->opaque;
|
||||
QemuOpts *opts = NULL;
|
||||
- int ret = -EINVAL;
|
||||
+ int ret;
|
||||
QCryptoBlockOpenOptions *open_opts = NULL;
|
||||
unsigned int cflags = 0;
|
||||
QDict *cryptoopts = NULL;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
bs->supported_write_flags = BDRV_REQ_FUA &
|
||||
@@ -276,6 +275,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
|
||||
|
||||
opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
|
||||
if (!qemu_opts_absorb_qdict(opts, options, errp)) {
|
||||
+ ret = -EINVAL;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@@ -284,6 +284,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
|
||||
|
||||
open_opts = block_crypto_open_opts_init(cryptoopts, errp);
|
||||
if (!open_opts) {
|
||||
+ ret = -EINVAL;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
diff --git a/block/dmg.c b/block/dmg.c
|
||||
index 447901fbb8..38c363dd39 100644
|
||||
--- a/block/dmg.c
|
||||
+++ b/block/dmg.c
|
||||
@@ -439,10 +439,9 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
return ret;
|
||||
}
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
block_module_load_one("dmg-bz2");
|
||||
diff --git a/block/filter-compress.c b/block/filter-compress.c
|
||||
index d5be538619..305716c86c 100644
|
||||
--- a/block/filter-compress.c
|
||||
+++ b/block/filter-compress.c
|
||||
@@ -30,11 +30,9 @@
|
||||
static int compress_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
Error **errp)
|
||||
{
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
|
||||
- false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ int ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
if (!bs->file->bs->drv || !block_driver_can_compress(bs->file->bs->drv)) {
|
||||
diff --git a/block/parallels.c b/block/parallels.c
|
||||
index 6ebad2a2bb..ed4debd899 100644
|
||||
--- a/block/parallels.c
|
||||
+++ b/block/parallels.c
|
||||
@@ -735,10 +735,9 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
Error *local_err = NULL;
|
||||
char *buf;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
|
||||
diff --git a/block/preallocate.c b/block/preallocate.c
|
||||
index 1d4233f730..332408bdc9 100644
|
||||
--- a/block/preallocate.c
|
||||
+++ b/block/preallocate.c
|
||||
@@ -134,6 +134,7 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
Error **errp)
|
||||
{
|
||||
BDRVPreallocateState *s = bs->opaque;
|
||||
+ int ret;
|
||||
|
||||
/*
|
||||
* s->data_end and friends should be initialized on permission update.
|
||||
@@ -141,11 +142,9 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
*/
|
||||
s->file_end = s->zero_start = s->data_end = -EINVAL;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
|
||||
- false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
if (!preallocate_absorb_opts(&s->opts, options, bs->file->bs, errp)) {
|
||||
diff --git a/block/qcow.c b/block/qcow.c
|
||||
index c39940f33e..544a17261f 100644
|
||||
--- a/block/qcow.c
|
||||
+++ b/block/qcow.c
|
||||
@@ -120,10 +120,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
qdict_extract_subqdict(options, &encryptopts, "encrypt.");
|
||||
encryptfmt = qdict_get_try_str(encryptopts, "format");
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- ret = -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
diff --git a/block/qcow2.c b/block/qcow2.c
|
||||
index 6ee1919612..29ea157e6b 100644
|
||||
--- a/block/qcow2.c
|
||||
+++ b/block/qcow2.c
|
||||
@@ -1907,11 +1907,11 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
.errp = errp,
|
||||
.ret = -EINPROGRESS
|
||||
};
|
||||
+ int ret;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
/* Initialise locks */
|
||||
diff --git a/block/qed.c b/block/qed.c
|
||||
index 558d3646c4..e3b06a3d00 100644
|
||||
--- a/block/qed.c
|
||||
+++ b/block/qed.c
|
||||
@@ -558,11 +558,11 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
.errp = errp,
|
||||
.ret = -EINPROGRESS
|
||||
};
|
||||
+ int ret;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
bdrv_qed_init_state(bs);
|
||||
diff --git a/block/replication.c b/block/replication.c
|
||||
index 55c8f894aa..2f17397764 100644
|
||||
--- a/block/replication.c
|
||||
+++ b/block/replication.c
|
||||
@@ -88,11 +88,9 @@ static int replication_open(BlockDriverState *bs, QDict *options,
|
||||
const char *mode;
|
||||
const char *top_id;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
|
||||
- false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
ret = -EINVAL;
|
||||
diff --git a/block/throttle.c b/block/throttle.c
|
||||
index 6e8d52fa24..4fb5798c27 100644
|
||||
--- a/block/throttle.c
|
||||
+++ b/block/throttle.c
|
||||
@@ -78,11 +78,9 @@ static int throttle_open(BlockDriverState *bs, QDict *options,
|
||||
char *group;
|
||||
int ret;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
|
||||
- false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
bs->supported_write_flags = bs->file->bs->supported_write_flags |
|
||||
BDRV_REQ_WRITE_UNCHANGED;
|
||||
diff --git a/block/vdi.c b/block/vdi.c
|
||||
index bdc58d726e..c50c0ed61f 100644
|
||||
--- a/block/vdi.c
|
||||
+++ b/block/vdi.c
|
||||
@@ -376,10 +376,9 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
int ret;
|
||||
QemuUUID uuid_link, uuid_parent;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
logout("\n");
|
||||
diff --git a/block/vhdx.c b/block/vhdx.c
|
||||
index 356ec4c455..e7d6d7509a 100644
|
||||
--- a/block/vhdx.c
|
||||
+++ b/block/vhdx.c
|
||||
@@ -996,10 +996,9 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
uint64_t signature;
|
||||
Error *local_err = NULL;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
s->bat = NULL;
|
||||
diff --git a/block/vmdk.c b/block/vmdk.c
|
||||
index 0dfab6e941..7d7e56b36c 100644
|
||||
--- a/block/vmdk.c
|
||||
+++ b/block/vmdk.c
|
||||
@@ -1262,10 +1262,9 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
BDRVVmdkState *s = bs->opaque;
|
||||
uint32_t magic;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
buf = vmdk_read_desc(bs->file, 0, errp);
|
||||
diff --git a/block/vpc.c b/block/vpc.c
|
||||
index 297a26262a..430cab1cbb 100644
|
||||
--- a/block/vpc.c
|
||||
+++ b/block/vpc.c
|
||||
@@ -232,10 +232,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
int ret;
|
||||
int64_t bs_size;
|
||||
|
||||
- bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_IMAGE, false, errp);
|
||||
- if (!bs->file) {
|
||||
- return -EINVAL;
|
||||
+ ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
|
||||
+ if (ret < 0) {
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
|
||||
diff --git a/include/block/block.h b/include/block/block.h
|
||||
index e5dd22b034..f885f113ef 100644
|
||||
--- a/include/block/block.h
|
||||
+++ b/include/block/block.h
|
||||
@@ -376,6 +376,9 @@ BdrvChild *bdrv_open_child(const char *filename,
|
||||
const BdrvChildClass *child_class,
|
||||
BdrvChildRole child_role,
|
||||
bool allow_none, Error **errp);
|
||||
+int bdrv_open_file_child(const char *filename,
|
||||
+ QDict *options, const char *bdref_key,
|
||||
+ BlockDriverState *parent, Error **errp);
|
||||
BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
|
||||
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
|
||||
Error **errp);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,153 +0,0 @@
|
||||
From 192f956f2b0761f270070555f8feb1f0544e5558 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Wed, 9 Nov 2022 17:54:48 +0100
|
||||
Subject: [PATCH 01/11] block/mirror: Do not wait for active writes
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load
|
||||
RH-Bugzilla: 2125119
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/3] 652d1e55b954f13eaec2c86f58735d4942837e16
|
||||
|
||||
Waiting for all active writes to settle before daring to create a
|
||||
background copying operation means that we will never do background
|
||||
operations while the guest does anything (in write-blocking mode), and
|
||||
therefore cannot converge. Yes, we also will not diverge, but actually
|
||||
converging would be even nicer.
|
||||
|
||||
It is unclear why we did decide to wait for all active writes to settle
|
||||
before creating a background operation, but it just does not seem
|
||||
necessary. Active writes will put themselves into the in_flight bitmap
|
||||
and thus properly block actually conflicting background requests.
|
||||
|
||||
It is important for active requests to wait on overlapping background
|
||||
requests, which we do in active_write_prepare(). However, so far it was
|
||||
not documented why it is important. Add such documentation now, and
|
||||
also to the other call of mirror_wait_on_conflicts(), so that it becomes
|
||||
more clear why and when requests need to actively wait for other
|
||||
requests to settle.
|
||||
|
||||
Another thing to note is that of course we need to ensure that there are
|
||||
no active requests when the job completes, but that is done by virtue of
|
||||
the BDS being drained anyway, so there cannot be any active requests at
|
||||
that point.
|
||||
|
||||
With this change, we will need to explicitly keep track of how many
|
||||
bytes are in flight in active requests so that
|
||||
job_progress_set_remaining() in mirror_run() can set the correct number
|
||||
of remaining bytes.
|
||||
|
||||
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2123297
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221109165452.67927-2-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit d69a879bdf1aed586478eaa161ee064fe1b92f1a)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/mirror.c | 37 ++++++++++++++++++++++++++++++-------
|
||||
1 file changed, 30 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/block/mirror.c b/block/mirror.c
|
||||
index efec2c7674..282f428cb7 100644
|
||||
--- a/block/mirror.c
|
||||
+++ b/block/mirror.c
|
||||
@@ -81,6 +81,7 @@ typedef struct MirrorBlockJob {
|
||||
int max_iov;
|
||||
bool initial_zeroing_ongoing;
|
||||
int in_active_write_counter;
|
||||
+ int64_t active_write_bytes_in_flight;
|
||||
bool prepared;
|
||||
bool in_drain;
|
||||
} MirrorBlockJob;
|
||||
@@ -493,6 +494,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
|
||||
}
|
||||
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
|
||||
|
||||
+ /*
|
||||
+ * Wait for concurrent requests to @offset. The next loop will limit the
|
||||
+ * copied area based on in_flight_bitmap so we only copy an area that does
|
||||
+ * not overlap with concurrent in-flight requests. Still, we would like to
|
||||
+ * copy something, so wait until there are at least no more requests to the
|
||||
+ * very beginning of the area.
|
||||
+ */
|
||||
mirror_wait_on_conflicts(NULL, s, offset, 1);
|
||||
|
||||
job_pause_point(&s->common.job);
|
||||
@@ -993,12 +1001,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
|
||||
int64_t cnt, delta;
|
||||
bool should_complete;
|
||||
|
||||
- /* Do not start passive operations while there are active
|
||||
- * writes in progress */
|
||||
- while (s->in_active_write_counter) {
|
||||
- mirror_wait_for_any_operation(s, true);
|
||||
- }
|
||||
-
|
||||
if (s->ret < 0) {
|
||||
ret = s->ret;
|
||||
goto immediate_exit;
|
||||
@@ -1015,7 +1017,9 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
|
||||
/* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
|
||||
* the number of bytes currently being processed; together those are
|
||||
* the current remaining operation length */
|
||||
- job_progress_set_remaining(&s->common.job, s->bytes_in_flight + cnt);
|
||||
+ job_progress_set_remaining(&s->common.job,
|
||||
+ s->bytes_in_flight + cnt +
|
||||
+ s->active_write_bytes_in_flight);
|
||||
|
||||
/* Note that even when no rate limit is applied we need to yield
|
||||
* periodically with no pending I/O so that bdrv_drain_all() returns.
|
||||
@@ -1073,6 +1077,10 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
|
||||
|
||||
s->in_drain = true;
|
||||
bdrv_drained_begin(bs);
|
||||
+
|
||||
+ /* Must be zero because we are drained */
|
||||
+ assert(s->in_active_write_counter == 0);
|
||||
+
|
||||
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
|
||||
if (cnt > 0 || mirror_flush(s) < 0) {
|
||||
bdrv_drained_end(bs);
|
||||
@@ -1306,6 +1314,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
|
||||
}
|
||||
|
||||
job_progress_increase_remaining(&job->common.job, bytes);
|
||||
+ job->active_write_bytes_in_flight += bytes;
|
||||
|
||||
switch (method) {
|
||||
case MIRROR_METHOD_COPY:
|
||||
@@ -1327,6 +1336,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
|
||||
abort();
|
||||
}
|
||||
|
||||
+ job->active_write_bytes_in_flight -= bytes;
|
||||
if (ret >= 0) {
|
||||
job_progress_update(&job->common.job, bytes);
|
||||
} else {
|
||||
@@ -1375,6 +1385,19 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s,
|
||||
|
||||
s->in_active_write_counter++;
|
||||
|
||||
+ /*
|
||||
+ * Wait for concurrent requests affecting the area. If there are already
|
||||
+ * running requests that are copying off now-to-be stale data in the area,
|
||||
+ * we must wait for them to finish before we begin writing fresh data to the
|
||||
+ * target so that the write operations appear in the correct order.
|
||||
+ * Note that background requests (see mirror_iteration()) in contrast only
|
||||
+ * wait for conflicting requests at the start of the dirty area, and then
|
||||
+ * (based on the in_flight_bitmap) truncate the area to copy so it will not
|
||||
+ * conflict with any requests beyond that. For active writes, however, we
|
||||
+ * cannot truncate that area. The request from our parent must be blocked
|
||||
+ * until the area is copied in full. Therefore, we must wait for the whole
|
||||
+ * area to become free of concurrent requests.
|
||||
+ */
|
||||
mirror_wait_on_conflicts(op, s, offset, bytes);
|
||||
|
||||
bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,76 +0,0 @@
|
||||
From 57c79ed20cb73aa9aa4dd7487379b85ea3f936f6 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Wed, 9 Nov 2022 17:54:49 +0100
|
||||
Subject: [PATCH 02/11] block/mirror: Drop mirror_wait_for_any_operation()
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load
|
||||
RH-Bugzilla: 2125119
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [2/3] dec37883bcc491441ae08d9592d1ec26a47765c0
|
||||
|
||||
mirror_wait_for_free_in_flight_slot() is the only remaining user of
|
||||
mirror_wait_for_any_operation(), so inline the latter into the former.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221109165452.67927-3-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit eb994912993077f178ccb43b20e422ecf9ae4ac7)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/mirror.c | 21 ++++++++-------------
|
||||
1 file changed, 8 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/block/mirror.c b/block/mirror.c
|
||||
index 282f428cb7..6b02555ad7 100644
|
||||
--- a/block/mirror.c
|
||||
+++ b/block/mirror.c
|
||||
@@ -304,19 +304,21 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
|
||||
}
|
||||
|
||||
static inline void coroutine_fn
|
||||
-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
|
||||
+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
|
||||
{
|
||||
MirrorOp *op;
|
||||
|
||||
QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
|
||||
- /* Do not wait on pseudo ops, because it may in turn wait on
|
||||
+ /*
|
||||
+ * Do not wait on pseudo ops, because it may in turn wait on
|
||||
* some other operation to start, which may in fact be the
|
||||
* caller of this function. Since there is only one pseudo op
|
||||
* at any given time, we will always find some real operation
|
||||
- * to wait on. */
|
||||
- if (!op->is_pseudo_op && op->is_in_flight &&
|
||||
- op->is_active_write == active)
|
||||
- {
|
||||
+ * to wait on.
|
||||
+ * Also, do not wait on active operations, because they do not
|
||||
+ * use up in-flight slots.
|
||||
+ */
|
||||
+ if (!op->is_pseudo_op && op->is_in_flight && !op->is_active_write) {
|
||||
qemu_co_queue_wait(&op->waiting_requests, NULL);
|
||||
return;
|
||||
}
|
||||
@@ -324,13 +326,6 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
|
||||
abort();
|
||||
}
|
||||
|
||||
-static inline void coroutine_fn
|
||||
-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
|
||||
-{
|
||||
- /* Only non-active operations use up in-flight slots */
|
||||
- mirror_wait_for_any_operation(s, false);
|
||||
-}
|
||||
-
|
||||
/* Perform a mirror copy operation.
|
||||
*
|
||||
* *op->bytes_handled is set to the number of bytes copied after and
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,75 +0,0 @@
|
||||
From b1f5aa5a342a25dc558ee9d435fed0643fe5155f Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Reitz <hreitz@redhat.com>
|
||||
Date: Wed, 9 Nov 2022 17:54:50 +0100
|
||||
Subject: [PATCH 03/11] block/mirror: Fix NULL s->job in active writes
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load
|
||||
RH-Bugzilla: 2125119
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [3/3] 49d7ebd15667151a6e14228a8260cfdd0aa27a78
|
||||
|
||||
There is a small gap in mirror_start_job() before putting the mirror
|
||||
filter node into the block graph (bdrv_append() call) and the actual job
|
||||
being created. Before the job is created, MirrorBDSOpaque.job is NULL.
|
||||
|
||||
It is possible that requests come in when bdrv_drained_end() is called,
|
||||
and those requests would see MirrorBDSOpaque.job == NULL. Have our
|
||||
filter node handle that case gracefully.
|
||||
|
||||
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
|
||||
Message-Id: <20221109165452.67927-4-hreitz@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit da93d5c84e56e6b4e84aa8e98b6b984c9b6bb528)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
block/mirror.c | 20 ++++++++++++--------
|
||||
1 file changed, 12 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/block/mirror.c b/block/mirror.c
|
||||
index 6b02555ad7..50289fca49 100644
|
||||
--- a/block/mirror.c
|
||||
+++ b/block/mirror.c
|
||||
@@ -1438,11 +1438,13 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs,
|
||||
MirrorOp *op = NULL;
|
||||
MirrorBDSOpaque *s = bs->opaque;
|
||||
int ret = 0;
|
||||
- bool copy_to_target;
|
||||
+ bool copy_to_target = false;
|
||||
|
||||
- copy_to_target = s->job->ret >= 0 &&
|
||||
- !job_is_cancelled(&s->job->common.job) &&
|
||||
- s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
|
||||
+ if (s->job) {
|
||||
+ copy_to_target = s->job->ret >= 0 &&
|
||||
+ !job_is_cancelled(&s->job->common.job) &&
|
||||
+ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
|
||||
+ }
|
||||
|
||||
if (copy_to_target) {
|
||||
op = active_write_prepare(s->job, offset, bytes);
|
||||
@@ -1487,11 +1489,13 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
|
||||
QEMUIOVector bounce_qiov;
|
||||
void *bounce_buf;
|
||||
int ret = 0;
|
||||
- bool copy_to_target;
|
||||
+ bool copy_to_target = false;
|
||||
|
||||
- copy_to_target = s->job->ret >= 0 &&
|
||||
- !job_is_cancelled(&s->job->common.job) &&
|
||||
- s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
|
||||
+ if (s->job) {
|
||||
+ copy_to_target = s->job->ret >= 0 &&
|
||||
+ !job_is_cancelled(&s->job->common.job) &&
|
||||
+ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
|
||||
+ }
|
||||
|
||||
if (copy_to_target) {
|
||||
/* The guest might concurrently modify the data to write; but
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,104 +0,0 @@
|
||||
From 636e32b4c570ddb20266b6672311174353644f0e Mon Sep 17 00:00:00 2001
|
||||
From: Keith Busch <kbusch@kernel.org>
|
||||
Date: Thu, 29 Sep 2022 13:05:22 -0700
|
||||
Subject: [PATCH 1/2] block: move bdrv_qiov_is_aligned to file-posix
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 411: block: Fix iov_len check in bdrv_qiov_is_aligned()
|
||||
RH-Jira: RHEL-60553
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-Commit: [1/2] 682c1b81b42959d9d91e0f68cd70e9753e53a279
|
||||
|
||||
There is only one user of bdrv_qiov_is_aligned(), so move the alignment
|
||||
function there and make it static.
|
||||
|
||||
Signed-off-by: Keith Busch <kbusch@kernel.org>
|
||||
Message-Id: <20220929200523.3218710-2-kbusch@meta.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit a7c5f67a78569f8c275ea4ea9962e9c79b9d03cb)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/file-posix.c | 20 ++++++++++++++++++++
|
||||
block/io.c | 20 --------------------
|
||||
include/block/block.h | 1 -
|
||||
3 files changed, 20 insertions(+), 21 deletions(-)
|
||||
|
||||
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||
index b283093e5b..b404e1544f 100644
|
||||
--- a/block/file-posix.c
|
||||
+++ b/block/file-posix.c
|
||||
@@ -2051,6 +2051,26 @@ static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs,
|
||||
return thread_pool_submit_co(pool, func, arg);
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Check if all memory in this vector is sector aligned.
|
||||
+ */
|
||||
+static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||||
+{
|
||||
+ int i;
|
||||
+ size_t alignment = bdrv_min_mem_align(bs);
|
||||
+
|
||||
+ for (i = 0; i < qiov->niov; i++) {
|
||||
+ if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
|
||||
+ return false;
|
||||
+ }
|
||||
+ if (qiov->iov[i].iov_len % alignment) {
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
|
||||
uint64_t bytes, QEMUIOVector *qiov, int type)
|
||||
{
|
||||
diff --git a/block/io.c b/block/io.c
|
||||
index 8ae57728a6..639e171eff 100644
|
||||
--- a/block/io.c
|
||||
+++ b/block/io.c
|
||||
@@ -3375,26 +3375,6 @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
|
||||
return mem;
|
||||
}
|
||||
|
||||
-/*
|
||||
- * Check if all memory in this vector is sector aligned.
|
||||
- */
|
||||
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||||
-{
|
||||
- int i;
|
||||
- size_t alignment = bdrv_min_mem_align(bs);
|
||||
-
|
||||
- for (i = 0; i < qiov->niov; i++) {
|
||||
- if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
|
||||
- return false;
|
||||
- }
|
||||
- if (qiov->iov[i].iov_len % alignment) {
|
||||
- return false;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
void bdrv_io_plug(BlockDriverState *bs)
|
||||
{
|
||||
BdrvChild *child;
|
||||
diff --git a/include/block/block.h b/include/block/block.h
|
||||
index f885f113ef..09b374b496 100644
|
||||
--- a/include/block/block.h
|
||||
+++ b/include/block/block.h
|
||||
@@ -622,7 +622,6 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size);
|
||||
void *qemu_blockalign0(BlockDriverState *bs, size_t size);
|
||||
void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
|
||||
void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
|
||||
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
|
||||
|
||||
void bdrv_enable_copy_on_read(BlockDriverState *bs);
|
||||
void bdrv_disable_copy_on_read(BlockDriverState *bs);
|
||||
--
|
||||
2.45.2
|
||||
|
@ -1,48 +0,0 @@
|
||||
From 9009b674a01dc0cd92c319c87714b5aca6e639f8 Mon Sep 17 00:00:00 2001
|
||||
From: Keith Busch <kbusch@kernel.org>
|
||||
Date: Thu, 29 Sep 2022 13:05:23 -0700
|
||||
Subject: [PATCH 2/2] block: use the request length for iov alignment
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 411: block: Fix iov_len check in bdrv_qiov_is_aligned()
|
||||
RH-Jira: RHEL-60553
|
||||
RH-Acked-by: Eric Blake <eblake@redhat.com>
|
||||
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-Commit: [2/2] 0e01d51cfb21ca43283626c2367e5c5d0d531736
|
||||
|
||||
An iov length needs to be aligned to the logical block size, which may
|
||||
be larger than the memory alignment.
|
||||
|
||||
Tested-by: Jens Axboe <axboe@kernel.dk>
|
||||
Signed-off-by: Keith Busch <kbusch@kernel.org>
|
||||
Message-Id: <20220929200523.3218710-3-kbusch@meta.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 25474d90aa50bd32e0de395a33d8de42dd6f2aef)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/file-posix.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||
index b404e1544f..b84c5725cc 100644
|
||||
--- a/block/file-posix.c
|
||||
+++ b/block/file-posix.c
|
||||
@@ -2058,12 +2058,13 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||||
{
|
||||
int i;
|
||||
size_t alignment = bdrv_min_mem_align(bs);
|
||||
+ size_t len = bs->bl.request_alignment;
|
||||
|
||||
for (i = 0; i < qiov->niov; i++) {
|
||||
if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
|
||||
return false;
|
||||
}
|
||||
- if (qiov->iov[i].iov_len % alignment) {
|
||||
+ if (qiov->iov[i].iov_len % len) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.45.2
|
||||
|
@ -1,56 +0,0 @@
|
||||
From 866a3b56f6a2d43f3cf7b3313fb41808bc5e6e1f Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 03/15] checkpatch: add qemu_bh_new/aio_bh_new checks
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/12] 620b480b0878c18223f3cc103450bc16aa6d7e21 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
|
||||
commit ef56ffbdd6b0605dc1e305611287b948c970e236
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:08 2023 -0400
|
||||
|
||||
checkpatch: add qemu_bh_new/aio_bh_new checks
|
||||
|
||||
Advise authors to use the _guarded versions of the APIs, instead.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-4-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
scripts/checkpatch.pl | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
|
||||
index cb8eff233e..b2428e80cc 100755
|
||||
--- a/scripts/checkpatch.pl
|
||||
+++ b/scripts/checkpatch.pl
|
||||
@@ -2858,6 +2858,14 @@ sub process {
|
||||
if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
|
||||
ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
|
||||
}
|
||||
+# recommend qemu_bh_new_guarded instead of qemu_bh_new
|
||||
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) {
|
||||
+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr);
|
||||
+ }
|
||||
+# recommend aio_bh_new_guarded instead of aio_bh_new
|
||||
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) {
|
||||
+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr);
|
||||
+ }
|
||||
# check for module_init(), use category-specific init macros explicitly please
|
||||
if ($line =~ /^module_init\s*\(/) {
|
||||
ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,127 +0,0 @@
|
||||
From 103608465b8bd2edf7f9aaef5c3c93309ccf9ec2 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Tue, 21 Feb 2023 16:22:17 -0500
|
||||
Subject: [PATCH 12/13] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel()
|
||||
race
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 264: scsi: protect req->aiocb with AioContext lock
|
||||
RH-Bugzilla: 2090990
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [2/3] 14f5835093ba8c5111f3ada2fe87730371aca733
|
||||
|
||||
dma_blk_cb() only takes the AioContext lock around ->io_func(). That
|
||||
means the rest of dma_blk_cb() is not protected. In particular, the
|
||||
DMAAIOCB field accesses happen outside the lock.
|
||||
|
||||
There is a race when the main loop thread holds the AioContext lock and
|
||||
invokes scsi_device_purge_requests() -> bdrv_aio_cancel() ->
|
||||
dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb
|
||||
field determines how cancellation proceeds. If dma_aio_cancel() sees
|
||||
dbs->acb == NULL while dma_blk_cb() is still running, the request can be
|
||||
completed twice (-ECANCELED and the actual return value).
|
||||
|
||||
The following assertion can occur with virtio-scsi when an IOThread is
|
||||
used:
|
||||
|
||||
../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed.
|
||||
|
||||
Fix the race by holding the AioContext across dma_blk_cb(). Now
|
||||
dma_aio_cancel() under the AioContext lock will not see
|
||||
inconsistent/intermediate states.
|
||||
|
||||
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-Id: <20230221212218.1378734-3-stefanha@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
hw/scsi/scsi-disk.c | 4 +---
|
||||
softmmu/dma-helpers.c | 12 +++++++-----
|
||||
2 files changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
|
||||
index 179ce22c4a..c8109a673e 100644
|
||||
--- a/hw/scsi/scsi-disk.c
|
||||
+++ b/hw/scsi/scsi-disk.c
|
||||
@@ -351,13 +351,12 @@ done:
|
||||
scsi_req_unref(&r->req);
|
||||
}
|
||||
|
||||
+/* Called with AioContext lock held */
|
||||
static void scsi_dma_complete(void *opaque, int ret)
|
||||
{
|
||||
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||
|
||||
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||
-
|
||||
assert(r->req.aiocb != NULL);
|
||||
r->req.aiocb = NULL;
|
||||
|
||||
@@ -367,7 +366,6 @@ static void scsi_dma_complete(void *opaque, int ret)
|
||||
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||
}
|
||||
scsi_dma_complete_noio(r, ret);
|
||||
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||
}
|
||||
|
||||
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
|
||||
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
|
||||
index 7d766a5e89..42af18719a 100644
|
||||
--- a/softmmu/dma-helpers.c
|
||||
+++ b/softmmu/dma-helpers.c
|
||||
@@ -127,17 +127,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
|
||||
static void dma_blk_cb(void *opaque, int ret)
|
||||
{
|
||||
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
|
||||
+ AioContext *ctx = dbs->ctx;
|
||||
dma_addr_t cur_addr, cur_len;
|
||||
void *mem;
|
||||
|
||||
trace_dma_blk_cb(dbs, ret);
|
||||
|
||||
+ aio_context_acquire(ctx);
|
||||
dbs->acb = NULL;
|
||||
dbs->offset += dbs->iov.size;
|
||||
|
||||
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
|
||||
dma_complete(dbs, ret);
|
||||
- return;
|
||||
+ goto out;
|
||||
}
|
||||
dma_blk_unmap(dbs);
|
||||
|
||||
@@ -177,9 +179,9 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
|
||||
if (dbs->iov.size == 0) {
|
||||
trace_dma_map_wait(dbs);
|
||||
- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
|
||||
+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
|
||||
cpu_register_map_client(dbs->bh);
|
||||
- return;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
|
||||
@@ -187,11 +189,11 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
|
||||
}
|
||||
|
||||
- aio_context_acquire(dbs->ctx);
|
||||
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
|
||||
dma_blk_cb, dbs, dbs->io_func_opaque);
|
||||
- aio_context_release(dbs->ctx);
|
||||
assert(dbs->acb);
|
||||
+out:
|
||||
+ aio_context_release(ctx);
|
||||
}
|
||||
|
||||
static void dma_aio_cancel(BlockAIOCB *acb)
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,69 +0,0 @@
|
||||
From 837e09b1a8a38b53488f59aad090fbe6bb94e257 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Fri, 17 Nov 2023 11:32:37 +0100
|
||||
Subject: [PATCH 2/3] dump: Add arch cleanup function
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 323: Fix problem that secure execution guest might remain in "paused" state after failed dump
|
||||
RH-Jira: RHEL-16696
|
||||
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Commit: [2/3] b70f406dec88ffd4877f3d5d580fc8f821bdb252
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-16696
|
||||
|
||||
commit e72629e5149aba6f44122ea6d2a803ef136a0c6b
|
||||
Author: Janosch Frank <frankja@linux.ibm.com>
|
||||
Date: Thu Nov 9 12:04:42 2023 +0000
|
||||
|
||||
dump: Add arch cleanup function
|
||||
|
||||
Some architectures (s390x) need to cleanup after a failed dump to be
|
||||
able to continue to run the vm. Add a cleanup function pointer and
|
||||
call it if it's set.
|
||||
|
||||
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
Message-ID: <20231109120443.185979-3-frankja@linux.ibm.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
dump/dump.c | 4 ++++
|
||||
include/sysemu/dump-arch.h | 1 +
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
diff --git a/dump/dump.c b/dump/dump.c
|
||||
index 5dee060b73..93edb89547 100644
|
||||
--- a/dump/dump.c
|
||||
+++ b/dump/dump.c
|
||||
@@ -100,6 +100,10 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
|
||||
|
||||
static int dump_cleanup(DumpState *s)
|
||||
{
|
||||
+ if (s->dump_info.arch_cleanup_fn) {
|
||||
+ s->dump_info.arch_cleanup_fn(s);
|
||||
+ }
|
||||
+
|
||||
guest_phys_blocks_free(&s->guest_phys_blocks);
|
||||
memory_mapping_list_free(&s->list);
|
||||
close(s->fd);
|
||||
diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h
|
||||
index 59bbc9be38..743916e46c 100644
|
||||
--- a/include/sysemu/dump-arch.h
|
||||
+++ b/include/sysemu/dump-arch.h
|
||||
@@ -24,6 +24,7 @@ typedef struct ArchDumpInfo {
|
||||
void (*arch_sections_add_fn)(DumpState *s);
|
||||
uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff);
|
||||
int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff);
|
||||
+ void (*arch_cleanup_fn)(DumpState *s);
|
||||
} ArchDumpInfo;
|
||||
|
||||
struct GuestPhysBlockList; /* memory_mapping.h */
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,61 +0,0 @@
|
||||
From 7693449b235bbab6d32a1b87fa1d0e101c786f3b Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:11:14 -0500
|
||||
Subject: [PATCH 05/13] edu: add smp_mb__after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2168472
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Commit: [5/10] 300901290e08b253b1278eedc39cd07c1e202b96
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
|
||||
|
||||
commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu Mar 2 11:16:13 2023 +0100
|
||||
|
||||
edu: add smp_mb__after_rmw()
|
||||
|
||||
Ensure ordering between clearing the COMPUTING flag and checking
|
||||
IRQFACT, and between setting the IRQFACT flag and checking
|
||||
COMPUTING. This ensures that no wakeups are lost.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
hw/misc/edu.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
|
||||
index e935c418d4..a1f8bc77e7 100644
|
||||
--- a/hw/misc/edu.c
|
||||
+++ b/hw/misc/edu.c
|
||||
@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
|
||||
case 0x20:
|
||||
if (val & EDU_STATUS_IRQFACT) {
|
||||
qatomic_or(&edu->status, EDU_STATUS_IRQFACT);
|
||||
+ /* Order check of the COMPUTING flag after setting IRQFACT. */
|
||||
+ smp_mb__after_rmw();
|
||||
} else {
|
||||
qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT);
|
||||
}
|
||||
@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque)
|
||||
qemu_mutex_unlock(&edu->thr_mutex);
|
||||
qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING);
|
||||
|
||||
+ /* Clear COMPUTING flag before checking IRQFACT. */
|
||||
+ smp_mb__after_rmw();
|
||||
+
|
||||
if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) {
|
||||
qemu_mutex_lock_iothread();
|
||||
edu_raise_irq(edu, FACT_IRQ);
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,105 +0,0 @@
|
||||
From 939c75ab92ac608893cad0e46f55527950518a57 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 5 Mar 2024 11:36:15 -0500
|
||||
Subject: [PATCH 1/3] glib-compat: Introduce g_memdup2() wrapper
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 353: ui/clipboard: mark type as not available when there is no data
|
||||
RH-Jira: RHEL-19628
|
||||
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||
RH-Acked-by: Gerd Hoffmann <None>
|
||||
RH-Commit: [1/2] f401c63303ef558bfcbb36e4c8fcc8bf2b1c3eb4 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-19628
|
||||
CVE: CVE-2023-6683
|
||||
Upstream: Merged
|
||||
|
||||
commit 2c674fada72079583a3f2cc1790b16a0259c4fa0
|
||||
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Date: Fri Sep 3 19:44:44 2021 +0200
|
||||
|
||||
glib-compat: Introduce g_memdup2() wrapper
|
||||
When experimenting with raising GLIB_VERSION_MIN_REQUIRED to 2.68
|
||||
(Fedora 34 provides GLib 2.68.1) we get:
|
||||
|
||||
hw/virtio/virtio-crypto.c:245:24: error: 'g_memdup' is deprecated: Use 'g_memdup2' instead [-Werror,-Wdeprecated-declarations]
|
||||
...
|
||||
|
||||
g_memdup() has been superseded by g_memdup2() to fix potential security
|
||||
issues (size argument is 32-bit and could be truncated / wrapping).
|
||||
GLib recommends to copy their static inline version of g_memdup2():
|
||||
https://discourse.gnome.org/t/port-your-module-from-g-memdup-to-g-memdup2-now/5538
|
||||
|
||||
Our glib-compat.h provides a comment explaining how to deal with
|
||||
these deprecated declarations (see commit e71e8cc0355
|
||||
"glib: enforce the minimum required version and warn about old APIs").
|
||||
|
||||
Following this comment suggestion, implement the g_memdup2_qemu()
|
||||
wrapper to g_memdup2(), and use the safer equivalent inlined when
|
||||
we are using pre-2.68 GLib.
|
||||
|
||||
Reported-by: Eric Blake <eblake@redhat.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Message-Id: <20210903174510.751630-3-philmd@redhat.com>
|
||||
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
include/glib-compat.h | 37 +++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 37 insertions(+)
|
||||
|
||||
diff --git a/include/glib-compat.h b/include/glib-compat.h
|
||||
index 9e95c888f5..8d01a8c01f 100644
|
||||
--- a/include/glib-compat.h
|
||||
+++ b/include/glib-compat.h
|
||||
@@ -68,6 +68,43 @@
|
||||
* without generating warnings.
|
||||
*/
|
||||
|
||||
+/*
|
||||
+ * g_memdup2_qemu:
|
||||
+ * @mem: (nullable): the memory to copy.
|
||||
+ * @byte_size: the number of bytes to copy.
|
||||
+ *
|
||||
+ * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
|
||||
+ * from @mem. If @mem is %NULL it returns %NULL.
|
||||
+ *
|
||||
+ * This replaces g_memdup(), which was prone to integer overflows when
|
||||
+ * converting the argument from a #gsize to a #guint.
|
||||
+ *
|
||||
+ * This static inline version is a backport of the new public API from
|
||||
+ * GLib 2.68, kept internal to GLib for backport to older stable releases.
|
||||
+ * See https://gitlab.gnome.org/GNOME/glib/-/issues/2319.
|
||||
+ *
|
||||
+ * Returns: (nullable): a pointer to the newly-allocated copy of the memory,
|
||||
+ * or %NULL if @mem is %NULL.
|
||||
+ */
|
||||
+static inline gpointer g_memdup2_qemu(gconstpointer mem, gsize byte_size)
|
||||
+{
|
||||
+#if GLIB_CHECK_VERSION(2, 68, 0)
|
||||
+ return g_memdup2(mem, byte_size);
|
||||
+#else
|
||||
+ gpointer new_mem;
|
||||
+
|
||||
+ if (mem && byte_size != 0) {
|
||||
+ new_mem = g_malloc(byte_size);
|
||||
+ memcpy(new_mem, mem, byte_size);
|
||||
+ } else {
|
||||
+ new_mem = NULL;
|
||||
+ }
|
||||
+
|
||||
+ return new_mem;
|
||||
+#endif
|
||||
+}
|
||||
+#define g_memdup2(m, s) g_memdup2_qemu(m, s)
|
||||
+
|
||||
#if defined(G_OS_UNIX)
|
||||
/*
|
||||
* Note: The fallback implementation is not MT-safe, and it returns a copy of
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,119 +0,0 @@
|
||||
From 4f6f881de10e31cac4636d5fde4b7ed4c8affadb Mon Sep 17 00:00:00 2001
|
||||
From: Eric Auger <eric.auger@redhat.com>
|
||||
Date: Thu, 4 Jan 2024 12:02:31 +0100
|
||||
Subject: [PATCH 3/3] hw/arm/virt: Do not load efi-virtio.rom for all
|
||||
virtio-net-pci variants
|
||||
|
||||
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||
RH-MergeRequest: 344: hw/arm/virt: Do not load efi-virtio.rom for any virtio-net-pci variants
|
||||
RH-Jira: RHEL-14870
|
||||
RH-Acked-by: Gerd Hoffmann <None>
|
||||
RH-Acked-by: Sebastian Ott <None>
|
||||
RH-Commit: [1/1] ffeaa78ad0a1cff5b49009dfb32d25e5cadc0e05
|
||||
|
||||
Upstream: RHEL-only
|
||||
Brew: http://brewweb.engineering.redhat.com/brew/taskinfo?taskID=5785640
|
||||
|
||||
Currently arm_rhel_compat just sets the romfile to "" for
|
||||
virtio-net-pci and not for transitional and non transitional
|
||||
variants. However, on aarch64 RHEL, efi-virtio.rom is not
|
||||
shipped so transitional and non-transitional variants cannot
|
||||
be used and the following error is observed:
|
||||
|
||||
"Could not open option rom 'efi-virtio.rom': No such file or directory"
|
||||
|
||||
In practice, we do not need any rom file for those virtio-net-pci
|
||||
variants either because edk2 already brings the full functionality.
|
||||
|
||||
So let's change the applied compat to cover all the variants. While
|
||||
at it also change the way arm_rhel_compat is applied. Instead of
|
||||
applying it from the latest _virt_options(), which is error prone
|
||||
when upgrading the machine type, let's apply it before calling
|
||||
*virt_options in the non abstract machine class. That way the setting
|
||||
will apply to any machine type without any need to add it in any
|
||||
future machine types.
|
||||
|
||||
We don't really care keeping non void romfiles for transitional and
|
||||
non transitional devices on previous machine types because this
|
||||
was not working anyway.
|
||||
|
||||
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||
---
|
||||
hw/arm/virt.c | 42 ++++++++++++++++++++++++++++--------------
|
||||
1 file changed, 28 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||
index dbf0a6d62f..46c72a9611 100644
|
||||
--- a/hw/arm/virt.c
|
||||
+++ b/hw/arm/virt.c
|
||||
@@ -108,11 +108,39 @@
|
||||
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
|
||||
#endif /* disabled for RHEL */
|
||||
|
||||
+/*
|
||||
+ * This variable is for changes to properties that are RHEL specific,
|
||||
+ * different to the current upstream and to be applied to the latest
|
||||
+ * machine type. They may be overridden by older machine compats.
|
||||
+ *
|
||||
+ * virtio-net-pci variant romfiles are not needed because edk2 does
|
||||
+ * fully support the pxe boot. Besides virtio romfiles are not shipped
|
||||
+ * on rhel/aarch64.
|
||||
+ */
|
||||
+GlobalProperty arm_rhel_compat[] = {
|
||||
+ {"virtio-net-pci", "romfile", "" },
|
||||
+ {"virtio-net-pci-transitional", "romfile", "" },
|
||||
+ {"virtio-net-pci-non-transitional", "romfile", "" },
|
||||
+};
|
||||
+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
|
||||
+
|
||||
+/*
|
||||
+ * This cannot be called from the rhel_virt_class_init() because
|
||||
+ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new()
|
||||
+ * only is called on virt-rhelm.n.s non abstract class init.
|
||||
+ */
|
||||
+static void arm_rhel_compat_set(MachineClass *mc)
|
||||
+{
|
||||
+ compat_props_add(mc->compat_props, arm_rhel_compat,
|
||||
+ arm_rhel_compat_len);
|
||||
+}
|
||||
+
|
||||
#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \
|
||||
static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \
|
||||
void *data) \
|
||||
{ \
|
||||
MachineClass *mc = MACHINE_CLASS(oc); \
|
||||
+ arm_rhel_compat_set(mc); \
|
||||
rhel##m##n##s##_virt_options(mc); \
|
||||
mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \
|
||||
if (latest) { \
|
||||
@@ -136,19 +164,6 @@
|
||||
#define DEFINE_RHEL_MACHINE(major, minor, subminor) \
|
||||
DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false)
|
||||
|
||||
-/* This variable is for changes to properties that are RHEL specific,
|
||||
- * different to the current upstream and to be applied to the latest
|
||||
- * machine type.
|
||||
- */
|
||||
-GlobalProperty arm_rhel_compat[] = {
|
||||
- {
|
||||
- .driver = "virtio-net-pci",
|
||||
- .property = "romfile",
|
||||
- .value = "",
|
||||
- },
|
||||
-};
|
||||
-const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
|
||||
-
|
||||
/* Number of external interrupt lines to configure the GIC with */
|
||||
#define NUM_IRQS 256
|
||||
|
||||
@@ -3240,7 +3255,6 @@ type_init(rhel_machine_init);
|
||||
|
||||
static void rhel860_virt_options(MachineClass *mc)
|
||||
{
|
||||
- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
|
||||
}
|
||||
DEFINE_RHEL_MACHINE_AS_LATEST(8, 6, 0)
|
||||
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,61 +0,0 @@
|
||||
From f4623ea611a74c684b0097b98a803cbe7ffb0825 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Thu, 18 Jul 2024 09:26:55 -0400
|
||||
Subject: [PATCH 5/6] hw/char/virtio-serial-bus: Protect from DMA re-entrancy
|
||||
bugs
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 380: QEMU: virtio: DMA reentrancy issue leads to double free vulnerability
|
||||
RH-Jira: RHEL-32276
|
||||
RH-Acked-by: Gerd Hoffmann <None>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [5/6] fc8a445ebf6e763cd1482cd1f7ee23e5b5bbb388 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-32276
|
||||
CVE: CVE-2024-3446
|
||||
Upstream: Merged
|
||||
|
||||
commit b4295bff25f7b50de1d9cc94a9c6effd40056bca
|
||||
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Date: Thu Apr 4 20:56:35 2024 +0200
|
||||
|
||||
hw/char/virtio-serial-bus: Protect from DMA re-entrancy bugs
|
||||
|
||||
Replace qemu_bh_new_guarded() by virtio_bh_new_guarded()
|
||||
so the bus and device use the same guard. Otherwise the
|
||||
DMA-reentrancy protection can be bypassed.
|
||||
|
||||
Fixes: CVE-2024-3446
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Suggested-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
|
||||
Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Message-Id: <20240409105537.18308-4-philmd@linaro.org>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/char/virtio-serial-bus.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
|
||||
index f18124b155..791b7ac59e 100644
|
||||
--- a/hw/char/virtio-serial-bus.c
|
||||
+++ b/hw/char/virtio-serial-bus.c
|
||||
@@ -985,8 +985,7 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
- port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port,
|
||||
- &dev->mem_reentrancy_guard);
|
||||
+ port->bh = virtio_bh_new_guarded(dev, flush_queued_data_bh, port);
|
||||
port->elem = NULL;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,160 +0,0 @@
|
||||
From d37035373a266644b241aab1f041ab09c9185540 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Thu, 18 Jul 2024 09:29:54 -0400
|
||||
Subject: [PATCH 4/6] hw/display/virtio-gpu: Protect from DMA re-entrancy bugs
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 380: QEMU: virtio: DMA reentrancy issue leads to double free vulnerability
|
||||
RH-Jira: RHEL-32276
|
||||
RH-Acked-by: Gerd Hoffmann <None>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/6] e3cd21742228528a1a74ea62d55b5941d3efb261 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-32276
|
||||
CVE: CVE-2024-3446
|
||||
Upstream: Merged
|
||||
|
||||
commit ba28e0ff4d95b56dc334aac2730ab3651ffc3132
|
||||
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Date: Thu Apr 4 20:56:27 2024 +0200
|
||||
|
||||
hw/display/virtio-gpu: Protect from DMA re-entrancy bugs
|
||||
|
||||
Replace qemu_bh_new_guarded() by virtio_bh_new_guarded()
|
||||
so the bus and device use the same guard. Otherwise the
|
||||
DMA-reentrancy protection can be bypassed:
|
||||
|
||||
$ cat << EOF | qemu-system-i386 -display none -nodefaults \
|
||||
-machine q35,accel=qtest \
|
||||
-m 512M \
|
||||
-device virtio-gpu \
|
||||
-qtest stdio
|
||||
outl 0xcf8 0x80000820
|
||||
outl 0xcfc 0xe0004000
|
||||
outl 0xcf8 0x80000804
|
||||
outw 0xcfc 0x06
|
||||
write 0xe0004030 0x4 0x024000e0
|
||||
write 0xe0004028 0x1 0xff
|
||||
write 0xe0004020 0x4 0x00009300
|
||||
write 0xe000401c 0x1 0x01
|
||||
write 0x101 0x1 0x04
|
||||
write 0x103 0x1 0x1c
|
||||
write 0x9301c8 0x1 0x18
|
||||
write 0x105 0x1 0x1c
|
||||
write 0x107 0x1 0x1c
|
||||
write 0x109 0x1 0x1c
|
||||
write 0x10b 0x1 0x00
|
||||
write 0x10d 0x1 0x00
|
||||
write 0x10f 0x1 0x00
|
||||
write 0x111 0x1 0x00
|
||||
write 0x113 0x1 0x00
|
||||
write 0x115 0x1 0x00
|
||||
write 0x117 0x1 0x00
|
||||
write 0x119 0x1 0x00
|
||||
write 0x11b 0x1 0x00
|
||||
write 0x11d 0x1 0x00
|
||||
write 0x11f 0x1 0x00
|
||||
write 0x121 0x1 0x00
|
||||
write 0x123 0x1 0x00
|
||||
write 0x125 0x1 0x00
|
||||
write 0x127 0x1 0x00
|
||||
write 0x129 0x1 0x00
|
||||
write 0x12b 0x1 0x00
|
||||
write 0x12d 0x1 0x00
|
||||
write 0x12f 0x1 0x00
|
||||
write 0x131 0x1 0x00
|
||||
write 0x133 0x1 0x00
|
||||
write 0x135 0x1 0x00
|
||||
write 0x137 0x1 0x00
|
||||
write 0x139 0x1 0x00
|
||||
write 0xe0007003 0x1 0x00
|
||||
EOF
|
||||
...
|
||||
=================================================================
|
||||
==276099==ERROR: AddressSanitizer: heap-use-after-free on address 0x60d000011178
|
||||
at pc 0x562cc3b736c7 bp 0x7ffed49dee60 sp 0x7ffed49dee58
|
||||
READ of size 8 at 0x60d000011178 thread T0
|
||||
#0 0x562cc3b736c6 in virtio_gpu_ctrl_response hw/display/virtio-gpu.c:180:42
|
||||
#1 0x562cc3b7c40b in virtio_gpu_ctrl_response_nodata hw/display/virtio-gpu.c:192:5
|
||||
#2 0x562cc3b7c40b in virtio_gpu_simple_process_cmd hw/display/virtio-gpu.c:1015:13
|
||||
#3 0x562cc3b82873 in virtio_gpu_process_cmdq hw/display/virtio-gpu.c:1050:9
|
||||
#4 0x562cc4a85514 in aio_bh_call util/async.c:169:5
|
||||
#5 0x562cc4a85c52 in aio_bh_poll util/async.c:216:13
|
||||
#6 0x562cc4a1a79b in aio_dispatch util/aio-posix.c:423:5
|
||||
#7 0x562cc4a8a2da in aio_ctx_dispatch util/async.c:358:5
|
||||
#8 0x7f36840547a8 in g_main_context_dispatch (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x547a8)
|
||||
#9 0x562cc4a8b753 in glib_pollfds_poll util/main-loop.c:290:9
|
||||
#10 0x562cc4a8b753 in os_host_main_loop_wait util/main-loop.c:313:5
|
||||
#11 0x562cc4a8b753 in main_loop_wait util/main-loop.c:592:11
|
||||
#12 0x562cc3938186 in qemu_main_loop system/runstate.c:782:9
|
||||
#13 0x562cc43b7af5 in qemu_default_main system/main.c:37:14
|
||||
#14 0x7f3683a6c189 in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16
|
||||
#15 0x7f3683a6c244 in __libc_start_main csu/../csu/libc-start.c:381:3
|
||||
#16 0x562cc2a58ac0 in _start (qemu-system-i386+0x231bac0)
|
||||
|
||||
0x60d000011178 is located 56 bytes inside of 136-byte region [0x60d000011140,0x60d0000111c8)
|
||||
freed by thread T0 here:
|
||||
#0 0x562cc2adb662 in __interceptor_free (qemu-system-i386+0x239e662)
|
||||
#1 0x562cc3b86b21 in virtio_gpu_reset hw/display/virtio-gpu.c:1524:9
|
||||
#2 0x562cc416e20e in virtio_reset hw/virtio/virtio.c:2145:9
|
||||
#3 0x562cc37c5644 in virtio_pci_reset hw/virtio/virtio-pci.c:2249:5
|
||||
#4 0x562cc4233758 in memory_region_write_accessor system/memory.c:497:5
|
||||
#5 0x562cc4232eea in access_with_adjusted_size system/memory.c:573:18
|
||||
|
||||
previously allocated by thread T0 here:
|
||||
#0 0x562cc2adb90e in malloc (qemu-system-i386+0x239e90e)
|
||||
#1 0x7f368405a678 in g_malloc (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x5a678)
|
||||
#2 0x562cc4163ffc in virtqueue_split_pop hw/virtio/virtio.c:1612:12
|
||||
#3 0x562cc4163ffc in virtqueue_pop hw/virtio/virtio.c:1783:16
|
||||
#4 0x562cc3b91a95 in virtio_gpu_handle_ctrl hw/display/virtio-gpu.c:1112:15
|
||||
#5 0x562cc4a85514 in aio_bh_call util/async.c:169:5
|
||||
#6 0x562cc4a85c52 in aio_bh_poll util/async.c:216:13
|
||||
#7 0x562cc4a1a79b in aio_dispatch util/aio-posix.c:423:5
|
||||
|
||||
SUMMARY: AddressSanitizer: heap-use-after-free hw/display/virtio-gpu.c:180:42 in virtio_gpu_ctrl_response
|
||||
|
||||
With this change, the same reproducer triggers:
|
||||
|
||||
qemu-system-i386: warning: Blocked re-entrant IO on MemoryRegion: virtio-pci-common-virtio-gpu at addr: 0x6
|
||||
|
||||
Fixes: CVE-2024-3446
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Reported-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reported-by: Yongkang Jia <kangel@zju.edu.cn>
|
||||
Reported-by: Xiao Lei <nop.leixiao@gmail.com>
|
||||
Reported-by: Yiming Tao <taoym@zju.edu.cn>
|
||||
Buglink: https://bugs.launchpad.net/qemu/+bug/1888606
|
||||
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
|
||||
Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Message-Id: <20240409105537.18308-3-philmd@linaro.org>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/display/virtio-gpu.c | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
|
||||
index c28ce1ea72..64fdc18478 100644
|
||||
--- a/hw/display/virtio-gpu.c
|
||||
+++ b/hw/display/virtio-gpu.c
|
||||
@@ -1334,10 +1334,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
|
||||
|
||||
g->ctrl_vq = virtio_get_queue(vdev, 0);
|
||||
g->cursor_vq = virtio_get_queue(vdev, 1);
|
||||
- g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g,
|
||||
- &qdev->mem_reentrancy_guard);
|
||||
- g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g,
|
||||
- &qdev->mem_reentrancy_guard);
|
||||
+ g->ctrl_bh = virtio_bh_new_guarded(qdev, virtio_gpu_ctrl_bh, g);
|
||||
+ g->cursor_bh = virtio_bh_new_guarded(qdev, virtio_gpu_cursor_bh, g);
|
||||
g->reset_bh = qemu_bh_new(virtio_gpu_reset_bh, g);
|
||||
qemu_cond_init(&g->reset_cond);
|
||||
QTAILQ_INIT(&g->reslist);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,128 +0,0 @@
|
||||
From 2308abf0c5da2fe35a0721318c31d22e077663c2 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Fri, 24 Nov 2023 12:17:11 -0500
|
||||
Subject: [PATCH 1/2] hw/ide: reset: cancel async DMA operation before
|
||||
resetting state
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 335: hw/ide: reset: cancel async DMA operation before resetting state
|
||||
RH-Jira: RHEL-15437
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Commit: [1/2] b0f5f7f888559a210f1c6b3c545e337dbbc9cf22 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-15437
|
||||
CVE: CVE-2023-5088
|
||||
Upstream: Merged
|
||||
|
||||
commit 7d7512019fc40c577e2bdd61f114f31a9eb84a8e
|
||||
Author: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Date: Wed Sep 6 15:09:21 2023 +0200
|
||||
|
||||
hw/ide: reset: cancel async DMA operation before resetting state
|
||||
|
||||
If there is a pending DMA operation during ide_bus_reset(), the fact
|
||||
that the IDEState is already reset before the operation is canceled
|
||||
can be problematic. In particular, ide_dma_cb() might be called and
|
||||
then use the reset IDEState which contains the signature after the
|
||||
reset. When used to construct the IO operation this leads to
|
||||
ide_get_sector() returning 0 and nsector being 1. This is particularly
|
||||
bad, because a write command will thus destroy the first sector which
|
||||
often contains a partition table or similar.
|
||||
|
||||
Traces showing the unsolicited write happening with IDEState
|
||||
0x5595af6949d0 being used after reset:
|
||||
|
||||
> ahci_port_write ahci(0x5595af6923f0)[0]: port write [reg:PxSCTL] @ 0x2c: 0x00000300
|
||||
> ahci_reset_port ahci(0x5595af6923f0)[0]: reset port
|
||||
> ide_reset IDEstate 0x5595af6949d0
|
||||
> ide_reset IDEstate 0x5595af694da8
|
||||
> ide_bus_reset_aio aio_cancel
|
||||
> dma_aio_cancel dbs=0x7f64600089a0
|
||||
> dma_blk_cb dbs=0x7f64600089a0 ret=0
|
||||
> dma_complete dbs=0x7f64600089a0 ret=0 cb=0x5595acd40b30
|
||||
> ahci_populate_sglist ahci(0x5595af6923f0)[0]
|
||||
> ahci_dma_prepare_buf ahci(0x5595af6923f0)[0]: prepare buf limit=512 prepared=512
|
||||
> ide_dma_cb IDEState 0x5595af6949d0; sector_num=0 n=1 cmd=DMA WRITE
|
||||
> dma_blk_io dbs=0x7f6420802010 bs=0x5595ae2c6c30 offset=0 to_dev=1
|
||||
> dma_blk_cb dbs=0x7f6420802010 ret=0
|
||||
|
||||
> (gdb) p *qiov
|
||||
> $11 = {iov = 0x7f647c76d840, niov = 1, {{nalloc = 1, local_iov = {iov_base = 0x0,
|
||||
> iov_len = 512}}, {__pad = "\001\000\000\000\000\000\000\000\000\000\000",
|
||||
> size = 512}}}
|
||||
> (gdb) bt
|
||||
> #0 blk_aio_pwritev (blk=0x5595ae2c6c30, offset=0, qiov=0x7f6420802070, flags=0,
|
||||
> cb=0x5595ace6f0b0 <dma_blk_cb>, opaque=0x7f6420802010)
|
||||
> at ../block/block-backend.c:1682
|
||||
> #1 0x00005595ace6f185 in dma_blk_cb (opaque=0x7f6420802010, ret=<optimized out>)
|
||||
> at ../softmmu/dma-helpers.c:179
|
||||
> #2 0x00005595ace6f778 in dma_blk_io (ctx=0x5595ae0609f0,
|
||||
> sg=sg@entry=0x5595af694d00, offset=offset@entry=0, align=align@entry=512,
|
||||
> io_func=io_func@entry=0x5595ace6ee30 <dma_blk_write_io_func>,
|
||||
> io_func_opaque=io_func_opaque@entry=0x5595ae2c6c30,
|
||||
> cb=0x5595acd40b30 <ide_dma_cb>, opaque=0x5595af6949d0,
|
||||
> dir=DMA_DIRECTION_TO_DEVICE) at ../softmmu/dma-helpers.c:244
|
||||
> #3 0x00005595ace6f90a in dma_blk_write (blk=0x5595ae2c6c30,
|
||||
> sg=sg@entry=0x5595af694d00, offset=offset@entry=0, align=align@entry=512,
|
||||
> cb=cb@entry=0x5595acd40b30 <ide_dma_cb>, opaque=opaque@entry=0x5595af6949d0)
|
||||
> at ../softmmu/dma-helpers.c:280
|
||||
> #4 0x00005595acd40e18 in ide_dma_cb (opaque=0x5595af6949d0, ret=<optimized out>)
|
||||
> at ../hw/ide/core.c:953
|
||||
> #5 0x00005595ace6f319 in dma_complete (ret=0, dbs=0x7f64600089a0)
|
||||
> at ../softmmu/dma-helpers.c:107
|
||||
> #6 dma_blk_cb (opaque=0x7f64600089a0, ret=0) at ../softmmu/dma-helpers.c:127
|
||||
> #7 0x00005595ad12227d in blk_aio_complete (acb=0x7f6460005b10)
|
||||
> at ../block/block-backend.c:1527
|
||||
> #8 blk_aio_complete (acb=0x7f6460005b10) at ../block/block-backend.c:1524
|
||||
> #9 blk_aio_write_entry (opaque=0x7f6460005b10) at ../block/block-backend.c:1594
|
||||
> #10 0x00005595ad258cfb in coroutine_trampoline (i0=<optimized out>,
|
||||
> i1=<optimized out>) at ../util/coroutine-ucontext.c:177
|
||||
|
||||
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Tested-by: simon.rowe@nutanix.com
|
||||
Message-ID: <20230906130922.142845-1-f.ebner@proxmox.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/ide/core.c | 14 +++++++-------
|
||||
1 file changed, 7 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/hw/ide/core.c b/hw/ide/core.c
|
||||
index 05a32d0a99..fd50c123e8 100644
|
||||
--- a/hw/ide/core.c
|
||||
+++ b/hw/ide/core.c
|
||||
@@ -2456,19 +2456,19 @@ static void ide_dummy_transfer_stop(IDEState *s)
|
||||
|
||||
void ide_bus_reset(IDEBus *bus)
|
||||
{
|
||||
- bus->unit = 0;
|
||||
- bus->cmd = 0;
|
||||
- ide_reset(&bus->ifs[0]);
|
||||
- ide_reset(&bus->ifs[1]);
|
||||
- ide_clear_hob(bus);
|
||||
-
|
||||
- /* pending async DMA */
|
||||
+ /* pending async DMA - needs the IDEState before it is reset */
|
||||
if (bus->dma->aiocb) {
|
||||
trace_ide_bus_reset_aio();
|
||||
blk_aio_cancel(bus->dma->aiocb);
|
||||
bus->dma->aiocb = NULL;
|
||||
}
|
||||
|
||||
+ bus->unit = 0;
|
||||
+ bus->cmd = 0;
|
||||
+ ide_reset(&bus->ifs[0]);
|
||||
+ ide_reset(&bus->ifs[1]);
|
||||
+ ide_clear_hob(bus);
|
||||
+
|
||||
/* reset dma provider too */
|
||||
if (bus->dma->ops->reset) {
|
||||
bus->dma->ops->reset(bus->dma);
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,449 +0,0 @@
|
||||
From 146cfb23b76b898f08690ffc14aab16d22a41404 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 04/15] hw: replace most qemu_bh_new calls with
|
||||
qemu_bh_new_guarded
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/12] 00c51d30246b3aa529f6043e35ee471660aa1fce (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
Conflicts: In hw/nvme/ctrl.c there are no calls to qemu_bh_new() at the two locations
|
||||
where the replacement is done in the upstream commit. Instead, timer_new_ns() is
|
||||
used. We leave these functions unaltered.
|
||||
|
||||
commit f63192b0544af5d3e4d5edfd85ab520fcf671377
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:09 2023 -0400
|
||||
|
||||
hw: replace most qemu_bh_new calls with qemu_bh_new_guarded
|
||||
|
||||
This protects devices from bh->mmio reentrancy issues.
|
||||
|
||||
Thanks: Thomas Huth <thuth@redhat.com> for diagnosing OS X test failure.
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Reviewed-by: Paul Durrant <paul@xen.org>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-5-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/9pfs/xen-9p-backend.c | 5 ++++-
|
||||
hw/block/dataplane/virtio-blk.c | 3 ++-
|
||||
hw/block/dataplane/xen-block.c | 5 +++--
|
||||
hw/char/virtio-serial-bus.c | 3 ++-
|
||||
hw/display/qxl.c | 9 ++++++---
|
||||
hw/display/virtio-gpu.c | 6 ++++--
|
||||
hw/ide/ahci.c | 3 ++-
|
||||
hw/ide/ahci_internal.h | 1 +
|
||||
hw/ide/core.c | 4 +++-
|
||||
hw/misc/imx_rngc.c | 6 ++++--
|
||||
hw/misc/macio/mac_dbdma.c | 2 +-
|
||||
hw/net/virtio-net.c | 3 ++-
|
||||
hw/scsi/mptsas.c | 3 ++-
|
||||
hw/scsi/scsi-bus.c | 3 ++-
|
||||
hw/scsi/vmw_pvscsi.c | 3 ++-
|
||||
hw/usb/dev-uas.c | 3 ++-
|
||||
hw/usb/hcd-dwc2.c | 3 ++-
|
||||
hw/usb/hcd-ehci.c | 3 ++-
|
||||
hw/usb/hcd-uhci.c | 2 +-
|
||||
hw/usb/host-libusb.c | 6 ++++--
|
||||
hw/usb/redirect.c | 6 ++++--
|
||||
hw/usb/xen-usb.c | 3 ++-
|
||||
hw/virtio/virtio-balloon.c | 5 +++--
|
||||
hw/virtio/virtio-crypto.c | 3 ++-
|
||||
24 files changed, 62 insertions(+), 31 deletions(-)
|
||||
|
||||
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
|
||||
index 65c4979c3c..09f7c13588 100644
|
||||
--- a/hw/9pfs/xen-9p-backend.c
|
||||
+++ b/hw/9pfs/xen-9p-backend.c
|
||||
@@ -60,6 +60,7 @@ typedef struct Xen9pfsDev {
|
||||
|
||||
int num_rings;
|
||||
Xen9pfsRing *rings;
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
} Xen9pfsDev;
|
||||
|
||||
static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
|
||||
@@ -441,7 +442,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
|
||||
xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
|
||||
XEN_FLEX_RING_SIZE(ring_order);
|
||||
|
||||
- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
|
||||
+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh,
|
||||
+ &xen_9pdev->rings[i],
|
||||
+ &xen_9pdev->mem_reentrancy_guard);
|
||||
xen_9pdev->rings[i].out_cons = 0;
|
||||
xen_9pdev->rings[i].out_size = 0;
|
||||
xen_9pdev->rings[i].inprogress = false;
|
||||
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
|
||||
index ee5a5352dc..5f0de7da1e 100644
|
||||
--- a/hw/block/dataplane/virtio-blk.c
|
||||
+++ b/hw/block/dataplane/virtio-blk.c
|
||||
@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
|
||||
} else {
|
||||
s->ctx = qemu_get_aio_context();
|
||||
}
|
||||
- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
|
||||
+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s,
|
||||
+ &DEVICE(vdev)->mem_reentrancy_guard);
|
||||
s->batch_notify_vqs = bitmap_new(conf->num_queues);
|
||||
|
||||
*dataplane = s;
|
||||
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
|
||||
index 860787580a..07855feea6 100644
|
||||
--- a/hw/block/dataplane/xen-block.c
|
||||
+++ b/hw/block/dataplane/xen-block.c
|
||||
@@ -631,8 +631,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
|
||||
} else {
|
||||
dataplane->ctx = qemu_get_aio_context();
|
||||
}
|
||||
- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh,
|
||||
- dataplane);
|
||||
+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
|
||||
+ dataplane,
|
||||
+ &DEVICE(xendev)->mem_reentrancy_guard);
|
||||
|
||||
return dataplane;
|
||||
}
|
||||
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
|
||||
index f01ec2137c..f18124b155 100644
|
||||
--- a/hw/char/virtio-serial-bus.c
|
||||
+++ b/hw/char/virtio-serial-bus.c
|
||||
@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
- port->bh = qemu_bh_new(flush_queued_data_bh, port);
|
||||
+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
port->elem = NULL;
|
||||
}
|
||||
|
||||
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
|
||||
index bcd9e8716a..0f663b9912 100644
|
||||
--- a/hw/display/qxl.c
|
||||
+++ b/hw/display/qxl.c
|
||||
@@ -2206,11 +2206,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
|
||||
|
||||
qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);
|
||||
|
||||
- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl);
|
||||
+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
qxl_reset_state(qxl);
|
||||
|
||||
- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl);
|
||||
- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd);
|
||||
+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd,
|
||||
+ &DEVICE(qxl)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void qxl_realize_primary(PCIDevice *dev, Error **errp)
|
||||
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
|
||||
index d78b9700c7..ecf9079145 100644
|
||||
--- a/hw/display/virtio-gpu.c
|
||||
+++ b/hw/display/virtio-gpu.c
|
||||
@@ -1332,8 +1332,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
|
||||
|
||||
g->ctrl_vq = virtio_get_queue(vdev, 0);
|
||||
g->cursor_vq = virtio_get_queue(vdev, 1);
|
||||
- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
|
||||
- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
|
||||
+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g,
|
||||
+ &qdev->mem_reentrancy_guard);
|
||||
+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g,
|
||||
+ &qdev->mem_reentrancy_guard);
|
||||
QTAILQ_INIT(&g->reslist);
|
||||
QTAILQ_INIT(&g->cmdq);
|
||||
QTAILQ_INIT(&g->fenceq);
|
||||
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
|
||||
index a94c6e26fb..7488b28065 100644
|
||||
--- a/hw/ide/ahci.c
|
||||
+++ b/hw/ide/ahci.c
|
||||
@@ -1504,7 +1504,8 @@ static void ahci_cmd_done(const IDEDMA *dma)
|
||||
ahci_write_fis_d2h(ad);
|
||||
|
||||
if (ad->port_regs.cmd_issue && !ad->check_bh) {
|
||||
- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
|
||||
+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
|
||||
+ &ad->mem_reentrancy_guard);
|
||||
qemu_bh_schedule(ad->check_bh);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
|
||||
index 109de9e2d1..a7768dd69e 100644
|
||||
--- a/hw/ide/ahci_internal.h
|
||||
+++ b/hw/ide/ahci_internal.h
|
||||
@@ -321,6 +321,7 @@ struct AHCIDevice {
|
||||
bool init_d2h_sent;
|
||||
AHCICmdHdr *cur_cmd;
|
||||
NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct AHCIPCIState {
|
||||
diff --git a/hw/ide/core.c b/hw/ide/core.c
|
||||
index 15138225be..05a32d0a99 100644
|
||||
--- a/hw/ide/core.c
|
||||
+++ b/hw/ide/core.c
|
||||
@@ -510,6 +510,7 @@ BlockAIOCB *ide_issue_trim(
|
||||
BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
|
||||
{
|
||||
IDEState *s = opaque;
|
||||
+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
|
||||
TrimAIOCB *iocb;
|
||||
|
||||
/* Paired with a decrement in ide_trim_bh_cb() */
|
||||
@@ -517,7 +518,8 @@ BlockAIOCB *ide_issue_trim(
|
||||
|
||||
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
|
||||
iocb->s = s;
|
||||
- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
|
||||
+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
iocb->ret = 0;
|
||||
iocb->qiov = qiov;
|
||||
iocb->i = -1;
|
||||
diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c
|
||||
index 632c03779c..082c6980ad 100644
|
||||
--- a/hw/misc/imx_rngc.c
|
||||
+++ b/hw/misc/imx_rngc.c
|
||||
@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp)
|
||||
sysbus_init_mmio(sbd, &s->iomem);
|
||||
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s);
|
||||
- s->seed_bh = qemu_bh_new(imx_rngc_seed, s);
|
||||
+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void imx_rngc_reset(DeviceState *dev)
|
||||
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
|
||||
index e220f1a927..f6a9e76fe7 100644
|
||||
--- a/hw/misc/macio/mac_dbdma.c
|
||||
+++ b/hw/misc/macio/mac_dbdma.c
|
||||
@@ -912,7 +912,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp)
|
||||
{
|
||||
DBDMAState *s = MAC_DBDMA(dev);
|
||||
|
||||
- s->bh = qemu_bh_new(DBDMA_run_bh, s);
|
||||
+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static void mac_dbdma_class_init(ObjectClass *oc, void *data)
|
||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
||||
index 7e172ef829..ddaa8fa122 100644
|
||||
--- a/hw/net/virtio-net.c
|
||||
+++ b/hw/net/virtio-net.c
|
||||
@@ -2753,7 +2753,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
|
||||
n->vqs[index].tx_vq =
|
||||
virtio_add_queue(vdev, n->net_conf.tx_queue_size,
|
||||
virtio_net_handle_tx_bh);
|
||||
- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
|
||||
+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
|
||||
+ &DEVICE(vdev)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
n->vqs[index].tx_waiting = 0;
|
||||
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
|
||||
index f6c7765544..ab8aaca85d 100644
|
||||
--- a/hw/scsi/mptsas.c
|
||||
+++ b/hw/scsi/mptsas.c
|
||||
@@ -1313,7 +1313,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
|
||||
}
|
||||
s->max_devices = MPTSAS_NUM_PORTS;
|
||||
|
||||
- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s);
|
||||
+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
|
||||
scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info);
|
||||
}
|
||||
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
|
||||
index 77325d8cc7..b506ab7d04 100644
|
||||
--- a/hw/scsi/scsi-bus.c
|
||||
+++ b/hw/scsi/scsi-bus.c
|
||||
@@ -192,7 +192,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
|
||||
AioContext *ctx = blk_get_aio_context(s->conf.blk);
|
||||
/* The reference is dropped in scsi_dma_restart_bh.*/
|
||||
object_ref(OBJECT(s));
|
||||
- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s);
|
||||
+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
|
||||
+ &DEVICE(s)->mem_reentrancy_guard);
|
||||
qemu_bh_schedule(s->bh);
|
||||
}
|
||||
}
|
||||
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
|
||||
index cd76bd67ab..4c36febbc0 100644
|
||||
--- a/hw/scsi/vmw_pvscsi.c
|
||||
+++ b/hw/scsi/vmw_pvscsi.c
|
||||
@@ -1178,7 +1178,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
|
||||
pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET);
|
||||
}
|
||||
|
||||
- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
|
||||
+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s,
|
||||
+ &DEVICE(pci_dev)->mem_reentrancy_guard);
|
||||
|
||||
scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info);
|
||||
/* override default SCSI bus hotplug-handler, with pvscsi's one */
|
||||
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
|
||||
index 599d6b52a0..a36a7c3013 100644
|
||||
--- a/hw/usb/dev-uas.c
|
||||
+++ b/hw/usb/dev-uas.c
|
||||
@@ -935,7 +935,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp)
|
||||
|
||||
QTAILQ_INIT(&uas->results);
|
||||
QTAILQ_INIT(&uas->requests);
|
||||
- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas);
|
||||
+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas,
|
||||
+ &d->mem_reentrancy_guard);
|
||||
|
||||
dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE);
|
||||
scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info);
|
||||
diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c
|
||||
index e1d96acf7e..0e238f8422 100644
|
||||
--- a/hw/usb/hcd-dwc2.c
|
||||
+++ b/hw/usb/hcd-dwc2.c
|
||||
@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp)
|
||||
s->fi = USB_FRMINTVL - 1;
|
||||
s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s);
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s);
|
||||
- s->async_bh = qemu_bh_new(dwc2_work_bh, s);
|
||||
+ s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
}
|
||||
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
|
||||
index 6caa7ac6c2..df4ff6f2c1 100644
|
||||
--- a/hw/usb/hcd-ehci.c
|
||||
+++ b/hw/usb/hcd-ehci.c
|
||||
@@ -2528,7 +2528,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
|
||||
}
|
||||
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s);
|
||||
- s->async_bh = qemu_bh_new(ehci_work_bh, s);
|
||||
+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
s->device = dev;
|
||||
|
||||
s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
|
||||
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
|
||||
index 7930b868fa..469c5e57e9 100644
|
||||
--- a/hw/usb/hcd-uhci.c
|
||||
+++ b/hw/usb/hcd-uhci.c
|
||||
@@ -1195,7 +1195,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp)
|
||||
USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL);
|
||||
}
|
||||
}
|
||||
- s->bh = qemu_bh_new(uhci_bh, s);
|
||||
+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard);
|
||||
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s);
|
||||
s->num_ports_vmstate = NB_PORTS;
|
||||
QTAILQ_INIT(&s->queues);
|
||||
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
|
||||
index d0d46dd0a4..09b961116b 100644
|
||||
--- a/hw/usb/host-libusb.c
|
||||
+++ b/hw/usb/host-libusb.c
|
||||
@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque)
|
||||
static void usb_host_nodev(USBHostDevice *s)
|
||||
{
|
||||
if (!s->bh_nodev) {
|
||||
- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s);
|
||||
+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s,
|
||||
+ &DEVICE(s)->mem_reentrancy_guard);
|
||||
}
|
||||
qemu_bh_schedule(s->bh_nodev);
|
||||
}
|
||||
@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id)
|
||||
USBHostDevice *dev = opaque;
|
||||
|
||||
if (!dev->bh_postld) {
|
||||
- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev);
|
||||
+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
}
|
||||
qemu_bh_schedule(dev->bh_postld);
|
||||
dev->bh_postld_pending = true;
|
||||
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
|
||||
index 5f0ef9cb3b..59cd3cd7c4 100644
|
||||
--- a/hw/usb/redirect.c
|
||||
+++ b/hw/usb/redirect.c
|
||||
@@ -1437,8 +1437,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
|
||||
- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev);
|
||||
+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev,
|
||||
+ &DEVICE(dev)->mem_reentrancy_guard);
|
||||
dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev);
|
||||
|
||||
packet_id_queue_init(&dev->cancelled, dev, "cancelled");
|
||||
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
|
||||
index 0f7369e7ed..dec91294ad 100644
|
||||
--- a/hw/usb/xen-usb.c
|
||||
+++ b/hw/usb/xen-usb.c
|
||||
@@ -1021,7 +1021,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev)
|
||||
|
||||
QTAILQ_INIT(&usbif->req_free_q);
|
||||
QSIMPLEQ_INIT(&usbif->hotplug_q);
|
||||
- usbif->bh = qemu_bh_new(usbback_bh, usbif);
|
||||
+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif,
|
||||
+ &DEVICE(xendev)->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
static int usbback_free(struct XenLegacyDevice *xendev)
|
||||
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
|
||||
index 9a4f491b54..f503572e27 100644
|
||||
--- a/hw/virtio/virtio-balloon.c
|
||||
+++ b/hw/virtio/virtio-balloon.c
|
||||
@@ -917,8 +917,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
|
||||
precopy_add_notifier(&s->free_page_hint_notify);
|
||||
|
||||
object_ref(OBJECT(s->iothread));
|
||||
- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
|
||||
- virtio_ballloon_get_free_page_hints, s);
|
||||
+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread),
|
||||
+ virtio_ballloon_get_free_page_hints, s,
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
}
|
||||
|
||||
if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
|
||||
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
|
||||
index 54f9bbb789..1be7bb543c 100644
|
||||
--- a/hw/virtio/virtio-crypto.c
|
||||
+++ b/hw/virtio/virtio-crypto.c
|
||||
@@ -817,7 +817,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
|
||||
vcrypto->vqs[i].dataq =
|
||||
virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
|
||||
vcrypto->vqs[i].dataq_bh =
|
||||
- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]);
|
||||
+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i],
|
||||
+ &dev->mem_reentrancy_guard);
|
||||
vcrypto->vqs[i].vcrypto = vcrypto;
|
||||
}
|
||||
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,283 +0,0 @@
|
||||
From 59f02a421ecdba6e856597367020926fc0cb5177 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon, 15 Jan 2024 18:52:30 +0100
|
||||
Subject: [PATCH 4/5] hw/s390x: Move KVM specific PV from hw/ to
|
||||
target/s390x/kvm/
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 348: s390x: Provide some more useful information if decryption of a PV image fails
|
||||
RH-Jira: RHEL-18214
|
||||
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Commit: [4/5] f6095bfdb89268007a0741665284955db4752d46
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-18214
|
||||
|
||||
commit f5f9c6ea11bc807664fdeb9354915c2c9cdcbd89
|
||||
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Date: Sat Jun 24 22:06:44 2023 +0200
|
||||
|
||||
hw/s390x: Move KVM specific PV from hw/ to target/s390x/kvm/
|
||||
|
||||
Protected Virtualization (PV) is not a real hardware device:
|
||||
it is a feature of the firmware on s390x that is exposed to
|
||||
userspace via the KVM interface.
|
||||
|
||||
Move the pv.c/pv.h files to target/s390x/kvm/ to make this clearer.
|
||||
|
||||
Suggested-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-Id: <20230624200644.23931-1-philmd@linaro.org>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Conflicts:
|
||||
hw/s390x/ipl.c
|
||||
hw/s390x/s390-virtio-ccw.c
|
||||
target/s390x/diag.c
|
||||
(simple contextual conflict due to differences in #include statements)
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
MAINTAINERS | 2 --
|
||||
hw/s390x/ipl.c | 2 +-
|
||||
hw/s390x/meson.build | 1 -
|
||||
hw/s390x/s390-pci-kvm.c | 2 +-
|
||||
hw/s390x/s390-virtio-ccw.c | 2 +-
|
||||
hw/s390x/tod-kvm.c | 2 +-
|
||||
target/s390x/arch_dump.c | 2 +-
|
||||
target/s390x/cpu-sysemu.c | 2 +-
|
||||
target/s390x/cpu_features.c | 2 +-
|
||||
target/s390x/cpu_models.c | 2 +-
|
||||
target/s390x/diag.c | 2 +-
|
||||
target/s390x/helper.c | 2 +-
|
||||
target/s390x/ioinst.c | 2 +-
|
||||
target/s390x/kvm/kvm.c | 2 +-
|
||||
target/s390x/kvm/meson.build | 1 +
|
||||
{hw/s390x => target/s390x/kvm}/pv.c | 2 +-
|
||||
{include/hw/s390x => target/s390x/kvm}/pv.h | 0
|
||||
17 files changed, 14 insertions(+), 16 deletions(-)
|
||||
rename {hw/s390x => target/s390x/kvm}/pv.c (99%)
|
||||
rename {include/hw/s390x => target/s390x/kvm}/pv.h (100%)
|
||||
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index b893206fc3..d74ca51154 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -397,8 +397,6 @@ S: Supported
|
||||
F: target/s390x/kvm/
|
||||
F: target/s390x/machine.c
|
||||
F: target/s390x/sigp.c
|
||||
-F: hw/s390x/pv.c
|
||||
-F: include/hw/s390x/pv.h
|
||||
F: gdb-xml/s390*.xml
|
||||
T: git https://github.com/borntraeger/qemu.git s390-next
|
||||
L: qemu-s390x@nongnu.org
|
||||
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
|
||||
index 9051d8652d..c25e247426 100644
|
||||
--- a/hw/s390x/ipl.c
|
||||
+++ b/hw/s390x/ipl.c
|
||||
@@ -27,7 +27,7 @@
|
||||
#include "hw/s390x/vfio-ccw.h"
|
||||
#include "hw/s390x/css.h"
|
||||
#include "hw/s390x/ebcdic.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
#include "ipl.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/config-file.h"
|
||||
diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
|
||||
index 6e6e47fcda..bb3b42f613 100644
|
||||
--- a/hw/s390x/meson.build
|
||||
+++ b/hw/s390x/meson.build
|
||||
@@ -22,7 +22,6 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
|
||||
'tod-kvm.c',
|
||||
's390-skeys-kvm.c',
|
||||
's390-stattrib-kvm.c',
|
||||
- 'pv.c',
|
||||
's390-pci-kvm.c',
|
||||
))
|
||||
s390x_ss.add(when: 'CONFIG_TCG', if_true: files(
|
||||
diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c
|
||||
index 9134fe185f..ff41e4106d 100644
|
||||
--- a/hw/s390x/s390-pci-kvm.c
|
||||
+++ b/hw/s390x/s390-pci-kvm.c
|
||||
@@ -14,7 +14,7 @@
|
||||
#include <linux/kvm.h>
|
||||
|
||||
#include "kvm/kvm_s390x.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
#include "hw/s390x/s390-pci-bus.h"
|
||||
#include "hw/s390x/s390-pci-kvm.h"
|
||||
#include "hw/s390x/s390-pci-inst.h"
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index 17146469ee..7bfa5b4e8f 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -40,7 +40,7 @@
|
||||
#include "hw/qdev-properties.h"
|
||||
#include "hw/s390x/tod.h"
|
||||
#include "sysemu/sysemu.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
#include "migration/blocker.h"
|
||||
#include "qapi/visitor.h"
|
||||
|
||||
diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c
|
||||
index c804c979b5..9776cda50a 100644
|
||||
--- a/hw/s390x/tod-kvm.c
|
||||
+++ b/hw/s390x/tod-kvm.c
|
||||
@@ -13,7 +13,7 @@
|
||||
#include "qemu/module.h"
|
||||
#include "sysemu/runstate.h"
|
||||
#include "hw/s390x/tod.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
#include "kvm/kvm_s390x.h"
|
||||
|
||||
static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp)
|
||||
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
|
||||
index 3b1f178dc3..2554238c16 100644
|
||||
--- a/target/s390x/arch_dump.c
|
||||
+++ b/target/s390x/arch_dump.c
|
||||
@@ -17,8 +17,8 @@
|
||||
#include "s390x-internal.h"
|
||||
#include "elf.h"
|
||||
#include "sysemu/dump.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
#include "kvm/kvm_s390x.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
|
||||
struct S390xUserRegsStruct {
|
||||
uint64_t psw[2];
|
||||
diff --git a/target/s390x/cpu-sysemu.c b/target/s390x/cpu-sysemu.c
|
||||
index 5471e01ee8..547287a949 100644
|
||||
--- a/target/s390x/cpu-sysemu.c
|
||||
+++ b/target/s390x/cpu-sysemu.c
|
||||
@@ -32,7 +32,7 @@
|
||||
#include "qapi/qapi-visit-run-state.h"
|
||||
#include "sysemu/hw_accel.h"
|
||||
|
||||
-#include "hw/s390x/pv.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
#include "hw/boards.h"
|
||||
#include "sysemu/sysemu.h"
|
||||
#include "sysemu/tcg.h"
|
||||
diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
|
||||
index 2e4e11d264..ebb155ce1c 100644
|
||||
--- a/target/s390x/cpu_features.c
|
||||
+++ b/target/s390x/cpu_features.c
|
||||
@@ -15,7 +15,7 @@
|
||||
#include "qemu/module.h"
|
||||
#include "cpu_features.h"
|
||||
#ifndef CONFIG_USER_ONLY
|
||||
-#include "hw/s390x/pv.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
#endif
|
||||
|
||||
#define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
|
||||
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
|
||||
index e7c586c76e..100c5e7b3a 100644
|
||||
--- a/target/s390x/cpu_models.c
|
||||
+++ b/target/s390x/cpu_models.c
|
||||
@@ -22,7 +22,7 @@
|
||||
#include "qemu/qemu-print.h"
|
||||
#ifndef CONFIG_USER_ONLY
|
||||
#include "sysemu/sysemu.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
#endif
|
||||
|
||||
#define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
|
||||
diff --git a/target/s390x/diag.c b/target/s390x/diag.c
|
||||
index 76b01dcd68..7c8714cc27 100644
|
||||
--- a/target/s390x/diag.c
|
||||
+++ b/target/s390x/diag.c
|
||||
@@ -19,9 +19,9 @@
|
||||
#include "sysemu/cpus.h"
|
||||
#include "hw/s390x/ipl.h"
|
||||
#include "hw/s390x/s390-virtio-ccw.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
#include "sysemu/kvm.h"
|
||||
#include "kvm/kvm_s390x.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
|
||||
int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3)
|
||||
{
|
||||
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
|
||||
index 6e35473c7f..860977126a 100644
|
||||
--- a/target/s390x/helper.c
|
||||
+++ b/target/s390x/helper.c
|
||||
@@ -24,7 +24,7 @@
|
||||
#include "exec/gdbstub.h"
|
||||
#include "qemu/timer.h"
|
||||
#include "hw/s390x/ioinst.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
#include "sysemu/hw_accel.h"
|
||||
#include "sysemu/runstate.h"
|
||||
#include "sysemu/tcg.h"
|
||||
diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c
|
||||
index bdae5090bc..409f3e3e63 100644
|
||||
--- a/target/s390x/ioinst.c
|
||||
+++ b/target/s390x/ioinst.c
|
||||
@@ -16,7 +16,7 @@
|
||||
#include "hw/s390x/ioinst.h"
|
||||
#include "trace.h"
|
||||
#include "hw/s390x/s390-pci-bus.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
|
||||
/* All I/O instructions but chsc use the s format */
|
||||
static uint64_t get_address_from_regs(CPUS390XState *env, uint32_t ipb,
|
||||
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
|
||||
index a963866ef4..6d1a6324b9 100644
|
||||
--- a/target/s390x/kvm/kvm.c
|
||||
+++ b/target/s390x/kvm/kvm.c
|
||||
@@ -51,7 +51,7 @@
|
||||
#include "exec/memattrs.h"
|
||||
#include "hw/s390x/s390-virtio-ccw.h"
|
||||
#include "hw/s390x/s390-virtio-hcall.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
|
||||
#ifndef DEBUG_KVM
|
||||
#define DEBUG_KVM 0
|
||||
diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
|
||||
index aef52b6686..739d5b9f54 100644
|
||||
--- a/target/s390x/kvm/meson.build
|
||||
+++ b/target/s390x/kvm/meson.build
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
|
||||
+ 'pv.c',
|
||||
'kvm.c'
|
||||
), if_false: files(
|
||||
'stubs.c'
|
||||
diff --git a/hw/s390x/pv.c b/target/s390x/kvm/pv.c
|
||||
similarity index 99%
|
||||
rename from hw/s390x/pv.c
|
||||
rename to target/s390x/kvm/pv.c
|
||||
index 8a1c71436b..e14db4f41a 100644
|
||||
--- a/hw/s390x/pv.c
|
||||
+++ b/target/s390x/kvm/pv.c
|
||||
@@ -19,9 +19,9 @@
|
||||
#include "qom/object_interfaces.h"
|
||||
#include "exec/confidential-guest-support.h"
|
||||
#include "hw/s390x/ipl.h"
|
||||
-#include "hw/s390x/pv.h"
|
||||
#include "hw/s390x/sclp.h"
|
||||
#include "target/s390x/kvm/kvm_s390x.h"
|
||||
+#include "target/s390x/kvm/pv.h"
|
||||
|
||||
static bool info_valid;
|
||||
static struct kvm_s390_pv_info_vm info_vm;
|
||||
diff --git a/include/hw/s390x/pv.h b/target/s390x/kvm/pv.h
|
||||
similarity index 100%
|
||||
rename from include/hw/s390x/pv.h
|
||||
rename to target/s390x/kvm/pv.h
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,100 +0,0 @@
|
||||
From 053faafcf523b0ea4d841c0af8e7e26a2cddd5e8 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon, 15 Jan 2024 14:00:04 +0100
|
||||
Subject: [PATCH 3/5] hw/s390x/pv: Restrict Protected Virtualization to sysemu
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 348: s390x: Provide some more useful information if decryption of a PV image fails
|
||||
RH-Jira: RHEL-18214
|
||||
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Commit: [3/5] 17b11f9fd2b53c7d33c09a62f28cfca19b18e798
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-18214
|
||||
|
||||
commit 3ea7e312671686e616efa1b8caa5f5ce2d06543a
|
||||
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Date: Sat Dec 17 16:24:52 2022 +0100
|
||||
|
||||
hw/s390x/pv: Restrict Protected Virtualization to sysemu
|
||||
|
||||
Protected Virtualization is irrelevant in user emulation.
|
||||
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Message-Id: <20221217152454.96388-4-philmd@linaro.org>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
target/s390x/cpu_features.c | 4 ++++
|
||||
target/s390x/cpu_models.c | 4 +++-
|
||||
2 files changed, 7 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
|
||||
index 5528acd082..2e4e11d264 100644
|
||||
--- a/target/s390x/cpu_features.c
|
||||
+++ b/target/s390x/cpu_features.c
|
||||
@@ -14,7 +14,9 @@
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/module.h"
|
||||
#include "cpu_features.h"
|
||||
+#ifndef CONFIG_USER_ONLY
|
||||
#include "hw/s390x/pv.h"
|
||||
+#endif
|
||||
|
||||
#define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
|
||||
[S390_FEAT_##_FEAT] = { \
|
||||
@@ -107,6 +109,7 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
|
||||
feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
|
||||
}
|
||||
|
||||
+#ifndef CONFIG_USER_ONLY
|
||||
if (!s390_is_pv()) {
|
||||
return;
|
||||
}
|
||||
@@ -147,6 +150,7 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
|
||||
default:
|
||||
return;
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
|
||||
void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
|
||||
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
|
||||
index 454485e706..e7c586c76e 100644
|
||||
--- a/target/s390x/cpu_models.c
|
||||
+++ b/target/s390x/cpu_models.c
|
||||
@@ -22,8 +22,8 @@
|
||||
#include "qemu/qemu-print.h"
|
||||
#ifndef CONFIG_USER_ONLY
|
||||
#include "sysemu/sysemu.h"
|
||||
-#endif
|
||||
#include "hw/s390x/pv.h"
|
||||
+#endif
|
||||
|
||||
#define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
|
||||
{ \
|
||||
@@ -236,6 +236,7 @@ bool s390_has_feat(S390Feat feat)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+#ifndef CONFIG_USER_ONLY
|
||||
if (s390_is_pv()) {
|
||||
switch (feat) {
|
||||
case S390_FEAT_DIAG_318:
|
||||
@@ -259,6 +260,7 @@ bool s390_has_feat(S390Feat feat)
|
||||
break;
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
return test_bit(feat, cpu->model->features);
|
||||
}
|
||||
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,260 +0,0 @@
|
||||
From 57a26ba1c4053cdc426653f921e66f7a8efd3ce7 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon, 22 May 2023 11:10:11 +0200
|
||||
Subject: [PATCH 12/15] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI
|
||||
controller (CVE-2023-0330)
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [12/12] 28f5e04344109d8514869c50468bef481437201d (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
|
||||
commit b987718bbb1d0eabf95499b976212dd5f0120d75
|
||||
Author: Thomas Huth <thuth@redhat.com>
|
||||
Date: Mon May 22 11:10:11 2023 +0200
|
||||
|
||||
hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330)
|
||||
|
||||
We cannot use the generic reentrancy guard in the LSI code, so
|
||||
we have to manually prevent endless reentrancy here. The problematic
|
||||
lsi_execute_script() function has already a way to detect whether
|
||||
too many instructions have been executed - we just have to slightly
|
||||
change the logic here that it also takes into account if the function
|
||||
has been called too often in a reentrant way.
|
||||
|
||||
The code in fuzz-lsi53c895a-test.c has been taken from an earlier
|
||||
patch by Mauro Matteo Cascella.
|
||||
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563
|
||||
Message-Id: <20230522091011.1082574-1-thuth@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/scsi/lsi53c895a.c | 23 +++--
|
||||
tests/qtest/fuzz-lsi53c895a-test.c | 161 +++++++++++++++++++++++++++++
|
||||
2 files changed, 178 insertions(+), 6 deletions(-)
|
||||
create mode 100644 tests/qtest/fuzz-lsi53c895a-test.c
|
||||
|
||||
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
|
||||
index 2b9cb2ac5d..b60786fd56 100644
|
||||
--- a/hw/scsi/lsi53c895a.c
|
||||
+++ b/hw/scsi/lsi53c895a.c
|
||||
@@ -1133,15 +1133,24 @@ static void lsi_execute_script(LSIState *s)
|
||||
uint32_t addr, addr_high;
|
||||
int opcode;
|
||||
int insn_processed = 0;
|
||||
+ static int reentrancy_level;
|
||||
+
|
||||
+ reentrancy_level++;
|
||||
|
||||
s->istat1 |= LSI_ISTAT1_SRUN;
|
||||
again:
|
||||
- if (++insn_processed > LSI_MAX_INSN) {
|
||||
- /* Some windows drivers make the device spin waiting for a memory
|
||||
- location to change. If we have been executed a lot of code then
|
||||
- assume this is the case and force an unexpected device disconnect.
|
||||
- This is apparently sufficient to beat the drivers into submission.
|
||||
- */
|
||||
+ /*
|
||||
+ * Some windows drivers make the device spin waiting for a memory location
|
||||
+ * to change. If we have executed more than LSI_MAX_INSN instructions then
|
||||
+ * assume this is the case and force an unexpected device disconnect. This
|
||||
+ * is apparently sufficient to beat the drivers into submission.
|
||||
+ *
|
||||
+ * Another issue (CVE-2023-0330) can occur if the script is programmed to
|
||||
+ * trigger itself again and again. Avoid this problem by stopping after
|
||||
+ * being called multiple times in a reentrant way (8 is an arbitrary value
|
||||
+ * which should be enough for all valid use cases).
|
||||
+ */
|
||||
+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) {
|
||||
if (!(s->sien0 & LSI_SIST0_UDC)) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR,
|
||||
"lsi_scsi: inf. loop with UDC masked");
|
||||
@@ -1595,6 +1604,8 @@ again:
|
||||
}
|
||||
}
|
||||
trace_lsi_execute_script_stop();
|
||||
+
|
||||
+ reentrancy_level--;
|
||||
}
|
||||
|
||||
static uint8_t lsi_reg_readb(LSIState *s, int offset)
|
||||
diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
new file mode 100644
|
||||
index 0000000000..1b55928b9f
|
||||
--- /dev/null
|
||||
+++ b/tests/qtest/fuzz-lsi53c895a-test.c
|
||||
@@ -0,0 +1,161 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
+/*
|
||||
+ * QTest fuzzer-generated testcase for LSI53C895A device
|
||||
+ *
|
||||
+ * Copyright (c) Red Hat
|
||||
+ */
|
||||
+
|
||||
+#include "qemu/osdep.h"
|
||||
+#include "libqtest.h"
|
||||
+
|
||||
+/*
|
||||
+ * This used to trigger a DMA reentrancy issue
|
||||
+ * leading to memory corruption bugs like stack
|
||||
+ * overflow or use-after-free
|
||||
+ * https://gitlab.com/qemu-project/qemu/-/issues/1563
|
||||
+ */
|
||||
+static void test_lsi_dma_reentrancy(void)
|
||||
+{
|
||||
+ QTestState *s;
|
||||
+
|
||||
+ s = qtest_init("-M q35 -m 512M -nodefaults "
|
||||
+ "-blockdev driver=null-co,node-name=null0 "
|
||||
+ "-device lsi53c810 -device scsi-cd,drive=null0");
|
||||
+
|
||||
+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */
|
||||
+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */
|
||||
+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */
|
||||
+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/
|
||||
+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */
|
||||
+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/
|
||||
+ qtest_writel(s, 0xff000000, 0xc0000024);
|
||||
+ qtest_writel(s, 0xff000114, 0x00000080);
|
||||
+ qtest_writel(s, 0xff00012c, 0xff000000);
|
||||
+ qtest_writel(s, 0xff000004, 0xff000114);
|
||||
+ qtest_writel(s, 0xff000008, 0xff100014);
|
||||
+ qtest_writel(s, 0xff10002f, 0x000000ff);
|
||||
+
|
||||
+ qtest_quit(s);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * This used to trigger a UAF in lsi_do_msgout()
|
||||
+ * https://gitlab.com/qemu-project/qemu/-/issues/972
|
||||
+ */
|
||||
+static void test_lsi_do_msgout_cancel_req(void)
|
||||
+{
|
||||
+ QTestState *s;
|
||||
+
|
||||
+ if (sizeof(void *) == 4) {
|
||||
+ g_test_skip("memory size too big for 32-bit build");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ s = qtest_init("-M q35 -m 2G -nodefaults "
|
||||
+ "-device lsi53c895a,id=scsi "
|
||||
+ "-device scsi-hd,drive=disk0 "
|
||||
+ "-drive file=null-co://,id=disk0,if=none,format=raw");
|
||||
+
|
||||
+ qtest_outl(s, 0xcf8, 0x80000810);
|
||||
+ qtest_outl(s, 0xcf8, 0xc000);
|
||||
+ qtest_outl(s, 0xcf8, 0x80000810);
|
||||
+ qtest_outw(s, 0xcfc, 0x7);
|
||||
+ qtest_outl(s, 0xcf8, 0x80000810);
|
||||
+ qtest_outl(s, 0xcfc, 0xc000);
|
||||
+ qtest_outl(s, 0xcf8, 0x80000804);
|
||||
+ qtest_outw(s, 0xcfc, 0x05);
|
||||
+ qtest_writeb(s, 0x69736c10, 0x08);
|
||||
+ qtest_writeb(s, 0x69736c13, 0x58);
|
||||
+ qtest_writeb(s, 0x69736c1a, 0x01);
|
||||
+ qtest_writeb(s, 0x69736c1b, 0x06);
|
||||
+ qtest_writeb(s, 0x69736c22, 0x01);
|
||||
+ qtest_writeb(s, 0x69736c23, 0x07);
|
||||
+ qtest_writeb(s, 0x69736c2b, 0x02);
|
||||
+ qtest_writeb(s, 0x69736c48, 0x08);
|
||||
+ qtest_writeb(s, 0x69736c4b, 0x58);
|
||||
+ qtest_writeb(s, 0x69736c52, 0x04);
|
||||
+ qtest_writeb(s, 0x69736c53, 0x06);
|
||||
+ qtest_writeb(s, 0x69736c5b, 0x02);
|
||||
+ qtest_outl(s, 0xc02d, 0x697300);
|
||||
+ qtest_writeb(s, 0x5a554662, 0x01);
|
||||
+ qtest_writeb(s, 0x5a554663, 0x07);
|
||||
+ qtest_writeb(s, 0x5a55466a, 0x10);
|
||||
+ qtest_writeb(s, 0x5a55466b, 0x22);
|
||||
+ qtest_writeb(s, 0x5a55466c, 0x5a);
|
||||
+ qtest_writeb(s, 0x5a55466d, 0x5a);
|
||||
+ qtest_writeb(s, 0x5a55466e, 0x34);
|
||||
+ qtest_writeb(s, 0x5a55466f, 0x5a);
|
||||
+ qtest_writeb(s, 0x5a345a5a, 0x77);
|
||||
+ qtest_writeb(s, 0x5a345a5b, 0x55);
|
||||
+ qtest_writeb(s, 0x5a345a5c, 0x51);
|
||||
+ qtest_writeb(s, 0x5a345a5d, 0x27);
|
||||
+ qtest_writeb(s, 0x27515577, 0x41);
|
||||
+ qtest_outl(s, 0xc02d, 0x5a5500);
|
||||
+ qtest_writeb(s, 0x364001d0, 0x08);
|
||||
+ qtest_writeb(s, 0x364001d3, 0x58);
|
||||
+ qtest_writeb(s, 0x364001da, 0x01);
|
||||
+ qtest_writeb(s, 0x364001db, 0x26);
|
||||
+ qtest_writeb(s, 0x364001dc, 0x0d);
|
||||
+ qtest_writeb(s, 0x364001dd, 0xae);
|
||||
+ qtest_writeb(s, 0x364001de, 0x41);
|
||||
+ qtest_writeb(s, 0x364001df, 0x5a);
|
||||
+ qtest_writeb(s, 0x5a41ae0d, 0xf8);
|
||||
+ qtest_writeb(s, 0x5a41ae0e, 0x36);
|
||||
+ qtest_writeb(s, 0x5a41ae0f, 0xd7);
|
||||
+ qtest_writeb(s, 0x5a41ae10, 0x36);
|
||||
+ qtest_writeb(s, 0x36d736f8, 0x0c);
|
||||
+ qtest_writeb(s, 0x36d736f9, 0x80);
|
||||
+ qtest_writeb(s, 0x36d736fa, 0x0d);
|
||||
+ qtest_outl(s, 0xc02d, 0x364000);
|
||||
+
|
||||
+ qtest_quit(s);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * This used to trigger the assert in lsi_do_dma()
|
||||
+ * https://bugs.launchpad.net/qemu/+bug/697510
|
||||
+ * https://bugs.launchpad.net/qemu/+bug/1905521
|
||||
+ * https://bugs.launchpad.net/qemu/+bug/1908515
|
||||
+ */
|
||||
+static void test_lsi_do_dma_empty_queue(void)
|
||||
+{
|
||||
+ QTestState *s;
|
||||
+
|
||||
+ s = qtest_init("-M q35 -nographic -monitor none -serial none "
|
||||
+ "-drive if=none,id=drive0,"
|
||||
+ "file=null-co://,file.read-zeroes=on,format=raw "
|
||||
+ "-device lsi53c895a,id=scsi0 "
|
||||
+ "-device scsi-hd,drive=drive0,"
|
||||
+ "bus=scsi0.0,channel=0,scsi-id=0,lun=0");
|
||||
+ qtest_outl(s, 0xcf8, 0x80001814);
|
||||
+ qtest_outl(s, 0xcfc, 0xe1068000);
|
||||
+ qtest_outl(s, 0xcf8, 0x80001818);
|
||||
+ qtest_outl(s, 0xcf8, 0x80001804);
|
||||
+ qtest_outw(s, 0xcfc, 0x7);
|
||||
+ qtest_outl(s, 0xcf8, 0x80002010);
|
||||
+
|
||||
+ qtest_writeb(s, 0xe106802e, 0xff); /* Fill DSP bits 16-23 */
|
||||
+ qtest_writeb(s, 0xe106802f, 0xff); /* Fill DSP bits 24-31: trigger SCRIPT */
|
||||
+
|
||||
+ qtest_quit(s);
|
||||
+}
|
||||
+
|
||||
+int main(int argc, char **argv)
|
||||
+{
|
||||
+ g_test_init(&argc, &argv, NULL);
|
||||
+
|
||||
+ if (!qtest_has_device("lsi53c895a")) {
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ qtest_add_func("fuzz/lsi53c895a/lsi_do_dma_empty_queue",
|
||||
+ test_lsi_do_dma_empty_queue);
|
||||
+
|
||||
+ qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req",
|
||||
+ test_lsi_do_msgout_cancel_req);
|
||||
+
|
||||
+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy",
|
||||
+ test_lsi_dma_reentrancy);
|
||||
+
|
||||
+ return g_test_run();
|
||||
+}
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,86 +0,0 @@
|
||||
From 1b62d61c495bf4cd3a819ab8d1ef024d153e0ece Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Thu, 18 Jul 2024 09:40:29 -0400
|
||||
Subject: [PATCH 3/6] hw/virtio: Introduce virtio_bh_new_guarded() helper
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 380: QEMU: virtio: DMA reentrancy issue leads to double free vulnerability
|
||||
RH-Jira: RHEL-32276
|
||||
RH-Acked-by: Gerd Hoffmann <None>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/6] 1cbde7ddb8393b72e2e8d457b5e2d739116567a9 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-32276
|
||||
CVE: CVE-2024-3446
|
||||
Upstream: Merged
|
||||
|
||||
commit ec0504b989ca61e03636384d3602b7bf07ffe4da
|
||||
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Date: Thu Apr 4 20:56:11 2024 +0200
|
||||
|
||||
hw/virtio: Introduce virtio_bh_new_guarded() helper
|
||||
|
||||
Introduce virtio_bh_new_guarded(), similar to qemu_bh_new_guarded()
|
||||
but using the transport memory guard, instead of the device one
|
||||
(there can only be one virtio device per virtio bus).
|
||||
|
||||
Inspired-by: Gerd Hoffmann <kraxel@redhat.com>
|
||||
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
|
||||
Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Message-Id: <20240409105537.18308-2-philmd@linaro.org>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/virtio/virtio.c | 10 ++++++++++
|
||||
include/hw/virtio/virtio.h | 7 +++++++
|
||||
2 files changed, 17 insertions(+)
|
||||
|
||||
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
|
||||
index ea7c079fb0..5ae9c44841 100644
|
||||
--- a/hw/virtio/virtio.c
|
||||
+++ b/hw/virtio/virtio.c
|
||||
@@ -3874,3 +3874,13 @@ static void virtio_register_types(void)
|
||||
}
|
||||
|
||||
type_init(virtio_register_types)
|
||||
+
|
||||
+QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
|
||||
+ QEMUBHFunc *cb, void *opaque,
|
||||
+ const char *name)
|
||||
+{
|
||||
+ DeviceState *transport = qdev_get_parent_bus(dev)->parent;
|
||||
+
|
||||
+ return qemu_bh_new_full(cb, opaque, name,
|
||||
+ &transport->mem_reentrancy_guard);
|
||||
+}
|
||||
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
|
||||
index 8bab9cfb75..731c631a81 100644
|
||||
--- a/include/hw/virtio/virtio.h
|
||||
+++ b/include/hw/virtio/virtio.h
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "standard-headers/linux/virtio_config.h"
|
||||
#include "standard-headers/linux/virtio_ring.h"
|
||||
#include "qom/object.h"
|
||||
+#include "block/aio.h"
|
||||
|
||||
/* A guest should never accept this. It implies negotiation is broken. */
|
||||
#define VIRTIO_F_BAD_FEATURE 30
|
||||
@@ -397,4 +398,10 @@ static inline bool virtio_device_disabled(VirtIODevice *vdev)
|
||||
bool virtio_legacy_allowed(VirtIODevice *vdev);
|
||||
bool virtio_legacy_check_disabled(VirtIODevice *vdev);
|
||||
|
||||
+QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
|
||||
+ QEMUBHFunc *cb, void *opaque,
|
||||
+ const char *name);
|
||||
+#define virtio_bh_new_guarded(dev, cb, opaque) \
|
||||
+ virtio_bh_new_guarded_full((dev), (cb), (opaque), (stringify(cb)))
|
||||
+
|
||||
#endif
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,62 +0,0 @@
|
||||
From 2ecbd673a0e2191821ce88128587f709936ad765 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Thu, 18 Jul 2024 09:21:27 -0400
|
||||
Subject: [PATCH 6/6] hw/virtio/virtio-crypto: Protect from DMA re-entrancy
|
||||
bugs
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 380: QEMU: virtio: DMA reentrancy issue leads to double free vulnerability
|
||||
RH-Jira: RHEL-32276
|
||||
RH-Acked-by: Gerd Hoffmann <None>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [6/6] 975ac4640fd8e7cbf3820757787ee7b1270173be (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-32276
|
||||
CVE: CVE-2024-3446
|
||||
Upstream: Merged
|
||||
|
||||
commit f4729ec39ad97a42ceaa7b5697f84f440ea6e5dc
|
||||
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Date: Thu Apr 4 20:56:41 2024 +0200
|
||||
|
||||
hw/virtio/virtio-crypto: Protect from DMA re-entrancy bugs
|
||||
|
||||
Replace qemu_bh_new_guarded() by virtio_bh_new_guarded()
|
||||
so the bus and device use the same guard. Otherwise the
|
||||
DMA-reentrancy protection can be bypassed.
|
||||
|
||||
Fixes: CVE-2024-3446
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Suggested-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
|
||||
Acked-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
Message-Id: <20240409105537.18308-5-philmd@linaro.org>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/virtio/virtio-crypto.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
|
||||
index 1be7bb543c..1741d4aba1 100644
|
||||
--- a/hw/virtio/virtio-crypto.c
|
||||
+++ b/hw/virtio/virtio-crypto.c
|
||||
@@ -817,8 +817,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
|
||||
vcrypto->vqs[i].dataq =
|
||||
virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
|
||||
vcrypto->vqs[i].dataq_bh =
|
||||
- qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i],
|
||||
- &dev->mem_reentrancy_guard);
|
||||
+ virtio_bh_new_guarded(dev, virtio_crypto_dataq_bh,
|
||||
+ &vcrypto->vqs[i]);
|
||||
vcrypto->vqs[i].vcrypto = vcrypto;
|
||||
}
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,53 +0,0 @@
|
||||
From 18ac13c7d64266238bd44b2188e0d044af3c3377 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 15:14:14 -0400
|
||||
Subject: [PATCH 4/5] i386/cpu: Update how the EBX register of CPUID 0x8000001F
|
||||
is set
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 296: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214840
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [4/4] 8b236fd9bc4c177bfacf6220a429e711b5bf062e
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214840
|
||||
|
||||
commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:30 2022 -0500
|
||||
|
||||
i386/cpu: Update how the EBX register of CPUID 0x8000001F is set
|
||||
|
||||
Update the setting of CPUID 0x8000001F EBX to clearly document the ranges
|
||||
associated with fields being set.
|
||||
|
||||
Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
target/i386/cpu.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||||
index 9d3dcdcc0d..265f0aadfc 100644
|
||||
--- a/target/i386/cpu.c
|
||||
+++ b/target/i386/cpu.c
|
||||
@@ -5836,8 +5836,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||||
if (sev_enabled()) {
|
||||
*eax = 0x2;
|
||||
*eax |= sev_es_enabled() ? 0x8 : 0;
|
||||
- *ebx = sev_get_cbit_position();
|
||||
- *ebx |= sev_get_reduced_phys_bits() << 6;
|
||||
+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
|
||||
+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
|
||||
}
|
||||
break;
|
||||
default:
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,78 +0,0 @@
|
||||
From 19504ea76b6341c11213316402bb5194487e1f01 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 15:13:19 -0400
|
||||
Subject: [PATCH 3/5] i386/sev: Update checks and information related to
|
||||
reduced-phys-bits
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 296: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214840
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [3/4] b617173d2b15fa39cdc02b5c1ac4d52e9b0dfede
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214840
|
||||
|
||||
commit 8168fed9f84e3128f7628969ae78af49433d5ce7
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:29 2022 -0500
|
||||
|
||||
i386/sev: Update checks and information related to reduced-phys-bits
|
||||
|
||||
The value of the reduced-phys-bits parameter is propagated to the CPUID
|
||||
information exposed to the guest. Update the current validation check to
|
||||
account for the size of the CPUID field (6-bits), ensuring the value is
|
||||
in the range of 1 to 63.
|
||||
|
||||
Maintain backward compatibility, to an extent, by allowing a value greater
|
||||
than 1 (so that the previously documented value of 5 still works), but not
|
||||
allowing anything over 63.
|
||||
|
||||
Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <cca5341a95ac73f904e6300f10b04f9c62e4e8ff.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
target/i386/sev.c | 17 ++++++++++++++---
|
||||
1 file changed, 14 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/target/i386/sev.c b/target/i386/sev.c
|
||||
index 025ff7a6f8..ba6a65e90c 100644
|
||||
--- a/target/i386/sev.c
|
||||
+++ b/target/i386/sev.c
|
||||
@@ -892,15 +892,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
|
||||
host_cbitpos = ebx & 0x3f;
|
||||
|
||||
+ /*
|
||||
+ * The cbitpos value will be placed in bit positions 5:0 of the EBX
|
||||
+ * register of CPUID 0x8000001F. No need to verify the range as the
|
||||
+ * comparison against the host value accomplishes that.
|
||||
+ */
|
||||
if (host_cbitpos != sev->cbitpos) {
|
||||
error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
|
||||
__func__, host_cbitpos, sev->cbitpos);
|
||||
goto err;
|
||||
}
|
||||
|
||||
- if (sev->reduced_phys_bits < 1) {
|
||||
- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1,"
|
||||
- " requested '%d'", __func__, sev->reduced_phys_bits);
|
||||
+ /*
|
||||
+ * The reduced-phys-bits value will be placed in bit positions 11:6 of
|
||||
+ * the EBX register of CPUID 0x8000001F, so verify the supplied value
|
||||
+ * is in the range of 1 to 63.
|
||||
+ */
|
||||
+ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) {
|
||||
+ error_setg(errp, "%s: reduced_phys_bits check failed,"
|
||||
+ " it should be in the range of 1 to 63, requested '%d'",
|
||||
+ __func__, sev->reduced_phys_bits);
|
||||
goto err;
|
||||
}
|
||||
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,367 +0,0 @@
|
||||
From 88b5e059462a72ca758d84c0d4d0895a03baac50 Mon Sep 17 00:00:00 2001
|
||||
From: "manish.mishra" <manish.mishra@nutanix.com>
|
||||
Date: Tue, 20 Dec 2022 18:44:17 +0000
|
||||
Subject: [PATCH 1/3] io: Add support for MSG_PEEK for socket channel
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Peter Xu <peterx@redhat.com>
|
||||
RH-MergeRequest: 258: migration: Fix multifd crash due to channel disorder
|
||||
RH-Bugzilla: 2137740
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
RH-Commit: [1/2] 04fc6fae358599b8509f5355469d2e8720f01903
|
||||
|
||||
Conflicts:
|
||||
io/channel-null.c
|
||||
migration/channel-block.c
|
||||
|
||||
Because these two files do not exist in rhel8.8 tree, dropping the
|
||||
changes.
|
||||
|
||||
MSG_PEEK peeks at the channel. The data is treated as unread and
|
||||
the next read shall still return this data. This support is
|
||||
currently added only for socket class. Extra parameter 'flags'
|
||||
is added to io_readv calls to pass extra read flags like MSG_PEEK.
|
||||
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Suggested-by: Daniel P. Berrange <berrange@redhat.com>
|
||||
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
|
||||
Signed-off-by: Juan Quintela <quintela@redhat.com>
|
||||
(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3)
|
||||
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||
---
|
||||
chardev/char-socket.c | 4 ++--
|
||||
include/io/channel.h | 6 ++++++
|
||||
io/channel-buffer.c | 1 +
|
||||
io/channel-command.c | 1 +
|
||||
io/channel-file.c | 1 +
|
||||
io/channel-socket.c | 19 ++++++++++++++++++-
|
||||
io/channel-tls.c | 1 +
|
||||
io/channel-websock.c | 1 +
|
||||
io/channel.c | 16 ++++++++++++----
|
||||
migration/rdma.c | 1 +
|
||||
scsi/qemu-pr-helper.c | 2 +-
|
||||
tests/qtest/tpm-emu.c | 2 +-
|
||||
tests/unit/test-io-channel-socket.c | 1 +
|
||||
util/vhost-user-server.c | 2 +-
|
||||
14 files changed, 48 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
|
||||
index 836cfa0bc2..4cdf79e0c2 100644
|
||||
--- a/chardev/char-socket.c
|
||||
+++ b/chardev/char-socket.c
|
||||
@@ -339,11 +339,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
|
||||
if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
|
||||
ret = qio_channel_readv_full(s->ioc, &iov, 1,
|
||||
&msgfds, &msgfds_num,
|
||||
- NULL);
|
||||
+ 0, NULL);
|
||||
} else {
|
||||
ret = qio_channel_readv_full(s->ioc, &iov, 1,
|
||||
NULL, NULL,
|
||||
- NULL);
|
||||
+ 0, NULL);
|
||||
}
|
||||
|
||||
if (ret == QIO_CHANNEL_ERR_BLOCK) {
|
||||
diff --git a/include/io/channel.h b/include/io/channel.h
|
||||
index c680ee7480..716235d496 100644
|
||||
--- a/include/io/channel.h
|
||||
+++ b/include/io/channel.h
|
||||
@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
|
||||
|
||||
#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
|
||||
|
||||
+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
|
||||
+
|
||||
typedef enum QIOChannelFeature QIOChannelFeature;
|
||||
|
||||
enum QIOChannelFeature {
|
||||
@@ -41,6 +43,7 @@ enum QIOChannelFeature {
|
||||
QIO_CHANNEL_FEATURE_SHUTDOWN,
|
||||
QIO_CHANNEL_FEATURE_LISTEN,
|
||||
QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
|
||||
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
|
||||
};
|
||||
|
||||
|
||||
@@ -114,6 +117,7 @@ struct QIOChannelClass {
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp);
|
||||
int (*io_close)(QIOChannel *ioc,
|
||||
Error **errp);
|
||||
@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc,
|
||||
* @niov: the length of the @iov array
|
||||
* @fds: pointer to an array that will received file handles
|
||||
* @nfds: pointer filled with number of elements in @fds on return
|
||||
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
|
||||
* @errp: pointer to a NULL-initialized error object
|
||||
*
|
||||
* Read data from the IO channel, storing it in the
|
||||
@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp);
|
||||
|
||||
|
||||
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
|
||||
index bf52011be2..8096180f85 100644
|
||||
--- a/io/channel-buffer.c
|
||||
+++ b/io/channel-buffer.c
|
||||
@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
|
||||
diff --git a/io/channel-command.c b/io/channel-command.c
|
||||
index 5ff1691bad..2834413b3a 100644
|
||||
--- a/io/channel-command.c
|
||||
+++ b/io/channel-command.c
|
||||
@@ -230,6 +230,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
|
||||
diff --git a/io/channel-file.c b/io/channel-file.c
|
||||
index 348a48545e..490f0e5d84 100644
|
||||
--- a/io/channel-file.c
|
||||
+++ b/io/channel-file.c
|
||||
@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
|
||||
diff --git a/io/channel-socket.c b/io/channel-socket.c
|
||||
index 6010ad7017..ca8b180b69 100644
|
||||
--- a/io/channel-socket.c
|
||||
+++ b/io/channel-socket.c
|
||||
@@ -174,6 +174,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
|
||||
}
|
||||
#endif
|
||||
|
||||
+ qio_channel_set_feature(QIO_CHANNEL(ioc),
|
||||
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -407,6 +410,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
|
||||
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
+ qio_channel_set_feature(QIO_CHANNEL(cioc),
|
||||
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
|
||||
+
|
||||
trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
|
||||
return cioc;
|
||||
|
||||
@@ -497,6 +503,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
|
||||
@@ -518,6 +525,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
|
||||
}
|
||||
|
||||
+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
|
||||
+ sflags |= MSG_PEEK;
|
||||
+ }
|
||||
+
|
||||
retry:
|
||||
ret = recvmsg(sioc->fd, &msg, sflags);
|
||||
if (ret < 0) {
|
||||
@@ -625,11 +636,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
|
||||
ssize_t done = 0;
|
||||
ssize_t i;
|
||||
+ int sflags = 0;
|
||||
+
|
||||
+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
|
||||
+ sflags |= MSG_PEEK;
|
||||
+ }
|
||||
|
||||
for (i = 0; i < niov; i++) {
|
||||
ssize_t ret;
|
||||
@@ -637,7 +654,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
ret = recv(sioc->fd,
|
||||
iov[i].iov_base,
|
||||
iov[i].iov_len,
|
||||
- 0);
|
||||
+ sflags);
|
||||
if (ret < 0) {
|
||||
if (errno == EAGAIN) {
|
||||
if (done) {
|
||||
diff --git a/io/channel-tls.c b/io/channel-tls.c
|
||||
index 4ce890a538..c730cb8ec5 100644
|
||||
--- a/io/channel-tls.c
|
||||
+++ b/io/channel-tls.c
|
||||
@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
|
||||
diff --git a/io/channel-websock.c b/io/channel-websock.c
|
||||
index 035dd6075b..13c94f2afe 100644
|
||||
--- a/io/channel-websock.c
|
||||
+++ b/io/channel-websock.c
|
||||
@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
|
||||
diff --git a/io/channel.c b/io/channel.c
|
||||
index 0640941ac5..a8c7f11649 100644
|
||||
--- a/io/channel.c
|
||||
+++ b/io/channel.c
|
||||
@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
|
||||
@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
|
||||
return -1;
|
||||
}
|
||||
|
||||
- return klass->io_readv(ioc, iov, niov, fds, nfds, errp);
|
||||
+ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) &&
|
||||
+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
|
||||
+ error_setg_errno(errp, EINVAL,
|
||||
+ "Channel does not support peek read");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp);
|
||||
}
|
||||
|
||||
|
||||
@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc,
|
||||
while ((nlocal_iov > 0) || local_fds) {
|
||||
ssize_t len;
|
||||
len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
|
||||
- local_nfds, errp);
|
||||
+ local_nfds, 0, errp);
|
||||
if (len == QIO_CHANNEL_ERR_BLOCK) {
|
||||
if (qemu_in_coroutine()) {
|
||||
qio_channel_yield(ioc, G_IO_IN);
|
||||
@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
Error **errp)
|
||||
{
|
||||
- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp);
|
||||
+ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp);
|
||||
}
|
||||
|
||||
|
||||
@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc,
|
||||
Error **errp)
|
||||
{
|
||||
struct iovec iov = { .iov_base = buf, .iov_len = buflen };
|
||||
- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp);
|
||||
+ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp);
|
||||
}
|
||||
|
||||
|
||||
diff --git a/migration/rdma.c b/migration/rdma.c
|
||||
index 54acd2000e..dcf98bd7f8 100644
|
||||
--- a/migration/rdma.c
|
||||
+++ b/migration/rdma.c
|
||||
@@ -2917,6 +2917,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
|
||||
size_t niov,
|
||||
int **fds,
|
||||
size_t *nfds,
|
||||
+ int flags,
|
||||
Error **errp)
|
||||
{
|
||||
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
|
||||
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
|
||||
index f281daeced..12ec8e9368 100644
|
||||
--- a/scsi/qemu-pr-helper.c
|
||||
+++ b/scsi/qemu-pr-helper.c
|
||||
@@ -612,7 +612,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
|
||||
iov.iov_base = buf;
|
||||
iov.iov_len = sz;
|
||||
n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
|
||||
- &fds, &nfds, errp);
|
||||
+ &fds, &nfds, 0, errp);
|
||||
|
||||
if (n_read == QIO_CHANNEL_ERR_BLOCK) {
|
||||
qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
|
||||
diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c
|
||||
index 2994d1cf42..3cf1acaf7d 100644
|
||||
--- a/tests/qtest/tpm-emu.c
|
||||
+++ b/tests/qtest/tpm-emu.c
|
||||
@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data)
|
||||
int *pfd = NULL;
|
||||
size_t nfd = 0;
|
||||
|
||||
- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort);
|
||||
+ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort);
|
||||
cmd = be32_to_cpu(cmd);
|
||||
g_assert_cmpint(cmd, ==, CMD_SET_DATAFD);
|
||||
g_assert_cmpint(nfd, ==, 1);
|
||||
diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
|
||||
index 6713886d02..de2930f203 100644
|
||||
--- a/tests/unit/test-io-channel-socket.c
|
||||
+++ b/tests/unit/test-io-channel-socket.c
|
||||
@@ -452,6 +452,7 @@ static void test_io_channel_unix_fd_pass(void)
|
||||
G_N_ELEMENTS(iorecv),
|
||||
&fdrecv,
|
||||
&nfdrecv,
|
||||
+ 0,
|
||||
&error_abort);
|
||||
|
||||
g_assert(nfdrecv == G_N_ELEMENTS(fdsend));
|
||||
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
|
||||
index 783d847a6d..e6a9ef72b7 100644
|
||||
--- a/util/vhost-user-server.c
|
||||
+++ b/util/vhost-user-server.c
|
||||
@@ -102,7 +102,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
|
||||
* qio_channel_readv_full may have short reads, keeping calling it
|
||||
* until getting VHOST_USER_HDR_SIZE or 0 bytes in total
|
||||
*/
|
||||
- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
|
||||
+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
|
||||
if (rc < 0) {
|
||||
if (rc == QIO_CHANNEL_ERR_BLOCK) {
|
||||
assert(local_err == NULL);
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,68 +0,0 @@
|
||||
From 3cb587f460ec432f329fb83df034bbb7e79e17aa Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Wed, 5 Jun 2024 19:56:51 -0400
|
||||
Subject: [PATCH 2/5] iotests/244: Don't store data-file with protocol in image
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 5: EMBARGOED CVE-2024-4467 for rhel-8.10.z (PRDSC)
|
||||
RH-Jira: RHEL-35616
|
||||
RH-CVE: CVE-2024-4467
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/5] a422cfdba938e1bd857008ccbbddc695011ae0ff
|
||||
|
||||
commit 92e00dab8be1570b13172353d77d2af44cb4e22b
|
||||
Author: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu Apr 25 14:49:40 2024 +0200
|
||||
|
||||
iotests/244: Don't store data-file with protocol in image
|
||||
|
||||
We want to disable filename parsing for data files because it's too easy
|
||||
to abuse in malicious image files. Make the test ready for the change by
|
||||
passing the data file explicitly in command line options.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Upstream: N/A, embargoed
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/244 | 19 ++++++++++++++++---
|
||||
1 file changed, 16 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244
|
||||
index 3e61fa25bb..bb9cc6512f 100755
|
||||
--- a/tests/qemu-iotests/244
|
||||
+++ b/tests/qemu-iotests/244
|
||||
@@ -215,9 +215,22 @@ $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG"
|
||||
$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG"
|
||||
|
||||
# blkdebug doesn't support copy offloading, so this tests the error path
|
||||
-$QEMU_IMG amend -f $IMGFMT -o "data_file=blkdebug::$TEST_IMG.data" "$TEST_IMG"
|
||||
-$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG"
|
||||
-$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG"
|
||||
+test_img_with_blkdebug="json:{
|
||||
+ 'driver': 'qcow2',
|
||||
+ 'file': {
|
||||
+ 'driver': 'file',
|
||||
+ 'filename': '$TEST_IMG'
|
||||
+ },
|
||||
+ 'data-file': {
|
||||
+ 'driver': 'blkdebug',
|
||||
+ 'image': {
|
||||
+ 'driver': 'file',
|
||||
+ 'filename': '$TEST_IMG.data'
|
||||
+ }
|
||||
+ }
|
||||
+}"
|
||||
+$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$test_img_with_blkdebug"
|
||||
+$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$test_img_with_blkdebug"
|
||||
|
||||
echo
|
||||
echo "=== Flushing should flush the data file ==="
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,71 +0,0 @@
|
||||
From 59a84673079f9763e9507733e308442397aba703 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Wed, 5 Jun 2024 19:56:51 -0400
|
||||
Subject: [PATCH 3/5] iotests/270: Don't store data-file with json: prefix in
|
||||
image
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 5: EMBARGOED CVE-2024-4467 for rhel-8.10.z (PRDSC)
|
||||
RH-Jira: RHEL-35616
|
||||
RH-CVE: CVE-2024-4467
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [3/5] ac08690fd3ea3af6e24b2f6a8beedcfe469917a8
|
||||
|
||||
commit 705bcc2819ce8e0f8b9d660a93bc48de26413aec
|
||||
Author: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu Apr 25 14:49:40 2024 +0200
|
||||
|
||||
iotests/270: Don't store data-file with json: prefix in image
|
||||
|
||||
We want to disable filename parsing for data files because it's too easy
|
||||
to abuse in malicious image files. Make the test ready for the change by
|
||||
passing the data file explicitly in command line options.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Upstream: N/A, embargoed
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/270 | 14 +++++++++++---
|
||||
1 file changed, 11 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/270 b/tests/qemu-iotests/270
|
||||
index 74352342db..c37b674aa2 100755
|
||||
--- a/tests/qemu-iotests/270
|
||||
+++ b/tests/qemu-iotests/270
|
||||
@@ -60,8 +60,16 @@ _make_test_img -o cluster_size=2M,data_file="$TEST_IMG.orig" \
|
||||
# "write" 2G of data without using any space.
|
||||
# (qemu-img create does not like it, though, because null-co does not
|
||||
# support image creation.)
|
||||
-$QEMU_IMG amend -o data_file="json:{'driver':'null-co',,'size':'4294967296'}" \
|
||||
- "$TEST_IMG"
|
||||
+test_img_with_null_data="json:{
|
||||
+ 'driver': '$IMGFMT',
|
||||
+ 'file': {
|
||||
+ 'filename': '$TEST_IMG'
|
||||
+ },
|
||||
+ 'data-file': {
|
||||
+ 'driver': 'null-co',
|
||||
+ 'size':'4294967296'
|
||||
+ }
|
||||
+}"
|
||||
|
||||
# This gives us a range of:
|
||||
# 2^31 - 512 + 768 - 1 = 2^31 + 255 > 2^31
|
||||
@@ -74,7 +82,7 @@ $QEMU_IMG amend -o data_file="json:{'driver':'null-co',,'size':'4294967296'}" \
|
||||
# on L2 boundaries, we need large L2 tables; hence the cluster size of
|
||||
# 2 MB. (Anything from 256 kB should work, though, because then one L2
|
||||
# table covers 8 GB.)
|
||||
-$QEMU_IO -c "write 768 $((2 ** 31 - 512))" "$TEST_IMG" | _filter_qemu_io
|
||||
+$QEMU_IO -c "write 768 $((2 ** 31 - 512))" "$test_img_with_null_data" | _filter_qemu_io
|
||||
|
||||
_check_test_img
|
||||
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,82 +0,0 @@
|
||||
From 9b5e69ce5f4ba9541e55d801af16ece4969379e9 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Fri, 9 Feb 2024 18:31:03 +0100
|
||||
Subject: [PATCH 4/4] iotests: Make 144 deterministic again
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 352: monitor: only run coroutine commands in qemu_aio_context
|
||||
RH-Jira: RHEL-7353
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [4/4] 4974a32174abefb509b7c46671a364b4b991449e
|
||||
|
||||
Since commit effd60c8 changed how QMP commands are processed, the order
|
||||
of the block-commit return value and job events in iotests 144 wasn't
|
||||
fixed any more and caused the test to fail intermittently.
|
||||
|
||||
Change the test to cache events first and then print them in a
|
||||
predefined order.
|
||||
|
||||
Waiting three times for JOB_STATUS_CHANGE is a bit uglier than just
|
||||
waiting for the JOB_STATUS_CHANGE that has "status": "ready", but the
|
||||
tooling we have doesn't seem to allow the latter easily.
|
||||
|
||||
Fixes: effd60c878176bcaf97fa7ce2b12d04bb8ead6f7
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2126
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-id: 20240209173103.239994-1-kwolf@redhat.com
|
||||
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
|
||||
(cherry picked from commit cc29c12ec629ba68a4a6cb7d165c94cc8502815a)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/144 | 12 +++++++++++-
|
||||
tests/qemu-iotests/144.out | 2 +-
|
||||
2 files changed, 12 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/144 b/tests/qemu-iotests/144
|
||||
index 60e9ddd75f..8c50d6487e 100755
|
||||
--- a/tests/qemu-iotests/144
|
||||
+++ b/tests/qemu-iotests/144
|
||||
@@ -83,12 +83,22 @@ echo
|
||||
echo === Performing block-commit on active layer ===
|
||||
echo
|
||||
|
||||
+capture_events="BLOCK_JOB_READY JOB_STATUS_CHANGE"
|
||||
+
|
||||
# Block commit on active layer, push the new overlay into base
|
||||
_send_qemu_cmd $h "{ 'execute': 'block-commit',
|
||||
'arguments': {
|
||||
'device': 'virtio0'
|
||||
}
|
||||
- }" "READY"
|
||||
+ }" "return"
|
||||
+
|
||||
+_wait_event $h "JOB_STATUS_CHANGE"
|
||||
+_wait_event $h "JOB_STATUS_CHANGE"
|
||||
+_wait_event $h "JOB_STATUS_CHANGE"
|
||||
+
|
||||
+_wait_event $h "BLOCK_JOB_READY"
|
||||
+
|
||||
+capture_events=
|
||||
|
||||
_send_qemu_cmd $h "{ 'execute': 'block-job-complete',
|
||||
'arguments': {
|
||||
diff --git a/tests/qemu-iotests/144.out b/tests/qemu-iotests/144.out
|
||||
index b3b4812015..2245ddfa10 100644
|
||||
--- a/tests/qemu-iotests/144.out
|
||||
+++ b/tests/qemu-iotests/144.out
|
||||
@@ -25,9 +25,9 @@ Formatting 'TEST_DIR/tmp.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off co
|
||||
'device': 'virtio0'
|
||||
}
|
||||
}
|
||||
+{"return": {}}
|
||||
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "virtio0"}}
|
||||
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "virtio0"}}
|
||||
-{"return": {}}
|
||||
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "virtio0"}}
|
||||
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
|
||||
{ 'execute': 'block-job-complete',
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,49 +0,0 @@
|
||||
From f164083416a9d09712b8cb8c654dd3b8988e6c5c Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 18 Jan 2024 09:48:21 -0500
|
||||
Subject: [PATCH 1/4] iotests: add filter_qmp_generated_node_ids()
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 352: monitor: only run coroutine commands in qemu_aio_context
|
||||
RH-Jira: RHEL-7353
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [1/4] cc276c8ef9e140203afc19fcd8b5b8e20577054d
|
||||
|
||||
Add a filter function for QMP responses that contain QEMU's
|
||||
automatically generated node ids. The ids change between runs and must
|
||||
be masked in the reference output.
|
||||
|
||||
The next commit will use this new function.
|
||||
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240118144823.1497953-2-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit da62b507a20510d819bcfbe8f5e573409b954006)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/iotests.py | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
|
||||
index 2ef493755c..fd41f93421 100644
|
||||
--- a/tests/qemu-iotests/iotests.py
|
||||
+++ b/tests/qemu-iotests/iotests.py
|
||||
@@ -521,6 +521,13 @@ def _filter(_key, value):
|
||||
def filter_generated_node_ids(msg):
|
||||
return re.sub("#block[0-9]+", "NODE_NAME", msg)
|
||||
|
||||
+def filter_qmp_generated_node_ids(qmsg):
|
||||
+ def _filter(_key, value):
|
||||
+ if is_str(value):
|
||||
+ return filter_generated_node_ids(value)
|
||||
+ return value
|
||||
+ return filter_qmp(qmsg, _filter)
|
||||
+
|
||||
def filter_img_info(output, filename):
|
||||
lines = []
|
||||
for line in output.split('\n'):
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,187 +0,0 @@
|
||||
From 084e211448f40c3e9d9b1907f6c98dca9f998bc3 Mon Sep 17 00:00:00 2001
|
||||
From: Hanna Czenczek <hreitz@redhat.com>
|
||||
Date: Tue, 11 Apr 2023 19:34:18 +0200
|
||||
Subject: [PATCH 4/5] iotests/iov-padding: New test
|
||||
|
||||
RH-Author: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-MergeRequest: 291: block: Split padded I/O vectors exceeding IOV_MAX
|
||||
RH-Bugzilla: 2141964
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [4/5] a80be9c26ebd5503745989cd6823cb4814264258
|
||||
|
||||
Test that even vectored IO requests with 1024 vector elements that are
|
||||
not aligned to the device's request alignment will succeed.
|
||||
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Message-Id: <20230411173418.19549-5-hreitz@redhat.com>
|
||||
(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d)
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++
|
||||
tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++
|
||||
2 files changed, 144 insertions(+)
|
||||
create mode 100755 tests/qemu-iotests/tests/iov-padding
|
||||
create mode 100644 tests/qemu-iotests/tests/iov-padding.out
|
||||
|
||||
diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding
|
||||
new file mode 100755
|
||||
index 0000000000..b9604900c7
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iov-padding
|
||||
@@ -0,0 +1,85 @@
|
||||
+#!/usr/bin/env bash
|
||||
+# group: rw quick
|
||||
+#
|
||||
+# Check the interaction of request padding (to fit alignment restrictions) with
|
||||
+# vectored I/O from the guest
|
||||
+#
|
||||
+# Copyright Red Hat
|
||||
+#
|
||||
+# This program is free software; you can redistribute it and/or modify
|
||||
+# it under the terms of the GNU General Public License as published by
|
||||
+# the Free Software Foundation; either version 2 of the License, or
|
||||
+# (at your option) any later version.
|
||||
+#
|
||||
+# This program is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+# GNU General Public License for more details.
|
||||
+#
|
||||
+# You should have received a copy of the GNU General Public License
|
||||
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
+#
|
||||
+
|
||||
+seq=$(basename $0)
|
||||
+echo "QA output created by $seq"
|
||||
+
|
||||
+status=1 # failure is the default!
|
||||
+
|
||||
+_cleanup()
|
||||
+{
|
||||
+ _cleanup_test_img
|
||||
+}
|
||||
+trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
+
|
||||
+# get standard environment, filters and checks
|
||||
+cd ..
|
||||
+. ./common.rc
|
||||
+. ./common.filter
|
||||
+
|
||||
+_supported_fmt raw
|
||||
+_supported_proto file
|
||||
+
|
||||
+_make_test_img 1M
|
||||
+
|
||||
+IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG"
|
||||
+
|
||||
+# Four combinations:
|
||||
+# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k
|
||||
+# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not
|
||||
+# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned
|
||||
+# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not
|
||||
+for start_offset in 4096 512; do
|
||||
+ for last_element_length in 512 4096; do
|
||||
+ length=$((1023 * 512 + $last_element_length))
|
||||
+
|
||||
+ echo
|
||||
+ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) =="
|
||||
+
|
||||
+ # Fill with data for testing
|
||||
+ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io
|
||||
+
|
||||
+ # 1023 512-byte buffers, and then one with length $last_element_length
|
||||
+ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length"
|
||||
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
|
||||
+ -c "writev $cmd_params" \
|
||||
+ --image-opts \
|
||||
+ "$IMGSPEC" \
|
||||
+ | _filter_qemu_io
|
||||
+
|
||||
+ # Read all patterns -- read the part we just wrote with writev twice,
|
||||
+ # once "normally", and once with a readv, so we see that that works, too
|
||||
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \
|
||||
+ -c "read -P 1 0 $start_offset" \
|
||||
+ -c "read -P 2 $start_offset $length" \
|
||||
+ -c "readv $cmd_params" \
|
||||
+ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \
|
||||
+ --image-opts \
|
||||
+ "$IMGSPEC" \
|
||||
+ | _filter_qemu_io
|
||||
+ done
|
||||
+done
|
||||
+
|
||||
+# success, all done
|
||||
+echo "*** done"
|
||||
+rm -f $seq.full
|
||||
+status=0
|
||||
diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out
|
||||
new file mode 100644
|
||||
index 0000000000..e07a91fac7
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/iov-padding.out
|
||||
@@ -0,0 +1,59 @@
|
||||
+QA output created by iov-padding
|
||||
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 4096; length: 524288) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 524288/524288 bytes at offset 4096
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 4096/4096 bytes at offset 0
|
||||
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 4096
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 4096
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 520192/520192 bytes at offset 528384
|
||||
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 4096; length: 527872) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 527872/527872 bytes at offset 4096
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 4096/4096 bytes at offset 0
|
||||
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 4096
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 4096
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 516608/516608 bytes at offset 531968
|
||||
+504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 512; length: 524288) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 524288/524288 bytes at offset 512
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 512/512 bytes at offset 0
|
||||
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 512
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 524288/524288 bytes at offset 512
|
||||
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 523776/523776 bytes at offset 524800
|
||||
+511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+
|
||||
+== performing 1024-element vectored requests to image (offset: 512; length: 527872) ==
|
||||
+wrote 1048576/1048576 bytes at offset 0
|
||||
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+wrote 527872/527872 bytes at offset 512
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 512/512 bytes at offset 0
|
||||
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 512
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 527872/527872 bytes at offset 512
|
||||
+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+read 520192/520192 bytes at offset 528384
|
||||
+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+*** done
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,601 +0,0 @@
|
||||
From 968c8ff7ea7d43bf29d8e5f6e9e17f84168c22c4 Mon Sep 17 00:00:00 2001
|
||||
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Date: Thu, 18 Jan 2024 09:48:22 -0500
|
||||
Subject: [PATCH 2/4] iotests: port 141 to Python for reliable QMP testing
|
||||
|
||||
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-MergeRequest: 352: monitor: only run coroutine commands in qemu_aio_context
|
||||
RH-Jira: RHEL-7353
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Commit: [2/4] ff0899262544b1b61b4c7de2eb798b664fe5202e
|
||||
|
||||
The common.qemu bash functions allow tests to interact with the QMP
|
||||
monitor of a QEMU process. I spent two days trying to update 141 when
|
||||
the order of the test output changed, but found it would still fail
|
||||
occasionally because printf() and QMP events race with synchronous QMP
|
||||
communication.
|
||||
|
||||
I gave up and ported 141 to the existing Python API for QMP tests. The
|
||||
Python API is less affected by the order in which QEMU prints output
|
||||
because it does not print all QMP traffic by default.
|
||||
|
||||
The next commit changes the order in which QMP messages are received.
|
||||
Make 141 reliable first.
|
||||
|
||||
Cc: Hanna Czenczek <hreitz@redhat.com>
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Message-ID: <20240118144823.1497953-3-stefanha@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 9ee2dd4c22a3639c5462b3fc20df60c005c3de64)
|
||||
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
|
||||
Conflicts:
|
||||
tests/qemu-iotests/141
|
||||
tests/qemu-iotests/141.out
|
||||
|
||||
This commit replaces these files anyway, so apply our changes instead
|
||||
of dragging in more dependencies to resolve context conflicts.
|
||||
---
|
||||
tests/qemu-iotests/141 | 307 ++++++++++++++++---------------------
|
||||
tests/qemu-iotests/141.out | 204 ++++++------------------
|
||||
2 files changed, 178 insertions(+), 333 deletions(-)
|
||||
|
||||
diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141
|
||||
index 115cc1691e..a7d3985a02 100755
|
||||
--- a/tests/qemu-iotests/141
|
||||
+++ b/tests/qemu-iotests/141
|
||||
@@ -1,9 +1,12 @@
|
||||
-#!/usr/bin/env bash
|
||||
+#!/usr/bin/env python3
|
||||
# group: rw auto quick
|
||||
#
|
||||
# Test case for ejecting BDSs with block jobs still running on them
|
||||
#
|
||||
-# Copyright (C) 2016 Red Hat, Inc.
|
||||
+# Originally written in bash by Hanna Czenczek, ported to Python by Stefan
|
||||
+# Hajnoczi.
|
||||
+#
|
||||
+# Copyright Red Hat
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@@ -19,177 +22,129 @@
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
-# creator
|
||||
-owner=mreitz@redhat.com
|
||||
-
|
||||
-seq="$(basename $0)"
|
||||
-echo "QA output created by $seq"
|
||||
-
|
||||
-status=1 # failure is the default!
|
||||
-
|
||||
-_cleanup()
|
||||
-{
|
||||
- _cleanup_qemu
|
||||
- _cleanup_test_img
|
||||
- for img in "$TEST_DIR"/{b,m,o}.$IMGFMT; do
|
||||
- _rm_test_img "$img"
|
||||
- done
|
||||
-}
|
||||
-trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
-
|
||||
-# get standard environment, filters and checks
|
||||
-. ./common.rc
|
||||
-. ./common.filter
|
||||
-. ./common.qemu
|
||||
-
|
||||
-# Needs backing file and backing format support
|
||||
-_supported_fmt qcow2 qed
|
||||
-_supported_proto file
|
||||
-_supported_os Linux
|
||||
-
|
||||
-
|
||||
-test_blockjob()
|
||||
-{
|
||||
- _send_qemu_cmd $QEMU_HANDLE \
|
||||
- "{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': '$IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': '$TEST_IMG'
|
||||
- }}}" \
|
||||
- 'return'
|
||||
-
|
||||
- # If "$2" is an event, we may or may not see it before the
|
||||
- # {"return": {}}. Therefore, filter the {"return": {}} out both
|
||||
- # here and in the next command. (Naturally, if we do not see it
|
||||
- # here, we will see it before the next command can be executed,
|
||||
- # so it will appear in the next _send_qemu_cmd's output.)
|
||||
- _send_qemu_cmd $QEMU_HANDLE \
|
||||
- "$1" \
|
||||
- "$2" \
|
||||
- | _filter_img_create | _filter_qmp_empty_return
|
||||
-
|
||||
- # We want this to return an error because the block job is still running
|
||||
- _send_qemu_cmd $QEMU_HANDLE \
|
||||
- "{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}" \
|
||||
- 'error' | _filter_generated_node_ids | _filter_qmp_empty_return
|
||||
-
|
||||
- _send_qemu_cmd $QEMU_HANDLE \
|
||||
- "{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}" \
|
||||
- "$3"
|
||||
-
|
||||
- _send_qemu_cmd $QEMU_HANDLE \
|
||||
- "{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}" \
|
||||
- 'return'
|
||||
-}
|
||||
-
|
||||
-
|
||||
-TEST_IMG="$TEST_DIR/b.$IMGFMT" _make_test_img 1M
|
||||
-TEST_IMG="$TEST_DIR/m.$IMGFMT" _make_test_img -b "$TEST_DIR/b.$IMGFMT" -F $IMGFMT 1M
|
||||
-_make_test_img -b "$TEST_DIR/m.$IMGFMT" 1M -F $IMGFMT
|
||||
-
|
||||
-_launch_qemu -nodefaults
|
||||
-
|
||||
-_send_qemu_cmd $QEMU_HANDLE \
|
||||
- "{'execute': 'qmp_capabilities'}" \
|
||||
- 'return'
|
||||
-
|
||||
-echo
|
||||
-echo '=== Testing drive-backup ==='
|
||||
-echo
|
||||
-
|
||||
-# drive-backup will not send BLOCK_JOB_READY by itself, and cancelling the job
|
||||
-# will consequently result in BLOCK_JOB_CANCELLED being emitted.
|
||||
-
|
||||
-test_blockjob \
|
||||
- "{'execute': 'drive-backup',
|
||||
- 'arguments': {'job-id': 'job0',
|
||||
- 'device': 'drv0',
|
||||
- 'target': '$TEST_DIR/o.$IMGFMT',
|
||||
- 'format': '$IMGFMT',
|
||||
- 'sync': 'none'}}" \
|
||||
- 'return' \
|
||||
- '"status": "null"'
|
||||
-
|
||||
-echo
|
||||
-echo '=== Testing drive-mirror ==='
|
||||
-echo
|
||||
-
|
||||
-# drive-mirror will send BLOCK_JOB_READY basically immediately, and cancelling
|
||||
-# the job will consequently result in BLOCK_JOB_COMPLETED being emitted.
|
||||
-
|
||||
-test_blockjob \
|
||||
- "{'execute': 'drive-mirror',
|
||||
- 'arguments': {'job-id': 'job0',
|
||||
- 'device': 'drv0',
|
||||
- 'target': '$TEST_DIR/o.$IMGFMT',
|
||||
- 'format': '$IMGFMT',
|
||||
- 'sync': 'none'}}" \
|
||||
- 'BLOCK_JOB_READY' \
|
||||
- '"status": "null"'
|
||||
-
|
||||
-echo
|
||||
-echo '=== Testing active block-commit ==='
|
||||
-echo
|
||||
-
|
||||
-# An active block-commit will send BLOCK_JOB_READY basically immediately, and
|
||||
-# cancelling the job will consequently result in BLOCK_JOB_COMPLETED being
|
||||
-# emitted.
|
||||
-
|
||||
-test_blockjob \
|
||||
- "{'execute': 'block-commit',
|
||||
- 'arguments': {'job-id': 'job0', 'device': 'drv0'}}" \
|
||||
- 'BLOCK_JOB_READY' \
|
||||
- '"status": "null"'
|
||||
-
|
||||
-echo
|
||||
-echo '=== Testing non-active block-commit ==='
|
||||
-echo
|
||||
-
|
||||
-# Give block-commit something to work on, otherwise it would be done
|
||||
-# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just
|
||||
-# fine without the block job still running.
|
||||
-
|
||||
-$QEMU_IO -c 'write 0 1M' "$TEST_DIR/m.$IMGFMT" | _filter_qemu_io
|
||||
-
|
||||
-test_blockjob \
|
||||
- "{'execute': 'block-commit',
|
||||
- 'arguments': {'job-id': 'job0',
|
||||
- 'device': 'drv0',
|
||||
- 'top': '$TEST_DIR/m.$IMGFMT',
|
||||
- 'speed': 1}}" \
|
||||
- 'return' \
|
||||
- '"status": "null"'
|
||||
-
|
||||
-echo
|
||||
-echo '=== Testing block-stream ==='
|
||||
-echo
|
||||
-
|
||||
-# Give block-stream something to work on, otherwise it would be done
|
||||
-# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just
|
||||
-# fine without the block job still running.
|
||||
-
|
||||
-$QEMU_IO -c 'write 0 1M' "$TEST_DIR/b.$IMGFMT" | _filter_qemu_io
|
||||
-
|
||||
-# With some data to stream (and @speed set to 1), block-stream will not complete
|
||||
-# until we send the block-job-cancel command.
|
||||
-
|
||||
-test_blockjob \
|
||||
- "{'execute': 'block-stream',
|
||||
- 'arguments': {'job-id': 'job0',
|
||||
- 'device': 'drv0',
|
||||
- 'speed': 1}}" \
|
||||
- 'return' \
|
||||
- '"status": "null"'
|
||||
-
|
||||
-_cleanup_qemu
|
||||
-
|
||||
-# success, all done
|
||||
-echo "*** done"
|
||||
-rm -f $seq.full
|
||||
-status=0
|
||||
+import iotests
|
||||
+
|
||||
+# Common filters to mask values that vary in the test output
|
||||
+QMP_FILTERS = [iotests.filter_qmp_testfiles, \
|
||||
+ iotests.filter_qmp_imgfmt]
|
||||
+
|
||||
+
|
||||
+class TestCase:
|
||||
+ def __init__(self, name, vm, image_path, cancel_event):
|
||||
+ self.name = name
|
||||
+ self.vm = vm
|
||||
+ self.image_path = image_path
|
||||
+ self.cancel_event = cancel_event
|
||||
+
|
||||
+ def __enter__(self):
|
||||
+ iotests.log(f'=== Testing {self.name} ===')
|
||||
+ self.vm.qmp_log('blockdev-add', \
|
||||
+ node_name='drv0', \
|
||||
+ driver=iotests.imgfmt, \
|
||||
+ file={'driver': 'file', 'filename': self.image_path}, \
|
||||
+ filters=QMP_FILTERS)
|
||||
+
|
||||
+ def __exit__(self, *exc_details):
|
||||
+ # This is expected to fail because the job still exists
|
||||
+ self.vm.qmp_log('blockdev-del', node_name='drv0', \
|
||||
+ filters=[iotests.filter_qmp_generated_node_ids])
|
||||
+
|
||||
+ self.vm.qmp_log('block-job-cancel', device='job0')
|
||||
+ event = self.vm.event_wait(self.cancel_event)
|
||||
+ iotests.log(event, filters=[iotests.filter_qmp_event])
|
||||
+
|
||||
+ # This time it succeeds
|
||||
+ self.vm.qmp_log('blockdev-del', node_name='drv0')
|
||||
+
|
||||
+ # Separate test cases in output
|
||||
+ iotests.log('')
|
||||
+
|
||||
+
|
||||
+def main() -> None:
|
||||
+ with iotests.FilePath('bottom', 'middle', 'top', 'target') as \
|
||||
+ (bottom_path, middle_path, top_path, target_path), \
|
||||
+ iotests.VM() as vm:
|
||||
+
|
||||
+ iotests.log('Creating bottom <- middle <- top backing file chain...')
|
||||
+ IMAGE_SIZE='1M'
|
||||
+ iotests.qemu_img_create('-f', iotests.imgfmt, bottom_path, IMAGE_SIZE)
|
||||
+ iotests.qemu_img_create('-f', iotests.imgfmt, \
|
||||
+ '-F', iotests.imgfmt, \
|
||||
+ '-b', bottom_path, \
|
||||
+ middle_path, \
|
||||
+ IMAGE_SIZE)
|
||||
+ iotests.qemu_img_create('-f', iotests.imgfmt, \
|
||||
+ '-F', iotests.imgfmt, \
|
||||
+ '-b', middle_path, \
|
||||
+ top_path, \
|
||||
+ IMAGE_SIZE)
|
||||
+
|
||||
+ iotests.log('Starting VM...')
|
||||
+ vm.add_args('-nodefaults')
|
||||
+ vm.launch()
|
||||
+
|
||||
+ # drive-backup will not send BLOCK_JOB_READY by itself, and cancelling
|
||||
+ # the job will consequently result in BLOCK_JOB_CANCELLED being
|
||||
+ # emitted.
|
||||
+ with TestCase('drive-backup', vm, top_path, 'BLOCK_JOB_CANCELLED'):
|
||||
+ vm.qmp_log('drive-backup', \
|
||||
+ job_id='job0', \
|
||||
+ device='drv0', \
|
||||
+ target=target_path, \
|
||||
+ format=iotests.imgfmt, \
|
||||
+ sync='none', \
|
||||
+ filters=QMP_FILTERS)
|
||||
+
|
||||
+ # drive-mirror will send BLOCK_JOB_READY basically immediately, and
|
||||
+ # cancelling the job will consequently result in BLOCK_JOB_COMPLETED
|
||||
+ # being emitted.
|
||||
+ with TestCase('drive-mirror', vm, top_path, 'BLOCK_JOB_COMPLETED'):
|
||||
+ vm.qmp_log('drive-mirror', \
|
||||
+ job_id='job0', \
|
||||
+ device='drv0', \
|
||||
+ target=target_path, \
|
||||
+ format=iotests.imgfmt, \
|
||||
+ sync='none', \
|
||||
+ filters=QMP_FILTERS)
|
||||
+ event = vm.event_wait('BLOCK_JOB_READY')
|
||||
+ assert event is not None # silence mypy
|
||||
+ iotests.log(event, filters=[iotests.filter_qmp_event])
|
||||
+
|
||||
+ # An active block-commit will send BLOCK_JOB_READY basically
|
||||
+ # immediately, and cancelling the job will consequently result in
|
||||
+ # BLOCK_JOB_COMPLETED being emitted.
|
||||
+ with TestCase('active block-commit', vm, top_path, \
|
||||
+ 'BLOCK_JOB_COMPLETED'):
|
||||
+ vm.qmp_log('block-commit', \
|
||||
+ job_id='job0', \
|
||||
+ device='drv0')
|
||||
+ event = vm.event_wait('BLOCK_JOB_READY')
|
||||
+ assert event is not None # silence mypy
|
||||
+ iotests.log(event, filters=[iotests.filter_qmp_event])
|
||||
+
|
||||
+ # Give block-commit something to work on, otherwise it would be done
|
||||
+ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would
|
||||
+ # work just fine without the block job still running.
|
||||
+ iotests.qemu_io(middle_path, '-c', f'write 0 {IMAGE_SIZE}')
|
||||
+ with TestCase('non-active block-commit', vm, top_path, \
|
||||
+ 'BLOCK_JOB_CANCELLED'):
|
||||
+ vm.qmp_log('block-commit', \
|
||||
+ job_id='job0', \
|
||||
+ device='drv0', \
|
||||
+ top=middle_path, \
|
||||
+ speed=1, \
|
||||
+ filters=[iotests.filter_qmp_testfiles])
|
||||
+
|
||||
+ # Give block-stream something to work on, otherwise it would be done
|
||||
+ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would
|
||||
+ # work just fine without the block job still running.
|
||||
+ iotests.qemu_io(bottom_path, '-c', f'write 0 {IMAGE_SIZE}')
|
||||
+ with TestCase('block-stream', vm, top_path, 'BLOCK_JOB_CANCELLED'):
|
||||
+ vm.qmp_log('block-stream', \
|
||||
+ job_id='job0', \
|
||||
+ device='drv0', \
|
||||
+ speed=1)
|
||||
+
|
||||
+if __name__ == '__main__':
|
||||
+ iotests.script_main(main, supported_fmts=['qcow2', 'qed'],
|
||||
+ supported_protocols=['file'])
|
||||
diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out
|
||||
index c4c15fb275..91b7ba50af 100644
|
||||
--- a/tests/qemu-iotests/141.out
|
||||
+++ b/tests/qemu-iotests/141.out
|
||||
@@ -1,179 +1,69 @@
|
||||
-QA output created by 141
|
||||
-Formatting 'TEST_DIR/b.IMGFMT', fmt=IMGFMT size=1048576
|
||||
-Formatting 'TEST_DIR/m.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/b.IMGFMT backing_fmt=IMGFMT
|
||||
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m.IMGFMT backing_fmt=IMGFMT
|
||||
-{'execute': 'qmp_capabilities'}
|
||||
-{"return": {}}
|
||||
-
|
||||
+Creating bottom <- middle <- top backing file chain...
|
||||
+Starting VM...
|
||||
=== Testing drive-backup ===
|
||||
-
|
||||
-{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': 'IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': 'TEST_DIR/t.IMGFMT'
|
||||
- }}}
|
||||
-{"return": {}}
|
||||
-{'execute': 'drive-backup',
|
||||
-'arguments': {'job-id': 'job0',
|
||||
-'device': 'drv0',
|
||||
-'target': 'TEST_DIR/o.IMGFMT',
|
||||
-'format': 'IMGFMT',
|
||||
-'sync': 'none'}}
|
||||
-Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "drive-backup", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}}
|
||||
-{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}
|
||||
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
|
||||
{"return": {}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"return": {}}
|
||||
|
||||
=== Testing drive-mirror ===
|
||||
-
|
||||
-{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': 'IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': 'TEST_DIR/t.IMGFMT'
|
||||
- }}}
|
||||
-{"return": {}}
|
||||
-{'execute': 'drive-mirror',
|
||||
-'arguments': {'job-id': 'job0',
|
||||
-'device': 'drv0',
|
||||
-'target': 'TEST_DIR/o.IMGFMT',
|
||||
-'format': 'IMGFMT',
|
||||
-'sync': 'none'}}
|
||||
-Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "drive-mirror", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}}
|
||||
+{"return": {}}
|
||||
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: mirror"}}
|
||||
-{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}
|
||||
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
|
||||
{"return": {}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"return": {}}
|
||||
|
||||
=== Testing active block-commit ===
|
||||
-
|
||||
-{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': 'IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': 'TEST_DIR/t.IMGFMT'
|
||||
- }}}
|
||||
-{"return": {}}
|
||||
-{'execute': 'block-commit',
|
||||
-'arguments': {'job-id': 'job0', 'device': 'drv0'}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0"}}
|
||||
+{"return": {}}
|
||||
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}}
|
||||
-{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}
|
||||
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
|
||||
{"return": {}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"return": {}}
|
||||
|
||||
=== Testing non-active block-commit ===
|
||||
-
|
||||
-wrote 1048576/1048576 bytes at offset 0
|
||||
-1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
-{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': 'IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': 'TEST_DIR/t.IMGFMT'
|
||||
- }}}
|
||||
-{"return": {}}
|
||||
-{'execute': 'block-commit',
|
||||
-'arguments': {'job-id': 'job0',
|
||||
-'device': 'drv0',
|
||||
-'top': 'TEST_DIR/m.IMGFMT',
|
||||
-'speed': 1}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
-{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}}
|
||||
-{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}
|
||||
-{"return": {}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1, "top": "TEST_DIR/PID-middle"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
+{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}}
|
||||
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
|
||||
+{"return": {}}
|
||||
+{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"return": {}}
|
||||
|
||||
=== Testing block-stream ===
|
||||
-
|
||||
-wrote 1048576/1048576 bytes at offset 0
|
||||
-1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
-{'execute': 'blockdev-add',
|
||||
- 'arguments': {
|
||||
- 'node-name': 'drv0',
|
||||
- 'driver': 'IMGFMT',
|
||||
- 'file': {
|
||||
- 'driver': 'file',
|
||||
- 'filename': 'TEST_DIR/t.IMGFMT'
|
||||
- }}}
|
||||
-{"return": {}}
|
||||
-{'execute': 'block-stream',
|
||||
-'arguments': {'job-id': 'job0',
|
||||
-'device': 'drv0',
|
||||
-'speed': 1}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "block-stream", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1}}
|
||||
+{"return": {}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: stream"}}
|
||||
-{'execute': 'block-job-cancel',
|
||||
- 'arguments': {'device': 'job0'}}
|
||||
+{"execute": "block-job-cancel", "arguments": {"device": "job0"}}
|
||||
{"return": {}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
|
||||
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
|
||||
-{'execute': 'blockdev-del',
|
||||
- 'arguments': {'node-name': 'drv0'}}
|
||||
+{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
|
||||
+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}}
|
||||
{"return": {}}
|
||||
-*** done
|
||||
+
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,277 +0,0 @@
|
||||
From a0b12780f3cb97abad0a2c54d185c298d3f589e7 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Fri, 17 May 2024 21:50:15 -0500
|
||||
Subject: [PATCH 2/3] iotests: test NBD+TLS+iothread
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 398: nbd/server: CVE-2024-7409: Avoid use-after-free when closing server
|
||||
RH-Jira: RHEL-52611
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [2/3] f522ff5156086a83a7327c379dd3ccd8b583a421 (ebblake/qemu-kvm)
|
||||
|
||||
Prevent regressions when using NBD with TLS in the presence of
|
||||
iothreads, adding coverage for the fix to qio channels made in the
|
||||
previous patch.
|
||||
|
||||
The shell function pick_unused_port() was copied from
|
||||
nbdkit.git/tests/functions.sh.in, where it had all authors from Red
|
||||
Hat, agreeing to the resulting relicensing from 2-clause BSD to GPLv2.
|
||||
|
||||
CC: qemu-stable@nongnu.org
|
||||
CC: "Richard W.M. Jones" <rjones@redhat.com>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240531180639.1392905-6-eblake@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
|
||||
(cherry picked from commit a73c99378022ebb785481e84cfe1e81097546268)
|
||||
Jira: https://issues.redhat.com/browse/RHEL-52611
|
||||
Conflicts:
|
||||
tests/qemu-iotests/tests/nbd-tls-iothread{,.out} - drop unknown
|
||||
"tls-hostname" parameter
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
---
|
||||
tests/qemu-iotests/tests/nbd-tls-iothread | 167 ++++++++++++++++++
|
||||
tests/qemu-iotests/tests/nbd-tls-iothread.out | 53 ++++++
|
||||
2 files changed, 220 insertions(+)
|
||||
create mode 100755 tests/qemu-iotests/tests/nbd-tls-iothread
|
||||
create mode 100644 tests/qemu-iotests/tests/nbd-tls-iothread.out
|
||||
|
||||
diff --git a/tests/qemu-iotests/tests/nbd-tls-iothread b/tests/qemu-iotests/tests/nbd-tls-iothread
|
||||
new file mode 100755
|
||||
index 0000000000..9e747e2639
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/nbd-tls-iothread
|
||||
@@ -0,0 +1,167 @@
|
||||
+#!/usr/bin/env bash
|
||||
+# group: rw quick
|
||||
+#
|
||||
+# Test of NBD+TLS+iothread
|
||||
+#
|
||||
+# Copyright (C) 2024 Red Hat, Inc.
|
||||
+#
|
||||
+# This program is free software; you can redistribute it and/or modify
|
||||
+# it under the terms of the GNU General Public License as published by
|
||||
+# the Free Software Foundation; either version 2 of the License, or
|
||||
+# (at your option) any later version.
|
||||
+#
|
||||
+# This program is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+# GNU General Public License for more details.
|
||||
+#
|
||||
+# You should have received a copy of the GNU General Public License
|
||||
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
+#
|
||||
+
|
||||
+# creator
|
||||
+owner=eblake@redhat.com
|
||||
+
|
||||
+seq=`basename $0`
|
||||
+echo "QA output created by $seq"
|
||||
+
|
||||
+status=1 # failure is the default!
|
||||
+
|
||||
+_cleanup()
|
||||
+{
|
||||
+ _cleanup_qemu
|
||||
+ _cleanup_test_img
|
||||
+ rm -f "$dst_image"
|
||||
+ tls_x509_cleanup
|
||||
+}
|
||||
+trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
+
|
||||
+# get standard environment, filters and checks
|
||||
+cd ..
|
||||
+. ./common.rc
|
||||
+. ./common.filter
|
||||
+. ./common.qemu
|
||||
+. ./common.tls
|
||||
+. ./common.nbd
|
||||
+
|
||||
+_supported_fmt qcow2 # Hardcoded to qcow2 command line and QMP below
|
||||
+_supported_proto file
|
||||
+
|
||||
+# pick_unused_port
|
||||
+#
|
||||
+# Picks and returns an "unused" port, setting the global variable
|
||||
+# $port.
|
||||
+#
|
||||
+# This is inherently racy, but we need it because qemu does not currently
|
||||
+# permit NBD+TLS over a Unix domain socket
|
||||
+pick_unused_port ()
|
||||
+{
|
||||
+ if ! (ss --version) >/dev/null 2>&1; then
|
||||
+ _notrun "ss utility required, skipped this test"
|
||||
+ fi
|
||||
+
|
||||
+ # Start at a random port to make it less likely that two parallel
|
||||
+ # tests will conflict.
|
||||
+ port=$(( 50000 + (RANDOM%15000) ))
|
||||
+ while ss -ltn | grep -sqE ":$port\b"; do
|
||||
+ ((port++))
|
||||
+ if [ $port -eq 65000 ]; then port=50000; fi
|
||||
+ done
|
||||
+ echo picked unused port
|
||||
+}
|
||||
+
|
||||
+tls_x509_init
|
||||
+
|
||||
+size=1G
|
||||
+DST_IMG="$TEST_DIR/dst.qcow2"
|
||||
+
|
||||
+echo
|
||||
+echo "== preparing TLS creds and spare port =="
|
||||
+
|
||||
+pick_unused_port
|
||||
+tls_x509_create_root_ca "ca1"
|
||||
+tls_x509_create_server "ca1" "server1"
|
||||
+tls_x509_create_client "ca1" "client1"
|
||||
+tls_obj_base=tls-creds-x509,id=tls0,verify-peer=true,dir="${tls_dir}"
|
||||
+
|
||||
+echo
|
||||
+echo "== preparing image =="
|
||||
+
|
||||
+_make_test_img $size
|
||||
+$QEMU_IMG create -f qcow2 "$DST_IMG" $size | _filter_img_create
|
||||
+
|
||||
+echo
|
||||
+echo === Starting Src QEMU ===
|
||||
+echo
|
||||
+
|
||||
+_launch_qemu -machine q35 \
|
||||
+ -object iothread,id=iothread0 \
|
||||
+ -object "${tls_obj_base}"/client1,endpoint=client \
|
||||
+ -device '{"driver":"pcie-root-port", "id":"root0", "multifunction":true,
|
||||
+ "bus":"pcie.0"}' \
|
||||
+ -device '{"driver":"virtio-scsi-pci", "id":"virtio_scsi_pci0",
|
||||
+ "bus":"root0", "iothread":"iothread0"}' \
|
||||
+ -device '{"driver":"scsi-hd", "id":"image1", "drive":"drive_image1",
|
||||
+ "bus":"virtio_scsi_pci0.0"}' \
|
||||
+ -blockdev '{"driver":"file", "cache":{"direct":true, "no-flush":false},
|
||||
+ "filename":"'"$TEST_IMG"'", "node-name":"drive_sys1"}' \
|
||||
+ -blockdev '{"driver":"qcow2", "node-name":"drive_image1",
|
||||
+ "file":"drive_sys1"}'
|
||||
+h1=$QEMU_HANDLE
|
||||
+_send_qemu_cmd $h1 '{"execute": "qmp_capabilities"}' 'return'
|
||||
+
|
||||
+echo
|
||||
+echo === Starting Dst VM2 ===
|
||||
+echo
|
||||
+
|
||||
+_launch_qemu -machine q35 \
|
||||
+ -object iothread,id=iothread0 \
|
||||
+ -object "${tls_obj_base}"/server1,endpoint=server \
|
||||
+ -device '{"driver":"pcie-root-port", "id":"root0", "multifunction":true,
|
||||
+ "bus":"pcie.0"}' \
|
||||
+ -device '{"driver":"virtio-scsi-pci", "id":"virtio_scsi_pci0",
|
||||
+ "bus":"root0", "iothread":"iothread0"}' \
|
||||
+ -device '{"driver":"scsi-hd", "id":"image1", "drive":"drive_image1",
|
||||
+ "bus":"virtio_scsi_pci0.0"}' \
|
||||
+ -blockdev '{"driver":"file", "cache":{"direct":true, "no-flush":false},
|
||||
+ "filename":"'"$DST_IMG"'", "node-name":"drive_sys1"}' \
|
||||
+ -blockdev '{"driver":"qcow2", "node-name":"drive_image1",
|
||||
+ "file":"drive_sys1"}' \
|
||||
+ -incoming defer
|
||||
+h2=$QEMU_HANDLE
|
||||
+_send_qemu_cmd $h2 '{"execute": "qmp_capabilities"}' 'return'
|
||||
+
|
||||
+echo
|
||||
+echo === Dst VM: Enable NBD server for incoming storage migration ===
|
||||
+echo
|
||||
+
|
||||
+_send_qemu_cmd $h2 '{"execute": "nbd-server-start", "arguments":
|
||||
+ {"addr": {"type": "inet", "data": {"host": "127.0.0.1", "port": "'$port'"}},
|
||||
+ "tls-creds": "tls0"}}' '{"return": {}}' | sed "s/\"$port\"/PORT/g"
|
||||
+_send_qemu_cmd $h2 '{"execute": "block-export-add", "arguments":
|
||||
+ {"node-name": "drive_image1", "type": "nbd", "writable": true,
|
||||
+ "id": "drive_image1"}}' '{"return": {}}'
|
||||
+
|
||||
+echo
|
||||
+echo === Src VM: Mirror to dst NBD for outgoing storage migration ===
|
||||
+echo
|
||||
+
|
||||
+_send_qemu_cmd $h1 '{"execute": "blockdev-add", "arguments":
|
||||
+ {"node-name": "mirror", "driver": "nbd",
|
||||
+ "server": {"type": "inet", "host": "127.0.0.1", "port": "'$port'"},
|
||||
+ "export": "drive_image1", "tls-creds": "tls0"}}' '{"return": {}}' | sed "s/\"$port\"/PORT/g"
|
||||
+_send_qemu_cmd $h1 '{"execute": "blockdev-mirror", "arguments":
|
||||
+ {"sync": "full", "device": "drive_image1", "target": "mirror",
|
||||
+ "job-id": "drive_image1_53"}}' '{"return": {}}'
|
||||
+_timed_wait_for $h1 '"ready"'
|
||||
+
|
||||
+echo
|
||||
+echo === Cleaning up ===
|
||||
+echo
|
||||
+
|
||||
+_send_qemu_cmd $h1 '{"execute":"quit"}' ''
|
||||
+_send_qemu_cmd $h2 '{"execute":"quit"}' ''
|
||||
+
|
||||
+echo "*** done"
|
||||
+rm -f $seq.full
|
||||
+status=0
|
||||
diff --git a/tests/qemu-iotests/tests/nbd-tls-iothread.out b/tests/qemu-iotests/tests/nbd-tls-iothread.out
|
||||
new file mode 100644
|
||||
index 0000000000..a3899fd2d7
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/nbd-tls-iothread.out
|
||||
@@ -0,0 +1,53 @@
|
||||
+QA output created by nbd-tls-iothread
|
||||
+
|
||||
+== preparing TLS creds and spare port ==
|
||||
+picked unused port
|
||||
+Generating a self signed certificate...
|
||||
+Generating a signed certificate...
|
||||
+Generating a signed certificate...
|
||||
+
|
||||
+== preparing image ==
|
||||
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
|
||||
+Formatting 'TEST_DIR/dst.IMGFMT', fmt=IMGFMT size=1073741824
|
||||
+
|
||||
+=== Starting Src QEMU ===
|
||||
+
|
||||
+{"execute": "qmp_capabilities"}
|
||||
+{"return": {}}
|
||||
+
|
||||
+=== Starting Dst VM2 ===
|
||||
+
|
||||
+{"execute": "qmp_capabilities"}
|
||||
+{"return": {}}
|
||||
+
|
||||
+=== Dst VM: Enable NBD server for incoming storage migration ===
|
||||
+
|
||||
+{"execute": "nbd-server-start", "arguments":
|
||||
+ {"addr": {"type": "inet", "data": {"host": "127.0.0.1", "port": PORT}},
|
||||
+ "tls-creds": "tls0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "block-export-add", "arguments":
|
||||
+ {"node-name": "drive_image1", "type": "nbd", "writable": true,
|
||||
+ "id": "drive_image1"}}
|
||||
+{"return": {}}
|
||||
+
|
||||
+=== Src VM: Mirror to dst NBD for outgoing storage migration ===
|
||||
+
|
||||
+{"execute": "blockdev-add", "arguments":
|
||||
+ {"node-name": "mirror", "driver": "nbd",
|
||||
+ "server": {"type": "inet", "host": "127.0.0.1", "port": PORT},
|
||||
+ "export": "drive_image1", "tls-creds": "tls0"}}
|
||||
+{"return": {}}
|
||||
+{"execute": "blockdev-mirror", "arguments":
|
||||
+ {"sync": "full", "device": "drive_image1", "target": "mirror",
|
||||
+ "job-id": "drive_image1_53"}}
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "drive_image1_53"}}
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "drive_image1_53"}}
|
||||
+{"return": {}}
|
||||
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "drive_image1_53"}}
|
||||
+
|
||||
+=== Cleaning up ===
|
||||
+
|
||||
+{"execute":"quit"}
|
||||
+{"execute":"quit"}
|
||||
+*** done
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,290 +0,0 @@
|
||||
From 93ec857c46911b95ed8e3abc6a9d432ae847c084 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Mon, 16 Jan 2023 07:51:56 -0500
|
||||
Subject: [PATCH 06/11] kvm: Atomic memslot updates
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 247: accel: introduce accelerator blocker API
|
||||
RH-Bugzilla: 2161188
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/3] 520e41c0f58066a7381a5f6b32b81bc01cce51c0
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188
|
||||
|
||||
commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39
|
||||
Author: David Hildenbrand <david@redhat.com>
|
||||
Date: Fri Nov 11 10:47:58 2022 -0500
|
||||
|
||||
kvm: Atomic memslot updates
|
||||
|
||||
If we update an existing memslot (e.g., resize, split), we temporarily
|
||||
remove the memslot to re-add it immediately afterwards. These updates
|
||||
are not atomic, especially not for KVM VCPU threads, such that we can
|
||||
get spurious faults.
|
||||
|
||||
Let's inhibit most KVM ioctls while performing relevant updates, such
|
||||
that we can perform the update just as if it would happen atomically
|
||||
without additional kernel support.
|
||||
|
||||
We capture the add/del changes and apply them in the notifier commit
|
||||
stage instead. There, we can check for overlaps and perform the ioctl
|
||||
inhibiting only if really required (-> overlap).
|
||||
|
||||
To keep things simple we don't perform additional checks that wouldn't
|
||||
actually result in an overlap -- such as !RAM memory regions in some
|
||||
cases (see kvm_set_phys_mem()).
|
||||
|
||||
To minimize cache-line bouncing, use a separate indicator
|
||||
(in_ioctl_lock) per CPU. Also, make sure to hold the kvm_slots_lock
|
||||
while performing both actions (removing+re-adding).
|
||||
|
||||
We have to wait until all IOCTLs were exited and block new ones from
|
||||
getting executed.
|
||||
|
||||
This approach cannot result in a deadlock as long as the inhibitor does
|
||||
not hold any locks that might hinder an IOCTL from getting finished and
|
||||
exited - something fairly unusual. The inhibitor will always hold the BQL.
|
||||
|
||||
AFAIK, one possible candidate would be userfaultfd. If a page cannot be
|
||||
placed (e.g., during postcopy), because we're waiting for a lock, or if the
|
||||
userfaultfd thread cannot process a fault, because it is waiting for a
|
||||
lock, there could be a deadlock. However, the BQL is not applicable here,
|
||||
because any other guest memory access while holding the BQL would already
|
||||
result in a deadlock.
|
||||
|
||||
Nothing else in the kernel should block forever and wait for userspace
|
||||
intervention.
|
||||
|
||||
Note: pause_all_vcpus()/resume_all_vcpus() or
|
||||
start_exclusive()/end_exclusive() cannot be used, as they either drop
|
||||
the BQL or require to be called without the BQL - something inhibitors
|
||||
cannot handle. We need a low-level locking mechanism that is
|
||||
deadlock-free even when not releasing the BQL.
|
||||
|
||||
Signed-off-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Tested-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Message-Id: <20221111154758.1372674-4-eesposit@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Conflicts:
|
||||
accel/kvm/kvm-all.c: include "sysemu/dirtylimit.h" is missing in
|
||||
rhel 8.8.0
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
accel/kvm/kvm-all.c | 101 ++++++++++++++++++++++++++++++++++-----
|
||||
include/sysemu/kvm_int.h | 8 ++++
|
||||
2 files changed, 98 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
||||
index 221aadfda7..3b7bc39823 100644
|
||||
--- a/accel/kvm/kvm-all.c
|
||||
+++ b/accel/kvm/kvm-all.c
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "sysemu/kvm_int.h"
|
||||
#include "sysemu/runstate.h"
|
||||
#include "sysemu/cpus.h"
|
||||
+#include "sysemu/accel-blocker.h"
|
||||
#include "qemu/bswap.h"
|
||||
#include "exec/memory.h"
|
||||
#include "exec/ram_addr.h"
|
||||
@@ -45,6 +46,7 @@
|
||||
#include "qemu/guest-random.h"
|
||||
#include "sysemu/hw_accel.h"
|
||||
#include "kvm-cpus.h"
|
||||
+#include "qemu/range.h"
|
||||
|
||||
#include "hw/boards.h"
|
||||
|
||||
@@ -1334,6 +1336,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
|
||||
kvm_max_slot_size = max_slot_size;
|
||||
}
|
||||
|
||||
+/* Called with KVMMemoryListener.slots_lock held */
|
||||
static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||
MemoryRegionSection *section, bool add)
|
||||
{
|
||||
@@ -1368,14 +1371,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||
ram = memory_region_get_ram_ptr(mr) + mr_offset;
|
||||
ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset;
|
||||
|
||||
- kvm_slots_lock();
|
||||
-
|
||||
if (!add) {
|
||||
do {
|
||||
slot_size = MIN(kvm_max_slot_size, size);
|
||||
mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
|
||||
if (!mem) {
|
||||
- goto out;
|
||||
+ return;
|
||||
}
|
||||
if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
|
||||
/*
|
||||
@@ -1413,7 +1414,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||
start_addr += slot_size;
|
||||
size -= slot_size;
|
||||
} while (size);
|
||||
- goto out;
|
||||
+ return;
|
||||
}
|
||||
|
||||
/* register the new slot */
|
||||
@@ -1438,9 +1439,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||
ram += slot_size;
|
||||
size -= slot_size;
|
||||
} while (size);
|
||||
-
|
||||
-out:
|
||||
- kvm_slots_unlock();
|
||||
}
|
||||
|
||||
static void *kvm_dirty_ring_reaper_thread(void *data)
|
||||
@@ -1492,18 +1490,95 @@ static void kvm_region_add(MemoryListener *listener,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
|
||||
+ KVMMemoryUpdate *update;
|
||||
+
|
||||
+ update = g_new0(KVMMemoryUpdate, 1);
|
||||
+ update->section = *section;
|
||||
|
||||
- memory_region_ref(section->mr);
|
||||
- kvm_set_phys_mem(kml, section, true);
|
||||
+ QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next);
|
||||
}
|
||||
|
||||
static void kvm_region_del(MemoryListener *listener,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
|
||||
+ KVMMemoryUpdate *update;
|
||||
+
|
||||
+ update = g_new0(KVMMemoryUpdate, 1);
|
||||
+ update->section = *section;
|
||||
+
|
||||
+ QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next);
|
||||
+}
|
||||
+
|
||||
+static void kvm_region_commit(MemoryListener *listener)
|
||||
+{
|
||||
+ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener,
|
||||
+ listener);
|
||||
+ KVMMemoryUpdate *u1, *u2;
|
||||
+ bool need_inhibit = false;
|
||||
+
|
||||
+ if (QSIMPLEQ_EMPTY(&kml->transaction_add) &&
|
||||
+ QSIMPLEQ_EMPTY(&kml->transaction_del)) {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * We have to be careful when regions to add overlap with ranges to remove.
|
||||
+ * We have to simulate atomic KVM memslot updates by making sure no ioctl()
|
||||
+ * is currently active.
|
||||
+ *
|
||||
+ * The lists are order by addresses, so it's easy to find overlaps.
|
||||
+ */
|
||||
+ u1 = QSIMPLEQ_FIRST(&kml->transaction_del);
|
||||
+ u2 = QSIMPLEQ_FIRST(&kml->transaction_add);
|
||||
+ while (u1 && u2) {
|
||||
+ Range r1, r2;
|
||||
+
|
||||
+ range_init_nofail(&r1, u1->section.offset_within_address_space,
|
||||
+ int128_get64(u1->section.size));
|
||||
+ range_init_nofail(&r2, u2->section.offset_within_address_space,
|
||||
+ int128_get64(u2->section.size));
|
||||
+
|
||||
+ if (range_overlaps_range(&r1, &r2)) {
|
||||
+ need_inhibit = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ if (range_lob(&r1) < range_lob(&r2)) {
|
||||
+ u1 = QSIMPLEQ_NEXT(u1, next);
|
||||
+ } else {
|
||||
+ u2 = QSIMPLEQ_NEXT(u2, next);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ kvm_slots_lock();
|
||||
+ if (need_inhibit) {
|
||||
+ accel_ioctl_inhibit_begin();
|
||||
+ }
|
||||
+
|
||||
+ /* Remove all memslots before adding the new ones. */
|
||||
+ while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) {
|
||||
+ u1 = QSIMPLEQ_FIRST(&kml->transaction_del);
|
||||
+ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next);
|
||||
|
||||
- kvm_set_phys_mem(kml, section, false);
|
||||
- memory_region_unref(section->mr);
|
||||
+ kvm_set_phys_mem(kml, &u1->section, false);
|
||||
+ memory_region_unref(u1->section.mr);
|
||||
+
|
||||
+ g_free(u1);
|
||||
+ }
|
||||
+ while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) {
|
||||
+ u1 = QSIMPLEQ_FIRST(&kml->transaction_add);
|
||||
+ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next);
|
||||
+
|
||||
+ memory_region_ref(u1->section.mr);
|
||||
+ kvm_set_phys_mem(kml, &u1->section, true);
|
||||
+
|
||||
+ g_free(u1);
|
||||
+ }
|
||||
+
|
||||
+ if (need_inhibit) {
|
||||
+ accel_ioctl_inhibit_end();
|
||||
+ }
|
||||
+ kvm_slots_unlock();
|
||||
}
|
||||
|
||||
static void kvm_log_sync(MemoryListener *listener,
|
||||
@@ -1647,8 +1722,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
|
||||
kml->slots[i].slot = i;
|
||||
}
|
||||
|
||||
+ QSIMPLEQ_INIT(&kml->transaction_add);
|
||||
+ QSIMPLEQ_INIT(&kml->transaction_del);
|
||||
+
|
||||
kml->listener.region_add = kvm_region_add;
|
||||
kml->listener.region_del = kvm_region_del;
|
||||
+ kml->listener.commit = kvm_region_commit;
|
||||
kml->listener.log_start = kvm_log_start;
|
||||
kml->listener.log_stop = kvm_log_stop;
|
||||
kml->listener.priority = 10;
|
||||
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
|
||||
index 1f5487d9b7..7e18c0a3c0 100644
|
||||
--- a/include/sysemu/kvm_int.h
|
||||
+++ b/include/sysemu/kvm_int.h
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
#include "exec/memory.h"
|
||||
#include "qemu/accel.h"
|
||||
+#include "qemu/queue.h"
|
||||
#include "sysemu/kvm.h"
|
||||
|
||||
typedef struct KVMSlot
|
||||
@@ -30,10 +31,17 @@ typedef struct KVMSlot
|
||||
ram_addr_t ram_start_offset;
|
||||
} KVMSlot;
|
||||
|
||||
+typedef struct KVMMemoryUpdate {
|
||||
+ QSIMPLEQ_ENTRY(KVMMemoryUpdate) next;
|
||||
+ MemoryRegionSection section;
|
||||
+} KVMMemoryUpdate;
|
||||
+
|
||||
typedef struct KVMMemoryListener {
|
||||
MemoryListener listener;
|
||||
KVMSlot *slots;
|
||||
int as_id;
|
||||
+ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
|
||||
+ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del;
|
||||
} KVMMemoryListener;
|
||||
|
||||
void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,71 +0,0 @@
|
||||
From 8f19df61a101c1e57a1bce8adddb57a4a7123a77 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Tue, 16 May 2023 11:05:56 +0200
|
||||
Subject: [PATCH 11/15] lsi53c895a: disable reentrancy detection for MMIO
|
||||
region, too
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [11/12] 8016c86f8432f5ea06c831d1181e87e6d45a6a50 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
|
||||
commit d139fe9ad8a27bcc50b4ead77d2f97d191a0e95e
|
||||
Author: Thomas Huth <thuth@redhat.com>
|
||||
Date: Tue May 16 11:05:56 2023 +0200
|
||||
|
||||
lsi53c895a: disable reentrancy detection for MMIO region, too
|
||||
|
||||
While trying to use a SCSI disk on the LSI controller with an
|
||||
older version of Fedora (25), I'm getting:
|
||||
|
||||
qemu: warning: Blocked re-entrant IO on MemoryRegion: lsi-mmio at addr: 0x34
|
||||
|
||||
and the SCSI controller is not usable. Seems like we have to
|
||||
disable the reentrancy checker for the MMIO region, too, to
|
||||
get this working again.
|
||||
|
||||
The problem could be reproduced like this:
|
||||
|
||||
./qemu-system-x86_64 -accel kvm -m 2G -machine q35 \
|
||||
-device lsi53c810,id=lsi1 -device scsi-hd,drive=d0 \
|
||||
-drive if=none,id=d0,file=.../somedisk.qcow2 \
|
||||
-cdrom Fedora-Everything-netinst-i386-25-1.3.iso
|
||||
|
||||
Where somedisk.qcow2 is an image that already contains some partitions
|
||||
and file systems.
|
||||
|
||||
In the boot menu of Fedora, go to
|
||||
"Troubleshooting" -> "Rescue a Fedora system" -> "3) Skip to shell"
|
||||
|
||||
Then check "dmesg | grep -i 53c" for failure messages, and try to mount
|
||||
a partition from somedisk.qcow2.
|
||||
|
||||
Message-Id: <20230516090556.553813-1-thuth@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/scsi/lsi53c895a.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
|
||||
index 1e15e13fbf..2b9cb2ac5d 100644
|
||||
--- a/hw/scsi/lsi53c895a.c
|
||||
+++ b/hw/scsi/lsi53c895a.c
|
||||
@@ -2306,6 +2306,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp)
|
||||
* re-entrancy guard.
|
||||
*/
|
||||
s->ram_io.disable_reentrancy_guard = true;
|
||||
+ s->mmio_io.disable_reentrancy_guard = true;
|
||||
|
||||
address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io");
|
||||
qdev_init_gpio_out(d, &s->ext_irq, 1);
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,59 +0,0 @@
|
||||
From 3cffdbf3224ac21016dbee69cb2382c322d4bfbb Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 05/15] lsi53c895a: disable reentrancy detection for script RAM
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [5/12] b5334c3a34b38ed1dccf0030d5704e51e00fdce3 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
|
||||
commit bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:10 2023 -0400
|
||||
|
||||
lsi53c895a: disable reentrancy detection for script RAM
|
||||
|
||||
As the code is designed to use the memory APIs to access the script ram,
|
||||
disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion.
|
||||
|
||||
In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion.
|
||||
|
||||
Reported-by: Fiona Ebner <f.ebner@proxmox.com>
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Message-Id: <20230427211013.2994127-6-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/scsi/lsi53c895a.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
|
||||
index 85e907a785..1e15e13fbf 100644
|
||||
--- a/hw/scsi/lsi53c895a.c
|
||||
+++ b/hw/scsi/lsi53c895a.c
|
||||
@@ -2301,6 +2301,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp)
|
||||
memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s,
|
||||
"lsi-io", 256);
|
||||
|
||||
+ /*
|
||||
+ * Since we use the address-space API to interact with ram_io, disable the
|
||||
+ * re-entrancy guard.
|
||||
+ */
|
||||
+ s->ram_io.disable_reentrancy_guard = true;
|
||||
+
|
||||
address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io");
|
||||
qdev_init_gpio_out(d, &s->ext_irq, 1);
|
||||
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,151 +0,0 @@
|
||||
From e0c811c2d13f995fe1b095f48637316be5978b0e Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 01/15] memory: prevent dma-reentracy issues
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/12] 8fced41b4b2105343e8f0250286b771bcb43c81f (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
CVE: CVE-2023-0330
|
||||
|
||||
commit a2e1753b8054344f32cf94f31c6399a58794a380
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:06 2023 -0400
|
||||
|
||||
memory: prevent dma-reentracy issues
|
||||
|
||||
Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
|
||||
This flag is set/checked prior to calling a device's MemoryRegion
|
||||
handlers, and set when device code initiates DMA. The purpose of this
|
||||
flag is to prevent two types of DMA-based reentrancy issues:
|
||||
|
||||
1.) mmio -> dma -> mmio case
|
||||
2.) bh -> dma write -> mmio case
|
||||
|
||||
These issues have led to problems such as stack-exhaustion and
|
||||
use-after-frees.
|
||||
|
||||
Summary of the problem from Peter Maydell:
|
||||
https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
|
||||
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282
|
||||
Resolves: CVE-2023-0330
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Message-Id: <20230427211013.2994127-2-alxndr@bu.edu>
|
||||
[thuth: Replace warn_report() with warn_report_once()]
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
include/exec/memory.h | 5 +++++
|
||||
include/hw/qdev-core.h | 7 +++++++
|
||||
softmmu/memory.c | 16 ++++++++++++++++
|
||||
3 files changed, 28 insertions(+)
|
||||
|
||||
diff --git a/include/exec/memory.h b/include/exec/memory.h
|
||||
index 20f1b27377..e089f90f9b 100644
|
||||
--- a/include/exec/memory.h
|
||||
+++ b/include/exec/memory.h
|
||||
@@ -734,6 +734,8 @@ struct MemoryRegion {
|
||||
bool is_iommu;
|
||||
RAMBlock *ram_block;
|
||||
Object *owner;
|
||||
+ /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */
|
||||
+ DeviceState *dev;
|
||||
|
||||
const MemoryRegionOps *ops;
|
||||
void *opaque;
|
||||
@@ -757,6 +759,9 @@ struct MemoryRegion {
|
||||
unsigned ioeventfd_nb;
|
||||
MemoryRegionIoeventfd *ioeventfds;
|
||||
RamDiscardManager *rdm; /* Only for RAM */
|
||||
+
|
||||
+ /* For devices designed to perform re-entrant IO into their own IO MRs */
|
||||
+ bool disable_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct IOMMUMemoryRegion {
|
||||
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
|
||||
index 20d3066595..14226f860d 100644
|
||||
--- a/include/hw/qdev-core.h
|
||||
+++ b/include/hw/qdev-core.h
|
||||
@@ -162,6 +162,10 @@ struct NamedClockList {
|
||||
QLIST_ENTRY(NamedClockList) node;
|
||||
};
|
||||
|
||||
+typedef struct {
|
||||
+ bool engaged_in_io;
|
||||
+} MemReentrancyGuard;
|
||||
+
|
||||
/**
|
||||
* DeviceState:
|
||||
* @realized: Indicates whether the device has been fully constructed.
|
||||
@@ -193,6 +197,9 @@ struct DeviceState {
|
||||
int instance_id_alias;
|
||||
int alias_required_for_version;
|
||||
ResettableState reset;
|
||||
+
|
||||
+ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
|
||||
+ MemReentrancyGuard mem_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct DeviceListener {
|
||||
diff --git a/softmmu/memory.c b/softmmu/memory.c
|
||||
index 7340e19ff5..102f0a4248 100644
|
||||
--- a/softmmu/memory.c
|
||||
+++ b/softmmu/memory.c
|
||||
@@ -541,6 +541,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
|
||||
access_size_max = 4;
|
||||
}
|
||||
|
||||
+ /* Do not allow more than one simultaneous access to a device's IO Regions */
|
||||
+ if (mr->dev && !mr->disable_reentrancy_guard &&
|
||||
+ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
|
||||
+ if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
|
||||
+ warn_report_once("Blocked re-entrant IO on MemoryRegion: "
|
||||
+ "%s at addr: 0x%" HWADDR_PRIX,
|
||||
+ memory_region_name(mr), addr);
|
||||
+ return MEMTX_ACCESS_ERROR;
|
||||
+ }
|
||||
+ mr->dev->mem_reentrancy_guard.engaged_in_io = true;
|
||||
+ }
|
||||
+
|
||||
/* FIXME: support unaligned access? */
|
||||
access_size = MAX(MIN(size, access_size_max), access_size_min);
|
||||
access_mask = MAKE_64BIT_MASK(0, access_size * 8);
|
||||
@@ -555,6 +567,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
|
||||
access_mask, attrs);
|
||||
}
|
||||
}
|
||||
+ if (mr->dev) {
|
||||
+ mr->dev->mem_reentrancy_guard.engaged_in_io = false;
|
||||
+ }
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -1169,6 +1184,7 @@ static void memory_region_do_init(MemoryRegion *mr,
|
||||
}
|
||||
mr->name = g_strdup(name);
|
||||
mr->owner = owner;
|
||||
+ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
|
||||
mr->ram_block = NULL;
|
||||
|
||||
if (name) {
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,68 +0,0 @@
|
||||
From c24e38eb508b3fb42ce3ea62fe8de0be6a95a6a8 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Wed, 7 Jun 2023 11:45:09 -0400
|
||||
Subject: [PATCH 10/15] memory: stricter checks prior to unsetting
|
||||
engaged_in_io
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [10/12] 773b62a84b2bd4f5ee7fb8e1cfb3bb91c3a01de1 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
|
||||
commit 3884bf6468ac6bbb58c2b3feaa74e87f821b52f3
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Tue May 16 04:40:02 2023 -0400
|
||||
|
||||
memory: stricter checks prior to unsetting engaged_in_io
|
||||
|
||||
engaged_in_io could be unset by an MR with re-entrancy checks disabled.
|
||||
Ensure that only MRs that can set the engaged_in_io flag can unset it.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Message-Id: <20230516084002.3813836-1-alxndr@bu.edu>
|
||||
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
softmmu/memory.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/softmmu/memory.c b/softmmu/memory.c
|
||||
index 102f0a4248..6b98615357 100644
|
||||
--- a/softmmu/memory.c
|
||||
+++ b/softmmu/memory.c
|
||||
@@ -533,6 +533,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
|
||||
unsigned access_size;
|
||||
unsigned i;
|
||||
MemTxResult r = MEMTX_OK;
|
||||
+ bool reentrancy_guard_applied = false;
|
||||
|
||||
if (!access_size_min) {
|
||||
access_size_min = 1;
|
||||
@@ -551,6 +552,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
|
||||
return MEMTX_ACCESS_ERROR;
|
||||
}
|
||||
mr->dev->mem_reentrancy_guard.engaged_in_io = true;
|
||||
+ reentrancy_guard_applied = true;
|
||||
}
|
||||
|
||||
/* FIXME: support unaligned access? */
|
||||
@@ -567,7 +569,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
|
||||
access_mask, attrs);
|
||||
}
|
||||
}
|
||||
- if (mr->dev) {
|
||||
+ if (mr->dev && reentrancy_guard_applied) {
|
||||
mr->dev->mem_reentrancy_guard.engaged_in_io = false;
|
||||
}
|
||||
return r;
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,111 +0,0 @@
|
||||
From a1f2a51d1a789c46e806adb332236ca16d538bf9 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Tue, 2 May 2023 15:52:12 -0500
|
||||
Subject: [PATCH 3/5] migration: Attempt disk reactivation in more failure
|
||||
scenarios
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 273: migration: prevent source core dump if NFS dies mid-migration
|
||||
RH-Bugzilla: 2177957
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [3/3] e84bf1e7233c0273ca3136ecaa6b2cfc9c0efacb (ebblake/qemu-kvm)
|
||||
|
||||
Commit fe904ea824 added a fail_inactivate label, which tries to
|
||||
reactivate disks on the source after a failure while s->state ==
|
||||
MIGRATION_STATUS_ACTIVE, but didn't actually use the label if
|
||||
qemu_savevm_state_complete_precopy() failed. This failure to
|
||||
reactivate is also present in commit 6039dd5b1c (also covering the new
|
||||
s->state == MIGRATION_STATUS_DEVICE state) and 403d18ae (ensuring
|
||||
s->block_inactive is set more reliably).
|
||||
|
||||
Consolidate the two labels back into one - no matter HOW migration is
|
||||
failed, if there is any chance we can reach vm_start() after having
|
||||
attempted inactivation, it is essential that we have tried to restart
|
||||
disks before then. This also makes the cleanup more like
|
||||
migrate_fd_cancel().
|
||||
|
||||
Suggested-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-Id: <20230502205212.134680-1-eblake@redhat.com>
|
||||
Acked-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 6dab4c93ecfae48e2e67b984d1032c1e988d3005)
|
||||
[eblake: downstream migrate_colo() => migrate_colo_enabled()]
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
---
|
||||
migration/migration.c | 24 ++++++++++++++----------
|
||||
1 file changed, 14 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/migration/migration.c b/migration/migration.c
|
||||
index 6ba8eb0fdf..817170d52d 100644
|
||||
--- a/migration/migration.c
|
||||
+++ b/migration/migration.c
|
||||
@@ -3255,6 +3255,11 @@ static void migration_completion(MigrationState *s)
|
||||
MIGRATION_STATUS_DEVICE);
|
||||
}
|
||||
if (ret >= 0) {
|
||||
+ /*
|
||||
+ * Inactivate disks except in COLO, and track that we
|
||||
+ * have done so in order to remember to reactivate
|
||||
+ * them if migration fails or is cancelled.
|
||||
+ */
|
||||
s->block_inactive = !migrate_colo_enabled();
|
||||
qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
|
||||
ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
|
||||
@@ -3290,13 +3295,13 @@ static void migration_completion(MigrationState *s)
|
||||
rp_error = await_return_path_close_on_source(s);
|
||||
trace_migration_return_path_end_after(rp_error);
|
||||
if (rp_error) {
|
||||
- goto fail_invalidate;
|
||||
+ goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
if (qemu_file_get_error(s->to_dst_file)) {
|
||||
trace_migration_completion_file_err();
|
||||
- goto fail_invalidate;
|
||||
+ goto fail;
|
||||
}
|
||||
|
||||
if (!migrate_colo_enabled()) {
|
||||
@@ -3306,26 +3311,25 @@ static void migration_completion(MigrationState *s)
|
||||
|
||||
return;
|
||||
|
||||
-fail_invalidate:
|
||||
- /* If not doing postcopy, vm_start() will be called: let's regain
|
||||
- * control on images.
|
||||
- */
|
||||
- if (s->state == MIGRATION_STATUS_ACTIVE ||
|
||||
- s->state == MIGRATION_STATUS_DEVICE) {
|
||||
+fail:
|
||||
+ if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
|
||||
+ s->state == MIGRATION_STATUS_DEVICE)) {
|
||||
+ /*
|
||||
+ * If not doing postcopy, vm_start() will be called: let's
|
||||
+ * regain control on images.
|
||||
+ */
|
||||
Error *local_err = NULL;
|
||||
|
||||
qemu_mutex_lock_iothread();
|
||||
bdrv_invalidate_cache_all(&local_err);
|
||||
if (local_err) {
|
||||
error_report_err(local_err);
|
||||
- s->block_inactive = true;
|
||||
} else {
|
||||
s->block_inactive = false;
|
||||
}
|
||||
qemu_mutex_unlock_iothread();
|
||||
}
|
||||
|
||||
-fail:
|
||||
migrate_set_state(&s->state, current_active_state,
|
||||
MIGRATION_STATUS_FAILED);
|
||||
}
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,59 +0,0 @@
|
||||
From dd6d0eace90285c017ae40cba0ffa95ccd963ebd Mon Sep 17 00:00:00 2001
|
||||
From: Leonardo Bras <leobras@redhat.com>
|
||||
Date: Tue, 20 Jun 2023 14:51:03 -0300
|
||||
Subject: [PATCH 15/15] migration: Disable postcopy + multifd migration
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Leonardo Brás <leobras@redhat.com>
|
||||
RH-MergeRequest: 287: migration: Disable postcopy + multifd migration
|
||||
RH-Bugzilla: 2169733
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/1] 07d26fbac35b7586fe790304f03d316ed26a4ef2
|
||||
|
||||
Since the introduction of multifd, it's possible to perform a multifd
|
||||
migration and finish it using postcopy.
|
||||
|
||||
A bug introduced by yank (fixed on cfc3bcf373) was previously preventing
|
||||
a successful use of this migration scenario, and now things should be
|
||||
working on most scenarios.
|
||||
|
||||
But since there is not enough testing/support nor any reported users for
|
||||
this scenario, we should disable this combination before it may cause any
|
||||
problems for users.
|
||||
|
||||
Suggested-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
Acked-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Signed-off-by: Juan Quintela <quintela@redhat.com>
|
||||
(cherry picked from commit b405dfff1ea3cf0530b628895b5a7a50dc8c6996)
|
||||
[leobras: moves logic from options.c -> migration.c and use cap_list
|
||||
instead of new_caps for backward compatibility]
|
||||
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
||||
---
|
||||
migration/migration.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/migration/migration.c b/migration/migration.c
|
||||
index 817170d52d..1ad82e63f0 100644
|
||||
--- a/migration/migration.c
|
||||
+++ b/migration/migration.c
|
||||
@@ -1246,6 +1246,11 @@ static bool migrate_caps_check(bool *cap_list,
|
||||
error_setg(errp, "Postcopy is not compatible with ignore-shared");
|
||||
return false;
|
||||
}
|
||||
+
|
||||
+ if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
|
||||
+ error_setg(errp, "Postcopy is not yet compatible with multifd");
|
||||
+ return false;
|
||||
+ }
|
||||
}
|
||||
|
||||
if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,117 +0,0 @@
|
||||
From 1b07c7663b6a5c19c9303088d63c39dba7e3bb36 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Fri, 14 Apr 2023 10:33:58 -0500
|
||||
Subject: [PATCH 1/5] migration: Handle block device inactivation failures
|
||||
better
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 273: migration: prevent source core dump if NFS dies mid-migration
|
||||
RH-Bugzilla: 2177957
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [1/3] 5892c17ca0a21d824d176e7398d12f7cf991651d (ebblake/qemu-kvm)
|
||||
|
||||
Consider what happens when performing a migration between two host
|
||||
machines connected to an NFS server serving multiple block devices to
|
||||
the guest, when the NFS server becomes unavailable. The migration
|
||||
attempts to inactivate all block devices on the source (a necessary
|
||||
step before the destination can take over); but if the NFS server is
|
||||
non-responsive, the attempt to inactivate can itself fail. When that
|
||||
happens, the destination fails to get the migrated guest (good,
|
||||
because the source wasn't able to flush everything properly):
|
||||
|
||||
(qemu) qemu-kvm: load of migration failed: Input/output error
|
||||
|
||||
at which point, our only hope for the guest is for the source to take
|
||||
back control. With the current code base, the host outputs a message, but then appears to resume:
|
||||
|
||||
(qemu) qemu-kvm: qemu_savevm_state_complete_precopy_non_iterable: bdrv_inactivate_all() failed (-1)
|
||||
|
||||
(src qemu)info status
|
||||
VM status: running
|
||||
|
||||
but a second migration attempt now asserts:
|
||||
|
||||
(src qemu) qemu-kvm: ../block.c:6738: int bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & BDRV_O_INACTIVE)' failed.
|
||||
|
||||
Whether the guest is recoverable on the source after the first failure
|
||||
is debatable, but what we do not want is to have qemu itself fail due
|
||||
to an assertion. It looks like the problem is as follows:
|
||||
|
||||
In migration.c:migration_completion(), the source sets 'inactivate' to
|
||||
true (since COLO is not enabled), then tries
|
||||
savevm.c:qemu_savevm_state_complete_precopy() with a request to
|
||||
inactivate block devices. In turn, this calls
|
||||
block.c:bdrv_inactivate_all(), which fails when flushing runs up
|
||||
against the non-responsive NFS server. With savevm failing, we are
|
||||
now left in a state where some, but not all, of the block devices have
|
||||
been inactivated; but migration_completion() then jumps to 'fail'
|
||||
rather than 'fail_invalidate' and skips an attempt to reclaim those
|
||||
disks by calling bdrv_activate_all(). Even if we do attempt to
|
||||
reclaim disks, we aren't taking note of failure there, either.
|
||||
|
||||
Thus, we have reached a state where the migration engine has forgotten
|
||||
all state about whether a block device is inactive, because we did not
|
||||
set s->block_inactive in enough places; so migration allows the source
|
||||
to reach vm_start() and resume execution, violating the block layer
|
||||
invariant that the guest CPUs should not be restarted while a device
|
||||
is inactive. Note that the code in migration.c:migrate_fd_cancel()
|
||||
will also try to reactivate all block devices if s->block_inactive was
|
||||
set, but because we failed to set that flag after the first failure,
|
||||
the source assumes it has reclaimed all devices, even though it still
|
||||
has remaining inactivated devices and does not try again. Normally,
|
||||
qmp_cont() will also try to reactivate all disks (or correctly fail if
|
||||
the disks are not reclaimable because NFS is not yet back up), but the
|
||||
auto-resumption of the source after a migration failure does not go
|
||||
through qmp_cont(). And because we have left the block layer in an
|
||||
inconsistent state with devices still inactivated, the later migration
|
||||
attempt is hitting the assertion failure.
|
||||
|
||||
Since it is important to not resume the source with inactive disks,
|
||||
this patch marks s->block_inactive before attempting inactivation,
|
||||
rather than after succeeding, in order to prevent any vm_start() until
|
||||
it has successfully reactivated all devices.
|
||||
|
||||
See also https://bugzilla.redhat.com/show_bug.cgi?id=2058982
|
||||
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Acked-by: Lukas Straub <lukasstraub2@web.de>
|
||||
Tested-by: Lukas Straub <lukasstraub2@web.de>
|
||||
Signed-off-by: Juan Quintela <quintela@redhat.com>
|
||||
(cherry picked from commit 403d18ae384239876764bbfa111d6cc5dcb673d1)
|
||||
---
|
||||
migration/migration.c | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/migration/migration.c b/migration/migration.c
|
||||
index 0885549de0..08e5e8f013 100644
|
||||
--- a/migration/migration.c
|
||||
+++ b/migration/migration.c
|
||||
@@ -3256,13 +3256,11 @@ static void migration_completion(MigrationState *s)
|
||||
MIGRATION_STATUS_DEVICE);
|
||||
}
|
||||
if (ret >= 0) {
|
||||
+ s->block_inactive = inactivate;
|
||||
qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
|
||||
ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
|
||||
inactivate);
|
||||
}
|
||||
- if (inactivate && ret >= 0) {
|
||||
- s->block_inactive = true;
|
||||
- }
|
||||
}
|
||||
qemu_mutex_unlock_iothread();
|
||||
|
||||
@@ -3321,6 +3319,7 @@ fail_invalidate:
|
||||
bdrv_invalidate_cache_all(&local_err);
|
||||
if (local_err) {
|
||||
error_report_err(local_err);
|
||||
+ s->block_inactive = true;
|
||||
} else {
|
||||
s->block_inactive = false;
|
||||
}
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,53 +0,0 @@
|
||||
From e79d0506184e861350d2a3e62dd986aa03d30aa8 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Thu, 20 Apr 2023 09:35:51 -0500
|
||||
Subject: [PATCH 2/5] migration: Minor control flow simplification
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 273: migration: prevent source core dump if NFS dies mid-migration
|
||||
RH-Bugzilla: 2177957
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Commit: [2/3] f00b21b6ebd377af79af93ac18f103f8dc0309d6 (ebblake/qemu-kvm)
|
||||
|
||||
No need to declare a temporary variable.
|
||||
|
||||
Suggested-by: Juan Quintela <quintela@redhat.com>
|
||||
Fixes: 1df36e8c6289 ("migration: Handle block device inactivation failures better")
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Signed-off-by: Juan Quintela <quintela@redhat.com>
|
||||
(cherry picked from commit 5d39f44d7ac5c63f53d4d0900ceba9521bc27e49)
|
||||
---
|
||||
migration/migration.c | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/migration/migration.c b/migration/migration.c
|
||||
index 08e5e8f013..6ba8eb0fdf 100644
|
||||
--- a/migration/migration.c
|
||||
+++ b/migration/migration.c
|
||||
@@ -3248,7 +3248,6 @@ static void migration_completion(MigrationState *s)
|
||||
ret = global_state_store();
|
||||
|
||||
if (!ret) {
|
||||
- bool inactivate = !migrate_colo_enabled();
|
||||
ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
|
||||
trace_migration_completion_vm_stop(ret);
|
||||
if (ret >= 0) {
|
||||
@@ -3256,10 +3255,10 @@ static void migration_completion(MigrationState *s)
|
||||
MIGRATION_STATUS_DEVICE);
|
||||
}
|
||||
if (ret >= 0) {
|
||||
- s->block_inactive = inactivate;
|
||||
+ s->block_inactive = !migrate_colo_enabled();
|
||||
qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
|
||||
ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
|
||||
- inactivate);
|
||||
+ s->block_inactive);
|
||||
}
|
||||
}
|
||||
qemu_mutex_unlock_iothread();
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,76 +0,0 @@
|
||||
From 34eae2d7ef928a7e0e10cc30fe76839c005998eb Mon Sep 17 00:00:00 2001
|
||||
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
||||
Date: Wed, 13 Apr 2022 12:33:29 +0100
|
||||
Subject: [PATCH 07/11] migration: Read state once
|
||||
|
||||
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
RH-MergeRequest: 249: migration: Read state once
|
||||
RH-Bugzilla: 2074205
|
||||
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
||||
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
||||
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Commit: [1/1] 9aa47b492a646fce4e66ebd9b7d7a85286d16051
|
||||
|
||||
The 'status' field for the migration is updated normally using
|
||||
an atomic operation from the migration thread.
|
||||
Most readers of it aren't that careful, and in most cases it doesn't
|
||||
matter.
|
||||
|
||||
In query_migrate->fill_source_migration_info the 'state'
|
||||
is read twice; the first time to decide which state fields to fill in,
|
||||
and then secondly to copy the state to the status field; that can end up
|
||||
with a status that's inconsistent; e.g. setting up the fields
|
||||
for 'setup' and then having an 'active' status. In that case
|
||||
libvirt gets upset by the lack of ram info.
|
||||
The symptom is:
|
||||
libvirt.libvirtError: internal error: migration was active, but no RAM info was set
|
||||
|
||||
Read the state exactly once in fill_source_migration_info.
|
||||
|
||||
This is a possible fix for:
|
||||
https://bugzilla.redhat.com/show_bug.cgi?id=2074205
|
||||
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <20220413113329.103696-1-dgilbert@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
(cherry picked from commit 552de79bfdd5e9e53847eb3c6d6e4cd898a4370e)
|
||||
---
|
||||
migration/migration.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/migration/migration.c b/migration/migration.c
|
||||
index 51e6726dac..d8b24a2c91 100644
|
||||
--- a/migration/migration.c
|
||||
+++ b/migration/migration.c
|
||||
@@ -1071,6 +1071,7 @@ static void populate_disk_info(MigrationInfo *info)
|
||||
static void fill_source_migration_info(MigrationInfo *info)
|
||||
{
|
||||
MigrationState *s = migrate_get_current();
|
||||
+ int state = qatomic_read(&s->state);
|
||||
GSList *cur_blocker = migration_blockers;
|
||||
|
||||
info->blocked_reasons = NULL;
|
||||
@@ -1090,7 +1091,7 @@ static void fill_source_migration_info(MigrationInfo *info)
|
||||
}
|
||||
info->has_blocked_reasons = info->blocked_reasons != NULL;
|
||||
|
||||
- switch (s->state) {
|
||||
+ switch (state) {
|
||||
case MIGRATION_STATUS_NONE:
|
||||
/* no migration has happened ever */
|
||||
/* do not overwrite destination migration status */
|
||||
@@ -1135,7 +1136,7 @@ static void fill_source_migration_info(MigrationInfo *info)
|
||||
info->has_status = true;
|
||||
break;
|
||||
}
|
||||
- info->status = s->state;
|
||||
+ info->status = state;
|
||||
}
|
||||
|
||||
typedef enum WriteTrackingSupport {
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,296 +0,0 @@
|
||||
From f21a343af4b4d0c6e5181ae0abd0f6280dc8296c Mon Sep 17 00:00:00 2001
|
||||
From: "manish.mishra" <manish.mishra@nutanix.com>
|
||||
Date: Tue, 20 Dec 2022 18:44:18 +0000
|
||||
Subject: [PATCH 2/3] migration: check magic value for deciding the mapping of
|
||||
channels
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Peter Xu <peterx@redhat.com>
|
||||
RH-MergeRequest: 258: migration: Fix multifd crash due to channel disorder
|
||||
RH-Bugzilla: 2137740
|
||||
RH-Acked-by: quintela1 <quintela@redhat.com>
|
||||
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
|
||||
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
RH-Commit: [2/2] f97bebef3d3e372cfd660e5ddb6cffba791840d2
|
||||
|
||||
Conflicts:
|
||||
migration/migration.c
|
||||
migration/multifd.c
|
||||
migration/postcopy-ram.c
|
||||
migration/postcopy-ram.h
|
||||
|
||||
There're a bunch of conflicts due to missing upstream patches on
|
||||
e.g. qemufile reworks, postcopy preempt. We don't plan to have
|
||||
preempt in rhel8 at all, probably the same as the rest.
|
||||
|
||||
Current logic assumes that channel connections on the destination side are
|
||||
always established in the same order as the source and the first one will
|
||||
always be the main channel followed by the multifd or post-copy
|
||||
preemption channel. This may not be always true, as even if a channel has a
|
||||
connection established on the source side it can be in the pending state on
|
||||
the destination side and a newer connection can be established first.
|
||||
Basically causing out of order mapping of channels on the destination side.
|
||||
Currently, all channels except post-copy preempt send a magic number, this
|
||||
patch uses that magic number to decide the type of channel. This logic is
|
||||
applicable only for precopy(multifd) live migration, as mentioned, the
|
||||
post-copy preempt channel does not send any magic number. Also, tls live
|
||||
migrations already do the tls handshake before creating other channels, so
|
||||
this issue is not possible with tls, hence this logic is avoided for tls
|
||||
live migrations. This patch uses read peek to check the magic number of
|
||||
channels so that current data/control stream management remains
|
||||
unaffected.
|
||||
|
||||
Reviewed-by: Peter Xu <peterx@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
|
||||
Reviewed-by: Juan Quintela <quintela@redhat.com>
|
||||
Suggested-by: Daniel P. Berrange <berrange@redhat.com>
|
||||
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
|
||||
Signed-off-by: Juan Quintela <quintela@redhat.com>
|
||||
(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2)
|
||||
Signed-off-by: Peter Xu <peterx@redhat.com>
|
||||
---
|
||||
migration/channel.c | 45 ++++++++++++++++++++++++++++++++++++++
|
||||
migration/channel.h | 5 +++++
|
||||
migration/migration.c | 51 +++++++++++++++++++++++++++++++------------
|
||||
migration/multifd.c | 19 ++++++++--------
|
||||
migration/multifd.h | 2 +-
|
||||
5 files changed, 98 insertions(+), 24 deletions(-)
|
||||
|
||||
diff --git a/migration/channel.c b/migration/channel.c
|
||||
index 086b5c0d8b..ee308fef23 100644
|
||||
--- a/migration/channel.c
|
||||
+++ b/migration/channel.c
|
||||
@@ -98,3 +98,48 @@ void migration_channel_connect(MigrationState *s,
|
||||
g_free(s->hostname);
|
||||
error_free(error);
|
||||
}
|
||||
+
|
||||
+
|
||||
+/**
|
||||
+ * @migration_channel_read_peek - Peek at migration channel, without
|
||||
+ * actually removing it from channel buffer.
|
||||
+ *
|
||||
+ * @ioc: the channel object
|
||||
+ * @buf: the memory region to read data into
|
||||
+ * @buflen: the number of bytes to read in @buf
|
||||
+ * @errp: pointer to a NULL-initialized error object
|
||||
+ *
|
||||
+ * Returns 0 if successful, returns -1 and sets @errp if fails.
|
||||
+ */
|
||||
+int migration_channel_read_peek(QIOChannel *ioc,
|
||||
+ const char *buf,
|
||||
+ const size_t buflen,
|
||||
+ Error **errp)
|
||||
+{
|
||||
+ ssize_t len = 0;
|
||||
+ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
|
||||
+
|
||||
+ while (true) {
|
||||
+ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL,
|
||||
+ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
|
||||
+
|
||||
+ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) {
|
||||
+ error_setg(errp,
|
||||
+ "Failed to peek at channel");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (len == buflen) {
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ /* 1ms sleep. */
|
||||
+ if (qemu_in_coroutine()) {
|
||||
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
|
||||
+ } else {
|
||||
+ g_usleep(1000);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/migration/channel.h b/migration/channel.h
|
||||
index 67a461c28a..5bdb8208a7 100644
|
||||
--- a/migration/channel.h
|
||||
+++ b/migration/channel.h
|
||||
@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s,
|
||||
QIOChannel *ioc,
|
||||
const char *hostname,
|
||||
Error *error_in);
|
||||
+
|
||||
+int migration_channel_read_peek(QIOChannel *ioc,
|
||||
+ const char *buf,
|
||||
+ const size_t buflen,
|
||||
+ Error **errp);
|
||||
#endif
|
||||
diff --git a/migration/migration.c b/migration/migration.c
|
||||
index d8b24a2c91..0885549de0 100644
|
||||
--- a/migration/migration.c
|
||||
+++ b/migration/migration.c
|
||||
@@ -32,6 +32,7 @@
|
||||
#include "savevm.h"
|
||||
#include "qemu-file-channel.h"
|
||||
#include "qemu-file.h"
|
||||
+#include "channel.h"
|
||||
#include "migration/vmstate.h"
|
||||
#include "block/block.h"
|
||||
#include "qapi/error.h"
|
||||
@@ -637,10 +638,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp)
|
||||
{
|
||||
MigrationIncomingState *mis = migration_incoming_get_current();
|
||||
|
||||
- if (multifd_load_setup(errp) != 0) {
|
||||
- return false;
|
||||
- }
|
||||
-
|
||||
if (!mis->from_src_file) {
|
||||
mis->from_src_file = f;
|
||||
}
|
||||
@@ -701,10 +698,42 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp)
|
||||
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
|
||||
{
|
||||
MigrationIncomingState *mis = migration_incoming_get_current();
|
||||
+ bool default_channel = true;
|
||||
+ uint32_t channel_magic = 0;
|
||||
Error *local_err = NULL;
|
||||
- bool start_migration;
|
||||
+ int ret = 0;
|
||||
|
||||
- if (!mis->from_src_file) {
|
||||
+ if (migrate_use_multifd() && !migrate_postcopy_ram() &&
|
||||
+ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
|
||||
+ /*
|
||||
+ * With multiple channels, it is possible that we receive channels
|
||||
+ * out of order on destination side, causing incorrect mapping of
|
||||
+ * source channels on destination side. Check channel MAGIC to
|
||||
+ * decide type of channel. Please note this is best effort, postcopy
|
||||
+ * preempt channel does not send any magic number so avoid it for
|
||||
+ * postcopy live migration. Also tls live migration already does
|
||||
+ * tls handshake while initializing main channel so with tls this
|
||||
+ * issue is not possible.
|
||||
+ */
|
||||
+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
|
||||
+ sizeof(channel_magic), &local_err);
|
||||
+
|
||||
+ if (ret != 0) {
|
||||
+ error_propagate(errp, local_err);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
|
||||
+ } else {
|
||||
+ default_channel = !mis->from_src_file;
|
||||
+ }
|
||||
+
|
||||
+ if (multifd_load_setup(errp) != 0) {
|
||||
+ error_setg(errp, "Failed to setup multifd channels");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (default_channel) {
|
||||
/* The first connection (multifd may have multiple) */
|
||||
QEMUFile *f = qemu_fopen_channel_input(ioc);
|
||||
|
||||
@@ -716,23 +745,17 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
|
||||
if (!migration_incoming_setup(f, errp)) {
|
||||
return;
|
||||
}
|
||||
-
|
||||
- /*
|
||||
- * Common migration only needs one channel, so we can start
|
||||
- * right now. Multifd needs more than one channel, we wait.
|
||||
- */
|
||||
- start_migration = !migrate_use_multifd();
|
||||
} else {
|
||||
/* Multiple connections */
|
||||
assert(migrate_use_multifd());
|
||||
- start_migration = multifd_recv_new_channel(ioc, &local_err);
|
||||
+ multifd_recv_new_channel(ioc, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
- if (start_migration) {
|
||||
+ if (migration_has_all_channels()) {
|
||||
migration_incoming_process();
|
||||
}
|
||||
}
|
||||
diff --git a/migration/multifd.c b/migration/multifd.c
|
||||
index 7c16523e6b..75ac052d2f 100644
|
||||
--- a/migration/multifd.c
|
||||
+++ b/migration/multifd.c
|
||||
@@ -1183,9 +1183,14 @@ int multifd_load_setup(Error **errp)
|
||||
uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
|
||||
uint8_t i;
|
||||
|
||||
- if (!migrate_use_multifd()) {
|
||||
+ /*
|
||||
+ * Return successfully if multiFD recv state is already initialised
|
||||
+ * or multiFD is not enabled.
|
||||
+ */
|
||||
+ if (multifd_recv_state || !migrate_use_multifd()) {
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
if (!migrate_multifd_is_allowed()) {
|
||||
error_setg(errp, "multifd is not supported by current protocol");
|
||||
return -1;
|
||||
@@ -1244,11 +1249,9 @@ bool multifd_recv_all_channels_created(void)
|
||||
|
||||
/*
|
||||
* Try to receive all multifd channels to get ready for the migration.
|
||||
- * - Return true and do not set @errp when correctly receiving all channels;
|
||||
- * - Return false and do not set @errp when correctly receiving the current one;
|
||||
- * - Return false and set @errp when failing to receive the current channel.
|
||||
+ * Sets @errp when failing to receive the current channel.
|
||||
*/
|
||||
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
|
||||
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
|
||||
{
|
||||
MultiFDRecvParams *p;
|
||||
Error *local_err = NULL;
|
||||
@@ -1261,7 +1264,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
|
||||
"failed to receive packet"
|
||||
" via multifd channel %d: ",
|
||||
qatomic_read(&multifd_recv_state->count));
|
||||
- return false;
|
||||
+ return;
|
||||
}
|
||||
trace_multifd_recv_new_channel(id);
|
||||
|
||||
@@ -1271,7 +1274,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
|
||||
id);
|
||||
multifd_recv_terminate_threads(local_err);
|
||||
error_propagate(errp, local_err);
|
||||
- return false;
|
||||
+ return;
|
||||
}
|
||||
p->c = ioc;
|
||||
object_ref(OBJECT(ioc));
|
||||
@@ -1282,6 +1285,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
|
||||
qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
|
||||
QEMU_THREAD_JOINABLE);
|
||||
qatomic_inc(&multifd_recv_state->count);
|
||||
- return qatomic_read(&multifd_recv_state->count) ==
|
||||
- migrate_multifd_channels();
|
||||
}
|
||||
diff --git a/migration/multifd.h b/migration/multifd.h
|
||||
index 11d5e273e6..9c0a2a0701 100644
|
||||
--- a/migration/multifd.h
|
||||
+++ b/migration/multifd.h
|
||||
@@ -20,7 +20,7 @@ void multifd_save_cleanup(void);
|
||||
int multifd_load_setup(Error **errp);
|
||||
int multifd_load_cleanup(Error **errp);
|
||||
bool multifd_recv_all_channels_created(void);
|
||||
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
|
||||
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
|
||||
void multifd_recv_sync_main(void);
|
||||
int multifd_send_sync_main(QEMUFile *f);
|
||||
int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
|
||||
--
|
||||
2.37.3
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,101 +0,0 @@
|
||||
From 676438ff8c42323c3e5d9e7eeeb1b3367999136c Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Thu, 22 Aug 2024 09:35:29 -0500
|
||||
Subject: [PATCH 3/3] nbd/server: CVE-2024-7409: Avoid use-after-free when
|
||||
closing server
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 398: nbd/server: CVE-2024-7409: Avoid use-after-free when closing server
|
||||
RH-Jira: RHEL-52611
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [3/3] 1ee35a40ded067a085bf6fcafa690b40976d7f2d (ebblake/qemu-kvm)
|
||||
|
||||
Commit 3e7ef738 plugged the use-after-free of the global nbd_server
|
||||
object, but overlooked a use-after-free of nbd_server->listener.
|
||||
Although this race is harder to hit, notice that our shutdown path
|
||||
first drops the reference count of nbd_server->listener, then triggers
|
||||
actions that can result in a pending client reaching the
|
||||
nbd_blockdev_client_closed() callback, which in turn calls
|
||||
qio_net_listener_set_client_func on a potentially stale object.
|
||||
|
||||
If we know we don't want any more clients to connect, and have already
|
||||
told the listener socket to shut down, then we should not be trying to
|
||||
update the listener socket's associated function.
|
||||
|
||||
Reproducer:
|
||||
|
||||
> #!/usr/bin/python3
|
||||
>
|
||||
> import os
|
||||
> from threading import Thread
|
||||
>
|
||||
> def start_stop():
|
||||
> while 1:
|
||||
> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-start",
|
||||
+"arguments":{"addr":{"type":"unix","data":{"path":"/tmp/nbd-sock"}}}}\'')
|
||||
> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-stop"}\'')
|
||||
>
|
||||
> def nbd_list():
|
||||
> while 1:
|
||||
> os.system('/path/to/build/qemu-nbd -L -k /tmp/nbd-sock')
|
||||
>
|
||||
> def test():
|
||||
> sst = Thread(target=start_stop)
|
||||
> sst.start()
|
||||
> nlt = Thread(target=nbd_list)
|
||||
> nlt.start()
|
||||
>
|
||||
> sst.join()
|
||||
> nlt.join()
|
||||
>
|
||||
> test()
|
||||
|
||||
Fixes: CVE-2024-7409
|
||||
Fixes: 3e7ef738c8 ("nbd/server: CVE-2024-7409: Close stray clients at server-stop")
|
||||
CC: qemu-stable@nongnu.org
|
||||
Reported-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240822143617.800419-2-eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
|
||||
(cherry picked from commit 3874f5f73c441c52f1c699c848d463b0eda01e4c)
|
||||
Jira: https://issues.redhat.com/browse/RHEL-52611
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
---
|
||||
blockdev-nbd.c | 12 ++++++++----
|
||||
1 file changed, 8 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
||||
index 87839c180b..b5d55e2518 100644
|
||||
--- a/blockdev-nbd.c
|
||||
+++ b/blockdev-nbd.c
|
||||
@@ -87,10 +87,13 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
||||
|
||||
static void nbd_update_server_watch(NBDServerData *s)
|
||||
{
|
||||
- if (!s->max_connections || s->connections < s->max_connections) {
|
||||
- qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, NULL);
|
||||
- } else {
|
||||
- qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL);
|
||||
+ if (s->listener) {
|
||||
+ if (!s->max_connections || s->connections < s->max_connections) {
|
||||
+ qio_net_listener_set_client_func(s->listener, nbd_accept, NULL,
|
||||
+ NULL);
|
||||
+ } else {
|
||||
+ qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,6 +111,7 @@ static void nbd_server_free(NBDServerData *server)
|
||||
*/
|
||||
qio_net_listener_disconnect(server->listener);
|
||||
object_unref(OBJECT(server->listener));
|
||||
+ server->listener = NULL;
|
||||
QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
|
||||
qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
|
||||
NULL);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,187 +0,0 @@
|
||||
From adfddc25c82576458442f61efb913e44d83bcbd0 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Tue, 6 Aug 2024 13:53:00 -0500
|
||||
Subject: [PATCH 2/5] nbd/server: CVE-2024-7409: Cap default max-connections to
|
||||
100
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 388: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-8.10.z]
|
||||
RH-Jira: RHEL-52611
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Richard W.M. Jones <rjones@redhat.com>
|
||||
RH-Commit: [2/4] 1f5d88d5644c46cbb957778254a993930b9d86dc (ebblake/qemu-kvm)
|
||||
|
||||
Allowing an unlimited number of clients to any web service is a recipe
|
||||
for a rudimentary denial of service attack: the client merely needs to
|
||||
open lots of sockets without closing them, until qemu no longer has
|
||||
any more fds available to allocate.
|
||||
|
||||
For qemu-nbd, we default to allowing only 1 connection unless more are
|
||||
explicitly asked for (-e or --shared); this was historically picked as
|
||||
a nice default (without an explicit -t, a non-persistent qemu-nbd goes
|
||||
away after a client disconnects, without needing any additional
|
||||
follow-up commands), and we are not going to change that interface now
|
||||
(besides, someday we want to point people towards qemu-storage-daemon
|
||||
instead of qemu-nbd).
|
||||
|
||||
But for qemu proper, and the newer qemu-storage-daemon, the QMP
|
||||
nbd-server-start command has historically had a default of unlimited
|
||||
number of connections, in part because unlike qemu-nbd it is
|
||||
inherently persistent until nbd-server-stop. Allowing multiple client
|
||||
sockets is particularly useful for clients that can take advantage of
|
||||
MULTI_CONN (creating parallel sockets to increase throughput),
|
||||
although known clients that do so (such as libnbd's nbdcopy) typically
|
||||
use only 8 or 16 connections (the benefits of scaling diminish once
|
||||
more sockets are competing for kernel attention). Picking a number
|
||||
large enough for typical use cases, but not unlimited, makes it
|
||||
slightly harder for a malicious client to perform a denial of service
|
||||
merely by opening lots of connections without progressing through the
|
||||
handshake.
|
||||
|
||||
This change does not eliminate CVE-2024-7409 on its own, but reduces
|
||||
the chance for fd exhaustion or unlimited memory usage as an attack
|
||||
surface. On the other hand, by itself, it makes it more obvious that
|
||||
with a finite limit, we have the problem of an unauthenticated client
|
||||
holding 100 fds opened as a way to block out a legitimate client from
|
||||
being able to connect; thus, later patches will further add timeouts
|
||||
to reject clients that are not making progress.
|
||||
|
||||
This is an INTENTIONAL change in behavior, and will break any client
|
||||
of nbd-server-start that was not passing an explicit max-connections
|
||||
parameter, yet expects more than 100 simultaneous connections. We are
|
||||
not aware of any such client (as stated above, most clients aware of
|
||||
MULTI_CONN get by just fine on 8 or 16 connections, and probably cope
|
||||
with later connections failing by relying on the earlier connections;
|
||||
libvirt has not yet been passing max-connections, but generally
|
||||
creates NBD servers with the intent for a single client for the sake
|
||||
of live storage migration; meanwhile, the KubeSAN project anticipates
|
||||
a large cluster sharing multiple clients [up to 8 per node, and up to
|
||||
100 nodes in a cluster], but it currently uses qemu-nbd with an
|
||||
explicit --shared=0 rather than qemu-storage-daemon with
|
||||
nbd-server-start).
|
||||
|
||||
We considered using a deprecation period (declare that omitting
|
||||
max-connections is deprecated, and make it mandatory in 3 releases -
|
||||
then we don't need to pick an arbitrary default); that has zero risk
|
||||
of breaking any apps that accidentally depended on more than 100
|
||||
connections, and where such breakage might not be noticed under unit
|
||||
testing but only under the larger loads of production usage. But it
|
||||
does not close the denial-of-service hole until far into the future,
|
||||
and requires all apps to change to add the parameter even if 100 was
|
||||
good enough. It also has a drawback that any app (like libvirt) that
|
||||
is accidentally relying on an unlimited default should seriously
|
||||
consider their own CVE now, at which point they are going to change to
|
||||
pass explicit max-connections sooner than waiting for 3 qemu releases.
|
||||
Finally, if our changed default breaks an app, that app can always
|
||||
pass in an explicit max-connections with a larger value.
|
||||
|
||||
It is also intentional that the HMP interface to nbd-server-start is
|
||||
not changed to expose max-connections (any client needing to fine-tune
|
||||
things should be using QMP).
|
||||
|
||||
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240807174943.771624-12-eblake@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
[ericb: Expand commit message to summarize Dan's argument for why we
|
||||
break corner-case back-compat behavior without a deprecation period]
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
|
||||
(cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623)
|
||||
Conflicts:
|
||||
qapi/block-export.json - context (no multi-conn, older format)
|
||||
Jira: https://issues.redhat.com/browse/RHEL-52611
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
---
|
||||
block/monitor/block-hmp-cmds.c | 3 ++-
|
||||
blockdev-nbd.c | 8 ++++++++
|
||||
include/block/nbd.h | 7 +++++++
|
||||
qapi/block-export.json | 4 ++--
|
||||
4 files changed, 19 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
|
||||
index 2ac4aedfff..32a666b5dc 100644
|
||||
--- a/block/monitor/block-hmp-cmds.c
|
||||
+++ b/block/monitor/block-hmp-cmds.c
|
||||
@@ -411,7 +411,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
- nbd_server_start(addr, NULL, NULL, 0, &local_err);
|
||||
+ nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS,
|
||||
+ &local_err);
|
||||
qapi_free_SocketAddress(addr);
|
||||
if (local_err != NULL) {
|
||||
goto exit;
|
||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
||||
index b9e8dc78f3..4bd90bac16 100644
|
||||
--- a/blockdev-nbd.c
|
||||
+++ b/blockdev-nbd.c
|
||||
@@ -171,6 +171,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds,
|
||||
|
||||
void nbd_server_start_options(NbdServerOptions *arg, Error **errp)
|
||||
{
|
||||
+ if (!arg->has_max_connections) {
|
||||
+ arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
|
||||
+ }
|
||||
+
|
||||
nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz,
|
||||
arg->max_connections, errp);
|
||||
}
|
||||
@@ -183,6 +187,10 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr,
|
||||
{
|
||||
SocketAddress *addr_flat = socket_address_flatten(addr);
|
||||
|
||||
+ if (!has_max_connections) {
|
||||
+ max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
|
||||
+ }
|
||||
+
|
||||
nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp);
|
||||
qapi_free_SocketAddress(addr_flat);
|
||||
}
|
||||
diff --git a/include/block/nbd.h b/include/block/nbd.h
|
||||
index b71a297249..a31c34a8a6 100644
|
||||
--- a/include/block/nbd.h
|
||||
+++ b/include/block/nbd.h
|
||||
@@ -33,6 +33,13 @@ extern const BlockExportDriver blk_exp_nbd;
|
||||
*/
|
||||
#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
|
||||
|
||||
+/*
|
||||
+ * NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at
|
||||
+ * once; must be large enough to allow a MULTI_CONN-aware client like
|
||||
+ * nbdcopy to create its typical number of 8-16 sockets.
|
||||
+ */
|
||||
+#define NBD_DEFAULT_MAX_CONNECTIONS 100
|
||||
+
|
||||
/* Handshake phase structs - this struct is passed on the wire */
|
||||
|
||||
struct NBDOption {
|
||||
diff --git a/qapi/block-export.json b/qapi/block-export.json
|
||||
index c1b92ce1c1..181d7238fe 100644
|
||||
--- a/qapi/block-export.json
|
||||
+++ b/qapi/block-export.json
|
||||
@@ -21,7 +21,7 @@
|
||||
# recreated on the fly while the NBD server is active.
|
||||
# If missing, it will default to denying access (since 4.0).
|
||||
# @max-connections: The maximum number of connections to allow at the same
|
||||
-# time, 0 for unlimited. (since 5.2; default: 0)
|
||||
+# time, 0 for unlimited. (since 5.2; default: 100)
|
||||
#
|
||||
# Since: 4.2
|
||||
##
|
||||
@@ -50,7 +50,7 @@
|
||||
# recreated on the fly while the NBD server is active.
|
||||
# If missing, it will default to denying access (since 4.0).
|
||||
# @max-connections: The maximum number of connections to allow at the same
|
||||
-# time, 0 for unlimited. (since 5.2; default: 0)
|
||||
+# time, 0 for unlimited. (since 5.2; default: 100)
|
||||
#
|
||||
# Returns: error if the server is already running.
|
||||
#
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,180 +0,0 @@
|
||||
From 4ab086cdf9a5842c49f3fe59baff1747d863b97a Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Wed, 7 Aug 2024 12:23:13 -0500
|
||||
Subject: [PATCH 4/5] nbd/server: CVE-2024-7409: Close stray clients at
|
||||
server-stop
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 388: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-8.10.z]
|
||||
RH-Jira: RHEL-52611
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Richard W.M. Jones <rjones@redhat.com>
|
||||
RH-Commit: [4/4] 92a20764dbee3cf94181cab412d90cbf92b4a417 (ebblake/qemu-kvm)
|
||||
|
||||
A malicious client can attempt to connect to an NBD server, and then
|
||||
intentionally delay progress in the handshake, including if it does
|
||||
not know the TLS secrets. Although the previous two patches reduce
|
||||
this behavior by capping the default max-connections parameter and
|
||||
killing slow clients, they did not eliminate the possibility of a
|
||||
client waiting to close the socket until after the QMP nbd-server-stop
|
||||
command is executed, at which point qemu would SEGV when trying to
|
||||
dereference the NULL nbd_server global which is no longer present.
|
||||
This amounts to a denial of service attack. Worse, if another NBD
|
||||
server is started before the malicious client disconnects, I cannot
|
||||
rule out additional adverse effects when the old client interferes
|
||||
with the connection count of the new server (although the most likely
|
||||
is a crash due to an assertion failure when checking
|
||||
nbd_server->connections > 0).
|
||||
|
||||
For environments without this patch, the CVE can be mitigated by
|
||||
ensuring (such as via a firewall) that only trusted clients can
|
||||
connect to an NBD server. Note that using frameworks like libvirt
|
||||
that ensure that TLS is used and that nbd-server-stop is not executed
|
||||
while any trusted clients are still connected will only help if there
|
||||
is also no possibility for an untrusted client to open a connection
|
||||
but then stall on the NBD handshake.
|
||||
|
||||
Given the previous patches, it would be possible to guarantee that no
|
||||
clients remain connected by having nbd-server-stop sleep for longer
|
||||
than the default handshake deadline before finally freeing the global
|
||||
nbd_server object, but that could make QMP non-responsive for a long
|
||||
time. So instead, this patch fixes the problem by tracking all client
|
||||
sockets opened while the server is running, and forcefully closing any
|
||||
such sockets remaining without a completed handshake at the time of
|
||||
nbd-server-stop, then waiting until the coroutines servicing those
|
||||
sockets notice the state change. nbd-server-stop now has a second
|
||||
AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the
|
||||
blk_exp_close_all_type() that disconnects all clients that completed
|
||||
handshakes), but forced socket shutdown is enough to progress the
|
||||
coroutines and quickly tear down all clients before the server is
|
||||
freed, thus finally fixing the CVE.
|
||||
|
||||
This patch relies heavily on the fact that nbd/server.c guarantees
|
||||
that it only calls nbd_blockdev_client_closed() from the main loop
|
||||
(see the assertion in nbd_client_put() and the hoops used in
|
||||
nbd_client_put_nonzero() to achieve that); if we did not have that
|
||||
guarantee, we would also need a mutex protecting our accesses of the
|
||||
list of connections to survive re-entrancy from independent iothreads.
|
||||
|
||||
Although I did not actually try to test old builds, it looks like this
|
||||
problem has existed since at least commit 862172f45c (v2.12.0, 2017) -
|
||||
even back when that patch started using a QIONetListener to handle
|
||||
listening on multiple sockets, nbd_server_free() was already unaware
|
||||
that the nbd_blockdev_client_closed callback can be reached later by a
|
||||
client thread that has not completed handshakes (and therefore the
|
||||
client's socket never got added to the list closed in
|
||||
nbd_export_close_all), despite that patch intentionally tearing down
|
||||
the QIONetListener to prevent new clients.
|
||||
|
||||
Reported-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
|
||||
Fixes: CVE-2024-7409
|
||||
CC: qemu-stable@nongnu.org
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240807174943.771624-14-eblake@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
|
||||
(cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3)
|
||||
Conflicts:
|
||||
- blockdev-nbd.c:
|
||||
- qemu_in_main_thread() not backported, but only used in assertions so
|
||||
safe to drop
|
||||
- AIO_WAIT_WHILE_UNLOCKED() not backported, use AIO_WAIT_WHILE() like
|
||||
blk_exp_close_all_type()
|
||||
Jira: https://issues.redhat.com/browse/RHEL-52611
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
---
|
||||
blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 34 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
||||
index 4bd90bac16..87839c180b 100644
|
||||
--- a/blockdev-nbd.c
|
||||
+++ b/blockdev-nbd.c
|
||||
@@ -21,12 +21,18 @@
|
||||
#include "io/channel-socket.h"
|
||||
#include "io/net-listener.h"
|
||||
|
||||
+typedef struct NBDConn {
|
||||
+ QIOChannelSocket *cioc;
|
||||
+ QLIST_ENTRY(NBDConn) next;
|
||||
+} NBDConn;
|
||||
+
|
||||
typedef struct NBDServerData {
|
||||
QIONetListener *listener;
|
||||
QCryptoTLSCreds *tlscreds;
|
||||
char *tlsauthz;
|
||||
uint32_t max_connections;
|
||||
uint32_t connections;
|
||||
+ QLIST_HEAD(, NBDConn) conns;
|
||||
} NBDServerData;
|
||||
|
||||
static NBDServerData *nbd_server;
|
||||
@@ -46,6 +52,14 @@ bool nbd_server_is_running(void)
|
||||
|
||||
static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
|
||||
{
|
||||
+ NBDConn *conn = nbd_client_owner(client);
|
||||
+
|
||||
+ assert(nbd_server);
|
||||
+
|
||||
+ object_unref(OBJECT(conn->cioc));
|
||||
+ QLIST_REMOVE(conn, next);
|
||||
+ g_free(conn);
|
||||
+
|
||||
nbd_client_put(client);
|
||||
assert(nbd_server->connections > 0);
|
||||
nbd_server->connections--;
|
||||
@@ -55,14 +69,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
|
||||
static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
||||
gpointer opaque)
|
||||
{
|
||||
+ NBDConn *conn = g_new0(NBDConn, 1);
|
||||
+
|
||||
+ assert(nbd_server);
|
||||
nbd_server->connections++;
|
||||
+ object_ref(OBJECT(cioc));
|
||||
+ conn->cioc = cioc;
|
||||
+ QLIST_INSERT_HEAD(&nbd_server->conns, conn, next);
|
||||
nbd_update_server_watch(nbd_server);
|
||||
|
||||
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
|
||||
/* TODO - expose handshake timeout as QMP option */
|
||||
nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
|
||||
nbd_server->tlscreds, nbd_server->tlsauthz,
|
||||
- nbd_blockdev_client_closed, NULL);
|
||||
+ nbd_blockdev_client_closed, conn);
|
||||
}
|
||||
|
||||
static void nbd_update_server_watch(NBDServerData *s)
|
||||
@@ -76,12 +96,25 @@ static void nbd_update_server_watch(NBDServerData *s)
|
||||
|
||||
static void nbd_server_free(NBDServerData *server)
|
||||
{
|
||||
+ NBDConn *conn, *tmp;
|
||||
+
|
||||
if (!server) {
|
||||
return;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Forcefully close the listener socket, and any clients that have
|
||||
+ * not yet disconnected on their own.
|
||||
+ */
|
||||
qio_net_listener_disconnect(server->listener);
|
||||
object_unref(OBJECT(server->listener));
|
||||
+ QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
|
||||
+ qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
|
||||
+ NULL);
|
||||
+ }
|
||||
+
|
||||
+ AIO_WAIT_WHILE(NULL, server->connections > 0);
|
||||
+
|
||||
if (server->tlscreds) {
|
||||
object_unref(OBJECT(server->tlscreds));
|
||||
}
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,135 +0,0 @@
|
||||
From faac5261d5a9af155950c4e7779c5a4721562824 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Thu, 8 Aug 2024 16:05:08 -0500
|
||||
Subject: [PATCH 3/5] nbd/server: CVE-2024-7409: Drop non-negotiating clients
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 388: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-8.10.z]
|
||||
RH-Jira: RHEL-52611
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Richard W.M. Jones <rjones@redhat.com>
|
||||
RH-Commit: [3/4] 8c39829f8efbded9af018a4b915af266a55a793a (ebblake/qemu-kvm)
|
||||
|
||||
A client that opens a socket but does not negotiate is merely hogging
|
||||
qemu's resources (an open fd and a small amount of memory); and a
|
||||
malicious client that can access the port where NBD is listening can
|
||||
attempt a denial of service attack by intentionally opening and
|
||||
abandoning lots of unfinished connections. The previous patch put a
|
||||
default bound on the number of such ongoing connections, but once that
|
||||
limit is hit, no more clients can connect (including legitimate ones).
|
||||
The solution is to insist that clients complete handshake within a
|
||||
reasonable time limit, defaulting to 10 seconds. A client that has
|
||||
not successfully completed NBD_OPT_GO by then (including the case
|
||||
where the client didn't know TLS credentials to even reach the point
|
||||
of NBD_OPT_GO) is wasting our time and does not deserve to stay
|
||||
connected. Later patches will allow fine-tuning the limit away from
|
||||
the default value (including disabling it for doing integration
|
||||
testing of the handshake process itself).
|
||||
|
||||
Note that this patch in isolation actually makes it more likely to see
|
||||
qemu SEGV after nbd-server-stop, as any client socket still connected
|
||||
when the server shuts down will now be closed after 10 seconds rather
|
||||
than at the client's whims. That will be addressed in the next patch.
|
||||
|
||||
For a demo of this patch in action:
|
||||
$ qemu-nbd -f raw -r -t -e 10 file &
|
||||
$ nbdsh --opt-mode -c '
|
||||
H = list()
|
||||
for i in range(20):
|
||||
print(i)
|
||||
H.insert(i, nbd.NBD())
|
||||
H[i].set_opt_mode(True)
|
||||
H[i].connect_uri("nbd://localhost")
|
||||
'
|
||||
$ kill $!
|
||||
|
||||
where later connections get to start progressing once earlier ones are
|
||||
forcefully dropped for taking too long, rather than hanging.
|
||||
|
||||
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240807174943.771624-13-eblake@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
[eblake: rebase to changes earlier in series, reduce scope of timer]
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
|
||||
(cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0)
|
||||
Conflicts:
|
||||
nbd/server.c - context with different aiocontext locking
|
||||
nbd/trace-events - context with no client-connection.c
|
||||
Jira: https://issues.redhat.com/browse/RHEL-52611
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
---
|
||||
nbd/server.c | 28 +++++++++++++++++++++++++++-
|
||||
nbd/trace-events | 1 +
|
||||
2 files changed, 28 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/nbd/server.c b/nbd/server.c
|
||||
index cc1b6838bf..1265068f70 100644
|
||||
--- a/nbd/server.c
|
||||
+++ b/nbd/server.c
|
||||
@@ -2701,22 +2701,48 @@ static void nbd_client_receive_next_request(NBDClient *client)
|
||||
}
|
||||
}
|
||||
|
||||
+static void nbd_handshake_timer_cb(void *opaque)
|
||||
+{
|
||||
+ QIOChannel *ioc = opaque;
|
||||
+
|
||||
+ trace_nbd_handshake_timer_cb();
|
||||
+ qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
|
||||
+}
|
||||
+
|
||||
static coroutine_fn void nbd_co_client_start(void *opaque)
|
||||
{
|
||||
NBDClient *client = opaque;
|
||||
Error *local_err = NULL;
|
||||
+ QEMUTimer *handshake_timer = NULL;
|
||||
|
||||
qemu_co_mutex_init(&client->send_lock);
|
||||
|
||||
- /* TODO - utilize client->handshake_max_secs */
|
||||
+ /*
|
||||
+ * Create a timer to bound the time spent in negotiation. If the
|
||||
+ * timer expires, it is likely nbd_negotiate will fail because the
|
||||
+ * socket was shutdown.
|
||||
+ */
|
||||
+ if (client->handshake_max_secs > 0) {
|
||||
+ handshake_timer = aio_timer_new(qemu_get_aio_context(),
|
||||
+ QEMU_CLOCK_REALTIME,
|
||||
+ SCALE_NS,
|
||||
+ nbd_handshake_timer_cb,
|
||||
+ client->sioc);
|
||||
+ timer_mod(handshake_timer,
|
||||
+ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
|
||||
+ client->handshake_max_secs * NANOSECONDS_PER_SECOND);
|
||||
+ }
|
||||
+
|
||||
if (nbd_negotiate(client, &local_err)) {
|
||||
if (local_err) {
|
||||
error_report_err(local_err);
|
||||
}
|
||||
+ timer_free(handshake_timer);
|
||||
client_close(client, false);
|
||||
return;
|
||||
}
|
||||
|
||||
+ timer_free(handshake_timer);
|
||||
nbd_client_receive_next_request(client);
|
||||
}
|
||||
|
||||
diff --git a/nbd/trace-events b/nbd/trace-events
|
||||
index c4919a2dd5..553546f1f2 100644
|
||||
--- a/nbd/trace-events
|
||||
+++ b/nbd/trace-events
|
||||
@@ -73,3 +73,4 @@ nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char *n
|
||||
nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) "Payload received: handle = %" PRIu64 ", len = %" PRIu32
|
||||
nbd_co_receive_align_compliance(const char *op, uint64_t from, uint32_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx32 ", align=0x%" PRIx32
|
||||
nbd_trip(void) "Reading request"
|
||||
+nbd_handshake_timer_cb(void) "client took too long to negotiate"
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,161 +0,0 @@
|
||||
From 00af174d1388ed2d2df7961ee78be6af3757a01c Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Wed, 30 Aug 2023 18:48:02 -0400
|
||||
Subject: [PATCH 1/3] nbd/server: Favor qemu_aio_context over iohandler context
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 398: nbd/server: CVE-2024-7409: Avoid use-after-free when closing server
|
||||
RH-Jira: RHEL-52611
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/3] 6ec0ef287fbc976175da83a0c14d9878e83affa2 (ebblake/qemu-kvm)
|
||||
|
||||
DOWNSTREAM ONLY - but based on an idea originally included as a
|
||||
side-effect in the larger upstream patch 06e0f098 "io: follow
|
||||
coroutine AioContext in qio_channel_yield()", as well as handling the
|
||||
state of the qio TLS channel before it is associated with a block
|
||||
device as an alternative to 199e84de "qio: Inherit
|
||||
follow_coroutine_ctx across TLS".
|
||||
|
||||
The NBD server code wants to use qio_channel_shutdown() followed by
|
||||
AIO_WAIT_WHILE() during nbd_server_free(), but cannot attach the ioc
|
||||
to an AioContext until the client has completed the handshake to the
|
||||
point that the server knows what block device to associate with the
|
||||
connection. The qio code is set up to handle connections with no
|
||||
AioContext in the iohandler context, but this context is specifically
|
||||
designed to NOT make progress during AIO_WAIT_WHILE(). In order to
|
||||
prevent things from deadlocking, the qio channels handling NBD
|
||||
handshake MUST be in the qemu_aio_context, so that an early shutdown
|
||||
triggered by nbd-server-stop can make progress.
|
||||
|
||||
Note that upstream handled the main qio channel by the use of
|
||||
qio_channel_set_follow_coroutine_ctx() in only one place in
|
||||
nbd/server.c; upstream handled the TLS channel by a more generic
|
||||
second patch that taught qio TLS channel to inherit the
|
||||
follow_coroutine_ctx status from its parent. But since this patch is
|
||||
already downstream only, the minimal diff is achieved by manually
|
||||
setting the status of the TLS channel in NBD code, rather than
|
||||
backporting the qio inheritance code. For testing that the second
|
||||
call to qio_channel_set_favor_qemu_aio_ctx() matters, I used this test
|
||||
setup (borrowing a pre-built PSK file for username alice from the
|
||||
libnbd project, and using IPv4 since this qemu is too old to support
|
||||
TLS over Unix sockets):
|
||||
|
||||
$ # in terminal 1:
|
||||
$ qemu-system-x86_64 --nographic --nodefaults --qmp stdio \
|
||||
--object tls-creds-psk,id=tls0,dir=/PATHTO/libnbd/tests,endpoint=server
|
||||
{"execute": "qmp_capabilities"}
|
||||
{"execute":"nbd-server-start","arguments":{"addr":{"type":"inet",
|
||||
"data":{"host":"127.0.0.1","port":"10809"}},"tls-creds":"tls0"}}
|
||||
|
||||
$ # in terminal 2:
|
||||
$ nbdsh -c 'h.set_uri_allow_local_file(True)' --opt-mode -u \
|
||||
'nbds://alice@127.0.0.1/?tls-psk-file=/PATHTO/libnbd/tests/keys.psk' \
|
||||
-c 'import time; time.sleep(15)'
|
||||
|
||||
$ # in terminal 1, before 10 seconds elapse
|
||||
{"execute":"nbd-server-stop"}
|
||||
{"execute":"quit"}
|
||||
|
||||
and observed that, when omitting the one-line TLS setting, qemu would
|
||||
hit the same deadlock with a TLS client as what I was observing for a
|
||||
non-TLS client without this entire patch.
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-52611
|
||||
Suggested-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
---
|
||||
include/io/channel.h | 16 ++++++++++++++++
|
||||
io/channel.c | 14 +++++++++++++-
|
||||
nbd/server.c | 2 ++
|
||||
3 files changed, 31 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/io/channel.h b/include/io/channel.h
|
||||
index 716235d496..f1ce19ea81 100644
|
||||
--- a/include/io/channel.h
|
||||
+++ b/include/io/channel.h
|
||||
@@ -84,6 +84,7 @@ struct QIOChannel {
|
||||
AioContext *ctx;
|
||||
Coroutine *read_coroutine;
|
||||
Coroutine *write_coroutine;
|
||||
+ bool favor_qemu_aio_ctx;
|
||||
#ifdef _WIN32
|
||||
HANDLE event; /* For use with GSource on Win32 */
|
||||
#endif
|
||||
@@ -498,6 +499,21 @@ int qio_channel_set_blocking(QIOChannel *ioc,
|
||||
bool enabled,
|
||||
Error **errp);
|
||||
|
||||
+/**
|
||||
+ * qio_channel_set_favor_qemu_aio_ctx:
|
||||
+ * @ioc: the channel object
|
||||
+ * @enabled: whether to fall back to qemu_aio_context
|
||||
+ *
|
||||
+ * If @enabled is true, calls to qio_channel_yield() with no AioContext
|
||||
+ * set use the qemu_aio_context instead of the global iohandler context.
|
||||
+ *
|
||||
+ * If @enabled is false, calls to qio_channel_yield() use the global iohandler
|
||||
+ * AioContext. This may be used by coroutines that run in the main loop and
|
||||
+ * do not wish to respond to I/O during nested event loops. This is the
|
||||
+ * default for compatibility with code that is not aware of AioContexts.
|
||||
+ */
|
||||
+void qio_channel_set_favor_qemu_aio_ctx(QIOChannel *ioc, bool enabled);
|
||||
+
|
||||
/**
|
||||
* qio_channel_close:
|
||||
* @ioc: the channel object
|
||||
diff --git a/io/channel.c b/io/channel.c
|
||||
index a8c7f11649..74704d0464 100644
|
||||
--- a/io/channel.c
|
||||
+++ b/io/channel.c
|
||||
@@ -364,6 +364,12 @@ int qio_channel_set_blocking(QIOChannel *ioc,
|
||||
}
|
||||
|
||||
|
||||
+void qio_channel_set_favor_qemu_aio_ctx(QIOChannel *ioc, bool enabled)
|
||||
+{
|
||||
+ ioc->favor_qemu_aio_ctx = enabled;
|
||||
+}
|
||||
+
|
||||
+
|
||||
int qio_channel_close(QIOChannel *ioc,
|
||||
Error **errp)
|
||||
{
|
||||
@@ -545,7 +551,13 @@ static void qio_channel_set_aio_fd_handlers(QIOChannel *ioc)
|
||||
wr_handler = qio_channel_restart_write;
|
||||
}
|
||||
|
||||
- ctx = ioc->ctx ? ioc->ctx : iohandler_get_aio_context();
|
||||
+ if (ioc->ctx) {
|
||||
+ ctx = ioc->ctx;
|
||||
+ } else if (ioc->favor_qemu_aio_ctx) {
|
||||
+ ctx = qemu_get_aio_context();
|
||||
+ } else {
|
||||
+ ctx = iohandler_get_aio_context();
|
||||
+ }
|
||||
qio_channel_set_aio_fd_handler(ioc, ctx, rd_handler, wr_handler, ioc);
|
||||
}
|
||||
|
||||
diff --git a/nbd/server.c b/nbd/server.c
|
||||
index 1265068f70..41a2003300 100644
|
||||
--- a/nbd/server.c
|
||||
+++ b/nbd/server.c
|
||||
@@ -758,6 +758,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
+ qio_channel_set_favor_qemu_aio_ctx(QIO_CHANNEL(tioc), true);
|
||||
qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
|
||||
trace_nbd_negotiate_handle_starttls_handshake();
|
||||
data.loop = g_main_loop_new(g_main_context_default(), FALSE);
|
||||
@@ -1333,6 +1334,7 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
|
||||
*/
|
||||
|
||||
qio_channel_set_blocking(client->ioc, false, NULL);
|
||||
+ qio_channel_set_favor_qemu_aio_ctx(client->ioc, true);
|
||||
|
||||
trace_nbd_negotiate_begin();
|
||||
memcpy(buf, "NBDMAGIC", 8);
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,174 +0,0 @@
|
||||
From 0d204cb81aec2b13254a0bd53938f53bfea81cb5 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Wed, 7 Aug 2024 08:50:01 -0500
|
||||
Subject: [PATCH 1/5] nbd/server: Plumb in new args to nbd_client_add()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 388: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-8.10.z]
|
||||
RH-Jira: RHEL-52611
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Richard W.M. Jones <rjones@redhat.com>
|
||||
RH-Commit: [1/4] 292be8dd2df2a840b2200e31a27e9d17fdab91ad (ebblake/qemu-kvm)
|
||||
|
||||
Upcoming patches to fix a CVE need to track an opaque pointer passed
|
||||
in by the owner of a client object, as well as a request for a time
|
||||
limit on how fast negotiation must complete. Prepare for that by
|
||||
changing the signature of nbd_client_new() and adding an accessor to
|
||||
get at the opaque pointer, although for now the two servers
|
||||
(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though
|
||||
they pass in a new default timeout value.
|
||||
|
||||
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-ID: <20240807174943.771624-11-eblake@redhat.com>
|
||||
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
||||
[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan]
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
|
||||
(cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac)
|
||||
Jira: https://issues.redhat.com/browse/RHEL-52611
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
---
|
||||
blockdev-nbd.c | 6 ++++--
|
||||
include/block/nbd.h | 11 ++++++++++-
|
||||
nbd/server.c | 20 +++++++++++++++++---
|
||||
qemu-nbd.c | 4 +++-
|
||||
4 files changed, 34 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
|
||||
index bdfa7ed3a5..b9e8dc78f3 100644
|
||||
--- a/blockdev-nbd.c
|
||||
+++ b/blockdev-nbd.c
|
||||
@@ -59,8 +59,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
||||
nbd_update_server_watch(nbd_server);
|
||||
|
||||
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
|
||||
- nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz,
|
||||
- nbd_blockdev_client_closed);
|
||||
+ /* TODO - expose handshake timeout as QMP option */
|
||||
+ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
|
||||
+ nbd_server->tlscreds, nbd_server->tlsauthz,
|
||||
+ nbd_blockdev_client_closed, NULL);
|
||||
}
|
||||
|
||||
static void nbd_update_server_watch(NBDServerData *s)
|
||||
diff --git a/include/block/nbd.h b/include/block/nbd.h
|
||||
index 78d101b774..b71a297249 100644
|
||||
--- a/include/block/nbd.h
|
||||
+++ b/include/block/nbd.h
|
||||
@@ -27,6 +27,12 @@
|
||||
|
||||
extern const BlockExportDriver blk_exp_nbd;
|
||||
|
||||
+/*
|
||||
+ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must
|
||||
+ * succeed at NBD_OPT_GO before being forcefully dropped as too slow.
|
||||
+ */
|
||||
+#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
|
||||
+
|
||||
/* Handshake phase structs - this struct is passed on the wire */
|
||||
|
||||
struct NBDOption {
|
||||
@@ -338,9 +344,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp);
|
||||
NBDExport *nbd_export_find(const char *name);
|
||||
|
||||
void nbd_client_new(QIOChannelSocket *sioc,
|
||||
+ uint32_t handshake_max_secs,
|
||||
QCryptoTLSCreds *tlscreds,
|
||||
const char *tlsauthz,
|
||||
- void (*close_fn)(NBDClient *, bool));
|
||||
+ void (*close_fn)(NBDClient *, bool),
|
||||
+ void *owner);
|
||||
+void *nbd_client_owner(NBDClient *client);
|
||||
void nbd_client_get(NBDClient *client);
|
||||
void nbd_client_put(NBDClient *client);
|
||||
|
||||
diff --git a/nbd/server.c b/nbd/server.c
|
||||
index 6db124cf53..cc1b6838bf 100644
|
||||
--- a/nbd/server.c
|
||||
+++ b/nbd/server.c
|
||||
@@ -120,10 +120,12 @@ typedef struct NBDExportMetaContexts {
|
||||
struct NBDClient {
|
||||
int refcount;
|
||||
void (*close_fn)(NBDClient *client, bool negotiated);
|
||||
+ void *owner;
|
||||
|
||||
NBDExport *exp;
|
||||
QCryptoTLSCreds *tlscreds;
|
||||
char *tlsauthz;
|
||||
+ uint32_t handshake_max_secs;
|
||||
QIOChannelSocket *sioc; /* The underlying data channel */
|
||||
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
|
||||
|
||||
@@ -2706,6 +2708,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
|
||||
|
||||
qemu_co_mutex_init(&client->send_lock);
|
||||
|
||||
+ /* TODO - utilize client->handshake_max_secs */
|
||||
if (nbd_negotiate(client, &local_err)) {
|
||||
if (local_err) {
|
||||
error_report_err(local_err);
|
||||
@@ -2718,14 +2721,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
|
||||
}
|
||||
|
||||
/*
|
||||
- * Create a new client listener using the given channel @sioc.
|
||||
+ * Create a new client listener using the given channel @sioc and @owner.
|
||||
* Begin servicing it in a coroutine. When the connection closes, call
|
||||
- * @close_fn with an indication of whether the client completed negotiation.
|
||||
+ * @close_fn with an indication of whether the client completed negotiation
|
||||
+ * within @handshake_max_secs seconds (0 for unbounded).
|
||||
*/
|
||||
void nbd_client_new(QIOChannelSocket *sioc,
|
||||
+ uint32_t handshake_max_secs,
|
||||
QCryptoTLSCreds *tlscreds,
|
||||
const char *tlsauthz,
|
||||
- void (*close_fn)(NBDClient *, bool))
|
||||
+ void (*close_fn)(NBDClient *, bool),
|
||||
+ void *owner)
|
||||
{
|
||||
NBDClient *client;
|
||||
Coroutine *co;
|
||||
@@ -2737,13 +2743,21 @@ void nbd_client_new(QIOChannelSocket *sioc,
|
||||
object_ref(OBJECT(client->tlscreds));
|
||||
}
|
||||
client->tlsauthz = g_strdup(tlsauthz);
|
||||
+ client->handshake_max_secs = handshake_max_secs;
|
||||
client->sioc = sioc;
|
||||
qio_channel_set_delay(QIO_CHANNEL(sioc), false);
|
||||
object_ref(OBJECT(client->sioc));
|
||||
client->ioc = QIO_CHANNEL(sioc);
|
||||
object_ref(OBJECT(client->ioc));
|
||||
client->close_fn = close_fn;
|
||||
+ client->owner = owner;
|
||||
|
||||
co = qemu_coroutine_create(nbd_co_client_start, client);
|
||||
qemu_coroutine_enter(co);
|
||||
}
|
||||
+
|
||||
+void *
|
||||
+nbd_client_owner(NBDClient *client)
|
||||
+{
|
||||
+ return client->owner;
|
||||
+}
|
||||
diff --git a/qemu-nbd.c b/qemu-nbd.c
|
||||
index c6c20df68a..f48abf379e 100644
|
||||
--- a/qemu-nbd.c
|
||||
+++ b/qemu-nbd.c
|
||||
@@ -363,7 +363,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
|
||||
|
||||
nb_fds++;
|
||||
nbd_update_server_watch();
|
||||
- nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed);
|
||||
+ /* TODO - expose handshake timeout as command line option */
|
||||
+ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
|
||||
+ tlscreds, tlsauthz, nbd_client_closed, NULL);
|
||||
}
|
||||
|
||||
static void nbd_update_server_watch(void)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
From 17c5524ada3f2ca9a9c645f540bedc5575302059 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Blake <eblake@redhat.com>
|
||||
Date: Mon, 3 Apr 2023 19:40:47 -0500
|
||||
Subject: [PATCH 5/5] nbd/server: Request TCP_NODELAY
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 274: nbd: improve TLS performance of NBD server
|
||||
RH-Bugzilla: 2035712
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [2/2] 092145077756cda2a4f849c5911031b0fc4a2134 (ebblake/qemu-kvm)
|
||||
|
||||
Nagle's algorithm adds latency in order to reduce network packet
|
||||
overhead on small packets. But when we are already using corking to
|
||||
merge smaller packets into transactional requests, the extra delay
|
||||
from TCP defaults just gets in the way (see recent commit bd2cd4a4).
|
||||
|
||||
For reference, qemu as an NBD client already requests TCP_NODELAY (see
|
||||
nbd_connect() in nbd/client-connection.c); as does libnbd as a client
|
||||
[1], and nbdkit as a server [2]. Furthermore, the NBD spec recommends
|
||||
the use of TCP_NODELAY [3].
|
||||
|
||||
[1] https://gitlab.com/nbdkit/libnbd/-/blob/a48a1142/generator/states-connect.c#L39
|
||||
[2] https://gitlab.com/nbdkit/nbdkit/-/blob/45b72f5b/server/sockets.c#L430
|
||||
[3] https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md#protocol-phases
|
||||
|
||||
CC: Florian Westphal <fw@strlen.de>
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
Message-Id: <20230404004047.142086-1-eblake@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
(cherry picked from commit f1426881a827a6d3f31b65616c4a8db1e9e7c45e)
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
---
|
||||
nbd/server.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/nbd/server.c b/nbd/server.c
|
||||
index a5edc7f681..6db124cf53 100644
|
||||
--- a/nbd/server.c
|
||||
+++ b/nbd/server.c
|
||||
@@ -2738,6 +2738,7 @@ void nbd_client_new(QIOChannelSocket *sioc,
|
||||
}
|
||||
client->tlsauthz = g_strdup(tlsauthz);
|
||||
client->sioc = sioc;
|
||||
+ qio_channel_set_delay(QIO_CHANNEL(sioc), false);
|
||||
object_ref(OBJECT(client->sioc));
|
||||
client->ioc = QIO_CHANNEL(sioc);
|
||||
object_ref(OBJECT(client->ioc));
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,72 +0,0 @@
|
||||
From 170872370c6f3c916e741eb32d80431995d7a870 Mon Sep 17 00:00:00 2001
|
||||
From: Florian Westphal <fw@strlen.de>
|
||||
Date: Fri, 24 Mar 2023 11:47:20 +0100
|
||||
Subject: [PATCH 4/5] nbd/server: push pending frames after sending reply
|
||||
|
||||
RH-Author: Eric Blake <eblake@redhat.com>
|
||||
RH-MergeRequest: 274: nbd: improve TLS performance of NBD server
|
||||
RH-Bugzilla: 2035712
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [1/2] ab92c06c48810aa40380de0433dcac4c6e4be9a5 (ebblake/qemu-kvm)
|
||||
|
||||
qemu-nbd doesn't set TCP_NODELAY on the tcp socket.
|
||||
|
||||
Kernel waits for more data and avoids transmission of small packets.
|
||||
Without TLS this is barely noticeable, but with TLS this really shows.
|
||||
|
||||
Booting a VM via qemu-nbd on localhost (with tls) takes more than
|
||||
2 minutes on my system. tcpdump shows frequent wait periods, where no
|
||||
packets get sent for a 40ms period.
|
||||
|
||||
Add explicit (un)corking when processing (and responding to) requests.
|
||||
"TCP_CORK, &zero" after earlier "CORK, &one" will flush pending data.
|
||||
|
||||
VM Boot time:
|
||||
main: no tls: 23s, with tls: 2m45s
|
||||
patched: no tls: 14s, with tls: 15s
|
||||
|
||||
VM Boot time, qemu-nbd via network (same lan):
|
||||
main: no tls: 18s, with tls: 1m50s
|
||||
patched: no tls: 17s, with tls: 18s
|
||||
|
||||
Future optimization: if we could detect if there is another pending
|
||||
request we could defer the uncork operation because more data would be
|
||||
appended.
|
||||
|
||||
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||
Message-Id: <20230324104720.2498-1-fw@strlen.de>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit bd2cd4a441ded163b62371790876f28a9b834317)
|
||||
Signed-off-by: Eric Blake <eblake@redhat.com>
|
||||
---
|
||||
nbd/server.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/nbd/server.c b/nbd/server.c
|
||||
index 4630dd7322..a5edc7f681 100644
|
||||
--- a/nbd/server.c
|
||||
+++ b/nbd/server.c
|
||||
@@ -2647,6 +2647,8 @@ static coroutine_fn void nbd_trip(void *opaque)
|
||||
goto disconnect;
|
||||
}
|
||||
|
||||
+ qio_channel_set_cork(client->ioc, true);
|
||||
+
|
||||
if (ret < 0) {
|
||||
/* It wans't -EIO, so, according to nbd_co_receive_request()
|
||||
* semantics, we should return the error to the client. */
|
||||
@@ -2672,6 +2674,7 @@ static coroutine_fn void nbd_trip(void *opaque)
|
||||
goto disconnect;
|
||||
}
|
||||
|
||||
+ qio_channel_set_cork(client->ioc, false);
|
||||
done:
|
||||
nbd_request_put(req);
|
||||
nbd_client_put(client);
|
||||
--
|
||||
2.39.1
|
||||
|
@ -1,611 +0,0 @@
|
||||
From 2ae925a6d55a77627be8d1146f2b9ed139dbdb77 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Thu, 23 Nov 2023 11:30:46 -0500
|
||||
Subject: [PATCH 1/4] net: Provide MemReentrancyGuard * to qemu_new_nic()
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 331: net: Provide MemReentrancyGuard * to qemu_new_nic()
|
||||
RH-Jira: RHEL-7309
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
|
||||
RH-Acked-by: Jason Wang <jasowang@redhat.com>
|
||||
RH-Commit: [1/2] bc963fb349b90288f547de97a5cbe9a74f856419 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-7309
|
||||
CVE: CVE-2023-3019
|
||||
Upstream: Merged
|
||||
Conflicts: hw/net/xen_nic.c seems to have undergone significant changes upstream,
|
||||
so the change had to be manually adapted to the old code.
|
||||
|
||||
commit 7d0fefdf81f5973334c344f6b8e1896c309dff66
|
||||
Author: Akihiko Odaki <akihiko.odaki@daynix.com>
|
||||
Date: Thu Jun 1 12:18:58 2023 +0900
|
||||
|
||||
net: Provide MemReentrancyGuard * to qemu_new_nic()
|
||||
|
||||
Recently MemReentrancyGuard was added to DeviceState to record that the
|
||||
device is engaging in I/O. The network device backend needs to update it
|
||||
when delivering a packet to a device.
|
||||
|
||||
In preparation for such a change, add MemReentrancyGuard * as a
|
||||
parameter of qemu_new_nic().
|
||||
|
||||
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
|
||||
Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/net/allwinner-sun8i-emac.c | 3 ++-
|
||||
hw/net/allwinner_emac.c | 3 ++-
|
||||
hw/net/cadence_gem.c | 3 ++-
|
||||
hw/net/dp8393x.c | 3 ++-
|
||||
hw/net/e1000.c | 3 ++-
|
||||
hw/net/e1000e.c | 2 +-
|
||||
hw/net/eepro100.c | 4 +++-
|
||||
hw/net/etraxfs_eth.c | 3 ++-
|
||||
hw/net/fsl_etsec/etsec.c | 3 ++-
|
||||
hw/net/ftgmac100.c | 3 ++-
|
||||
hw/net/i82596.c | 2 +-
|
||||
hw/net/imx_fec.c | 2 +-
|
||||
hw/net/lan9118.c | 3 ++-
|
||||
hw/net/mcf_fec.c | 3 ++-
|
||||
hw/net/mipsnet.c | 3 ++-
|
||||
hw/net/msf2-emac.c | 3 ++-
|
||||
hw/net/ne2000-isa.c | 3 ++-
|
||||
hw/net/ne2000-pci.c | 3 ++-
|
||||
hw/net/npcm7xx_emc.c | 3 ++-
|
||||
hw/net/opencores_eth.c | 3 ++-
|
||||
hw/net/pcnet.c | 3 ++-
|
||||
hw/net/rocker/rocker_fp.c | 4 ++--
|
||||
hw/net/rtl8139.c | 3 ++-
|
||||
hw/net/smc91c111.c | 3 ++-
|
||||
hw/net/spapr_llan.c | 3 ++-
|
||||
hw/net/stellaris_enet.c | 3 ++-
|
||||
hw/net/sungem.c | 2 +-
|
||||
hw/net/sunhme.c | 3 ++-
|
||||
hw/net/tulip.c | 3 ++-
|
||||
hw/net/virtio-net.c | 6 ++++--
|
||||
hw/net/vmxnet3.c | 2 +-
|
||||
hw/net/xen_nic.c | 3 ++-
|
||||
hw/net/xgmac.c | 3 ++-
|
||||
hw/net/xilinx_axienet.c | 3 ++-
|
||||
hw/net/xilinx_ethlite.c | 3 ++-
|
||||
hw/usb/dev-network.c | 3 ++-
|
||||
include/net/net.h | 1 +
|
||||
net/net.c | 1 +
|
||||
38 files changed, 72 insertions(+), 38 deletions(-)
|
||||
|
||||
diff --git a/hw/net/allwinner-sun8i-emac.c b/hw/net/allwinner-sun8i-emac.c
|
||||
index ff611f18fb..9d0885ee15 100644
|
||||
--- a/hw/net/allwinner-sun8i-emac.c
|
||||
+++ b/hw/net/allwinner-sun8i-emac.c
|
||||
@@ -810,7 +810,8 @@ static void allwinner_sun8i_emac_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_allwinner_sun8i_emac_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/allwinner_emac.c b/hw/net/allwinner_emac.c
|
||||
index ddddf35c45..b3d73143bf 100644
|
||||
--- a/hw/net/allwinner_emac.c
|
||||
+++ b/hw/net/allwinner_emac.c
|
||||
@@ -453,7 +453,8 @@ static void aw_emac_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_aw_emac_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
|
||||
fifo8_create(&s->rx_fifo, RX_FIFO_SIZE);
|
||||
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
|
||||
index 24b3a0ff66..cb61a76417 100644
|
||||
--- a/hw/net/cadence_gem.c
|
||||
+++ b/hw/net/cadence_gem.c
|
||||
@@ -1633,7 +1633,8 @@ static void gem_realize(DeviceState *dev, Error **errp)
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
|
||||
s->nic = qemu_new_nic(&net_gem_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
|
||||
if (s->jumbo_max_len > MAX_FRAME_SIZE) {
|
||||
error_setg(errp, "jumbo-max-len is greater than %d",
|
||||
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
|
||||
index 45b954e46c..abfcc6f69f 100644
|
||||
--- a/hw/net/dp8393x.c
|
||||
+++ b/hw/net/dp8393x.c
|
||||
@@ -943,7 +943,8 @@ static void dp8393x_realize(DeviceState *dev, Error **errp)
|
||||
"dp8393x-regs", SONIC_REG_COUNT << s->it_shift);
|
||||
|
||||
s->nic = qemu_new_nic(&net_dp83932_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
|
||||
s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s);
|
||||
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
|
||||
index 282d01e374..86da1ae39e 100644
|
||||
--- a/hw/net/e1000.c
|
||||
+++ b/hw/net/e1000.c
|
||||
@@ -1733,7 +1733,8 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
|
||||
macaddr);
|
||||
|
||||
d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
|
||||
- object_get_typename(OBJECT(d)), dev->id, d);
|
||||
+ object_get_typename(OBJECT(d)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, d);
|
||||
|
||||
qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
|
||||
|
||||
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
|
||||
index d35bc1f0b0..c6096fa848 100644
|
||||
--- a/hw/net/e1000e.c
|
||||
+++ b/hw/net/e1000e.c
|
||||
@@ -340,7 +340,7 @@ e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr)
|
||||
int i;
|
||||
|
||||
s->nic = qemu_new_nic(&net_e1000e_info, &s->conf,
|
||||
- object_get_typename(OBJECT(s)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(s)), dev->id, &dev->mem_reentrancy_guard, s);
|
||||
|
||||
s->core.max_queue_num = s->conf.peers.queues ? s->conf.peers.queues - 1 : 0;
|
||||
|
||||
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
|
||||
index 16e95ef9cc..16ca4dda04 100644
|
||||
--- a/hw/net/eepro100.c
|
||||
+++ b/hw/net/eepro100.c
|
||||
@@ -1865,7 +1865,9 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp)
|
||||
nic_reset(s);
|
||||
|
||||
s->nic = qemu_new_nic(&net_eepro100_info, &s->conf,
|
||||
- object_get_typename(OBJECT(pci_dev)), pci_dev->qdev.id, s);
|
||||
+ object_get_typename(OBJECT(pci_dev)),
|
||||
+ pci_dev->qdev.id,
|
||||
+ &pci_dev->qdev.mem_reentrancy_guard, s);
|
||||
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
TRACE(OTHER, logout("%s\n", qemu_get_queue(s->nic)->info_str));
|
||||
diff --git a/hw/net/etraxfs_eth.c b/hw/net/etraxfs_eth.c
|
||||
index 1b82aec794..ba57a978d1 100644
|
||||
--- a/hw/net/etraxfs_eth.c
|
||||
+++ b/hw/net/etraxfs_eth.c
|
||||
@@ -618,7 +618,8 @@ static void etraxfs_eth_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_etraxfs_info, &s->conf,
|
||||
- object_get_typename(OBJECT(s)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(s)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
|
||||
s->phy.read = tdk_read;
|
||||
diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c
|
||||
index bd9d62b559..f790613b52 100644
|
||||
--- a/hw/net/fsl_etsec/etsec.c
|
||||
+++ b/hw/net/fsl_etsec/etsec.c
|
||||
@@ -391,7 +391,8 @@ static void etsec_realize(DeviceState *dev, Error **errp)
|
||||
eTSEC *etsec = ETSEC_COMMON(dev);
|
||||
|
||||
etsec->nic = qemu_new_nic(&net_etsec_info, &etsec->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, etsec);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, etsec);
|
||||
qemu_format_nic_info_str(qemu_get_queue(etsec->nic), etsec->conf.macaddr.a);
|
||||
|
||||
etsec->ptimer = ptimer_init(etsec_timer_hit, etsec, PTIMER_POLICY_DEFAULT);
|
||||
diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
|
||||
index 25685ba3a9..781e7f352e 100644
|
||||
--- a/hw/net/ftgmac100.c
|
||||
+++ b/hw/net/ftgmac100.c
|
||||
@@ -1111,7 +1111,8 @@ static void ftgmac100_realize(DeviceState *dev, Error **errp)
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
|
||||
s->nic = qemu_new_nic(&net_ftgmac100_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/i82596.c b/hw/net/i82596.c
|
||||
index ec21e2699a..dc64246f75 100644
|
||||
--- a/hw/net/i82596.c
|
||||
+++ b/hw/net/i82596.c
|
||||
@@ -743,7 +743,7 @@ void i82596_common_init(DeviceState *dev, I82596State *s, NetClientInfo *info)
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
}
|
||||
s->nic = qemu_new_nic(info, &s->conf, object_get_typename(OBJECT(dev)),
|
||||
- dev->id, s);
|
||||
+ dev->id, &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
|
||||
if (USE_TIMER) {
|
||||
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
|
||||
index 9c7035bc94..ed19ee9350 100644
|
||||
--- a/hw/net/imx_fec.c
|
||||
+++ b/hw/net/imx_fec.c
|
||||
@@ -1310,7 +1310,7 @@ static void imx_eth_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
s->nic = qemu_new_nic(&imx_eth_net_info, &s->conf,
|
||||
object_get_typename(OBJECT(dev)),
|
||||
- dev->id, s);
|
||||
+ dev->id, &dev->mem_reentrancy_guard, s);
|
||||
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
}
|
||||
diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c
|
||||
index 6aff424cbe..942bce9ae6 100644
|
||||
--- a/hw/net/lan9118.c
|
||||
+++ b/hw/net/lan9118.c
|
||||
@@ -1354,7 +1354,8 @@ static void lan9118_realize(DeviceState *dev, Error **errp)
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
|
||||
s->nic = qemu_new_nic(&net_lan9118_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
s->eeprom[0] = 0xa5;
|
||||
for (i = 0; i < 6; i++) {
|
||||
diff --git a/hw/net/mcf_fec.c b/hw/net/mcf_fec.c
|
||||
index 25e3e453ab..a6be7bf413 100644
|
||||
--- a/hw/net/mcf_fec.c
|
||||
+++ b/hw/net/mcf_fec.c
|
||||
@@ -643,7 +643,8 @@ static void mcf_fec_realize(DeviceState *dev, Error **errp)
|
||||
mcf_fec_state *s = MCF_FEC_NET(dev);
|
||||
|
||||
s->nic = qemu_new_nic(&net_mcf_fec_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/mipsnet.c b/hw/net/mipsnet.c
|
||||
index 2ade72dea0..8e925de867 100644
|
||||
--- a/hw/net/mipsnet.c
|
||||
+++ b/hw/net/mipsnet.c
|
||||
@@ -255,7 +255,8 @@ static void mipsnet_realize(DeviceState *dev, Error **errp)
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
|
||||
s->nic = qemu_new_nic(&net_mipsnet_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/msf2-emac.c b/hw/net/msf2-emac.c
|
||||
index 9278fdce0b..1efa3dbf01 100644
|
||||
--- a/hw/net/msf2-emac.c
|
||||
+++ b/hw/net/msf2-emac.c
|
||||
@@ -527,7 +527,8 @@ static void msf2_emac_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_msf2_emac_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/ne2000-isa.c b/hw/net/ne2000-isa.c
|
||||
index dd6f6e34d3..30bd20c293 100644
|
||||
--- a/hw/net/ne2000-isa.c
|
||||
+++ b/hw/net/ne2000-isa.c
|
||||
@@ -74,7 +74,8 @@ static void isa_ne2000_realizefn(DeviceState *dev, Error **errp)
|
||||
ne2000_reset(s);
|
||||
|
||||
s->nic = qemu_new_nic(&net_ne2000_isa_info, &s->c,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->c.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/ne2000-pci.c b/hw/net/ne2000-pci.c
|
||||
index 9e5d10859a..4f8a699081 100644
|
||||
--- a/hw/net/ne2000-pci.c
|
||||
+++ b/hw/net/ne2000-pci.c
|
||||
@@ -71,7 +71,8 @@ static void pci_ne2000_realize(PCIDevice *pci_dev, Error **errp)
|
||||
|
||||
s->nic = qemu_new_nic(&net_ne2000_info, &s->c,
|
||||
object_get_typename(OBJECT(pci_dev)),
|
||||
- pci_dev->qdev.id, s);
|
||||
+ pci_dev->qdev.id,
|
||||
+ &pci_dev->qdev.mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->c.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/npcm7xx_emc.c b/hw/net/npcm7xx_emc.c
|
||||
index 7c892f820f..dd1d0ad3bc 100644
|
||||
--- a/hw/net/npcm7xx_emc.c
|
||||
+++ b/hw/net/npcm7xx_emc.c
|
||||
@@ -802,7 +802,8 @@ static void npcm7xx_emc_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
qemu_macaddr_default_if_unset(&emc->conf.macaddr);
|
||||
emc->nic = qemu_new_nic(&net_npcm7xx_emc_info, &emc->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, emc);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, emc);
|
||||
qemu_format_nic_info_str(qemu_get_queue(emc->nic), emc->conf.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/opencores_eth.c b/hw/net/opencores_eth.c
|
||||
index 0b3dc3146e..f96d6ea2cc 100644
|
||||
--- a/hw/net/opencores_eth.c
|
||||
+++ b/hw/net/opencores_eth.c
|
||||
@@ -732,7 +732,8 @@ static void sysbus_open_eth_realize(DeviceState *dev, Error **errp)
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
|
||||
s->nic = qemu_new_nic(&net_open_eth_info, &s->conf,
|
||||
- object_get_typename(OBJECT(s)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(s)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
}
|
||||
|
||||
static void qdev_open_eth_reset(DeviceState *dev)
|
||||
diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c
|
||||
index dcd3fc4948..da910a70bf 100644
|
||||
--- a/hw/net/pcnet.c
|
||||
+++ b/hw/net/pcnet.c
|
||||
@@ -1718,7 +1718,8 @@ void pcnet_common_init(DeviceState *dev, PCNetState *s, NetClientInfo *info)
|
||||
s->poll_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pcnet_poll_timer, s);
|
||||
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
- s->nic = qemu_new_nic(info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ s->nic = qemu_new_nic(info, &s->conf, object_get_typename(OBJECT(dev)),
|
||||
+ dev->id, &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
|
||||
/* Initialize the PROM */
|
||||
diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c
|
||||
index cbeed65bd5..0d21948ada 100644
|
||||
--- a/hw/net/rocker/rocker_fp.c
|
||||
+++ b/hw/net/rocker/rocker_fp.c
|
||||
@@ -241,8 +241,8 @@ FpPort *fp_port_alloc(Rocker *r, char *sw_name,
|
||||
port->conf.bootindex = -1;
|
||||
port->conf.peers = *peers;
|
||||
|
||||
- port->nic = qemu_new_nic(&fp_port_info, &port->conf,
|
||||
- sw_name, NULL, port);
|
||||
+ port->nic = qemu_new_nic(&fp_port_info, &port->conf, sw_name, NULL,
|
||||
+ &DEVICE(r)->mem_reentrancy_guard, port);
|
||||
qemu_format_nic_info_str(qemu_get_queue(port->nic),
|
||||
port->conf.macaddr.a);
|
||||
|
||||
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
|
||||
index 3ffb9dd22c..a3565c7159 100644
|
||||
--- a/hw/net/rtl8139.c
|
||||
+++ b/hw/net/rtl8139.c
|
||||
@@ -3400,7 +3400,8 @@ static void pci_rtl8139_realize(PCIDevice *dev, Error **errp)
|
||||
s->eeprom.contents[9] = s->conf.macaddr.a[4] | s->conf.macaddr.a[5] << 8;
|
||||
|
||||
s->nic = qemu_new_nic(&net_rtl8139_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), d->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), d->id,
|
||||
+ &d->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
|
||||
s->cplus_txbuffer = NULL;
|
||||
diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c
|
||||
index ad778cd8fc..4eda971ef3 100644
|
||||
--- a/hw/net/smc91c111.c
|
||||
+++ b/hw/net/smc91c111.c
|
||||
@@ -783,7 +783,8 @@ static void smc91c111_realize(DeviceState *dev, Error **errp)
|
||||
sysbus_init_irq(sbd, &s->irq);
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_smc91c111_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
/* ??? Save/restore. */
|
||||
}
|
||||
diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
|
||||
index a6876a936d..475d5f3a34 100644
|
||||
--- a/hw/net/spapr_llan.c
|
||||
+++ b/hw/net/spapr_llan.c
|
||||
@@ -325,7 +325,8 @@ static void spapr_vlan_realize(SpaprVioDevice *sdev, Error **errp)
|
||||
memcpy(&dev->perm_mac.a, &dev->nicconf.macaddr.a, sizeof(dev->perm_mac.a));
|
||||
|
||||
dev->nic = qemu_new_nic(&net_spapr_vlan_info, &dev->nicconf,
|
||||
- object_get_typename(OBJECT(sdev)), sdev->qdev.id, dev);
|
||||
+ object_get_typename(OBJECT(sdev)), sdev->qdev.id,
|
||||
+ &sdev->qdev.mem_reentrancy_guard, dev);
|
||||
qemu_format_nic_info_str(qemu_get_queue(dev->nic), dev->nicconf.macaddr.a);
|
||||
|
||||
dev->rxp_timer = timer_new_us(QEMU_CLOCK_VIRTUAL, spapr_vlan_flush_rx_queue,
|
||||
diff --git a/hw/net/stellaris_enet.c b/hw/net/stellaris_enet.c
|
||||
index 8dd60783d8..6768a6912f 100644
|
||||
--- a/hw/net/stellaris_enet.c
|
||||
+++ b/hw/net/stellaris_enet.c
|
||||
@@ -492,7 +492,8 @@ static void stellaris_enet_realize(DeviceState *dev, Error **errp)
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
|
||||
s->nic = qemu_new_nic(&net_stellaris_enet_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/sungem.c b/hw/net/sungem.c
|
||||
index 3684a4d733..c12d44e9dc 100644
|
||||
--- a/hw/net/sungem.c
|
||||
+++ b/hw/net/sungem.c
|
||||
@@ -1361,7 +1361,7 @@ static void sungem_realize(PCIDevice *pci_dev, Error **errp)
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_sungem_info, &s->conf,
|
||||
object_get_typename(OBJECT(dev)),
|
||||
- dev->id, s);
|
||||
+ dev->id, &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic),
|
||||
s->conf.macaddr.a);
|
||||
}
|
||||
diff --git a/hw/net/sunhme.c b/hw/net/sunhme.c
|
||||
index fc34905f87..fa98528d71 100644
|
||||
--- a/hw/net/sunhme.c
|
||||
+++ b/hw/net/sunhme.c
|
||||
@@ -892,7 +892,8 @@ static void sunhme_realize(PCIDevice *pci_dev, Error **errp)
|
||||
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_sunhme_info, &s->conf,
|
||||
- object_get_typename(OBJECT(d)), d->id, s);
|
||||
+ object_get_typename(OBJECT(d)), d->id,
|
||||
+ &d->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/tulip.c b/hw/net/tulip.c
|
||||
index ca69f7ea5e..985c4c14a4 100644
|
||||
--- a/hw/net/tulip.c
|
||||
+++ b/hw/net/tulip.c
|
||||
@@ -981,7 +981,8 @@ static void pci_tulip_realize(PCIDevice *pci_dev, Error **errp)
|
||||
|
||||
s->nic = qemu_new_nic(&net_tulip_info, &s->c,
|
||||
object_get_typename(OBJECT(pci_dev)),
|
||||
- pci_dev->qdev.id, s);
|
||||
+ pci_dev->qdev.id,
|
||||
+ &pci_dev->qdev.mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->c.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
|
||||
index ddaa8fa122..f5f07f8e63 100644
|
||||
--- a/hw/net/virtio-net.c
|
||||
+++ b/hw/net/virtio-net.c
|
||||
@@ -3512,10 +3512,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
|
||||
* Happen when virtio_net_set_netclient_name has been called.
|
||||
*/
|
||||
n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
|
||||
- n->netclient_type, n->netclient_name, n);
|
||||
+ n->netclient_type, n->netclient_name,
|
||||
+ &dev->mem_reentrancy_guard, n);
|
||||
} else {
|
||||
n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, n);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, n);
|
||||
}
|
||||
|
||||
for (i = 0; i < n->max_queue_pairs; i++) {
|
||||
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
|
||||
index f65af4e9ef..d4df039c55 100644
|
||||
--- a/hw/net/vmxnet3.c
|
||||
+++ b/hw/net/vmxnet3.c
|
||||
@@ -2078,7 +2078,7 @@ static void vmxnet3_net_init(VMXNET3State *s)
|
||||
|
||||
s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
|
||||
object_get_typename(OBJECT(s)),
|
||||
- d->id, s);
|
||||
+ d->id, &d->mem_reentrancy_guard, s);
|
||||
|
||||
s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
|
||||
s->tx_sop = true;
|
||||
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
|
||||
index 5c815b4f0c..3d0b7820d3 100644
|
||||
--- a/hw/net/xen_nic.c
|
||||
+++ b/hw/net/xen_nic.c
|
||||
@@ -294,7 +294,8 @@ static int net_init(struct XenLegacyDevice *xendev)
|
||||
}
|
||||
|
||||
netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf,
|
||||
- "xen", NULL, netdev);
|
||||
+ "xen", NULL,
|
||||
+ &xendev->qdev.mem_reentrancy_guard, netdev);
|
||||
|
||||
snprintf(qemu_get_queue(netdev->nic)->info_str,
|
||||
sizeof(qemu_get_queue(netdev->nic)->info_str),
|
||||
diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c
|
||||
index 0ab6ae91aa..1f4f277d84 100644
|
||||
--- a/hw/net/xgmac.c
|
||||
+++ b/hw/net/xgmac.c
|
||||
@@ -402,7 +402,8 @@ static void xgmac_enet_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_xgmac_enet_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
|
||||
s->regs[XGMAC_ADDR_HIGH(0)] = (s->conf.macaddr.a[5] << 8) |
|
||||
diff --git a/hw/net/xilinx_axienet.c b/hw/net/xilinx_axienet.c
|
||||
index 990ff3a1c2..8a34243803 100644
|
||||
--- a/hw/net/xilinx_axienet.c
|
||||
+++ b/hw/net/xilinx_axienet.c
|
||||
@@ -968,7 +968,8 @@ static void xilinx_enet_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_xilinx_enet_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
|
||||
tdk_init(&s->TEMAC.phy);
|
||||
diff --git a/hw/net/xilinx_ethlite.c b/hw/net/xilinx_ethlite.c
|
||||
index 6e09f7e422..80cb869e22 100644
|
||||
--- a/hw/net/xilinx_ethlite.c
|
||||
+++ b/hw/net/xilinx_ethlite.c
|
||||
@@ -235,7 +235,8 @@ static void xilinx_ethlite_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_xilinx_ethlite_info, &s->conf,
|
||||
- object_get_typename(OBJECT(dev)), dev->id, s);
|
||||
+ object_get_typename(OBJECT(dev)), dev->id,
|
||||
+ &dev->mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
}
|
||||
|
||||
diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c
|
||||
index 6c49c16015..ae447a8bc3 100644
|
||||
--- a/hw/usb/dev-network.c
|
||||
+++ b/hw/usb/dev-network.c
|
||||
@@ -1362,7 +1362,8 @@ static void usb_net_realize(USBDevice *dev, Error **errp)
|
||||
|
||||
qemu_macaddr_default_if_unset(&s->conf.macaddr);
|
||||
s->nic = qemu_new_nic(&net_usbnet_info, &s->conf,
|
||||
- object_get_typename(OBJECT(s)), s->dev.qdev.id, s);
|
||||
+ object_get_typename(OBJECT(s)), s->dev.qdev.id,
|
||||
+ &s->dev.qdev.mem_reentrancy_guard, s);
|
||||
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
|
||||
snprintf(s->usbstring_mac, sizeof(s->usbstring_mac),
|
||||
"%02x%02x%02x%02x%02x%02x",
|
||||
diff --git a/include/net/net.h b/include/net/net.h
|
||||
index 523136c7ac..1457b6c014 100644
|
||||
--- a/include/net/net.h
|
||||
+++ b/include/net/net.h
|
||||
@@ -145,6 +145,7 @@ NICState *qemu_new_nic(NetClientInfo *info,
|
||||
NICConf *conf,
|
||||
const char *model,
|
||||
const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard,
|
||||
void *opaque);
|
||||
void qemu_del_nic(NICState *nic);
|
||||
NetClientState *qemu_get_subqueue(NICState *nic, int queue_index);
|
||||
diff --git a/net/net.c b/net/net.c
|
||||
index f0d14dbfc1..669e194c4b 100644
|
||||
--- a/net/net.c
|
||||
+++ b/net/net.c
|
||||
@@ -299,6 +299,7 @@ NICState *qemu_new_nic(NetClientInfo *info,
|
||||
NICConf *conf,
|
||||
const char *model,
|
||||
const char *name,
|
||||
+ MemReentrancyGuard *reentrancy_guard,
|
||||
void *opaque)
|
||||
{
|
||||
NetClientState **peers = conf->peers.ncs;
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,105 +0,0 @@
|
||||
From d58671091daf8c325a6f1cd87737d94b5fb51d12 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Thu, 23 Nov 2023 11:30:46 -0500
|
||||
Subject: [PATCH 2/4] net: Update MemReentrancyGuard for NIC
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 331: net: Provide MemReentrancyGuard * to qemu_new_nic()
|
||||
RH-Jira: RHEL-7309
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
|
||||
RH-Acked-by: Jason Wang <jasowang@redhat.com>
|
||||
RH-Commit: [2/2] b116efe725dd838c2cab9bd2240112f3c6c46d6a (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Jira: https://issues.redhat.com/browse/RHEL-7309
|
||||
CVE: CVE-2023-3019
|
||||
Upstream: Merged
|
||||
|
||||
commit 9050f976e447444ea6ee2ba12c9f77e4b0dc54bc
|
||||
Author: Akihiko Odaki <akihiko.odaki@daynix.com>
|
||||
Date: Thu Jun 1 12:18:59 2023 +0900
|
||||
|
||||
net: Update MemReentrancyGuard for NIC
|
||||
|
||||
Recently MemReentrancyGuard was added to DeviceState to record that the
|
||||
device is engaging in I/O. The network device backend needs to update it
|
||||
when delivering a packet to a device.
|
||||
|
||||
This implementation follows what bottom half does, but it does not add
|
||||
a tracepoint for the case that the network device backend started
|
||||
delivering a packet to a device which is already engaging in I/O. This
|
||||
is because such reentrancy frequently happens for
|
||||
qemu_flush_queued_packets() and is insignificant.
|
||||
|
||||
Fixes: CVE-2023-3019
|
||||
Reported-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
|
||||
Acked-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
include/net/net.h | 1 +
|
||||
net/net.c | 14 ++++++++++++++
|
||||
2 files changed, 15 insertions(+)
|
||||
|
||||
diff --git a/include/net/net.h b/include/net/net.h
|
||||
index 1457b6c014..11d4564ea1 100644
|
||||
--- a/include/net/net.h
|
||||
+++ b/include/net/net.h
|
||||
@@ -112,6 +112,7 @@ struct NetClientState {
|
||||
typedef struct NICState {
|
||||
NetClientState *ncs;
|
||||
NICConf *conf;
|
||||
+ MemReentrancyGuard *reentrancy_guard;
|
||||
void *opaque;
|
||||
bool peer_deleted;
|
||||
} NICState;
|
||||
diff --git a/net/net.c b/net/net.c
|
||||
index 669e194c4b..b3008a52b7 100644
|
||||
--- a/net/net.c
|
||||
+++ b/net/net.c
|
||||
@@ -312,6 +312,7 @@ NICState *qemu_new_nic(NetClientInfo *info,
|
||||
nic = g_malloc0(info->size + sizeof(NetClientState) * queues);
|
||||
nic->ncs = (void *)nic + info->size;
|
||||
nic->conf = conf;
|
||||
+ nic->reentrancy_guard = reentrancy_guard,
|
||||
nic->opaque = opaque;
|
||||
|
||||
for (i = 0; i < queues; i++) {
|
||||
@@ -767,6 +768,7 @@ static ssize_t qemu_deliver_packet_iov(NetClientState *sender,
|
||||
int iovcnt,
|
||||
void *opaque)
|
||||
{
|
||||
+ MemReentrancyGuard *owned_reentrancy_guard;
|
||||
NetClientState *nc = opaque;
|
||||
int ret;
|
||||
|
||||
@@ -779,12 +781,24 @@ static ssize_t qemu_deliver_packet_iov(NetClientState *sender,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+ if (nc->info->type != NET_CLIENT_DRIVER_NIC ||
|
||||
+ qemu_get_nic(nc)->reentrancy_guard->engaged_in_io) {
|
||||
+ owned_reentrancy_guard = NULL;
|
||||
+ } else {
|
||||
+ owned_reentrancy_guard = qemu_get_nic(nc)->reentrancy_guard;
|
||||
+ owned_reentrancy_guard->engaged_in_io = true;
|
||||
+ }
|
||||
+
|
||||
if (nc->info->receive_iov && !(flags & QEMU_NET_PACKET_FLAG_RAW)) {
|
||||
ret = nc->info->receive_iov(nc, iov, iovcnt);
|
||||
} else {
|
||||
ret = nc_sendv_compat(nc, iov, iovcnt, flags);
|
||||
}
|
||||
|
||||
+ if (owned_reentrancy_guard) {
|
||||
+ owned_reentrancy_guard->engaged_in_io = false;
|
||||
+ }
|
||||
+
|
||||
if (ret == 0) {
|
||||
nc->receive_disabled = 1;
|
||||
}
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,376 +0,0 @@
|
||||
From e11cffc152d9af9194139a37f86e357cb36298e8 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Thu, 25 May 2023 12:50:19 +0200
|
||||
Subject: [PATCH 22/22] pc-bios: Add support for List-Directed IPL from ECKD
|
||||
DASD
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 279: Backport latest s390x-related fixes from upstream QEMU for qemu-kvm in RHEL 8.9
|
||||
RH-Bugzilla: 2169308 2209605
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Commit: [21/21] cab945af05566d892459a7c8ea3f114310d6bb67
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2209605
|
||||
|
||||
commit 8af5d141713f5d20c4bc1719eb746ef8b1746bd6
|
||||
Author: Jared Rossi <jrossi@linux.ibm.com>
|
||||
Date: Tue Feb 21 12:45:48 2023 -0500
|
||||
|
||||
pc-bios: Add support for List-Directed IPL from ECKD DASD
|
||||
|
||||
Check for a List Directed IPL Boot Record, which would supersede the CCW type
|
||||
entries. If the record is valid, proceed to use the new style pointers
|
||||
and perform LD-IPL. Each block pointer is interpreted as either an LD-IPL
|
||||
pointer or a legacy CCW pointer depending on the type of IPL initiated.
|
||||
|
||||
In either case CCW- or LD-IPL is transparent to the user and will boot the same
|
||||
image regardless of which set of pointers is used. Because the interactive boot
|
||||
menu is only written with the old style pointers, the menu will be disabled for
|
||||
List Directed IPL from ECKD DASD.
|
||||
|
||||
If the LD-IPL fails, retry the IPL using the CCW type pointers.
|
||||
|
||||
If no LD-IPL boot record is found, simply perform CCW type IPL as usual.
|
||||
|
||||
Signed-off-by: Jared Rossi <jrossi@linux.ibm.com>
|
||||
Message-Id: <20230221174548.1866861-2-jrossi@linux.ibm.com>
|
||||
[thuth: Drop some superfluous parentheses]
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
pc-bios/s390-ccw/bootmap.c | 157 ++++++++++++++++++++++++++++---------
|
||||
pc-bios/s390-ccw/bootmap.h | 30 ++++++-
|
||||
2 files changed, 148 insertions(+), 39 deletions(-)
|
||||
|
||||
diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
|
||||
index 994e59c0b0..a2137449dc 100644
|
||||
--- a/pc-bios/s390-ccw/bootmap.c
|
||||
+++ b/pc-bios/s390-ccw/bootmap.c
|
||||
@@ -72,42 +72,74 @@ static inline void verify_boot_info(BootInfo *bip)
|
||||
"Bad block size in zIPL section of the 1st record.");
|
||||
}
|
||||
|
||||
-static block_number_t eckd_block_num(EckdCHS *chs)
|
||||
+static void eckd_format_chs(ExtEckdBlockPtr *ptr, bool ldipl,
|
||||
+ uint64_t *c,
|
||||
+ uint64_t *h,
|
||||
+ uint64_t *s)
|
||||
+{
|
||||
+ if (ldipl) {
|
||||
+ *c = ptr->ldptr.chs.cylinder;
|
||||
+ *h = ptr->ldptr.chs.head;
|
||||
+ *s = ptr->ldptr.chs.sector;
|
||||
+ } else {
|
||||
+ *c = ptr->bptr.chs.cylinder;
|
||||
+ *h = ptr->bptr.chs.head;
|
||||
+ *s = ptr->bptr.chs.sector;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static block_number_t eckd_chs_to_block(uint64_t c, uint64_t h, uint64_t s)
|
||||
{
|
||||
const uint64_t sectors = virtio_get_sectors();
|
||||
const uint64_t heads = virtio_get_heads();
|
||||
- const uint64_t cylinder = chs->cylinder
|
||||
- + ((chs->head & 0xfff0) << 12);
|
||||
- const uint64_t head = chs->head & 0x000f;
|
||||
+ const uint64_t cylinder = c + ((h & 0xfff0) << 12);
|
||||
+ const uint64_t head = h & 0x000f;
|
||||
const block_number_t block = sectors * heads * cylinder
|
||||
+ sectors * head
|
||||
- + chs->sector
|
||||
- - 1; /* block nr starts with zero */
|
||||
+ + s - 1; /* block nr starts with zero */
|
||||
return block;
|
||||
}
|
||||
|
||||
-static bool eckd_valid_address(BootMapPointer *p)
|
||||
+static block_number_t eckd_block_num(EckdCHS *chs)
|
||||
{
|
||||
- const uint64_t head = p->eckd.chs.head & 0x000f;
|
||||
+ return eckd_chs_to_block(chs->cylinder, chs->head, chs->sector);
|
||||
+}
|
||||
+
|
||||
+static block_number_t gen_eckd_block_num(ExtEckdBlockPtr *ptr, bool ldipl)
|
||||
+{
|
||||
+ uint64_t cyl, head, sec;
|
||||
+ eckd_format_chs(ptr, ldipl, &cyl, &head, &sec);
|
||||
+ return eckd_chs_to_block(cyl, head, sec);
|
||||
+}
|
||||
|
||||
+static bool eckd_valid_chs(uint64_t cyl, uint64_t head, uint64_t sector)
|
||||
+{
|
||||
if (head >= virtio_get_heads()
|
||||
- || p->eckd.chs.sector > virtio_get_sectors()
|
||||
- || p->eckd.chs.sector <= 0) {
|
||||
+ || sector > virtio_get_sectors()
|
||||
+ || sector <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!virtio_guessed_disk_nature() &&
|
||||
- eckd_block_num(&p->eckd.chs) >= virtio_get_blocks()) {
|
||||
+ eckd_chs_to_block(cyl, head, sector) >= virtio_get_blocks()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
-static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
|
||||
+static bool eckd_valid_address(ExtEckdBlockPtr *ptr, bool ldipl)
|
||||
+{
|
||||
+ uint64_t cyl, head, sec;
|
||||
+ eckd_format_chs(ptr, ldipl, &cyl, &head, &sec);
|
||||
+ return eckd_valid_chs(cyl, head, sec);
|
||||
+}
|
||||
+
|
||||
+static block_number_t load_eckd_segments(block_number_t blk, bool ldipl,
|
||||
+ uint64_t *address)
|
||||
{
|
||||
block_number_t block_nr;
|
||||
- int j, rc;
|
||||
+ int j, rc, count;
|
||||
BootMapPointer *bprs = (void *)_bprs;
|
||||
bool more_data;
|
||||
|
||||
@@ -117,7 +149,7 @@ static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
|
||||
do {
|
||||
more_data = false;
|
||||
for (j = 0;; j++) {
|
||||
- block_nr = eckd_block_num(&bprs[j].xeckd.bptr.chs);
|
||||
+ block_nr = gen_eckd_block_num(&bprs[j].xeckd, ldipl);
|
||||
if (is_null_block_number(block_nr)) { /* end of chunk */
|
||||
break;
|
||||
}
|
||||
@@ -129,11 +161,26 @@ static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
|
||||
break;
|
||||
}
|
||||
|
||||
- IPL_assert(block_size_ok(bprs[j].xeckd.bptr.size),
|
||||
+ /* List directed pointer does not store block size */
|
||||
+ IPL_assert(ldipl || block_size_ok(bprs[j].xeckd.bptr.size),
|
||||
"bad chunk block size");
|
||||
- IPL_assert(eckd_valid_address(&bprs[j]), "bad chunk ECKD addr");
|
||||
|
||||
- if ((bprs[j].xeckd.bptr.count == 0) && unused_space(&(bprs[j+1]),
|
||||
+ if (!eckd_valid_address(&bprs[j].xeckd, ldipl)) {
|
||||
+ /*
|
||||
+ * If an invalid address is found during LD-IPL then break and
|
||||
+ * retry as CCW
|
||||
+ */
|
||||
+ IPL_assert(ldipl, "bad chunk ECKD addr");
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if (ldipl) {
|
||||
+ count = bprs[j].xeckd.ldptr.count;
|
||||
+ } else {
|
||||
+ count = bprs[j].xeckd.bptr.count;
|
||||
+ }
|
||||
+
|
||||
+ if (count == 0 && unused_space(&bprs[j + 1],
|
||||
sizeof(EckdBlockPtr))) {
|
||||
/* This is a "continue" pointer.
|
||||
* This ptr should be the last one in the current
|
||||
@@ -149,11 +196,10 @@ static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
|
||||
/* Load (count+1) blocks of code at (block_nr)
|
||||
* to memory (address).
|
||||
*/
|
||||
- rc = virtio_read_many(block_nr, (void *)(*address),
|
||||
- bprs[j].xeckd.bptr.count+1);
|
||||
+ rc = virtio_read_many(block_nr, (void *)(*address), count + 1);
|
||||
IPL_assert(rc == 0, "code chunk read failed");
|
||||
|
||||
- *address += (bprs[j].xeckd.bptr.count+1) * virtio_get_block_size();
|
||||
+ *address += (count + 1) * virtio_get_block_size();
|
||||
}
|
||||
} while (more_data);
|
||||
return block_nr;
|
||||
@@ -237,8 +283,10 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr,
|
||||
uint64_t address;
|
||||
BootMapTable *bmt = (void *)sec;
|
||||
BootMapScript *bms = (void *)sec;
|
||||
+ /* The S1B block number is NULL_BLOCK_NR if and only if it's an LD-IPL */
|
||||
+ bool ldipl = (s1b_block_nr == NULL_BLOCK_NR);
|
||||
|
||||
- if (menu_is_enabled_zipl()) {
|
||||
+ if (menu_is_enabled_zipl() && !ldipl) {
|
||||
loadparm = eckd_get_boot_menu_index(s1b_block_nr);
|
||||
}
|
||||
|
||||
@@ -249,7 +297,7 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr,
|
||||
memset(sec, FREE_SPACE_FILLER, sizeof(sec));
|
||||
read_block(bmt_block_nr, sec, "Cannot read Boot Map Table");
|
||||
|
||||
- block_nr = eckd_block_num(&bmt->entry[loadparm].xeckd.bptr.chs);
|
||||
+ block_nr = gen_eckd_block_num(&bmt->entry[loadparm].xeckd, ldipl);
|
||||
IPL_assert(block_nr != -1, "Cannot find Boot Map Table Entry");
|
||||
|
||||
memset(sec, FREE_SPACE_FILLER, sizeof(sec));
|
||||
@@ -264,13 +312,18 @@ static void run_eckd_boot_script(block_number_t bmt_block_nr,
|
||||
}
|
||||
|
||||
address = bms->entry[i].address.load_address;
|
||||
- block_nr = eckd_block_num(&bms->entry[i].blkptr.xeckd.bptr.chs);
|
||||
+ block_nr = gen_eckd_block_num(&bms->entry[i].blkptr.xeckd, ldipl);
|
||||
|
||||
do {
|
||||
- block_nr = load_eckd_segments(block_nr, &address);
|
||||
+ block_nr = load_eckd_segments(block_nr, ldipl, &address);
|
||||
} while (block_nr != -1);
|
||||
}
|
||||
|
||||
+ if (ldipl && bms->entry[i].type != BOOT_SCRIPT_EXEC) {
|
||||
+ /* Abort LD-IPL and retry as CCW-IPL */
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
IPL_assert(bms->entry[i].type == BOOT_SCRIPT_EXEC,
|
||||
"Unknown script entry type");
|
||||
write_reset_psw(bms->entry[i].address.load_address); /* no return */
|
||||
@@ -380,6 +433,23 @@ static void ipl_eckd_ldl(ECKD_IPL_mode_t mode)
|
||||
/* no return */
|
||||
}
|
||||
|
||||
+static block_number_t eckd_find_bmt(ExtEckdBlockPtr *ptr)
|
||||
+{
|
||||
+ block_number_t blockno;
|
||||
+ uint8_t tmp_sec[MAX_SECTOR_SIZE];
|
||||
+ BootRecord *br;
|
||||
+
|
||||
+ blockno = gen_eckd_block_num(ptr, 0);
|
||||
+ read_block(blockno, tmp_sec, "Cannot read boot record");
|
||||
+ br = (BootRecord *)tmp_sec;
|
||||
+ if (!magic_match(br->magic, ZIPL_MAGIC)) {
|
||||
+ /* If the boot record is invalid, return and try CCW-IPL instead */
|
||||
+ return NULL_BLOCK_NR;
|
||||
+ }
|
||||
+
|
||||
+ return gen_eckd_block_num(&br->pgt.xeckd, 1);
|
||||
+}
|
||||
+
|
||||
static void print_eckd_msg(void)
|
||||
{
|
||||
char msg[] = "Using ECKD scheme (block size *****), ";
|
||||
@@ -401,28 +471,43 @@ static void print_eckd_msg(void)
|
||||
|
||||
static void ipl_eckd(void)
|
||||
{
|
||||
- XEckdMbr *mbr = (void *)sec;
|
||||
- LDL_VTOC *vlbl = (void *)sec;
|
||||
+ IplVolumeLabel *vlbl = (void *)sec;
|
||||
+ LDL_VTOC *vtoc = (void *)sec;
|
||||
+ block_number_t ldipl_bmt; /* Boot Map Table for List-Directed IPL */
|
||||
|
||||
print_eckd_msg();
|
||||
|
||||
- /* Grab the MBR again */
|
||||
+ /* Block 2 can contain either the CDL VOL1 label or the LDL VTOC */
|
||||
memset(sec, FREE_SPACE_FILLER, sizeof(sec));
|
||||
- read_block(0, mbr, "Cannot read block 0 on DASD");
|
||||
+ read_block(2, vlbl, "Cannot read block 2");
|
||||
|
||||
- if (magic_match(mbr->magic, IPL1_MAGIC)) {
|
||||
- ipl_eckd_cdl(); /* only returns in case of error */
|
||||
- return;
|
||||
+ /*
|
||||
+ * First check for a list-directed-format pointer which would
|
||||
+ * supersede the CCW pointer.
|
||||
+ */
|
||||
+ if (eckd_valid_address((ExtEckdBlockPtr *)&vlbl->f.br, 0)) {
|
||||
+ ldipl_bmt = eckd_find_bmt((ExtEckdBlockPtr *)&vlbl->f.br);
|
||||
+ if (ldipl_bmt) {
|
||||
+ sclp_print("List-Directed\n");
|
||||
+ /* LD-IPL does not use the S1B block, just make it NULL */
|
||||
+ run_eckd_boot_script(ldipl_bmt, NULL_BLOCK_NR);
|
||||
+ /* Only return in error, retry as CCW-IPL */
|
||||
+ sclp_print("Retrying IPL ");
|
||||
+ print_eckd_msg();
|
||||
+ }
|
||||
+ memset(sec, FREE_SPACE_FILLER, sizeof(sec));
|
||||
+ read_block(2, vtoc, "Cannot read block 2");
|
||||
}
|
||||
|
||||
- /* LDL/CMS? */
|
||||
- memset(sec, FREE_SPACE_FILLER, sizeof(sec));
|
||||
- read_block(2, vlbl, "Cannot read block 2");
|
||||
+ /* Not list-directed */
|
||||
+ if (magic_match(vtoc->magic, VOL1_MAGIC)) {
|
||||
+ ipl_eckd_cdl(); /* may return in error */
|
||||
+ }
|
||||
|
||||
- if (magic_match(vlbl->magic, CMS1_MAGIC)) {
|
||||
+ if (magic_match(vtoc->magic, CMS1_MAGIC)) {
|
||||
ipl_eckd_ldl(ECKD_CMS); /* no return */
|
||||
}
|
||||
- if (magic_match(vlbl->magic, LNX1_MAGIC)) {
|
||||
+ if (magic_match(vtoc->magic, LNX1_MAGIC)) {
|
||||
ipl_eckd_ldl(ECKD_LDL); /* no return */
|
||||
}
|
||||
|
||||
diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h
|
||||
index 3946aa3f8d..d4690a88c2 100644
|
||||
--- a/pc-bios/s390-ccw/bootmap.h
|
||||
+++ b/pc-bios/s390-ccw/bootmap.h
|
||||
@@ -45,9 +45,23 @@ typedef struct EckdBlockPtr {
|
||||
* it's 0 for TablePtr, ScriptPtr, and SectionPtr */
|
||||
} __attribute__ ((packed)) EckdBlockPtr;
|
||||
|
||||
-typedef struct ExtEckdBlockPtr {
|
||||
+typedef struct LdEckdCHS {
|
||||
+ uint32_t cylinder;
|
||||
+ uint8_t head;
|
||||
+ uint8_t sector;
|
||||
+} __attribute__ ((packed)) LdEckdCHS;
|
||||
+
|
||||
+typedef struct LdEckdBlockPtr {
|
||||
+ LdEckdCHS chs; /* cylinder/head/sector is an address of the block */
|
||||
+ uint8_t reserved[4];
|
||||
+ uint16_t count;
|
||||
+ uint32_t pad;
|
||||
+} __attribute__ ((packed)) LdEckdBlockPtr;
|
||||
+
|
||||
+/* bptr is used for CCW type IPL, while ldptr is for list-directed IPL */
|
||||
+typedef union ExtEckdBlockPtr {
|
||||
EckdBlockPtr bptr;
|
||||
- uint8_t reserved[8];
|
||||
+ LdEckdBlockPtr ldptr;
|
||||
} __attribute__ ((packed)) ExtEckdBlockPtr;
|
||||
|
||||
typedef union BootMapPointer {
|
||||
@@ -57,6 +71,15 @@ typedef union BootMapPointer {
|
||||
ExtEckdBlockPtr xeckd;
|
||||
} __attribute__ ((packed)) BootMapPointer;
|
||||
|
||||
+typedef struct BootRecord {
|
||||
+ uint8_t magic[4];
|
||||
+ uint32_t version;
|
||||
+ uint64_t res1;
|
||||
+ BootMapPointer pgt;
|
||||
+ uint8_t reserved[510 - 32];
|
||||
+ uint16_t os_id;
|
||||
+} __attribute__ ((packed)) BootRecord;
|
||||
+
|
||||
/* aka Program Table */
|
||||
typedef struct BootMapTable {
|
||||
uint8_t magic[4];
|
||||
@@ -292,7 +315,8 @@ typedef struct IplVolumeLabel {
|
||||
struct {
|
||||
unsigned char key[4]; /* == "VOL1" */
|
||||
unsigned char volser[6];
|
||||
- unsigned char reserved[6];
|
||||
+ unsigned char reserved[64];
|
||||
+ EckdCHS br; /* Location of Boot Record for list-directed IPL */
|
||||
} f;
|
||||
};
|
||||
} __attribute__((packed)) IplVolumeLabel;
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
From 01c09f31978154f0d2fd699621ae958a8c3ea2a5 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:15:24 -0500
|
||||
Subject: [PATCH 08/13] physmem: add missing memory barrier
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2168472
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Commit: [8/10] f6a9659f7cf40b78de6e85e4a7c06842273aa770
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
|
||||
|
||||
commit 33828ca11da08436e1b32f3e79dabce3061a0427
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri Mar 3 14:36:32 2023 +0100
|
||||
|
||||
physmem: add missing memory barrier
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
softmmu/physmem.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
|
||||
index 4d0ef5f92f..2b96fad302 100644
|
||||
--- a/softmmu/physmem.c
|
||||
+++ b/softmmu/physmem.c
|
||||
@@ -3087,6 +3087,8 @@ void cpu_register_map_client(QEMUBH *bh)
|
||||
qemu_mutex_lock(&map_client_list_lock);
|
||||
client->bh = bh;
|
||||
QLIST_INSERT_HEAD(&map_client_list, client, link);
|
||||
+ /* Write map_client_list before reading in_use. */
|
||||
+ smp_mb();
|
||||
if (!qatomic_read(&bounce.in_use)) {
|
||||
cpu_notify_map_clients_locked();
|
||||
}
|
||||
@@ -3279,6 +3281,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
|
||||
qemu_vfree(bounce.buffer);
|
||||
bounce.buffer = NULL;
|
||||
memory_region_unref(bounce.mr);
|
||||
+ /* Clear in_use before reading map_client_list. */
|
||||
qatomic_mb_set(&bounce.in_use, false);
|
||||
cpu_notify_map_clients();
|
||||
}
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
From 57ee29fbb08f7b89ee1b7c75b749392c08af3b03 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 15:23:54 -0400
|
||||
Subject: [PATCH 1/5] qapi, i386/sev: Change the reduced-phys-bits value from 5
|
||||
to 1
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 296: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214840
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [1/4] 4137cb3b57cbb175078bc908fb2301ea2b97fd17
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214840
|
||||
|
||||
commit 798a818f50a9bfc01e8b5943090de458863b897b
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:27 2022 -0500
|
||||
|
||||
qapi, i386/sev: Change the reduced-phys-bits value from 5 to 1
|
||||
|
||||
A guest only ever experiences, at most, 1 bit of reduced physical
|
||||
addressing. Change the query-sev-capabilities json comment to use 1.
|
||||
|
||||
Fixes: 31dd67f684 ("sev/i386: qmp: add query-sev-capabilities command")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <cb96d8e09154533af4b4e6988469bc0b32390b65.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
RHEL Notes:
|
||||
Conflicts: Context differences, since commit 811b4ec7f8eb ("qapi, target/i386/sev: Add cpu0-id to query-sev-capabilities")
|
||||
is missing
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
qapi/misc-target.json | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/qapi/misc-target.json b/qapi/misc-target.json
|
||||
index 4bc45d2474..ede9052440 100644
|
||||
--- a/qapi/misc-target.json
|
||||
+++ b/qapi/misc-target.json
|
||||
@@ -205,7 +205,7 @@
|
||||
#
|
||||
# -> { "execute": "query-sev-capabilities" }
|
||||
# <- { "return": { "pdh": "8CCDD8DDD", "cert-chain": "888CCCDDDEE",
|
||||
-# "cbitpos": 47, "reduced-phys-bits": 5}}
|
||||
+# "cbitpos": 47, "reduced-phys-bits": 1}}
|
||||
#
|
||||
##
|
||||
{ 'command': 'query-sev-capabilities', 'returns': 'SevCapability',
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,177 +0,0 @@
|
||||
From e7d0e29d1962092af58d0445439671a6e1d91f71 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:10:33 -0500
|
||||
Subject: [PATCH 02/13] qatomic: add smp_mb__before/after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2168472
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Commit: [2/10] 1f87eb3157abcf23f020881cedce42f76497f348
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
|
||||
|
||||
commit ff00bed1897c3d27adc5b0cec6f6eeb5a7d13176
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu Mar 2 11:10:56 2023 +0100
|
||||
|
||||
qatomic: add smp_mb__before/after_rmw()
|
||||
|
||||
On ARM, seqcst loads and stores (which QEMU does not use) are compiled
|
||||
respectively as LDAR and STLR instructions. Even though LDAR is
|
||||
also used for load-acquire operations, it also waits for all STLRs to
|
||||
leave the store buffer. Thus, LDAR and STLR alone are load-acquire
|
||||
and store-release operations, but LDAR also provides store-against-load
|
||||
ordering as long as the previous store is a STLR.
|
||||
|
||||
Compare this to ARMv7, where store-release is DMB+STR and load-acquire
|
||||
is LDR+DMB, but an additional DMB is needed between store-seqcst and
|
||||
load-seqcst (e.g. DMB+STR+DMB+LDR+DMB); or with x86, where MOV provides
|
||||
load-acquire and store-release semantics and the two can be reordered.
|
||||
|
||||
Likewise, on ARM sequentially consistent read-modify-write operations only
|
||||
need to use LDAXR and STLXR respectively for the load and the store, while
|
||||
on x86 they need to use the stronger LOCK prefix.
|
||||
|
||||
In a strange twist of events, however, the _stronger_ semantics
|
||||
of the ARM instructions can end up causing bugs on ARM, not on x86.
|
||||
The problems occur when seqcst atomics are mixed with relaxed atomics.
|
||||
|
||||
QEMU's atomics try to bridge the Linux API (that most of the developers
|
||||
are familiar with) and the C11 API, and the two have a substantial
|
||||
difference:
|
||||
|
||||
- in Linux, strongly-ordered atomics such as atomic_add_return() affect
|
||||
the global ordering of _all_ memory operations, including for example
|
||||
READ_ONCE()/WRITE_ONCE()
|
||||
|
||||
- in C11, sequentially consistent atomics (except for seq-cst fences)
|
||||
only affect the ordering of sequentially consistent operations.
|
||||
In particular, since relaxed loads are done with LDR on ARM, they are
|
||||
not ordered against seqcst stores (which are done with STLR).
|
||||
|
||||
QEMU implements high-level synchronization primitives with the idea that
|
||||
the primitives contain the necessary memory barriers, and the callers can
|
||||
use relaxed atomics (qatomic_read/qatomic_set) or even regular accesses.
|
||||
This is very much incompatible with the C11 view that seqcst accesses
|
||||
are only ordered against other seqcst accesses, and requires using seqcst
|
||||
fences as in the following example:
|
||||
|
||||
qatomic_set(&y, 1); qatomic_set(&x, 1);
|
||||
smp_mb(); smp_mb();
|
||||
... qatomic_read(&x) ... ... qatomic_read(&y) ...
|
||||
|
||||
When a qatomic_*() read-modify-write operation is used instead of one
|
||||
or both stores, developers that are more familiar with the Linux API may
|
||||
be tempted to omit the smp_mb(), which will work on x86 but not on ARM.
|
||||
|
||||
This nasty difference between Linux and C11 read-modify-write operations
|
||||
has already caused issues in util/async.c and more are being found.
|
||||
Provide something similar to Linux smp_mb__before/after_atomic(); this
|
||||
has the double function of documenting clearly why there is a memory
|
||||
barrier, and avoiding a double barrier on x86 and s390x systems.
|
||||
|
||||
The new macro can already be put to use in qatomic_mb_set().
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
docs/devel/atomics.rst | 26 +++++++++++++++++++++-----
|
||||
include/qemu/atomic.h | 17 ++++++++++++++++-
|
||||
2 files changed, 37 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst
|
||||
index 52baa0736d..10fbfc58bb 100644
|
||||
--- a/docs/devel/atomics.rst
|
||||
+++ b/docs/devel/atomics.rst
|
||||
@@ -25,7 +25,8 @@ provides macros that fall in three camps:
|
||||
|
||||
- weak atomic access and manual memory barriers: ``qatomic_read()``,
|
||||
``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``,
|
||||
- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``;
|
||||
+ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``,
|
||||
+ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``;
|
||||
|
||||
- sequentially consistent atomic access: everything else.
|
||||
|
||||
@@ -470,7 +471,7 @@ and memory barriers, and the equivalents in QEMU:
|
||||
sequential consistency.
|
||||
|
||||
- in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in
|
||||
- the total ordering enforced by sequentially-consistent operations.
|
||||
+ the ordering enforced by read-modify-write operations.
|
||||
This is because QEMU uses the C11 memory model. The following example
|
||||
is correct in Linux but not in QEMU:
|
||||
|
||||
@@ -486,9 +487,24 @@ and memory barriers, and the equivalents in QEMU:
|
||||
because the read of ``y`` can be moved (by either the processor or the
|
||||
compiler) before the write of ``x``.
|
||||
|
||||
- Fixing this requires an ``smp_mb()`` memory barrier between the write
|
||||
- of ``x`` and the read of ``y``. In the common case where only one thread
|
||||
- writes ``x``, it is also possible to write it like this:
|
||||
+ Fixing this requires a full memory barrier between the write of ``x`` and
|
||||
+ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and
|
||||
+ ``smp_mb__after_rmw()``; they act both as an optimization,
|
||||
+ avoiding the memory barrier on processors where it is unnecessary,
|
||||
+ and as a clarification of this corner case of the C11 memory model:
|
||||
+
|
||||
+ +--------------------------------+
|
||||
+ | QEMU (correct) |
|
||||
+ +================================+
|
||||
+ | :: |
|
||||
+ | |
|
||||
+ | a = qatomic_fetch_add(&x, 2);|
|
||||
+ | smp_mb__after_rmw(); |
|
||||
+ | b = qatomic_read(&y); |
|
||||
+ +--------------------------------+
|
||||
+
|
||||
+ In the common case where only one thread writes ``x``, it is also possible
|
||||
+ to write it like this:
|
||||
|
||||
+--------------------------------+
|
||||
| QEMU (correct) |
|
||||
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
|
||||
index 112a29910b..7855443cab 100644
|
||||
--- a/include/qemu/atomic.h
|
||||
+++ b/include/qemu/atomic.h
|
||||
@@ -243,6 +243,20 @@
|
||||
#define smp_wmb() smp_mb_release()
|
||||
#define smp_rmb() smp_mb_acquire()
|
||||
|
||||
+/*
|
||||
+ * SEQ_CST is weaker than the older __sync_* builtins and Linux
|
||||
+ * kernel read-modify-write atomics. Provide a macro to obtain
|
||||
+ * the same semantics.
|
||||
+ */
|
||||
+#if !defined(QEMU_SANITIZE_THREAD) && \
|
||||
+ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
|
||||
+# define smp_mb__before_rmw() signal_barrier()
|
||||
+# define smp_mb__after_rmw() signal_barrier()
|
||||
+#else
|
||||
+# define smp_mb__before_rmw() smp_mb()
|
||||
+# define smp_mb__after_rmw() smp_mb()
|
||||
+#endif
|
||||
+
|
||||
/* qatomic_mb_read/set semantics map Java volatile variables. They are
|
||||
* less expensive on some platforms (notably POWER) than fully
|
||||
* sequentially consistent operations.
|
||||
@@ -257,7 +271,8 @@
|
||||
#if !defined(__SANITIZE_THREAD__) && \
|
||||
(defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
|
||||
/* This is more efficient than a store plus a fence. */
|
||||
-# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i))
|
||||
+# define qatomic_mb_set(ptr, i) \
|
||||
+ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); })
|
||||
#else
|
||||
# define qatomic_mb_set(ptr, i) \
|
||||
({ qatomic_store_release(ptr, i); smp_mb(); })
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,209 +0,0 @@
|
||||
From 5cdbc87ab24a8cc4cf926158ec429d43d8a45f15 Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Wed, 5 Jun 2024 19:56:51 -0400
|
||||
Subject: [PATCH 1/5] qcow2: Don't open data_file with BDRV_O_NO_IO
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 5: EMBARGOED CVE-2024-4467 for rhel-8.10.z (PRDSC)
|
||||
RH-Jira: RHEL-35616
|
||||
RH-CVE: CVE-2024-4467
|
||||
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Commit: [1/5] 2e72d21c14d86645cf68eec78f49d5cc5d77581f
|
||||
|
||||
Conflicts: qcow2_do_open(): missing boolean 'open_data_file'.
|
||||
We assume it to be true.
|
||||
|
||||
commit f9843ce5c519901654a7d8ba43ee95ce25ca13c2
|
||||
Author: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu Apr 11 15:06:01 2024 +0200
|
||||
|
||||
qcow2: Don't open data_file with BDRV_O_NO_IO
|
||||
|
||||
One use case for 'qemu-img info' is verifying that untrusted images
|
||||
don't reference an unwanted external file, be it as a backing file or an
|
||||
external data file. To make sure that calling 'qemu-img info' can't
|
||||
already have undesired side effects with a malicious image, just don't
|
||||
open the data file at all with BDRV_O_NO_IO. If nothing ever tries to do
|
||||
I/O, we don't need to have it open.
|
||||
|
||||
This changes the output of iotests case 061, which used 'qemu-img info'
|
||||
to show that opening an image with an invalid data file fails. After
|
||||
this patch, it succeeds. Replace this part of the test with a qemu-io
|
||||
call, but keep the final 'qemu-img info' to show that the invalid data
|
||||
file is correctly displayed in the output.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Upstream: N/A, embargoed
|
||||
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
block/qcow2.c | 87 +++++++++++++++++++++++---------------
|
||||
tests/qemu-iotests/061 | 6 ++-
|
||||
tests/qemu-iotests/061.out | 8 +++-
|
||||
3 files changed, 62 insertions(+), 39 deletions(-)
|
||||
|
||||
diff --git a/block/qcow2.c b/block/qcow2.c
|
||||
index d509016756..6ee1919612 100644
|
||||
--- a/block/qcow2.c
|
||||
+++ b/block/qcow2.c
|
||||
@@ -1613,50 +1613,67 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
- /* Open external data file */
|
||||
- s->data_file = bdrv_open_child(NULL, options, "data-file", bs,
|
||||
- &child_of_bds, BDRV_CHILD_DATA,
|
||||
- true, errp);
|
||||
- if (*errp) {
|
||||
- ret = -EINVAL;
|
||||
- goto fail;
|
||||
- }
|
||||
+ if (flags & BDRV_O_NO_IO) {
|
||||
+ /*
|
||||
+ * Don't open the data file for 'qemu-img info' so that it can be used
|
||||
+ * to verify that an untrusted qcow2 image doesn't refer to external
|
||||
+ * files.
|
||||
+ *
|
||||
+ * Note: This still makes has_data_file() return true.
|
||||
+ */
|
||||
+ if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
|
||||
+ s->data_file = NULL;
|
||||
+ } else {
|
||||
+ s->data_file = bs->file;
|
||||
+ }
|
||||
+ qdict_extract_subqdict(options, NULL, "data-file.");
|
||||
+ qdict_del(options, "data-file");
|
||||
+ } else {
|
||||
+ /* Open external data file */
|
||||
+ s->data_file = bdrv_open_child(NULL, options, "data-file", bs,
|
||||
+ &child_of_bds, BDRV_CHILD_DATA,
|
||||
+ true, errp);
|
||||
+ if (*errp) {
|
||||
+ ret = -EINVAL;
|
||||
+ goto fail;
|
||||
+ }
|
||||
|
||||
- if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
|
||||
- if (!s->data_file && s->image_data_file) {
|
||||
- s->data_file = bdrv_open_child(s->image_data_file, options,
|
||||
- "data-file", bs, &child_of_bds,
|
||||
- BDRV_CHILD_DATA, false, errp);
|
||||
+ if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) {
|
||||
+ if (!s->data_file && s->image_data_file) {
|
||||
+ s->data_file = bdrv_open_child(s->image_data_file, options,
|
||||
+ "data-file", bs, &child_of_bds,
|
||||
+ BDRV_CHILD_DATA, false, errp);
|
||||
+ if (!s->data_file) {
|
||||
+ ret = -EINVAL;
|
||||
+ goto fail;
|
||||
+ }
|
||||
+ }
|
||||
if (!s->data_file) {
|
||||
+ error_setg(errp, "'data-file' is required for this image");
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
- }
|
||||
- if (!s->data_file) {
|
||||
- error_setg(errp, "'data-file' is required for this image");
|
||||
- ret = -EINVAL;
|
||||
- goto fail;
|
||||
- }
|
||||
|
||||
- /* No data here */
|
||||
- bs->file->role &= ~BDRV_CHILD_DATA;
|
||||
+ /* No data here */
|
||||
+ bs->file->role &= ~BDRV_CHILD_DATA;
|
||||
|
||||
- /* Must succeed because we have given up permissions if anything */
|
||||
- bdrv_child_refresh_perms(bs, bs->file, &error_abort);
|
||||
- } else {
|
||||
- if (s->data_file) {
|
||||
- error_setg(errp, "'data-file' can only be set for images with an "
|
||||
- "external data file");
|
||||
- ret = -EINVAL;
|
||||
- goto fail;
|
||||
- }
|
||||
+ /* Must succeed because we have given up permissions if anything */
|
||||
+ bdrv_child_refresh_perms(bs, bs->file, &error_abort);
|
||||
+ } else {
|
||||
+ if (s->data_file) {
|
||||
+ error_setg(errp, "'data-file' can only be set for images with an "
|
||||
+ "external data file");
|
||||
+ ret = -EINVAL;
|
||||
+ goto fail;
|
||||
+ }
|
||||
|
||||
- s->data_file = bs->file;
|
||||
+ s->data_file = bs->file;
|
||||
|
||||
- if (data_file_is_raw(bs)) {
|
||||
- error_setg(errp, "data-file-raw requires a data file");
|
||||
- ret = -EINVAL;
|
||||
- goto fail;
|
||||
+ if (data_file_is_raw(bs)) {
|
||||
+ error_setg(errp, "data-file-raw requires a data file");
|
||||
+ ret = -EINVAL;
|
||||
+ goto fail;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
|
||||
index 9507c223bd..6a5bd47efc 100755
|
||||
--- a/tests/qemu-iotests/061
|
||||
+++ b/tests/qemu-iotests/061
|
||||
@@ -322,12 +322,14 @@ $QEMU_IMG amend -o "data_file=foo" "$TEST_IMG"
|
||||
echo
|
||||
_make_test_img -o "compat=1.1,data_file=$TEST_IMG.data" 64M
|
||||
$QEMU_IMG amend -o "data_file=foo" "$TEST_IMG"
|
||||
-_img_info --format-specific
|
||||
+$QEMU_IO -c "read 0 4k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
|
||||
+$QEMU_IO -c "open -o data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" -c "read 0 4k" | _filter_qemu_io
|
||||
TEST_IMG="data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" _img_info --format-specific --image-opts
|
||||
|
||||
echo
|
||||
$QEMU_IMG amend -o "data_file=" --image-opts "data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG"
|
||||
-_img_info --format-specific
|
||||
+$QEMU_IO -c "read 0 4k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
|
||||
+$QEMU_IO -c "open -o data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" -c "read 0 4k" | _filter_qemu_io
|
||||
TEST_IMG="data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" _img_info --format-specific --image-opts
|
||||
|
||||
echo
|
||||
diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out
|
||||
index 7ecbd4dea8..99b2307a23 100644
|
||||
--- a/tests/qemu-iotests/061.out
|
||||
+++ b/tests/qemu-iotests/061.out
|
||||
@@ -545,7 +545,9 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
|
||||
qemu-img: data-file can only be set for images that use an external data file
|
||||
|
||||
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 data_file=TEST_DIR/t.IMGFMT.data
|
||||
-qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Could not open 'foo': No such file or directory
|
||||
+qemu-io: can't open device TEST_DIR/t.IMGFMT: Could not open 'foo': No such file or directory
|
||||
+read 4096/4096 bytes at offset 0
|
||||
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
image: TEST_DIR/t.IMGFMT
|
||||
file format: IMGFMT
|
||||
virtual size: 64 MiB (67108864 bytes)
|
||||
@@ -560,7 +562,9 @@ Format specific information:
|
||||
corrupt: false
|
||||
extended l2: false
|
||||
|
||||
-qemu-img: Could not open 'TEST_DIR/t.IMGFMT': 'data-file' is required for this image
|
||||
+qemu-io: can't open device TEST_DIR/t.IMGFMT: 'data-file' is required for this image
|
||||
+read 4096/4096 bytes at offset 0
|
||||
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
image: TEST_DIR/t.IMGFMT
|
||||
file format: IMGFMT
|
||||
virtual size: 64 MiB (67108864 bytes)
|
||||
--
|
||||
2.39.3
|
||||
|
@ -1,67 +0,0 @@
|
||||
From 06c73c4b57dd1f47f819d719a63eb39fbe799304 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 12 Jan 2023 20:14:51 +0100
|
||||
Subject: [PATCH 1/4] qcow2: Fix theoretical corruption in store_bitmap() error
|
||||
path
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image
|
||||
RH-Bugzilla: 2147617
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [1/4] d0a26bed7b16db41e7baee1f8f2b3ae54e52dd52
|
||||
|
||||
In order to write the bitmap table to the image file, it is converted to
|
||||
big endian. If the write fails, it is passed to clear_bitmap_table() to
|
||||
free all of the clusters it had allocated before. However, if we don't
|
||||
convert it back to native endianness first, we'll free things at a wrong
|
||||
offset.
|
||||
|
||||
In practical terms, the offsets will be so high that we won't actually
|
||||
free any allocated clusters, but just run into an error, but in theory
|
||||
this can cause image corruption.
|
||||
|
||||
Cc: qemu-stable@nongnu.org
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230112191454.169353-2-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit b03dd9613bcf8fe948581b2b3585510cb525c382)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
block/qcow2-bitmap.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
|
||||
index 8fb4731551..869069415c 100644
|
||||
--- a/block/qcow2-bitmap.c
|
||||
+++ b/block/qcow2-bitmap.c
|
||||
@@ -115,7 +115,7 @@ static int update_header_sync(BlockDriverState *bs)
|
||||
return bdrv_flush(bs->file->bs);
|
||||
}
|
||||
|
||||
-static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size)
|
||||
+static inline void bitmap_table_bswap_be(uint64_t *bitmap_table, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
@@ -1401,9 +1401,10 @@ static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
- bitmap_table_to_be(tb, tb_size);
|
||||
+ bitmap_table_bswap_be(tb, tb_size);
|
||||
ret = bdrv_pwrite(bs->file, tb_offset, tb, tb_size * sizeof(tb[0]));
|
||||
if (ret < 0) {
|
||||
+ bitmap_table_bswap_be(tb, tb_size);
|
||||
error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file",
|
||||
bm_name);
|
||||
goto fail;
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,75 +0,0 @@
|
||||
From 2f03293910f3ac559f37d45c95325ae29638003a Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:15:14 -0500
|
||||
Subject: [PATCH 07/13] qemu-coroutine-lock: add smp_mb__after_rmw()
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2168472
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Commit: [7/10] 9cf1b6d3b0dd154489e75ad54a3000ea58983960
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
|
||||
|
||||
commit e3a3b6ec8169eab2feb241b4982585001512cd55
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri Mar 3 10:52:59 2023 +0100
|
||||
|
||||
qemu-coroutine-lock: add smp_mb__after_rmw()
|
||||
|
||||
mutex->from_push and mutex->handoff in qemu-coroutine-lock implement
|
||||
the familiar pattern:
|
||||
|
||||
write a write b
|
||||
smp_mb() smp_mb()
|
||||
read b read a
|
||||
|
||||
The memory barrier is required by the C memory model even after a
|
||||
SEQ_CST read-modify-write operation such as QSLIST_INSERT_HEAD_ATOMIC.
|
||||
Add it and avoid the unclear qatomic_mb_read() operation.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/qemu-coroutine-lock.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
|
||||
index 2669403839..a03ed0e664 100644
|
||||
--- a/util/qemu-coroutine-lock.c
|
||||
+++ b/util/qemu-coroutine-lock.c
|
||||
@@ -206,10 +206,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
|
||||
trace_qemu_co_mutex_lock_entry(mutex, self);
|
||||
push_waiter(mutex, &w);
|
||||
|
||||
+ /*
|
||||
+ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set
|
||||
+ * in qemu_co_mutex_unlock.
|
||||
+ */
|
||||
+ smp_mb__after_rmw();
|
||||
+
|
||||
/* This is the "Responsibility Hand-Off" protocol; a lock() picks from
|
||||
* a concurrent unlock() the responsibility of waking somebody up.
|
||||
*/
|
||||
- old_handoff = qatomic_mb_read(&mutex->handoff);
|
||||
+ old_handoff = qatomic_read(&mutex->handoff);
|
||||
if (old_handoff &&
|
||||
has_waiters(mutex) &&
|
||||
qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) {
|
||||
@@ -308,6 +314,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
|
||||
}
|
||||
|
||||
our_handoff = mutex->sequence;
|
||||
+ /* Set handoff before checking for waiters. */
|
||||
qatomic_mb_set(&mutex->handoff, our_handoff);
|
||||
if (!has_waiters(mutex)) {
|
||||
/* The concurrent lock has not added itself yet, so it
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,70 +0,0 @@
|
||||
From 648193b48d8aeaded90fd657e3610d8040f505fc Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 12 Jan 2023 20:14:53 +0100
|
||||
Subject: [PATCH 3/4] qemu-img bitmap: Report errors while closing the image
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image
|
||||
RH-Bugzilla: 2147617
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [3/4] 8e13e09564718a0badd03af84f036246a46a0eba
|
||||
|
||||
blk_unref() can't report any errors that happen while closing the image.
|
||||
For example, if qcow2 hits an -ENOSPC error while writing out dirty
|
||||
bitmaps when it's closed, it prints error messages to stderr, but
|
||||
'qemu-img bitmap' won't see any error return value and will therefore
|
||||
look successful with exit code 0.
|
||||
|
||||
In order to fix this, manually inactivate the image first before calling
|
||||
blk_unref(). This already performs the operations that would be most
|
||||
likely to fail while closing the image, but it can still return errors.
|
||||
|
||||
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1330
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230112191454.169353-4-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit c5e477110dcb8ef4642dce399777c3dee68fa96c)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
qemu-img.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
diff --git a/qemu-img.c b/qemu-img.c
|
||||
index 18833f7d69..7d035c0c7f 100644
|
||||
--- a/qemu-img.c
|
||||
+++ b/qemu-img.c
|
||||
@@ -4622,6 +4622,7 @@ static int img_bitmap(int argc, char **argv)
|
||||
QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
|
||||
ImgBitmapAction *act, *act_next;
|
||||
const char *op;
|
||||
+ int inactivate_ret;
|
||||
|
||||
QSIMPLEQ_INIT(&actions);
|
||||
|
||||
@@ -4806,6 +4807,16 @@ static int img_bitmap(int argc, char **argv)
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
+ /*
|
||||
+ * Manually inactivate the images first because this way we can know whether
|
||||
+ * an error occurred. blk_unref() doesn't tell us about failures.
|
||||
+ */
|
||||
+ inactivate_ret = bdrv_inactivate_all();
|
||||
+ if (inactivate_ret < 0) {
|
||||
+ error_report("Error while closing the image: %s", strerror(-inactivate_ret));
|
||||
+ ret = 1;
|
||||
+ }
|
||||
+
|
||||
blk_unref(src);
|
||||
blk_unref(blk);
|
||||
qemu_opts_del(opts);
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,67 +0,0 @@
|
||||
From 2396df7fe527567e8e78761ef24ea1057ef6fa48 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 12 Jan 2023 20:14:52 +0100
|
||||
Subject: [PATCH 2/4] qemu-img commit: Report errors while closing the image
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image
|
||||
RH-Bugzilla: 2147617
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [2/4] 28f95bf76d1d63e2b0bed0c2ba5206bd3e5ea4f8
|
||||
|
||||
blk_unref() can't report any errors that happen while closing the image.
|
||||
For example, if qcow2 hits an -ENOSPC error while writing out dirty
|
||||
bitmaps when it's closed, it prints error messages to stderr, but
|
||||
'qemu-img commit' won't see any error return value and will therefore
|
||||
look successful with exit code 0.
|
||||
|
||||
In order to fix this, manually inactivate the image first before calling
|
||||
blk_unref(). This already performs the operations that would be most
|
||||
likely to fail while closing the image, but it can still return errors.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230112191454.169353-3-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 44efba2d713aca076c411594d0c1a2b99155eeb3)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
qemu-img.c | 13 +++++++++++++
|
||||
1 file changed, 13 insertions(+)
|
||||
|
||||
diff --git a/qemu-img.c b/qemu-img.c
|
||||
index f036a1d428..18833f7d69 100644
|
||||
--- a/qemu-img.c
|
||||
+++ b/qemu-img.c
|
||||
@@ -443,6 +443,11 @@ static BlockBackend *img_open(bool image_opts,
|
||||
blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
|
||||
force_share);
|
||||
}
|
||||
+
|
||||
+ if (blk) {
|
||||
+ blk_set_force_allow_inactivate(blk);
|
||||
+ }
|
||||
+
|
||||
return blk;
|
||||
}
|
||||
|
||||
@@ -1110,6 +1115,14 @@ unref_backing:
|
||||
done:
|
||||
qemu_progress_end();
|
||||
|
||||
+ /*
|
||||
+ * Manually inactivate the image first because this way we can know whether
|
||||
+ * an error occurred. blk_unref() doesn't tell us about failures.
|
||||
+ */
|
||||
+ ret = bdrv_inactivate_all();
|
||||
+ if (ret < 0 && !local_err) {
|
||||
+ error_setg_errno(&local_err, -ret, "Error while closing the image");
|
||||
+ }
|
||||
blk_unref(blk);
|
||||
|
||||
if (local_err) {
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,166 +0,0 @@
|
||||
From 7c6faae20638f58681df223e0ca44e0a6cb60d2d Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Wolf <kwolf@redhat.com>
|
||||
Date: Thu, 12 Jan 2023 20:14:54 +0100
|
||||
Subject: [PATCH 4/4] qemu-iotests: Test qemu-img bitmap/commit exit code on
|
||||
error
|
||||
|
||||
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||
RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image
|
||||
RH-Bugzilla: 2147617
|
||||
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
||||
RH-Commit: [4/4] fb2f9de98ddd2ee1d745119e4f15272ef44e0aae
|
||||
|
||||
This tests that when an error happens while writing back bitmaps to the
|
||||
image file in qcow2_inactivate(), 'qemu-img bitmap/commit' actually
|
||||
return an error value in their exit code instead of making the operation
|
||||
look successful to scripts.
|
||||
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
Message-Id: <20230112191454.169353-5-kwolf@redhat.com>
|
||||
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
(cherry picked from commit 07a4e1f8e5418f36424cd57d5d061b090a238c65)
|
||||
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||
---
|
||||
.../qemu-iotests/tests/qemu-img-close-errors | 96 +++++++++++++++++++
|
||||
.../tests/qemu-img-close-errors.out | 23 +++++
|
||||
2 files changed, 119 insertions(+)
|
||||
create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors
|
||||
create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out
|
||||
|
||||
diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors b/tests/qemu-iotests/tests/qemu-img-close-errors
|
||||
new file mode 100755
|
||||
index 0000000000..50bfb6cfa2
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/qemu-img-close-errors
|
||||
@@ -0,0 +1,96 @@
|
||||
+#!/usr/bin/env bash
|
||||
+# group: rw auto quick
|
||||
+#
|
||||
+# Check that errors while closing the image, in particular writing back dirty
|
||||
+# bitmaps, are correctly reported with a failing qemu-img exit code.
|
||||
+#
|
||||
+# Copyright (C) 2023 Red Hat, Inc.
|
||||
+#
|
||||
+# This program is free software; you can redistribute it and/or modify
|
||||
+# it under the terms of the GNU General Public License as published by
|
||||
+# the Free Software Foundation; either version 2 of the License, or
|
||||
+# (at your option) any later version.
|
||||
+#
|
||||
+# This program is distributed in the hope that it will be useful,
|
||||
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
+# GNU General Public License for more details.
|
||||
+#
|
||||
+# You should have received a copy of the GNU General Public License
|
||||
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
+#
|
||||
+
|
||||
+# creator
|
||||
+owner=kwolf@redhat.com
|
||||
+
|
||||
+seq="$(basename $0)"
|
||||
+echo "QA output created by $seq"
|
||||
+
|
||||
+status=1 # failure is the default!
|
||||
+
|
||||
+_cleanup()
|
||||
+{
|
||||
+ _cleanup_test_img
|
||||
+}
|
||||
+trap "_cleanup; exit \$status" 0 1 2 3 15
|
||||
+
|
||||
+# get standard environment, filters and checks
|
||||
+cd ..
|
||||
+. ./common.rc
|
||||
+. ./common.filter
|
||||
+
|
||||
+_supported_fmt qcow2
|
||||
+_supported_proto file
|
||||
+_supported_os Linux
|
||||
+
|
||||
+size=1G
|
||||
+
|
||||
+# The error we are going to use is ENOSPC. Depending on how many bitmaps we
|
||||
+# create in the backing file (and therefore increase the used up space), we get
|
||||
+# failures in different places. With a low number, only merging the bitmap
|
||||
+# fails, whereas with a higher number, already 'qemu-img commit' fails.
|
||||
+for max_bitmap in 6 7; do
|
||||
+ echo
|
||||
+ echo "=== Test with $max_bitmap bitmaps ==="
|
||||
+
|
||||
+ TEST_IMG="$TEST_IMG.base" _make_test_img -q $size
|
||||
+ for i in $(seq 1 $max_bitmap); do
|
||||
+ $QEMU_IMG bitmap --add "$TEST_IMG.base" "stale-bitmap-$i"
|
||||
+ done
|
||||
+
|
||||
+ # Simulate a block device of 128 MB by resizing the image file accordingly
|
||||
+ # and then enforcing the size with the raw driver
|
||||
+ $QEMU_IO -f raw -c "truncate 128M" "$TEST_IMG.base"
|
||||
+ BASE_JSON='json:{
|
||||
+ "driver": "qcow2",
|
||||
+ "file": {
|
||||
+ "driver": "raw",
|
||||
+ "size": 134217728,
|
||||
+ "file": {
|
||||
+ "driver": "file",
|
||||
+ "filename":"'"$TEST_IMG.base"'"
|
||||
+ }
|
||||
+ }
|
||||
+ }'
|
||||
+
|
||||
+ _make_test_img -q -b "$BASE_JSON" -F $IMGFMT
|
||||
+ $QEMU_IMG bitmap --add "$TEST_IMG" "good-bitmap"
|
||||
+
|
||||
+ $QEMU_IO -c 'write 0 126m' "$TEST_IMG" | _filter_qemu_io
|
||||
+
|
||||
+ $QEMU_IMG commit -d "$TEST_IMG" 2>&1 | _filter_generated_node_ids
|
||||
+ echo "qemu-img commit exit code: ${PIPESTATUS[0]}"
|
||||
+
|
||||
+ $QEMU_IMG bitmap --add "$BASE_JSON" "good-bitmap"
|
||||
+ echo "qemu-img bitmap --add exit code: $?"
|
||||
+
|
||||
+ $QEMU_IMG bitmap --merge "good-bitmap" -b "$TEST_IMG" "$BASE_JSON" \
|
||||
+ "good-bitmap" 2>&1 | _filter_generated_node_ids
|
||||
+ echo "qemu-img bitmap --merge exit code: ${PIPESTATUS[0]}"
|
||||
+done
|
||||
+
|
||||
+# success, all done
|
||||
+echo "*** done"
|
||||
+rm -f $seq.full
|
||||
+status=0
|
||||
+
|
||||
diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors.out b/tests/qemu-iotests/tests/qemu-img-close-errors.out
|
||||
new file mode 100644
|
||||
index 0000000000..1bfe88f176
|
||||
--- /dev/null
|
||||
+++ b/tests/qemu-iotests/tests/qemu-img-close-errors.out
|
||||
@@ -0,0 +1,23 @@
|
||||
+QA output created by qemu-img-close-errors
|
||||
+
|
||||
+=== Test with 6 bitmaps ===
|
||||
+wrote 132120576/132120576 bytes at offset 0
|
||||
+126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+Image committed.
|
||||
+qemu-img commit exit code: 0
|
||||
+qemu-img bitmap --add exit code: 0
|
||||
+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device
|
||||
+qemu-img: Error while closing the image: Invalid argument
|
||||
+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device
|
||||
+qemu-img bitmap --merge exit code: 1
|
||||
+
|
||||
+=== Test with 7 bitmaps ===
|
||||
+wrote 132120576/132120576 bytes at offset 0
|
||||
+126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
|
||||
+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device
|
||||
+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device
|
||||
+qemu-img: Error while closing the image: Invalid argument
|
||||
+qemu-img commit exit code: 1
|
||||
+qemu-img bitmap --add exit code: 0
|
||||
+qemu-img bitmap --merge exit code: 0
|
||||
+*** done
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,61 +0,0 @@
|
||||
From 095811c08557b0a2ad1a433d28699ead1e5ef664 Mon Sep 17 00:00:00 2001
|
||||
From: Bandan Das <bsd@redhat.com>
|
||||
Date: Thu, 3 Aug 2023 15:12:15 -0400
|
||||
Subject: [PATCH 2/5] qemu-options.hx: Update the reduced-phys-bits
|
||||
documentation
|
||||
|
||||
RH-Author: Bandan Das <None>
|
||||
RH-MergeRequest: 296: Updates to SEV reduced-phys-bits parameter
|
||||
RH-Bugzilla: 2214840
|
||||
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||||
RH-Commit: [2/4] f8e8f5aeff449a34ce90c6e55e2a51873a6e6a87
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214840
|
||||
|
||||
commit 326e3015c4c6f3197157ea0bb00826ae740e2fad
|
||||
Author: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Date: Fri Sep 30 10:14:28 2022 -0500
|
||||
|
||||
qemu-options.hx: Update the reduced-phys-bits documentation
|
||||
|
||||
A guest only ever experiences, at most, 1 bit of reduced physical
|
||||
addressing. Update the documentation to reflect this as well as change
|
||||
the example value on the reduced-phys-bits option.
|
||||
|
||||
Fixes: a9b4942f48 ("target/i386: add Secure Encrypted Virtualization (SEV) object")
|
||||
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
|
||||
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
||||
Message-Id: <13a62ced1808546c1d398e2025cf85f4c94ae123.1664550870.git.thomas.lendacky@amd.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Bandan Das <bsd@redhat.com>
|
||||
---
|
||||
qemu-options.hx | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/qemu-options.hx b/qemu-options.hx
|
||||
index 4b7798088b..981248e283 100644
|
||||
--- a/qemu-options.hx
|
||||
+++ b/qemu-options.hx
|
||||
@@ -5204,7 +5204,7 @@ SRST
|
||||
physical address space. The ``reduced-phys-bits`` is used to
|
||||
provide the number of bits we loose in physical address space.
|
||||
Similar to C-bit, the value is Host family dependent. On EPYC,
|
||||
- the value should be 5.
|
||||
+ a guest will lose a maximum of 1 bit, so the value should be 1.
|
||||
|
||||
The ``sev-device`` provides the device file to use for
|
||||
communicating with the SEV firmware running inside AMD Secure
|
||||
@@ -5239,7 +5239,7 @@ SRST
|
||||
|
||||
# |qemu_system_x86| \\
|
||||
...... \\
|
||||
- -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=5 \\
|
||||
+ -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 \\
|
||||
-machine ...,memory-encryption=sev0 \\
|
||||
.....
|
||||
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,146 +0,0 @@
|
||||
From d46ca52c3f42add549bd3790a41d06594821334e Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:10:57 -0500
|
||||
Subject: [PATCH 03/13] qemu-thread-posix: cleanup, fix, document QemuEvent
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2168472
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Commit: [3/10] 746070c4d78c7f0a9ac4456d9aee69475acb8964
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
|
||||
|
||||
commit 9586a1329f5dce6c1d7f4de53cf0536644d7e593
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu Mar 2 11:19:52 2023 +0100
|
||||
|
||||
qemu-thread-posix: cleanup, fix, document QemuEvent
|
||||
|
||||
QemuEvent is currently broken on ARM due to missing memory barriers
|
||||
after qatomic_*(). Apart from adding the memory barrier, a closer look
|
||||
reveals some unpaired memory barriers too. Document more clearly what
|
||||
is going on.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/qemu-thread-posix.c | 69 ++++++++++++++++++++++++++++------------
|
||||
1 file changed, 49 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
|
||||
index e1225b63bd..dd3b6d4670 100644
|
||||
--- a/util/qemu-thread-posix.c
|
||||
+++ b/util/qemu-thread-posix.c
|
||||
@@ -430,13 +430,21 @@ void qemu_event_destroy(QemuEvent *ev)
|
||||
|
||||
void qemu_event_set(QemuEvent *ev)
|
||||
{
|
||||
- /* qemu_event_set has release semantics, but because it *loads*
|
||||
+ assert(ev->initialized);
|
||||
+
|
||||
+ /*
|
||||
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
|
||||
+ *
|
||||
+ * qemu_event_set has release semantics, but because it *loads*
|
||||
* ev->value we need a full memory barrier here.
|
||||
*/
|
||||
- assert(ev->initialized);
|
||||
smp_mb();
|
||||
if (qatomic_read(&ev->value) != EV_SET) {
|
||||
- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
|
||||
+ int old = qatomic_xchg(&ev->value, EV_SET);
|
||||
+
|
||||
+ /* Pairs with memory barrier in kernel futex_wait system call. */
|
||||
+ smp_mb__after_rmw();
|
||||
+ if (old == EV_BUSY) {
|
||||
/* There were waiters, wake them up. */
|
||||
qemu_futex_wake(ev, INT_MAX);
|
||||
}
|
||||
@@ -445,18 +453,19 @@ void qemu_event_set(QemuEvent *ev)
|
||||
|
||||
void qemu_event_reset(QemuEvent *ev)
|
||||
{
|
||||
- unsigned value;
|
||||
-
|
||||
assert(ev->initialized);
|
||||
- value = qatomic_read(&ev->value);
|
||||
- smp_mb_acquire();
|
||||
- if (value == EV_SET) {
|
||||
- /*
|
||||
- * If there was a concurrent reset (or even reset+wait),
|
||||
- * do nothing. Otherwise change EV_SET->EV_FREE.
|
||||
- */
|
||||
- qatomic_or(&ev->value, EV_FREE);
|
||||
- }
|
||||
+
|
||||
+ /*
|
||||
+ * If there was a concurrent reset (or even reset+wait),
|
||||
+ * do nothing. Otherwise change EV_SET->EV_FREE.
|
||||
+ */
|
||||
+ qatomic_or(&ev->value, EV_FREE);
|
||||
+
|
||||
+ /*
|
||||
+ * Order reset before checking the condition in the caller.
|
||||
+ * Pairs with the first memory barrier in qemu_event_set().
|
||||
+ */
|
||||
+ smp_mb__after_rmw();
|
||||
}
|
||||
|
||||
void qemu_event_wait(QemuEvent *ev)
|
||||
@@ -464,20 +473,40 @@ void qemu_event_wait(QemuEvent *ev)
|
||||
unsigned value;
|
||||
|
||||
assert(ev->initialized);
|
||||
- value = qatomic_read(&ev->value);
|
||||
- smp_mb_acquire();
|
||||
+
|
||||
+ /*
|
||||
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
|
||||
+ * not go down the slow path, so this load-acquire is needed that
|
||||
+ * synchronizes with the first memory barrier in qemu_event_set().
|
||||
+ *
|
||||
+ * If we do go down the slow path, there is no requirement at all: we
|
||||
+ * might miss a qemu_event_set() here but ultimately the memory barrier in
|
||||
+ * qemu_futex_wait() will ensure the check is done correctly.
|
||||
+ */
|
||||
+ value = qatomic_load_acquire(&ev->value);
|
||||
if (value != EV_SET) {
|
||||
if (value == EV_FREE) {
|
||||
/*
|
||||
- * Leave the event reset and tell qemu_event_set that there
|
||||
- * are waiters. No need to retry, because there cannot be
|
||||
- * a concurrent busy->free transition. After the CAS, the
|
||||
- * event will be either set or busy.
|
||||
+ * Leave the event reset and tell qemu_event_set that there are
|
||||
+ * waiters. No need to retry, because there cannot be a concurrent
|
||||
+ * busy->free transition. After the CAS, the event will be either
|
||||
+ * set or busy.
|
||||
+ *
|
||||
+ * This cmpxchg doesn't have particular ordering requirements if it
|
||||
+ * succeeds (moving the store earlier can only cause qemu_event_set()
|
||||
+ * to issue _more_ wakeups), the failing case needs acquire semantics
|
||||
+ * like the load above.
|
||||
*/
|
||||
if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ /*
|
||||
+ * This is the final check for a concurrent set, so it does need
|
||||
+ * a smp_mb() pairing with the second barrier of qemu_event_set().
|
||||
+ * The barrier is inside the FUTEX_WAIT system call.
|
||||
+ */
|
||||
qemu_futex_wait(ev, EV_BUSY);
|
||||
}
|
||||
}
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,162 +0,0 @@
|
||||
From fa730378c42567e77eaf3e70983108f31f9001b9 Mon Sep 17 00:00:00 2001
|
||||
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 08:11:05 -0500
|
||||
Subject: [PATCH 04/13] qemu-thread-win32: cleanup, fix, document QemuEvent
|
||||
|
||||
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
|
||||
RH-Bugzilla: 2168472
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Commit: [4/10] 43d5bd903b460d4c3c5793a456820e8c5c8521d9
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
|
||||
|
||||
commit 6c5df4b48f0c52a61342ecb307a43f4c2a3565c4
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Thu Mar 2 11:22:50 2023 +0100
|
||||
|
||||
qemu-thread-win32: cleanup, fix, document QemuEvent
|
||||
|
||||
QemuEvent is currently broken on ARM due to missing memory barriers
|
||||
after qatomic_*(). Apart from adding the memory barrier, a closer look
|
||||
reveals some unpaired memory barriers that are not really needed and
|
||||
complicated the functions unnecessarily. Also, it is relying on
|
||||
a memory barrier in ResetEvent(); the barrier _ought_ to be there
|
||||
but there is really no documentation about it, so make it explicit.
|
||||
|
||||
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
||||
Reviewed-by: David Hildenbrand <david@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||||
---
|
||||
util/qemu-thread-win32.c | 82 +++++++++++++++++++++++++++-------------
|
||||
1 file changed, 56 insertions(+), 26 deletions(-)
|
||||
|
||||
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
|
||||
index 52eb19f351..c10249bc2e 100644
|
||||
--- a/util/qemu-thread-win32.c
|
||||
+++ b/util/qemu-thread-win32.c
|
||||
@@ -246,12 +246,20 @@ void qemu_event_destroy(QemuEvent *ev)
|
||||
void qemu_event_set(QemuEvent *ev)
|
||||
{
|
||||
assert(ev->initialized);
|
||||
- /* qemu_event_set has release semantics, but because it *loads*
|
||||
+
|
||||
+ /*
|
||||
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
|
||||
+ *
|
||||
+ * qemu_event_set has release semantics, but because it *loads*
|
||||
* ev->value we need a full memory barrier here.
|
||||
*/
|
||||
smp_mb();
|
||||
if (qatomic_read(&ev->value) != EV_SET) {
|
||||
- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
|
||||
+ int old = qatomic_xchg(&ev->value, EV_SET);
|
||||
+
|
||||
+ /* Pairs with memory barrier after ResetEvent. */
|
||||
+ smp_mb__after_rmw();
|
||||
+ if (old == EV_BUSY) {
|
||||
/* There were waiters, wake them up. */
|
||||
SetEvent(ev->event);
|
||||
}
|
||||
@@ -260,17 +268,19 @@ void qemu_event_set(QemuEvent *ev)
|
||||
|
||||
void qemu_event_reset(QemuEvent *ev)
|
||||
{
|
||||
- unsigned value;
|
||||
-
|
||||
assert(ev->initialized);
|
||||
- value = qatomic_read(&ev->value);
|
||||
- smp_mb_acquire();
|
||||
- if (value == EV_SET) {
|
||||
- /* If there was a concurrent reset (or even reset+wait),
|
||||
- * do nothing. Otherwise change EV_SET->EV_FREE.
|
||||
- */
|
||||
- qatomic_or(&ev->value, EV_FREE);
|
||||
- }
|
||||
+
|
||||
+ /*
|
||||
+ * If there was a concurrent reset (or even reset+wait),
|
||||
+ * do nothing. Otherwise change EV_SET->EV_FREE.
|
||||
+ */
|
||||
+ qatomic_or(&ev->value, EV_FREE);
|
||||
+
|
||||
+ /*
|
||||
+ * Order reset before checking the condition in the caller.
|
||||
+ * Pairs with the first memory barrier in qemu_event_set().
|
||||
+ */
|
||||
+ smp_mb__after_rmw();
|
||||
}
|
||||
|
||||
void qemu_event_wait(QemuEvent *ev)
|
||||
@@ -278,29 +288,49 @@ void qemu_event_wait(QemuEvent *ev)
|
||||
unsigned value;
|
||||
|
||||
assert(ev->initialized);
|
||||
- value = qatomic_read(&ev->value);
|
||||
- smp_mb_acquire();
|
||||
+
|
||||
+ /*
|
||||
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
|
||||
+ * not go down the slow path, so this load-acquire is needed that
|
||||
+ * synchronizes with the first memory barrier in qemu_event_set().
|
||||
+ *
|
||||
+ * If we do go down the slow path, there is no requirement at all: we
|
||||
+ * might miss a qemu_event_set() here but ultimately the memory barrier in
|
||||
+ * qemu_futex_wait() will ensure the check is done correctly.
|
||||
+ */
|
||||
+ value = qatomic_load_acquire(&ev->value);
|
||||
if (value != EV_SET) {
|
||||
if (value == EV_FREE) {
|
||||
- /* qemu_event_set is not yet going to call SetEvent, but we are
|
||||
- * going to do another check for EV_SET below when setting EV_BUSY.
|
||||
- * At that point it is safe to call WaitForSingleObject.
|
||||
+ /*
|
||||
+ * Here the underlying kernel event is reset, but qemu_event_set is
|
||||
+ * not yet going to call SetEvent. However, there will be another
|
||||
+ * check for EV_SET below when setting EV_BUSY. At that point it
|
||||
+ * is safe to call WaitForSingleObject.
|
||||
*/
|
||||
ResetEvent(ev->event);
|
||||
|
||||
- /* Tell qemu_event_set that there are waiters. No need to retry
|
||||
- * because there cannot be a concurrent busy->free transition.
|
||||
- * After the CAS, the event will be either set or busy.
|
||||
+ /*
|
||||
+ * It is not clear whether ResetEvent provides this barrier; kernel
|
||||
+ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry!
|
||||
+ */
|
||||
+ smp_mb();
|
||||
+
|
||||
+ /*
|
||||
+ * Leave the event reset and tell qemu_event_set that there are
|
||||
+ * waiters. No need to retry, because there cannot be a concurrent
|
||||
+ * busy->free transition. After the CAS, the event will be either
|
||||
+ * set or busy.
|
||||
*/
|
||||
if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
|
||||
- value = EV_SET;
|
||||
- } else {
|
||||
- value = EV_BUSY;
|
||||
+ return;
|
||||
}
|
||||
}
|
||||
- if (value == EV_BUSY) {
|
||||
- WaitForSingleObject(ev->event, INFINITE);
|
||||
- }
|
||||
+
|
||||
+ /*
|
||||
+ * ev->value is now EV_BUSY. Since we didn't observe EV_SET,
|
||||
+ * qemu_event_set() must observe EV_BUSY and call SetEvent().
|
||||
+ */
|
||||
+ WaitForSingleObject(ev->event, INFINITE);
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,55 +0,0 @@
|
||||
From c5cb3e97098834f9cf12b6c5260d9b43d68d64eb Mon Sep 17 00:00:00 2001
|
||||
From: Jon Maloy <jmaloy@redhat.com>
|
||||
Date: Tue, 9 May 2023 10:29:03 -0400
|
||||
Subject: [PATCH 07/15] raven: disable reentrancy detection for iomem
|
||||
|
||||
RH-Author: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
|
||||
RH-Bugzilla: 1999236
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [7/12] f41983390acba68043d386be090172dd17a5e58c (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
|
||||
Upstream: Merged
|
||||
CVE: CVE-2021-3750
|
||||
|
||||
commit 6dad5a6810d9c60ca320d01276f6133bbcfa1fc7
|
||||
Author: Alexander Bulekov <alxndr@bu.edu>
|
||||
Date: Thu Apr 27 17:10:12 2023 -0400
|
||||
|
||||
raven: disable reentrancy detection for iomem
|
||||
|
||||
As the code is designed for re-entrant calls from raven_io_ops to
|
||||
pci-conf, mark raven_io_ops as reentrancy-safe.
|
||||
|
||||
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
|
||||
Message-Id: <20230427211013.2994127-8-alxndr@bu.edu>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
|
||||
---
|
||||
hw/pci-host/raven.c | 7 +++++++
|
||||
1 file changed, 7 insertions(+)
|
||||
|
||||
diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c
|
||||
index 6e514f75eb..245b1653e4 100644
|
||||
--- a/hw/pci-host/raven.c
|
||||
+++ b/hw/pci-host/raven.c
|
||||
@@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj)
|
||||
memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000);
|
||||
address_space_init(&s->pci_io_as, &s->pci_io, "raven-io");
|
||||
|
||||
+ /*
|
||||
+ * Raven's raven_io_ops use the address-space API to access pci-conf-idx
|
||||
+ * (which is also owned by the raven device). As such, mark the
|
||||
+ * pci_io_non_contiguous as re-entrancy safe.
|
||||
+ */
|
||||
+ s->pci_io_non_contiguous.disable_reentrancy_guard = true;
|
||||
+
|
||||
/* CPU address space */
|
||||
memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR,
|
||||
&s->pci_io);
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,56 +0,0 @@
|
||||
From 76e75a129e59a33103aa7d1d92074ddcef556980 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Huth <thuth@redhat.com>
|
||||
Date: Tue, 12 Sep 2023 11:24:40 +0200
|
||||
Subject: [PATCH 3/5] redhat: Update linux-headers for kvm_s390_vm_cpu_uv_feat
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 321: Enable Secure Execution Crypto Passthrough for KVM on s390x
|
||||
RH-Bugzilla: 2111390
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Commit: [3/5] f1329f5ce5f66033ead7777384dcc1613cad1226
|
||||
|
||||
Upstream Status: rhel-only
|
||||
|
||||
This hunk is part of upstream commit da3c22c74a3c
|
||||
("linux-headers: Update to Linux v6.6-rc1"), but since that
|
||||
commit updates a lot of files and does not apply cleanly,
|
||||
we only focus on the necessary change here.
|
||||
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
linux-headers/asm-s390/kvm.h | 16 ++++++++++++++++
|
||||
1 file changed, 16 insertions(+)
|
||||
|
||||
diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h
|
||||
index f053b8304a..6706bdc5cc 100644
|
||||
--- a/linux-headers/asm-s390/kvm.h
|
||||
+++ b/linux-headers/asm-s390/kvm.h
|
||||
@@ -158,6 +158,22 @@ struct kvm_s390_vm_cpu_subfunc {
|
||||
__u8 reserved[1728];
|
||||
};
|
||||
|
||||
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6
|
||||
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST 7
|
||||
+
|
||||
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS 64
|
||||
+struct kvm_s390_vm_cpu_uv_feat {
|
||||
+ union {
|
||||
+ struct {
|
||||
+ __u64 : 4;
|
||||
+ __u64 ap : 1; /* bit 4 */
|
||||
+ __u64 ap_intr : 1; /* bit 5 */
|
||||
+ __u64 : 58;
|
||||
+ };
|
||||
+ __u64 feat;
|
||||
+ };
|
||||
+};
|
||||
+
|
||||
/* kvm attributes for crypto */
|
||||
#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0
|
||||
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,88 +0,0 @@
|
||||
From 3c7bc4319d4e475c820a63176d18afb7b4b2ed78 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 23 May 2023 12:34:33 +0200
|
||||
Subject: [PATCH 02/22] s390: kvm: adjust diag318 resets to retain data
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 279: Backport latest s390x-related fixes from upstream QEMU for qemu-kvm in RHEL 8.9
|
||||
RH-Bugzilla: 2169308 2209605
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Commit: [1/21] 16f2ff166efdd26a3be98d7c97d3b184598d1ca4
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2169308
|
||||
|
||||
commit c35aff184b2ed5be930da671ea25c857713555af
|
||||
Author: Collin L. Walling <walling@linux.ibm.com>
|
||||
Date: Wed Nov 17 10:23:03 2021 -0500
|
||||
|
||||
s390: kvm: adjust diag318 resets to retain data
|
||||
|
||||
The CPNC portion of the diag318 data is erroneously reset during an
|
||||
initial CPU reset caused by SIGP. Let's go ahead and relocate the
|
||||
diag318_info field within the CPUS390XState struct such that it is
|
||||
only zeroed during a clear reset. This way, the CPNC will be retained
|
||||
for each VCPU in the configuration after the diag318 instruction
|
||||
has been invoked.
|
||||
|
||||
The s390_machine_reset code already takes care of zeroing the diag318
|
||||
data on VM resets, which also cover resets caused by diag308.
|
||||
|
||||
Fixes: fabdada9357b ("s390: guest support for diagnose 0x318")
|
||||
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
|
||||
Signed-off-by: Collin Walling <walling@linux.ibm.com>
|
||||
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
|
||||
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
|
||||
Message-Id: <20211117152303.627969-1-walling@linux.ibm.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
target/s390x/cpu.h | 4 ++--
|
||||
target/s390x/kvm/kvm.c | 4 ++++
|
||||
2 files changed, 6 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
|
||||
index ca3845d023..a75e559134 100644
|
||||
--- a/target/s390x/cpu.h
|
||||
+++ b/target/s390x/cpu.h
|
||||
@@ -63,6 +63,8 @@ struct CPUS390XState {
|
||||
uint64_t etoken; /* etoken */
|
||||
uint64_t etoken_extension; /* etoken extension */
|
||||
|
||||
+ uint64_t diag318_info;
|
||||
+
|
||||
/* Fields up to this point are not cleared by initial CPU reset */
|
||||
struct {} start_initial_reset_fields;
|
||||
|
||||
@@ -118,8 +120,6 @@ struct CPUS390XState {
|
||||
uint16_t external_call_addr;
|
||||
DECLARE_BITMAP(emergency_signals, S390_MAX_CPUS);
|
||||
|
||||
- uint64_t diag318_info;
|
||||
-
|
||||
#if !defined(CONFIG_USER_ONLY)
|
||||
uint64_t tlb_fill_tec; /* translation exception code during tlb_fill */
|
||||
int tlb_fill_exc; /* exception number seen during tlb_fill */
|
||||
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
|
||||
index d36b44f32a..8d36c377b5 100644
|
||||
--- a/target/s390x/kvm/kvm.c
|
||||
+++ b/target/s390x/kvm/kvm.c
|
||||
@@ -1598,6 +1598,10 @@ void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info)
|
||||
env->diag318_info = diag318_info;
|
||||
cs->kvm_run->s.regs.diag318 = diag318_info;
|
||||
cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
|
||||
+ /*
|
||||
+ * diag 318 info is zeroed during a clear reset and
|
||||
+ * diag 308 IPL subcodes.
|
||||
+ */
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,44 +0,0 @@
|
||||
From eb60b6cab9550a62f0b20a9e6d69547d651e3020 Mon Sep 17 00:00:00 2001
|
||||
From: Janosch Frank <frankja@linux.ibm.com>
|
||||
Date: Wed, 23 Aug 2023 16:22:15 +0200
|
||||
Subject: [PATCH 1/5] s390x/ap: fix missing subsystem reset registration
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 321: Enable Secure Execution Crypto Passthrough for KVM on s390x
|
||||
RH-Bugzilla: 2111390
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Commit: [1/5] 4ebe81bb6cc4fc137ca4ebc9c0cebdedc421cc91
|
||||
|
||||
A subsystem reset contains a reset of AP resources which has been
|
||||
missing. Adding the AP bridge to the list of device types that need
|
||||
reset fixes this issue.
|
||||
|
||||
Reviewed-by: Jason J. Herne <jjherne@linux.ibm.com>
|
||||
Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
|
||||
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
|
||||
Fixes: a51b3153 ("s390x/ap: base Adjunct Processor (AP) object model")
|
||||
Message-ID: <20230823142219.1046522-2-seiden@linux.ibm.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
(cherry picked from commit 297ec01f0b9864ea8209ca0ddc6643b4c0574bdb)
|
||||
---
|
||||
hw/s390x/s390-virtio-ccw.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index 4a7cd21cac..412d73715a 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -100,6 +100,7 @@ static const char *const reset_dev_types[] = {
|
||||
"s390-flic",
|
||||
"diag288",
|
||||
TYPE_S390_PCI_HOST_BRIDGE,
|
||||
+ TYPE_AP_BRIDGE,
|
||||
};
|
||||
|
||||
static void subsystem_reset(void)
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,168 +0,0 @@
|
||||
From 4d940934c304a71813dfa4598b20fafe9d2f5625 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 23 May 2023 12:34:33 +0200
|
||||
Subject: [PATCH 19/22] s390x/css: revert SCSW ctrl/flag bits on error
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 279: Backport latest s390x-related fixes from upstream QEMU for qemu-kvm in RHEL 8.9
|
||||
RH-Bugzilla: 2169308 2209605
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Commit: [18/21] e4d5797ab93ba4afd9978a1d3e1f9d05da301506
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2169308
|
||||
|
||||
commit f53b033e4cd2e7706df3cca04f3bf3c5ffc6b08c
|
||||
Author: Peter Jin <pjin@linux.ibm.com>
|
||||
Date: Thu Oct 27 23:23:41 2022 +0200
|
||||
|
||||
s390x/css: revert SCSW ctrl/flag bits on error
|
||||
|
||||
Revert the control and flag bits in the subchannel status word in case
|
||||
the SSCH operation fails with non-zero CC (ditto for CSCH and HSCH).
|
||||
According to POPS, the control and flag bits are only changed if SSCH,
|
||||
CSCH, and HSCH return CC 0, and no other action should be taken otherwise.
|
||||
In order to simulate that after the fact, the bits need to be reverted on
|
||||
non-zero CC.
|
||||
|
||||
While the do_subchannel_work logic for virtual (virtio) devices will
|
||||
return condition code 0, passthrough (vfio) devices may encounter
|
||||
errors from either the host kernel or real hardware that need to be
|
||||
accounted for after this point. This includes restoring the state of
|
||||
the Subchannel Status Word to reflect the subchannel, as these bits
|
||||
would not be set in the event of a non-zero condition code from the
|
||||
affected instructions.
|
||||
|
||||
Experimentation has shown that a failure on a START SUBCHANNEL (SSCH)
|
||||
to a passthrough device would leave the subchannel with the START
|
||||
PENDING activity control bit set, thus blocking subsequent SSCH
|
||||
operations in css_do_ssch() until some form of error recovery was
|
||||
undertaken since no interrupt would be expected.
|
||||
|
||||
Signed-off-by: Peter Jin <pjin@linux.ibm.com>
|
||||
Message-Id: <20221027212341.2904795-1-pjin@linux.ibm.com>
|
||||
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
[thuth: Updated the commit description to Eric's suggestion]
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/s390x/css.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++---
|
||||
1 file changed, 48 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
|
||||
index 7d9523f811..95d1b3a3ce 100644
|
||||
--- a/hw/s390x/css.c
|
||||
+++ b/hw/s390x/css.c
|
||||
@@ -1522,21 +1522,37 @@ IOInstEnding css_do_xsch(SubchDev *sch)
|
||||
IOInstEnding css_do_csch(SubchDev *sch)
|
||||
{
|
||||
SCHIB *schib = &sch->curr_status;
|
||||
+ uint16_t old_scsw_ctrl;
|
||||
+ IOInstEnding ccode;
|
||||
|
||||
if (~(schib->pmcw.flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
|
||||
return IOINST_CC_NOT_OPERATIONAL;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Save the current scsw.ctrl in case CSCH fails and we need
|
||||
+ * to revert the scsw to the status quo ante.
|
||||
+ */
|
||||
+ old_scsw_ctrl = schib->scsw.ctrl;
|
||||
+
|
||||
/* Trigger the clear function. */
|
||||
schib->scsw.ctrl &= ~(SCSW_CTRL_MASK_FCTL | SCSW_CTRL_MASK_ACTL);
|
||||
schib->scsw.ctrl |= SCSW_FCTL_CLEAR_FUNC | SCSW_ACTL_CLEAR_PEND;
|
||||
|
||||
- return do_subchannel_work(sch);
|
||||
+ ccode = do_subchannel_work(sch);
|
||||
+
|
||||
+ if (ccode != IOINST_CC_EXPECTED) {
|
||||
+ schib->scsw.ctrl = old_scsw_ctrl;
|
||||
+ }
|
||||
+
|
||||
+ return ccode;
|
||||
}
|
||||
|
||||
IOInstEnding css_do_hsch(SubchDev *sch)
|
||||
{
|
||||
SCHIB *schib = &sch->curr_status;
|
||||
+ uint16_t old_scsw_ctrl;
|
||||
+ IOInstEnding ccode;
|
||||
|
||||
if (~(schib->pmcw.flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
|
||||
return IOINST_CC_NOT_OPERATIONAL;
|
||||
@@ -1553,6 +1569,12 @@ IOInstEnding css_do_hsch(SubchDev *sch)
|
||||
return IOINST_CC_BUSY;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Save the current scsw.ctrl in case HSCH fails and we need
|
||||
+ * to revert the scsw to the status quo ante.
|
||||
+ */
|
||||
+ old_scsw_ctrl = schib->scsw.ctrl;
|
||||
+
|
||||
/* Trigger the halt function. */
|
||||
schib->scsw.ctrl |= SCSW_FCTL_HALT_FUNC;
|
||||
schib->scsw.ctrl &= ~SCSW_FCTL_START_FUNC;
|
||||
@@ -1564,7 +1586,13 @@ IOInstEnding css_do_hsch(SubchDev *sch)
|
||||
}
|
||||
schib->scsw.ctrl |= SCSW_ACTL_HALT_PEND;
|
||||
|
||||
- return do_subchannel_work(sch);
|
||||
+ ccode = do_subchannel_work(sch);
|
||||
+
|
||||
+ if (ccode != IOINST_CC_EXPECTED) {
|
||||
+ schib->scsw.ctrl = old_scsw_ctrl;
|
||||
+ }
|
||||
+
|
||||
+ return ccode;
|
||||
}
|
||||
|
||||
static void css_update_chnmon(SubchDev *sch)
|
||||
@@ -1605,6 +1633,8 @@ static void css_update_chnmon(SubchDev *sch)
|
||||
IOInstEnding css_do_ssch(SubchDev *sch, ORB *orb)
|
||||
{
|
||||
SCHIB *schib = &sch->curr_status;
|
||||
+ uint16_t old_scsw_ctrl, old_scsw_flags;
|
||||
+ IOInstEnding ccode;
|
||||
|
||||
if (~(schib->pmcw.flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
|
||||
return IOINST_CC_NOT_OPERATIONAL;
|
||||
@@ -1626,11 +1656,26 @@ IOInstEnding css_do_ssch(SubchDev *sch, ORB *orb)
|
||||
}
|
||||
sch->orb = *orb;
|
||||
sch->channel_prog = orb->cpa;
|
||||
+
|
||||
+ /*
|
||||
+ * Save the current scsw.ctrl and scsw.flags in case SSCH fails and we need
|
||||
+ * to revert the scsw to the status quo ante.
|
||||
+ */
|
||||
+ old_scsw_ctrl = schib->scsw.ctrl;
|
||||
+ old_scsw_flags = schib->scsw.flags;
|
||||
+
|
||||
/* Trigger the start function. */
|
||||
schib->scsw.ctrl |= (SCSW_FCTL_START_FUNC | SCSW_ACTL_START_PEND);
|
||||
schib->scsw.flags &= ~SCSW_FLAGS_MASK_PNO;
|
||||
|
||||
- return do_subchannel_work(sch);
|
||||
+ ccode = do_subchannel_work(sch);
|
||||
+
|
||||
+ if (ccode != IOINST_CC_EXPECTED) {
|
||||
+ schib->scsw.ctrl = old_scsw_ctrl;
|
||||
+ schib->scsw.flags = old_scsw_flags;
|
||||
+ }
|
||||
+
|
||||
+ return ccode;
|
||||
}
|
||||
|
||||
static void copy_irb_to_guest(IRB *dest, const IRB *src, const PMCW *pmcw,
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,68 +0,0 @@
|
||||
From 05b145a8d5b1c2f796069cdd81826c00cf7c983e Mon Sep 17 00:00:00 2001
|
||||
From: Janosch Frank <frankja@linux.ibm.com>
|
||||
Date: Fri, 1 Sep 2023 11:48:51 +0000
|
||||
Subject: [PATCH 2/5] s390x: do a subsystem reset before the unprotect on
|
||||
reboot
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||
RH-MergeRequest: 321: Enable Secure Execution Crypto Passthrough for KVM on s390x
|
||||
RH-Bugzilla: 2111390
|
||||
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||
RH-Commit: [2/5] ea430d236e1a20ddad7095d2e6d10f741f9a1907
|
||||
|
||||
Bound APQNs have to be reset before tearing down the secure config via
|
||||
s390_machine_unprotect(). Otherwise the Ultravisor will return an error
|
||||
code.
|
||||
|
||||
So let's do a subsystem_reset() which includes an AP reset before the
|
||||
unprotect call. We'll do a full device_reset() afterwards which will
|
||||
reset some devices twice. That's ok since we can't move the
|
||||
device_reset() before the unprotect as it includes a CPU clear reset
|
||||
which the Ultravisor does not expect at that point in time.
|
||||
|
||||
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
|
||||
Message-ID: <20230901114851.154357-1-frankja@linux.ibm.com>
|
||||
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
|
||||
Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
(cherry picked from commit ef1535901a07f2e49fa25c8bcee7f0b73801d824)
|
||||
|
||||
Conflicts:
|
||||
hw/s390x/s390-virtio-ccw.c
|
||||
(contextual conflict due to missing commit 7966d70f6f6b)
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-virtio-ccw.c | 10 ++++++++++
|
||||
1 file changed, 10 insertions(+)
|
||||
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index 412d73715a..17146469ee 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -430,10 +430,20 @@ static void s390_machine_reset(MachineState *machine)
|
||||
switch (reset_type) {
|
||||
case S390_RESET_EXTERNAL:
|
||||
case S390_RESET_REIPL:
|
||||
+ /*
|
||||
+ * Reset the subsystem which includes a AP reset. If a PV
|
||||
+ * guest had APQNs attached the AP reset is a prerequisite to
|
||||
+ * unprotecting since the UV checks if all APQNs are reset.
|
||||
+ */
|
||||
+ subsystem_reset();
|
||||
if (s390_is_pv()) {
|
||||
s390_machine_unprotect(ms);
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Device reset includes CPU clear resets so this has to be
|
||||
+ * done AFTER the unprotect call above.
|
||||
+ */
|
||||
qemu_devices_reset();
|
||||
s390_crypto_reset();
|
||||
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,73 +0,0 @@
|
||||
From 6c815e78cea7c26e9a3526cbb686f728eac31021 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 23 May 2023 12:34:33 +0200
|
||||
Subject: [PATCH 12/22] s390x: follow qdev tree to detect SCSI device on a CCW
|
||||
bus
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 279: Backport latest s390x-related fixes from upstream QEMU for qemu-kvm in RHEL 8.9
|
||||
RH-Bugzilla: 2169308 2209605
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Commit: [11/21] 97303bc9c356e8828d185868736b395bc0b70214
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/2169308
|
||||
|
||||
commit 7d2eb76d0407fc391b78df16d17f1e616ec3e228
|
||||
Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon Mar 28 09:40:00 2022 +0200
|
||||
|
||||
s390x: follow qdev tree to detect SCSI device on a CCW bus
|
||||
|
||||
Do not make assumptions on the parent type of the SCSIDevice, instead
|
||||
use object_dynamic_cast all the way up to the CcwDevice. This is cleaner
|
||||
because there is no guarantee that the bus is on a virtio-scsi device;
|
||||
that is only the case for the default configuration of QEMU's s390x
|
||||
target.
|
||||
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/s390x/ipl.c | 20 ++++++++++++--------
|
||||
1 file changed, 12 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
|
||||
index eb7fc4c4ae..9051d8652d 100644
|
||||
--- a/hw/s390x/ipl.c
|
||||
+++ b/hw/s390x/ipl.c
|
||||
@@ -376,14 +376,18 @@ static CcwDevice *s390_get_ccw_device(DeviceState *dev_st, int *devtype)
|
||||
object_dynamic_cast(OBJECT(dev_st),
|
||||
TYPE_SCSI_DEVICE);
|
||||
if (sd) {
|
||||
- SCSIBus *bus = scsi_bus_from_device(sd);
|
||||
- VirtIOSCSI *vdev = container_of(bus, VirtIOSCSI, bus);
|
||||
- VirtIOSCSICcw *scsi_ccw = container_of(vdev, VirtIOSCSICcw,
|
||||
- vdev);
|
||||
-
|
||||
- ccw_dev = (CcwDevice *)object_dynamic_cast(OBJECT(scsi_ccw),
|
||||
- TYPE_CCW_DEVICE);
|
||||
- tmp_dt = CCW_DEVTYPE_SCSI;
|
||||
+ SCSIBus *sbus = scsi_bus_from_device(sd);
|
||||
+ VirtIODevice *vdev = (VirtIODevice *)
|
||||
+ object_dynamic_cast(OBJECT(sbus->qbus.parent),
|
||||
+ TYPE_VIRTIO_DEVICE);
|
||||
+ if (vdev) {
|
||||
+ ccw_dev = (CcwDevice *)
|
||||
+ object_dynamic_cast(OBJECT(qdev_get_parent_bus(DEVICE(vdev))->parent),
|
||||
+ TYPE_CCW_DEVICE);
|
||||
+ if (ccw_dev) {
|
||||
+ tmp_dt = CCW_DEVTYPE_SCSI;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
}
|
||||
}
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,114 +0,0 @@
|
||||
From 2f0febd6813c4ad7f52e43afb3ecce7aef3557e6 Mon Sep 17 00:00:00 2001
|
||||
From: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Date: Fri, 28 Oct 2022 15:47:56 -0400
|
||||
Subject: [PATCH 08/11] s390x/pci: RPCIT second pass when mappings exhausted
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset
|
||||
RH-Bugzilla: 2163713
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [1/4] 0b4500b9247725b1ef0b290bb85392300a618cac
|
||||
|
||||
If we encounter a new mapping while the number of available DMA entries
|
||||
in vfio is 0, we are currently skipping that mapping which is a problem
|
||||
if we manage to free up DMA space after that within the same RPCIT --
|
||||
we will return to the guest with CC0 and have not mapped everything
|
||||
within the specified range. This issue was uncovered while testing
|
||||
changes to the s390 linux kernel iommu/dma code, where a different
|
||||
usage pattern was employed (new mappings start at the end of the
|
||||
aperture and work back towards the front, making us far more likely
|
||||
to encounter new mappings before invalidated mappings during a
|
||||
global refresh).
|
||||
|
||||
Fix this by tracking whether any mappings were skipped due to vfio
|
||||
DMA limit hitting 0; when this occurs, we still continue the range
|
||||
and unmap/map anything we can - then we must re-run the range again
|
||||
to pick up anything that was missed. This must occur in a loop until
|
||||
all requests are satisfied (success) or we detect that we are still
|
||||
unable to complete all mappings (return ZPCI_RPCIT_ST_INSUFF_RES).
|
||||
|
||||
Link: https://lore.kernel.org/linux-s390/20221019144435.369902-1-schnelle@linux.ibm.com/
|
||||
Fixes: 37fa32de70 ("s390x/pci: Honor DMA limits set by vfio")
|
||||
Reported-by: Niklas Schnelle <schnelle@linux.ibm.com>
|
||||
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Message-Id: <20221028194758.204007-2-mjrosato@linux.ibm.com>
|
||||
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
(cherry picked from commit 4a8d21ba50fc8625c3bd51dab903872952f95718)
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-pci-inst.c | 29 ++++++++++++++++++++++-------
|
||||
1 file changed, 22 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
|
||||
index 20a9bcc7af..7cc4bcf850 100644
|
||||
--- a/hw/s390x/s390-pci-inst.c
|
||||
+++ b/hw/s390x/s390-pci-inst.c
|
||||
@@ -677,8 +677,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
S390PCIBusDevice *pbdev;
|
||||
S390PCIIOMMU *iommu;
|
||||
S390IOTLBEntry entry;
|
||||
- hwaddr start, end;
|
||||
+ hwaddr start, end, sstart;
|
||||
uint32_t dma_avail;
|
||||
+ bool again;
|
||||
|
||||
if (env->psw.mask & PSW_MASK_PSTATE) {
|
||||
s390_program_interrupt(env, PGM_PRIVILEGED, ra);
|
||||
@@ -691,7 +692,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
}
|
||||
|
||||
fh = env->regs[r1] >> 32;
|
||||
- start = env->regs[r2];
|
||||
+ sstart = start = env->regs[r2];
|
||||
end = start + env->regs[r2 + 1];
|
||||
|
||||
pbdev = s390_pci_find_dev_by_fh(s390_get_phb(), fh);
|
||||
@@ -732,6 +733,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
goto err;
|
||||
}
|
||||
|
||||
+ retry:
|
||||
+ start = sstart;
|
||||
+ again = false;
|
||||
while (start < end) {
|
||||
error = s390_guest_io_table_walk(iommu->g_iota, start, &entry);
|
||||
if (error) {
|
||||
@@ -739,13 +743,24 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
}
|
||||
|
||||
start += entry.len;
|
||||
- while (entry.iova < start && entry.iova < end &&
|
||||
- (dma_avail > 0 || entry.perm == IOMMU_NONE)) {
|
||||
- dma_avail = s390_pci_update_iotlb(iommu, &entry);
|
||||
- entry.iova += TARGET_PAGE_SIZE;
|
||||
- entry.translated_addr += TARGET_PAGE_SIZE;
|
||||
+ while (entry.iova < start && entry.iova < end) {
|
||||
+ if (dma_avail > 0 || entry.perm == IOMMU_NONE) {
|
||||
+ dma_avail = s390_pci_update_iotlb(iommu, &entry);
|
||||
+ entry.iova += TARGET_PAGE_SIZE;
|
||||
+ entry.translated_addr += TARGET_PAGE_SIZE;
|
||||
+ } else {
|
||||
+ /*
|
||||
+ * We are unable to make a new mapping at this time, continue
|
||||
+ * on and hopefully free up more space. Then attempt another
|
||||
+ * pass.
|
||||
+ */
|
||||
+ again = true;
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
+ if (again && dma_avail > 0)
|
||||
+ goto retry;
|
||||
err:
|
||||
if (error) {
|
||||
pbdev->state = ZPCI_FS_ERROR;
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,106 +0,0 @@
|
||||
From 52ad0cc8a82f7a4c3581146fb4d2046898163c4e Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 23 Jan 2024 13:59:24 +0100
|
||||
Subject: [PATCH 1/3] s390x/pci: avoid double enable/disable of aif
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 349: s390x: Fix reset ordering of passthrough ISM devices
|
||||
RH-Jira: RHEL-22411
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Commit: [1/3] 450e4ca607d801bce93415994250374d70fb72f6
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-22411
|
||||
|
||||
commit 07b2c8e034d80ff92e202405c494d2ff80fcf848
|
||||
Author: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Date: Thu Jan 18 13:51:49 2024 -0500
|
||||
|
||||
s390x/pci: avoid double enable/disable of aif
|
||||
|
||||
Use a flag to keep track of whether AIF is currently enabled. This can be
|
||||
used to avoid enabling/disabling AIF multiple times as well as to determine
|
||||
whether or not it should be disabled during reset processing.
|
||||
|
||||
Fixes: d0bc7091c2 ("s390x/pci: enable adapter event notification for interpreted devices")
|
||||
Reported-by: Cédric Le Goater <clg@redhat.com>
|
||||
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Message-ID: <20240118185151.265329-2-mjrosato@linux.ibm.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-pci-kvm.c | 25 +++++++++++++++++++++++--
|
||||
include/hw/s390x/s390-pci-bus.h | 1 +
|
||||
2 files changed, 24 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c
|
||||
index ff41e4106d..1ee510436c 100644
|
||||
--- a/hw/s390x/s390-pci-kvm.c
|
||||
+++ b/hw/s390x/s390-pci-kvm.c
|
||||
@@ -27,6 +27,7 @@ bool s390_pci_kvm_interp_allowed(void)
|
||||
|
||||
int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist)
|
||||
{
|
||||
+ int rc;
|
||||
struct kvm_s390_zpci_op args = {
|
||||
.fh = pbdev->fh,
|
||||
.op = KVM_S390_ZPCIOP_REG_AEN,
|
||||
@@ -38,15 +39,35 @@ int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist)
|
||||
.u.reg_aen.flags = (assist) ? 0 : KVM_S390_ZPCIOP_REGAEN_HOST
|
||||
};
|
||||
|
||||
- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
|
||||
+ if (pbdev->aif) {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
|
||||
+ if (rc == 0) {
|
||||
+ pbdev->aif = true;
|
||||
+ }
|
||||
+
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev)
|
||||
{
|
||||
+ int rc;
|
||||
+
|
||||
struct kvm_s390_zpci_op args = {
|
||||
.fh = pbdev->fh,
|
||||
.op = KVM_S390_ZPCIOP_DEREG_AEN
|
||||
};
|
||||
|
||||
- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
|
||||
+ if (!pbdev->aif) {
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
|
||||
+ if (rc == 0) {
|
||||
+ pbdev->aif = false;
|
||||
+ }
|
||||
+
|
||||
+ return rc;
|
||||
}
|
||||
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
|
||||
index e0a9f9385b..7a658f5e30 100644
|
||||
--- a/include/hw/s390x/s390-pci-bus.h
|
||||
+++ b/include/hw/s390x/s390-pci-bus.h
|
||||
@@ -361,6 +361,7 @@ struct S390PCIBusDevice {
|
||||
bool unplug_requested;
|
||||
bool interp;
|
||||
bool forwarding_assist;
|
||||
+ bool aif;
|
||||
QTAILQ_ENTRY(S390PCIBusDevice) link;
|
||||
};
|
||||
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,125 +0,0 @@
|
||||
From b972c5a2763a91024725c147cf1691ed8e180c7c Mon Sep 17 00:00:00 2001
|
||||
From: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Date: Fri, 28 Oct 2022 15:47:57 -0400
|
||||
Subject: [PATCH 09/11] s390x/pci: coalesce unmap operations
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset
|
||||
RH-Bugzilla: 2163713
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [2/4] 7b5ee38eca565f5a7cbede4b9883ba3a508fb46c
|
||||
|
||||
Currently, each unmapped page is handled as an individual iommu
|
||||
region notification. Attempt to group contiguous unmap operations
|
||||
into fewer notifications to reduce overhead.
|
||||
|
||||
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Message-Id: <20221028194758.204007-3-mjrosato@linux.ibm.com>
|
||||
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
(cherry picked from commit ef536007c3301bbd6a787e4c2210ea289adaa6f0)
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-pci-inst.c | 51 ++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 51 insertions(+)
|
||||
|
||||
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
|
||||
index 7cc4bcf850..66e764f901 100644
|
||||
--- a/hw/s390x/s390-pci-inst.c
|
||||
+++ b/hw/s390x/s390-pci-inst.c
|
||||
@@ -640,6 +640,8 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu,
|
||||
}
|
||||
g_hash_table_remove(iommu->iotlb, &entry->iova);
|
||||
inc_dma_avail(iommu);
|
||||
+ /* Don't notify the iommu yet, maybe we can bundle contiguous unmaps */
|
||||
+ goto out;
|
||||
} else {
|
||||
if (cache) {
|
||||
if (cache->perm == entry->perm &&
|
||||
@@ -663,15 +665,44 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu,
|
||||
dec_dma_avail(iommu);
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * All associated iotlb entries have already been cleared, trigger the
|
||||
+ * unmaps.
|
||||
+ */
|
||||
memory_region_notify_iommu(&iommu->iommu_mr, 0, event);
|
||||
|
||||
out:
|
||||
return iommu->dma_limit ? iommu->dma_limit->avail : 1;
|
||||
}
|
||||
|
||||
+static void s390_pci_batch_unmap(S390PCIIOMMU *iommu, uint64_t iova,
|
||||
+ uint64_t len)
|
||||
+{
|
||||
+ uint64_t remain = len, start = iova, end = start + len - 1, mask, size;
|
||||
+ IOMMUTLBEvent event = {
|
||||
+ .type = IOMMU_NOTIFIER_UNMAP,
|
||||
+ .entry = {
|
||||
+ .target_as = &address_space_memory,
|
||||
+ .translated_addr = 0,
|
||||
+ .perm = IOMMU_NONE,
|
||||
+ },
|
||||
+ };
|
||||
+
|
||||
+ while (remain >= TARGET_PAGE_SIZE) {
|
||||
+ mask = dma_aligned_pow2_mask(start, end, 64);
|
||||
+ size = mask + 1;
|
||||
+ event.entry.iova = start;
|
||||
+ event.entry.addr_mask = mask;
|
||||
+ memory_region_notify_iommu(&iommu->iommu_mr, 0, event);
|
||||
+ start += size;
|
||||
+ remain -= size;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
{
|
||||
CPUS390XState *env = &cpu->env;
|
||||
+ uint64_t iova, coalesce = 0;
|
||||
uint32_t fh;
|
||||
uint16_t error = 0;
|
||||
S390PCIBusDevice *pbdev;
|
||||
@@ -742,6 +773,21 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
break;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * If this is an unmap of a PTE, let's try to coalesce multiple unmaps
|
||||
+ * into as few notifier events as possible.
|
||||
+ */
|
||||
+ if (entry.perm == IOMMU_NONE && entry.len == TARGET_PAGE_SIZE) {
|
||||
+ if (coalesce == 0) {
|
||||
+ iova = entry.iova;
|
||||
+ }
|
||||
+ coalesce += entry.len;
|
||||
+ } else if (coalesce > 0) {
|
||||
+ /* Unleash the coalesced unmap before processing a new map */
|
||||
+ s390_pci_batch_unmap(iommu, iova, coalesce);
|
||||
+ coalesce = 0;
|
||||
+ }
|
||||
+
|
||||
start += entry.len;
|
||||
while (entry.iova < start && entry.iova < end) {
|
||||
if (dma_avail > 0 || entry.perm == IOMMU_NONE) {
|
||||
@@ -759,6 +805,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
|
||||
}
|
||||
}
|
||||
}
|
||||
+ if (coalesce) {
|
||||
+ /* Unleash the coalesced unmap before finishing rpcit */
|
||||
+ s390_pci_batch_unmap(iommu, iova, coalesce);
|
||||
+ coalesce = 0;
|
||||
+ }
|
||||
if (again && dma_avail > 0)
|
||||
goto retry;
|
||||
err:
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,137 +0,0 @@
|
||||
From dda71c431be22772f3241af45b62737c988e85d4 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 23 Jan 2024 13:59:24 +0100
|
||||
Subject: [PATCH 3/3] s390x/pci: drive ISM reset from subsystem reset
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 349: s390x: Fix reset ordering of passthrough ISM devices
|
||||
RH-Jira: RHEL-22411
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Commit: [3/3] 42e89595dd5e24538a2d3f075391b4534497eece
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-22411
|
||||
|
||||
commit 68c691ca99a2538d6a53a70ce8a9ce06ee307ff1
|
||||
Author: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Date: Thu Jan 18 13:51:51 2024 -0500
|
||||
|
||||
s390x/pci: drive ISM reset from subsystem reset
|
||||
|
||||
ISM devices are sensitive to manipulation of the IOMMU, so the ISM device
|
||||
needs to be reset before the vfio-pci device is reset (triggering a full
|
||||
UNMAP). In order to ensure this occurs, trigger ISM device resets from
|
||||
subsystem_reset before triggering the PCI bus reset (which will also
|
||||
trigger vfio-pci reset). This only needs to be done for ISM devices
|
||||
which were enabled for use by the guest.
|
||||
Further, ensure that AIF is disabled as part of the reset event.
|
||||
|
||||
Fixes: ef1535901a ("s390x: do a subsystem reset before the unprotect on reboot")
|
||||
Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset")
|
||||
Reported-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Message-ID: <20240118185151.265329-4-mjrosato@linux.ibm.com>
|
||||
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-pci-bus.c | 26 +++++++++++++++++---------
|
||||
hw/s390x/s390-virtio-ccw.c | 8 ++++++++
|
||||
include/hw/s390x/s390-pci-bus.h | 1 +
|
||||
3 files changed, 26 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
|
||||
index 2d92848b0f..a8953693b9 100644
|
||||
--- a/hw/s390x/s390-pci-bus.c
|
||||
+++ b/hw/s390x/s390-pci-bus.c
|
||||
@@ -160,20 +160,12 @@ static void s390_pci_shutdown_notifier(Notifier *n, void *opaque)
|
||||
pci_device_reset(pbdev->pdev);
|
||||
}
|
||||
|
||||
-static void s390_pci_reset_cb(void *opaque)
|
||||
-{
|
||||
- S390PCIBusDevice *pbdev = opaque;
|
||||
-
|
||||
- pci_device_reset(pbdev->pdev);
|
||||
-}
|
||||
-
|
||||
static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev)
|
||||
{
|
||||
HotplugHandler *hotplug_ctrl;
|
||||
|
||||
if (pbdev->pft == ZPCI_PFT_ISM) {
|
||||
notifier_remove(&pbdev->shutdown_notifier);
|
||||
- qemu_unregister_reset(s390_pci_reset_cb, pbdev);
|
||||
}
|
||||
|
||||
/* Unplug the PCI device */
|
||||
@@ -1137,7 +1129,6 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||
if (pbdev->pft == ZPCI_PFT_ISM) {
|
||||
pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier;
|
||||
qemu_register_shutdown_notifier(&pbdev->shutdown_notifier);
|
||||
- qemu_register_reset(s390_pci_reset_cb, pbdev);
|
||||
}
|
||||
} else {
|
||||
pbdev->fh |= FH_SHM_EMUL;
|
||||
@@ -1284,6 +1275,23 @@ static void s390_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
|
||||
pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1);
|
||||
}
|
||||
|
||||
+void s390_pci_ism_reset(void)
|
||||
+{
|
||||
+ S390pciState *s = s390_get_phb();
|
||||
+
|
||||
+ S390PCIBusDevice *pbdev, *next;
|
||||
+
|
||||
+ /* Trigger reset event for each passthrough ISM device currently in-use */
|
||||
+ QTAILQ_FOREACH_SAFE(pbdev, &s->zpci_devs, link, next) {
|
||||
+ if (pbdev->interp && pbdev->pft == ZPCI_PFT_ISM &&
|
||||
+ pbdev->fh & FH_MASK_ENABLE) {
|
||||
+ s390_pci_kvm_aif_disable(pbdev);
|
||||
+
|
||||
+ pci_device_reset(pbdev->pdev);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void s390_pcihost_reset(DeviceState *dev)
|
||||
{
|
||||
S390pciState *s = S390_PCI_HOST_BRIDGE(dev);
|
||||
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||
index 94434c3bb1..51e5b39888 100644
|
||||
--- a/hw/s390x/s390-virtio-ccw.c
|
||||
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||
@@ -108,6 +108,14 @@ static void subsystem_reset(void)
|
||||
DeviceState *dev;
|
||||
int i;
|
||||
|
||||
+ /*
|
||||
+ * ISM firmware is sensitive to unexpected changes to the IOMMU, which can
|
||||
+ * occur during reset of the vfio-pci device (unmap of entire aperture).
|
||||
+ * Ensure any passthrough ISM devices are reset now, while CPUs are paused
|
||||
+ * but before vfio-pci cleanup occurs.
|
||||
+ */
|
||||
+ s390_pci_ism_reset();
|
||||
+
|
||||
for (i = 0; i < ARRAY_SIZE(reset_dev_types); i++) {
|
||||
dev = DEVICE(object_resolve_path_type("", reset_dev_types[i], NULL));
|
||||
if (dev) {
|
||||
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
|
||||
index 7a658f5e30..2bfad5563a 100644
|
||||
--- a/include/hw/s390x/s390-pci-bus.h
|
||||
+++ b/include/hw/s390x/s390-pci-bus.h
|
||||
@@ -401,5 +401,6 @@ S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
|
||||
const char *target);
|
||||
S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s,
|
||||
S390PCIBusDevice *pbdev);
|
||||
+void s390_pci_ism_reset(void);
|
||||
|
||||
#endif
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,71 +0,0 @@
|
||||
From fe70e87ef8d2f7e538867052e06012051919083f Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Tue, 23 Jan 2024 13:59:24 +0100
|
||||
Subject: [PATCH 2/3] s390x/pci: refresh fh before disabling aif
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 349: s390x: Fix reset ordering of passthrough ISM devices
|
||||
RH-Jira: RHEL-22411
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Commit: [2/3] 4a7d3fccdac508253bd7e5765973a08482022edb
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-22411
|
||||
|
||||
commit 30e35258e25c75c9d799c34fd89afcafffb37084
|
||||
Author: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Date: Thu Jan 18 13:51:50 2024 -0500
|
||||
|
||||
s390x/pci: refresh fh before disabling aif
|
||||
|
||||
Typically we refresh the host fh during CLP enable, however it's possible
|
||||
that the device goes through multiple reset events before the guest
|
||||
performs another CLP enable. Let's handle this for now by refreshing the
|
||||
host handle from vfio before disabling aif.
|
||||
|
||||
Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset")
|
||||
Reported-by: Cédric Le Goater <clg@redhat.com>
|
||||
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Message-ID: <20240118185151.265329-3-mjrosato@linux.ibm.com>
|
||||
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-pci-kvm.c | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c
|
||||
index 1ee510436c..9eef4fc3ec 100644
|
||||
--- a/hw/s390x/s390-pci-kvm.c
|
||||
+++ b/hw/s390x/s390-pci-kvm.c
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "hw/s390x/s390-pci-bus.h"
|
||||
#include "hw/s390x/s390-pci-kvm.h"
|
||||
#include "hw/s390x/s390-pci-inst.h"
|
||||
+#include "hw/s390x/s390-pci-vfio.h"
|
||||
#include "cpu_models.h"
|
||||
|
||||
bool s390_pci_kvm_interp_allowed(void)
|
||||
@@ -64,6 +65,14 @@ int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * The device may have already been reset but we still want to relinquish
|
||||
+ * the guest ISC, so always be sure to use an up-to-date host fh.
|
||||
+ */
|
||||
+ if (!s390_pci_get_host_fh(pbdev, &args.fh)) {
|
||||
+ return -EPERM;
|
||||
+ }
|
||||
+
|
||||
rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
|
||||
if (rc == 0) {
|
||||
pbdev->aif = false;
|
||||
--
|
||||
2.41.0
|
||||
|
@ -1,147 +0,0 @@
|
||||
From 9ec96a236be84e34b16681e658d3910fc3877a44 Mon Sep 17 00:00:00 2001
|
||||
From: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Date: Fri, 9 Dec 2022 14:57:00 -0500
|
||||
Subject: [PATCH 11/11] s390x/pci: reset ISM passthrough devices on shutdown
|
||||
and system reset
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset
|
||||
RH-Bugzilla: 2163713
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [4/4] c857d022c7c2f43cdeb66c4f6acfd9272c925b35
|
||||
|
||||
ISM device firmware stores unique state information that can
|
||||
cause a wholesale unmap of the associated IOMMU (e.g. when
|
||||
we get a termination signal for QEMU) to trigger firmware errors
|
||||
because firmware believes we are attempting to invalidate entries
|
||||
that are still in-use by the guest OS (when in fact that guest is
|
||||
in the process of being terminated or rebooted).
|
||||
To alleviate this, register both a shutdown notifier (for unexpected
|
||||
termination cases e.g. virsh destroy) as well as a reset callback
|
||||
(for cases like guest OS reboot). For each of these scenarios, trigger
|
||||
PCI device reset; this is enough to indicate to firmware that the IOMMU
|
||||
is no longer in-use by the guest OS, making it safe to invalidate any
|
||||
associated IOMMU entries.
|
||||
|
||||
Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X")
|
||||
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Message-Id: <20221209195700.263824-1-mjrosato@linux.ibm.com>
|
||||
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||
[thuth: Adjusted the hunk in s390-pci-vfio.c due to different context]
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
(cherry picked from commit 03451953c79e6b31f7860ee0c35b28e181d573c1)
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++
|
||||
hw/s390x/s390-pci-vfio.c | 2 ++
|
||||
include/hw/s390x/s390-pci-bus.h | 5 +++++
|
||||
3 files changed, 35 insertions(+)
|
||||
|
||||
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
|
||||
index d8b1e44a02..2d92848b0f 100644
|
||||
--- a/hw/s390x/s390-pci-bus.c
|
||||
+++ b/hw/s390x/s390-pci-bus.c
|
||||
@@ -24,6 +24,8 @@
|
||||
#include "hw/pci/msi.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/module.h"
|
||||
+#include "sysemu/reset.h"
|
||||
+#include "sysemu/runstate.h"
|
||||
|
||||
#ifndef DEBUG_S390PCI_BUS
|
||||
#define DEBUG_S390PCI_BUS 0
|
||||
@@ -150,10 +152,30 @@ out:
|
||||
psccb->header.response_code = cpu_to_be16(rc);
|
||||
}
|
||||
|
||||
+static void s390_pci_shutdown_notifier(Notifier *n, void *opaque)
|
||||
+{
|
||||
+ S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice,
|
||||
+ shutdown_notifier);
|
||||
+
|
||||
+ pci_device_reset(pbdev->pdev);
|
||||
+}
|
||||
+
|
||||
+static void s390_pci_reset_cb(void *opaque)
|
||||
+{
|
||||
+ S390PCIBusDevice *pbdev = opaque;
|
||||
+
|
||||
+ pci_device_reset(pbdev->pdev);
|
||||
+}
|
||||
+
|
||||
static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev)
|
||||
{
|
||||
HotplugHandler *hotplug_ctrl;
|
||||
|
||||
+ if (pbdev->pft == ZPCI_PFT_ISM) {
|
||||
+ notifier_remove(&pbdev->shutdown_notifier);
|
||||
+ qemu_unregister_reset(s390_pci_reset_cb, pbdev);
|
||||
+ }
|
||||
+
|
||||
/* Unplug the PCI device */
|
||||
if (pbdev->pdev) {
|
||||
DeviceState *pdev = DEVICE(pbdev->pdev);
|
||||
@@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||
pbdev->fh |= FH_SHM_VFIO;
|
||||
pbdev->forwarding_assist = false;
|
||||
}
|
||||
+ /* Register shutdown notifier and reset callback for ISM devices */
|
||||
+ if (pbdev->pft == ZPCI_PFT_ISM) {
|
||||
+ pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier;
|
||||
+ qemu_register_shutdown_notifier(&pbdev->shutdown_notifier);
|
||||
+ qemu_register_reset(s390_pci_reset_cb, pbdev);
|
||||
+ }
|
||||
} else {
|
||||
pbdev->fh |= FH_SHM_EMUL;
|
||||
/* Always intercept emulated devices */
|
||||
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
|
||||
index 99806e2a84..69af35f4fe 100644
|
||||
--- a/hw/s390x/s390-pci-vfio.c
|
||||
+++ b/hw/s390x/s390-pci-vfio.c
|
||||
@@ -124,6 +124,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
|
||||
/* The following values remain 0 until we support other FMB formats */
|
||||
pbdev->zpci_fn.fmbl = 0;
|
||||
pbdev->zpci_fn.pft = 0;
|
||||
+ /* Store function type separately for type-specific behavior */
|
||||
+ pbdev->pft = cap->pft;
|
||||
|
||||
/*
|
||||
* If appropriate, reduce the size of the supported DMA aperture reported
|
||||
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
|
||||
index 1c46e3a269..e0a9f9385b 100644
|
||||
--- a/include/hw/s390x/s390-pci-bus.h
|
||||
+++ b/include/hw/s390x/s390-pci-bus.h
|
||||
@@ -39,6 +39,9 @@
|
||||
#define UID_CHECKING_ENABLED 0x01
|
||||
#define ZPCI_DTSM 0x40
|
||||
|
||||
+/* zPCI Function Types */
|
||||
+#define ZPCI_PFT_ISM 5
|
||||
+
|
||||
OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE)
|
||||
OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS)
|
||||
OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE)
|
||||
@@ -344,6 +347,7 @@ struct S390PCIBusDevice {
|
||||
uint16_t noi;
|
||||
uint16_t maxstbl;
|
||||
uint8_t sum;
|
||||
+ uint8_t pft;
|
||||
S390PCIGroup *pci_group;
|
||||
ClpRspQueryPci zpci_fn;
|
||||
S390MsixInfo msix;
|
||||
@@ -352,6 +356,7 @@ struct S390PCIBusDevice {
|
||||
MemoryRegion msix_notify_mr;
|
||||
IndAddr *summary_ind;
|
||||
IndAddr *indicator;
|
||||
+ Notifier shutdown_notifier;
|
||||
bool pci_unplug_request_processed;
|
||||
bool unplug_requested;
|
||||
bool interp;
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,91 +0,0 @@
|
||||
From a0b6c21b555566eb6bc38643269d14c82dfd0226 Mon Sep 17 00:00:00 2001
|
||||
From: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Date: Fri, 28 Oct 2022 15:47:58 -0400
|
||||
Subject: [PATCH 10/11] s390x/pci: shrink DMA aperture to be bound by vfio DMA
|
||||
limit
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset
|
||||
RH-Bugzilla: 2163713
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||
RH-Commit: [3/4] aa241dd250ad5e696b67c87dddc31ee5aaee9c0e
|
||||
|
||||
Currently, s390x-pci performs accounting against the vfio DMA
|
||||
limit and triggers the guest to clean up mappings when the limit
|
||||
is reached. Let's go a step further and also limit the size of
|
||||
the supported DMA aperture reported to the guest based upon the
|
||||
initial vfio DMA limit reported for the container (if less than
|
||||
the size reported by the firmware/host zPCI layer). This
|
||||
avoids processing sections of the guest DMA table during global
|
||||
refresh that, for common use cases, will never be used anyway, and
|
||||
makes exhausting the vfio DMA limit due to mismatch between guest
|
||||
aperture size and host limit far less likely and more indicative
|
||||
of an error.
|
||||
|
||||
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
Message-Id: <20221028194758.204007-4-mjrosato@linux.ibm.com>
|
||||
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
(cherry picked from commit df202e3ff3fccb49868e08f20d0bda86cb953fbe)
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/s390x/s390-pci-vfio.c | 11 +++++++++++
|
||||
include/hw/s390x/s390-pci-bus.h | 1 +
|
||||
2 files changed, 12 insertions(+)
|
||||
|
||||
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
|
||||
index 2aefa508a0..99806e2a84 100644
|
||||
--- a/hw/s390x/s390-pci-vfio.c
|
||||
+++ b/hw/s390x/s390-pci-vfio.c
|
||||
@@ -84,6 +84,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
|
||||
cnt->users = 1;
|
||||
cnt->avail = avail;
|
||||
QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
|
||||
+ pbdev->iommu->max_dma_limit = avail;
|
||||
return cnt;
|
||||
}
|
||||
|
||||
@@ -103,6 +104,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
|
||||
struct vfio_info_cap_header *hdr;
|
||||
struct vfio_device_info_cap_zpci_base *cap;
|
||||
VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
|
||||
+ uint64_t vfio_size;
|
||||
|
||||
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
|
||||
|
||||
@@ -122,6 +124,15 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
|
||||
/* The following values remain 0 until we support other FMB formats */
|
||||
pbdev->zpci_fn.fmbl = 0;
|
||||
pbdev->zpci_fn.pft = 0;
|
||||
+
|
||||
+ /*
|
||||
+ * If appropriate, reduce the size of the supported DMA aperture reported
|
||||
+ * to the guest based upon the vfio DMA limit.
|
||||
+ */
|
||||
+ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
|
||||
+ if (vfio_size < (cap->end_dma - cap->start_dma + 1)) {
|
||||
+ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1;
|
||||
+ }
|
||||
}
|
||||
|
||||
static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info,
|
||||
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
|
||||
index 0605fcea24..1c46e3a269 100644
|
||||
--- a/include/hw/s390x/s390-pci-bus.h
|
||||
+++ b/include/hw/s390x/s390-pci-bus.h
|
||||
@@ -278,6 +278,7 @@ struct S390PCIIOMMU {
|
||||
uint64_t g_iota;
|
||||
uint64_t pba;
|
||||
uint64_t pal;
|
||||
+ uint64_t max_dma_limit;
|
||||
GHashTable *iotlb;
|
||||
S390PCIDMACount *dma_limit;
|
||||
};
|
||||
--
|
||||
2.37.3
|
||||
|
@ -1,109 +0,0 @@
|
||||
From 2fc8489b70445a3db0a2e72c1f1edb4d61d404d6 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||
Date: Mon, 16 Jan 2023 18:46:05 +0100
|
||||
Subject: [PATCH] s390x/pv: Implement a CGS check helper
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||
RH-MergeRequest: 271: Secure guest can't boot with maximal number of vcpus (248)
|
||||
RH-Bugzilla: 2187159
|
||||
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
|
||||
RH-Commit: [1/1] c870d525c48ab6d0df964b5abe48efe2528c9883
|
||||
|
||||
When a protected VM is started with the maximum number of CPUs (248),
|
||||
the service call providing information on the CPUs requires more
|
||||
buffer space than allocated and QEMU disgracefully aborts :
|
||||
|
||||
LOADPARM=[........]
|
||||
Using virtio-blk.
|
||||
Using SCSI scheme.
|
||||
...................................................................................
|
||||
qemu-system-s390x: KVM_S390_MEM_OP failed: Argument list too long
|
||||
|
||||
When protected virtualization is initialized, compute the maximum
|
||||
number of vCPUs supported by the machine and return useful information
|
||||
to the user before the machine starts in case of error.
|
||||
|
||||
Suggested-by: Thomas Huth <thuth@redhat.com>
|
||||
Reviewed-by: Thomas Huth <thuth@redhat.com>
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
Message-Id: <20230116174607.2459498-2-clg@kaod.org>
|
||||
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||
(cherry picked from commit 75d7150c636569f6687f7e70a33be893be43eb5f)
|
||||
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||
---
|
||||
hw/s390x/pv.c | 40 ++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 40 insertions(+)
|
||||
|
||||
diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
|
||||
index 728ba24547..749e5db1ce 100644
|
||||
--- a/hw/s390x/pv.c
|
||||
+++ b/hw/s390x/pv.c
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "exec/confidential-guest-support.h"
|
||||
#include "hw/s390x/ipl.h"
|
||||
#include "hw/s390x/pv.h"
|
||||
+#include "hw/s390x/sclp.h"
|
||||
#include "target/s390x/kvm/kvm_s390x.h"
|
||||
|
||||
static bool info_valid;
|
||||
@@ -249,6 +250,41 @@ struct S390PVGuestClass {
|
||||
ConfidentialGuestSupportClass parent_class;
|
||||
};
|
||||
|
||||
+/*
|
||||
+ * If protected virtualization is enabled, the amount of data that the
|
||||
+ * Read SCP Info Service Call can use is limited to one page. The
|
||||
+ * available space also depends on the Extended-Length SCCB (ELS)
|
||||
+ * feature which can take more buffer space to store feature
|
||||
+ * information. This impacts the maximum number of CPUs supported in
|
||||
+ * the machine.
|
||||
+ */
|
||||
+static uint32_t s390_pv_get_max_cpus(void)
|
||||
+{
|
||||
+ int offset_cpu = s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) ?
|
||||
+ offsetof(ReadInfo, entries) : SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET;
|
||||
+
|
||||
+ return (TARGET_PAGE_SIZE - offset_cpu) / sizeof(CPUEntry);
|
||||
+}
|
||||
+
|
||||
+static bool s390_pv_check_cpus(Error **errp)
|
||||
+{
|
||||
+ MachineState *ms = MACHINE(qdev_get_machine());
|
||||
+ uint32_t pv_max_cpus = s390_pv_get_max_cpus();
|
||||
+
|
||||
+ if (ms->smp.max_cpus > pv_max_cpus) {
|
||||
+ error_setg(errp, "Protected VMs support a maximum of %d CPUs",
|
||||
+ pv_max_cpus);
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
+{
|
||||
+ return s390_pv_check_cpus(errp);
|
||||
+}
|
||||
+
|
||||
int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) {
|
||||
@@ -261,6 +297,10 @@ int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
return -1;
|
||||
}
|
||||
|
||||
+ if (!s390_pv_guest_check(cgs, errp)) {
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
cgs->ready = true;
|
||||
|
||||
return 0;
|
||||
--
|
||||
2.39.1
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user