Compare commits

...

No commits in common. "imports/c8s-stream-rhel/qemu-kvm-6.2.0-5.module+el8.6.0+14025+ca131e0a" and "c8-stream-rhel" have entirely different histories.

312 changed files with 41997 additions and 4 deletions

4
.gitignore vendored
View File

@ -1 +1,5 @@
SOURCES/qemu-6.2.0.tar.xz
SOURCES/tests_data_acpi_pc_SSDT.dimmpxm
SOURCES/tests_data_acpi_q35_FACP.slic
SOURCES/tests_data_acpi_q35_SSDT.dimmpxm
SOURCES/tests_data_acpi_virt_SSDT.memhp

View File

@ -1 +1,5 @@
68cd61a466170115b88817e2d52db2cd7a92f43a SOURCES/qemu-6.2.0.tar.xz
c4b34092bc5af1ba7febfca1477320fb024e8acd SOURCES/tests_data_acpi_pc_SSDT.dimmpxm
19349e3517143bd1af56a5444e927ba37a111f72 SOURCES/tests_data_acpi_q35_FACP.slic
4632d10ae8cedad4d5d760ed211f83f0dc81005d SOURCES/tests_data_acpi_q35_SSDT.dimmpxm
ef12eed43cc357fb134db6fa3c7ffc83e222a97d SOURCES/tests_data_acpi_virt_SSDT.memhp

View File

@ -0,0 +1,87 @@
From cd49a32e9c9e33efc51652b68180a07683814b4d Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 11 Jul 2022 18:11:12 -0300
Subject: [PATCH 4/9] Add dirty-sync-missed-zero-copy migration stat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 201: Zero-copy-send fixes + improvements
RH-Commit: [4/8] 56cce61cf95aafc8dafae7531b43c166084abfec
RH-Bugzilla: 2110203
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20220711211112.18951-3-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit cf20c897338067ab4b70a4596fdccaf90c7e29a1)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/migration.c | 2 ++
monitor/hmp-cmds.c | 5 +++++
qapi/migration.json | 7 ++++++-
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/migration/migration.c b/migration/migration.c
index e100b30f00..952a26c5c2 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1012,6 +1012,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
info->ram->normal_bytes = ram_counters.normal * page_size;
info->ram->mbps = s->mbps;
info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
+ info->ram->dirty_sync_missed_zero_copy =
+ ram_counters.dirty_sync_missed_zero_copy;
info->ram->postcopy_requests = ram_counters.postcopy_requests;
info->ram->page_size = page_size;
info->ram->multifd_bytes = ram_counters.multifd_bytes;
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 8c384dc1b2..f7216ab5d0 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -305,6 +305,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n",
info->ram->postcopy_bytes >> 10);
}
+ if (info->ram->dirty_sync_missed_zero_copy) {
+ monitor_printf(mon,
+ "Zero-copy-send fallbacks happened: %" PRIu64 " times\n",
+ info->ram->dirty_sync_missed_zero_copy);
+ }
}
if (info->has_disk) {
diff --git a/qapi/migration.json b/qapi/migration.json
index c8ec260ab0..94bc5c69db 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -55,6 +55,10 @@
# @postcopy-bytes: The number of bytes sent during the post-copy phase
# (since 7.0).
#
+# @dirty-sync-missed-zero-copy: Number of times dirty RAM synchronization could
+# not avoid copying dirty pages. This is between
+# 0 and @dirty-sync-count * @multifd-channels.
+# (since 7.1)
# Since: 0.14
##
{ 'struct': 'MigrationStats',
@@ -65,7 +69,8 @@
'postcopy-requests' : 'int', 'page-size' : 'int',
'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64',
'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64',
- 'postcopy-bytes' : 'uint64' } }
+ 'postcopy-bytes' : 'uint64',
+ 'dirty-sync-missed-zero-copy' : 'uint64' } }
##
# @XBZRLECacheStats:
--
2.31.1

View File

@ -0,0 +1,28 @@
From db6e042fe4fdc1a1bbf562a46b15d4d8e33e2fa6 Mon Sep 17 00:00:00 2001
From: Paul Lai <plai@redhat.com>
Date: Tue, 25 Jan 2022 15:16:22 -0500
Subject: [PATCH 4/7] Enable SGX -- RH Only
RH-Author: Paul Lai <None>
RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections
RH-Commit: [4/5] cea874f29984897ef1232fb7749c13203c888034
RH-Bugzilla: 1518984
RH-Acked-by: Paolo Bonzini <None>
RH-Acked-by: Bandan Das <None>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
---
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 +
1 file changed, 1 insertion(+)
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
index ddf036f042..fdbbdf9742 100644
--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
@@ -102,3 +102,4 @@ CONFIG_TPM_CRB=y
CONFIG_TPM_TIS_ISA=y
CONFIG_TPM_EMULATOR=y
CONFIG_TPM_PASSTHROUGH=y
+CONFIG_SGX=y
--
2.27.0

View File

@ -0,0 +1,82 @@
From 9bacf8c4104ff3cff2e0e2c2179ec4fda633167f Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Mon, 16 Jan 2023 07:51:08 -0500
Subject: [PATCH 05/11] KVM: keep track of running ioctls
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 247: accel: introduce accelerator blocker API
RH-Bugzilla: 2161188
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/3] 357508389e2a0fd996206b406e9e235e50b5f0b6
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188
commit a27dd2de68f37ba96fe164a42121daa5f0750afc
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Fri Nov 11 10:47:57 2022 -0500
KVM: keep track of running ioctls
Using the new accel-blocker API, mark where ioctls are being called
in KVM. Next, we will implement the critical section that will take
care of performing memslots modifications atomically, therefore
preventing any new ioctl from running and allowing the running ones
to finish.
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20221111154758.1372674-3-eesposit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
accel/kvm/kvm-all.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 8f2a53438f..221aadfda7 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2337,6 +2337,7 @@ static int kvm_init(MachineState *ms)
assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size);
s->sigmask_len = 8;
+ accel_blocker_init();
#ifdef KVM_CAP_SET_GUEST_DEBUG
QTAILQ_INIT(&s->kvm_sw_breakpoints);
@@ -3018,7 +3019,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
va_end(ap);
trace_kvm_vm_ioctl(type, arg);
+ accel_ioctl_begin();
ret = ioctl(s->vmfd, type, arg);
+ accel_ioctl_end();
if (ret == -1) {
ret = -errno;
}
@@ -3036,7 +3039,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
va_end(ap);
trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
+ accel_cpu_ioctl_begin(cpu);
ret = ioctl(cpu->kvm_fd, type, arg);
+ accel_cpu_ioctl_end(cpu);
if (ret == -1) {
ret = -errno;
}
@@ -3054,7 +3059,9 @@ int kvm_device_ioctl(int fd, int type, ...)
va_end(ap);
trace_kvm_device_ioctl(fd, type, arg);
+ accel_ioctl_begin();
ret = ioctl(fd, type, arg);
+ accel_ioctl_end();
if (ret == -1) {
ret = -errno;
}
--
2.37.3

View File

@ -0,0 +1,109 @@
From ea5299b5dde7d0b6b2f93cb646e6a24c9f105466 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 23 Mar 2022 12:33:25 +0100
Subject: [PATCH 13/24] KVM: x86: workaround invalid CPUID[0xD,9] info on some
AMD processors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paul Lai <plai@redhat.com>
RH-MergeRequest: 176: Enable KVM AMX support
RH-Commit: [13/13] 38f147c911258e84e01336271ebd23a1c24371fc
RH-Bugzilla: 1916415
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Some AMD processors expose the PKRU extended save state even if they do not have
the related PKU feature in CPUID. Worse, when they do they report a size of
64, whereas the expected size of the PKRU extended save state is 8, therefore
the esa->size == eax assertion does not hold.
The state is already ignored by KVM_GET_SUPPORTED_CPUID because it
was not enabled in the host XCR0. However, QEMU kvm_cpu_xsave_init()
runs before QEMU invokes arch_prctl() to enable dynamically-enabled
save states such as XTILEDATA, and KVM_GET_SUPPORTED_CPUID hides save
states that have yet to be enabled. Therefore, kvm_cpu_xsave_init()
needs to consult the host CPUID instead of KVM_GET_SUPPORTED_CPUID,
and dies with an assertion failure.
When setting up the ExtSaveArea array to match the host, ignore features that
KVM does not report as supported. This will cause QEMU to skip the incorrect
CPUID leaf instead of tripping the assertion.
Closes: https://gitlab.com/qemu-project/qemu/-/issues/916
Reported-by: Daniel P. Berrangé <berrange@redhat.com>
Analyzed-by: Yang Zhong <yang.zhong@intel.com>
Reported-by: Peter Krempa <pkrempa@redhat.com>
Tested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 58f7db26f21c690cf9a669c314cfd7371506084a)
Signed-off-by: Paul Lai <plai@redhat.com>
---
target/i386/cpu.c | 4 ++--
target/i386/cpu.h | 2 ++
target/i386/kvm/kvm-cpu.c | 19 ++++++++++++-------
3 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 09e08f7f38..0543b846ff 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4980,8 +4980,8 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
return cpu_list;
}
-static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
- bool migratable_only)
+uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
+ bool migratable_only)
{
FeatureWordInfo *wi = &feature_word_info[w];
uint64_t r = 0;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8ab2a4042a..006b735fe4 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -604,6 +604,8 @@ typedef enum FeatureWord {
} FeatureWord;
typedef uint64_t FeatureWordArray[FEATURE_WORDS];
+uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
+ bool migratable_only);
/* cpuid_features bits */
#define CPUID_FP87 (1U << 0)
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index bdc967c484..74c1396a93 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -99,13 +99,18 @@ static void kvm_cpu_xsave_init(void)
for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) {
ExtSaveArea *esa = &x86_ext_save_areas[i];
- if (esa->size) {
- host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
- if (eax != 0) {
- assert(esa->size == eax);
- esa->offset = ebx;
- esa->ecx = ecx;
- }
+ if (!esa->size) {
+ continue;
+ }
+ if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits)
+ != esa->bits) {
+ continue;
+ }
+ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
+ if (eax != 0) {
+ assert(esa->size == eax);
+ esa->offset = ebx;
+ esa->ecx = ecx;
}
}
}
--
2.35.3

View File

@ -0,0 +1,181 @@
From 440ee491240f2f02f9a6082d8aad98d88c1039dd Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 15 Jan 2024 14:00:04 +0100
Subject: [PATCH 1/5] MAINTAINERS: split out s390x sections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 348: s390x: Provide some more useful information if decryption of a PV image fails
RH-Jira: RHEL-18214
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Commit: [1/5] a71a3c11922481f97c36570e361088d17474e481
JIRA: https://issues.redhat.com/browse/RHEL-18214
commit 56e34834029c7c6862cb0095d95ad83c50485f88
Author: Cornelia Huck <cohuck@redhat.com>
Date: Wed Dec 22 11:55:48 2021 +0100
MAINTAINERS: split out s390x sections
Split out some more specialized devices etc., so that we can build
smarter lists of people to be put on cc: in the future.
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Thomas Huth <thuth@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Eric Farman <farman@linux.ibm.com>
Message-Id: <20211222105548.356852-1-cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
MAINTAINERS | 85 ++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 74 insertions(+), 11 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 7543eb4d59..b893206fc3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -297,7 +297,6 @@ M: David Hildenbrand <david@redhat.com>
S: Maintained
F: target/s390x/
F: target/s390x/tcg
-F: target/s390x/cpu_models_*.[ch]
F: hw/s390x/
F: disas/s390.c
F: tests/tcg/s390x/
@@ -396,16 +395,10 @@ M: Halil Pasic <pasic@linux.ibm.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
S: Supported
F: target/s390x/kvm/
-F: target/s390x/ioinst.[ch]
F: target/s390x/machine.c
F: target/s390x/sigp.c
-F: target/s390x/cpu_features*.[ch]
-F: target/s390x/cpu_models.[ch]
F: hw/s390x/pv.c
F: include/hw/s390x/pv.h
-F: hw/intc/s390_flic.c
-F: hw/intc/s390_flic_kvm.c
-F: include/hw/s390x/s390_flic.h
F: gdb-xml/s390*.xml
T: git https://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
@@ -1529,12 +1522,8 @@ S390 Virtio-ccw
M: Halil Pasic <pasic@linux.ibm.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
S: Supported
-F: hw/char/sclp*.[hc]
-F: hw/char/terminal3270.c
F: hw/s390x/
F: include/hw/s390x/
-F: hw/watchdog/wdt_diag288.c
-F: include/hw/watchdog/wdt_diag288.h
F: configs/devices/s390x-softmmu/default.mak
F: tests/avocado/machine_s390_ccw_virtio.py
T: git https://github.com/borntraeger/qemu.git s390-next
@@ -1559,6 +1548,37 @@ F: hw/s390x/s390-pci*
F: include/hw/s390x/s390-pci*
L: qemu-s390x@nongnu.org
+S390 channel subsystem
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Supported
+F: hw/s390x/ccw-device.[ch]
+F: hw/s390x/css.c
+F: hw/s390x/css-bridge.c
+F: include/hw/s390x/css.h
+F: include/hw/s390x/css-bridge.h
+F: include/hw/s390x/ioinst.h
+F: target/s390x/ioinst.c
+L: qemu-s390x@nongnu.org
+
+S390 CPU models
+M: David Hildenbrand <david@redhat.com>
+S: Maintained
+F: target/s390x/cpu_features*.[ch]
+F: target/s390x/cpu_models.[ch]
+L: qemu-s390x@nongnu.org
+
+S390 SCLP-backed devices
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Supported
+F: include/hw/s390x/event-facility.h
+F: include/hw/s390x/sclp.h
+F: hw/char/sclp*.[hc]
+F: hw/s390x/event-facility.c
+F: hw/s390x/sclp*.c
+L: qemu-s390x@nongnu.org
+
X86 Machines
------------
PC
@@ -1956,6 +1976,7 @@ M: Halil Pasic <pasic@linux.ibm.com>
S: Supported
F: hw/s390x/virtio-ccw*.[hc]
F: hw/s390x/vhost-vsock-ccw.c
+F: hw/s390x/vhost-user-fs-ccw.c
T: git https://gitlab.com/cohuck/qemu.git s390-next
T: git https://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
@@ -2294,6 +2315,48 @@ F: hw/timer/mips_gictimer.c
F: include/hw/intc/mips_gic.h
F: include/hw/timer/mips_gictimer.h
+S390 3270 device
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Odd fixes
+F: include/hw/s390x/3270-ccw.h
+F: hw/char/terminal3270.c
+F: hw/s390x/3270-ccw.c
+L: qemu-s390x@nongnu.org
+
+S390 diag 288 watchdog
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Supported
+F: hw/watchdog/wdt_diag288.c
+F: include/hw/watchdog/wdt_diag288.h
+L: qemu-s390x@nongnu.org
+
+S390 storage key device
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Supported
+F: hw/s390x/storage-keys.h
+F: hw/390x/s390-skeys*.c
+L: qemu-s390x@nongnu.org
+
+S390 storage attribute device
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+S: Supported
+F: hw/s390x/storage-attributes.h
+F: hw/s390/s390-stattrib*.c
+L: qemu-s390x@nongnu.org
+
+S390 floating interrupt controller
+M: Halil Pasic <pasic@linux.ibm.com>
+M: Christian Borntraeger <borntraeger@linux.ibm.com>
+M: David Hildenbrand <david@redhat.com>
+S: Supported
+F: hw/intc/s390_flic*.c
+F: include/hw/s390x/s390_flic.h
+L: qemu-s390x@nongnu.org
+
Subsystems
----------
Overall Audio backends
--
2.41.0

View File

@ -0,0 +1,420 @@
From 7eeec7c008e947bc3e1fed682791092b408852c6 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Wed, 18 May 2022 02:52:24 -0300
Subject: [PATCH 17/37] QIOChannel: Add flags on io_writev and introduce
io_flush callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
RH-Commit: [17/26] 7bde4e79fd3f76a6cc84d9cacf50420584ddd35c
RH-Bugzilla: 2072049
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Add flags to io_writev and introduce io_flush as optional callback to
QIOChannelClass, allowing the implementation of zero copy writes by
subclasses.
How to use them:
- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY),
- Wait write completion with qio_channel_flush().
Notes:
As some zero copy write implementations work asynchronously, it's
recommended to keep the write buffer untouched until the return of
qio_channel_flush(), to avoid the risk of sending an updated buffer
instead of the buffer state during write.
As io_flush callback is optional, if a subclass does not implement it, then:
- io_flush will return 0 without changing anything.
Also, some functions like qio_channel_writev_full_all() were adapted to
receive a flag parameter. That allows shared code between zero copy and
non-zero copy writev, and also an easier implementation on new flags.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20220513062836.965425-3-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
chardev/char-io.c | 2 +-
hw/remote/mpqemu-link.c | 2 +-
include/io/channel.h | 38 +++++++++++++++++++++-
io/channel-buffer.c | 1 +
io/channel-command.c | 1 +
io/channel-file.c | 1 +
io/channel-socket.c | 2 ++
io/channel-tls.c | 1 +
io/channel-websock.c | 1 +
io/channel.c | 49 +++++++++++++++++++++++------
migration/rdma.c | 1 +
scsi/pr-manager-helper.c | 2 +-
tests/unit/test-io-channel-socket.c | 1 +
13 files changed, 88 insertions(+), 14 deletions(-)
diff --git a/chardev/char-io.c b/chardev/char-io.c
index 8ced184160..4451128cba 100644
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc,
ret = qio_channel_writev_full(
ioc, &iov, 1,
- fds, nfds, NULL);
+ fds, nfds, 0, NULL);
if (ret == QIO_CHANNEL_ERR_BLOCK) {
if (offset) {
return offset;
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
index 7e841820e5..e8f556bd27 100644
--- a/hw/remote/mpqemu-link.c
+++ b/hw/remote/mpqemu-link.c
@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp)
}
if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send),
- fds, nfds, errp)) {
+ fds, nfds, 0, errp)) {
ret = true;
} else {
trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds);
diff --git a/include/io/channel.h b/include/io/channel.h
index 88988979f8..c680ee7480 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
#define QIO_CHANNEL_ERR_BLOCK -2
+#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
+
typedef enum QIOChannelFeature QIOChannelFeature;
enum QIOChannelFeature {
QIO_CHANNEL_FEATURE_FD_PASS,
QIO_CHANNEL_FEATURE_SHUTDOWN,
QIO_CHANNEL_FEATURE_LISTEN,
+ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
};
@@ -104,6 +107,7 @@ struct QIOChannelClass {
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp);
ssize_t (*io_readv)(QIOChannel *ioc,
const struct iovec *iov,
@@ -136,6 +140,8 @@ struct QIOChannelClass {
IOHandler *io_read,
IOHandler *io_write,
void *opaque);
+ int (*io_flush)(QIOChannel *ioc,
+ Error **errp);
};
/* General I/O handling functions */
@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
* @niov: the length of the @iov array
* @fds: an array of file handles to send
* @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
* @errp: pointer to a NULL-initialized error object
*
* Write data to the IO channel, reading it from the
@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp);
/**
@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
* @niov: the length of the @iov array
* @fds: an array of file handles to send
* @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
* @errp: pointer to a NULL-initialized error object
*
*
@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
* to be written, yielding from the current coroutine
* if required.
*
+ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags,
+ * instead of waiting for all requested data to be written,
+ * this function will wait until it's all queued for writing.
+ * In this case, if the buffer gets changed between queueing and
+ * sending, the updated buffer will be sent. If this is not a
+ * desired behavior, it's suggested to call qio_channel_flush()
+ * before reusing the buffer.
+ *
* Returns: 0 if all bytes were written, or -1 on error
*/
@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
const struct iovec *iov,
size_t niov,
int *fds, size_t nfds,
- Error **errp);
+ int flags, Error **errp);
+
+/**
+ * qio_channel_flush:
+ * @ioc: the channel object
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Will block until every packet queued with
+ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY
+ * is sent, or return in case of any error.
+ *
+ * If not implemented, acts as a no-op, and returns 0.
+ *
+ * Returns -1 if any error is found,
+ * 1 if every send failed to use zero copy.
+ * 0 otherwise.
+ */
+
+int qio_channel_flush(QIOChannel *ioc,
+ Error **errp);
#endif /* QIO_CHANNEL_H */
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
index baa4e2b089..bf52011be2 100644
--- a/io/channel-buffer.c
+++ b/io/channel-buffer.c
@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
diff --git a/io/channel-command.c b/io/channel-command.c
index b2a9e27138..5ff1691bad 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
diff --git a/io/channel-file.c b/io/channel-file.c
index c4bf799a80..348a48545e 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
diff --git a/io/channel-socket.c b/io/channel-socket.c
index 606ec97cf7..bfbd64787e 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
diff --git a/io/channel-tls.c b/io/channel-tls.c
index 2ae1b92fc0..4ce890a538 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
diff --git a/io/channel-websock.c b/io/channel-websock.c
index 70889bb54d..035dd6075b 100644
--- a/io/channel-websock.c
+++ b/io/channel-websock.c
@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
diff --git a/io/channel.c b/io/channel.c
index e8b019dc36..0640941ac5 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
- if ((fds || nfds) &&
- !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
+ if (fds || nfds) {
+ if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
+ error_setg_errno(errp, EINVAL,
+ "Channel does not support file descriptor passing");
+ return -1;
+ }
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+ error_setg_errno(errp, EINVAL,
+ "Zero Copy does not support file descriptor passing");
+ return -1;
+ }
+ }
+
+ if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) &&
+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
error_setg_errno(errp, EINVAL,
- "Channel does not support file descriptor passing");
+ "Requested Zero Copy feature is not available");
return -1;
}
- return klass->io_writev(ioc, iov, niov, fds, nfds, errp);
+ return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp);
}
@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc,
size_t niov,
Error **errp)
{
- return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp);
+ return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp);
}
int qio_channel_writev_full_all(QIOChannel *ioc,
const struct iovec *iov,
size_t niov,
int *fds, size_t nfds,
- Error **errp)
+ int flags, Error **errp)
{
int ret = -1;
struct iovec *local_iov = g_new(struct iovec, niov);
@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
while (nlocal_iov > 0) {
ssize_t len;
- len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds,
- errp);
+
+ len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds,
+ nfds, flags, errp);
+
if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) {
qio_channel_yield(ioc, G_IO_OUT);
@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc,
size_t niov,
Error **errp)
{
- return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp);
+ return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp);
}
@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc,
Error **errp)
{
struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
- return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp);
+ return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp);
}
@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc,
return klass->io_seek(ioc, offset, whence, errp);
}
+int qio_channel_flush(QIOChannel *ioc,
+ Error **errp)
+{
+ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+
+ if (!klass->io_flush ||
+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
+ return 0;
+ }
+
+ return klass->io_flush(ioc, errp);
+}
+
static void qio_channel_restart_read(void *opaque)
{
diff --git a/migration/rdma.c b/migration/rdma.c
index f5d3bbe7e9..54acd2000e 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2833,6 +2833,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c
index 451c7631b7..3be52a98d5 100644
--- a/scsi/pr-manager-helper.c
+++ b/scsi/pr-manager-helper.c
@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr,
iov.iov_base = (void *)buf;
iov.iov_len = sz;
n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1,
- nfds ? &fd : NULL, nfds, errp);
+ nfds ? &fd : NULL, nfds, 0, errp);
if (n_written <= 0) {
assert(n_written != QIO_CHANNEL_ERR_BLOCK);
diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
index c49eec1f03..6713886d02 100644
--- a/tests/unit/test-io-channel-socket.c
+++ b/tests/unit/test-io-channel-socket.c
@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void)
G_N_ELEMENTS(iosend),
fdsend,
G_N_ELEMENTS(fdsend),
+ 0,
&error_abort);
qio_channel_readv_full(dst,
--
2.35.3

View File

@ -0,0 +1,56 @@
From a6c4aed18a027ce8e107fdf9184e9ea43a86f843 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Thu, 4 Aug 2022 04:10:43 -0300
Subject: [PATCH 8/9] QIOChannelSocket: Add support for MSG_ZEROCOPY + IPV6
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 201: Zero-copy-send fixes + improvements
RH-Commit: [8/8] 6e26ee7c9ebaedb07623313cb0678816867751dd
RH-Bugzilla: 2110203
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
For using MSG_ZEROCOPY, there are two steps:
1 - io_writev() the packet, which enqueues the packet for sending, and
2 - io_flush(), which gets confirmation that all packets got correctly sent
Currently, if MSG_ZEROCOPY is used to send packets over IPV6, no error will
be reported in (1), but it will fail in the first time (2) happens.
This happens because (2) currently checks for cmsg_level & cmsg_type
associated with IPV4 only, before reporting any error.
Add checks for cmsg_level & cmsg_type associated with IPV6, and thus enable
support for MSG_ZEROCOPY + IPV6
Fixes: 2bc58ffc29 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
(cherry picked from commit 5258a7e2c0677d16e9e1d06845f60171adf0b290)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
io/channel-socket.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/io/channel-socket.c b/io/channel-socket.c
index cf0d67c51b..6010ad7017 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -747,8 +747,8 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
}
cm = CMSG_FIRSTHDR(&msg);
- if (cm->cmsg_level != SOL_IP &&
- cm->cmsg_type != IP_RECVERR) {
+ if (cm->cmsg_level != SOL_IP && cm->cmsg_type != IP_RECVERR &&
+ cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) {
error_setg_errno(errp, EPROTOTYPE,
"Wrong cmsg in errqueue");
return -1;
--
2.31.1

View File

@ -0,0 +1,65 @@
From 905cc8032fc63619efb3f0a8c9754b7190bcc43a Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 11 Jul 2022 18:11:11 -0300
Subject: [PATCH 3/9] QIOChannelSocket: Fix zero-copy flush returning code 1
when nothing sent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 201: Zero-copy-send fixes + improvements
RH-Commit: [3/8] 1ad707702fa26cd4d0fa1870c21f5f26ae93ff97
RH-Bugzilla: 2110203
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
If flush is called when no buffer was sent with MSG_ZEROCOPY, it currently
returns 1. This return code should be used only when Linux fails to use
MSG_ZEROCOPY on a lot of sendmsg().
Fix this by returning early from flush if no sendmsg(...,MSG_ZEROCOPY)
was attempted.
Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Acked-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-Id: <20220711211112.18951-2-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 927f93e099c4f9184e60a1bc61624ac2d04d0223)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
io/channel-socket.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/io/channel-socket.c b/io/channel-socket.c
index df858da924..cf0d67c51b 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -717,12 +717,18 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
struct cmsghdr *cm;
char control[CMSG_SPACE(sizeof(*serr))];
int received;
- int ret = 1;
+ int ret;
+
+ if (sioc->zero_copy_queued == sioc->zero_copy_sent) {
+ return 0;
+ }
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
memset(control, 0, sizeof(control));
+ ret = 1;
+
while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
if (received < 0) {
--
2.31.1

View File

@ -0,0 +1,58 @@
From c1fd32d93ae42fcf3c1a25f4d56e669f251087d8 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 20 Jun 2022 02:39:43 -0300
Subject: [PATCH 25/37] QIOChannelSocket: Fix zero-copy send so socket flush
works
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
RH-Commit: [25/26] 3ede94f3269e21c3ace073ed1a6f24696315bcbb
RH-Bugzilla: 2072049
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial
part of the flushing mechanism got missing: incrementing zero_copy_queued.
Without that, the flushing interface becomes a no-op, and there is no
guarantee the buffer is really sent.
This can go as bad as causing a corruption in RAM during migration.
Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
Reported-by: 徐闯 <xuchuangxclwt@bytedance.com>
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
io/channel-socket.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/io/channel-socket.c b/io/channel-socket.c
index 7d37b39de7..df858da924 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
"Unable to write to socket");
return -1;
}
+
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+ sioc->zero_copy_queued++;
+ }
+
return ret;
}
#else /* WIN32 */
--
2.35.3

View File

@ -0,0 +1,249 @@
From 5fd7af93a06adaddbae719aabbaf912159f4fb28 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Wed, 18 May 2022 02:52:25 -0300
Subject: [PATCH 18/37] QIOChannelSocket: Implement io_writev zero copy flag &
io_flush for CONFIG_LINUX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
RH-Commit: [18/26] 6f65c8c879a5df57213b541d58285b65178f8547
RH-Bugzilla: 2072049
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
For CONFIG_LINUX, implement the new zero copy flag and the optional callback
io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY
feature is available in the host kernel, which is checked on
qio_channel_socket_connect_sync()
qio_channel_socket_flush() was implemented by counting how many times
sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the
socket's error queue, in order to find how many of them finished sending.
Flush will loop until those counters are the same, or until some error occurs.
Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY:
1: Buffer
- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying,
some caution is necessary to avoid overwriting any buffer before it's sent.
If something like this happen, a newer version of the buffer may be sent instead.
- If this is a problem, it's recommended to call qio_channel_flush() before freeing
or re-using the buffer.
2: Locked memory
- When using MSG_ZERCOCOPY, the buffer memory will be locked after queued, and
unlocked after it's sent.
- Depending on the size of each buffer, and how often it's sent, it may require
a larger amount of locked memory than usually available to non-root user.
- If the required amount of locked memory is not available, writev_zero_copy
will return an error, which can abort an operation like migration,
- Because of this, when an user code wants to add zero copy as a feature, it
requires a mechanism to disable it, so it can still be accessible to less
privileged users.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20220513062836.965425-4-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
include/io/channel-socket.h | 2 +
io/channel-socket.c | 116 ++++++++++++++++++++++++++++++++++--
2 files changed, 114 insertions(+), 4 deletions(-)
diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
index e747e63514..513c428fe4 100644
--- a/include/io/channel-socket.h
+++ b/include/io/channel-socket.h
@@ -47,6 +47,8 @@ struct QIOChannelSocket {
socklen_t localAddrLen;
struct sockaddr_storage remoteAddr;
socklen_t remoteAddrLen;
+ ssize_t zero_copy_queued;
+ ssize_t zero_copy_sent;
};
diff --git a/io/channel-socket.c b/io/channel-socket.c
index bfbd64787e..38a46ba213 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -26,6 +26,14 @@
#include "io/channel-watch.h"
#include "trace.h"
#include "qapi/clone-visitor.h"
+#ifdef CONFIG_LINUX
+#include <linux/errqueue.h>
+#include <sys/socket.h>
+
+#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY))
+#define QEMU_MSG_ZEROCOPY
+#endif
+#endif
#define SOCKET_MAX_FDS 16
@@ -55,6 +63,8 @@ qio_channel_socket_new(void)
sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
sioc->fd = -1;
+ sioc->zero_copy_queued = 0;
+ sioc->zero_copy_sent = 0;
ioc = QIO_CHANNEL(sioc);
qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
return -1;
}
+#ifdef QEMU_MSG_ZEROCOPY
+ int ret, v = 1;
+ ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v));
+ if (ret == 0) {
+ /* Zero copy available on host */
+ qio_channel_set_feature(QIO_CHANNEL(ioc),
+ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY);
+ }
+#endif
+
return 0;
}
@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
size_t fdsize = sizeof(int) * nfds;
struct cmsghdr *cmsg;
+ int sflags = 0;
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
memcpy(CMSG_DATA(cmsg), fds, fdsize);
}
+#ifdef QEMU_MSG_ZEROCOPY
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+ sflags = MSG_ZEROCOPY;
+ }
+#endif
+
retry:
- ret = sendmsg(sioc->fd, &msg, 0);
+ ret = sendmsg(sioc->fd, &msg, sflags);
if (ret <= 0) {
- if (errno == EAGAIN) {
+ switch (errno) {
+ case EAGAIN:
return QIO_CHANNEL_ERR_BLOCK;
- }
- if (errno == EINTR) {
+ case EINTR:
goto retry;
+#ifdef QEMU_MSG_ZEROCOPY
+ case ENOBUFS:
+ if (sflags & MSG_ZEROCOPY) {
+ error_setg_errno(errp, errno,
+ "Process can't lock enough memory for using MSG_ZEROCOPY");
+ return -1;
+ }
+ break;
+#endif
}
+
error_setg_errno(errp, errno,
"Unable to write to socket");
return -1;
@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
}
#endif /* WIN32 */
+
+#ifdef QEMU_MSG_ZEROCOPY
+static int qio_channel_socket_flush(QIOChannel *ioc,
+ Error **errp)
+{
+ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+ struct msghdr msg = {};
+ struct sock_extended_err *serr;
+ struct cmsghdr *cm;
+ char control[CMSG_SPACE(sizeof(*serr))];
+ int received;
+ int ret = 1;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+ memset(control, 0, sizeof(control));
+
+ while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
+ received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
+ if (received < 0) {
+ switch (errno) {
+ case EAGAIN:
+ /* Nothing on errqueue, wait until something is available */
+ qio_channel_wait(ioc, G_IO_ERR);
+ continue;
+ case EINTR:
+ continue;
+ default:
+ error_setg_errno(errp, errno,
+ "Unable to read errqueue");
+ return -1;
+ }
+ }
+
+ cm = CMSG_FIRSTHDR(&msg);
+ if (cm->cmsg_level != SOL_IP &&
+ cm->cmsg_type != IP_RECVERR) {
+ error_setg_errno(errp, EPROTOTYPE,
+ "Wrong cmsg in errqueue");
+ return -1;
+ }
+
+ serr = (void *) CMSG_DATA(cm);
+ if (serr->ee_errno != SO_EE_ORIGIN_NONE) {
+ error_setg_errno(errp, serr->ee_errno,
+ "Error on socket");
+ return -1;
+ }
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
+ error_setg_errno(errp, serr->ee_origin,
+ "Error not from zero copy");
+ return -1;
+ }
+
+ /* No errors, count successfully finished sendmsg()*/
+ sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;
+
+ /* If any sendmsg() succeeded using zero copy, return 0 at the end */
+ if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) {
+ ret = 0;
+ }
+ }
+
+ return ret;
+}
+
+#endif /* QEMU_MSG_ZEROCOPY */
+
static int
qio_channel_socket_set_blocking(QIOChannel *ioc,
bool enabled,
@@ -789,6 +894,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
ioc_klass->io_set_delay = qio_channel_socket_set_delay;
ioc_klass->io_create_watch = qio_channel_socket_create_watch;
ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
+#ifdef QEMU_MSG_ZEROCOPY
+ ioc_klass->io_flush = qio_channel_socket_flush;
+#endif
}
static const TypeInfo qio_channel_socket_info = {
--
2.35.3

View File

@ -0,0 +1,82 @@
From cbfaf86331c2b2e01a2083303b7554672bf991b7 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 20 Jun 2022 02:39:42 -0300
Subject: [PATCH 24/37] QIOChannelSocket: Introduce assert and reduce ifdefs to
improve readability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
RH-Commit: [24/26] b50e2e65307149f247155a7f7a032dc99e57718d
RH-Bugzilla: 2072049
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were
introduced, particularly at qio_channel_socket_writev().
Rewrite some of those changes so it's easier to read.
Also, introduce an assert to help detect incorrect zero-copy usage is when
it's disabled on build.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached
(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
io/channel-socket.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/io/channel-socket.c b/io/channel-socket.c
index 38a46ba213..7d37b39de7 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
memcpy(CMSG_DATA(cmsg), fds, fdsize);
}
-#ifdef QEMU_MSG_ZEROCOPY
if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+#ifdef QEMU_MSG_ZEROCOPY
sflags = MSG_ZEROCOPY;
- }
+#else
+ /*
+ * We expect QIOChannel class entry point to have
+ * blocked this code path already
+ */
+ g_assert_not_reached();
#endif
+ }
retry:
ret = sendmsg(sioc->fd, &msg, sflags);
@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
return QIO_CHANNEL_ERR_BLOCK;
case EINTR:
goto retry;
-#ifdef QEMU_MSG_ZEROCOPY
case ENOBUFS:
- if (sflags & MSG_ZEROCOPY) {
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
error_setg_errno(errp, errno,
"Process can't lock enough memory for using MSG_ZEROCOPY");
return -1;
}
break;
-#endif
}
error_setg_errno(errp, errno,
--
2.35.3

View File

@ -0,0 +1,43 @@
From f1480fe9a4054113ddacd218961e29f31c33d329 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 6 Sep 2023 16:29:23 -0400
Subject: [PATCH 2/3] RHEL: Enable "x-not-migrate-acpi-index" for all pre-RHEL8
guests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 343: acpi: fix acpi_index migration
RH-Jira: RHEL-20189
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: Prasad Pandit <None>
RH-Commit: [2/2] 0a26a71236e68dd7feb5d2063254090e3852d6ba
The acpi index migration is simply broken before for all pre-RHEL8
branches. Don't migrate it for all of them.
Signed-off-by: Peter Xu <peterx@redhat.com>
---
hw/core/machine.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 2724f6848a..6650a3d7b7 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -44,6 +44,10 @@ GlobalProperty hw_compat_rhel_8_6[] = {
* we need do disable it downstream on the latest hw_compat_rhel_8.
*/
{ "vhost-vsock-device", "seqpacket", "off" },
+ /*
+ * RHEL-2186: all rhel8 machines should not migrate acpi index.
+ */
+ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"},
};
const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6);
--
2.41.0

View File

@ -0,0 +1,107 @@
From e0e4f01c6f4fb5881960f72ae4e80951b711131e Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 24 Mar 2022 16:04:57 +0100
Subject: [PATCH 1/5] RHEL: disable "seqpacket" for "vhost-vsock-device" in
rhel8.6.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 136: RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 [rhel-8.7.0]
RH-Commit: [1/1] d82ea09e123679521503689f7d9af1c03dc71bfc
RH-Bugzilla: 2068202
RH-Acked-by: Jason Wang <None>
RH-Acked-by: Eugenio Pérez <None>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
vhost-vsock device in RHEL 8 kernels doesn't support seqpacket.
To avoid problems when migrating a VM from RHEL 9 host, we need to
disable it in rhel8-* machine types.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
hw/core/machine.c | 10 ++++++++++
hw/i386/pc_piix.c | 2 ++
hw/i386/pc_q35.c | 2 ++
hw/s390x/s390-virtio-ccw.c | 1 +
include/hw/boards.h | 3 +++
5 files changed, 18 insertions(+)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 024b025fc2..76fcabec7a 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -37,6 +37,16 @@
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-pci.h"
+GlobalProperty hw_compat_rhel_8_6[] = {
+ /* hw_compat_rhel_8_6 bz 2068202 */
+ /*
+ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so
+ * we need do disable it downstream on the latest hw_compat_rhel_8.
+ */
+ { "vhost-vsock-device", "seqpacket", "off" },
+};
+const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6);
+
/*
* Mostly the same as hw_compat_6_0 and hw_compat_6_1
*/
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index f03a8f0db8..ab6d03e07a 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -998,6 +998,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
pcmc->kvmclock_create_always = false;
/* From pc_i440fx_5_1_machine_options() */
pcmc->pci_root_uid = 1;
+ compat_props_add(m->compat_props, hw_compat_rhel_8_6,
+ hw_compat_rhel_8_6_len);
compat_props_add(m->compat_props, hw_compat_rhel_8_5,
hw_compat_rhel_8_5_len);
compat_props_add(m->compat_props, pc_rhel_8_5_compat,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 5559261d9e..882fe7a68d 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -658,6 +658,8 @@ static void pc_q35_machine_rhel860_options(MachineClass *m)
m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)";
pcmc->smbios_stream_product = "RHEL-AV";
pcmc->smbios_stream_version = "8.6.0";
+ compat_props_add(m->compat_props, hw_compat_rhel_8_6,
+ hw_compat_rhel_8_6_len);
}
DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860,
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 9795eb9406..bec270598b 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1109,6 +1109,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine)
static void ccw_machine_rhel860_class_options(MachineClass *mc)
{
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len);
}
DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", true);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 04e8759815..4ddb798144 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -443,6 +443,9 @@ extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
+extern GlobalProperty hw_compat_rhel_8_6[];
+extern const size_t hw_compat_rhel_8_6_len;
+
extern GlobalProperty hw_compat_rhel_8_5[];
extern const size_t hw_compat_rhel_8_5_len;
--
2.27.0

View File

@ -0,0 +1,93 @@
From e626dc16d130c724c400b99a93daad0a9abeae59 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 22 Mar 2022 19:23:36 -0400
Subject: [PATCH 01/18] Revert "redhat: Add hw_compat_4_2_extra and apply to
upstream machines"
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines"
RH-Commit: [1/3] 47b7d9e5062f5e215d5ed1a3ecdc1a87ac3fa630 (jmaloy/qemu-kvm)
RH-Bugzilla: 2062613
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
BZ: https://bugzilla.redhat.com/2062613
UPSTREAM: no
BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000
commit dc2e9ec1e014950c7918e23a3e9b0096b34a4a92
Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Date: Wed Mar 9 10:31:53 2022 +0000
Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines"
This reverts commit 66882f9a3230246409f3918424aca26add5c034a.
We no longer need these compat machines it was added for.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit dc2e9ec1e014950c7918e23a3e9b0096b34a4a92)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/i386/pc.c | 12 ------------
hw/i386/pc_piix.c | 6 ------
include/hw/i386/pc.h | 3 ---
3 files changed, 21 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 4c08a1971c..357257349b 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -670,18 +670,6 @@ GlobalProperty pc_rhel_7_0_compat[] = {
};
const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat);
-/*
- * RHEL: These properties only apply to the RHEL exported machine types
- * pc-4.2/2.11 for the purpose to have a limited upstream machines support
- * which can be migrated to RHEL. Let's avoid touching hw_compat_4_2 directly
- * so that we can have some isolation against the upstream code.
- */
-GlobalProperty hw_compat_4_2_extra[] = {
- /* By default enlarge the default virtio-net-pci ROM to 512KB. */
- { "virtio-net-pci", "romsize", "0x80000" },
-};
-const size_t hw_compat_4_2_extra_len = G_N_ELEMENTS(hw_compat_4_2_extra);
-
GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled)
{
GSIState *s;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index c30057c443..7b7076cbc7 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -531,12 +531,6 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m)
* supported by RHEL, even if exported.
*/
m->deprecation_reason = "Not supported by RHEL";
- /*
- * RHEL: Specific compat properties to have limited support for upstream
- * machines exported.
- */
- compat_props_add(m->compat_props, hw_compat_4_2_extra,
- hw_compat_4_2_extra_len);
}
/* RHEL: Export pc-4.2 */
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9e8bfb69f8..4a593acb50 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -325,9 +325,6 @@ extern const size_t pc_rhel_7_1_compat_len;
extern GlobalProperty pc_rhel_7_0_compat[];
extern const size_t pc_rhel_7_0_compat_len;
-extern GlobalProperty hw_compat_4_2_extra[];
-extern const size_t hw_compat_4_2_extra_len;
-
/* Helper for setting model-id for CPU models that changed model-id
* depending on QEMU versions up to QEMU 2.4.
*/
--
2.27.0

View File

@ -0,0 +1,128 @@
From 96edd15df257f1d1496397a6fac24b4316570d7e Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Thu, 14 Apr 2022 16:45:30 -0400
Subject: [PATCH 1/3] Revert redhat: Add some devices for exporting upstream
machine types
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 156: Revert redhat: Add some devices for exporting upstream machine types
RH-Commit: [1/1] f25d0da3a181136917ead82f5a5c59efe3fa445a (jmaloy/qemu-kvm)
RH-Bugzilla: 2065043
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2065043
Upstream: no
Manual revert of commit 70d3924521c9bfd912bcf1a1fc76f49eb377de46, since
the directory structure looks different from rhel-av-8.4.0.z where
this commit is taken from. Besides, x86_64-softmmu.mak looks totally
different and should not be affected by this reversal.
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 -
.../devices/x86_64-softmmu/x86_64-upstream-devices.mak | 4 ----
hw/char/parallel.c | 9 ---------
hw/i386/pc_piix.c | 2 +-
hw/i386/pc_q35.c | 2 +-
hw/timer/hpet.c | 8 --------
6 files changed, 2 insertions(+), 24 deletions(-)
delete mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
index fdbbdf9742..31ce08edab 100644
--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
@@ -1,5 +1,4 @@
include ../rh-virtio.mak
-include x86_64-upstream-devices.mak
CONFIG_AC97=y
CONFIG_ACPI=y
diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak
deleted file mode 100644
index 2cd20f54d2..0000000000
--- a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak
+++ /dev/null
@@ -1,4 +0,0 @@
-# We need "isa-parallel"
-CONFIG_PARALLEL=y
-# We need "hpet"
-CONFIG_HPET=y
diff --git a/hw/char/parallel.c b/hw/char/parallel.c
index e5f108211b..b45e67bfbb 100644
--- a/hw/char/parallel.c
+++ b/hw/char/parallel.c
@@ -29,7 +29,6 @@
#include "chardev/char-parallel.h"
#include "chardev/char-fe.h"
#include "hw/acpi/aml-build.h"
-#include "hw/boards.h"
#include "hw/irq.h"
#include "hw/isa/isa.h"
#include "hw/qdev-properties.h"
@@ -535,14 +534,6 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp)
int base;
uint8_t dummy;
- /* Restricted for Red Hat Enterprise Linux */
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
- if (strstr(mc->name, "rhel")) {
- error_setg(errp, "Device %s is not supported with machine type %s",
- object_get_typename(OBJECT(dev)), mc->name);
- return;
- }
-
if (!qemu_chr_fe_backend_connected(&s->chr)) {
error_setg(errp, "Can't create parallel device, empty char device");
return;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index ab6d03e07a..5f101c8748 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -966,7 +966,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
m->family = "pc_piix_Y";
- m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
+ m->default_machine_opts = "firmware=bios-256k.bin";
pcmc->default_nic_model = "e1000";
pcmc->pci_root_uid = 0;
m->default_display = "std";
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 882fe7a68d..73b0d0d317 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -633,7 +633,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m)
pcmc->pci_root_uid = 0;
m->family = "pc_q35_Z";
m->units_per_default_bus = 1;
- m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
+ m->default_machine_opts = "firmware=bios-256k.bin";
m->default_display = "std";
m->no_floppy = 1;
m->no_parallel = 1;
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 202e032524..9520471be2 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -733,14 +733,6 @@ static void hpet_realize(DeviceState *dev, Error **errp)
int i;
HPETTimer *timer;
- /* Restricted for Red Hat Enterprise Linux */
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
- if (strstr(mc->name, "rhel")) {
- error_setg(errp, "Device %s is not supported with machine type %s",
- object_get_typename(OBJECT(dev)), mc->name);
- return;
- }
-
if (!s->intcap) {
warn_report("Hpet's intcap not initialized");
}
--
2.35.1

View File

@ -0,0 +1,53 @@
From 5bf8f1d69fea1225e927fbb3efe549a2a9d47d92 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 22 Mar 2022 19:23:36 -0400
Subject: [PATCH 02/18] Revert "redhat: Enable FDC device for upstream machines
too"
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines"
RH-Commit: [2/3] 4e3c945e3de9bb9d9a6d24115f0719168c9669fe (jmaloy/qemu-kvm)
RH-Bugzilla: 2062613
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
BZ: https://bugzilla.redhat.com/2062613
UPSTREAM: no
BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000
commit 597cb6ca1da4a3eea77c1e4928f55203a1d5c70c
Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Date: Wed Mar 9 10:32:39 2022 +0000
Revert "redhat: Enable FDC device for upstream machines too"
This reverts commit c4d1aa8bf21fe98da94a9cff30b7c25bed12c17f.
We no longer need these compat machines it was added for.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 597cb6ca1da4a3eea77c1e4928f55203a1d5c70c)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/block/fdc.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 63042ef030..97fa6de423 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -2341,10 +2341,7 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp)
/* Restricted for Red Hat Enterprise Linux: */
MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
- if (!strstr(mc->name, "-rhel7.") &&
- /* Exported two upstream machine types allows FDC too */
- strcmp(mc->name, "pc-i440fx-4.2") &&
- strcmp(mc->name, "pc-i440fx-2.11")) {
+ if (!strstr(mc->name, "-rhel7.")) {
error_setg(errp, "Device %s is not supported with machine type %s",
object_get_typename(OBJECT(dev)), mc->name);
return;
--
2.27.0

View File

@ -0,0 +1,191 @@
From ee3cae3bb349469edcf725a1c5161521e95dcb9f Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 22 Mar 2022 19:23:36 -0400
Subject: [PATCH 03/18] Revert "redhat: Expose upstream machines pc-4.2 and
pc-2.11"
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines"
RH-Commit: [3/3] 35cee68034580f81b3aa916921eecd2fdfa7dd15 (jmaloy/qemu-kvm)
RH-Bugzilla: 2062613
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
BZ: https://bugzilla.redhat.com/2062613
UPSTREAM: no
BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000
commit f3b50d6d4ae0be9e64aafe6a15f5423bab4899e9
Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Date: Wed Mar 9 10:34:58 2022 +0000
Revert "redhat: Expose upstream machines pc-4.2 and pc-2.11"
This reverts commit 618e2424edba499d52cd26cf8363bc2dd85ef149.
We no longer need these compat machines.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit f3b50d6d4ae0be9e64aafe6a15f5423bab4899e9)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/i386/pc_piix.c | 37 -------------------------------------
1 file changed, 37 deletions(-)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 7b7076cbc7..f03a8f0db8 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -315,14 +315,6 @@ static void pc_init1(MachineState *machine,
* hw_compat_*, pc_compat_*, or * pc_*_machine_options().
*/
-/*
- * NOTE! Not all the upstream machine types are disabled for RHEL. For
- * providing a very limited support for upstream machine types, pc machines
- * 2.11 and 4.2 are exposed explicitly. This will make the below "#if" macros
- * a bit messed up, but please read this comment first so that we can have a
- * rough understanding of what we're going to do.
- */
-
#if 0 /* Disabled for Red Hat Enterprise Linux */
static void pc_compat_2_3_fn(MachineState *machine)
{
@@ -399,8 +391,6 @@ static void pc_xen_hvm_init(MachineState *machine)
}
#endif
-#endif /* Disabled for Red Hat Enterprise Linux */
-
#define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \
static void pc_init_##suffix(MachineState *machine) \
{ \
@@ -465,10 +455,8 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m)
compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len);
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL,
pc_i440fx_6_0_machine_options);
-#endif /* Disabled for Red Hat Enterprise Linux */
static void pc_i440fx_5_2_machine_options(MachineClass *m)
{
@@ -479,10 +467,8 @@ static void pc_i440fx_5_2_machine_options(MachineClass *m)
compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len);
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
DEFINE_I440FX_MACHINE(v5_2, "pc-i440fx-5.2", NULL,
pc_i440fx_5_2_machine_options);
-#endif /* Disabled for Red Hat Enterprise Linux */
static void pc_i440fx_5_1_machine_options(MachineClass *m)
{
@@ -497,10 +483,8 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m)
pcmc->pci_root_uid = 1;
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL,
pc_i440fx_5_1_machine_options);
-#endif /* Disabled for Red Hat Enterprise Linux */
static void pc_i440fx_5_0_machine_options(MachineClass *m)
{
@@ -513,10 +497,8 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m)
m->auto_enable_numa_with_memdev = false;
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL,
pc_i440fx_5_0_machine_options);
-#endif /* Disabled for Red Hat Enterprise Linux */
static void pc_i440fx_4_2_machine_options(MachineClass *m)
{
@@ -525,15 +507,8 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m)
m->is_default = false;
compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len);
-
- /*
- * RHEL: Mark all upstream machines as deprecated because they're not
- * supported by RHEL, even if exported.
- */
- m->deprecation_reason = "Not supported by RHEL";
}
-/* RHEL: Export pc-4.2 */
DEFINE_I440FX_MACHINE(v4_2, "pc-i440fx-4.2", NULL,
pc_i440fx_4_2_machine_options);
@@ -546,10 +521,8 @@ static void pc_i440fx_4_1_machine_options(MachineClass *m)
compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len);
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL,
pc_i440fx_4_1_machine_options);
-#endif /* Disabled for Red Hat Enterprise Linux */
static void pc_i440fx_4_0_machine_options(MachineClass *m)
{
@@ -562,10 +535,8 @@ static void pc_i440fx_4_0_machine_options(MachineClass *m)
compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL,
pc_i440fx_4_0_machine_options);
-#endif /* Disabled for Red Hat Enterprise Linux */
static void pc_i440fx_3_1_machine_options(MachineClass *m)
{
@@ -581,10 +552,8 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m)
compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len);
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1", NULL,
pc_i440fx_3_1_machine_options);
-#endif /* Disabled for Red Hat Enterprise Linux */
static void pc_i440fx_3_0_machine_options(MachineClass *m)
{
@@ -593,10 +562,8 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m)
compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len);
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL,
pc_i440fx_3_0_machine_options);
-#endif /* Disabled for Red Hat Enterprise Linux */
static void pc_i440fx_2_12_machine_options(MachineClass *m)
{
@@ -605,10 +572,8 @@ static void pc_i440fx_2_12_machine_options(MachineClass *m)
compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len);
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12", NULL,
pc_i440fx_2_12_machine_options);
-#endif /* Disabled for Red Hat Enterprise Linux */
static void pc_i440fx_2_11_machine_options(MachineClass *m)
{
@@ -617,11 +582,9 @@ static void pc_i440fx_2_11_machine_options(MachineClass *m)
compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len);
}
-/* RHEL: Export pc-2.11 */
DEFINE_I440FX_MACHINE(v2_11, "pc-i440fx-2.11", NULL,
pc_i440fx_2_11_machine_options);
-#if 0 /* Disabled for Red Hat Enterprise Linux */
static void pc_i440fx_2_10_machine_options(MachineClass *m)
{
pc_i440fx_2_11_machine_options(m);
--
2.27.0

View File

@ -0,0 +1,171 @@
From 10fc28b61a6fba1e6dc44fd544cf31c7f313c622 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Fri, 28 Oct 2022 17:48:00 +0100
Subject: [PATCH 05/42] Update linux headers to v6.0-rc4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [5/41] ca55f497d1bf1e72179330f8f613781bf999d898
Based on upstream commit d525f73f9186a5bc641b8caf0b2c9bb94e5aa963
("Update linux headers to v6.0-rc4"), but this is focusing only on the
ZPCI and protected dump changes.
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
linux-headers/linux/kvm.h | 87 +++++++++++++++++++++++++++++++++
linux-headers/linux/vfio_zdev.h | 7 +++
2 files changed, 94 insertions(+)
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 0d05d02ee4..c65930288c 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1150,6 +1150,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_DISABLE_QUIRKS2 213
/* #define KVM_CAP_VM_TSC_CONTROL 214 */
#define KVM_CAP_SYSTEM_EVENT_DATA 215
+#define KVM_CAP_S390_PROTECTED_DUMP 217
+#define KVM_CAP_S390_ZPCI_OP 221
+#define KVM_CAP_S390_CPU_TOPOLOGY 222
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1651,6 +1654,55 @@ struct kvm_s390_pv_unp {
__u64 tweak;
};
+enum pv_cmd_dmp_id {
+ KVM_PV_DUMP_INIT,
+ KVM_PV_DUMP_CONFIG_STOR_STATE,
+ KVM_PV_DUMP_COMPLETE,
+ KVM_PV_DUMP_CPU,
+};
+
+struct kvm_s390_pv_dmp {
+ __u64 subcmd;
+ __u64 buff_addr;
+ __u64 buff_len;
+ __u64 gaddr; /* For dump storage state */
+ __u64 reserved[4];
+};
+
+enum pv_cmd_info_id {
+ KVM_PV_INFO_VM,
+ KVM_PV_INFO_DUMP,
+};
+
+struct kvm_s390_pv_info_dump {
+ __u64 dump_cpu_buffer_len;
+ __u64 dump_config_mem_buffer_per_1m;
+ __u64 dump_config_finalize_len;
+};
+
+struct kvm_s390_pv_info_vm {
+ __u64 inst_calls_list[4];
+ __u64 max_cpus;
+ __u64 max_guests;
+ __u64 max_guest_addr;
+ __u64 feature_indication;
+};
+
+struct kvm_s390_pv_info_header {
+ __u32 id;
+ __u32 len_max;
+ __u32 len_written;
+ __u32 reserved;
+};
+
+struct kvm_s390_pv_info {
+ struct kvm_s390_pv_info_header header;
+ union {
+ struct kvm_s390_pv_info_dump dump;
+ struct kvm_s390_pv_info_vm vm;
+ };
+};
+
enum pv_cmd_id {
KVM_PV_ENABLE,
KVM_PV_DISABLE,
@@ -1659,6 +1711,8 @@ enum pv_cmd_id {
KVM_PV_VERIFY,
KVM_PV_PREP_RESET,
KVM_PV_UNSHARE_ALL,
+ KVM_PV_INFO,
+ KVM_PV_DUMP,
};
struct kvm_pv_cmd {
@@ -2066,4 +2120,37 @@ struct kvm_stats_desc {
/* Available with KVM_CAP_XSAVE2 */
#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
+/* Available with KVM_CAP_S390_PROTECTED_DUMP */
+#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd)
+
+/* Available with KVM_CAP_S390_ZPCI_OP */
+#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op)
+
+struct kvm_s390_zpci_op {
+ /* in */
+ __u32 fh; /* target device */
+ __u8 op; /* operation to perform */
+ __u8 pad[3];
+ union {
+ /* for KVM_S390_ZPCIOP_REG_AEN */
+ struct {
+ __u64 ibv; /* Guest addr of interrupt bit vector */
+ __u64 sb; /* Guest addr of summary bit */
+ __u32 flags;
+ __u32 noi; /* Number of interrupts */
+ __u8 isc; /* Guest interrupt subclass */
+ __u8 sbo; /* Offset of guest summary bit vector */
+ __u16 pad;
+ } reg_aen;
+ __u64 reserved[8];
+ } u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN 0
+#define KVM_S390_ZPCIOP_DEREG_AEN 1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
+
#endif /* __LINUX_KVM_H */
diff --git a/linux-headers/linux/vfio_zdev.h b/linux-headers/linux/vfio_zdev.h
index b4309397b6..77f2aff1f2 100644
--- a/linux-headers/linux/vfio_zdev.h
+++ b/linux-headers/linux/vfio_zdev.h
@@ -29,6 +29,9 @@ struct vfio_device_info_cap_zpci_base {
__u16 fmb_length; /* Measurement Block Length (in bytes) */
__u8 pft; /* PCI Function Type */
__u8 gid; /* PCI function group ID */
+ /* End of version 1 */
+ __u32 fh; /* PCI function handle */
+ /* End of version 2 */
};
/**
@@ -47,6 +50,10 @@ struct vfio_device_info_cap_zpci_group {
__u16 noi; /* Maximum number of MSIs */
__u16 maxstbl; /* Maximum Store Block Length */
__u8 version; /* Supported PCI Version */
+ /* End of version 1 */
+ __u8 reserved;
+ __u16 imaxstbl; /* Maximum Interpreted Store Block Length */
+ /* End of version 2 */
};
/**
--
2.37.3

View File

@ -0,0 +1,349 @@
From a5e7bb1f7a88efb5574266a76e80fd7604d19921 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Mon, 16 Jan 2023 07:49:59 -0500
Subject: [PATCH 04/11] accel: introduce accelerator blocker API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 247: accel: introduce accelerator blocker API
RH-Bugzilla: 2161188
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/3] 9d3d7f9554974a79042c915763288cce07aef135
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188
commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Fri Nov 11 10:47:56 2022 -0500
accel: introduce accelerator blocker API
This API allows the accelerators to prevent vcpus from issuing
new ioctls while execting a critical section marked with the
accel_ioctl_inhibit_begin/end functions.
Note that all functions submitting ioctls must mark where the
ioctl is being called with accel_{cpu_}ioctl_begin/end().
This API requires the caller to always hold the BQL.
API documentation is in sysemu/accel-blocker.h
Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
(to minimize cache line bouncing) to keep avoid that new ioctls
run when the critical section starts, and a QemuEvent to wait
that all running ioctls finish.
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20221111154758.1372674-2-eesposit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts:
util/meson.build: files are missing in rhel 8.8.0
namely int128.c, memalign.c and interval-tree.c
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++
accel/meson.build | 2 +-
hw/core/cpu-common.c | 2 +
include/hw/core/cpu.h | 3 +
include/sysemu/accel-blocker.h | 56 ++++++++++++
util/meson.build | 2 +-
6 files changed, 217 insertions(+), 2 deletions(-)
create mode 100644 accel/accel-blocker.c
create mode 100644 include/sysemu/accel-blocker.h
diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
new file mode 100644
index 0000000000..1e7f423462
--- /dev/null
+++ b/accel/accel-blocker.c
@@ -0,0 +1,154 @@
+/*
+ * Lock to inhibit accelerator ioctls
+ *
+ * Copyright (c) 2022 Red Hat Inc.
+ *
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+#include "hw/core/cpu.h"
+#include "sysemu/accel-blocker.h"
+
+static QemuLockCnt accel_in_ioctl_lock;
+static QemuEvent accel_in_ioctl_event;
+
+void accel_blocker_init(void)
+{
+ qemu_lockcnt_init(&accel_in_ioctl_lock);
+ qemu_event_init(&accel_in_ioctl_event, false);
+}
+
+void accel_ioctl_begin(void)
+{
+ if (likely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */
+ qemu_lockcnt_inc(&accel_in_ioctl_lock);
+}
+
+void accel_ioctl_end(void)
+{
+ if (likely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ qemu_lockcnt_dec(&accel_in_ioctl_lock);
+ /* change event to SET. If event was BUSY, wake up all waiters */
+ qemu_event_set(&accel_in_ioctl_event);
+}
+
+void accel_cpu_ioctl_begin(CPUState *cpu)
+{
+ if (unlikely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */
+ qemu_lockcnt_inc(&cpu->in_ioctl_lock);
+}
+
+void accel_cpu_ioctl_end(CPUState *cpu)
+{
+ if (unlikely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ qemu_lockcnt_dec(&cpu->in_ioctl_lock);
+ /* change event to SET. If event was BUSY, wake up all waiters */
+ qemu_event_set(&accel_in_ioctl_event);
+}
+
+static bool accel_has_to_wait(void)
+{
+ CPUState *cpu;
+ bool needs_to_wait = false;
+
+ CPU_FOREACH(cpu) {
+ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) {
+ /* exit the ioctl, if vcpu is running it */
+ qemu_cpu_kick(cpu);
+ needs_to_wait = true;
+ }
+ }
+
+ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock);
+}
+
+void accel_ioctl_inhibit_begin(void)
+{
+ CPUState *cpu;
+
+ /*
+ * We allow to inhibit only when holding the BQL, so we can identify
+ * when an inhibitor wants to issue an ioctl easily.
+ */
+ g_assert(qemu_mutex_iothread_locked());
+
+ /* Block further invocations of the ioctls outside the BQL. */
+ CPU_FOREACH(cpu) {
+ qemu_lockcnt_lock(&cpu->in_ioctl_lock);
+ }
+ qemu_lockcnt_lock(&accel_in_ioctl_lock);
+
+ /* Keep waiting until there are running ioctls */
+ while (true) {
+
+ /* Reset event to FREE. */
+ qemu_event_reset(&accel_in_ioctl_event);
+
+ if (accel_has_to_wait()) {
+ /*
+ * If event is still FREE, and there are ioctls still in progress,
+ * wait.
+ *
+ * If an ioctl finishes before qemu_event_wait(), it will change
+ * the event state to SET. This will prevent qemu_event_wait() from
+ * blocking, but it's not a problem because if other ioctls are
+ * still running the loop will iterate once more and reset the event
+ * status to FREE so that it can wait properly.
+ *
+ * If an ioctls finishes while qemu_event_wait() is blocking, then
+ * it will be waken up, but also here the while loop makes sure
+ * to re-enter the wait if there are other running ioctls.
+ */
+ qemu_event_wait(&accel_in_ioctl_event);
+ } else {
+ /* No ioctl is running */
+ return;
+ }
+ }
+}
+
+void accel_ioctl_inhibit_end(void)
+{
+ CPUState *cpu;
+
+ qemu_lockcnt_unlock(&accel_in_ioctl_lock);
+ CPU_FOREACH(cpu) {
+ qemu_lockcnt_unlock(&cpu->in_ioctl_lock);
+ }
+}
+
diff --git a/accel/meson.build b/accel/meson.build
index dfd808d2c8..801b4d44e8 100644
--- a/accel/meson.build
+++ b/accel/meson.build
@@ -1,4 +1,4 @@
-specific_ss.add(files('accel-common.c'))
+specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
softmmu_ss.add(files('accel-softmmu.c'))
user_ss.add(files('accel-user.c'))
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index 9e3241b430..b6e83acf0a 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -238,6 +238,7 @@ static void cpu_common_initfn(Object *obj)
cpu->nr_threads = 1;
qemu_mutex_init(&cpu->work_mutex);
+ qemu_lockcnt_init(&cpu->in_ioctl_lock);
QSIMPLEQ_INIT(&cpu->work_list);
QTAILQ_INIT(&cpu->breakpoints);
QTAILQ_INIT(&cpu->watchpoints);
@@ -249,6 +250,7 @@ static void cpu_common_finalize(Object *obj)
{
CPUState *cpu = CPU(obj);
+ qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
qemu_mutex_destroy(&cpu->work_mutex);
}
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index e948e81f1a..49d9c73f97 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -383,6 +383,9 @@ struct CPUState {
uint32_t kvm_fetch_index;
uint64_t dirty_pages;
+ /* Use by accel-block: CPU is executing an ioctl() */
+ QemuLockCnt in_ioctl_lock;
+
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
new file mode 100644
index 0000000000..72020529ef
--- /dev/null
+++ b/include/sysemu/accel-blocker.h
@@ -0,0 +1,56 @@
+/*
+ * Accelerator blocking API, to prevent new ioctls from starting and wait the
+ * running ones finish.
+ * This mechanism differs from pause/resume_all_vcpus() in that it does not
+ * release the BQL.
+ *
+ * Copyright (c) 2022 Red Hat Inc.
+ *
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef ACCEL_BLOCKER_H
+#define ACCEL_BLOCKER_H
+
+#include "qemu/osdep.h"
+#include "sysemu/cpus.h"
+
+extern void accel_blocker_init(void);
+
+/*
+ * accel_{cpu_}ioctl_begin/end:
+ * Mark when ioctl is about to run or just finished.
+ *
+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
+ * called, preventing new ioctls to run. They will continue only after
+ * accel_ioctl_inibith_end().
+ */
+extern void accel_ioctl_begin(void);
+extern void accel_ioctl_end(void);
+extern void accel_cpu_ioctl_begin(CPUState *cpu);
+extern void accel_cpu_ioctl_end(CPUState *cpu);
+
+/*
+ * accel_ioctl_inhibit_begin: start critical section
+ *
+ * This function makes sure that:
+ * 1) incoming accel_{cpu_}ioctl_begin() calls block
+ * 2) wait that all ioctls that were already running reach
+ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary.
+ *
+ * This allows the caller to access shared data or perform operations without
+ * worrying of concurrent vcpus accesses.
+ */
+extern void accel_ioctl_inhibit_begin(void);
+
+/*
+ * accel_ioctl_inhibit_end: end critical section started by
+ * accel_ioctl_inhibit_begin()
+ *
+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
+ */
+extern void accel_ioctl_inhibit_end(void);
+
+#endif /* ACCEL_BLOCKER_H */
diff --git a/util/meson.build b/util/meson.build
index 05b593055a..b5f153b0e8 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -48,6 +48,7 @@ util_ss.add(files('transactions.c'))
util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c'))
util_ss.add(files('guest-random.c'))
util_ss.add(files('yank.c'))
+util_ss.add(files('lockcnt.c'))
if have_user
util_ss.add(files('selfmap.c'))
@@ -69,7 +70,6 @@ if have_block
util_ss.add(files('hexdump.c'))
util_ss.add(files('iova-tree.c'))
util_ss.add(files('iov.c', 'qemu-sockets.c', 'uri.c'))
- util_ss.add(files('lockcnt.c'))
util_ss.add(files('main-loop.c'))
util_ss.add(files('nvdimm-utils.c'))
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
--
2.37.3

View File

@ -0,0 +1,78 @@
From af082f3499de265d123157d097b5c84981e0aa63 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Wed, 30 Mar 2022 14:52:34 -0400
Subject: [PATCH 15/18] acpi: fix OEM ID/OEM Table ID padding
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table
RH-Commit: [7/10] 51ea859cbe12b5a902d529ab589d18757d98f71d (jmaloy/qemu-kvm)
RH-Bugzilla: 2062611
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611
Upstream: Merged
commit 748c030f360a940fe0c9382c8ca1649096c3a80d
Author: Igor Mammedov <imammedo@redhat.com>
Date: Wed Jan 12 08:03:31 2022 -0500
acpi: fix OEM ID/OEM Table ID padding
Commit [2] broke original '\0' padding of OEM ID and OEM Table ID
fields in headers of ACPI tables. While it doesn't have impact on
default values since QEMU uses 6 and 8 characters long values
respectively, it broke usecase where IDs are provided on QEMU CLI.
It shouldn't affect guest (but may cause licensing verification
issues in guest OS).
One of the broken usecases is user supplied SLIC table with IDs
shorter than max possible length, where [2] mangles IDs with extra
spaces in RSDT and FADT tables whereas guest OS expects those to
mirror the respective values of the used SLIC table.
Fix it by replacing whitespace padding with '\0' padding in
accordance with [1] and expectations of guest OS
1) ACPI spec, v2.0b
17.2 AML Grammar Definition
...
//OEM ID of up to 6 characters. If the OEM ID is
//shorter than 6 characters, it can be terminated
//with a NULL character.
2)
Fixes: 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/707
Reported-by: Dmitry V. Orekhov <dima.orekhov@gmail.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Cc: qemu-stable@nongnu.org
Message-Id: <20220112130332.1648664-4-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Ani Sinha <ani@anisinha.ca>
Tested-by: Dmitry V. Orekhov dima.orekhov@gmail.com
(cherry picked from commit 748c030f360a940fe0c9382c8ca1649096c3a80d)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/acpi/aml-build.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index b3b3310df3..65148d5b9d 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1724,9 +1724,9 @@ void acpi_table_begin(AcpiTable *desc, GArray *array)
build_append_int_noprefix(array, 0, 4); /* Length */
build_append_int_noprefix(array, desc->rev, 1); /* Revision */
build_append_int_noprefix(array, 0, 1); /* Checksum */
- build_append_padded_str(array, desc->oem_id, 6, ' '); /* OEMID */
+ build_append_padded_str(array, desc->oem_id, 6, '\0'); /* OEMID */
/* OEM Table ID */
- build_append_padded_str(array, desc->oem_table_id, 8, ' ');
+ build_append_padded_str(array, desc->oem_table_id, 8, '\0');
build_append_int_noprefix(array, 1, 4); /* OEM Revision */
g_array_append_vals(array, ACPI_BUILD_APPNAME8, 4); /* Creator ID */
build_append_int_noprefix(array, 1, 4); /* Creator Revision */
--
2.27.0

View File

@ -0,0 +1,108 @@
From 4e8fb957a349558648d5cddb80a89460bc97439e Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Wed, 30 Mar 2022 14:52:34 -0400
Subject: [PATCH 09/18] acpi: fix QEMU crash when started with SLIC table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table
RH-Commit: [1/10] 0c34e80346c33da4f220d9c486b120c35005144e (jmaloy/qemu-kvm)
RH-Bugzilla: 2062611
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611
Upstream: Merged
commit 8cdb99af45365727ac17f45239a9b8c1d5155c6d)
Author: Igor Mammedov <imammedo@redhat.com>
Date: Mon Dec 27 14:31:17 2021 -0500
acpi: fix QEMU crash when started with SLIC table
if QEMU is started with used provided SLIC table blob,
-acpitable sig=SLIC,oem_id='CRASH ',oem_table_id="ME",oem_rev=00002210,asl_compiler_id="",asl_compiler_rev=00000000,data=/dev/null
it will assert with:
hw/acpi/aml-build.c:61:build_append_padded_str: assertion failed: (len <= maxlen)
and following backtrace:
...
build_append_padded_str (array=0x555556afe320, str=0x555556afdb2e "CRASH ME", maxlen=0x6, pad=0x20) at hw/acpi/aml-build.c:61
acpi_table_begin (desc=0x7fffffffd1b0, array=0x555556afe320) at hw/acpi/aml-build.c:1727
build_fadt (tbl=0x555556afe320, linker=0x555557ca3830, f=0x7fffffffd318, oem_id=0x555556afdb2e "CRASH ME", oem_table_id=0x555556afdb34 "ME") at hw/acpi/aml-build.c:2064
...
which happens due to acpi_table_begin() expecting NULL terminated
oem_id and oem_table_id strings, which is normally the case, but
in case of user provided SLIC table, oem_id points to table's blob
directly and as result oem_id became longer than expected.
Fix issue by handling oem_id consistently and make acpi_get_slic_oem()
return NULL terminated strings.
PS:
After [1] refactoring, oem_id semantics became inconsistent, where
NULL terminated string was coming from machine and old way pointer
into byte array coming from -acpitable option. That used to work
since build_header() wasn't expecting NULL terminated string and
blindly copied the 1st 6 bytes only.
However commit [2] broke that by replacing build_header() with
acpi_table_begin(), which was expecting NULL terminated string
and was checking oem_id size.
1) 602b45820 ("acpi: Permit OEM ID and OEM table ID fields to be changed")
2)
Fixes: 4b56e1e4eb08 ("acpi: build_fadt: use acpi_table_begin()/acpi_table_end() instead of build_header()")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/786
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20211227193120.1084176-2-imammedo@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Denis Lisov <dennis.lissov@gmail.com>
Tested-by: Alexander Tsoy <alexander@tsoy.me>
Cc: qemu-stable@nongnu.org
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 8cdb99af45365727ac17f45239a9b8c1d5155c6d)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/acpi/core.c | 4 ++--
hw/i386/acpi-build.c | 2 ++
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/hw/acpi/core.c b/hw/acpi/core.c
index 1e004d0078..3e811bf03c 100644
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -345,8 +345,8 @@ int acpi_get_slic_oem(AcpiSlicOem *oem)
struct acpi_table_header *hdr = (void *)(u - sizeof(hdr->_length));
if (memcmp(hdr->sig, "SLIC", 4) == 0) {
- oem->id = hdr->oem_id;
- oem->table_id = hdr->oem_table_id;
+ oem->id = g_strndup(hdr->oem_id, 6);
+ oem->table_id = g_strndup(hdr->oem_table_id, 8);
return 0;
}
}
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index a4478e77b7..acc4869db0 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2726,6 +2726,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
/* Cleanup memory that's no longer used. */
g_array_free(table_offsets, true);
+ g_free(slic_oem.id);
+ g_free(slic_oem.table_id);
}
static void acpi_ram_update(MemoryRegion *mr, GArray *data)
--
2.27.0

View File

@ -0,0 +1,165 @@
From 3deffc03c2e9b0053eec5aeb5b5d633dfe29f499 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Wed, 6 Apr 2022 14:58:12 -0400
Subject: [PATCH 1/3] acpi: fix acpi_index migration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 343: acpi: fix acpi_index migration
RH-Jira: RHEL-20189
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: Prasad Pandit <None>
RH-Commit: [1/2] c5b9cdf5791cd856207b7df7e2ef5df360ec8de4
vmstate_acpi_pcihp_use_acpi_index() was expecting AcpiPciHpState
as state but it actually received PIIX4PMState, because
VMSTATE_PCI_HOTPLUG is a macro and not another struct.
So it ended up accessing random pointer, which resulted
in 'false' return value and acpi_index field wasn't ever
sent.
However in 7.0 that pointer de-references to value > 0, and
destination QEMU starts to expect the field which isn't
sent in migratioon stream from older QEMU (6.2 and older).
As result migration fails with:
qemu-system-x86_64: Missing section footer for 0000:00:01.3/piix4_pm
qemu-system-x86_64: load of migration failed: Invalid argument
In addition with QEMU-6.2, destination due to not expected
state, also never expects the acpi_index field in migration
stream.
Q35 is not affected as it always sends/expects the field as
long as acpi based PCI hotplug is enabled.
Fix issue by introducing compat knob to never send/expect
acpi_index in migration stream for 6.2 and older PC machine
types and always send it for 7.0 and newer PC machine types.
Diagnosed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Fixes: b32bd76 ("pci: introduce acpi-index property for PCI device")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/932
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit a83c2844903c45aa7d32cdd17305f23ce2c56ab9)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
hw/acpi/acpi-pci-hotplug-stub.c | 4 ----
hw/acpi/pcihp.c | 6 ------
hw/acpi/piix4.c | 15 ++++++++++++++-
hw/core/machine.c | 5 +++++
include/hw/acpi/pcihp.h | 2 --
5 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c
index 734e4c5986..a43f6dafc9 100644
--- a/hw/acpi/acpi-pci-hotplug-stub.c
+++ b/hw/acpi/acpi-pci-hotplug-stub.c
@@ -41,7 +41,3 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off)
return;
}
-bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id)
-{
- return false;
-}
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index be0e846b34..ec861661c3 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -559,12 +559,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
OBJ_PROP_FLAG_READ);
}
-bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id)
-{
- AcpiPciHpState *s = opaque;
- return s->acpi_index;
-}
-
const VMStateDescription vmstate_acpi_pcihp_pci_status = {
.name = "acpi_pcihp_pci_status",
.version_id = 1,
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 8d6011c0a3..033e75ce5b 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -82,6 +82,7 @@ struct PIIX4PMState {
AcpiPciHpState acpi_pci_hotplug;
bool use_acpi_hotplug_bridge;
bool use_acpi_root_pci_hotplug;
+ bool not_migrate_acpi_index;
uint8_t disable_s3;
uint8_t disable_s4;
@@ -269,6 +270,16 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id)
return pm_smbus_vmstate_needed();
}
+/*
+ * This is a fudge to turn off the acpi_index field,
+ * whose test was always broken on piix4 with 6.2 and older machine types.
+ */
+static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
+{
+ PIIX4PMState *s = PIIX4_PM(opaque);
+ return s->use_acpi_hotplug_bridge && !s->not_migrate_acpi_index;
+}
+
/* qemu-kvm 1.2 uses version 3 but advertised as 2
* To support incoming qemu-kvm 1.2 migration, change version_id
* and minimum_version_id to 2 below (which breaks migration from
@@ -299,7 +310,7 @@ static const VMStateDescription vmstate_acpi = {
struct AcpiPciHpPciStatus),
VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, PIIX4PMState,
vmstate_test_use_acpi_hotplug_bridge,
- vmstate_acpi_pcihp_use_acpi_index),
+ vmstate_test_migrate_acpi_index),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {
@@ -654,6 +665,8 @@ static Property piix4_pm_properties[] = {
DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState,
acpi_memory_hotplug.is_enabled, true),
DEFINE_PROP_BOOL("smm-compat", PIIX4PMState, smm_compat, false),
+ DEFINE_PROP_BOOL("x-not-migrate-acpi-index", PIIX4PMState,
+ not_migrate_acpi_index, false),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 76fcabec7a..2724f6848a 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -331,6 +331,11 @@ GlobalProperty hw_compat_rhel_7_1[] = {
};
const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1);
+GlobalProperty hw_compat_6_2[] = {
+ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"},
+};
+const size_t hw_compat_6_2_len = G_N_ELEMENTS(hw_compat_6_2);
+
GlobalProperty hw_compat_6_1[] = {
{ "vhost-user-vsock-device", "seqpacket", "off" },
{ "nvme-ns", "shared", "off" },
diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h
index af1a169fc3..7e268c2c9c 100644
--- a/include/hw/acpi/pcihp.h
+++ b/include/hw/acpi/pcihp.h
@@ -73,8 +73,6 @@ void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off);
extern const VMStateDescription vmstate_acpi_pcihp_pci_status;
-bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id);
-
#define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp, test_acpi_index) \
VMSTATE_UINT32_TEST(pcihp.hotplug_select, state, \
test_pcihp), \
--
2.41.0

View File

@ -0,0 +1,140 @@
From c9ceb175667cdeead59384a97a812367ae19c570 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Wed, 23 Mar 2022 13:21:40 -0400
Subject: [PATCH 06/18] acpi: pcihp: pcie: set power on cap on parent slot
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 134: pci: expose TYPE_XIO3130_DOWNSTREAM name
RH-Commit: [2/2] d883872647a6e90ec573140b2c171f3f53b600ab (jmaloy/qemu-kvm)
RH-Bugzilla: 2062610
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
BZ: https://bugzilla.redhat.com/2062610
UPSTREAM: merged
BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138
commit 6b0969f1ec825984cd74619f0730be421b0c46fb
Author: Igor Mammedov <imammedo@redhat.com>
Date: Tue Mar 1 10:11:59 2022 -0500
acpi: pcihp: pcie: set power on cap on parent slot
on creation a PCIDevice has power turned on at the end of pci_qdev_realize()
however later on if PCIe slot isn't populated with any children
it's power is turned off. It's fine if native hotplug is used
as plug callback will power slot on among other things.
However when ACPI hotplug is enabled it replaces native PCIe plug
callbacks with ACPI specific ones (acpi_pcihp_device_*plug_cb) and
as result slot stays powered off. It works fine as ACPI hotplug
on guest side takes care of enumerating/initializing hotplugged
device. But when later guest is migrated, call chain introduced by]
commit d5daff7d312 (pcie: implement slot power control for pcie root ports)
pcie_cap_slot_post_load()
-> pcie_cap_update_power()
-> pcie_set_power_device()
-> pci_set_power()
-> pci_update_mappings()
will disable earlier initialized BARs for the hotplugged device
in powered off slot due to commit 23786d13441 (pci: implement power state)
which disables BARs if power is off.
Fix it by setting PCI_EXP_SLTCTL_PCC to PCI_EXP_SLTCTL_PWR_ON
on slot (root port/downstream port) at the time a device
hotplugged into it. As result PCI_EXP_SLTCTL_PWR_ON is migrated
to target and above call chain keeps device plugged into it
powered on.
Fixes: d5daff7d312 ("pcie: implement slot power control for pcie root ports")
Fixes: 23786d13441 ("pci: implement power state")
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2053584
Suggested-by: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20220301151200.3507298-3-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 6b0969f1ec825984cd74619f0730be421b0c46fb)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/acpi/pcihp.c | 12 +++++++++++-
hw/pci/pcie.c | 11 +++++++++++
include/hw/pci/pcie.h | 1 +
3 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index a5e182dd3a..be0e846b34 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -32,6 +32,7 @@
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_host.h"
#include "hw/pci/pcie_port.h"
+#include "hw/pci-bridge/xio3130_downstream.h"
#include "hw/i386/acpi-build.h"
#include "hw/acpi/acpi.h"
#include "hw/pci/pci_bus.h"
@@ -341,6 +342,8 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
{
PCIDevice *pdev = PCI_DEVICE(dev);
int slot = PCI_SLOT(pdev->devfn);
+ PCIDevice *bridge;
+ PCIBus *bus;
int bsel;
/* Don't send event when device is enabled during qemu machine creation:
@@ -370,7 +373,14 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
return;
}
- bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev));
+ bus = pci_get_bus(pdev);
+ bridge = pci_bridge_get_device(bus);
+ if (object_dynamic_cast(OBJECT(bridge), TYPE_PCIE_ROOT_PORT) ||
+ object_dynamic_cast(OBJECT(bridge), TYPE_XIO3130_DOWNSTREAM)) {
+ pcie_cap_slot_enable_power(bridge);
+ }
+
+ bsel = acpi_pcihp_get_bsel(bus);
g_assert(bsel >= 0);
s->acpi_pcihp_pci_status[bsel].up |= (1U << slot);
acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index d7d73a31e4..996f0e24fe 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -366,6 +366,17 @@ static void hotplug_event_clear(PCIDevice *dev)
}
}
+void pcie_cap_slot_enable_power(PCIDevice *dev)
+{
+ uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+ uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP);
+
+ if (sltcap & PCI_EXP_SLTCAP_PCP) {
+ pci_set_word_by_mask(exp_cap + PCI_EXP_SLTCTL,
+ PCI_EXP_SLTCTL_PCC, PCI_EXP_SLTCTL_PWR_ON);
+ }
+}
+
static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque)
{
bool *power = opaque;
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 6063bee0ec..c27368d077 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -112,6 +112,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
uint32_t addr, uint32_t val, int len);
int pcie_cap_slot_post_load(void *opaque, int version_id);
void pcie_cap_slot_push_attention_button(PCIDevice *dev);
+void pcie_cap_slot_enable_power(PCIDevice *dev);
void pcie_cap_root_init(PCIDevice *dev);
void pcie_cap_root_reset(PCIDevice *dev);
--
2.27.0

View File

@ -0,0 +1,50 @@
From 953c5c0982b61b0a3f8f03452844b5487eb22fc7 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:13:17 -0500
Subject: [PATCH 06/13] aio-wait: switch to smp_mb__after_rmw()
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2168472
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [6/10] 9f30f97754139ffd18d36b2350f9ed4e59ac496e
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
commit b532526a07ef3b903ead2e055fe6cc87b41057a3
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri Mar 3 11:03:52 2023 +0100
aio-wait: switch to smp_mb__after_rmw()
The barrier comes after an atomic increment, so it is enough to use
smp_mb__after_rmw(); this avoids a double barrier on x86 systems.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
include/block/aio-wait.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index 54840f8622..03b6394c78 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -82,7 +82,7 @@ extern AioWait global_aio_wait;
/* Increment wait_->num_waiters before evaluating cond. */ \
qatomic_inc(&wait_->num_waiters); \
/* Paired with smp_mb in aio_wait_kick(). */ \
- smp_mb(); \
+ smp_mb__after_rmw(); \
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
while ((cond)) { \
aio_poll(ctx_, true); \
--
2.37.3

View File

@ -0,0 +1,86 @@
From d7eae0ff4c7f7f7bf10f10272adf7c6971c0db9b Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 09:26:35 -0500
Subject: [PATCH 01/13] aio_wait_kick: add missing memory barrier
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2168472
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [1/10] eb774aee79864052e14e706d931e52e7bd1162c8
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
commit 7455ff1aa01564cc175db5b2373e610503ad4411
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Tue May 24 13:30:54 2022 -0400
aio_wait_kick: add missing memory barrier
It seems that aio_wait_kick always required a memory barrier
or atomic operation in the caller, but nobody actually
took care of doing it.
Let's put the barrier in the function instead, and pair it
with another one in AIO_WAIT_WHILE. Read aio_wait_kick()
comment for further explanation.
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20220524173054.12651-1-eesposit@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
include/block/aio-wait.h | 2 ++
util/aio-wait.c | 16 +++++++++++++++-
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index b39eefb38d..54840f8622 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -81,6 +81,8 @@ extern AioWait global_aio_wait;
AioContext *ctx_ = (ctx); \
/* Increment wait_->num_waiters before evaluating cond. */ \
qatomic_inc(&wait_->num_waiters); \
+ /* Paired with smp_mb in aio_wait_kick(). */ \
+ smp_mb(); \
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
while ((cond)) { \
aio_poll(ctx_, true); \
diff --git a/util/aio-wait.c b/util/aio-wait.c
index bdb3d3af22..98c5accd29 100644
--- a/util/aio-wait.c
+++ b/util/aio-wait.c
@@ -35,7 +35,21 @@ static void dummy_bh_cb(void *opaque)
void aio_wait_kick(void)
{
- /* The barrier (or an atomic op) is in the caller. */
+ /*
+ * Paired with smp_mb in AIO_WAIT_WHILE. Here we have:
+ * write(condition);
+ * aio_wait_kick() {
+ * smp_mb();
+ * read(num_waiters);
+ * }
+ *
+ * And in AIO_WAIT_WHILE:
+ * write(num_waiters);
+ * smp_mb();
+ * read(condition);
+ */
+ smp_mb();
+
if (qatomic_read(&global_aio_wait.num_waiters)) {
aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
}
--
2.37.3

View File

@ -0,0 +1,56 @@
From 47d027147694fde94dd73305ee53b6a136cbeced Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 08/15] apic: disable reentrancy detection for apic-msi
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
RH-Bugzilla: 1999236
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [8/12] 25c3cf99b00cd9adc10d6e7afa9c3e3b7da08de2 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
Upstream: Merged
CVE: CVE-2021-3750
commit 50795ee051a342c681a9b45671c552fbd6274db8
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:13 2023 -0400
apic: disable reentrancy detection for apic-msi
As the code is designed for re-entrant calls to apic-msi, mark apic-msi
as reentrancy-safe.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Message-Id: <20230427211013.2994127-9-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/intc/apic.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 3df11c34d6..a7c2b301a8 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -883,6 +883,13 @@ static void apic_realize(DeviceState *dev, Error **errp)
memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi",
APIC_SPACE_SIZE);
+ /*
+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can
+ * write back to apic-msi. As such mark the apic-msi region re-entrancy
+ * safe.
+ */
+ s->io_memory.disable_reentrancy_guard = true;
+
s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s);
local_apics[s->id] = s;
--
2.37.3

View File

@ -0,0 +1,235 @@
From 8996ac4369de7e0cb6f911db6f47c3e4ae88c8aa Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 02/15] async: Add an optional reentrancy guard to the BH API
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
RH-Bugzilla: 1999236
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/12] b03f247e242a6cdb3eebec36477234ac77dcd20c (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
Upstream: Merged
CVE: CVE-2021-3750
Conflict: The file block/graph-lock.h, inluded from include/block/aio.h,
doesn't exist in this code version. The code compiles without
issues if this include is just omitted, so we do that.
commit 9c86c97f12c060bf7484dd931f38634e166a81f0
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:07 2023 -0400
async: Add an optional reentrancy guard to the BH API
Devices can pass their MemoryReentrancyGuard (from their DeviceState),
when creating new BHes. Then, the async API will toggle the guard
before/after calling the BH call-back. This prevents bh->mmio reentrancy
issues.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Message-Id: <20230427211013.2994127-3-alxndr@bu.edu>
[thuth: Fix "line over 90 characters" checkpatch.pl error]
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
docs/devel/multiple-iothreads.txt | 7 +++++++
include/block/aio.h | 18 ++++++++++++++++--
include/qemu/main-loop.h | 7 +++++--
tests/unit/ptimer-test-stubs.c | 3 ++-
util/async.c | 18 +++++++++++++++++-
util/main-loop.c | 6 ++++--
util/trace-events | 1 +
7 files changed, 52 insertions(+), 8 deletions(-)
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
index aeb997bed5..a11576bc74 100644
--- a/docs/devel/multiple-iothreads.txt
+++ b/docs/devel/multiple-iothreads.txt
@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext:
* LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
* LEGACY timer_new_ms() - create a timer
* LEGACY qemu_bh_new() - create a BH
+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard
* LEGACY qemu_aio_wait() - run an event loop iteration
Since they implicitly work on the main loop they cannot be used in code that
@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h):
* aio_set_event_notifier() - monitor an event notifier
* aio_timer_new() - create a timer
* aio_bh_new() - create a BH
+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard
* aio_poll() - run an event loop iteration
+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard"
+argument, which is used to check for and prevent re-entrancy problems. For
+BHs associated with devices, the reentrancy-guard is contained in the
+corresponding DeviceState and named "mem_reentrancy_guard".
+
The AioContext can be obtained from the IOThread using
iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
Code that takes an AioContext argument works both in IOThreads or the main
diff --git a/include/block/aio.h b/include/block/aio.h
index 47fbe9d81f..c7da152985 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -22,6 +22,8 @@
#include "qemu/event_notifier.h"
#include "qemu/thread.h"
#include "qemu/timer.h"
+#include "hw/qdev-core.h"
+
typedef struct BlockAIOCB BlockAIOCB;
typedef void BlockCompletionFunc(void *opaque, int ret);
@@ -321,9 +323,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
* is opaque and must be allocated prior to its use.
*
* @name: A human-readable identifier for debugging purposes.
+ * @reentrancy_guard: A guard set when entering a cb to prevent
+ * device-reentrancy issues
*/
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
- const char *name);
+ const char *name, MemReentrancyGuard *reentrancy_guard);
/**
* aio_bh_new: Allocate a new bottom half structure
@@ -332,7 +336,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
* string.
*/
#define aio_bh_new(ctx, cb, opaque) \
- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)
+
+/**
+ * aio_bh_new_guarded: Allocate a new bottom half structure with a
+ * reentrancy_guard
+ *
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
+ * string.
+ */
+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)
/**
* aio_notify: Force processing of pending events.
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 8dbc6fcb89..85dd5ada9e 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -294,9 +294,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
void qemu_fd_register(int fd);
+#define qemu_bh_new_guarded(cb, opaque, guard) \
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard)
#define qemu_bh_new(cb, opaque) \
- qemu_bh_new_full((cb), (opaque), (stringify(cb)))
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+ MemReentrancyGuard *reentrancy_guard);
void qemu_bh_schedule_idle(QEMUBH *bh);
enum {
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
index 2a3ef58799..a7a2d08e7e 100644
--- a/tests/unit/ptimer-test-stubs.c
+++ b/tests/unit/ptimer-test-stubs.c
@@ -108,7 +108,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
return deadline;
}
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+ MemReentrancyGuard *reentrancy_guard)
{
QEMUBH *bh = g_new(QEMUBH, 1);
diff --git a/util/async.c b/util/async.c
index 2a63bf90f2..1fff02e7fc 100644
--- a/util/async.c
+++ b/util/async.c
@@ -62,6 +62,7 @@ struct QEMUBH {
void *opaque;
QSLIST_ENTRY(QEMUBH) next;
unsigned flags;
+ MemReentrancyGuard *reentrancy_guard;
};
/* Called concurrently from any thread */
@@ -127,7 +128,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
}
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
- const char *name)
+ const char *name, MemReentrancyGuard *reentrancy_guard)
{
QEMUBH *bh;
bh = g_new(QEMUBH, 1);
@@ -136,13 +137,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
.cb = cb,
.opaque = opaque,
.name = name,
+ .reentrancy_guard = reentrancy_guard,
};
return bh;
}
void aio_bh_call(QEMUBH *bh)
{
+ bool last_engaged_in_io = false;
+
+ if (bh->reentrancy_guard) {
+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
+ if (bh->reentrancy_guard->engaged_in_io) {
+ trace_reentrant_aio(bh->ctx, bh->name);
+ }
+ bh->reentrancy_guard->engaged_in_io = true;
+ }
+
bh->cb(bh->opaque);
+
+ if (bh->reentrancy_guard) {
+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
+ }
}
/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
diff --git a/util/main-loop.c b/util/main-loop.c
index 06b18b195c..1eacf04691 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -544,9 +544,11 @@ void main_loop_wait(int nonblocking)
/* Functions to operate on the main QEMU AioContext. */
-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+ MemReentrancyGuard *reentrancy_guard)
{
- return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
+ reentrancy_guard);
}
/*
diff --git a/util/trace-events b/util/trace-events
index c8f53d7d9f..dc3b1eb3bf 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
# async.c
aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
+reentrant_aio(void *ctx, const char *name) "ctx %p name %s"
# thread-pool.c
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
--
2.37.3

View File

@ -0,0 +1,71 @@
From d754050d260e2ad890cecd975df6e163c531b40e Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 09/15] async: avoid use-after-free on re-entrancy guard
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
RH-Bugzilla: 1999236
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [9/12] d357650e581c3921bbfe3e2fde5e3f55853b5fab (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
Upstream: Merged
CVE: CVE-2021-3750
commit 7915bd06f25e1803778081161bf6fa10c42dc7cd
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Mon May 1 10:19:56 2023 -0400
async: avoid use-after-free on re-entrancy guard
A BH callback can free the BH, causing a use-after-free in aio_bh_call.
Fix that by keeping a local copy of the re-entrancy guard pointer.
Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513
Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API")
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Message-Id: <20230501141956.3444868-1-alxndr@bu.edu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
util/async.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/util/async.c b/util/async.c
index 1fff02e7fc..ffe0541c3b 100644
--- a/util/async.c
+++ b/util/async.c
@@ -146,18 +146,20 @@ void aio_bh_call(QEMUBH *bh)
{
bool last_engaged_in_io = false;
- if (bh->reentrancy_guard) {
- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
- if (bh->reentrancy_guard->engaged_in_io) {
+ /* Make a copy of the guard-pointer as cb may free the bh */
+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
+ if (reentrancy_guard) {
+ last_engaged_in_io = reentrancy_guard->engaged_in_io;
+ if (reentrancy_guard->engaged_in_io) {
trace_reentrant_aio(bh->ctx, bh->name);
}
- bh->reentrancy_guard->engaged_in_io = true;
+ reentrancy_guard->engaged_in_io = true;
}
bh->cb(bh->opaque);
- if (bh->reentrancy_guard) {
- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
+ if (reentrancy_guard) {
+ reentrancy_guard->engaged_in_io = last_engaged_in_io;
}
}
--
2.37.3

View File

@ -0,0 +1,66 @@
From 187eb7a418af93375e42298d06e231e2bec3cf00 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:15:42 -0500
Subject: [PATCH 10/13] async: clarify usage of barriers in the polling case
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2168472
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [10/10] 3be07ccc6137a0336becfe63a818d9cbadb38e9c
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
commit 6229438cca037d42f44a96d38feb15cb102a444f
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon Mar 6 10:43:52 2023 +0100
async: clarify usage of barriers in the polling case
Explain that aio_context_notifier_poll() relies on
aio_notify_accept() to catch all the memory writes that were
done before ctx->notified was set to true.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
util/async.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/util/async.c b/util/async.c
index 795fe699b6..2a63bf90f2 100644
--- a/util/async.c
+++ b/util/async.c
@@ -463,8 +463,9 @@ void aio_notify_accept(AioContext *ctx)
qatomic_set(&ctx->notified, false);
/*
- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb
- * in aio_notify.
+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
+ * with smp_wmb() in aio_notify.
*/
smp_mb();
}
@@ -487,6 +488,11 @@ static bool aio_context_notifier_poll(void *opaque)
EventNotifier *e = opaque;
AioContext *ctx = container_of(e, AioContext, notifier);
+ /*
+ * No need for load-acquire because we just want to kick the
+ * event loop. aio_notify_accept() takes care of synchronizing
+ * the event loop with the producers.
+ */
return qatomic_read(&ctx->notified);
}
--
2.37.3

View File

@ -0,0 +1,111 @@
From ea3856bb545d19499602830cdc3076d83a981e7a Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:15:36 -0500
Subject: [PATCH 09/13] async: update documentation of the memory barriers
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2168472
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [9/10] d471da2acf7a107cf75f3327c5e8d7456307160e
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
commit 8dd48650b43dfde4ebea34191ac267e474bcc29e
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon Mar 6 10:15:06 2023 +0100
async: update documentation of the memory barriers
Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)",
2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll()
is happening when the bottom half is enqueued in the bh_list; not
when the flags are set. Update the documentation to match.
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
util/async.c | 33 +++++++++++++++++++--------------
1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/util/async.c b/util/async.c
index 6f6717a34b..795fe699b6 100644
--- a/util/async.c
+++ b/util/async.c
@@ -71,14 +71,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
unsigned old_flags;
/*
- * The memory barrier implicit in qatomic_fetch_or makes sure that:
- * 1. idle & any writes needed by the callback are done before the
- * locations are read in the aio_bh_poll.
- * 2. ctx is loaded before the callback has a chance to execute and bh
- * could be freed.
+ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that
+ * insertion starts after BH_PENDING is set.
*/
old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
+
if (!(old_flags & BH_PENDING)) {
+ /*
+ * At this point the bottom half becomes visible to aio_bh_poll().
+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
+ * aio_bh_poll(), ensuring that:
+ * 1. any writes needed by the callback are visible from the callback
+ * after aio_bh_dequeue() returns bh.
+ * 2. ctx is loaded before the callback has a chance to execute and bh
+ * could be freed.
+ */
QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
}
@@ -97,11 +104,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
QSLIST_REMOVE_HEAD(head, next);
/*
- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory
- * barrier ensures that the callback sees all writes done by the scheduling
- * thread. It also ensures that the scheduling thread sees the cleared
- * flag before bh->cb has run, and thus will call aio_notify again if
- * necessary.
+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
+ * the removal finishes before BH_PENDING is reset.
*/
*flags = qatomic_fetch_and(&bh->flags,
~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
@@ -148,6 +152,7 @@ int aio_bh_poll(AioContext *ctx)
BHListSlice *s;
int ret = 0;
+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
@@ -437,15 +442,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
void aio_notify(AioContext *ctx)
{
/*
- * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in
- * aio_notify_accept.
+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
+ * smp_mb() in aio_notify_accept().
*/
smp_wmb();
qatomic_set(&ctx->notified, true);
/*
- * Write ctx->notified before reading ctx->notify_me. Pairs
- * with smp_mb in aio_ctx_prepare or aio_poll.
+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
*/
smp_mb();
if (qatomic_read(&ctx->notify_me)) {
--
2.37.3

View File

@ -0,0 +1,71 @@
From 60da56e3685969493ae483c3cc2c66af13d00baf Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Wed, 10 Aug 2022 14:57:18 +0200
Subject: [PATCH 1/3] backends/hostmem: Fix support of memory-backend-memfd in
qemu_maxrampagesize()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <None>
RH-MergeRequest: 221: backends/hostmem: Fix support of memory-backend-memfd in qemu_maxrampagesize()
RH-Bugzilla: 2117149
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Commit: [1/1] b5a1047750af32c0a261b8385ea0e819eb16681a
It is currently not possible yet to use "memory-backend-memfd" on s390x
with hugepages enabled. This problem is caused by qemu_maxrampagesize()
not taking memory-backend-memfd objects into account yet, so the code
in s390_memory_init() fails to enable the huge page support there via
s390_set_max_pagesize(). Fix it by generalizing the code, so that it
looks at qemu_ram_pagesize(memdev->mr.ram_block) instead of re-trying
to get the information from the filesystem.
Suggested-by: David Hildenbrand <david@redhat.com>
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2116496
Message-Id: <20220810125720.3849835-2-thuth@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 8be934b70e923104da883b990dee18f02552d40e)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2117149
[clg: Resolved conflict on qemu_real_host_page_size() ]
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
backends/hostmem.c | 14 ++------------
1 file changed, 2 insertions(+), 12 deletions(-)
diff --git a/backends/hostmem.c b/backends/hostmem.c
index 4c05862ed5..0c4654ea85 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -305,22 +305,12 @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
return backend->is_mapped;
}
-#ifdef __linux__
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
- Object *obj = OBJECT(memdev);
- char *path = object_property_get_str(obj, "mem-path", NULL);
- size_t pagesize = qemu_mempath_getpagesize(path);
-
- g_free(path);
+ size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block);
+ g_assert(pagesize >= qemu_real_host_page_size);
return pagesize;
}
-#else
-size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
-{
- return qemu_real_host_page_size;
-}
-#endif
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
--
2.35.3

View File

@ -0,0 +1,58 @@
From 7715635d018351e0a5c4c25aec2c71a2fe3b9e69 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 06/15] bcm2835_property: disable reentrancy detection for
iomem
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
RH-Bugzilla: 1999236
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [6/12] 4d6187430ca1c4309a36824c0c6815d2a763db1a (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
Upstream: Merged
CVE: CVE-2021-3750
commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:11 2023 -0400
bcm2835_property: disable reentrancy detection for iomem
As the code is designed for re-entrant calls from bcm2835_property to
bcm2835_mbox and back into bcm2835_property, mark iomem as
reentrancy-safe.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230427211013.2994127-7-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/misc/bcm2835_property.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c
index 73941bdae9..022b5a849c 100644
--- a/hw/misc/bcm2835_property.c
+++ b/hw/misc/bcm2835_property.c
@@ -377,6 +377,13 @@ static void bcm2835_property_init(Object *obj)
memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s,
TYPE_BCM2835_PROPERTY, 0x10);
+
+ /*
+ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from
+ * iomem. As such, mark iomem as re-entracy safe.
+ */
+ s->iomem.disable_reentrancy_guard = true;
+
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem);
sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq);
}
--
2.37.3

View File

@ -0,0 +1,359 @@
From 1f7520baa6f0bf02ccba2ebfe7d1d5bf6520f95a Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Tue, 11 Apr 2023 19:34:16 +0200
Subject: [PATCH 2/5] block: Collapse padded I/O vecs exceeding IOV_MAX
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 291: block: Split padded I/O vectors exceeding IOV_MAX
RH-Bugzilla: 2141964
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [2/5] 1d86ce8398e4ab66e308a686f9855c963e52b0a9
When processing vectored guest requests that are not aligned to the
storage request alignment, we pad them by adding head and/or tail
buffers for a read-modify-write cycle.
The guest can submit I/O vectors up to IOV_MAX (1024) in length, but
with this padding, the vector can exceed that limit. As of
4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make
qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the
limit, instead returning an error to the guest.
To the guest, this appears as a random I/O error. We should not return
an I/O error to the guest when it issued a perfectly valid request.
Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector
longer than IOV_MAX, which generally seems to work (because the guest
assumes a smaller alignment than we really have, file-posix's
raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and
so emulate the request, so that the IOV_MAX does not matter). However,
that does not seem exactly great.
I see two ways to fix this problem:
1. We split such long requests into two requests.
2. We join some elements of the vector into new buffers to make it
shorter.
I am wary of (1), because it seems like it may have unintended side
effects.
(2) on the other hand seems relatively simple to implement, with
hopefully few side effects, so this patch does that.
To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request()
is effectively replaced by the new function bdrv_create_padded_qiov(),
which not only wraps the request IOV with padding head/tail, but also
ensures that the resulting vector will not have more than IOV_MAX
elements. Putting that functionality into qemu_iovec_init_extended() is
infeasible because it requires allocating a bounce buffer; doing so
would require many more parameters (buffer alignment, how to initialize
the buffer, and out parameters like the buffer, its length, and the
original elements), which is not reasonable.
Conversely, it is not difficult to move qemu_iovec_init_extended()'s
functionality into bdrv_create_padded_qiov() by using public
qemu_iovec_* functions, so that is what this patch does.
Because bdrv_pad_request() was the only "serious" user of
qemu_iovec_init_extended(), the next patch will remove the latter
function, so the functionality is not implemented twice.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-Id: <20230411173418.19549-3-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a)
Conflicts:
block/io.c: Downstream bdrv_pad_request() has no @flags
parameter.
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 151 insertions(+), 15 deletions(-)
diff --git a/block/io.c b/block/io.c
index c3e7301613..0fe8f0dd40 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1624,6 +1624,14 @@ out:
* @merge_reads is true for small requests,
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
* head and tail exist but @buf_len == align and @tail_buf == @buf.
+ *
+ * @write is true for write requests, false for read requests.
+ *
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
+ * I/O vector elements so for read requests, the data can be copied back after
+ * the read is done.
*/
typedef struct BdrvRequestPadding {
uint8_t *buf;
@@ -1632,11 +1640,17 @@ typedef struct BdrvRequestPadding {
size_t head;
size_t tail;
bool merge_reads;
+ bool write;
QEMUIOVector local_qiov;
+
+ uint8_t *collapse_bounce_buf;
+ size_t collapse_len;
+ QEMUIOVector pre_collapse_qiov;
} BdrvRequestPadding;
static bool bdrv_init_padding(BlockDriverState *bs,
int64_t offset, int64_t bytes,
+ bool write,
BdrvRequestPadding *pad)
{
int64_t align = bs->bl.request_alignment;
@@ -1668,6 +1682,8 @@ static bool bdrv_init_padding(BlockDriverState *bs,
pad->tail_buf = pad->buf + pad->buf_len - align;
}
+ pad->write = write;
+
return true;
}
@@ -1733,8 +1749,23 @@ zero_mem:
return 0;
}
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
+/**
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
+ */
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
{
+ if (pad->collapse_bounce_buf) {
+ if (!pad->write) {
+ /*
+ * If padding required elements in the vector to be collapsed into a
+ * bounce buffer, copy the bounce buffer content back
+ */
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+ qemu_vfree(pad->collapse_bounce_buf);
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
+ }
if (pad->buf) {
qemu_vfree(pad->buf);
qemu_iovec_destroy(&pad->local_qiov);
@@ -1742,6 +1773,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
memset(pad, 0, sizeof(*pad));
}
+/*
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
+ *
+ * To ensure this, when necessary, the first two or three elements of @iov are
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
+ * bounce buffer in pad->local_qiov.
+ *
+ * After performing a read request, the data from the bounce buffer must be
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
+ */
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
+ BdrvRequestPadding *pad,
+ struct iovec *iov, int niov,
+ size_t iov_offset, size_t bytes)
+{
+ int padded_niov, surplus_count, collapse_count;
+
+ /* Assert this invariant */
+ assert(niov <= IOV_MAX);
+
+ /*
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
+ * to the guest is not ideal, but there is little else we can do. At least
+ * this will practically never happen on 64-bit systems.
+ */
+ if (SIZE_MAX - pad->head < bytes ||
+ SIZE_MAX - pad->head - bytes < pad->tail)
+ {
+ return -EINVAL;
+ }
+
+ /* Length of the resulting IOV if we just concatenated everything */
+ padded_niov = !!pad->head + niov + !!pad->tail;
+
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
+
+ if (pad->head) {
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
+ }
+
+ /*
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
+ * Instead, merge the first two or three elements of @iov to reduce the
+ * number of vector elements as necessary.
+ */
+ if (padded_niov > IOV_MAX) {
+ /*
+ * Only head and tail can have lead to the number of entries exceeding
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
+ * to reduce the number of elements by `surplus_count`, so we merge that
+ * many elements plus one into one element.
+ */
+ surplus_count = padded_niov - IOV_MAX;
+ assert(surplus_count <= !!pad->head + !!pad->tail);
+ collapse_count = surplus_count + 1;
+
+ /*
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
+ * advance `iov` (and associated variables) by those elements.
+ */
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
+ collapse_count, iov_offset, SIZE_MAX);
+ iov += collapse_count;
+ iov_offset = 0;
+ niov -= collapse_count;
+ bytes -= pad->pre_collapse_qiov.size;
+
+ /*
+ * Construct the bounce buffer to match the length of the to-collapse
+ * vector elements, and for write requests, initialize it with the data
+ * from those elements. Then add it to `pad->local_qiov`.
+ */
+ pad->collapse_len = pad->pre_collapse_qiov.size;
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
+ if (pad->write) {
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+ qemu_iovec_add(&pad->local_qiov,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
+
+ if (pad->tail) {
+ qemu_iovec_add(&pad->local_qiov,
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
+ }
+
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
+ return 0;
+}
+
/*
* bdrv_pad_request
*
@@ -1749,6 +1875,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
* read of padding, bdrv_padding_rmw_read() should be called separately if
* needed.
*
+ * @write is true for write requests, false for read requests.
+ *
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
* - on function start they represent original request
* - on failure or when padding is not needed they are unchanged
@@ -1757,25 +1885,33 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
static int bdrv_pad_request(BlockDriverState *bs,
QEMUIOVector **qiov, size_t *qiov_offset,
int64_t *offset, int64_t *bytes,
+ bool write,
BdrvRequestPadding *pad, bool *padded)
{
int ret;
+ struct iovec *sliced_iov;
+ int sliced_niov;
+ size_t sliced_head, sliced_tail;
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
if (padded) {
*padded = false;
}
return 0;
}
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
- *qiov, *qiov_offset, *bytes,
- pad->buf + pad->buf_len - pad->tail,
- pad->tail);
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
+ &sliced_head, &sliced_tail,
+ &sliced_niov);
+
+ /* Guaranteed by bdrv_check_qiov_request() */
+ assert(*bytes <= SIZE_MAX);
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
+ sliced_head, *bytes);
if (ret < 0) {
- bdrv_padding_destroy(pad);
+ bdrv_padding_finalize(pad);
return ret;
}
*bytes += pad->head + pad->tail;
@@ -1836,8 +1972,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
flags |= BDRV_REQ_COPY_ON_READ;
}
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
- NULL);
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
+ &pad, NULL);
if (ret < 0) {
goto fail;
}
@@ -1847,7 +1983,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
bs->bl.request_alignment,
qiov, qiov_offset, flags);
tracked_request_end(&req);
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
fail:
bdrv_dec_in_flight(bs);
@@ -2167,7 +2303,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
bool padding;
BdrvRequestPadding pad;
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
if (padding) {
bdrv_make_request_serialising(req, align);
@@ -2214,7 +2350,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
}
out:
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
return ret;
}
@@ -2280,8 +2416,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
* alignment only if there is no ZERO flag.
*/
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
- &padded);
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
+ &pad, &padded);
if (ret < 0) {
return ret;
}
@@ -2310,7 +2446,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
qiov, qiov_offset, flags);
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
out:
tracked_request_end(&req);
--
2.39.3

View File

@ -0,0 +1,75 @@
From b9866279996ee065cb524bf30bc70e22efbab303 Mon Sep 17 00:00:00 2001
From: Hanna Czenczek <hreitz@redhat.com>
Date: Fri, 14 Jul 2023 10:59:38 +0200
Subject: [PATCH 5/5] block: Fix pad_request's request restriction
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 291: block: Split padded I/O vectors exceeding IOV_MAX
RH-Bugzilla: 2141964
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [5/5] f9188bd089d6c67185ea1accde20d491a2ed3193
bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
which bdrv_check_qiov_request() does not guarantee.
bdrv_check_request32() however will guarantee this, and both of
bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
bdrv_co_pwritev_part()) already run it before calling
bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call
bdrv_check_request32() without expecting error, too.
In effect, this patch will not change guest-visible behavior. It is a
clean-up to tighten a condition to match what is guaranteed by our
callers, and which exists purely to show clearly why the subsequent
assertion (`assert(*bytes <= SIZE_MAX)`) is always true.
Note there is a difference between the interfaces of
bdrv_check_qiov_request() and bdrv_check_request32(): The former takes
an errp, the latter does not, so we can no longer just pass
&error_abort. Instead, we need to check the returned value. While we
do expect success (because the callers have already run this function),
an assert(ret == 0) is not much simpler than just to return an error if
it occurs, so let us handle errors by returning them up the stack now.
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Message-id: 20230714085938.202730-1-hreitz@redhat.com
Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a
("block: Collapse padded I/O vecs exceeding IOV_MAX")
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/io.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/block/io.c b/block/io.c
index 0fe8f0dd40..8ae57728a6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1893,7 +1893,11 @@ static int bdrv_pad_request(BlockDriverState *bs,
int sliced_niov;
size_t sliced_head, sliced_tail;
- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
+ /* Should have been checked by the caller already */
+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
+ if (ret < 0) {
+ return ret;
+ }
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
if (padded) {
@@ -1906,7 +1910,7 @@ static int bdrv_pad_request(BlockDriverState *bs,
&sliced_head, &sliced_tail,
&sliced_niov);
- /* Guaranteed by bdrv_check_qiov_request() */
+ /* Guaranteed by bdrv_check_request32() */
assert(*bytes <= SIZE_MAX);
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
sliced_head, *bytes);
--
2.39.3

View File

@ -0,0 +1,63 @@
From b21fa5ecd9acf2b91839a2915fb4bb39dac4c803 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 3 Feb 2022 15:05:33 +0100
Subject: [PATCH 2/5] block: Lock AioContext for drain_end in blockdev-reopen
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 142: block: Lock AioContext for drain_end in blockdev-reopen
RH-Commit: [1/2] 98de3b5987f88ea6b4b503f623d6c4475574e037
RH-Bugzilla: 2067118
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
bdrv_subtree_drained_end() requires the caller to hold the AioContext
lock for the drained node. Not doing this for nodes outside of the main
AioContext leads to crashes when AIO_WAIT_WHILE() needs to wait and
tries to temporarily release the lock.
Fixes: 3908b7a8994fa5ef7a89aa58cd5a02fc58141592
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2046659
Reported-by: Qing Wang <qinwang@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20220203140534.36522-2-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit aba8205be0707b9d108e32254e186ba88107a869)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
blockdev.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/blockdev.c b/blockdev.c
index b35072644e..565f6a81fd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3562,6 +3562,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
{
BlockReopenQueue *queue = NULL;
GSList *drained = NULL;
+ GSList *p;
/* Add each one of the BDS that we want to reopen to the queue */
for (; reopen_list != NULL; reopen_list = reopen_list->next) {
@@ -3611,7 +3612,15 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
fail:
bdrv_reopen_queue_free(queue);
- g_slist_free_full(drained, (GDestroyNotify) bdrv_subtree_drained_end);
+ for (p = drained; p; p = p->next) {
+ BlockDriverState *bs = p->data;
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ bdrv_subtree_drained_end(bs);
+ aio_context_release(ctx);
+ }
+ g_slist_free(drained);
}
void qmp_blockdev_del(const char *node_name, Error **errp)
--
2.27.0

View File

@ -0,0 +1,78 @@
From 6348063b91b2370cc27153fd58fd11a6681631f6 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Wed, 16 Feb 2022 11:53:53 +0100
Subject: [PATCH 22/24] block: Make bdrv_refresh_limits() non-recursive
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Hanna Reitz <hreitz@redhat.com>
RH-MergeRequest: 189: block: Make bdrv_refresh_limits() non-recursive
RH-Commit: [1/3] 1a1fe37f8d8f0344dd8639d6cc9d884d1aff9096
RH-Bugzilla: 2072932
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
bdrv_refresh_limits() recurses down to the node's children. That does
not seem necessary: When we refresh limits on some node, and then
recurse down and were to change one of its children's BlockLimits, then
that would mean we noticed the changed limits by pure chance. The fact
that we refresh the parent's limits has nothing to do with it, so the
reason for the change probably happened before this point in time, and
we should have refreshed the limits then.
Consequently, we should actually propagate block limits changes upwards,
not downwards.  That is a separate and pre-existing issue, though, and
so will not be addressed in this patch.
The problem with recursing is that bdrv_refresh_limits() is not atomic.
It begins with zeroing BDS.bl, and only then sets proper, valid limits.
If we do not drain all nodes whose limits are refreshed, then concurrent
I/O requests can encounter invalid request_alignment values and crash
qemu. Therefore, a recursing bdrv_refresh_limits() requires the whole
subtree to be drained, which is currently not ensured by most callers.
A non-recursive bdrv_refresh_limits() only requires the node in question
to not receive I/O requests, and this is done by most callers in some
way or another:
- bdrv_open_driver() deals with a new node with no parents yet
- bdrv_set_file_or_backing_noperm() acts on a drained node
- bdrv_reopen_commit() acts only on drained nodes
- bdrv_append() should in theory require the node to be drained; in
practice most callers just lock the AioContext, which should at least
be enough to prevent concurrent I/O requests from accessing invalid
limits
So we can resolve the bug by making bdrv_refresh_limits() non-recursive.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1879437
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20220216105355.30729-2-hreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 4d378bbd831bdd2f6e6adcd4ea5b77b6effaa627)
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
block/io.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/block/io.c b/block/io.c
index 4e4cb556c5..c3e7301613 100644
--- a/block/io.c
+++ b/block/io.c
@@ -189,10 +189,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
QLIST_FOREACH(c, &bs->children, next) {
if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
{
- bdrv_refresh_limits(c->bs, tran, errp);
- if (*errp) {
- return;
- }
bdrv_merge_limits(&bs->bl, &c->bs->bl);
have_limits = true;
}
--
2.35.3

View File

@ -0,0 +1,129 @@
From bf4c15a3debbe68b6eb25c52174843470a9c014f Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 11 Jan 2022 15:36:12 +0000
Subject: [PATCH 3/6] block-backend: prevent dangling BDS pointers across
aio_poll()
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 109: block-backend: prevent dangling BDS pointers across aio_poll()
RH-Commit: [1/2] da5a59eddff0dc10be7de8e291fa675143d11d73
RH-Bugzilla: 2021778 2036178
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
The BlockBackend root child can change when aio_poll() is invoked. This
happens when a temporary filter node is removed upon blockjob
completion, for example.
Functions in block/block-backend.c must be aware of this when using a
blk_bs() pointer across aio_poll() because the BlockDriverState refcnt
may reach 0, resulting in a stale pointer.
One example is scsi_device_purge_requests(), which calls blk_drain() to
wait for in-flight requests to cancel. If the backup blockjob is active,
then the BlockBackend root child is a temporary filter BDS owned by the
blockjob. The blockjob can complete during bdrv_drained_begin() and the
last reference to the BDS is released when the temporary filter node is
removed. This results in a use-after-free when blk_drain() calls
bdrv_drained_end(bs) on the dangling pointer.
Explicitly hold a reference to bs across block APIs that invoke
aio_poll().
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2021778
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20220111153613.25453-2-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 1e3552dbd28359d35967b7c28dc86cde1bc29205)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/block-backend.c | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/block/block-backend.c b/block/block-backend.c
index 12ef80ea17..23e727199b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -822,16 +822,22 @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
void blk_remove_bs(BlockBackend *blk)
{
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
- BlockDriverState *bs;
BdrvChild *root;
notifier_list_notify(&blk->remove_bs_notifiers, blk);
if (tgm->throttle_state) {
- bs = blk_bs(blk);
+ BlockDriverState *bs = blk_bs(blk);
+
+ /*
+ * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for
+ * example, if a temporary filter node is removed by a blockjob.
+ */
+ bdrv_ref(bs);
bdrv_drained_begin(bs);
throttle_group_detach_aio_context(tgm);
throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
bdrv_drained_end(bs);
+ bdrv_unref(bs);
}
blk_update_root_state(blk);
@@ -1705,6 +1711,7 @@ void blk_drain(BlockBackend *blk)
BlockDriverState *bs = blk_bs(blk);
if (bs) {
+ bdrv_ref(bs);
bdrv_drained_begin(bs);
}
@@ -1714,6 +1721,7 @@ void blk_drain(BlockBackend *blk)
if (bs) {
bdrv_drained_end(bs);
+ bdrv_unref(bs);
}
}
@@ -2044,10 +2052,13 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
int ret;
if (bs) {
+ bdrv_ref(bs);
+
if (update_root_node) {
ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
errp);
if (ret < 0) {
+ bdrv_unref(bs);
return ret;
}
}
@@ -2057,6 +2068,8 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
throttle_group_attach_aio_context(tgm, new_context);
bdrv_drained_end(bs);
}
+
+ bdrv_unref(bs);
}
blk->ctx = new_context;
@@ -2326,11 +2339,13 @@ void blk_io_limits_disable(BlockBackend *blk)
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
assert(tgm->throttle_state);
if (bs) {
+ bdrv_ref(bs);
bdrv_drained_begin(bs);
}
throttle_group_unregister_tgm(tgm);
if (bs) {
bdrv_drained_end(bs);
+ bdrv_unref(bs);
}
}
--
2.27.0

View File

@ -0,0 +1,56 @@
From 4c6eff78f4b31ec4bd7b42440396760d19fde63e Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Tue, 18 Jan 2022 17:59:59 +0100
Subject: [PATCH 6/7] block/io: Update BSC only if want_zero is true
RH-Author: Hanna Reitz <hreitz@redhat.com>
RH-MergeRequest: 112: block/io: Update BSC only if want_zero is true
RH-Commit: [1/2] a202de1f52110d1e871c3b5b58f2d9e9b5d17570
RH-Bugzilla: 2041480
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
We update the block-status cache whenever we get new information from a
bdrv_co_block_status() call to the block driver. However, if we have
passed want_zero=false to that call, it may flag areas containing zeroes
as data, and so we would update the block-status cache with wrong
information.
Therefore, we should not update the cache with want_zero=false.
Reported-by: Nir Soffer <nsoffer@redhat.com>
Fixes: 0bc329fbb00 ("block: block-status cache for data regions")
Reviewed-by: Nir Soffer <nsoffer@redhat.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220118170000.49423-2-hreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 113b727ce788335cf76f65355d670c9bc130fd75)
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
block/io.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/block/io.c b/block/io.c
index bb0a254def..4e4cb556c5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2497,8 +2497,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
* non-protocol nodes, and then it is never used. However, filling
* the cache requires an RCU update, so double check here to avoid
* such an update if possible.
+ *
+ * Check want_zero, because we only want to update the cache when we
+ * have accurate information about what is zero and what is data.
*/
- if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
+ if (want_zero &&
+ ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) &&
QLIST_EMPTY(&bs->children))
{
/*
--
2.27.0

View File

@ -0,0 +1,153 @@
From 192f956f2b0761f270070555f8feb1f0544e5558 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Wed, 9 Nov 2022 17:54:48 +0100
Subject: [PATCH 01/11] block/mirror: Do not wait for active writes
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load
RH-Bugzilla: 2125119
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [1/3] 652d1e55b954f13eaec2c86f58735d4942837e16
Waiting for all active writes to settle before daring to create a
background copying operation means that we will never do background
operations while the guest does anything (in write-blocking mode), and
therefore cannot converge. Yes, we also will not diverge, but actually
converging would be even nicer.
It is unclear why we did decide to wait for all active writes to settle
before creating a background operation, but it just does not seem
necessary. Active writes will put themselves into the in_flight bitmap
and thus properly block actually conflicting background requests.
It is important for active requests to wait on overlapping background
requests, which we do in active_write_prepare(). However, so far it was
not documented why it is important. Add such documentation now, and
also to the other call of mirror_wait_on_conflicts(), so that it becomes
more clear why and when requests need to actively wait for other
requests to settle.
Another thing to note is that of course we need to ensure that there are
no active requests when the job completes, but that is done by virtue of
the BDS being drained anyway, so there cannot be any active requests at
that point.
With this change, we will need to explicitly keep track of how many
bytes are in flight in active requests so that
job_progress_set_remaining() in mirror_run() can set the correct number
of remaining bytes.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2123297
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221109165452.67927-2-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit d69a879bdf1aed586478eaa161ee064fe1b92f1a)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/mirror.c | 37 ++++++++++++++++++++++++++++++-------
1 file changed, 30 insertions(+), 7 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index efec2c7674..282f428cb7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -81,6 +81,7 @@ typedef struct MirrorBlockJob {
int max_iov;
bool initial_zeroing_ongoing;
int in_active_write_counter;
+ int64_t active_write_bytes_in_flight;
bool prepared;
bool in_drain;
} MirrorBlockJob;
@@ -493,6 +494,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
}
bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
+ /*
+ * Wait for concurrent requests to @offset. The next loop will limit the
+ * copied area based on in_flight_bitmap so we only copy an area that does
+ * not overlap with concurrent in-flight requests. Still, we would like to
+ * copy something, so wait until there are at least no more requests to the
+ * very beginning of the area.
+ */
mirror_wait_on_conflicts(NULL, s, offset, 1);
job_pause_point(&s->common.job);
@@ -993,12 +1001,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
int64_t cnt, delta;
bool should_complete;
- /* Do not start passive operations while there are active
- * writes in progress */
- while (s->in_active_write_counter) {
- mirror_wait_for_any_operation(s, true);
- }
-
if (s->ret < 0) {
ret = s->ret;
goto immediate_exit;
@@ -1015,7 +1017,9 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
/* cnt is the number of dirty bytes remaining and s->bytes_in_flight is
* the number of bytes currently being processed; together those are
* the current remaining operation length */
- job_progress_set_remaining(&s->common.job, s->bytes_in_flight + cnt);
+ job_progress_set_remaining(&s->common.job,
+ s->bytes_in_flight + cnt +
+ s->active_write_bytes_in_flight);
/* Note that even when no rate limit is applied we need to yield
* periodically with no pending I/O so that bdrv_drain_all() returns.
@@ -1073,6 +1077,10 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
s->in_drain = true;
bdrv_drained_begin(bs);
+
+ /* Must be zero because we are drained */
+ assert(s->in_active_write_counter == 0);
+
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
if (cnt > 0 || mirror_flush(s) < 0) {
bdrv_drained_end(bs);
@@ -1306,6 +1314,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
}
job_progress_increase_remaining(&job->common.job, bytes);
+ job->active_write_bytes_in_flight += bytes;
switch (method) {
case MIRROR_METHOD_COPY:
@@ -1327,6 +1336,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method,
abort();
}
+ job->active_write_bytes_in_flight -= bytes;
if (ret >= 0) {
job_progress_update(&job->common.job, bytes);
} else {
@@ -1375,6 +1385,19 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s,
s->in_active_write_counter++;
+ /*
+ * Wait for concurrent requests affecting the area. If there are already
+ * running requests that are copying off now-to-be stale data in the area,
+ * we must wait for them to finish before we begin writing fresh data to the
+ * target so that the write operations appear in the correct order.
+ * Note that background requests (see mirror_iteration()) in contrast only
+ * wait for conflicting requests at the start of the dirty area, and then
+ * (based on the in_flight_bitmap) truncate the area to copy so it will not
+ * conflict with any requests beyond that. For active writes, however, we
+ * cannot truncate that area. The request from our parent must be blocked
+ * until the area is copied in full. Therefore, we must wait for the whole
+ * area to become free of concurrent requests.
+ */
mirror_wait_on_conflicts(op, s, offset, bytes);
bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk);
--
2.37.3

View File

@ -0,0 +1,76 @@
From 57c79ed20cb73aa9aa4dd7487379b85ea3f936f6 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Wed, 9 Nov 2022 17:54:49 +0100
Subject: [PATCH 02/11] block/mirror: Drop mirror_wait_for_any_operation()
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load
RH-Bugzilla: 2125119
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [2/3] dec37883bcc491441ae08d9592d1ec26a47765c0
mirror_wait_for_free_in_flight_slot() is the only remaining user of
mirror_wait_for_any_operation(), so inline the latter into the former.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221109165452.67927-3-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit eb994912993077f178ccb43b20e422ecf9ae4ac7)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/mirror.c | 21 ++++++++-------------
1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 282f428cb7..6b02555ad7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -304,19 +304,21 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
}
static inline void coroutine_fn
-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
{
MirrorOp *op;
QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
- /* Do not wait on pseudo ops, because it may in turn wait on
+ /*
+ * Do not wait on pseudo ops, because it may in turn wait on
* some other operation to start, which may in fact be the
* caller of this function. Since there is only one pseudo op
* at any given time, we will always find some real operation
- * to wait on. */
- if (!op->is_pseudo_op && op->is_in_flight &&
- op->is_active_write == active)
- {
+ * to wait on.
+ * Also, do not wait on active operations, because they do not
+ * use up in-flight slots.
+ */
+ if (!op->is_pseudo_op && op->is_in_flight && !op->is_active_write) {
qemu_co_queue_wait(&op->waiting_requests, NULL);
return;
}
@@ -324,13 +326,6 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
abort();
}
-static inline void coroutine_fn
-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
-{
- /* Only non-active operations use up in-flight slots */
- mirror_wait_for_any_operation(s, false);
-}
-
/* Perform a mirror copy operation.
*
* *op->bytes_handled is set to the number of bytes copied after and
--
2.37.3

View File

@ -0,0 +1,75 @@
From b1f5aa5a342a25dc558ee9d435fed0643fe5155f Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Wed, 9 Nov 2022 17:54:50 +0100
Subject: [PATCH 03/11] block/mirror: Fix NULL s->job in active writes
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load
RH-Bugzilla: 2125119
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [3/3] 49d7ebd15667151a6e14228a8260cfdd0aa27a78
There is a small gap in mirror_start_job() before putting the mirror
filter node into the block graph (bdrv_append() call) and the actual job
being created. Before the job is created, MirrorBDSOpaque.job is NULL.
It is possible that requests come in when bdrv_drained_end() is called,
and those requests would see MirrorBDSOpaque.job == NULL. Have our
filter node handle that case gracefully.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221109165452.67927-4-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit da93d5c84e56e6b4e84aa8e98b6b984c9b6bb528)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/mirror.c | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 6b02555ad7..50289fca49 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1438,11 +1438,13 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs,
MirrorOp *op = NULL;
MirrorBDSOpaque *s = bs->opaque;
int ret = 0;
- bool copy_to_target;
+ bool copy_to_target = false;
- copy_to_target = s->job->ret >= 0 &&
- !job_is_cancelled(&s->job->common.job) &&
- s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+ if (s->job) {
+ copy_to_target = s->job->ret >= 0 &&
+ !job_is_cancelled(&s->job->common.job) &&
+ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+ }
if (copy_to_target) {
op = active_write_prepare(s->job, offset, bytes);
@@ -1487,11 +1489,13 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
QEMUIOVector bounce_qiov;
void *bounce_buf;
int ret = 0;
- bool copy_to_target;
+ bool copy_to_target = false;
- copy_to_target = s->job->ret >= 0 &&
- !job_is_cancelled(&s->job->common.job) &&
- s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+ if (s->job) {
+ copy_to_target = s->job->ret >= 0 &&
+ !job_is_cancelled(&s->job->common.job) &&
+ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING;
+ }
if (copy_to_target) {
/* The guest might concurrently modify the data to write; but
--
2.37.3

View File

@ -0,0 +1,52 @@
From d5a85fcf996948d1154e88e9ee3b4e8c64ec2694 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Fri, 4 Feb 2022 12:10:08 +0100
Subject: [PATCH 2/6] block/nbd: Assert there are no timers when closed
RH-Author: Hanna Reitz <hreitz@redhat.com>
RH-MergeRequest: 117: block/nbd: Handle AioContext changes
RH-Commit: [2/6] 995795ae9844a7d2b28cb1e57fd7fe81482d0205
RH-Bugzilla: 2035185
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Our two timers must not remain armed beyond nbd_clear_bdrvstate(), or
they will access freed data when they fire.
This patch is separate from the patches that actually fix the issue
(HEAD^^ and HEAD^) so that you can run the associated regression iotest
(281) on a configuration that reproducibly exposes the bug.
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
(cherry picked from commit 8a39c381e5e407d2fe5500324323f90a8540fa90)
Conflict:
- block/nbd.c: open_timer was introduced after the 6.2 release (for
nbd's @open-timeout parameter), and has not been backported, so drop
the assertion that it is NULL
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
block/nbd.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/block/nbd.c b/block/nbd.c
index b8e5a9b4cc..aab20125d8 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -108,6 +108,9 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs)
yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name));
+ /* Must not leave timers behind that would access freed data */
+ assert(!s->reconnect_delay_timer);
+
object_unref(OBJECT(s->tlscreds));
qapi_free_SocketAddress(s->saddr);
s->saddr = NULL;
--
2.27.0

View File

@ -0,0 +1,54 @@
From 8e23c0f208c6bd5bb64c4f6e4863b93fa6f4e9de Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Fri, 4 Feb 2022 12:10:06 +0100
Subject: [PATCH 1/6] block/nbd: Delete reconnect delay timer when done
RH-Author: Hanna Reitz <hreitz@redhat.com>
RH-MergeRequest: 117: block/nbd: Handle AioContext changes
RH-Commit: [1/6] 70814602a8a43a7c14857d76266d82b1aa5174a9
RH-Bugzilla: 2035185
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
We start the reconnect delay timer to cancel the reconnection attempt
after a while. Once nbd_co_do_establish_connection() has returned, this
attempt is over, and we no longer need the timer.
Delete it before returning from nbd_reconnect_attempt(), so that it does
not persist beyond the I/O request that was paused for reconnecting; we
do not want it to fire in a drained section, because all sort of things
can happen in such a section (e.g. the AioContext might be changed, and
we do not want the timer to fire in the wrong context; or the BDS might
even be deleted, and so the timer CB would access already-freed data).
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
(cherry picked from commit 3ce1fc16bad9c3f8b7b10b451a224d6d76e5c551)
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
block/nbd.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/block/nbd.c b/block/nbd.c
index 5ef462db1b..b8e5a9b4cc 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -353,6 +353,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
}
nbd_co_do_establish_connection(s->bs, NULL);
+
+ /*
+ * The reconnect attempt is done (maybe successfully, maybe not), so
+ * we no longer need this timer. Delete it so it will not outlive
+ * this I/O request (so draining removes all timers).
+ */
+ reconnect_delay_timer_del(s);
}
static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle)
--
2.27.0

View File

@ -0,0 +1,107 @@
From c7f63e7bbc5119d92775e20d1ebbf8280c78b732 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Fri, 4 Feb 2022 12:10:11 +0100
Subject: [PATCH 5/6] block/nbd: Move s->ioc on AioContext change
RH-Author: Hanna Reitz <hreitz@redhat.com>
RH-MergeRequest: 117: block/nbd: Handle AioContext changes
RH-Commit: [5/6] 107757b9fbadfb832c75521317108525daa4174e
RH-Bugzilla: 2035185
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
s->ioc must always be attached to the NBD node's AioContext. If that
context changes, s->ioc must be attached to the new context.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2033626
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
(cherry picked from commit e15f3a66c830e3fce99c9d56c493c2f7078a1225)
Conflict:
- block/nbd.c: open_timer was added after the 6.2 release, so we need
not (and cannot) assert it is NULL here.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
block/nbd.c | 41 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git a/block/nbd.c b/block/nbd.c
index aab20125d8..a3896c7f5f 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -2003,6 +2003,38 @@ static void nbd_cancel_in_flight(BlockDriverState *bs)
nbd_co_establish_connection_cancel(s->conn);
}
+static void nbd_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVNBDState *s = bs->opaque;
+
+ /*
+ * The reconnect_delay_timer is scheduled in I/O paths when the
+ * connection is lost, to cancel the reconnection attempt after a
+ * given time. Once this attempt is done (successfully or not),
+ * nbd_reconnect_attempt() ensures the timer is deleted before the
+ * respective I/O request is resumed.
+ * Since the AioContext can only be changed when a node is drained,
+ * the reconnect_delay_timer cannot be active here.
+ */
+ assert(!s->reconnect_delay_timer);
+
+ if (s->ioc) {
+ qio_channel_attach_aio_context(s->ioc, new_context);
+ }
+}
+
+static void nbd_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVNBDState *s = bs->opaque;
+
+ assert(!s->reconnect_delay_timer);
+
+ if (s->ioc) {
+ qio_channel_detach_aio_context(s->ioc);
+ }
+}
+
static BlockDriver bdrv_nbd = {
.format_name = "nbd",
.protocol_name = "nbd",
@@ -2026,6 +2058,9 @@ static BlockDriver bdrv_nbd = {
.bdrv_dirname = nbd_dirname,
.strong_runtime_opts = nbd_strong_runtime_opts,
.bdrv_cancel_in_flight = nbd_cancel_in_flight,
+
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
};
static BlockDriver bdrv_nbd_tcp = {
@@ -2051,6 +2086,9 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_dirname = nbd_dirname,
.strong_runtime_opts = nbd_strong_runtime_opts,
.bdrv_cancel_in_flight = nbd_cancel_in_flight,
+
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
};
static BlockDriver bdrv_nbd_unix = {
@@ -2076,6 +2114,9 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_dirname = nbd_dirname,
.strong_runtime_opts = nbd_strong_runtime_opts,
.bdrv_cancel_in_flight = nbd_cancel_in_flight,
+
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
};
static void bdrv_nbd_init(void)
--
2.27.0

View File

@ -0,0 +1,59 @@
From f4b7133d7aeb1d0b9115d01b5cff4df7f6b24e78 Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Thu, 13 Jan 2022 15:44:25 +0100
Subject: [PATCH 5/6] block/rbd: fix handling of holes in .bdrv_co_block_status
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 110: block/rbd: fix handling of holes in .bdrv_co_block_status
RH-Commit: [1/2] 352656a5c77cc7855b476c3559a10c6aa64a4f58
RH-Bugzilla: 2037135
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
the assumption that we can't hit a hole if we do not diff against a snapshot was wrong.
We can see a hole in an image if we diff against base if there exists an older snapshot
of the image and we have discarded blocks in the image where the snapshot has data.
Fix this by simply handling a hole like an unallocated area. There are no callbacks
for unallocated areas so just bail out if we hit a hole.
Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b
Suggested-by: Ilya Dryomov <idryomov@gmail.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Message-Id: <20220113144426.4036493-2-pl@kamp.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9e302f64bb407a9bb097b626da97228c2654cfee)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/rbd.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index def96292e0..20bb896c4a 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
RBDDiffIterateReq *req = opaque;
assert(req->offs + req->bytes <= offs);
- /*
- * we do not diff against a snapshot so we should never receive a callback
- * for a hole.
- */
- assert(exists);
+
+ /* treat a hole like an unallocated area and bail out */
+ if (!exists) {
+ return 0;
+ }
if (!req->exists && offs > req->offs) {
/*
--
2.27.0

View File

@ -0,0 +1,103 @@
From 8c50eedf03d8e62acd387b9aa9369dadcea9324c Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Thu, 13 Jan 2022 15:44:26 +0100
Subject: [PATCH 6/6] block/rbd: workaround for ceph issue #53784
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 110: block/rbd: fix handling of holes in .bdrv_co_block_status
RH-Commit: [2/2] 1384557462e89bb539d0d25a1a471ad738fb9e89
RH-Bugzilla: 2037135
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
librbd had a bug until early 2022 that affected all versions of ceph that
supported fast-diff. This bug results in reporting of incorrect offsets
if the offset parameter to rbd_diff_iterate2 is not object aligned.
This patch works around this bug for pre Quincy versions of librbd.
Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Message-Id: <20220113144426.4036493-3-pl@kamp.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Tested-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit fc176116cdea816ceb8dd969080b2b95f58edbc0)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/rbd.c | 42 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 40 insertions(+), 2 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 20bb896c4a..8f183eba2a 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
int status, r;
RBDDiffIterateReq req = { .offs = offset };
uint64_t features, flags;
+ uint64_t head = 0;
assert(offset + bytes <= s->image_size);
@@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
return status;
}
- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
+#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0)
+ /*
+ * librbd had a bug until early 2022 that affected all versions of ceph that
+ * supported fast-diff. This bug results in reporting of incorrect offsets
+ * if the offset parameter to rbd_diff_iterate2 is not object aligned.
+ * Work around this bug by rounding down the offset to object boundaries.
+ * This is OK because we call rbd_diff_iterate2 with whole_object = true.
+ * However, this workaround only works for non cloned images with default
+ * striping.
+ *
+ * See: https://tracker.ceph.com/issues/53784
+ */
+
+ /* check if RBD image has non-default striping enabled */
+ if (features & RBD_FEATURE_STRIPINGV2) {
+ return status;
+ }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ /*
+ * check if RBD image is a clone (= has a parent).
+ *
+ * rbd_get_parent_info is deprecated from Nautilus onwards, but the
+ * replacement rbd_get_parent is not present in Luminous and Mimic.
+ */
+ if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) {
+ return status;
+ }
+#pragma GCC diagnostic pop
+
+ head = req.offs & (s->object_size - 1);
+ req.offs -= head;
+ bytes += head;
+#endif
+
+ r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true,
qemu_rbd_diff_iterate_cb, &req);
if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
return status;
@@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
}
- *pnum = req.bytes;
+ assert(req.bytes > head);
+ *pnum = req.bytes - head;
return status;
}
--
2.27.0

View File

@ -0,0 +1,56 @@
From 866a3b56f6a2d43f3cf7b3313fb41808bc5e6e1f Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 03/15] checkpatch: add qemu_bh_new/aio_bh_new checks
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
RH-Bugzilla: 1999236
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/12] 620b480b0878c18223f3cc103450bc16aa6d7e21 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
Upstream: Merged
CVE: CVE-2021-3750
commit ef56ffbdd6b0605dc1e305611287b948c970e236
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:08 2023 -0400
checkpatch: add qemu_bh_new/aio_bh_new checks
Advise authors to use the _guarded versions of the APIs, instead.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Message-Id: <20230427211013.2994127-4-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
scripts/checkpatch.pl | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index cb8eff233e..b2428e80cc 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2858,6 +2858,14 @@ sub process {
if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
}
+# recommend qemu_bh_new_guarded instead of qemu_bh_new
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) {
+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr);
+ }
+# recommend aio_bh_new_guarded instead of aio_bh_new
+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) {
+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr);
+ }
# check for module_init(), use category-specific init macros explicitly please
if ($line =~ /^module_init\s*\(/) {
ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
--
2.37.3

View File

@ -0,0 +1,58 @@
From abd84f26e0fe0bc9952d91fbd35fb3a7253cfecf Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@rehat.com>
Date: Wed, 13 Apr 2022 20:54:45 -0400
Subject: [PATCH 1/2] display/qxl-render: fix race condition in qxl_cursor
(CVE-2021-4207)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 152: display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207)
RH-Commit: [1/1] f05b9a956f2e0ca522b5be127beff813d04b5588 (jmaloy/qemu-kvm)
RH-Bugzilla: 2040738
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Mauro Matteo Cascella <None>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2040738
Upstream: Merged
CVE: CVE-2021-4207
commit 9569f5cb5b4bffa9d3ebc8ba7da1e03830a9a895
Author: Mauro Matteo Cascella <mcascell@redhat.com>
Date: Thu Apr 7 10:11:06 2022 +0200
display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207)
Avoid fetching 'width' and 'height' a second time to prevent possible
race condition. Refer to security advisory
https://starlabs.sg/advisories/22-4207/ for more information.
Fixes: CVE-2021-4207
Signed-off-by: Mauro Matteo Cascella <mcascell@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220407081106.343235-1-mcascell@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 9569f5cb5b4bffa9d3ebc8ba7da1e03830a9a895)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/display/qxl-render.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c
index d28849b121..237ed293ba 100644
--- a/hw/display/qxl-render.c
+++ b/hw/display/qxl-render.c
@@ -266,7 +266,7 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor,
}
break;
case SPICE_CURSOR_TYPE_ALPHA:
- size = sizeof(uint32_t) * cursor->header.width * cursor->header.height;
+ size = sizeof(uint32_t) * c->width * c->height;
qxl_unpack_chunks(c->data, size, qxl, &cursor->chunk, group_id);
if (qxl->debug > 2) {
cursor_print_ascii_art(c, "qxl/alpha");
--
2.27.0

View File

@ -0,0 +1,127 @@
From 103608465b8bd2edf7f9aaef5c3c93309ccf9ec2 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 21 Feb 2023 16:22:17 -0500
Subject: [PATCH 12/13] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel()
race
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 264: scsi: protect req->aiocb with AioContext lock
RH-Bugzilla: 2090990
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [2/3] 14f5835093ba8c5111f3ada2fe87730371aca733
dma_blk_cb() only takes the AioContext lock around ->io_func(). That
means the rest of dma_blk_cb() is not protected. In particular, the
DMAAIOCB field accesses happen outside the lock.
There is a race when the main loop thread holds the AioContext lock and
invokes scsi_device_purge_requests() -> bdrv_aio_cancel() ->
dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb
field determines how cancellation proceeds. If dma_aio_cancel() sees
dbs->acb == NULL while dma_blk_cb() is still running, the request can be
completed twice (-ECANCELED and the actual return value).
The following assertion can occur with virtio-scsi when an IOThread is
used:
../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed.
Fix the race by holding the AioContext across dma_blk_cb(). Now
dma_aio_cancel() under the AioContext lock will not see
inconsistent/intermediate states.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230221212218.1378734-3-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
hw/scsi/scsi-disk.c | 4 +---
softmmu/dma-helpers.c | 12 +++++++-----
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 179ce22c4a..c8109a673e 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -351,13 +351,12 @@ done:
scsi_req_unref(&r->req);
}
+/* Called with AioContext lock held */
static void scsi_dma_complete(void *opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
@@ -367,7 +366,6 @@ static void scsi_dma_complete(void *opaque, int ret)
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_dma_complete_noio(r, ret);
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
index 7d766a5e89..42af18719a 100644
--- a/softmmu/dma-helpers.c
+++ b/softmmu/dma-helpers.c
@@ -127,17 +127,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
static void dma_blk_cb(void *opaque, int ret)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
+ AioContext *ctx = dbs->ctx;
dma_addr_t cur_addr, cur_len;
void *mem;
trace_dma_blk_cb(dbs, ret);
+ aio_context_acquire(ctx);
dbs->acb = NULL;
dbs->offset += dbs->iov.size;
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
dma_complete(dbs, ret);
- return;
+ goto out;
}
dma_blk_unmap(dbs);
@@ -177,9 +179,9 @@ static void dma_blk_cb(void *opaque, int ret)
if (dbs->iov.size == 0) {
trace_dma_map_wait(dbs);
- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
cpu_register_map_client(dbs->bh);
- return;
+ goto out;
}
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
@@ -187,11 +189,11 @@ static void dma_blk_cb(void *opaque, int ret)
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
}
- aio_context_acquire(dbs->ctx);
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
- aio_context_release(dbs->ctx);
assert(dbs->acb);
+out:
+ aio_context_release(ctx);
}
static void dma_aio_cancel(BlockAIOCB *acb)
--
2.37.3

View File

@ -0,0 +1,77 @@
From e8377e3f4d540e2594a50985523e87d1f3cabbc7 Mon Sep 17 00:00:00 2001
From: Yang Zhong <yang.zhong@intel.com>
Date: Mon, 1 Nov 2021 12:20:08 -0400
Subject: [PATCH 3/7] doc: Add the SGX numa description
RH-Author: Paul Lai <None>
RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections
RH-Commit: [3/5] 41c74688c9662b966c243566a837135ff52341c4
RH-Bugzilla: 1518984
RH-Acked-by: Paolo Bonzini <None>
RH-Acked-by: Bandan Das <None>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Add the SGX numa reference command and how to check if
SGX numa is support or not with multiple EPC sections.
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
Message-Id: <20211101162009.62161-5-yang.zhong@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d1889b36098c79e2e6ac90faf3d0dc5ec0057677)
Signed-off-by: Paul Lai <plai@redhat.com>
---
docs/system/i386/sgx.rst | 31 +++++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst
index f8fade5ac2..0f0a73f758 100644
--- a/docs/system/i386/sgx.rst
+++ b/docs/system/i386/sgx.rst
@@ -141,8 +141,7 @@ To launch a SGX guest:
|qemu_system_x86| \\
-cpu host,+sgx-provisionkey \\
-object memory-backend-epc,id=mem1,size=64M,prealloc=on \\
- -object memory-backend-epc,id=mem2,size=28M \\
- -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2
+ -M sgx-epc.0.memdev=mem1,sgx-epc.0.node=0
Utilizing SGX in the guest requires a kernel/OS with SGX support.
The support can be determined in guest by::
@@ -152,8 +151,32 @@ The support can be determined in guest by::
and SGX epc info by::
$ dmesg | grep sgx
- [ 1.242142] sgx: EPC section 0x180000000-0x181bfffff
- [ 1.242319] sgx: EPC section 0x181c00000-0x1837fffff
+ [ 0.182807] sgx: EPC section 0x140000000-0x143ffffff
+ [ 0.183695] sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0.
+
+To launch a SGX numa guest:
+
+.. parsed-literal::
+
+ |qemu_system_x86| \\
+ -cpu host,+sgx-provisionkey \\
+ -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \\
+ -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \\
+ -numa node,nodeid=0,cpus=0-1,memdev=node0 \\
+ -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \\
+ -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \\
+ -numa node,nodeid=1,cpus=2-3,memdev=node1 \\
+ -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1
+
+and SGX epc numa info by::
+
+ $ dmesg | grep sgx
+ [ 0.369937] sgx: EPC section 0x180000000-0x183ffffff
+ [ 0.370259] sgx: EPC section 0x184000000-0x185bfffff
+
+ $ dmesg | grep SRAT
+ [ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff]
+ [ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff]
References
----------
--
2.27.0

View File

@ -0,0 +1,70 @@
From 407e23d7f0c9020404247afe7d4df98505222bbb Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 14 Nov 2022 14:25:02 +0100
Subject: [PATCH 1/3] docs/system/s390x: Document the "loadparm" machine
property
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 233: s390x: Document the "loadparm" machine property
RH-Bugzilla: 2128225
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [1/2] e9589ea32d2a8f82971476b644e1063fa14cf822
The "loadparm" machine property is useful for selecting alternative
kernels on the disk of the guest, but so far we do not tell the users
yet how to use it. Add some documentation to fill this gap.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2128235
Message-Id: <20221114132502.110213-1-thuth@redhat.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit be5df2edb5d69ff3107c5616aa035a9ba8d0422e)
---
docs/system/s390x/bootdevices.rst | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/docs/system/s390x/bootdevices.rst b/docs/system/s390x/bootdevices.rst
index 9e591cb9dc..d4bf3b9f0b 100644
--- a/docs/system/s390x/bootdevices.rst
+++ b/docs/system/s390x/bootdevices.rst
@@ -53,6 +53,32 @@ recommended to specify a CD-ROM device via ``-device scsi-cd`` (as mentioned
above) instead.
+Selecting kernels with the ``loadparm`` property
+------------------------------------------------
+
+The ``s390-ccw-virtio`` machine supports the so-called ``loadparm`` parameter
+which can be used to select the kernel on the disk of the guest that the
+s390-ccw bios should boot. When starting QEMU, it can be specified like this::
+
+ qemu-system-s390x -machine s390-ccw-virtio,loadparm=<string>
+
+The first way to use this parameter is to use the word ``PROMPT`` as the
+``<string>`` here. In that case the s390-ccw bios will show a list of
+installed kernels on the disk of the guest and ask the user to enter a number
+to chose which kernel should be booted -- similar to what can be achieved by
+specifying the ``-boot menu=on`` option when starting QEMU. Note that the menu
+list will only show the names of the installed kernels when using a DASD-like
+disk image with 4k byte sectors. On normal SCSI-style disks with 512-byte
+sectors, there is not enough space for the zipl loader on the disk to store
+the kernel names, so you only get a list without names here.
+
+The second way to use this parameter is to use a number in the range from 0
+to 31. The numbers that can be used here correspond to the numbers that are
+shown when using the ``PROMPT`` option, and the s390-ccw bios will then try
+to automatically boot the kernel that is associated with the given number.
+Note that ``0`` can be used to boot the default entry.
+
+
Booting from a network device
-----------------------------
--
2.37.3

View File

@ -0,0 +1,69 @@
From 837e09b1a8a38b53488f59aad090fbe6bb94e257 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 17 Nov 2023 11:32:37 +0100
Subject: [PATCH 2/3] dump: Add arch cleanup function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 323: Fix problem that secure execution guest might remain in "paused" state after failed dump
RH-Jira: RHEL-16696
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Commit: [2/3] b70f406dec88ffd4877f3d5d580fc8f821bdb252
JIRA: https://issues.redhat.com/browse/RHEL-16696
commit e72629e5149aba6f44122ea6d2a803ef136a0c6b
Author: Janosch Frank <frankja@linux.ibm.com>
Date: Thu Nov 9 12:04:42 2023 +0000
dump: Add arch cleanup function
Some architectures (s390x) need to cleanup after a failed dump to be
able to continue to run the vm. Add a cleanup function pointer and
call it if it's set.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-ID: <20231109120443.185979-3-frankja@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
dump/dump.c | 4 ++++
include/sysemu/dump-arch.h | 1 +
2 files changed, 5 insertions(+)
diff --git a/dump/dump.c b/dump/dump.c
index 5dee060b73..93edb89547 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -100,6 +100,10 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
static int dump_cleanup(DumpState *s)
{
+ if (s->dump_info.arch_cleanup_fn) {
+ s->dump_info.arch_cleanup_fn(s);
+ }
+
guest_phys_blocks_free(&s->guest_phys_blocks);
memory_mapping_list_free(&s->list);
close(s->fd);
diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h
index 59bbc9be38..743916e46c 100644
--- a/include/sysemu/dump-arch.h
+++ b/include/sysemu/dump-arch.h
@@ -24,6 +24,7 @@ typedef struct ArchDumpInfo {
void (*arch_sections_add_fn)(DumpState *s);
uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff);
int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff);
+ void (*arch_cleanup_fn)(DumpState *s);
} ArchDumpInfo;
struct GuestPhysBlockList; /* memory_mapping.h */
--
2.39.3

View File

@ -0,0 +1,356 @@
From f2f3efff83dddd38a97699cd2701f46f61a732e3 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Mon, 17 Oct 2022 11:32:10 +0000
Subject: [PATCH 36/42] dump: Add architecture section and section string table
support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [36/41] 83b98ff185e93e62703f686b65546d60c783d783
Add hooks which architectures can use to add arbitrary data to custom
sections.
Also add a section name string table in order to identify section
contents
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20221017113210.41674-1-frankja@linux.ibm.com>
(cherry picked from commit 9b72224f44612ddd5b434a1bccf79346946d11da)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 186 +++++++++++++++++++++++++++++++------
include/sysemu/dump-arch.h | 3 +
include/sysemu/dump.h | 3 +
3 files changed, 166 insertions(+), 26 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 7a42401790..4aa8fb64d2 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -104,6 +104,7 @@ static int dump_cleanup(DumpState *s)
memory_mapping_list_free(&s->list);
close(s->fd);
g_free(s->guest_note);
+ g_array_unref(s->string_table_buf);
s->guest_note = NULL;
if (s->resume) {
if (s->detached) {
@@ -153,11 +154,10 @@ static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header)
elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset);
elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
elf_header->e_phnum = cpu_to_dump16(s, phnum);
- if (s->shdr_num) {
- elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
- elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
- elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
- }
+ elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
+ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
+ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
+ elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
}
static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header)
@@ -181,11 +181,10 @@ static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header)
elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset);
elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
elf_header->e_phnum = cpu_to_dump16(s, phnum);
- if (s->shdr_num) {
- elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
- elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
- elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
- }
+ elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
+ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
+ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
+ elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1);
}
static void write_elf_header(DumpState *s, Error **errp)
@@ -196,6 +195,8 @@ static void write_elf_header(DumpState *s, Error **errp)
void *header_ptr;
int ret;
+ /* The NULL header and the shstrtab are always defined */
+ assert(s->shdr_num >= 2);
if (dump_is_64bit(s)) {
prepare_elf64_header(s, &elf64_header);
header_size = sizeof(elf64_header);
@@ -394,17 +395,49 @@ static void prepare_elf_section_hdr_zero(DumpState *s)
}
}
-static void prepare_elf_section_hdrs(DumpState *s)
+static void prepare_elf_section_hdr_string(DumpState *s, void *buff)
+{
+ uint64_t index = s->string_table_buf->len;
+ const char strtab[] = ".shstrtab";
+ Elf32_Shdr shdr32 = {};
+ Elf64_Shdr shdr64 = {};
+ int shdr_size;
+ void *shdr;
+
+ g_array_append_vals(s->string_table_buf, strtab, sizeof(strtab));
+ if (dump_is_64bit(s)) {
+ shdr_size = sizeof(Elf64_Shdr);
+ shdr64.sh_type = SHT_STRTAB;
+ shdr64.sh_offset = s->section_offset + s->elf_section_data_size;
+ shdr64.sh_name = index;
+ shdr64.sh_size = s->string_table_buf->len;
+ shdr = &shdr64;
+ } else {
+ shdr_size = sizeof(Elf32_Shdr);
+ shdr32.sh_type = SHT_STRTAB;
+ shdr32.sh_offset = s->section_offset + s->elf_section_data_size;
+ shdr32.sh_name = index;
+ shdr32.sh_size = s->string_table_buf->len;
+ shdr = &shdr32;
+ }
+ memcpy(buff, shdr, shdr_size);
+}
+
+static bool prepare_elf_section_hdrs(DumpState *s, Error **errp)
{
size_t len, sizeof_shdr;
+ void *buff_hdr;
/*
* Section ordering:
* - HDR zero
+ * - Arch section hdrs
+ * - String table hdr
*/
sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
len = sizeof_shdr * s->shdr_num;
s->elf_section_hdrs = g_malloc0(len);
+ buff_hdr = s->elf_section_hdrs;
/*
* The first section header is ALWAYS a special initial section
@@ -420,6 +453,26 @@ static void prepare_elf_section_hdrs(DumpState *s)
if (s->phdr_num >= PN_XNUM) {
prepare_elf_section_hdr_zero(s);
}
+ buff_hdr += sizeof_shdr;
+
+ /* Add architecture defined section headers */
+ if (s->dump_info.arch_sections_write_hdr_fn
+ && s->shdr_num > 2) {
+ buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr);
+
+ if (s->shdr_num >= SHN_LORESERVE) {
+ error_setg_errno(errp, EINVAL,
+ "dump: too many architecture defined sections");
+ return false;
+ }
+ }
+
+ /*
+ * String table is the last section since strings are added via
+ * arch_sections_write_hdr().
+ */
+ prepare_elf_section_hdr_string(s, buff_hdr);
+ return true;
}
static void write_elf_section_headers(DumpState *s, Error **errp)
@@ -427,7 +480,9 @@ static void write_elf_section_headers(DumpState *s, Error **errp)
size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
int ret;
- prepare_elf_section_hdrs(s);
+ if (!prepare_elf_section_hdrs(s, errp)) {
+ return;
+ }
ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s);
if (ret < 0) {
@@ -437,6 +492,29 @@ static void write_elf_section_headers(DumpState *s, Error **errp)
g_free(s->elf_section_hdrs);
}
+static void write_elf_sections(DumpState *s, Error **errp)
+{
+ int ret;
+
+ if (s->elf_section_data_size) {
+ /* Write architecture section data */
+ ret = fd_write_vmcore(s->elf_section_data,
+ s->elf_section_data_size, s);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "dump: failed to write architecture section data");
+ return;
+ }
+ }
+
+ /* Write string table */
+ ret = fd_write_vmcore(s->string_table_buf->data,
+ s->string_table_buf->len, s);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "dump: failed to write string table data");
+ }
+}
+
static void write_data(DumpState *s, void *buf, int length, Error **errp)
{
int ret;
@@ -693,6 +771,31 @@ static void dump_iterate(DumpState *s, Error **errp)
}
}
+static void dump_end(DumpState *s, Error **errp)
+{
+ int rc;
+ ERRP_GUARD();
+
+ if (s->elf_section_data_size) {
+ s->elf_section_data = g_malloc0(s->elf_section_data_size);
+ }
+
+ /* Adds the architecture defined section data to s->elf_section_data */
+ if (s->dump_info.arch_sections_write_fn &&
+ s->elf_section_data_size) {
+ rc = s->dump_info.arch_sections_write_fn(s, s->elf_section_data);
+ if (rc) {
+ error_setg_errno(errp, rc,
+ "dump: failed to get arch section data");
+ g_free(s->elf_section_data);
+ return;
+ }
+ }
+
+ /* write sections to vmcore */
+ write_elf_sections(s, errp);
+}
+
static void create_vmcore(DumpState *s, Error **errp)
{
ERRP_GUARD();
@@ -702,7 +805,14 @@ static void create_vmcore(DumpState *s, Error **errp)
return;
}
+ /* Iterate over memory and dump it to file */
dump_iterate(s, errp);
+ if (*errp) {
+ return;
+ }
+
+ /* Write the section data */
+ dump_end(s, errp);
}
static int write_start_flat_header(int fd)
@@ -1720,6 +1830,14 @@ static void dump_init(DumpState *s, int fd, bool has_format,
s->filter_area_begin = begin;
s->filter_area_length = length;
+ /* First index is 0, it's the special null name */
+ s->string_table_buf = g_array_new(FALSE, TRUE, 1);
+ /*
+ * Allocate the null name, due to the clearing option set to true
+ * it will be 0.
+ */
+ g_array_set_size(s->string_table_buf, 1);
+
memory_mapping_list_init(&s->list);
guest_phys_blocks_init(&s->guest_phys_blocks);
@@ -1856,26 +1974,42 @@ static void dump_init(DumpState *s, int fd, bool has_format,
}
/*
- * calculate phdr_num
+ * The first section header is always a special one in which most
+ * fields are 0. The section header string table is also always
+ * set.
+ */
+ s->shdr_num = 2;
+
+ /*
+ * Adds the number of architecture sections to shdr_num and sets
+ * elf_section_data_size so we know the offsets and sizes of all
+ * parts.
+ */
+ if (s->dump_info.arch_sections_add_fn) {
+ s->dump_info.arch_sections_add_fn(s);
+ }
+
+ /*
+ * calculate shdr_num so we know the offsets and sizes of all
+ * parts.
+ * Calculate phdr_num
*
- * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
+ * The absolute maximum amount of phdrs is UINT32_MAX - 1 as
+ * sh_info is 32 bit. There's special handling once we go over
+ * UINT16_MAX - 1 but that is handled in the ehdr and section
+ * code.
*/
- s->phdr_num = 1; /* PT_NOTE */
- if (s->list.num < UINT16_MAX - 2) {
- s->shdr_num = 0;
+ s->phdr_num = 1; /* Reserve PT_NOTE */
+ if (s->list.num <= UINT32_MAX - 1) {
s->phdr_num += s->list.num;
} else {
- /* sh_info of section 0 holds the real number of phdrs */
- s->shdr_num = 1;
-
- /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
- if (s->list.num <= UINT32_MAX - 1) {
- s->phdr_num += s->list.num;
- } else {
- s->phdr_num = UINT32_MAX;
- }
+ s->phdr_num = UINT32_MAX;
}
+ /*
+ * Now that the number of section and program headers is known we
+ * can calculate the offsets of the headers and data.
+ */
if (dump_is_64bit(s)) {
s->shdr_offset = sizeof(Elf64_Ehdr);
s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h
index e25b02e990..59bbc9be38 100644
--- a/include/sysemu/dump-arch.h
+++ b/include/sysemu/dump-arch.h
@@ -21,6 +21,9 @@ typedef struct ArchDumpInfo {
uint32_t page_size; /* The target's page size. If it's variable and
* unknown, then this should be the maximum. */
uint64_t phys_base; /* The target's physmem base. */
+ void (*arch_sections_add_fn)(DumpState *s);
+ uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff);
+ int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff);
} ArchDumpInfo;
struct GuestPhysBlockList; /* memory_mapping.h */
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 9ed811b313..38ccac7190 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -180,6 +180,9 @@ typedef struct DumpState {
hwaddr note_offset;
void *elf_section_hdrs; /* Pointer to section header buffer */
+ void *elf_section_data; /* Pointer to section data buffer */
+ uint64_t elf_section_data_size; /* Size of section data */
+ GArray *string_table_buf; /* String table data buffer */
uint8_t *note_buf; /* buffer for notes */
size_t note_buf_offset; /* the writing place in note_buf */
--
2.37.3

View File

@ -0,0 +1,138 @@
From bee31226b87d0b05faae84e88cce3af1b8dabbfd Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Wed, 30 Mar 2022 12:35:59 +0000
Subject: [PATCH 17/42] dump: Add more offset variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [17/41] fbe629e1476e8a0e039f989af6e1f4707075ba01
Offset calculations are easy enough to get wrong. Let's add a few
variables to make moving around elf headers and data sections easier.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220330123603.107120-6-frankja@linux.ibm.com>
(cherry picked from commit e71d353360bb09a8e784e35d78370c691f6ea185)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 35 +++++++++++++++--------------------
include/sysemu/dump.h | 4 ++++
2 files changed, 19 insertions(+), 20 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 5cc2322325..85a402b38c 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -142,13 +142,11 @@ static void write_elf64_header(DumpState *s, Error **errp)
elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
- elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr));
+ elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset);
elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
elf_header.e_phnum = cpu_to_dump16(s, phnum);
if (s->shdr_num) {
- uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num;
-
- elf_header.e_shoff = cpu_to_dump64(s, shoff);
+ elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset);
elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num);
}
@@ -179,13 +177,11 @@ static void write_elf32_header(DumpState *s, Error **errp)
elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
- elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr));
+ elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset);
elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
elf_header.e_phnum = cpu_to_dump16(s, phnum);
if (s->shdr_num) {
- uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num;
-
- elf_header.e_shoff = cpu_to_dump32(s, shoff);
+ elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset);
elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num);
}
@@ -248,12 +244,11 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
static void write_elf64_note(DumpState *s, Error **errp)
{
Elf64_Phdr phdr;
- hwaddr begin = s->memory_offset - s->note_size;
int ret;
memset(&phdr, 0, sizeof(Elf64_Phdr));
phdr.p_type = cpu_to_dump32(s, PT_NOTE);
- phdr.p_offset = cpu_to_dump64(s, begin);
+ phdr.p_offset = cpu_to_dump64(s, s->note_offset);
phdr.p_paddr = 0;
phdr.p_filesz = cpu_to_dump64(s, s->note_size);
phdr.p_memsz = cpu_to_dump64(s, s->note_size);
@@ -313,13 +308,12 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
static void write_elf32_note(DumpState *s, Error **errp)
{
- hwaddr begin = s->memory_offset - s->note_size;
Elf32_Phdr phdr;
int ret;
memset(&phdr, 0, sizeof(Elf32_Phdr));
phdr.p_type = cpu_to_dump32(s, PT_NOTE);
- phdr.p_offset = cpu_to_dump32(s, begin);
+ phdr.p_offset = cpu_to_dump32(s, s->note_offset);
phdr.p_paddr = 0;
phdr.p_filesz = cpu_to_dump32(s, s->note_size);
phdr.p_memsz = cpu_to_dump32(s, s->note_size);
@@ -1826,15 +1820,16 @@ static void dump_init(DumpState *s, int fd, bool has_format,
}
if (s->dump_info.d_class == ELFCLASS64) {
- s->memory_offset = sizeof(Elf64_Ehdr) +
- sizeof(Elf64_Phdr) * s->phdr_num +
- sizeof(Elf64_Shdr) * s->shdr_num +
- s->note_size;
+ s->phdr_offset = sizeof(Elf64_Ehdr);
+ s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
+ s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
+ s->memory_offset = s->note_offset + s->note_size;
} else {
- s->memory_offset = sizeof(Elf32_Ehdr) +
- sizeof(Elf32_Phdr) * s->phdr_num +
- sizeof(Elf32_Shdr) * s->shdr_num +
- s->note_size;
+
+ s->phdr_offset = sizeof(Elf32_Ehdr);
+ s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
+ s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
+ s->memory_offset = s->note_offset + s->note_size;
}
return;
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 19458bffbd..ffc2ea1072 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -159,6 +159,10 @@ typedef struct DumpState {
bool resume;
bool detached;
ssize_t note_size;
+ hwaddr shdr_offset;
+ hwaddr phdr_offset;
+ hwaddr section_offset;
+ hwaddr note_offset;
hwaddr memory_offset;
int fd;
--
2.37.3

View File

@ -0,0 +1,94 @@
From cbb653d73e32513ccd46b293a52384eed6a5f84f Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Wed, 30 Mar 2022 12:36:02 +0000
Subject: [PATCH 20/42] dump: Cleanup dump_begin write functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [20/41] 18ea1457a3e54fd368e556d96c3be50c6ad0a6bd
There's no need to have a gigantic if in there let's move the elf
32/64 bit logic into the section, segment or note code.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220330123603.107120-9-frankja@linux.ibm.com>
(cherry picked from commit 5ff2e5a3e1e67930e523486e39549a33fcf97227)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 42 +++++++++++-------------------------------
1 file changed, 11 insertions(+), 31 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 823ca32883..88abde355a 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -565,46 +565,26 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
- if (dump_is_64bit(s)) {
- /* write all PT_LOAD to vmcore */
- write_elf_loads(s, errp);
+ /* write all PT_LOAD to vmcore */
+ write_elf_loads(s, errp);
+ if (*errp) {
+ return;
+ }
+
+ /* write section to vmcore */
+ if (s->shdr_num) {
+ write_elf_section(s, 1, errp);
if (*errp) {
return;
}
+ }
- /* write section to vmcore */
- if (s->shdr_num) {
- write_elf_section(s, 1, errp);
- if (*errp) {
- return;
- }
- }
-
+ if (dump_is_64bit(s)) {
/* write notes to vmcore */
write_elf64_notes(fd_write_vmcore, s, errp);
- if (*errp) {
- return;
- }
} else {
- /* write all PT_LOAD to vmcore */
- write_elf_loads(s, errp);
- if (*errp) {
- return;
- }
-
- /* write section to vmcore */
- if (s->shdr_num) {
- write_elf_section(s, 0, errp);
- if (*errp) {
- return;
- }
- }
-
/* write notes to vmcore */
write_elf32_notes(fd_write_vmcore, s, errp);
- if (*errp) {
- return;
- }
}
}
--
2.37.3

View File

@ -0,0 +1,67 @@
From 0547599cf507930f91943f22d5f917ebacf69484 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Wed, 30 Mar 2022 12:36:03 +0000
Subject: [PATCH 21/42] dump: Consolidate elf note function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [21/41] 52298c098c116aea75ad15894731ff412c2c4e73
Just like with the other write functions let's move the 32/64 bit elf
handling to a function to improve readability.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220330123603.107120-10-frankja@linux.ibm.com>
(cherry picked from commit c68124738bc29017e4254c898bc40be7be477af7)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 88abde355a..a451abc590 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -520,6 +520,15 @@ static void write_elf_loads(DumpState *s, Error **errp)
}
}
+static void write_elf_notes(DumpState *s, Error **errp)
+{
+ if (dump_is_64bit(s)) {
+ write_elf64_notes(fd_write_vmcore, s, errp);
+ } else {
+ write_elf32_notes(fd_write_vmcore, s, errp);
+ }
+}
+
/* write elf header, PT_NOTE and elf note to vmcore. */
static void dump_begin(DumpState *s, Error **errp)
{
@@ -579,13 +588,8 @@ static void dump_begin(DumpState *s, Error **errp)
}
}
- if (dump_is_64bit(s)) {
- /* write notes to vmcore */
- write_elf64_notes(fd_write_vmcore, s, errp);
- } else {
- /* write notes to vmcore */
- write_elf32_notes(fd_write_vmcore, s, errp);
- }
+ /* write notes to vmcore */
+ write_elf_notes(s, errp);
}
static int get_next_block(DumpState *s, GuestPhysBlock *block)
--
2.37.3

View File

@ -0,0 +1,169 @@
From f87abe1ef14e80731249ebe9fe1bea569a68e9b4 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Wed, 30 Mar 2022 12:36:01 +0000
Subject: [PATCH 19/42] dump: Consolidate phdr note writes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [19/41] 180c4c0ab4941a0bf366dc7f32ee035e03daa6c0
There's no need to have two write functions. Let's rather have two
functions that set the data for elf 32/64 and then write it in a
common function.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220330123603.107120-8-frankja@linux.ibm.com>
(cherry picked from commit bc7d558017e6700f9a05c61b0b638a8994945f0d)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 94 +++++++++++++++++++++++++++--------------------------
1 file changed, 48 insertions(+), 46 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 6394e94023..823ca32883 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -246,24 +246,15 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
}
}
-static void write_elf64_note(DumpState *s, Error **errp)
+static void write_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr)
{
- Elf64_Phdr phdr;
- int ret;
-
- memset(&phdr, 0, sizeof(Elf64_Phdr));
- phdr.p_type = cpu_to_dump32(s, PT_NOTE);
- phdr.p_offset = cpu_to_dump64(s, s->note_offset);
- phdr.p_paddr = 0;
- phdr.p_filesz = cpu_to_dump64(s, s->note_size);
- phdr.p_memsz = cpu_to_dump64(s, s->note_size);
- phdr.p_vaddr = 0;
-
- ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
- if (ret < 0) {
- error_setg_errno(errp, -ret,
- "dump: failed to write program header table");
- }
+ memset(phdr, 0, sizeof(*phdr));
+ phdr->p_type = cpu_to_dump32(s, PT_NOTE);
+ phdr->p_offset = cpu_to_dump64(s, s->note_offset);
+ phdr->p_paddr = 0;
+ phdr->p_filesz = cpu_to_dump64(s, s->note_size);
+ phdr->p_memsz = cpu_to_dump64(s, s->note_size);
+ phdr->p_vaddr = 0;
}
static inline int cpu_index(CPUState *cpu)
@@ -311,24 +302,15 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
write_guest_note(f, s, errp);
}
-static void write_elf32_note(DumpState *s, Error **errp)
+static void write_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr)
{
- Elf32_Phdr phdr;
- int ret;
-
- memset(&phdr, 0, sizeof(Elf32_Phdr));
- phdr.p_type = cpu_to_dump32(s, PT_NOTE);
- phdr.p_offset = cpu_to_dump32(s, s->note_offset);
- phdr.p_paddr = 0;
- phdr.p_filesz = cpu_to_dump32(s, s->note_size);
- phdr.p_memsz = cpu_to_dump32(s, s->note_size);
- phdr.p_vaddr = 0;
-
- ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
- if (ret < 0) {
- error_setg_errno(errp, -ret,
- "dump: failed to write program header table");
- }
+ memset(phdr, 0, sizeof(*phdr));
+ phdr->p_type = cpu_to_dump32(s, PT_NOTE);
+ phdr->p_offset = cpu_to_dump32(s, s->note_offset);
+ phdr->p_paddr = 0;
+ phdr->p_filesz = cpu_to_dump32(s, s->note_size);
+ phdr->p_memsz = cpu_to_dump32(s, s->note_size);
+ phdr->p_vaddr = 0;
}
static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
@@ -358,6 +340,32 @@ static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
write_guest_note(f, s, errp);
}
+static void write_elf_phdr_note(DumpState *s, Error **errp)
+{
+ ERRP_GUARD();
+ Elf32_Phdr phdr32;
+ Elf64_Phdr phdr64;
+ void *phdr;
+ size_t size;
+ int ret;
+
+ if (dump_is_64bit(s)) {
+ write_elf64_phdr_note(s, &phdr64);
+ size = sizeof(phdr64);
+ phdr = &phdr64;
+ } else {
+ write_elf32_phdr_note(s, &phdr32);
+ size = sizeof(phdr32);
+ phdr = &phdr32;
+ }
+
+ ret = fd_write_vmcore(phdr, size, s);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "dump: failed to write program header table");
+ }
+}
+
static void write_elf_section(DumpState *s, int type, Error **errp)
{
Elf32_Shdr shdr32;
@@ -551,13 +559,13 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
- if (dump_is_64bit(s)) {
- /* write PT_NOTE to vmcore */
- write_elf64_note(s, errp);
- if (*errp) {
- return;
- }
+ /* write PT_NOTE to vmcore */
+ write_elf_phdr_note(s, errp);
+ if (*errp) {
+ return;
+ }
+ if (dump_is_64bit(s)) {
/* write all PT_LOAD to vmcore */
write_elf_loads(s, errp);
if (*errp) {
@@ -578,12 +586,6 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
} else {
- /* write PT_NOTE to vmcore */
- write_elf32_note(s, errp);
- if (*errp) {
- return;
- }
-
/* write all PT_LOAD to vmcore */
write_elf_loads(s, errp);
if (*errp) {
--
2.37.3

View File

@ -0,0 +1,118 @@
From c851676d202b5b76962529f3b6d433936becbd8a Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Wed, 30 Mar 2022 12:36:00 +0000
Subject: [PATCH 18/42] dump: Introduce dump_is_64bit() helper function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [18/41] a0fd2d1985c61b8e50d4a7ca26bc0ee6fcaa6196
Checking d_class in dump_info leads to lengthy conditionals so let's
shorten things a bit by introducing a helper function.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220330123603.107120-7-frankja@linux.ibm.com>
(cherry picked from commit 05bbaa5040ccb3419e8b93af8040485430e2db42)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 85a402b38c..6394e94023 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -55,6 +55,11 @@ static Error *dump_migration_blocker;
DIV_ROUND_UP((name_size), 4) + \
DIV_ROUND_UP((desc_size), 4)) * 4)
+static inline bool dump_is_64bit(DumpState *s)
+{
+ return s->dump_info.d_class == ELFCLASS64;
+}
+
uint16_t cpu_to_dump16(DumpState *s, uint16_t val)
{
if (s->dump_info.d_endian == ELFDATA2LSB) {
@@ -489,7 +494,7 @@ static void write_elf_loads(DumpState *s, Error **errp)
get_offset_range(memory_mapping->phys_addr,
memory_mapping->length,
s, &offset, &filesz);
- if (s->dump_info.d_class == ELFCLASS64) {
+ if (dump_is_64bit(s)) {
write_elf64_load(s, memory_mapping, phdr_index++, offset,
filesz, errp);
} else {
@@ -537,7 +542,7 @@ static void dump_begin(DumpState *s, Error **errp)
*/
/* write elf header to vmcore */
- if (s->dump_info.d_class == ELFCLASS64) {
+ if (dump_is_64bit(s)) {
write_elf64_header(s, errp);
} else {
write_elf32_header(s, errp);
@@ -546,7 +551,7 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
- if (s->dump_info.d_class == ELFCLASS64) {
+ if (dump_is_64bit(s)) {
/* write PT_NOTE to vmcore */
write_elf64_note(s, errp);
if (*errp) {
@@ -757,7 +762,7 @@ static void get_note_sizes(DumpState *s, const void *note,
uint64_t name_sz;
uint64_t desc_sz;
- if (s->dump_info.d_class == ELFCLASS64) {
+ if (dump_is_64bit(s)) {
const Elf64_Nhdr *hdr = note;
note_head_sz = sizeof(Elf64_Nhdr);
name_sz = tswap64(hdr->n_namesz);
@@ -1017,10 +1022,10 @@ out:
static void write_dump_header(DumpState *s, Error **errp)
{
- if (s->dump_info.d_class == ELFCLASS32) {
- create_header32(s, errp);
- } else {
+ if (dump_is_64bit(s)) {
create_header64(s, errp);
+ } else {
+ create_header32(s, errp);
}
}
@@ -1715,8 +1720,8 @@ static void dump_init(DumpState *s, int fd, bool has_format,
uint32_t size;
uint16_t format;
- note_head_size = s->dump_info.d_class == ELFCLASS32 ?
- sizeof(Elf32_Nhdr) : sizeof(Elf64_Nhdr);
+ note_head_size = dump_is_64bit(s) ?
+ sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr);
format = le16_to_cpu(vmci->vmcoreinfo.guest_format);
size = le32_to_cpu(vmci->vmcoreinfo.size);
@@ -1819,7 +1824,7 @@ static void dump_init(DumpState *s, int fd, bool has_format,
}
}
- if (s->dump_info.d_class == ELFCLASS64) {
+ if (dump_is_64bit(s)) {
s->phdr_offset = sizeof(Elf64_Ehdr);
s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
--
2.37.3

View File

@ -0,0 +1,136 @@
From 255722667a4fa4d522bb0b7e0825cbbe635abb8d Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Wed, 30 Mar 2022 12:35:57 +0000
Subject: [PATCH 15/42] dump: Introduce shdr_num to decrease complexity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [15/41] b0215ea5d381ef7f6abfe3f3bafea51ce933da56
Let's move from a boolean to a int variable which will later enable us
to store the number of sections that are in the dump file.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220330123603.107120-4-frankja@linux.ibm.com>
(cherry picked from commit 862a395858e5a302ed5921487777acdc95a3a31b)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 24 ++++++++++++------------
include/sysemu/dump.h | 2 +-
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 7236b167cc..972e28b089 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -145,12 +145,12 @@ static void write_elf64_header(DumpState *s, Error **errp)
elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr));
elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
elf_header.e_phnum = cpu_to_dump16(s, phnum);
- if (s->have_section) {
+ if (s->shdr_num) {
uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num;
elf_header.e_shoff = cpu_to_dump64(s, shoff);
elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
- elf_header.e_shnum = cpu_to_dump16(s, 1);
+ elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num);
}
ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
@@ -182,12 +182,12 @@ static void write_elf32_header(DumpState *s, Error **errp)
elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr));
elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
elf_header.e_phnum = cpu_to_dump16(s, phnum);
- if (s->have_section) {
+ if (s->shdr_num) {
uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num;
elf_header.e_shoff = cpu_to_dump32(s, shoff);
elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
- elf_header.e_shnum = cpu_to_dump16(s, 1);
+ elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num);
}
ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
@@ -566,7 +566,7 @@ static void dump_begin(DumpState *s, Error **errp)
}
/* write section to vmcore */
- if (s->have_section) {
+ if (s->shdr_num) {
write_elf_section(s, 1, errp);
if (*errp) {
return;
@@ -592,7 +592,7 @@ static void dump_begin(DumpState *s, Error **errp)
}
/* write section to vmcore */
- if (s->have_section) {
+ if (s->shdr_num) {
write_elf_section(s, 0, errp);
if (*errp) {
return;
@@ -1811,11 +1811,11 @@ static void dump_init(DumpState *s, int fd, bool has_format,
*/
s->phdr_num = 1; /* PT_NOTE */
if (s->list.num < UINT16_MAX - 2) {
+ s->shdr_num = 0;
s->phdr_num += s->list.num;
- s->have_section = false;
} else {
/* sh_info of section 0 holds the real number of phdrs */
- s->have_section = true;
+ s->shdr_num = 1;
/* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
if (s->list.num <= UINT32_MAX - 1) {
@@ -1826,19 +1826,19 @@ static void dump_init(DumpState *s, int fd, bool has_format,
}
if (s->dump_info.d_class == ELFCLASS64) {
- if (s->have_section) {
+ if (s->shdr_num) {
s->memory_offset = sizeof(Elf64_Ehdr) +
sizeof(Elf64_Phdr) * s->phdr_num +
- sizeof(Elf64_Shdr) + s->note_size;
+ sizeof(Elf64_Shdr) * s->shdr_num + s->note_size;
} else {
s->memory_offset = sizeof(Elf64_Ehdr) +
sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
}
} else {
- if (s->have_section) {
+ if (s->shdr_num) {
s->memory_offset = sizeof(Elf32_Ehdr) +
sizeof(Elf32_Phdr) * s->phdr_num +
- sizeof(Elf32_Shdr) + s->note_size;
+ sizeof(Elf32_Shdr) * s->shdr_num + s->note_size;
} else {
s->memory_offset = sizeof(Elf32_Ehdr) +
sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index b463fc9c02..19458bffbd 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -155,7 +155,7 @@ typedef struct DumpState {
ArchDumpInfo dump_info;
MemoryMappingList list;
uint32_t phdr_num;
- bool have_section;
+ uint32_t shdr_num;
bool resume;
bool detached;
ssize_t note_size;
--
2.37.3

View File

@ -0,0 +1,142 @@
From a18ba2fbaf132724e81be92da42b36d8f365e66c Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 11 Aug 2022 12:10:56 +0000
Subject: [PATCH 24/42] dump: Refactor dump_iterate and introduce
dump_filter_memblock_*()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [24/41] 74ef470f24d9d98093c4d63730a99474587033fd
The iteration over the memblocks in dump_iterate() is hard to
understand so it's about time to clean it up. Instead of manually
grabbing the next memblock we can use QTAILQ_FOREACH to iterate over
all memblocks.
Additionally we move the calculation of the offset and length out by
introducing and using the dump_filter_memblock_*() functions. These
functions will later be used to cleanup other parts of dump.c.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220811121111.9878-4-frankja@linux.ibm.com>
(cherry picked from commit 1e8113032f5b1efc5da66382470ce4809c76f8f2)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 74 ++++++++++++++++++++++++++++++-----------------------
1 file changed, 42 insertions(+), 32 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index fa787f379f..d981e843dd 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -592,31 +592,43 @@ static void dump_begin(DumpState *s, Error **errp)
write_elf_notes(s, errp);
}
-static int get_next_block(DumpState *s, GuestPhysBlock *block)
+static int64_t dump_filtered_memblock_size(GuestPhysBlock *block,
+ int64_t filter_area_start,
+ int64_t filter_area_length)
{
- while (1) {
- block = QTAILQ_NEXT(block, next);
- if (!block) {
- /* no more block */
- return 1;
- }
+ int64_t size, left, right;
- s->start = 0;
- s->next_block = block;
- if (s->has_filter) {
- if (block->target_start >= s->begin + s->length ||
- block->target_end <= s->begin) {
- /* This block is out of the range */
- continue;
- }
+ /* No filter, return full size */
+ if (!filter_area_length) {
+ return block->target_end - block->target_start;
+ }
- if (s->begin > block->target_start) {
- s->start = s->begin - block->target_start;
- }
+ /* calculate the overlapped region. */
+ left = MAX(filter_area_start, block->target_start);
+ right = MIN(filter_area_start + filter_area_length, block->target_end);
+ size = right - left;
+ size = size > 0 ? size : 0;
+
+ return size;
+}
+
+static int64_t dump_filtered_memblock_start(GuestPhysBlock *block,
+ int64_t filter_area_start,
+ int64_t filter_area_length)
+{
+ if (filter_area_length) {
+ /* return -1 if the block is not within filter area */
+ if (block->target_start >= filter_area_start + filter_area_length ||
+ block->target_end <= filter_area_start) {
+ return -1;
}
- return 0;
+ if (filter_area_start > block->target_start) {
+ return filter_area_start - block->target_start;
+ }
}
+
+ return 0;
}
/* write all memory to vmcore */
@@ -624,24 +636,22 @@ static void dump_iterate(DumpState *s, Error **errp)
{
ERRP_GUARD();
GuestPhysBlock *block;
- int64_t size;
-
- do {
- block = s->next_block;
+ int64_t memblock_size, memblock_start;
- size = block->target_end - block->target_start;
- if (s->has_filter) {
- size -= s->start;
- if (s->begin + s->length < block->target_end) {
- size -= block->target_end - (s->begin + s->length);
- }
+ QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
+ memblock_start = dump_filtered_memblock_start(block, s->begin, s->length);
+ if (memblock_start == -1) {
+ continue;
}
- write_memory(s, block, s->start, size, errp);
+
+ memblock_size = dump_filtered_memblock_size(block, s->begin, s->length);
+
+ /* Write the memory to file */
+ write_memory(s, block, memblock_start, memblock_size, errp);
if (*errp) {
return;
}
-
- } while (!get_next_block(s, block));
+ }
}
static void create_vmcore(DumpState *s, Error **errp)
--
2.37.3

View File

@ -0,0 +1,45 @@
From 6932fe3afbec443bbf6acff5b707536254e1bc37 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Mon, 17 Oct 2022 08:38:16 +0000
Subject: [PATCH 35/42] dump: Reintroduce memory_offset and section_offset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [35/41] e60c0d066aeeedb42e724712bc3aa7b7591c6c79
section_offset will later be used to store the offset to the section
data which will be stored last. For now memory_offset is only needed
to make section_offset look nicer.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20221017083822.43118-5-frankja@linux.ibm.com>
(cherry picked from commit 13fd417ddc81a1685c6a8f4e1c80bbfe7150f164)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/dump/dump.c b/dump/dump.c
index d17537d4e9..7a42401790 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -1885,6 +1885,8 @@ static void dump_init(DumpState *s, int fd, bool has_format,
s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
}
+ s->memory_offset = s->note_offset + s->note_size;
+ s->section_offset = s->memory_offset + s->total_size;
return;
--
2.37.3

View File

@ -0,0 +1,70 @@
From a8eeab6936a2bd27b33b63aed7e2ef96034f7772 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Wed, 30 Mar 2022 12:35:58 +0000
Subject: [PATCH 16/42] dump: Remove the section if when calculating the memory
offset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [16/41] ff214d2c23b9cb16fd49d22d976829267df43133
When s->shdr_num is 0 we'll add 0 bytes of section headers which is
equivalent to not adding section headers but with the multiplication
we can remove a if/else.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220330123603.107120-5-frankja@linux.ibm.com>
(cherry picked from commit 344107e07bd81546474a54ab83800158ca953059)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 24 ++++++++----------------
1 file changed, 8 insertions(+), 16 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 972e28b089..5cc2322325 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -1826,23 +1826,15 @@ static void dump_init(DumpState *s, int fd, bool has_format,
}
if (s->dump_info.d_class == ELFCLASS64) {
- if (s->shdr_num) {
- s->memory_offset = sizeof(Elf64_Ehdr) +
- sizeof(Elf64_Phdr) * s->phdr_num +
- sizeof(Elf64_Shdr) * s->shdr_num + s->note_size;
- } else {
- s->memory_offset = sizeof(Elf64_Ehdr) +
- sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
- }
+ s->memory_offset = sizeof(Elf64_Ehdr) +
+ sizeof(Elf64_Phdr) * s->phdr_num +
+ sizeof(Elf64_Shdr) * s->shdr_num +
+ s->note_size;
} else {
- if (s->shdr_num) {
- s->memory_offset = sizeof(Elf32_Ehdr) +
- sizeof(Elf32_Phdr) * s->phdr_num +
- sizeof(Elf32_Shdr) * s->shdr_num + s->note_size;
- } else {
- s->memory_offset = sizeof(Elf32_Ehdr) +
- sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
- }
+ s->memory_offset = sizeof(Elf32_Ehdr) +
+ sizeof(Elf32_Phdr) * s->phdr_num +
+ sizeof(Elf32_Shdr) * s->shdr_num +
+ s->note_size;
}
return;
--
2.37.3

View File

@ -0,0 +1,176 @@
From eb763bec53d6b9aea7a6b60b0cf8c5d8b5f1b35c Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 7 Apr 2022 09:48:24 +0000
Subject: [PATCH 14/42] dump: Remove the sh_info variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [14/41] 24af12b78c8f5a02cf85df2f6b1d64249f9499c9
There's no need to have phdr_num and sh_info at the same time. We can
make phdr_num 32 bit and set PN_XNUM when we write the header if
phdr_num >= PN_XNUM.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220407094824.5074-1-frankja@linux.ibm.com>
(cherry picked from commit 046bc4160bc780eaacc2d702a2589f1a7a01188d)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 44 +++++++++++++++++++++++--------------------
include/sysemu/dump.h | 3 +--
2 files changed, 25 insertions(+), 22 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 9876123f2e..7236b167cc 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -124,6 +124,12 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
static void write_elf64_header(DumpState *s, Error **errp)
{
+ /*
+ * phnum in the elf header is 16 bit, if we have more segments we
+ * set phnum to PN_XNUM and write the real number of segments to a
+ * special section.
+ */
+ uint16_t phnum = MIN(s->phdr_num, PN_XNUM);
Elf64_Ehdr elf_header;
int ret;
@@ -138,9 +144,9 @@ static void write_elf64_header(DumpState *s, Error **errp)
elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr));
elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
- elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
+ elf_header.e_phnum = cpu_to_dump16(s, phnum);
if (s->have_section) {
- uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;
+ uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num;
elf_header.e_shoff = cpu_to_dump64(s, shoff);
elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
@@ -155,6 +161,12 @@ static void write_elf64_header(DumpState *s, Error **errp)
static void write_elf32_header(DumpState *s, Error **errp)
{
+ /*
+ * phnum in the elf header is 16 bit, if we have more segments we
+ * set phnum to PN_XNUM and write the real number of segments to a
+ * special section.
+ */
+ uint16_t phnum = MIN(s->phdr_num, PN_XNUM);
Elf32_Ehdr elf_header;
int ret;
@@ -169,9 +181,9 @@ static void write_elf32_header(DumpState *s, Error **errp)
elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr));
elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
- elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
+ elf_header.e_phnum = cpu_to_dump16(s, phnum);
if (s->have_section) {
- uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;
+ uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num;
elf_header.e_shoff = cpu_to_dump32(s, shoff);
elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
@@ -358,12 +370,12 @@ static void write_elf_section(DumpState *s, int type, Error **errp)
if (type == 0) {
shdr_size = sizeof(Elf32_Shdr);
memset(&shdr32, 0, shdr_size);
- shdr32.sh_info = cpu_to_dump32(s, s->sh_info);
+ shdr32.sh_info = cpu_to_dump32(s, s->phdr_num);
shdr = &shdr32;
} else {
shdr_size = sizeof(Elf64_Shdr);
memset(&shdr64, 0, shdr_size);
- shdr64.sh_info = cpu_to_dump32(s, s->sh_info);
+ shdr64.sh_info = cpu_to_dump32(s, s->phdr_num);
shdr = &shdr64;
}
@@ -478,13 +490,6 @@ static void write_elf_loads(DumpState *s, Error **errp)
hwaddr offset, filesz;
MemoryMapping *memory_mapping;
uint32_t phdr_index = 1;
- uint32_t max_index;
-
- if (s->have_section) {
- max_index = s->sh_info;
- } else {
- max_index = s->phdr_num;
- }
QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
get_offset_range(memory_mapping->phys_addr,
@@ -502,7 +507,7 @@ static void write_elf_loads(DumpState *s, Error **errp)
return;
}
- if (phdr_index >= max_index) {
+ if (phdr_index >= s->phdr_num) {
break;
}
}
@@ -1809,22 +1814,21 @@ static void dump_init(DumpState *s, int fd, bool has_format,
s->phdr_num += s->list.num;
s->have_section = false;
} else {
+ /* sh_info of section 0 holds the real number of phdrs */
s->have_section = true;
- s->phdr_num = PN_XNUM;
- s->sh_info = 1; /* PT_NOTE */
/* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
if (s->list.num <= UINT32_MAX - 1) {
- s->sh_info += s->list.num;
+ s->phdr_num += s->list.num;
} else {
- s->sh_info = UINT32_MAX;
+ s->phdr_num = UINT32_MAX;
}
}
if (s->dump_info.d_class == ELFCLASS64) {
if (s->have_section) {
s->memory_offset = sizeof(Elf64_Ehdr) +
- sizeof(Elf64_Phdr) * s->sh_info +
+ sizeof(Elf64_Phdr) * s->phdr_num +
sizeof(Elf64_Shdr) + s->note_size;
} else {
s->memory_offset = sizeof(Elf64_Ehdr) +
@@ -1833,7 +1837,7 @@ static void dump_init(DumpState *s, int fd, bool has_format,
} else {
if (s->have_section) {
s->memory_offset = sizeof(Elf32_Ehdr) +
- sizeof(Elf32_Phdr) * s->sh_info +
+ sizeof(Elf32_Phdr) * s->phdr_num +
sizeof(Elf32_Shdr) + s->note_size;
} else {
s->memory_offset = sizeof(Elf32_Ehdr) +
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 250143cb5a..b463fc9c02 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -154,8 +154,7 @@ typedef struct DumpState {
GuestPhysBlockList guest_phys_blocks;
ArchDumpInfo dump_info;
MemoryMappingList list;
- uint16_t phdr_num;
- uint32_t sh_info;
+ uint32_t phdr_num;
bool have_section;
bool resume;
bool detached;
--
2.37.3

View File

@ -0,0 +1,69 @@
From 18fef7f02801d51207d67b8f8ec5f0d828889c78 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 11 Aug 2022 12:11:01 +0000
Subject: [PATCH 29/42] dump: Rename write_elf*_phdr_note to
prepare_elf*_phdr_note
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [29/41] 876cea6f6e51be8df2763f56d0daef99d11fdd49
The functions in question do not actually write to the file descriptor
they set up a buffer which is later written to the fd.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220811121111.9878-9-frankja@linux.ibm.com>
(cherry picked from commit 2341a94d3a0a8a93a5a977e642da1807b8edaab8)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 8d5226f861..c2c1341ad7 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -261,7 +261,7 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
}
}
-static void write_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr)
+static void prepare_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr)
{
memset(phdr, 0, sizeof(*phdr));
phdr->p_type = cpu_to_dump32(s, PT_NOTE);
@@ -317,7 +317,7 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
write_guest_note(f, s, errp);
}
-static void write_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr)
+static void prepare_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr)
{
memset(phdr, 0, sizeof(*phdr));
phdr->p_type = cpu_to_dump32(s, PT_NOTE);
@@ -365,11 +365,11 @@ static void write_elf_phdr_note(DumpState *s, Error **errp)
int ret;
if (dump_is_64bit(s)) {
- write_elf64_phdr_note(s, &phdr64);
+ prepare_elf64_phdr_note(s, &phdr64);
size = sizeof(phdr64);
phdr = &phdr64;
} else {
- write_elf32_phdr_note(s, &phdr32);
+ prepare_elf32_phdr_note(s, &phdr32);
size = sizeof(phdr32);
phdr = &phdr32;
}
--
2.37.3

View File

@ -0,0 +1,57 @@
From 04d4947a22fe3192384ff486d0a979d799ded98e Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 11 Aug 2022 12:10:55 +0000
Subject: [PATCH 23/42] dump: Rename write_elf_loads to write_elf_phdr_loads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [23/41] 18e3ef70b97c525b7c43cf12143204bdb1060e4f
Let's make it a bit clearer that we write the program headers of the
PT_LOAD type.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@ibm.linux.com>
Message-Id: <20220811121111.9878-3-frankja@linux.ibm.com>
(cherry picked from commit afae6056ea79e2d89fd90867de3a01732eae724f)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index a451abc590..fa787f379f 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -491,7 +491,7 @@ static void get_offset_range(hwaddr phys_addr,
}
}
-static void write_elf_loads(DumpState *s, Error **errp)
+static void write_elf_phdr_loads(DumpState *s, Error **errp)
{
ERRP_GUARD();
hwaddr offset, filesz;
@@ -574,8 +574,8 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
- /* write all PT_LOAD to vmcore */
- write_elf_loads(s, errp);
+ /* write all PT_LOADs to vmcore */
+ write_elf_phdr_loads(s, errp);
if (*errp) {
return;
}
--
2.37.3

View File

@ -0,0 +1,68 @@
From 7e8d6290099b33f88621b45e62652a97704c9573 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Mon, 17 Oct 2022 08:38:15 +0000
Subject: [PATCH 34/42] dump: Reorder struct DumpState
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [34/41] 8d44e5e8c86ea5b33644eba141046cd657d0071e
Let's move ELF related members into one block and guest memory related
ones into another to improve readability.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20221017083822.43118-4-frankja@linux.ibm.com>
(cherry picked from commit 8384b73c46fd474847d7e74d121318e344edc3c4)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
include/sysemu/dump.h | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 9995f65dc8..9ed811b313 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -154,15 +154,8 @@ typedef struct DumpState {
GuestPhysBlockList guest_phys_blocks;
ArchDumpInfo dump_info;
MemoryMappingList list;
- uint32_t phdr_num;
- uint32_t shdr_num;
bool resume;
bool detached;
- ssize_t note_size;
- hwaddr shdr_offset;
- hwaddr phdr_offset;
- hwaddr section_offset;
- hwaddr note_offset;
hwaddr memory_offset;
int fd;
@@ -177,6 +170,15 @@ typedef struct DumpState {
int64_t filter_area_begin; /* Start address of partial guest memory area */
int64_t filter_area_length; /* Length of partial guest memory area */
+ /* Elf dump related data */
+ uint32_t phdr_num;
+ uint32_t shdr_num;
+ ssize_t note_size;
+ hwaddr shdr_offset;
+ hwaddr phdr_offset;
+ hwaddr section_offset;
+ hwaddr note_offset;
+
void *elf_section_hdrs; /* Pointer to section header buffer */
uint8_t *note_buf; /* buffer for notes */
--
2.37.3

View File

@ -0,0 +1,467 @@
From 8f674e0e12e4b88fc035948612a0b0949e0ad892 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 11 Aug 2022 12:10:54 +0000
Subject: [PATCH 22/42] dump: Replace opaque DumpState pointer with a typed one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [22/41] 5f071d7ef441ae6f5da70eb56018c4657deee3d7
It's always better to convey the type of a pointer if at all
possible. So let's add the DumpState typedef to typedefs.h and move
the dump note functions from the opaque pointers to DumpState
pointers.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
CC: Peter Maydell <peter.maydell@linaro.org>
CC: Cédric Le Goater <clg@kaod.org>
CC: Daniel Henrique Barboza <danielhb413@gmail.com>
CC: David Gibson <david@gibson.dropbear.id.au>
CC: Greg Kurz <groug@kaod.org>
CC: Palmer Dabbelt <palmer@dabbelt.com>
CC: Alistair Francis <alistair.francis@wdc.com>
CC: Bin Meng <bin.meng@windriver.com>
CC: Cornelia Huck <cohuck@redhat.com>
CC: Thomas Huth <thuth@redhat.com>
CC: Richard Henderson <richard.henderson@linaro.org>
CC: David Hildenbrand <david@redhat.com>
Acked-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220811121111.9878-2-frankja@linux.ibm.com>
(cherry picked from commit 1af0006ab959864dfa2f59e9136c5fb93000b61f)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
include/hw/core/sysemu-cpu-ops.h | 8 ++++----
include/qemu/typedefs.h | 1 +
target/arm/arch_dump.c | 6 ++----
target/arm/cpu.h | 4 ++--
target/i386/arch_dump.c | 30 +++++++++++++++---------------
target/i386/cpu.h | 8 ++++----
target/ppc/arch_dump.c | 18 +++++++++---------
target/ppc/cpu.h | 4 ++--
target/riscv/arch_dump.c | 6 ++----
target/riscv/cpu.h | 4 ++--
target/s390x/arch_dump.c | 10 +++++-----
target/s390x/s390x-internal.h | 2 +-
12 files changed, 49 insertions(+), 52 deletions(-)
diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h
index a9ba39e5f2..ee169b872c 100644
--- a/include/hw/core/sysemu-cpu-ops.h
+++ b/include/hw/core/sysemu-cpu-ops.h
@@ -53,25 +53,25 @@ typedef struct SysemuCPUOps {
* 32-bit VM coredump.
*/
int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
/**
* @write_elf64_note: Callback for writing a CPU-specific ELF note to a
* 64-bit VM coredump.
*/
int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
/**
* @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF
* note to a 32-bit VM coredump.
*/
int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu,
- void *opaque);
+ DumpState *s);
/**
* @write_elf64_qemunote: Callback for writing a CPU- and QEMU-specific ELF
* note to a 64-bit VM coredump.
*/
int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu,
- void *opaque);
+ DumpState *s);
/**
* @virtio_is_big_endian: Callback to return %true if a CPU which supports
* runtime configurable endianness is currently big-endian.
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index ee60eb3de4..ac9d031be6 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -125,6 +125,7 @@ typedef struct VirtIODevice VirtIODevice;
typedef struct Visitor Visitor;
typedef struct VMChangeStateEntry VMChangeStateEntry;
typedef struct VMStateDescription VMStateDescription;
+typedef struct DumpState DumpState;
/*
* Pointer types
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index 0184845310..3a824e0aa6 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -232,12 +232,11 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
#endif
int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
struct aarch64_note note;
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
- DumpState *s = opaque;
uint64_t pstate, sp;
int ret, i;
@@ -360,12 +359,11 @@ static int arm_write_elf32_vfp(WriteCoreDumpFunction f, CPUARMState *env,
}
int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
struct arm_note note;
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
- DumpState *s = opaque;
int ret, i;
bool fpvalid = cpu_isar_feature(aa32_vfp_simd, cpu);
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e33f37b70a..8d2f496ef9 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1065,9 +1065,9 @@ int arm_gen_dynamic_svereg_xml(CPUState *cpu, int base_reg);
const char *arm_gdb_get_dynamic_xml(CPUState *cpu, const char *xmlname);
int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
#ifdef TARGET_AARCH64
int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
diff --git a/target/i386/arch_dump.c b/target/i386/arch_dump.c
index 004141fc04..c290910a04 100644
--- a/target/i386/arch_dump.c
+++ b/target/i386/arch_dump.c
@@ -42,7 +42,7 @@ typedef struct {
static int x86_64_write_elf64_note(WriteCoreDumpFunction f,
CPUX86State *env, int id,
- void *opaque)
+ DumpState *s)
{
x86_64_user_regs_struct regs;
Elf64_Nhdr *note;
@@ -94,7 +94,7 @@ static int x86_64_write_elf64_note(WriteCoreDumpFunction f,
buf += descsz - sizeof(x86_64_user_regs_struct)-sizeof(target_ulong);
memcpy(buf, &regs, sizeof(x86_64_user_regs_struct));
- ret = f(note, note_size, opaque);
+ ret = f(note, note_size, s);
g_free(note);
if (ret < 0) {
return -1;
@@ -148,7 +148,7 @@ static void x86_fill_elf_prstatus(x86_elf_prstatus *prstatus, CPUX86State *env,
}
static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env,
- int id, void *opaque)
+ int id, DumpState *s)
{
x86_elf_prstatus prstatus;
Elf64_Nhdr *note;
@@ -170,7 +170,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env,
buf += ROUND_UP(name_size, 4);
memcpy(buf, &prstatus, sizeof(prstatus));
- ret = f(note, note_size, opaque);
+ ret = f(note, note_size, s);
g_free(note);
if (ret < 0) {
return -1;
@@ -180,7 +180,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env,
}
int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
X86CPU *cpu = X86_CPU(cs);
int ret;
@@ -189,10 +189,10 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
bool lma = !!(first_x86_cpu->env.hflags & HF_LMA_MASK);
if (lma) {
- ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, opaque);
+ ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, s);
} else {
#endif
- ret = x86_write_elf64_note(f, &cpu->env, cpuid, opaque);
+ ret = x86_write_elf64_note(f, &cpu->env, cpuid, s);
#ifdef TARGET_X86_64
}
#endif
@@ -201,7 +201,7 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
}
int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
X86CPU *cpu = X86_CPU(cs);
x86_elf_prstatus prstatus;
@@ -224,7 +224,7 @@ int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
buf += ROUND_UP(name_size, 4);
memcpy(buf, &prstatus, sizeof(prstatus));
- ret = f(note, note_size, opaque);
+ ret = f(note, note_size, s);
g_free(note);
if (ret < 0) {
return -1;
@@ -329,7 +329,7 @@ static void qemu_get_cpustate(QEMUCPUState *s, CPUX86State *env)
static inline int cpu_write_qemu_note(WriteCoreDumpFunction f,
CPUX86State *env,
- void *opaque,
+ DumpState *s,
int type)
{
QEMUCPUState state;
@@ -369,7 +369,7 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f,
buf += ROUND_UP(name_size, 4);
memcpy(buf, &state, sizeof(state));
- ret = f(note, note_size, opaque);
+ ret = f(note, note_size, s);
g_free(note);
if (ret < 0) {
return -1;
@@ -379,19 +379,19 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f,
}
int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cs,
- void *opaque)
+ DumpState *s)
{
X86CPU *cpu = X86_CPU(cs);
- return cpu_write_qemu_note(f, &cpu->env, opaque, 1);
+ return cpu_write_qemu_note(f, &cpu->env, s, 1);
}
int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cs,
- void *opaque)
+ DumpState *s)
{
X86CPU *cpu = X86_CPU(cs);
- return cpu_write_qemu_note(f, &cpu->env, opaque, 0);
+ return cpu_write_qemu_note(f, &cpu->env, s, 0);
}
int cpu_get_dump_info(ArchDumpInfo *info,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 006b735fe4..5d2ddd81b9 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1887,13 +1887,13 @@ extern const VMStateDescription vmstate_x86_cpu;
int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request);
int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu,
- void *opaque);
+ DumpState *s);
int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu,
- void *opaque);
+ DumpState *s);
void x86_cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list,
Error **errp);
diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c
index bb392f6d88..e9f512bcd4 100644
--- a/target/ppc/arch_dump.c
+++ b/target/ppc/arch_dump.c
@@ -270,23 +270,23 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
static int ppc_write_all_elf_notes(const char *note_name,
WriteCoreDumpFunction f,
PowerPCCPU *cpu, int id,
- void *opaque)
+ DumpState *s)
{
- NoteFuncArg arg = { .state = opaque };
+ NoteFuncArg arg = { .state = s };
int ret = -1;
int note_size;
const NoteFuncDesc *nf;
for (nf = note_func; nf->note_contents_func; nf++) {
- arg.note.hdr.n_namesz = cpu_to_dump32(opaque, sizeof(arg.note.name));
- arg.note.hdr.n_descsz = cpu_to_dump32(opaque, nf->contents_size);
+ arg.note.hdr.n_namesz = cpu_to_dump32(s, sizeof(arg.note.name));
+ arg.note.hdr.n_descsz = cpu_to_dump32(s, nf->contents_size);
strncpy(arg.note.name, note_name, sizeof(arg.note.name));
(*nf->note_contents_func)(&arg, cpu);
note_size =
sizeof(arg.note) - sizeof(arg.note.contents) + nf->contents_size;
- ret = f(&arg.note, note_size, opaque);
+ ret = f(&arg.note, note_size, s);
if (ret < 0) {
return -1;
}
@@ -295,15 +295,15 @@ static int ppc_write_all_elf_notes(const char *note_name,
}
int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
- return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque);
+ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, s);
}
int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
- return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque);
+ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, s);
}
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 23e8b76c85..f5fb284706 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1289,9 +1289,9 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu);
const char *ppc_gdb_get_dynamic_xml(CPUState *cs, const char *xml_name);
#endif
int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
#ifndef CONFIG_USER_ONLY
void ppc_cpu_do_interrupt(CPUState *cpu);
bool ppc_cpu_exec_interrupt(CPUState *cpu, int int_req);
diff --git a/target/riscv/arch_dump.c b/target/riscv/arch_dump.c
index 709f621d82..736a232956 100644
--- a/target/riscv/arch_dump.c
+++ b/target/riscv/arch_dump.c
@@ -64,12 +64,11 @@ static void riscv64_note_init(struct riscv64_note *note, DumpState *s,
}
int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
struct riscv64_note note;
RISCVCPU *cpu = RISCV_CPU(cs);
CPURISCVState *env = &cpu->env;
- DumpState *s = opaque;
int ret, i = 0;
const char name[] = "CORE";
@@ -134,12 +133,11 @@ static void riscv32_note_init(struct riscv32_note *note, DumpState *s,
}
int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
struct riscv32_note note;
RISCVCPU *cpu = RISCV_CPU(cs);
CPURISCVState *env = &cpu->env;
- DumpState *s = opaque;
int ret, i;
const char name[] = "CORE";
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 0760c0af93..4cce524b2c 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -344,9 +344,9 @@ extern const char * const riscv_fpr_regnames[];
const char *riscv_cpu_get_trap_name(target_ulong cause, bool async);
void riscv_cpu_do_interrupt(CPUState *cpu);
int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
bool riscv_cpu_fp_enabled(CPURISCVState *env);
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index 08daf93ae1..f60a14920d 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -204,7 +204,7 @@ static const NoteFuncDesc note_linux[] = {
static int s390x_write_elf64_notes(const char *note_name,
WriteCoreDumpFunction f,
S390CPU *cpu, int id,
- void *opaque,
+ DumpState *s,
const NoteFuncDesc *funcs)
{
Note note;
@@ -222,7 +222,7 @@ static int s390x_write_elf64_notes(const char *note_name,
(*nf->note_contents_func)(&note, cpu, id);
note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size;
- ret = f(&note, note_size, opaque);
+ ret = f(&note, note_size, s);
if (ret < 0) {
return -1;
@@ -235,16 +235,16 @@ static int s390x_write_elf64_notes(const char *note_name,
int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
S390CPU *cpu = S390_CPU(cs);
int r;
- r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, opaque, note_core);
+ r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, s, note_core);
if (r) {
return r;
}
- return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, opaque, note_linux);
+ return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux);
}
int cpu_get_dump_info(ArchDumpInfo *info,
diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h
index 1a178aed41..02cf6c3f43 100644
--- a/target/s390x/s390x-internal.h
+++ b/target/s390x/s390x-internal.h
@@ -228,7 +228,7 @@ static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb,
/* arch_dump.c */
int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
/* cc_helper.c */
--
2.37.3

View File

@ -0,0 +1,73 @@
From 1f7cb73592a1922b3a981eb3232098281e07679f Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 11 Aug 2022 12:10:59 +0000
Subject: [PATCH 27/42] dump: Rework dump_calculate_size function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [27/41] eaa05c39109b57a119752ad3df66f4c2ace2cbe4
dump_calculate_size() sums up all the sizes of the guest memory
blocks. Since we already have a function that calculates the size of a
single memory block (dump_get_memblock_size()) we can simply iterate
over the blocks and use the function instead of calculating the size
ourselves.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Message-Id: <20220811121111.9878-7-frankja@linux.ibm.com>
(cherry picked from commit c370d5300f9ac1f90f8158082d22262b904fe30e)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 22 ++++++++--------------
1 file changed, 8 insertions(+), 14 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index f6fe13e258..902a85ef8e 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -1557,25 +1557,19 @@ bool dump_in_progress(void)
return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE);
}
-/* calculate total size of memory to be dumped (taking filter into
- * acoount.) */
+/*
+ * calculate total size of memory to be dumped (taking filter into
+ * account.)
+ */
static int64_t dump_calculate_size(DumpState *s)
{
GuestPhysBlock *block;
- int64_t size = 0, total = 0, left = 0, right = 0;
+ int64_t total = 0;
QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
- if (dump_has_filter(s)) {
- /* calculate the overlapped region. */
- left = MAX(s->filter_area_begin, block->target_start);
- right = MIN(s->filter_area_begin + s->filter_area_length, block->target_end);
- size = right - left;
- size = size > 0 ? size : 0;
- } else {
- /* count the whole region in */
- size = (block->target_end - block->target_start);
- }
- total += size;
+ total += dump_filtered_memblock_size(block,
+ s->filter_area_begin,
+ s->filter_area_length);
}
return total;
--
2.37.3

View File

@ -0,0 +1,187 @@
From 411f5354b809f6b783946e58d7655135814fb809 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 11 Aug 2022 12:10:58 +0000
Subject: [PATCH 26/42] dump: Rework filter area variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [26/41] f10a5523dfd2724f7a8637fca3ed68ba6df659a5
While the DumpState begin and length variables directly mirror the API
variable names they are not very descriptive. So let's add a
"filter_area_" prefix and make has_filter a function checking length > 0.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220811121111.9878-6-frankja@linux.ibm.com>
(cherry picked from commit dddf725f70bfe7f5adb41fa31dbd06e767271bda)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 53 +++++++++++++++++++++++++------------------
include/sysemu/dump.h | 13 ++++++++---
2 files changed, 41 insertions(+), 25 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index e6aa037f59..f6fe13e258 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -60,6 +60,11 @@ static inline bool dump_is_64bit(DumpState *s)
return s->dump_info.d_class == ELFCLASS64;
}
+static inline bool dump_has_filter(DumpState *s)
+{
+ return s->filter_area_length > 0;
+}
+
uint16_t cpu_to_dump16(DumpState *s, uint16_t val)
{
if (s->dump_info.d_endian == ELFDATA2LSB) {
@@ -444,29 +449,30 @@ static void get_offset_range(hwaddr phys_addr,
*p_offset = -1;
*p_filesz = 0;
- if (s->has_filter) {
- if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
+ if (dump_has_filter(s)) {
+ if (phys_addr < s->filter_area_begin ||
+ phys_addr >= s->filter_area_begin + s->filter_area_length) {
return;
}
}
QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
- if (s->has_filter) {
- if (block->target_start >= s->begin + s->length ||
- block->target_end <= s->begin) {
+ if (dump_has_filter(s)) {
+ if (block->target_start >= s->filter_area_begin + s->filter_area_length ||
+ block->target_end <= s->filter_area_begin) {
/* This block is out of the range */
continue;
}
- if (s->begin <= block->target_start) {
+ if (s->filter_area_begin <= block->target_start) {
start = block->target_start;
} else {
- start = s->begin;
+ start = s->filter_area_begin;
}
size_in_block = block->target_end - start;
- if (s->begin + s->length < block->target_end) {
- size_in_block -= block->target_end - (s->begin + s->length);
+ if (s->filter_area_begin + s->filter_area_length < block->target_end) {
+ size_in_block -= block->target_end - (s->filter_area_begin + s->filter_area_length);
}
} else {
start = block->target_start;
@@ -639,12 +645,12 @@ static void dump_iterate(DumpState *s, Error **errp)
int64_t memblock_size, memblock_start;
QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
- memblock_start = dump_filtered_memblock_start(block, s->begin, s->length);
+ memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, s->filter_area_length);
if (memblock_start == -1) {
continue;
}
- memblock_size = dump_filtered_memblock_size(block, s->begin, s->length);
+ memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, s->filter_area_length);
/* Write the memory to file */
write_memory(s, block, memblock_start, memblock_size, errp);
@@ -1513,14 +1519,14 @@ static int validate_start_block(DumpState *s)
{
GuestPhysBlock *block;
- if (!s->has_filter) {
+ if (!dump_has_filter(s)) {
return 0;
}
QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
/* This block is out of the range */
- if (block->target_start >= s->begin + s->length ||
- block->target_end <= s->begin) {
+ if (block->target_start >= s->filter_area_begin + s->filter_area_length ||
+ block->target_end <= s->filter_area_begin) {
continue;
}
return 0;
@@ -1559,10 +1565,10 @@ static int64_t dump_calculate_size(DumpState *s)
int64_t size = 0, total = 0, left = 0, right = 0;
QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
- if (s->has_filter) {
+ if (dump_has_filter(s)) {
/* calculate the overlapped region. */
- left = MAX(s->begin, block->target_start);
- right = MIN(s->begin + s->length, block->target_end);
+ left = MAX(s->filter_area_begin, block->target_start);
+ right = MIN(s->filter_area_begin + s->filter_area_length, block->target_end);
size = right - left;
size = size > 0 ? size : 0;
} else {
@@ -1652,9 +1658,12 @@ static void dump_init(DumpState *s, int fd, bool has_format,
}
s->fd = fd;
- s->has_filter = has_filter;
- s->begin = begin;
- s->length = length;
+ if (has_filter && !length) {
+ error_setg(errp, QERR_INVALID_PARAMETER, "length");
+ goto cleanup;
+ }
+ s->filter_area_begin = begin;
+ s->filter_area_length = length;
memory_mapping_list_init(&s->list);
@@ -1787,8 +1796,8 @@ static void dump_init(DumpState *s, int fd, bool has_format,
return;
}
- if (s->has_filter) {
- memory_mapping_filter(&s->list, s->begin, s->length);
+ if (dump_has_filter(s)) {
+ memory_mapping_filter(&s->list, s->filter_area_begin, s->filter_area_length);
}
/*
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 7fce1d4af6..b62513d87d 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -166,9 +166,16 @@ typedef struct DumpState {
hwaddr memory_offset;
int fd;
- bool has_filter;
- int64_t begin;
- int64_t length;
+ /*
+ * Dump filter area variables
+ *
+ * A filtered dump only contains the guest memory designated by
+ * the start address and length variables defined below.
+ *
+ * If length is 0, no filtering is applied.
+ */
+ int64_t filter_area_begin; /* Start address of partial guest memory area */
+ int64_t filter_area_length; /* Length of partial guest memory area */
uint8_t *note_buf; /* buffer for notes */
size_t note_buf_offset; /* the writing place in note_buf */
--
2.37.3

View File

@ -0,0 +1,102 @@
From b56c362132baef40cc25d910c1e0d217d83cfe44 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 11 Aug 2022 12:10:57 +0000
Subject: [PATCH 25/42] dump: Rework get_start_block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [25/41] c93842a1aaeadcc11e91c194452fcd05d163b3ca
get_start_block() returns the start address of the first memory block
or -1.
With the GuestPhysBlock iterator conversion we don't need to set the
start address and can therefore remove that code and the "start"
DumpState struct member. The only functionality left is the validation
of the start block so it only makes sense to re-name the function to
validate_start_block()
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Message-Id: <20220811121111.9878-5-frankja@linux.ibm.com>
(cherry picked from commit 0c2994ac9009577b967529ce18e269da5b280351)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 20 ++++++--------------
include/sysemu/dump.h | 2 --
2 files changed, 6 insertions(+), 16 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index d981e843dd..e6aa037f59 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -1509,30 +1509,22 @@ static void create_kdump_vmcore(DumpState *s, Error **errp)
}
}
-static ram_addr_t get_start_block(DumpState *s)
+static int validate_start_block(DumpState *s)
{
GuestPhysBlock *block;
if (!s->has_filter) {
- s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
return 0;
}
QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
+ /* This block is out of the range */
if (block->target_start >= s->begin + s->length ||
block->target_end <= s->begin) {
- /* This block is out of the range */
continue;
}
-
- s->next_block = block;
- if (s->begin > block->target_start) {
- s->start = s->begin - block->target_start;
- } else {
- s->start = 0;
- }
- return s->start;
- }
+ return 0;
+ }
return -1;
}
@@ -1679,8 +1671,8 @@ static void dump_init(DumpState *s, int fd, bool has_format,
goto cleanup;
}
- s->start = get_start_block(s);
- if (s->start == -1) {
+ /* Is the filter filtering everything? */
+ if (validate_start_block(s) == -1) {
error_setg(errp, QERR_INVALID_PARAMETER, "begin");
goto cleanup;
}
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index ffc2ea1072..7fce1d4af6 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -166,8 +166,6 @@ typedef struct DumpState {
hwaddr memory_offset;
int fd;
- GuestPhysBlock *next_block;
- ram_addr_t start;
bool has_filter;
int64_t begin;
int64_t length;
--
2.37.3

View File

@ -0,0 +1,173 @@
From d1e147a3133d4d31d4b0c02c05916366fadd9c30 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 11 Aug 2022 12:11:00 +0000
Subject: [PATCH 28/42] dump: Split elf header functions into prepare and write
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [28/41] f70a13ad443835e7f46b7c5e176e372d370ac797
Let's split the write from the modification of the elf header so we
can consolidate the write of the data in one function.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220811121111.9878-8-frankja@linux.ibm.com>
(cherry picked from commit 670e76998a61ca171200fcded3865b294a2d1243)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 100 ++++++++++++++++++++++++++++------------------------
1 file changed, 53 insertions(+), 47 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 902a85ef8e..8d5226f861 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -132,7 +132,7 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
return 0;
}
-static void write_elf64_header(DumpState *s, Error **errp)
+static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header)
{
/*
* phnum in the elf header is 16 bit, if we have more segments we
@@ -140,34 +140,27 @@ static void write_elf64_header(DumpState *s, Error **errp)
* special section.
*/
uint16_t phnum = MIN(s->phdr_num, PN_XNUM);
- Elf64_Ehdr elf_header;
- int ret;
- memset(&elf_header, 0, sizeof(Elf64_Ehdr));
- memcpy(&elf_header, ELFMAG, SELFMAG);
- elf_header.e_ident[EI_CLASS] = ELFCLASS64;
- elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
- elf_header.e_ident[EI_VERSION] = EV_CURRENT;
- elf_header.e_type = cpu_to_dump16(s, ET_CORE);
- elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
- elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
- elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
- elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset);
- elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
- elf_header.e_phnum = cpu_to_dump16(s, phnum);
+ memset(elf_header, 0, sizeof(Elf64_Ehdr));
+ memcpy(elf_header, ELFMAG, SELFMAG);
+ elf_header->e_ident[EI_CLASS] = ELFCLASS64;
+ elf_header->e_ident[EI_DATA] = s->dump_info.d_endian;
+ elf_header->e_ident[EI_VERSION] = EV_CURRENT;
+ elf_header->e_type = cpu_to_dump16(s, ET_CORE);
+ elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
+ elf_header->e_version = cpu_to_dump32(s, EV_CURRENT);
+ elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
+ elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset);
+ elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
+ elf_header->e_phnum = cpu_to_dump16(s, phnum);
if (s->shdr_num) {
- elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset);
- elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
- elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num);
- }
-
- ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "dump: failed to write elf header");
+ elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset);
+ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
+ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
}
}
-static void write_elf32_header(DumpState *s, Error **errp)
+static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header)
{
/*
* phnum in the elf header is 16 bit, if we have more segments we
@@ -175,28 +168,45 @@ static void write_elf32_header(DumpState *s, Error **errp)
* special section.
*/
uint16_t phnum = MIN(s->phdr_num, PN_XNUM);
- Elf32_Ehdr elf_header;
- int ret;
- memset(&elf_header, 0, sizeof(Elf32_Ehdr));
- memcpy(&elf_header, ELFMAG, SELFMAG);
- elf_header.e_ident[EI_CLASS] = ELFCLASS32;
- elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
- elf_header.e_ident[EI_VERSION] = EV_CURRENT;
- elf_header.e_type = cpu_to_dump16(s, ET_CORE);
- elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
- elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
- elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
- elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset);
- elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
- elf_header.e_phnum = cpu_to_dump16(s, phnum);
+ memset(elf_header, 0, sizeof(Elf32_Ehdr));
+ memcpy(elf_header, ELFMAG, SELFMAG);
+ elf_header->e_ident[EI_CLASS] = ELFCLASS32;
+ elf_header->e_ident[EI_DATA] = s->dump_info.d_endian;
+ elf_header->e_ident[EI_VERSION] = EV_CURRENT;
+ elf_header->e_type = cpu_to_dump16(s, ET_CORE);
+ elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
+ elf_header->e_version = cpu_to_dump32(s, EV_CURRENT);
+ elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
+ elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset);
+ elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
+ elf_header->e_phnum = cpu_to_dump16(s, phnum);
if (s->shdr_num) {
- elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset);
- elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
- elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num);
+ elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset);
+ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
+ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num);
}
+}
- ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
+static void write_elf_header(DumpState *s, Error **errp)
+{
+ Elf32_Ehdr elf32_header;
+ Elf64_Ehdr elf64_header;
+ size_t header_size;
+ void *header_ptr;
+ int ret;
+
+ if (dump_is_64bit(s)) {
+ prepare_elf64_header(s, &elf64_header);
+ header_size = sizeof(elf64_header);
+ header_ptr = &elf64_header;
+ } else {
+ prepare_elf32_header(s, &elf32_header);
+ header_size = sizeof(elf32_header);
+ header_ptr = &elf32_header;
+ }
+
+ ret = fd_write_vmcore(header_ptr, header_size, s);
if (ret < 0) {
error_setg_errno(errp, -ret, "dump: failed to write elf header");
}
@@ -565,11 +575,7 @@ static void dump_begin(DumpState *s, Error **errp)
*/
/* write elf header to vmcore */
- if (dump_is_64bit(s)) {
- write_elf64_header(s, errp);
- } else {
- write_elf32_header(s, errp);
- }
+ write_elf_header(s, errp);
if (*errp) {
return;
}
--
2.37.3

View File

@ -0,0 +1,420 @@
From 4ca61efe246d62d420eb332655c0c8ead4cc762b Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Wed, 30 Mar 2022 12:35:55 +0000
Subject: [PATCH 13/42] dump: Use ERRP_GUARD()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [13/41] f735cd1dab0230000cfadd878765fdf4647b239c
Let's move to the new way of handling errors before changing the dump
code. This patch has mostly been generated by the coccinelle script
scripts/coccinelle/errp-guard.cocci.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20220330123603.107120-2-frankja@linux.ibm.com>
(cherry picked from commit 86a518bba4f4d7c9016fc5b104fe1e58b00ad756)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 144 ++++++++++++++++++++++------------------------------
1 file changed, 61 insertions(+), 83 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 662d0a62cd..9876123f2e 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -390,23 +390,21 @@ static void write_data(DumpState *s, void *buf, int length, Error **errp)
static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
int64_t size, Error **errp)
{
+ ERRP_GUARD();
int64_t i;
- Error *local_err = NULL;
for (i = 0; i < size / s->dump_info.page_size; i++) {
write_data(s, block->host_addr + start + i * s->dump_info.page_size,
- s->dump_info.page_size, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ s->dump_info.page_size, errp);
+ if (*errp) {
return;
}
}
if ((size % s->dump_info.page_size) != 0) {
write_data(s, block->host_addr + start + i * s->dump_info.page_size,
- size % s->dump_info.page_size, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ size % s->dump_info.page_size, errp);
+ if (*errp) {
return;
}
}
@@ -476,11 +474,11 @@ static void get_offset_range(hwaddr phys_addr,
static void write_elf_loads(DumpState *s, Error **errp)
{
+ ERRP_GUARD();
hwaddr offset, filesz;
MemoryMapping *memory_mapping;
uint32_t phdr_index = 1;
uint32_t max_index;
- Error *local_err = NULL;
if (s->have_section) {
max_index = s->sh_info;
@@ -494,14 +492,13 @@ static void write_elf_loads(DumpState *s, Error **errp)
s, &offset, &filesz);
if (s->dump_info.d_class == ELFCLASS64) {
write_elf64_load(s, memory_mapping, phdr_index++, offset,
- filesz, &local_err);
+ filesz, errp);
} else {
write_elf32_load(s, memory_mapping, phdr_index++, offset,
- filesz, &local_err);
+ filesz, errp);
}
- if (local_err) {
- error_propagate(errp, local_err);
+ if (*errp) {
return;
}
@@ -514,7 +511,7 @@ static void write_elf_loads(DumpState *s, Error **errp)
/* write elf header, PT_NOTE and elf note to vmcore. */
static void dump_begin(DumpState *s, Error **errp)
{
- Error *local_err = NULL;
+ ERRP_GUARD();
/*
* the vmcore's format is:
@@ -542,73 +539,64 @@ static void dump_begin(DumpState *s, Error **errp)
/* write elf header to vmcore */
if (s->dump_info.d_class == ELFCLASS64) {
- write_elf64_header(s, &local_err);
+ write_elf64_header(s, errp);
} else {
- write_elf32_header(s, &local_err);
+ write_elf32_header(s, errp);
}
- if (local_err) {
- error_propagate(errp, local_err);
+ if (*errp) {
return;
}
if (s->dump_info.d_class == ELFCLASS64) {
/* write PT_NOTE to vmcore */
- write_elf64_note(s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_elf64_note(s, errp);
+ if (*errp) {
return;
}
/* write all PT_LOAD to vmcore */
- write_elf_loads(s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_elf_loads(s, errp);
+ if (*errp) {
return;
}
/* write section to vmcore */
if (s->have_section) {
- write_elf_section(s, 1, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_elf_section(s, 1, errp);
+ if (*errp) {
return;
}
}
/* write notes to vmcore */
- write_elf64_notes(fd_write_vmcore, s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_elf64_notes(fd_write_vmcore, s, errp);
+ if (*errp) {
return;
}
} else {
/* write PT_NOTE to vmcore */
- write_elf32_note(s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_elf32_note(s, errp);
+ if (*errp) {
return;
}
/* write all PT_LOAD to vmcore */
- write_elf_loads(s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_elf_loads(s, errp);
+ if (*errp) {
return;
}
/* write section to vmcore */
if (s->have_section) {
- write_elf_section(s, 0, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_elf_section(s, 0, errp);
+ if (*errp) {
return;
}
}
/* write notes to vmcore */
- write_elf32_notes(fd_write_vmcore, s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_elf32_notes(fd_write_vmcore, s, errp);
+ if (*errp) {
return;
}
}
@@ -644,9 +632,9 @@ static int get_next_block(DumpState *s, GuestPhysBlock *block)
/* write all memory to vmcore */
static void dump_iterate(DumpState *s, Error **errp)
{
+ ERRP_GUARD();
GuestPhysBlock *block;
int64_t size;
- Error *local_err = NULL;
do {
block = s->next_block;
@@ -658,9 +646,8 @@ static void dump_iterate(DumpState *s, Error **errp)
size -= block->target_end - (s->begin + s->length);
}
}
- write_memory(s, block, s->start, size, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_memory(s, block, s->start, size, errp);
+ if (*errp) {
return;
}
@@ -669,11 +656,10 @@ static void dump_iterate(DumpState *s, Error **errp)
static void create_vmcore(DumpState *s, Error **errp)
{
- Error *local_err = NULL;
+ ERRP_GUARD();
- dump_begin(s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ dump_begin(s, errp);
+ if (*errp) {
return;
}
@@ -810,6 +796,7 @@ static bool note_name_equal(DumpState *s,
/* write common header, sub header and elf note to vmcore */
static void create_header32(DumpState *s, Error **errp)
{
+ ERRP_GUARD();
DiskDumpHeader32 *dh = NULL;
KdumpSubHeader32 *kh = NULL;
size_t size;
@@ -818,7 +805,6 @@ static void create_header32(DumpState *s, Error **errp)
uint32_t bitmap_blocks;
uint32_t status = 0;
uint64_t offset_note;
- Error *local_err = NULL;
/* write common header, the version of kdump-compressed format is 6th */
size = sizeof(DiskDumpHeader32);
@@ -894,9 +880,8 @@ static void create_header32(DumpState *s, Error **errp)
s->note_buf_offset = 0;
/* use s->note_buf to store notes temporarily */
- write_elf32_notes(buf_write_note, s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_elf32_notes(buf_write_note, s, errp);
+ if (*errp) {
goto out;
}
if (write_buffer(s->fd, offset_note, s->note_buf,
@@ -922,6 +907,7 @@ out:
/* write common header, sub header and elf note to vmcore */
static void create_header64(DumpState *s, Error **errp)
{
+ ERRP_GUARD();
DiskDumpHeader64 *dh = NULL;
KdumpSubHeader64 *kh = NULL;
size_t size;
@@ -930,7 +916,6 @@ static void create_header64(DumpState *s, Error **errp)
uint32_t bitmap_blocks;
uint32_t status = 0;
uint64_t offset_note;
- Error *local_err = NULL;
/* write common header, the version of kdump-compressed format is 6th */
size = sizeof(DiskDumpHeader64);
@@ -1006,9 +991,8 @@ static void create_header64(DumpState *s, Error **errp)
s->note_buf_offset = 0;
/* use s->note_buf to store notes temporarily */
- write_elf64_notes(buf_write_note, s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_elf64_notes(buf_write_note, s, errp);
+ if (*errp) {
goto out;
}
@@ -1472,8 +1456,8 @@ out:
static void create_kdump_vmcore(DumpState *s, Error **errp)
{
+ ERRP_GUARD();
int ret;
- Error *local_err = NULL;
/*
* the kdump-compressed format is:
@@ -1503,21 +1487,18 @@ static void create_kdump_vmcore(DumpState *s, Error **errp)
return;
}
- write_dump_header(s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_dump_header(s, errp);
+ if (*errp) {
return;
}
- write_dump_bitmap(s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_dump_bitmap(s, errp);
+ if (*errp) {
return;
}
- write_dump_pages(s, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ write_dump_pages(s, errp);
+ if (*errp) {
return;
}
@@ -1647,10 +1628,10 @@ static void dump_init(DumpState *s, int fd, bool has_format,
DumpGuestMemoryFormat format, bool paging, bool has_filter,
int64_t begin, int64_t length, Error **errp)
{
+ ERRP_GUARD();
VMCoreInfoState *vmci = vmcoreinfo_find();
CPUState *cpu;
int nr_cpus;
- Error *err = NULL;
int ret;
s->has_format = has_format;
@@ -1769,9 +1750,8 @@ static void dump_init(DumpState *s, int fd, bool has_format,
/* get memory mapping */
if (paging) {
- qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
- if (err != NULL) {
- error_propagate(errp, err);
+ qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, errp);
+ if (*errp) {
goto cleanup;
}
} else {
@@ -1870,33 +1850,32 @@ cleanup:
/* this operation might be time consuming. */
static void dump_process(DumpState *s, Error **errp)
{
- Error *local_err = NULL;
+ ERRP_GUARD();
DumpQueryResult *result = NULL;
if (s->has_format && s->format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) {
#ifdef TARGET_X86_64
- create_win_dump(s, &local_err);
+ create_win_dump(s, errp);
#endif
} else if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
- create_kdump_vmcore(s, &local_err);
+ create_kdump_vmcore(s, errp);
} else {
- create_vmcore(s, &local_err);
+ create_vmcore(s, errp);
}
/* make sure status is written after written_size updates */
smp_wmb();
qatomic_set(&s->status,
- (local_err ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED));
+ (*errp ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED));
/* send DUMP_COMPLETED message (unconditionally) */
result = qmp_query_dump(NULL);
/* should never fail */
assert(result);
- qapi_event_send_dump_completed(result, !!local_err, (local_err ?
- error_get_pretty(local_err) : NULL));
+ qapi_event_send_dump_completed(result, !!*errp, (*errp ?
+ error_get_pretty(*errp) : NULL));
qapi_free_DumpQueryResult(result);
- error_propagate(errp, local_err);
dump_cleanup(s);
}
@@ -1925,10 +1904,10 @@ void qmp_dump_guest_memory(bool paging, const char *file,
int64_t length, bool has_format,
DumpGuestMemoryFormat format, Error **errp)
{
+ ERRP_GUARD();
const char *p;
int fd = -1;
DumpState *s;
- Error *local_err = NULL;
bool detach_p = false;
if (runstate_check(RUN_STATE_INMIGRATE)) {
@@ -2028,9 +2007,8 @@ void qmp_dump_guest_memory(bool paging, const char *file,
dump_state_prepare(s);
dump_init(s, fd, has_format, format, paging, has_begin,
- begin, length, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ begin, length, errp);
+ if (*errp) {
qatomic_set(&s->status, DUMP_STATUS_FAILED);
return;
}
--
2.37.3

View File

@ -0,0 +1,150 @@
From a918c7305ec7c68e8bc37b449f71e75d84124cd0 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Mon, 17 Oct 2022 08:38:13 +0000
Subject: [PATCH 32/42] dump: Use a buffer for ELF section data and headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [32/41] e1a03e202e67764581e486f37e13e479200e5846
Currently we're writing the NULL section header if we overflow the
physical header number in the ELF header. But in the future we'll add
custom section headers AND section data.
To facilitate this we need to rearange section handling a bit. As with
the other ELF headers we split the code into a prepare and a write
step.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20221017083822.43118-2-frankja@linux.ibm.com>
(cherry picked from commit e41ed29bcee5cb16715317bcf290f6b5c196eb0a)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 75 +++++++++++++++++++++++++++++--------------
include/sysemu/dump.h | 2 ++
2 files changed, 53 insertions(+), 24 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 88177fa886..4142b4cc0c 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -381,31 +381,60 @@ static void write_elf_phdr_note(DumpState *s, Error **errp)
}
}
-static void write_elf_section(DumpState *s, int type, Error **errp)
+static void prepare_elf_section_hdr_zero(DumpState *s)
{
- Elf32_Shdr shdr32;
- Elf64_Shdr shdr64;
- int shdr_size;
- void *shdr;
- int ret;
+ if (dump_is_64bit(s)) {
+ Elf64_Shdr *shdr64 = s->elf_section_hdrs;
- if (type == 0) {
- shdr_size = sizeof(Elf32_Shdr);
- memset(&shdr32, 0, shdr_size);
- shdr32.sh_info = cpu_to_dump32(s, s->phdr_num);
- shdr = &shdr32;
+ shdr64->sh_info = cpu_to_dump32(s, s->phdr_num);
} else {
- shdr_size = sizeof(Elf64_Shdr);
- memset(&shdr64, 0, shdr_size);
- shdr64.sh_info = cpu_to_dump32(s, s->phdr_num);
- shdr = &shdr64;
+ Elf32_Shdr *shdr32 = s->elf_section_hdrs;
+
+ shdr32->sh_info = cpu_to_dump32(s, s->phdr_num);
+ }
+}
+
+static void prepare_elf_section_hdrs(DumpState *s)
+{
+ size_t len, sizeof_shdr;
+
+ /*
+ * Section ordering:
+ * - HDR zero
+ */
+ sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
+ len = sizeof_shdr * s->shdr_num;
+ s->elf_section_hdrs = g_malloc0(len);
+
+ /*
+ * The first section header is ALWAYS a special initial section
+ * header.
+ *
+ * The header should be 0 with one exception being that if
+ * phdr_num is PN_XNUM then the sh_info field contains the real
+ * number of segment entries.
+ *
+ * As we zero allocate the buffer we will only need to modify
+ * sh_info for the PN_XNUM case.
+ */
+ if (s->phdr_num >= PN_XNUM) {
+ prepare_elf_section_hdr_zero(s);
}
+}
- ret = fd_write_vmcore(shdr, shdr_size, s);
+static void write_elf_section_headers(DumpState *s, Error **errp)
+{
+ size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
+ int ret;
+
+ prepare_elf_section_hdrs(s);
+
+ ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s);
if (ret < 0) {
- error_setg_errno(errp, -ret,
- "dump: failed to write section header table");
+ error_setg_errno(errp, -ret, "dump: failed to write section headers");
}
+
+ g_free(s->elf_section_hdrs);
}
static void write_data(DumpState *s, void *buf, int length, Error **errp)
@@ -592,12 +621,10 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
- /* write section to vmcore */
- if (s->shdr_num) {
- write_elf_section(s, 1, errp);
- if (*errp) {
- return;
- }
+ /* write section headers to vmcore */
+ write_elf_section_headers(s, errp);
+ if (*errp) {
+ return;
}
/* write notes to vmcore */
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index b62513d87d..9995f65dc8 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -177,6 +177,8 @@ typedef struct DumpState {
int64_t filter_area_begin; /* Start address of partial guest memory area */
int64_t filter_area_length; /* Length of partial guest memory area */
+ void *elf_section_hdrs; /* Pointer to section header buffer */
+
uint8_t *note_buf; /* buffer for notes */
size_t note_buf_offset; /* the writing place in note_buf */
uint32_t nr_cpus; /* number of guest's cpu */
--
2.37.3

View File

@ -0,0 +1,104 @@
From 987ede93fa4e3d058acddc19874e467faa116ede Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Mon, 17 Oct 2022 08:38:14 +0000
Subject: [PATCH 33/42] dump: Write ELF section headers right after ELF header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [33/41] e956040753533ac376e9763145192de1e216027d
Let's start bundling the writes of the headers and of the data so we
have a clear ordering between them. Since the ELF header uses offsets
to the headers we can freely order them.
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20221017083822.43118-3-frankja@linux.ibm.com>
(cherry picked from commit cb415fd61e48d52f81dcf38956e3f913651cff1c)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 31 ++++++++++++++-----------------
1 file changed, 14 insertions(+), 17 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 4142b4cc0c..d17537d4e9 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -584,6 +584,8 @@ static void dump_begin(DumpState *s, Error **errp)
* --------------
* | elf header |
* --------------
+ * | sctn_hdr |
+ * --------------
* | PT_NOTE |
* --------------
* | PT_LOAD |
@@ -592,8 +594,6 @@ static void dump_begin(DumpState *s, Error **errp)
* --------------
* | PT_LOAD |
* --------------
- * | sec_hdr |
- * --------------
* | elf note |
* --------------
* | memory |
@@ -609,20 +609,20 @@ static void dump_begin(DumpState *s, Error **errp)
return;
}
- /* write PT_NOTE to vmcore */
- write_elf_phdr_note(s, errp);
+ /* write section headers to vmcore */
+ write_elf_section_headers(s, errp);
if (*errp) {
return;
}
- /* write all PT_LOADs to vmcore */
- write_elf_phdr_loads(s, errp);
+ /* write PT_NOTE to vmcore */
+ write_elf_phdr_note(s, errp);
if (*errp) {
return;
}
- /* write section headers to vmcore */
- write_elf_section_headers(s, errp);
+ /* write all PT_LOADs to vmcore */
+ write_elf_phdr_loads(s, errp);
if (*errp) {
return;
}
@@ -1877,16 +1877,13 @@ static void dump_init(DumpState *s, int fd, bool has_format,
}
if (dump_is_64bit(s)) {
- s->phdr_offset = sizeof(Elf64_Ehdr);
- s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
- s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
- s->memory_offset = s->note_offset + s->note_size;
+ s->shdr_offset = sizeof(Elf64_Ehdr);
+ s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;
+ s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num;
} else {
-
- s->phdr_offset = sizeof(Elf32_Ehdr);
- s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
- s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
- s->memory_offset = s->note_offset + s->note_size;
+ s->shdr_offset = sizeof(Elf32_Ehdr);
+ s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num;
+ s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num;
}
return;
--
2.37.3

View File

@ -0,0 +1,173 @@
From deaf4e0f5e90d227b7b9f3e5d1dff7fd0bc0206a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
Date: Mon, 5 Sep 2022 16:06:21 +0400
Subject: [PATCH 31/42] dump: fix kdump to work over non-aligned blocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [31/41] b307bdce4a4791fc30160fa2a1678bd238f2432e
Rewrite get_next_page() to work over non-aligned blocks. When it
encounters non aligned addresses, it will try to fill a page provided by
the caller.
This solves a kdump crash with "tpm-crb-cmd" RAM memory region,
qemu-kvm: ../dump/dump.c:1162: _Bool get_next_page(GuestPhysBlock **,
uint64_t *, uint8_t **, DumpState *): Assertion `(block->target_start &
~target_page_mask) == 0' failed.
because:
guest_phys_block_add_section: target_start=00000000fed40080 target_end=00000000fed41000: added (count: 4)
Fixes:
https://bugzilla.redhat.com/show_bug.cgi?id=2120480
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
(cherry picked from commit 94d788408d2d5a6474c99b2c9cf06913b9db7c58)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 79 +++++++++++++++++++++++++++++++++++++----------------
1 file changed, 56 insertions(+), 23 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index 1c49232390..88177fa886 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -1117,50 +1117,81 @@ static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn)
}
/*
- * exam every page and return the page frame number and the address of the page.
- * bufptr can be NULL. note: the blocks here is supposed to reflect guest-phys
- * blocks, so block->target_start and block->target_end should be interal
- * multiples of the target page size.
+ * Return the page frame number and the page content in *bufptr. bufptr can be
+ * NULL. If not NULL, *bufptr must contains a target page size of pre-allocated
+ * memory. This is not necessarily the memory returned.
*/
static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
uint8_t **bufptr, DumpState *s)
{
GuestPhysBlock *block = *blockptr;
- hwaddr addr, target_page_mask = ~((hwaddr)s->dump_info.page_size - 1);
- uint8_t *buf;
+ uint32_t page_size = s->dump_info.page_size;
+ uint8_t *buf = NULL, *hbuf;
+ hwaddr addr;
/* block == NULL means the start of the iteration */
if (!block) {
block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
*blockptr = block;
addr = block->target_start;
+ *pfnptr = dump_paddr_to_pfn(s, addr);
} else {
- addr = dump_pfn_to_paddr(s, *pfnptr + 1);
+ *pfnptr += 1;
+ addr = dump_pfn_to_paddr(s, *pfnptr);
}
assert(block != NULL);
- if ((addr >= block->target_start) &&
- (addr + s->dump_info.page_size <= block->target_end)) {
- buf = block->host_addr + (addr - block->target_start);
- } else {
- /* the next page is in the next block */
- block = QTAILQ_NEXT(block, next);
- *blockptr = block;
- if (!block) {
- return false;
+ while (1) {
+ if (addr >= block->target_start && addr < block->target_end) {
+ size_t n = MIN(block->target_end - addr, page_size - addr % page_size);
+ hbuf = block->host_addr + (addr - block->target_start);
+ if (!buf) {
+ if (n == page_size) {
+ /* this is a whole target page, go for it */
+ assert(addr % page_size == 0);
+ buf = hbuf;
+ break;
+ } else if (bufptr) {
+ assert(*bufptr);
+ buf = *bufptr;
+ memset(buf, 0, page_size);
+ } else {
+ return true;
+ }
+ }
+
+ memcpy(buf + addr % page_size, hbuf, n);
+ addr += n;
+ if (addr % page_size == 0) {
+ /* we filled up the page */
+ break;
+ }
+ } else {
+ /* the next page is in the next block */
+ *blockptr = block = QTAILQ_NEXT(block, next);
+ if (!block) {
+ break;
+ }
+
+ addr = block->target_start;
+ /* are we still in the same page? */
+ if (dump_paddr_to_pfn(s, addr) != *pfnptr) {
+ if (buf) {
+ /* no, but we already filled something earlier, return it */
+ break;
+ } else {
+ /* else continue from there */
+ *pfnptr = dump_paddr_to_pfn(s, addr);
+ }
+ }
}
- addr = block->target_start;
- buf = block->host_addr;
}
- assert((block->target_start & ~target_page_mask) == 0);
- assert((block->target_end & ~target_page_mask) == 0);
- *pfnptr = dump_paddr_to_pfn(s, addr);
if (bufptr) {
*bufptr = buf;
}
- return true;
+ return buf != NULL;
}
static void write_dump_bitmap(DumpState *s, Error **errp)
@@ -1306,6 +1337,7 @@ static void write_dump_pages(DumpState *s, Error **errp)
uint8_t *buf;
GuestPhysBlock *block_iter = NULL;
uint64_t pfn_iter;
+ g_autofree uint8_t *page = NULL;
/* get offset of page_desc and page_data in dump file */
offset_desc = s->offset_page;
@@ -1341,12 +1373,13 @@ static void write_dump_pages(DumpState *s, Error **errp)
}
offset_data += s->dump_info.page_size;
+ page = g_malloc(s->dump_info.page_size);
/*
* dump memory to vmcore page by page. zero page will all be resided in the
* first page of page section
*/
- while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
+ for (buf = page; get_next_page(&block_iter, &pfn_iter, &buf, s); buf = page) {
/* check zero page */
if (is_zero_page(buf, s->dump_info.page_size)) {
ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
--
2.37.3

View File

@ -0,0 +1,75 @@
From bb55fde4d8ca587e2ef52ce58a0c22e4d66a08dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
Date: Thu, 25 Aug 2022 12:40:12 +0400
Subject: [PATCH 30/42] dump: simplify a bit kdump get_next_page()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump
RH-Bugzilla: 1664378 2043909
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [30/41] 417ac19fa96036e0242f40121ac6e87a9f3f70ba
This should be functionally equivalent, but slightly easier to read,
with simplified paths and checks at the end of the function.
The following patch is a major rewrite to get rid of the assert().
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
(cherry picked from commit 08df343874fcddd260021a04ce3c5a34f2c48164)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
dump/dump.c | 21 ++++++++-------------
1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/dump/dump.c b/dump/dump.c
index c2c1341ad7..1c49232390 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -1133,17 +1133,11 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
if (!block) {
block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
*blockptr = block;
- assert((block->target_start & ~target_page_mask) == 0);
- assert((block->target_end & ~target_page_mask) == 0);
- *pfnptr = dump_paddr_to_pfn(s, block->target_start);
- if (bufptr) {
- *bufptr = block->host_addr;
- }
- return true;
+ addr = block->target_start;
+ } else {
+ addr = dump_pfn_to_paddr(s, *pfnptr + 1);
}
-
- *pfnptr = *pfnptr + 1;
- addr = dump_pfn_to_paddr(s, *pfnptr);
+ assert(block != NULL);
if ((addr >= block->target_start) &&
(addr + s->dump_info.page_size <= block->target_end)) {
@@ -1155,12 +1149,13 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
if (!block) {
return false;
}
- assert((block->target_start & ~target_page_mask) == 0);
- assert((block->target_end & ~target_page_mask) == 0);
- *pfnptr = dump_paddr_to_pfn(s, block->target_start);
+ addr = block->target_start;
buf = block->host_addr;
}
+ assert((block->target_start & ~target_page_mask) == 0);
+ assert((block->target_end & ~target_page_mask) == 0);
+ *pfnptr = dump_paddr_to_pfn(s, addr);
if (bufptr) {
*bufptr = buf;
}
--
2.37.3

View File

@ -0,0 +1,61 @@
From 7693449b235bbab6d32a1b87fa1d0e101c786f3b Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:11:14 -0500
Subject: [PATCH 05/13] edu: add smp_mb__after_rmw()
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2168472
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [5/10] 300901290e08b253b1278eedc39cd07c1e202b96
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472
commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu Mar 2 11:16:13 2023 +0100
edu: add smp_mb__after_rmw()
Ensure ordering between clearing the COMPUTING flag and checking
IRQFACT, and between setting the IRQFACT flag and checking
COMPUTING. This ensures that no wakeups are lost.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
hw/misc/edu.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index e935c418d4..a1f8bc77e7 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
case 0x20:
if (val & EDU_STATUS_IRQFACT) {
qatomic_or(&edu->status, EDU_STATUS_IRQFACT);
+ /* Order check of the COMPUTING flag after setting IRQFACT. */
+ smp_mb__after_rmw();
} else {
qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT);
}
@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque)
qemu_mutex_unlock(&edu->thr_mutex);
qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING);
+ /* Clear COMPUTING flag before checking IRQFACT. */
+ smp_mb__after_rmw();
+
if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) {
qemu_mutex_lock_iothread();
edu_raise_irq(edu, FACT_IRQ);
--
2.37.3

View File

@ -0,0 +1,105 @@
From 939c75ab92ac608893cad0e46f55527950518a57 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 5 Mar 2024 11:36:15 -0500
Subject: [PATCH 1/3] glib-compat: Introduce g_memdup2() wrapper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 353: ui/clipboard: mark type as not available when there is no data
RH-Jira: RHEL-19628
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
RH-Acked-by: Gerd Hoffmann <None>
RH-Commit: [1/2] f401c63303ef558bfcbb36e4c8fcc8bf2b1c3eb4 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
JIRA: https://issues.redhat.com/browse/RHEL-19628
CVE: CVE-2023-6683
Upstream: Merged
commit 2c674fada72079583a3f2cc1790b16a0259c4fa0
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
Date: Fri Sep 3 19:44:44 2021 +0200
glib-compat: Introduce g_memdup2() wrapper
When experimenting raising GLIB_VERSION_MIN_REQUIRED to 2.68
(Fedora 34 provides GLib 2.68.1) we get:
hw/virtio/virtio-crypto.c:245:24: error: 'g_memdup' is deprecated: Use 'g_memdup2' instead [-Werror,-Wdeprecated-declarations]
...
g_memdup() has been updated by g_memdup2() to fix eventual security
issues (size argument is 32-bit and could be truncated / wrapping).
GLib recommends to copy their static inline version of g_memdup2():
https://discourse.gnome.org/t/port-your-module-from-g-memdup-to-g-memdup2-now/5538
Our glib-compat.h provides a comment explaining how to deal with
these deprecated declarations (see commit e71e8cc0355
"glib: enforce the minimum required version and warn about old APIs").
Following this comment suggestion, implement the g_memdup2_qemu()
wrapper to g_memdup2(), and use the safer equivalent inlined when
we are using pre-2.68 GLib.
Reported-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20210903174510.751630-3-philmd@redhat.com>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
include/glib-compat.h | 37 +++++++++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/include/glib-compat.h b/include/glib-compat.h
index 9e95c888f5..8d01a8c01f 100644
--- a/include/glib-compat.h
+++ b/include/glib-compat.h
@@ -68,6 +68,43 @@
* without generating warnings.
*/
+/*
+ * g_memdup2_qemu:
+ * @mem: (nullable): the memory to copy.
+ * @byte_size: the number of bytes to copy.
+ *
+ * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
+ * from @mem. If @mem is %NULL it returns %NULL.
+ *
+ * This replaces g_memdup(), which was prone to integer overflows when
+ * converting the argument from a #gsize to a #guint.
+ *
+ * This static inline version is a backport of the new public API from
+ * GLib 2.68, kept internal to GLib for backport to older stable releases.
+ * See https://gitlab.gnome.org/GNOME/glib/-/issues/2319.
+ *
+ * Returns: (nullable): a pointer to the newly-allocated copy of the memory,
+ * or %NULL if @mem is %NULL.
+ */
+static inline gpointer g_memdup2_qemu(gconstpointer mem, gsize byte_size)
+{
+#if GLIB_CHECK_VERSION(2, 68, 0)
+ return g_memdup2(mem, byte_size);
+#else
+ gpointer new_mem;
+
+ if (mem && byte_size != 0) {
+ new_mem = g_malloc(byte_size);
+ memcpy(new_mem, mem, byte_size);
+ } else {
+ new_mem = NULL;
+ }
+
+ return new_mem;
+#endif
+}
+#define g_memdup2(m, s) g_memdup2_qemu(m, s)
+
#if defined(G_OS_UNIX)
/*
* Note: The fallback implementation is not MT-safe, and it returns a copy of
--
2.41.0

View File

@ -0,0 +1,81 @@
From edead46187b1e55ad5e238332780aef19f1bc214 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Wed, 9 Nov 2022 18:41:18 -0500
Subject: [PATCH 1/2] hw/acpi: Add ospm_status hook implementation for acpi-ged
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 228: qemu-kvm: backport some aarch64 fixes
RH-Bugzilla: 2132609
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Gavin Shan <gshan@redhat.com>
RH-Commit: [1/2] 99730b1a27666ca745dc28d90751c938d43f1682 (jmaloy/qemu-kvm)
BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2132609
Upstream: Merged
commit d4424bebceaa8ffbc23060ce45e52a9bb817e3c9
Author: Keqian Zhu <zhukeqian1@huawei.com>
Date: Tue Aug 16 17:49:57 2022 +0800
hw/acpi: Add ospm_status hook implementation for acpi-ged
Setup an ARM virtual machine of machine virt and execute qmp "query-acpi-ospm-status"
causes segmentation fault with following dumpstack:
#1 0x0000aaaaab64235c in qmp_query_acpi_ospm_status (errp=errp@entry=0xfffffffff030) at ../monitor/qmp-cmds.c:312
#2 0x0000aaaaabfc4e20 in qmp_marshal_query_acpi_ospm_status (args=<optimized out>, ret=0xffffea4ffe90, errp=0xffffea4ffe88) at qapi/qapi-commands-acpi.c:63
#3 0x0000aaaaabff8ba0 in do_qmp_dispatch_bh (opaque=0xffffea4ffe98) at ../qapi/qmp-dispatch.c:128
#4 0x0000aaaaac02e594 in aio_bh_call (bh=0xffffe0004d80) at ../util/async.c:150
#5 aio_bh_poll (ctx=ctx@entry=0xaaaaad0f6040) at ../util/async.c:178
#6 0x0000aaaaac00bd40 in aio_dispatch (ctx=ctx@entry=0xaaaaad0f6040) at ../util/aio-posix.c:421
#7 0x0000aaaaac02e010 in aio_ctx_dispatch (source=0xaaaaad0f6040, callback=<optimized out>, user_data=<optimized out>) at ../util/async.c:320
#8 0x0000fffff76f6884 in g_main_context_dispatch () at /usr/lib64/libglib-2.0.so.0
#9 0x0000aaaaac0452d4 in glib_pollfds_poll () at ../util/main-loop.c:297
#10 os_host_main_loop_wait (timeout=0) at ../util/main-loop.c:320
#11 main_loop_wait (nonblocking=nonblocking@entry=0) at ../util/main-loop.c:596
#12 0x0000aaaaab5c9e50 in qemu_main_loop () at ../softmmu/runstate.c:734
#13 0x0000aaaaab185370 in qemu_main (argc=argc@entry=47, argv=argv@entry=0xfffffffff518, envp=envp@entry=0x0) at ../softmmu/main.c:38
#14 0x0000aaaaab16f99c in main (argc=47, argv=0xfffffffff518) at ../softmmu/main.c:47
Fixes: ebb62075021a ("hw/acpi: Add ACPI Generic Event Device Support")
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Message-id: 20220816094957.31700-1-zhukeqian1@huawei.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit d4424bebceaa8ffbc23060ce45e52a9bb817e3c9)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/acpi/generic_event_device.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index e28457a7d1..a3d31631fe 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -267,6 +267,13 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev,
}
}
+static void acpi_ged_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list)
+{
+ AcpiGedState *s = ACPI_GED(adev);
+
+ acpi_memory_ospm_status(&s->memhp_state, list);
+}
+
static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
{
AcpiGedState *s = ACPI_GED(adev);
@@ -409,6 +416,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data)
hc->unplug_request = acpi_ged_unplug_request_cb;
hc->unplug = acpi_ged_unplug_cb;
+ adevc->ospm_status = acpi_ged_ospm_status;
adevc->send_event = acpi_ged_send_event;
}
--
2.37.3

View File

@ -0,0 +1,119 @@
From 4f6f881de10e31cac4636d5fde4b7ed4c8affadb Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 4 Jan 2024 12:02:31 +0100
Subject: [PATCH 3/3] hw/arm/virt: Do not load efi-virtio.rom for all
virtio-net-pci variants
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 344: hw/arm/virt: Do not load efi-virtio.rom for any virtio-net-pci variants
RH-Jira: RHEL-14870
RH-Acked-by: Gerd Hoffmann <None>
RH-Acked-by: Sebastian Ott <None>
RH-Commit: [1/1] ffeaa78ad0a1cff5b49009dfb32d25e5cadc0e05
Upstream: RHEL-only
Brew: http://brewweb.engineering.redhat.com/brew/taskinfo?taskID=5785640
Currently arm_rhel_compat just sets the romfile to "" for
virtio-net-pci and not for transitional and non transitional
variants. However, on aarch64 RHEL, efi-virtio.rom is not
shipped so transitional and non-transitional variants cannot
be used and the following error is obeserved:
"Could not open option rom 'efi-virtio.rom': No such file or directory"
In practice, we do not need any rom file for those virtio-net-pci
variants either because edk2 already brings the full functionality.
So let's change the applied compat to cover all the variants. While
at it also change the way arm_rhel_compat is applied. Instead of
applying it from the latest _virt_options(), which is error prone
when upgrading the machine type, let's apply it before calling
*virt_options in the non abstract machine class. That way the setting
will apply to any machine type without any need to add it in any
future machine types.
We don't really care keeping non void romfiles for transitional and
non transitional devices on previous machine types because this
was not working anyway.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/arm/virt.c | 42 ++++++++++++++++++++++++++++--------------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index dbf0a6d62f..46c72a9611 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -108,11 +108,39 @@
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
#endif /* disabled for RHEL */
+/*
+ * This variable is for changes to properties that are RHEL specific,
+ * different to the current upstream and to be applied to the latest
+ * machine type. They may be overriden by older machine compats.
+ *
+ * virtio-net-pci variant romfiles are not needed because edk2 does
+ * fully support the pxe boot. Besides virtio romfiles are not shipped
+ * on rhel/aarch64.
+ */
+GlobalProperty arm_rhel_compat[] = {
+ {"virtio-net-pci", "romfile", "" },
+ {"virtio-net-pci-transitional", "romfile", "" },
+ {"virtio-net-pci-non-transitional", "romfile", "" },
+};
+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
+
+/*
+ * This cannot be called from the rhel_virt_class_init() because
+ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new()
+ * only is called on virt-rhelm.n.s non abstract class init.
+ */
+static void arm_rhel_compat_set(MachineClass *mc)
+{
+ compat_props_add(mc->compat_props, arm_rhel_compat,
+ arm_rhel_compat_len);
+}
+
#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \
static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \
void *data) \
{ \
MachineClass *mc = MACHINE_CLASS(oc); \
+ arm_rhel_compat_set(mc); \
rhel##m##n##s##_virt_options(mc); \
mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \
if (latest) { \
@@ -136,19 +164,6 @@
#define DEFINE_RHEL_MACHINE(major, minor, subminor) \
DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false)
-/* This variable is for changes to properties that are RHEL specific,
- * different to the current upstream and to be applied to the latest
- * machine type.
- */
-GlobalProperty arm_rhel_compat[] = {
- {
- .driver = "virtio-net-pci",
- .property = "romfile",
- .value = "",
- },
-};
-const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
-
/* Number of external interrupt lines to configure the GIC with */
#define NUM_IRQS 256
@@ -3240,7 +3255,6 @@ type_init(rhel_machine_init);
static void rhel860_virt_options(MachineClass *mc)
{
- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
}
DEFINE_RHEL_MACHINE_AS_LATEST(8, 6, 0)
--
2.41.0

View File

@ -0,0 +1,97 @@
From fe4abbda80eea7f65b6b5cc544a806fb6e064917 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Thu, 18 Nov 2021 12:57:32 +0100
Subject: [PATCH 2/3] hw/block/fdc: Prevent end-of-track overrun
(CVE-2021-3507)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 194: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507)
RH-Commit: [1/2] 31fa0351382b4ca5bd989b09e4d811ae73040673 (jmaloy/qemu-kvm)
RH-Bugzilla: 1951521
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
Per the 82078 datasheet, if the end-of-track (EOT byte in
the FIFO) is more than the number of sectors per side, the
command is terminated unsuccessfully:
* 5.2.5 DATA TRANSFER TERMINATION
The 82078 supports terminal count explicitly through
the TC pin and implicitly through the underrun/over-
run and end-of-track (EOT) functions. For full sector
transfers, the EOT parameter can define the last
sector to be transferred in a single or multisector
transfer. If the last sector to be transferred is a par-
tial sector, the host can stop transferring the data in
mid-sector, and the 82078 will continue to complete
the sector as if a hardware TC was received. The
only difference between these implicit functions and
TC is that they return "abnormal termination" result
status. Such status indications can be ignored if they
were expected.
* 6.1.3 READ TRACK
This command terminates when the EOT specified
number of sectors have been read. If the 82078
does not find an I D Address Mark on the diskette
after the second· occurrence of a pulse on the
INDX# pin, then it sets the IC code in Status Regis-
ter 0 to "01" (Abnormal termination), sets the MA bit
in Status Register 1 to "1", and terminates the com-
mand.
* 6.1.6 VERIFY
Refer to Table 6-6 and Table 6-7 for information
concerning the values of MT and EC versus SC and
EOT value.
* Table 6·6. Result Phase Table
* Table 6-7. Verify Command Result Phase Table
Fix by aborting the transfer when EOT > # Sectors Per Side.
Cc: qemu-stable@nongnu.org
Cc: Hervé Poussineau <hpoussin@reactos.org>
Fixes: baca51faff0 ("floppy driver: disk geometry auto detect")
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211118115733.4038610-2-philmd@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit defac5e2fbddf8423a354ff0454283a2115e1367)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/block/fdc.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 97fa6de423..755a26c114 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -1531,6 +1531,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
int tmp;
fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]);
tmp = (fdctrl->fifo[6] - ks + 1);
+ if (tmp < 0) {
+ FLOPPY_DPRINTF("invalid EOT: %d\n", tmp);
+ fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00);
+ fdctrl->fifo[3] = kt;
+ fdctrl->fifo[4] = kh;
+ fdctrl->fifo[5] = ks;
+ return;
+ }
if (fdctrl->fifo[0] & 0x80)
tmp += fdctrl->fifo[6];
fdctrl->data_len *= tmp;
--
2.35.3

View File

@ -0,0 +1,52 @@
From 100f33ff8a1d55986e43b99ba8726abc29ee8d26 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Mon, 5 Dec 2022 15:32:55 -0500
Subject: [PATCH 5/5] hw/display/qxl: Assert memory slot fits in preallocated
MemoryRegion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler
RH-Bugzilla: 2148545
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
RH-Commit: [5/5] f809ce48e7989dd6547b7c8bf1a5efc3fdcacbac (jmaloy/jons-qemu-kvm)
BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545
CVE: CVE-2022-4144
Upstream: Merged
commit 86fdb0582c653a9824183679403a85f588260d62
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
Date: Mon Nov 28 21:27:41 2022 +0100
hw/display/qxl: Assert memory slot fits in preallocated MemoryRegion
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20221128202741.4945-6-philmd@linaro.org>
(cherry picked from commit 86fdb0582c653a9824183679403a85f588260d62)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/display/qxl.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 2a4b2d4158..bcd9e8716a 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -1372,6 +1372,7 @@ static int qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta,
qxl_set_guest_bug(d, "%s: pci_region = %d", __func__, pci_region);
return 1;
}
+ assert(guest_end - pci_start <= memory_region_size(mr));
virt_start = (intptr_t)memory_region_get_ram_ptr(mr);
memslot.slot_id = slot_id;
--
2.37.3

View File

@ -0,0 +1,130 @@
From 4e1bfbe3a0a113fe3cf39336a9d7da4e8c2a21ea Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Mon, 5 Dec 2022 15:32:55 -0500
Subject: [PATCH 4/5] hw/display/qxl: Avoid buffer overrun in qxl_phys2virt
(CVE-2022-4144)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler
RH-Bugzilla: 2148545
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
RH-Commit: [4/5] afe53f8d9b31c6fd8211fe172173151f3255e67c (jmaloy/jons-qemu-kvm)
BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545
CVE: CVE-2022-4144
Upstream: Merged
commit 6dbbf055148c6f1b7d8a3251a65bd6f3d1e1f622
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
Date: Mon Nov 28 21:27:40 2022 +0100
hw/display/qxl: Avoid buffer overrun in qxl_phys2virt (CVE-2022-4144)
Have qxl_get_check_slot_offset() return false if the requested
buffer size does not fit within the slot memory region.
Similarly qxl_phys2virt() now returns NULL in such case, and
qxl_dirty_one_surface() aborts.
This avoids buffer overrun in the host pointer returned by
memory_region_get_ram_ptr().
Fixes: CVE-2022-4144 (out-of-bounds read)
Reported-by: Wenxu Yin (@awxylitol)
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1336
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20221128202741.4945-5-philmd@linaro.org>
(cherry picked from commit 6dbbf055148c6f1b7d8a3251a65bd6f3d1e1f622)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/display/qxl.c | 27 +++++++++++++++++++++++----
hw/display/qxl.h | 2 +-
2 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index aa9065183e..2a4b2d4158 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -1412,11 +1412,13 @@ static void qxl_reset_surfaces(PCIQXLDevice *d)
/* can be also called from spice server thread context */
static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl,
- uint32_t *s, uint64_t *o)
+ uint32_t *s, uint64_t *o,
+ size_t size_requested)
{
uint64_t phys = le64_to_cpu(pqxl);
uint32_t slot = (phys >> (64 - 8)) & 0xff;
uint64_t offset = phys & 0xffffffffffff;
+ uint64_t size_available;
if (slot >= NUM_MEMSLOTS) {
qxl_set_guest_bug(qxl, "slot too large %d >= %d", slot,
@@ -1440,6 +1442,23 @@ static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl,
slot, offset, qxl->guest_slots[slot].size);
return false;
}
+ size_available = memory_region_size(qxl->guest_slots[slot].mr);
+ if (qxl->guest_slots[slot].offset + offset >= size_available) {
+ qxl_set_guest_bug(qxl,
+ "slot %d offset %"PRIu64" > region size %"PRIu64"\n",
+ slot, qxl->guest_slots[slot].offset + offset,
+ size_available);
+ return false;
+ }
+ size_available -= qxl->guest_slots[slot].offset + offset;
+ if (size_requested > size_available) {
+ qxl_set_guest_bug(qxl,
+ "slot %d offset %"PRIu64" size %zu: "
+ "overrun by %"PRIu64" bytes\n",
+ slot, offset, size_requested,
+ size_requested - size_available);
+ return false;
+ }
*s = slot;
*o = offset;
@@ -1459,7 +1478,7 @@ void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id,
offset = le64_to_cpu(pqxl) & 0xffffffffffff;
return (void *)(intptr_t)offset;
case MEMSLOT_GROUP_GUEST:
- if (!qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset)) {
+ if (!qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset, size)) {
return NULL;
}
ptr = memory_region_get_ram_ptr(qxl->guest_slots[slot].mr);
@@ -1925,9 +1944,9 @@ static void qxl_dirty_one_surface(PCIQXLDevice *qxl, QXLPHYSICAL pqxl,
uint32_t slot;
bool rc;
- rc = qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset);
- assert(rc == true);
size = (uint64_t)height * abs(stride);
+ rc = qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset, size);
+ assert(rc == true);
trace_qxl_surfaces_dirty(qxl->id, offset, size);
qxl_set_dirty(qxl->guest_slots[slot].mr,
qxl->guest_slots[slot].offset + offset,
diff --git a/hw/display/qxl.h b/hw/display/qxl.h
index c784315daa..89ca832cf9 100644
--- a/hw/display/qxl.h
+++ b/hw/display/qxl.h
@@ -157,7 +157,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL)
*
* Returns a host pointer to a buffer placed at offset @phys within the
* active slot @group_id of the PCI VGA RAM memory region associated with
- * the @qxl device. If the slot is inactive, or the offset is out
+ * the @qxl device. If the slot is inactive, or the offset + size are out
* of the memory region, returns NULL.
*
* Use with care; by the time this function returns, the returned pointer is
--
2.37.3

View File

@ -0,0 +1,70 @@
From 068c531fb968ec04509b85f524d0745e6acf5449 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Mon, 5 Dec 2022 15:32:55 -0500
Subject: [PATCH 2/5] hw/display/qxl: Document qxl_phys2virt()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler
RH-Bugzilla: 2148545
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
RH-Commit: [2/5] f84c0b379022c527fc2508a242443d86454944c0 (jmaloy/jons-qemu-kvm)
BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545
CVE: CVE-2022-4144
Upstream: Merged
commit b1901de83a9456cde26fc755f71ca2b7b3ef50fc
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
Date: Mon Nov 28 21:27:38 2022 +0100
hw/display/qxl: Document qxl_phys2virt()
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20221128202741.4945-3-philmd@linaro.org>
(cherry picked from commit b1901de83a9456cde26fc755f71ca2b7b3ef50fc)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/display/qxl.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/hw/display/qxl.h b/hw/display/qxl.h
index 30d21f4d0b..c938f88a2f 100644
--- a/hw/display/qxl.h
+++ b/hw/display/qxl.h
@@ -147,6 +147,25 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL)
#define QXL_DEFAULT_REVISION (QXL_REVISION_STABLE_V12 + 1)
/* qxl.c */
+/**
+ * qxl_phys2virt: Get a pointer within a PCI VRAM memory region.
+ *
+ * @qxl: QXL device
+ * @phys: physical offset of buffer within the VRAM
+ * @group_id: memory slot group
+ *
+ * Returns a host pointer to a buffer placed at offset @phys within the
+ * active slot @group_id of the PCI VGA RAM memory region associated with
+ * the @qxl device. If the slot is inactive, or the offset is out
+ * of the memory region, returns NULL.
+ *
+ * Use with care; by the time this function returns, the returned pointer is
+ * not protected by RCU anymore. If the caller is not within an RCU critical
+ * section and does not hold the iothread lock, it must have other means of
+ * protecting the pointer, such as a reference to the region that includes
+ * the incoming ram_addr_t.
+ *
+ */
void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id);
void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...)
GCC_FMT_ATTR(2, 3);
--
2.37.3

View File

@ -0,0 +1,74 @@
From 5ec8d909d40fa04ef2c3572e01509a1866786070 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Mon, 5 Dec 2022 15:32:55 -0500
Subject: [PATCH 1/5] hw/display/qxl: Have qxl_log_command Return early if no
log_cmd handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler
RH-Bugzilla: 2148545
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
RH-Commit: [1/5] 33d94f40c46cccbc32d108d1035365917bf90356 (jmaloy/jons-qemu-kvm)
BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545
CVE: CVE-2022-4144
Upstream: Merged
commit 61c34fc194b776ecadc39fb26b061331107e5599
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
Date: Mon Nov 28 21:27:37 2022 +0100
hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler
Only 3 command types are logged: no need to call qxl_phys2virt()
for the other types. Using different cases will help to pass
different structure sizes to qxl_phys2virt() in a pair of commits.
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20221128202741.4945-2-philmd@linaro.org>
(cherry picked from commit 61c34fc194b776ecadc39fb26b061331107e5599)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/display/qxl-logger.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/hw/display/qxl-logger.c b/hw/display/qxl-logger.c
index 68bfa47568..1bcf803db6 100644
--- a/hw/display/qxl-logger.c
+++ b/hw/display/qxl-logger.c
@@ -247,6 +247,16 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext)
qxl_name(qxl_type, ext->cmd.type),
compat ? "(compat)" : "");
+ switch (ext->cmd.type) {
+ case QXL_CMD_DRAW:
+ break;
+ case QXL_CMD_SURFACE:
+ break;
+ case QXL_CMD_CURSOR:
+ break;
+ default:
+ goto out;
+ }
data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
if (!data) {
return 1;
@@ -269,6 +279,7 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext)
qxl_log_cmd_cursor(qxl, data, ext->group_id);
break;
}
+out:
fprintf(stderr, "\n");
return 0;
}
--
2.37.3

View File

@ -0,0 +1,234 @@
From 0e6bd3911c4971f575aac7e9cd726467b52fe544 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Mon, 5 Dec 2022 15:32:55 -0500
Subject: [PATCH 3/5] hw/display/qxl: Pass requested buffer size to
qxl_phys2virt()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler
RH-Bugzilla: 2148545
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
RH-Commit: [3/5] 8e362d67fe7fef9eb457cfb15d75b298fed725c3 (jmaloy/jons-qemu-kvm)
BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545
CVE: CVE-2022-4144
Upstream: Merged
commit 8efec0ef8bbc1e75a7ebf6e325a35806ece9b39f
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
Date: Mon Nov 28 21:27:39 2022 +0100
hw/display/qxl: Pass requested buffer size to qxl_phys2virt()
Currently qxl_phys2virt() doesn't check for buffer overrun.
In order to do so in the next commit, pass the buffer size
as argument.
For QXLCursor in qxl_render_cursor() -> qxl_cursor() we
verify the size of the chunked data ahead, checking we can
access 'sizeof(QXLCursor) + chunk->data_size' bytes.
Since in the SPICE_CURSOR_TYPE_MONO case the cursor is
assumed to fit in one chunk, no change are required.
In SPICE_CURSOR_TYPE_ALPHA the ahead read is handled in
qxl_unpack_chunks().
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20221128202741.4945-4-philmd@linaro.org>
(cherry picked from commit 8efec0ef8bbc1e75a7ebf6e325a35806ece9b39f)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/display/qxl-logger.c | 11 ++++++++---
hw/display/qxl-render.c | 20 ++++++++++++++++----
hw/display/qxl.c | 14 +++++++++-----
hw/display/qxl.h | 4 +++-
4 files changed, 36 insertions(+), 13 deletions(-)
diff --git a/hw/display/qxl-logger.c b/hw/display/qxl-logger.c
index 1bcf803db6..35c38f6252 100644
--- a/hw/display/qxl-logger.c
+++ b/hw/display/qxl-logger.c
@@ -106,7 +106,7 @@ static int qxl_log_image(PCIQXLDevice *qxl, QXLPHYSICAL addr, int group_id)
QXLImage *image;
QXLImageDescriptor *desc;
- image = qxl_phys2virt(qxl, addr, group_id);
+ image = qxl_phys2virt(qxl, addr, group_id, sizeof(QXLImage));
if (!image) {
return 1;
}
@@ -214,7 +214,8 @@ int qxl_log_cmd_cursor(PCIQXLDevice *qxl, QXLCursorCmd *cmd, int group_id)
cmd->u.set.position.y,
cmd->u.set.visible ? "yes" : "no",
cmd->u.set.shape);
- cursor = qxl_phys2virt(qxl, cmd->u.set.shape, group_id);
+ cursor = qxl_phys2virt(qxl, cmd->u.set.shape, group_id,
+ sizeof(QXLCursor));
if (!cursor) {
return 1;
}
@@ -236,6 +237,7 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext)
{
bool compat = ext->flags & QXL_COMMAND_FLAG_COMPAT;
void *data;
+ size_t datasz;
int ret;
if (!qxl->cmdlog) {
@@ -249,15 +251,18 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext)
switch (ext->cmd.type) {
case QXL_CMD_DRAW:
+ datasz = compat ? sizeof(QXLCompatDrawable) : sizeof(QXLDrawable);
break;
case QXL_CMD_SURFACE:
+ datasz = sizeof(QXLSurfaceCmd);
break;
case QXL_CMD_CURSOR:
+ datasz = sizeof(QXLCursorCmd);
break;
default:
goto out;
}
- data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+ data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id, datasz);
if (!data) {
return 1;
}
diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c
index ca217004bf..fcfd40c3ac 100644
--- a/hw/display/qxl-render.c
+++ b/hw/display/qxl-render.c
@@ -107,7 +107,9 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
qxl->guest_primary.resized = 0;
qxl->guest_primary.data = qxl_phys2virt(qxl,
qxl->guest_primary.surface.mem,
- MEMSLOT_GROUP_GUEST);
+ MEMSLOT_GROUP_GUEST,
+ qxl->guest_primary.abs_stride
+ * height);
if (!qxl->guest_primary.data) {
goto end;
}
@@ -228,7 +230,8 @@ static void qxl_unpack_chunks(void *dest, size_t size, PCIQXLDevice *qxl,
if (offset == size) {
return;
}
- chunk = qxl_phys2virt(qxl, chunk->next_chunk, group_id);
+ chunk = qxl_phys2virt(qxl, chunk->next_chunk, group_id,
+ sizeof(QXLDataChunk) + chunk->data_size);
if (!chunk) {
return;
}
@@ -295,7 +298,8 @@ fail:
/* called from spice server thread context only */
int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext)
{
- QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+ QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id,
+ sizeof(QXLCursorCmd));
QXLCursor *cursor;
QEMUCursor *c;
@@ -314,7 +318,15 @@ int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext)
}
switch (cmd->type) {
case QXL_CURSOR_SET:
- cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id);
+ /* First read the QXLCursor to get QXLDataChunk::data_size ... */
+ cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id,
+ sizeof(QXLCursor));
+ if (!cursor) {
+ return 1;
+ }
+ /* Then read including the chunked data following QXLCursor. */
+ cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id,
+ sizeof(QXLCursor) + cursor->chunk.data_size);
if (!cursor) {
return 1;
}
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 29c80b4289..aa9065183e 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -274,7 +274,8 @@ static void qxl_spice_monitors_config_async(PCIQXLDevice *qxl, int replay)
QXL_IO_MONITORS_CONFIG_ASYNC));
}
- cfg = qxl_phys2virt(qxl, qxl->guest_monitors_config, MEMSLOT_GROUP_GUEST);
+ cfg = qxl_phys2virt(qxl, qxl->guest_monitors_config, MEMSLOT_GROUP_GUEST,
+ sizeof(QXLMonitorsConfig));
if (cfg != NULL && cfg->count == 1) {
qxl->guest_primary.resized = 1;
qxl->guest_head0_width = cfg->heads[0].width;
@@ -459,7 +460,8 @@ static int qxl_track_command(PCIQXLDevice *qxl, struct QXLCommandExt *ext)
switch (le32_to_cpu(ext->cmd.type)) {
case QXL_CMD_SURFACE:
{
- QXLSurfaceCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+ QXLSurfaceCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id,
+ sizeof(QXLSurfaceCmd));
if (!cmd) {
return 1;
@@ -494,7 +496,8 @@ static int qxl_track_command(PCIQXLDevice *qxl, struct QXLCommandExt *ext)
}
case QXL_CMD_CURSOR:
{
- QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+ QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id,
+ sizeof(QXLCursorCmd));
if (!cmd) {
return 1;
@@ -1444,7 +1447,8 @@ static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl,
}
/* can be also called from spice server thread context */
-void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id)
+void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id,
+ size_t size)
{
uint64_t offset;
uint32_t slot;
@@ -1952,7 +1956,7 @@ static void qxl_dirty_surfaces(PCIQXLDevice *qxl)
}
cmd = qxl_phys2virt(qxl, qxl->guest_surfaces.cmds[i],
- MEMSLOT_GROUP_GUEST);
+ MEMSLOT_GROUP_GUEST, sizeof(QXLSurfaceCmd));
assert(cmd);
assert(cmd->type == QXL_SURFACE_CMD_CREATE);
qxl_dirty_one_surface(qxl, cmd->u.surface_create.data,
diff --git a/hw/display/qxl.h b/hw/display/qxl.h
index c938f88a2f..c784315daa 100644
--- a/hw/display/qxl.h
+++ b/hw/display/qxl.h
@@ -153,6 +153,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL)
* @qxl: QXL device
* @phys: physical offset of buffer within the VRAM
* @group_id: memory slot group
+ * @size: size of the buffer
*
* Returns a host pointer to a buffer placed at offset @phys within the
* active slot @group_id of the PCI VGA RAM memory region associated with
@@ -166,7 +167,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL)
* the incoming ram_addr_t.
*
*/
-void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id);
+void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id,
+ size_t size);
void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...)
GCC_FMT_ATTR(2, 3);
--
2.37.3

View File

@ -0,0 +1,128 @@
From 2308abf0c5da2fe35a0721318c31d22e077663c2 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Fri, 24 Nov 2023 12:17:11 -0500
Subject: [PATCH 1/2] hw/ide: reset: cancel async DMA operation before
resetting state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 335: hw/ide: reset: cancel async DMA operation before resetting state
RH-Jira: RHEL-15437
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Commit: [1/2] b0f5f7f888559a210f1c6b3c545e337dbbc9cf22 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
JIRA: https://issues.redhat.com/browse/RHEL-15437
CVE: CVE-2023-5088
Upstream: Merged
commit 7d7512019fc40c577e2bdd61f114f31a9eb84a8e
Author: Fiona Ebner <f.ebner@proxmox.com>
Date: Wed Sep 6 15:09:21 2023 +0200
hw/ide: reset: cancel async DMA operation before resetting state
If there is a pending DMA operation during ide_bus_reset(), the fact
that the IDEState is already reset before the operation is canceled
can be problematic. In particular, ide_dma_cb() might be called and
then use the reset IDEState which contains the signature after the
reset. When used to construct the IO operation this leads to
ide_get_sector() returning 0 and nsector being 1. This is particularly
bad, because a write command will thus destroy the first sector which
often contains a partition table or similar.
Traces showing the unsolicited write happening with IDEState
0x5595af6949d0 being used after reset:
> ahci_port_write ahci(0x5595af6923f0)[0]: port write [reg:PxSCTL] @ 0x2c: 0x00000300
> ahci_reset_port ahci(0x5595af6923f0)[0]: reset port
> ide_reset IDEstate 0x5595af6949d0
> ide_reset IDEstate 0x5595af694da8
> ide_bus_reset_aio aio_cancel
> dma_aio_cancel dbs=0x7f64600089a0
> dma_blk_cb dbs=0x7f64600089a0 ret=0
> dma_complete dbs=0x7f64600089a0 ret=0 cb=0x5595acd40b30
> ahci_populate_sglist ahci(0x5595af6923f0)[0]
> ahci_dma_prepare_buf ahci(0x5595af6923f0)[0]: prepare buf limit=512 prepared=512
> ide_dma_cb IDEState 0x5595af6949d0; sector_num=0 n=1 cmd=DMA WRITE
> dma_blk_io dbs=0x7f6420802010 bs=0x5595ae2c6c30 offset=0 to_dev=1
> dma_blk_cb dbs=0x7f6420802010 ret=0
> (gdb) p *qiov
> $11 = {iov = 0x7f647c76d840, niov = 1, {{nalloc = 1, local_iov = {iov_base = 0x0,
> iov_len = 512}}, {__pad = "\001\000\000\000\000\000\000\000\000\000\000",
> size = 512}}}
> (gdb) bt
> #0 blk_aio_pwritev (blk=0x5595ae2c6c30, offset=0, qiov=0x7f6420802070, flags=0,
> cb=0x5595ace6f0b0 <dma_blk_cb>, opaque=0x7f6420802010)
> at ../block/block-backend.c:1682
> #1 0x00005595ace6f185 in dma_blk_cb (opaque=0x7f6420802010, ret=<optimized out>)
> at ../softmmu/dma-helpers.c:179
> #2 0x00005595ace6f778 in dma_blk_io (ctx=0x5595ae0609f0,
> sg=sg@entry=0x5595af694d00, offset=offset@entry=0, align=align@entry=512,
> io_func=io_func@entry=0x5595ace6ee30 <dma_blk_write_io_func>,
> io_func_opaque=io_func_opaque@entry=0x5595ae2c6c30,
> cb=0x5595acd40b30 <ide_dma_cb>, opaque=0x5595af6949d0,
> dir=DMA_DIRECTION_TO_DEVICE) at ../softmmu/dma-helpers.c:244
> #3 0x00005595ace6f90a in dma_blk_write (blk=0x5595ae2c6c30,
> sg=sg@entry=0x5595af694d00, offset=offset@entry=0, align=align@entry=512,
> cb=cb@entry=0x5595acd40b30 <ide_dma_cb>, opaque=opaque@entry=0x5595af6949d0)
> at ../softmmu/dma-helpers.c:280
> #4 0x00005595acd40e18 in ide_dma_cb (opaque=0x5595af6949d0, ret=<optimized out>)
> at ../hw/ide/core.c:953
> #5 0x00005595ace6f319 in dma_complete (ret=0, dbs=0x7f64600089a0)
> at ../softmmu/dma-helpers.c:107
> #6 dma_blk_cb (opaque=0x7f64600089a0, ret=0) at ../softmmu/dma-helpers.c:127
> #7 0x00005595ad12227d in blk_aio_complete (acb=0x7f6460005b10)
> at ../block/block-backend.c:1527
> #8 blk_aio_complete (acb=0x7f6460005b10) at ../block/block-backend.c:1524
> #9 blk_aio_write_entry (opaque=0x7f6460005b10) at ../block/block-backend.c:1594
> #10 0x00005595ad258cfb in coroutine_trampoline (i0=<optimized out>,
> i1=<optimized out>) at ../util/coroutine-ucontext.c:177
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: simon.rowe@nutanix.com
Message-ID: <20230906130922.142845-1-f.ebner@proxmox.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/ide/core.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 05a32d0a99..fd50c123e8 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2456,19 +2456,19 @@ static void ide_dummy_transfer_stop(IDEState *s)
void ide_bus_reset(IDEBus *bus)
{
- bus->unit = 0;
- bus->cmd = 0;
- ide_reset(&bus->ifs[0]);
- ide_reset(&bus->ifs[1]);
- ide_clear_hob(bus);
-
- /* pending async DMA */
+ /* pending async DMA - needs the IDEState before it is reset */
if (bus->dma->aiocb) {
trace_ide_bus_reset_aio();
blk_aio_cancel(bus->dma->aiocb);
bus->dma->aiocb = NULL;
}
+ bus->unit = 0;
+ bus->cmd = 0;
+ ide_reset(&bus->ifs[0]);
+ ide_reset(&bus->ifs[1]);
+ ide_clear_hob(bus);
+
/* reset dma provider too */
if (bus->dma->ops->reset) {
bus->dma->ops->reset(bus->dma);
--
2.41.0

View File

@ -0,0 +1,75 @@
From 2db3d0de1be018f14cb91fdd4a368996b09d8bec Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Wed, 13 Apr 2022 14:51:06 -0400
Subject: [PATCH 1/3] hw/intc/arm_gicv3: Check for !MEMTX_OK instead of
MEMTX_ERROR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 151: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR
RH-Commit: [1/3] 561c9c2b1249f07d33013040b1c495ed1fbf825b (jmaloy/qemu-kvm)
RH-Bugzilla: 1999236
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
Upstream: Merged
CVE: CVE-2021-3750
commit b9d383ab797f54ae5fa8746117770709921dc529
Author: Philippe Mathieu-Daudé <philmd@redhat.com>
Date: Wed Dec 15 19:24:19 2021 +0100
hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR
Quoting Peter Maydell:
"These MEMTX_* aren't from the memory transaction
API functions; they're just being used by gicd_readl() and
friends as a way to indicate a success/failure so that the
actual MemoryRegionOps read/write fns like gicv3_dist_read()
can log a guest error."
We are going to introduce more MemTxResult bits, so it is
safer to check for !MEMTX_OK rather than MEMTX_ERROR.
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit b9d383ab797f54ae5fa8746117770709921dc529)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/intc/arm_gicv3_redist.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c
index c8ff3eca08..99b11ca5ee 100644
--- a/hw/intc/arm_gicv3_redist.c
+++ b/hw/intc/arm_gicv3_redist.c
@@ -462,7 +462,7 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data,
break;
}
- if (r == MEMTX_ERROR) {
+ if (r != MEMTX_OK) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: invalid guest read at offset " TARGET_FMT_plx
" size %u\n", __func__, offset, size);
@@ -521,7 +521,7 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data,
break;
}
- if (r == MEMTX_ERROR) {
+ if (r != MEMTX_OK) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: invalid guest write at offset " TARGET_FMT_plx
" size %u\n", __func__, offset, size);
--
2.27.0

View File

@ -0,0 +1,449 @@
From 146cfb23b76b898f08690ffc14aab16d22a41404 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 9 May 2023 10:29:03 -0400
Subject: [PATCH 04/15] hw: replace most qemu_bh_new calls with
qemu_bh_new_guarded
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
RH-Bugzilla: 1999236
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [4/12] 00c51d30246b3aa529f6043e35ee471660aa1fce (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
Upstream: Merged
CVE: CVE-2021-3750
Conflicts: In hw/nvme/ctrl.c there are no calls to qemu_bh_new() at the two locations
the replacement is done in the upstream commit. Instead, timer_new_ns() is
used. We leave these functions unaltered.
commit f63192b0544af5d3e4d5edfd85ab520fcf671377
Author: Alexander Bulekov <alxndr@bu.edu>
Date: Thu Apr 27 17:10:09 2023 -0400
hw: replace most qemu_bh_new calls with qemu_bh_new_guarded
This protects devices from bh->mmio reentrancy issues.
Thanks: Thomas Huth <thuth@redhat.com> for diagnosing OS X test failure.
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paul Durrant <paul@xen.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230427211013.2994127-5-alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/9pfs/xen-9p-backend.c | 5 ++++-
hw/block/dataplane/virtio-blk.c | 3 ++-
hw/block/dataplane/xen-block.c | 5 +++--
hw/char/virtio-serial-bus.c | 3 ++-
hw/display/qxl.c | 9 ++++++---
hw/display/virtio-gpu.c | 6 ++++--
hw/ide/ahci.c | 3 ++-
hw/ide/ahci_internal.h | 1 +
hw/ide/core.c | 4 +++-
hw/misc/imx_rngc.c | 6 ++++--
hw/misc/macio/mac_dbdma.c | 2 +-
hw/net/virtio-net.c | 3 ++-
hw/scsi/mptsas.c | 3 ++-
hw/scsi/scsi-bus.c | 3 ++-
hw/scsi/vmw_pvscsi.c | 3 ++-
hw/usb/dev-uas.c | 3 ++-
hw/usb/hcd-dwc2.c | 3 ++-
hw/usb/hcd-ehci.c | 3 ++-
hw/usb/hcd-uhci.c | 2 +-
hw/usb/host-libusb.c | 6 ++++--
hw/usb/redirect.c | 6 ++++--
hw/usb/xen-usb.c | 3 ++-
hw/virtio/virtio-balloon.c | 5 +++--
hw/virtio/virtio-crypto.c | 3 ++-
24 files changed, 62 insertions(+), 31 deletions(-)
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 65c4979c3c..09f7c13588 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -60,6 +60,7 @@ typedef struct Xen9pfsDev {
int num_rings;
Xen9pfsRing *rings;
+ MemReentrancyGuard mem_reentrancy_guard;
} Xen9pfsDev;
static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
@@ -441,7 +442,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
XEN_FLEX_RING_SIZE(ring_order);
- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh,
+ &xen_9pdev->rings[i],
+ &xen_9pdev->mem_reentrancy_guard);
xen_9pdev->rings[i].out_cons = 0;
xen_9pdev->rings[i].out_size = 0;
xen_9pdev->rings[i].inprogress = false;
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index ee5a5352dc..5f0de7da1e 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
} else {
s->ctx = qemu_get_aio_context();
}
- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s);
+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s,
+ &DEVICE(vdev)->mem_reentrancy_guard);
s->batch_notify_vqs = bitmap_new(conf->num_queues);
*dataplane = s;
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 860787580a..07855feea6 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -631,8 +631,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
} else {
dataplane->ctx = qemu_get_aio_context();
}
- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh,
- dataplane);
+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
+ dataplane,
+ &DEVICE(xendev)->mem_reentrancy_guard);
return dataplane;
}
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index f01ec2137c..f18124b155 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
return;
}
- port->bh = qemu_bh_new(flush_queued_data_bh, port);
+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port,
+ &dev->mem_reentrancy_guard);
port->elem = NULL;
}
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index bcd9e8716a..0f663b9912 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2206,11 +2206,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp)
qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);
- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl);
+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl,
+ &DEVICE(qxl)->mem_reentrancy_guard);
qxl_reset_state(qxl);
- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl);
- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd);
+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl,
+ &DEVICE(qxl)->mem_reentrancy_guard);
+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd,
+ &DEVICE(qxl)->mem_reentrancy_guard);
}
static void qxl_realize_primary(PCIDevice *dev, Error **errp)
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index d78b9700c7..ecf9079145 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1332,8 +1332,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
g->ctrl_vq = virtio_get_queue(vdev, 0);
g->cursor_vq = virtio_get_queue(vdev, 1);
- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g,
+ &qdev->mem_reentrancy_guard);
+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g,
+ &qdev->mem_reentrancy_guard);
QTAILQ_INIT(&g->reslist);
QTAILQ_INIT(&g->cmdq);
QTAILQ_INIT(&g->fenceq);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index a94c6e26fb..7488b28065 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1504,7 +1504,8 @@ static void ahci_cmd_done(const IDEDMA *dma)
ahci_write_fis_d2h(ad);
if (ad->port_regs.cmd_issue && !ad->check_bh) {
- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
+ &ad->mem_reentrancy_guard);
qemu_bh_schedule(ad->check_bh);
}
}
diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h
index 109de9e2d1..a7768dd69e 100644
--- a/hw/ide/ahci_internal.h
+++ b/hw/ide/ahci_internal.h
@@ -321,6 +321,7 @@ struct AHCIDevice {
bool init_d2h_sent;
AHCICmdHdr *cur_cmd;
NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
+ MemReentrancyGuard mem_reentrancy_guard;
};
struct AHCIPCIState {
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 15138225be..05a32d0a99 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -510,6 +510,7 @@ BlockAIOCB *ide_issue_trim(
BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
{
IDEState *s = opaque;
+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
TrimAIOCB *iocb;
/* Paired with a decrement in ide_trim_bh_cb() */
@@ -517,7 +518,8 @@ BlockAIOCB *ide_issue_trim(
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
iocb->s = s;
- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb,
+ &DEVICE(dev)->mem_reentrancy_guard);
iocb->ret = 0;
iocb->qiov = qiov;
iocb->i = -1;
diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c
index 632c03779c..082c6980ad 100644
--- a/hw/misc/imx_rngc.c
+++ b/hw/misc/imx_rngc.c
@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp)
sysbus_init_mmio(sbd, &s->iomem);
sysbus_init_irq(sbd, &s->irq);
- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s);
- s->seed_bh = qemu_bh_new(imx_rngc_seed, s);
+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s,
+ &dev->mem_reentrancy_guard);
+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s,
+ &dev->mem_reentrancy_guard);
}
static void imx_rngc_reset(DeviceState *dev)
diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c
index e220f1a927..f6a9e76fe7 100644
--- a/hw/misc/macio/mac_dbdma.c
+++ b/hw/misc/macio/mac_dbdma.c
@@ -912,7 +912,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp)
{
DBDMAState *s = MAC_DBDMA(dev);
- s->bh = qemu_bh_new(DBDMA_run_bh, s);
+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard);
}
static void mac_dbdma_class_init(ObjectClass *oc, void *data)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7e172ef829..ddaa8fa122 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -2753,7 +2753,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
n->vqs[index].tx_vq =
virtio_add_queue(vdev, n->net_conf.tx_queue_size,
virtio_net_handle_tx_bh);
- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
+ &DEVICE(vdev)->mem_reentrancy_guard);
}
n->vqs[index].tx_waiting = 0;
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
index f6c7765544..ab8aaca85d 100644
--- a/hw/scsi/mptsas.c
+++ b/hw/scsi/mptsas.c
@@ -1313,7 +1313,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp)
}
s->max_devices = MPTSAS_NUM_PORTS;
- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s);
+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s,
+ &DEVICE(dev)->mem_reentrancy_guard);
scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info);
}
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index 77325d8cc7..b506ab7d04 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -192,7 +192,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
AioContext *ctx = blk_get_aio_context(s->conf.blk);
/* The reference is dropped in scsi_dma_restart_bh.*/
object_ref(OBJECT(s));
- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s);
+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
+ &DEVICE(s)->mem_reentrancy_guard);
qemu_bh_schedule(s->bh);
}
}
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index cd76bd67ab..4c36febbc0 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -1178,7 +1178,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET);
}
- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s);
+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s,
+ &DEVICE(pci_dev)->mem_reentrancy_guard);
scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info);
/* override default SCSI bus hotplug-handler, with pvscsi's one */
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index 599d6b52a0..a36a7c3013 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -935,7 +935,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp)
QTAILQ_INIT(&uas->results);
QTAILQ_INIT(&uas->requests);
- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas);
+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas,
+ &d->mem_reentrancy_guard);
dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE);
scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info);
diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c
index e1d96acf7e..0e238f8422 100644
--- a/hw/usb/hcd-dwc2.c
+++ b/hw/usb/hcd-dwc2.c
@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp)
s->fi = USB_FRMINTVL - 1;
s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s);
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s);
- s->async_bh = qemu_bh_new(dwc2_work_bh, s);
+ s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s,
+ &dev->mem_reentrancy_guard);
sysbus_init_irq(sbd, &s->irq);
}
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 6caa7ac6c2..df4ff6f2c1 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2528,7 +2528,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp)
}
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s);
- s->async_bh = qemu_bh_new(ehci_work_bh, s);
+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s,
+ &dev->mem_reentrancy_guard);
s->device = dev;
s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 7930b868fa..469c5e57e9 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -1195,7 +1195,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp)
USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL);
}
}
- s->bh = qemu_bh_new(uhci_bh, s);
+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard);
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s);
s->num_ports_vmstate = NB_PORTS;
QTAILQ_INIT(&s->queues);
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
index d0d46dd0a4..09b961116b 100644
--- a/hw/usb/host-libusb.c
+++ b/hw/usb/host-libusb.c
@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque)
static void usb_host_nodev(USBHostDevice *s)
{
if (!s->bh_nodev) {
- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s);
+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s,
+ &DEVICE(s)->mem_reentrancy_guard);
}
qemu_bh_schedule(s->bh_nodev);
}
@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id)
USBHostDevice *dev = opaque;
if (!dev->bh_postld) {
- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev);
+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev,
+ &DEVICE(dev)->mem_reentrancy_guard);
}
qemu_bh_schedule(dev->bh_postld);
dev->bh_postld_pending = true;
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 5f0ef9cb3b..59cd3cd7c4 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1437,8 +1437,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp)
}
}
- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev);
+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev,
+ &DEVICE(dev)->mem_reentrancy_guard);
+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev,
+ &DEVICE(dev)->mem_reentrancy_guard);
dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev);
packet_id_queue_init(&dev->cancelled, dev, "cancelled");
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 0f7369e7ed..dec91294ad 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -1021,7 +1021,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev)
QTAILQ_INIT(&usbif->req_free_q);
QSIMPLEQ_INIT(&usbif->hotplug_q);
- usbif->bh = qemu_bh_new(usbback_bh, usbif);
+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif,
+ &DEVICE(xendev)->mem_reentrancy_guard);
}
static int usbback_free(struct XenLegacyDevice *xendev)
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 9a4f491b54..f503572e27 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -917,8 +917,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
precopy_add_notifier(&s->free_page_hint_notify);
object_ref(OBJECT(s->iothread));
- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
- virtio_ballloon_get_free_page_hints, s);
+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread),
+ virtio_ballloon_get_free_page_hints, s,
+ &dev->mem_reentrancy_guard);
}
if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index 54f9bbb789..1be7bb543c 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -817,7 +817,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
vcrypto->vqs[i].dataq =
virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
vcrypto->vqs[i].dataq_bh =
- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]);
+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i],
+ &dev->mem_reentrancy_guard);
vcrypto->vqs[i].vcrypto = vcrypto;
}
--
2.37.3

View File

@ -0,0 +1,283 @@
From 59f02a421ecdba6e856597367020926fc0cb5177 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 15 Jan 2024 18:52:30 +0100
Subject: [PATCH 4/5] hw/s390x: Move KVM specific PV from hw/ to
target/s390x/kvm/
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 348: s390x: Provide some more useful information if decryption of a PV image fails
RH-Jira: RHEL-18214
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Commit: [4/5] f6095bfdb89268007a0741665284955db4752d46
JIRA: https://issues.redhat.com/browse/RHEL-18214
commit f5f9c6ea11bc807664fdeb9354915c2c9cdcbd89
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
Date: Sat Jun 24 22:06:44 2023 +0200
hw/s390x: Move KVM specific PV from hw/ to target/s390x/kvm/
Protected Virtualization (PV) is not a real hardware device:
it is a feature of the firmware on s390x that is exposed to
userspace via the KVM interface.
Move the pv.c/pv.h files to target/s390x/kvm/ to make this clearer.
Suggested-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230624200644.23931-1-philmd@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Conflicts:
hw/s390x/ipl.c
hw/s390x/s390-virtio-ccw.c
target/s390x/diag.c
(simple contextual conflict due to differce with #include statements)
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
MAINTAINERS | 2 --
hw/s390x/ipl.c | 2 +-
hw/s390x/meson.build | 1 -
hw/s390x/s390-pci-kvm.c | 2 +-
hw/s390x/s390-virtio-ccw.c | 2 +-
hw/s390x/tod-kvm.c | 2 +-
target/s390x/arch_dump.c | 2 +-
target/s390x/cpu-sysemu.c | 2 +-
target/s390x/cpu_features.c | 2 +-
target/s390x/cpu_models.c | 2 +-
target/s390x/diag.c | 2 +-
target/s390x/helper.c | 2 +-
target/s390x/ioinst.c | 2 +-
target/s390x/kvm/kvm.c | 2 +-
target/s390x/kvm/meson.build | 1 +
{hw/s390x => target/s390x/kvm}/pv.c | 2 +-
{include/hw/s390x => target/s390x/kvm}/pv.h | 0
17 files changed, 14 insertions(+), 16 deletions(-)
rename {hw/s390x => target/s390x/kvm}/pv.c (99%)
rename {include/hw/s390x => target/s390x/kvm}/pv.h (100%)
diff --git a/MAINTAINERS b/MAINTAINERS
index b893206fc3..d74ca51154 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -397,8 +397,6 @@ S: Supported
F: target/s390x/kvm/
F: target/s390x/machine.c
F: target/s390x/sigp.c
-F: hw/s390x/pv.c
-F: include/hw/s390x/pv.h
F: gdb-xml/s390*.xml
T: git https://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 9051d8652d..c25e247426 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -27,7 +27,7 @@
#include "hw/s390x/vfio-ccw.h"
#include "hw/s390x/css.h"
#include "hw/s390x/ebcdic.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
#include "ipl.h"
#include "qemu/error-report.h"
#include "qemu/config-file.h"
diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
index 6e6e47fcda..bb3b42f613 100644
--- a/hw/s390x/meson.build
+++ b/hw/s390x/meson.build
@@ -22,7 +22,6 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
'tod-kvm.c',
's390-skeys-kvm.c',
's390-stattrib-kvm.c',
- 'pv.c',
's390-pci-kvm.c',
))
s390x_ss.add(when: 'CONFIG_TCG', if_true: files(
diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c
index 9134fe185f..ff41e4106d 100644
--- a/hw/s390x/s390-pci-kvm.c
+++ b/hw/s390x/s390-pci-kvm.c
@@ -14,7 +14,7 @@
#include <linux/kvm.h>
#include "kvm/kvm_s390x.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
#include "hw/s390x/s390-pci-bus.h"
#include "hw/s390x/s390-pci-kvm.h"
#include "hw/s390x/s390-pci-inst.h"
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 17146469ee..7bfa5b4e8f 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -40,7 +40,7 @@
#include "hw/qdev-properties.h"
#include "hw/s390x/tod.h"
#include "sysemu/sysemu.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
#include "migration/blocker.h"
#include "qapi/visitor.h"
diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c
index c804c979b5..9776cda50a 100644
--- a/hw/s390x/tod-kvm.c
+++ b/hw/s390x/tod-kvm.c
@@ -13,7 +13,7 @@
#include "qemu/module.h"
#include "sysemu/runstate.h"
#include "hw/s390x/tod.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
#include "kvm/kvm_s390x.h"
static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp)
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index 3b1f178dc3..2554238c16 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -17,8 +17,8 @@
#include "s390x-internal.h"
#include "elf.h"
#include "sysemu/dump.h"
-#include "hw/s390x/pv.h"
#include "kvm/kvm_s390x.h"
+#include "target/s390x/kvm/pv.h"
struct S390xUserRegsStruct {
uint64_t psw[2];
diff --git a/target/s390x/cpu-sysemu.c b/target/s390x/cpu-sysemu.c
index 5471e01ee8..547287a949 100644
--- a/target/s390x/cpu-sysemu.c
+++ b/target/s390x/cpu-sysemu.c
@@ -32,7 +32,7 @@
#include "qapi/qapi-visit-run-state.h"
#include "sysemu/hw_accel.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
#include "hw/boards.h"
#include "sysemu/sysemu.h"
#include "sysemu/tcg.h"
diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 2e4e11d264..ebb155ce1c 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -15,7 +15,7 @@
#include "qemu/module.h"
#include "cpu_features.h"
#ifndef CONFIG_USER_ONLY
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
#endif
#define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index e7c586c76e..100c5e7b3a 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -22,7 +22,7 @@
#include "qemu/qemu-print.h"
#ifndef CONFIG_USER_ONLY
#include "sysemu/sysemu.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
#endif
#define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
diff --git a/target/s390x/diag.c b/target/s390x/diag.c
index 76b01dcd68..7c8714cc27 100644
--- a/target/s390x/diag.c
+++ b/target/s390x/diag.c
@@ -19,9 +19,9 @@
#include "sysemu/cpus.h"
#include "hw/s390x/ipl.h"
#include "hw/s390x/s390-virtio-ccw.h"
-#include "hw/s390x/pv.h"
#include "sysemu/kvm.h"
#include "kvm/kvm_s390x.h"
+#include "target/s390x/kvm/pv.h"
int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3)
{
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index 6e35473c7f..860977126a 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -24,7 +24,7 @@
#include "exec/gdbstub.h"
#include "qemu/timer.h"
#include "hw/s390x/ioinst.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
#include "sysemu/hw_accel.h"
#include "sysemu/runstate.h"
#include "sysemu/tcg.h"
diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c
index bdae5090bc..409f3e3e63 100644
--- a/target/s390x/ioinst.c
+++ b/target/s390x/ioinst.c
@@ -16,7 +16,7 @@
#include "hw/s390x/ioinst.h"
#include "trace.h"
#include "hw/s390x/s390-pci-bus.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
/* All I/O instructions but chsc use the s format */
static uint64_t get_address_from_regs(CPUS390XState *env, uint32_t ipb,
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index a963866ef4..6d1a6324b9 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -51,7 +51,7 @@
#include "exec/memattrs.h"
#include "hw/s390x/s390-virtio-ccw.h"
#include "hw/s390x/s390-virtio-hcall.h"
-#include "hw/s390x/pv.h"
+#include "target/s390x/kvm/pv.h"
#ifndef DEBUG_KVM
#define DEBUG_KVM 0
diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
index aef52b6686..739d5b9f54 100644
--- a/target/s390x/kvm/meson.build
+++ b/target/s390x/kvm/meson.build
@@ -1,5 +1,6 @@
s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
+ 'pv.c',
'kvm.c'
), if_false: files(
'stubs.c'
diff --git a/hw/s390x/pv.c b/target/s390x/kvm/pv.c
similarity index 99%
rename from hw/s390x/pv.c
rename to target/s390x/kvm/pv.c
index 8a1c71436b..e14db4f41a 100644
--- a/hw/s390x/pv.c
+++ b/target/s390x/kvm/pv.c
@@ -19,9 +19,9 @@
#include "qom/object_interfaces.h"
#include "exec/confidential-guest-support.h"
#include "hw/s390x/ipl.h"
-#include "hw/s390x/pv.h"
#include "hw/s390x/sclp.h"
#include "target/s390x/kvm/kvm_s390x.h"
+#include "target/s390x/kvm/pv.h"
static bool info_valid;
static struct kvm_s390_pv_info_vm info_vm;
diff --git a/include/hw/s390x/pv.h b/target/s390x/kvm/pv.h
similarity index 100%
rename from include/hw/s390x/pv.h
rename to target/s390x/kvm/pv.h
--
2.41.0

View File

@ -0,0 +1,100 @@
From 053faafcf523b0ea4d841c0af8e7e26a2cddd5e8 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 15 Jan 2024 14:00:04 +0100
Subject: [PATCH 3/5] hw/s390x/pv: Restrict Protected Virtualization to sysemu
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 348: s390x: Provide some more useful information if decryption of a PV image fails
RH-Jira: RHEL-18214
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Commit: [3/5] 17b11f9fd2b53c7d33c09a62f28cfca19b18e798
JIRA: https://issues.redhat.com/browse/RHEL-18214
commit 3ea7e312671686e616efa1b8caa5f5ce2d06543a
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
Date: Sat Dec 17 16:24:52 2022 +0100
hw/s390x/pv: Restrict Protected Virtualization to sysemu
Protected Virtualization is irrelevant in user emulation.
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20221217152454.96388-4-philmd@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
target/s390x/cpu_features.c | 4 ++++
target/s390x/cpu_models.c | 4 +++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 5528acd082..2e4e11d264 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -14,7 +14,9 @@
#include "qemu/osdep.h"
#include "qemu/module.h"
#include "cpu_features.h"
+#ifndef CONFIG_USER_ONLY
#include "hw/s390x/pv.h"
+#endif
#define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \
[S390_FEAT_##_FEAT] = { \
@@ -107,6 +109,7 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
}
+#ifndef CONFIG_USER_ONLY
if (!s390_is_pv()) {
return;
}
@@ -147,6 +150,7 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
default:
return;
}
+#endif
}
void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 454485e706..e7c586c76e 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -22,8 +22,8 @@
#include "qemu/qemu-print.h"
#ifndef CONFIG_USER_ONLY
#include "sysemu/sysemu.h"
-#endif
#include "hw/s390x/pv.h"
+#endif
#define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
{ \
@@ -236,6 +236,7 @@ bool s390_has_feat(S390Feat feat)
return 0;
}
+#ifndef CONFIG_USER_ONLY
if (s390_is_pv()) {
switch (feat) {
case S390_FEAT_DIAG_318:
@@ -259,6 +260,7 @@ bool s390_has_feat(S390Feat feat)
break;
}
}
+#endif
return test_bit(feat, cpu->model->features);
}
--
2.41.0

View File

@ -0,0 +1,260 @@
From 57a26ba1c4053cdc426653f921e66f7a8efd3ce7 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 22 May 2023 11:10:11 +0200
Subject: [PATCH 12/15] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI
controller (CVE-2023-0330)
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 277: memory: prevent dma-reentracy issues
RH-Bugzilla: 1999236
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [12/12] 28f5e04344109d8514869c50468bef481437201d (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236
Upstream: Merged
CVE: CVE-2021-3750
commit b987718bbb1d0eabf95499b976212dd5f0120d75
Author: Thomas Huth <thuth@redhat.com>
Date: Mon May 22 11:10:11 2023 +0200
hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330)
We cannot use the generic reentrancy guard in the LSI code, so
we have to manually prevent endless reentrancy here. The problematic
lsi_execute_script() function has already a way to detect whether
too many instructions have been executed - we just have to slightly
change the logic here that it also takes into account if the function
has been called too often in a reentrant way.
The code in fuzz-lsi53c895a-test.c has been taken from an earlier
patch by Mauro Matteo Cascella.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563
Message-Id: <20230522091011.1082574-1-thuth@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/scsi/lsi53c895a.c | 23 +++--
tests/qtest/fuzz-lsi53c895a-test.c | 161 +++++++++++++++++++++++++++++
2 files changed, 178 insertions(+), 6 deletions(-)
create mode 100644 tests/qtest/fuzz-lsi53c895a-test.c
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index 2b9cb2ac5d..b60786fd56 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -1133,15 +1133,24 @@ static void lsi_execute_script(LSIState *s)
uint32_t addr, addr_high;
int opcode;
int insn_processed = 0;
+ static int reentrancy_level;
+
+ reentrancy_level++;
s->istat1 |= LSI_ISTAT1_SRUN;
again:
- if (++insn_processed > LSI_MAX_INSN) {
- /* Some windows drivers make the device spin waiting for a memory
- location to change. If we have been executed a lot of code then
- assume this is the case and force an unexpected device disconnect.
- This is apparently sufficient to beat the drivers into submission.
- */
+ /*
+ * Some windows drivers make the device spin waiting for a memory location
+ * to change. If we have executed more than LSI_MAX_INSN instructions then
+ * assume this is the case and force an unexpected device disconnect. This
+ * is apparently sufficient to beat the drivers into submission.
+ *
+ * Another issue (CVE-2023-0330) can occur if the script is programmed to
+ * trigger itself again and again. Avoid this problem by stopping after
+ * being called multiple times in a reentrant way (8 is an arbitrary value
+ * which should be enough for all valid use cases).
+ */
+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) {
if (!(s->sien0 & LSI_SIST0_UDC)) {
qemu_log_mask(LOG_GUEST_ERROR,
"lsi_scsi: inf. loop with UDC masked");
@@ -1595,6 +1604,8 @@ again:
}
}
trace_lsi_execute_script_stop();
+
+ reentrancy_level--;
}
static uint8_t lsi_reg_readb(LSIState *s, int offset)
diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c
new file mode 100644
index 0000000000..1b55928b9f
--- /dev/null
+++ b/tests/qtest/fuzz-lsi53c895a-test.c
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QTest fuzzer-generated testcase for LSI53C895A device
+ *
+ * Copyright (c) Red Hat
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+
+/*
+ * This used to trigger a DMA reentrancy issue
+ * leading to memory corruption bugs like stack
+ * overflow or use-after-free
+ * https://gitlab.com/qemu-project/qemu/-/issues/1563
+ */
+static void test_lsi_dma_reentrancy(void)
+{
+ QTestState *s;
+
+ s = qtest_init("-M q35 -m 512M -nodefaults "
+ "-blockdev driver=null-co,node-name=null0 "
+ "-device lsi53c810 -device scsi-cd,drive=null0");
+
+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */
+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */
+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */
+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/
+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */
+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/
+ qtest_writel(s, 0xff000000, 0xc0000024);
+ qtest_writel(s, 0xff000114, 0x00000080);
+ qtest_writel(s, 0xff00012c, 0xff000000);
+ qtest_writel(s, 0xff000004, 0xff000114);
+ qtest_writel(s, 0xff000008, 0xff100014);
+ qtest_writel(s, 0xff10002f, 0x000000ff);
+
+ qtest_quit(s);
+}
+
+/*
+ * This used to trigger a UAF in lsi_do_msgout()
+ * https://gitlab.com/qemu-project/qemu/-/issues/972
+ */
+static void test_lsi_do_msgout_cancel_req(void)
+{
+ QTestState *s;
+
+ if (sizeof(void *) == 4) {
+ g_test_skip("memory size too big for 32-bit build");
+ return;
+ }
+
+ s = qtest_init("-M q35 -m 2G -nodefaults "
+ "-device lsi53c895a,id=scsi "
+ "-device scsi-hd,drive=disk0 "
+ "-drive file=null-co://,id=disk0,if=none,format=raw");
+
+ qtest_outl(s, 0xcf8, 0x80000810);
+ qtest_outl(s, 0xcf8, 0xc000);
+ qtest_outl(s, 0xcf8, 0x80000810);
+ qtest_outw(s, 0xcfc, 0x7);
+ qtest_outl(s, 0xcf8, 0x80000810);
+ qtest_outl(s, 0xcfc, 0xc000);
+ qtest_outl(s, 0xcf8, 0x80000804);
+ qtest_outw(s, 0xcfc, 0x05);
+ qtest_writeb(s, 0x69736c10, 0x08);
+ qtest_writeb(s, 0x69736c13, 0x58);
+ qtest_writeb(s, 0x69736c1a, 0x01);
+ qtest_writeb(s, 0x69736c1b, 0x06);
+ qtest_writeb(s, 0x69736c22, 0x01);
+ qtest_writeb(s, 0x69736c23, 0x07);
+ qtest_writeb(s, 0x69736c2b, 0x02);
+ qtest_writeb(s, 0x69736c48, 0x08);
+ qtest_writeb(s, 0x69736c4b, 0x58);
+ qtest_writeb(s, 0x69736c52, 0x04);
+ qtest_writeb(s, 0x69736c53, 0x06);
+ qtest_writeb(s, 0x69736c5b, 0x02);
+ qtest_outl(s, 0xc02d, 0x697300);
+ qtest_writeb(s, 0x5a554662, 0x01);
+ qtest_writeb(s, 0x5a554663, 0x07);
+ qtest_writeb(s, 0x5a55466a, 0x10);
+ qtest_writeb(s, 0x5a55466b, 0x22);
+ qtest_writeb(s, 0x5a55466c, 0x5a);
+ qtest_writeb(s, 0x5a55466d, 0x5a);
+ qtest_writeb(s, 0x5a55466e, 0x34);
+ qtest_writeb(s, 0x5a55466f, 0x5a);
+ qtest_writeb(s, 0x5a345a5a, 0x77);
+ qtest_writeb(s, 0x5a345a5b, 0x55);
+ qtest_writeb(s, 0x5a345a5c, 0x51);
+ qtest_writeb(s, 0x5a345a5d, 0x27);
+ qtest_writeb(s, 0x27515577, 0x41);
+ qtest_outl(s, 0xc02d, 0x5a5500);
+ qtest_writeb(s, 0x364001d0, 0x08);
+ qtest_writeb(s, 0x364001d3, 0x58);
+ qtest_writeb(s, 0x364001da, 0x01);
+ qtest_writeb(s, 0x364001db, 0x26);
+ qtest_writeb(s, 0x364001dc, 0x0d);
+ qtest_writeb(s, 0x364001dd, 0xae);
+ qtest_writeb(s, 0x364001de, 0x41);
+ qtest_writeb(s, 0x364001df, 0x5a);
+ qtest_writeb(s, 0x5a41ae0d, 0xf8);
+ qtest_writeb(s, 0x5a41ae0e, 0x36);
+ qtest_writeb(s, 0x5a41ae0f, 0xd7);
+ qtest_writeb(s, 0x5a41ae10, 0x36);
+ qtest_writeb(s, 0x36d736f8, 0x0c);
+ qtest_writeb(s, 0x36d736f9, 0x80);
+ qtest_writeb(s, 0x36d736fa, 0x0d);
+ qtest_outl(s, 0xc02d, 0x364000);
+
+ qtest_quit(s);
+}
+
+/*
+ * This used to trigger the assert in lsi_do_dma()
+ * https://bugs.launchpad.net/qemu/+bug/697510
+ * https://bugs.launchpad.net/qemu/+bug/1905521
+ * https://bugs.launchpad.net/qemu/+bug/1908515
+ */
+static void test_lsi_do_dma_empty_queue(void)
+{
+ QTestState *s;
+
+ s = qtest_init("-M q35 -nographic -monitor none -serial none "
+ "-drive if=none,id=drive0,"
+ "file=null-co://,file.read-zeroes=on,format=raw "
+ "-device lsi53c895a,id=scsi0 "
+ "-device scsi-hd,drive=drive0,"
+ "bus=scsi0.0,channel=0,scsi-id=0,lun=0");
+ qtest_outl(s, 0xcf8, 0x80001814);
+ qtest_outl(s, 0xcfc, 0xe1068000);
+ qtest_outl(s, 0xcf8, 0x80001818);
+ qtest_outl(s, 0xcf8, 0x80001804);
+ qtest_outw(s, 0xcfc, 0x7);
+ qtest_outl(s, 0xcf8, 0x80002010);
+
+ qtest_writeb(s, 0xe106802e, 0xff); /* Fill DSP bits 16-23 */
+ qtest_writeb(s, 0xe106802f, 0xff); /* Fill DSP bits 24-31: trigger SCRIPT */
+
+ qtest_quit(s);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+
+ if (!qtest_has_device("lsi53c895a")) {
+ return 0;
+ }
+
+ qtest_add_func("fuzz/lsi53c895a/lsi_do_dma_empty_queue",
+ test_lsi_do_dma_empty_queue);
+
+ qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req",
+ test_lsi_do_msgout_cancel_req);
+
+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy",
+ test_lsi_dma_reentrancy);
+
+ return g_test_run();
+}
--
2.37.3

View File

@ -0,0 +1,66 @@
From f0115d856f46e65e3b62896f84fe1902a958bf79 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 22 Mar 2022 19:23:36 -0400
Subject: [PATCH 04/18] hw/virtio: vdpa: Fix leak of host-notifier
memory-region
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 132: hw/virtio: vdpa: Fix leak of host-notifier memory-region
RH-Commit: [1/1] b3cec35d185e3b9844a458f5c51c5d5ef7e3d8f1 (jmaloy/qemu-kvm)
RH-Bugzilla: 2060843
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
BZ: https://bugzilla.redhat.com/2060843
UPSTREAM: no
BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138
commit 98f7607ecda00dea3cbb2ed7b4427c96846efb83
Author: Laurent Vivier <lvivier@redhat.com>
Date: Fri Feb 11 18:02:59 2022 +0100
hw/virtio: vdpa: Fix leak of host-notifier memory-region
If call virtio_queue_set_host_notifier_mr fails, should free
host-notifier memory-region.
This problem can trigger a coredump with some vDPA drivers (mlx5,
but not with the vdpasim), if we unplug the virtio-net card from
the guest after a stop/start.
The same fix has been done for vhost-user:
1f89d3b91e3e ("hw/virtio: Fix leak of host-notifier memory-region")
Fixes: d0416d487bd5 ("vhost-vdpa: map virtqueue notification area if possible")
Cc: jasowang@redhat.com
Resolves: https://bugzilla.redhat.com/2027208
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20220211170259.1388734-1-lvivier@redhat.com>
Cc: qemu-stable@nongnu.org
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 98f7607ecda00dea3cbb2ed7b4427c96846efb83)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/virtio/vhost-vdpa.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index bcaf00e09f..78da48a333 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -415,6 +415,7 @@ static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
g_free(name);
if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
+ object_unparent(OBJECT(&n->mr));
munmap(addr, page_size);
goto err;
}
--
2.27.0

View File

@ -0,0 +1,59 @@
From ccaa1135bd1aa90c94f0e8b5417bd2a420134e6c Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Wed, 30 Mar 2022 14:52:34 -0400
Subject: [PATCH 08/18] i386: Add Icelake-Server-v6 CPU model with 5-level EPT
support
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 139: vmxcap: Add 5-level EPT bit
RH-Commit: [2/2] e913746b2df9cbd0308014ab5cc72577458857fa (jmaloy/qemu-kvm)
RH-Bugzilla: 2065207
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2065207
UPSTREAM: Merged
commit: 12cab535db6440af41ed8dfefe908a594321b6ce
Author: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon Feb 21 15:53:15 2022 +0100
i386: Add Icelake-Server-v6 CPU model with 5-level EPT support
Windows 11 with WSL2 enabled (Hyper-V) fails to boot with Icelake-Server
{-v5} CPU model but boots well with '-cpu host'. Apparently, it expects
5-level paging and 5-level EPT support to come in pair but QEMU's
Icelake-Server CPU model lacks the later. Introduce 'Icelake-Server-v6'
CPU model with 'vmx-page-walk-5' enabled by default.
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220221145316.576138-1-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 12cab535db6440af41ed8dfefe908a594321b6ce)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
target/i386/cpu.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index aa9e636800..6e25d13339 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3505,6 +3505,14 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
},
},
+ {
+ .version = 6,
+ .note = "5-level EPT",
+ .props = (PropValue[]) {
+ { "vmx-page-walk-5", "on" },
+ { /* end of list */ }
+ },
+ },
{ /* end of list */ }
}
},
--
2.27.0

View File

@ -0,0 +1,53 @@
From 18ac13c7d64266238bd44b2188e0d044af3c3377 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Thu, 3 Aug 2023 15:14:14 -0400
Subject: [PATCH 4/5] i386/cpu: Update how the EBX register of CPUID 0x8000001F
is set
RH-Author: Bandan Das <None>
RH-MergeRequest: 296: Updates to SEV reduced-phys-bits parameter
RH-Bugzilla: 2214840
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [4/4] 8b236fd9bc4c177bfacf6220a429e711b5bf062e
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214840
commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8
Author: Tom Lendacky <thomas.lendacky@amd.com>
Date: Fri Sep 30 10:14:30 2022 -0500
i386/cpu: Update how the EBX register of CPUID 0x8000001F is set
Update the setting of CPUID 0x8000001F EBX to clearly document the ranges
associated with fields being set.
Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
target/i386/cpu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 9d3dcdcc0d..265f0aadfc 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5836,8 +5836,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if (sev_enabled()) {
*eax = 0x2;
*eax |= sev_es_enabled() ? 0x8 : 0;
- *ebx = sev_get_cbit_position();
- *ebx |= sev_get_reduced_phys_bits() << 6;
+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
}
break;
default:
--
2.37.3

View File

@ -0,0 +1,67 @@
From f96220d64a31a4a52b2d132a503048579946f982 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 18 Aug 2022 17:01:13 +0200
Subject: [PATCH 3/3] i386: do kvm_put_msr_feature_control() first thing when
vCPU is reset
RH-Author: Miroslav Rezanina <mrezanin@redhat.com>
RH-MergeRequest: 219: Synchronize qemu-6.2.0-20.el8.1 build from RHEL 8.7 to RHEL 8.8
RH-Bugzilla: 2125271
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [2/2] 08e1e67db96801e4a35aa6b60a93b2c2f1641220
kvm_put_sregs2() fails to reset 'locked' CR4/CR0 bits upon vCPU reset when
it is in VMX root operation. Do kvm_put_msr_feature_control() before
kvm_put_sregs2() to (possibly) kick vCPU out of VMX root operation. It also
seems logical to do kvm_put_msr_feature_control() before
kvm_put_nested_state() and not after it, especially when 'real' nested
state is set.
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220818150113.479917-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 45ed68a1a3a19754ade954d75a3c9d13ff560e5c)
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
target/i386/kvm/kvm.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 81d729dc40..a06221d3e5 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4255,6 +4255,18 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
+ /*
+ * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX
+ * root operation upon vCPU reset. kvm_put_msr_feature_control() should also
+ * preceed kvm_put_nested_state() when 'real' nested state is set.
+ */
+ if (level >= KVM_PUT_RESET_STATE) {
+ ret = kvm_put_msr_feature_control(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
/* must be before kvm_put_nested_state so that EFER.SVME is set */
ret = kvm_put_sregs(x86_cpu);
if (ret < 0) {
@@ -4266,11 +4278,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
if (ret < 0) {
return ret;
}
-
- ret = kvm_put_msr_feature_control(x86_cpu);
- if (ret < 0) {
- return ret;
- }
}
if (level == KVM_PUT_FULL_STATE) {
--
2.35.3

View File

@ -0,0 +1,94 @@
From 46e54544c3480658111d6f111d6c265dcea2e19b Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 18 Aug 2022 17:01:12 +0200
Subject: [PATCH 2/3] i386: reset KVM nested state upon CPU reset
RH-Author: Miroslav Rezanina <mrezanin@redhat.com>
RH-MergeRequest: 219: Synchronize qemu-6.2.0-20.el8.1 build from RHEL 8.7 to RHEL 8.8
RH-Bugzilla: 2125271
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [1/2] de4db7bceb6baaf69aec8b0ae9aa8887aa869e15
Make sure env->nested_state is cleaned up when a vCPU is reset, it may
be stale after an incoming migration, kvm_arch_put_registers() may
end up failing or putting vCPU in a weird state.
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220818150113.479917-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3cafdb67504a34a0305260f0c86a73d5a3fb000b)
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
target/i386/kvm/kvm.c | 37 +++++++++++++++++++++++++++----------
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index bd439e56ad..81d729dc40 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1615,6 +1615,30 @@ static void kvm_init_xsave(CPUX86State *env)
env->xsave_buf_len);
}
+static void kvm_init_nested_state(CPUX86State *env)
+{
+ struct kvm_vmx_nested_state_hdr *vmx_hdr;
+ uint32_t size;
+
+ if (!env->nested_state) {
+ return;
+ }
+
+ size = env->nested_state->size;
+
+ memset(env->nested_state, 0, size);
+ env->nested_state->size = size;
+
+ if (cpu_has_vmx(env)) {
+ env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
+ vmx_hdr = &env->nested_state->hdr.vmx;
+ vmx_hdr->vmxon_pa = -1ull;
+ vmx_hdr->vmcs12_pa = -1ull;
+ } else if (cpu_has_svm(env)) {
+ env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM;
+ }
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
@@ -2042,19 +2066,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data));
if (cpu_has_vmx(env) || cpu_has_svm(env)) {
- struct kvm_vmx_nested_state_hdr *vmx_hdr;
-
env->nested_state = g_malloc0(max_nested_state_len);
env->nested_state->size = max_nested_state_len;
- if (cpu_has_vmx(env)) {
- env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
- vmx_hdr = &env->nested_state->hdr.vmx;
- vmx_hdr->vmxon_pa = -1ull;
- vmx_hdr->vmcs12_pa = -1ull;
- } else {
- env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM;
- }
+ kvm_init_nested_state(env);
}
}
@@ -2117,6 +2132,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
/* enabled by default */
env->poll_control_msr = 1;
+ kvm_init_nested_state(env);
+
sev_es_set_reset_vector(CPU(cpu));
}
--
2.35.3

View File

@ -0,0 +1,78 @@
From 19504ea76b6341c11213316402bb5194487e1f01 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Thu, 3 Aug 2023 15:13:19 -0400
Subject: [PATCH 3/5] i386/sev: Update checks and information related to
reduced-phys-bits
RH-Author: Bandan Das <None>
RH-MergeRequest: 296: Updates to SEV reduced-phys-bits parameter
RH-Bugzilla: 2214840
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [3/4] b617173d2b15fa39cdc02b5c1ac4d52e9b0dfede
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214840
commit 8168fed9f84e3128f7628969ae78af49433d5ce7
Author: Tom Lendacky <thomas.lendacky@amd.com>
Date: Fri Sep 30 10:14:29 2022 -0500
i386/sev: Update checks and information related to reduced-phys-bits
The value of the reduced-phys-bits parameter is propogated to the CPUID
information exposed to the guest. Update the current validation check to
account for the size of the CPUID field (6-bits), ensuring the value is
in the range of 1 to 63.
Maintain backward compatibility, to an extent, by allowing a value greater
than 1 (so that the previously documented value of 5 still works), but not
allowing anything over 63.
Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <cca5341a95ac73f904e6300f10b04f9c62e4e8ff.1664550870.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
target/i386/sev.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 025ff7a6f8..ba6a65e90c 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -892,15 +892,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
host_cbitpos = ebx & 0x3f;
+ /*
+ * The cbitpos value will be placed in bit positions 5:0 of the EBX
+ * register of CPUID 0x8000001F. No need to verify the range as the
+ * comparison against the host value accomplishes that.
+ */
if (host_cbitpos != sev->cbitpos) {
error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
__func__, host_cbitpos, sev->cbitpos);
goto err;
}
- if (sev->reduced_phys_bits < 1) {
- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1,"
- " requested '%d'", __func__, sev->reduced_phys_bits);
+ /*
+ * The reduced-phys-bits value will be placed in bit positions 11:6 of
+ * the EBX register of CPUID 0x8000001F, so verify the supplied value
+ * is in the range of 1 to 63.
+ */
+ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) {
+ error_setg(errp, "%s: reduced_phys_bits check failed,"
+ " it should be in the range of 1 to 63, requested '%d'",
+ __func__, sev->reduced_phys_bits);
goto err;
}
--
2.37.3

Some files were not shown because too many files have changed in this diff Show More