* Thu Apr 20 2023 Miroslav Rezanina <mrezanin@redhat.com> - 8.0.0-1

- Rebase to QEMU 8.0.0
- Resolves: bz#2180898
  (Rebase to QEMU 8.0.0 for RHEL 9.3.0)
This commit is contained in:
Miroslav Rezanina 2023-04-20 02:33:14 -04:00
parent 42801435ce
commit 07d01bd47b
137 changed files with 783 additions and 18192 deletions

1
.gitignore vendored
View File

@ -25,3 +25,4 @@
/capstone.tar.gz
/qemu-7.1.0.tar.xz
/qemu-7.2.0.tar.xz
/qemu-8.0.0.tar.xz

View File

@ -1,4 +1,4 @@
From ccc4a5bdc8c2f27678312364a7c12aeafd009bb6 Mon Sep 17 00:00:00 2001
From 84039bfc860878f3c3421de4a1836ac5d6300ed7 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 26 May 2021 10:56:02 +0200
Subject: Initial redhat build
@ -13,7 +13,7 @@ several issues are fixed in QEMU tree:
We disable make check due to issues with some of the tests.
This rebase is based on qemu-kvm-7.1.0-7.el9
This rebase is based on qemu-kvm-7.2.0-14.el9
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
--
@ -66,6 +66,16 @@ Rebase changes (7.2.0):
- Fix SRPM name generation to work on Fedora 37
- Switch back to system meson
Rebase changes (8.0.0-rc1):
- use enable-dtrace-backends instead of enable-dtrace-backend
- Removed qemu virtiofsd bits
Rebase changes (8.0.0-rc2):
- tests/check-block.sh removed (upstream)
Rebase changes (8.0.0-rc3):
- Add new --disable-* options for configure
Merged patches (6.0.0):
- 605758c902 Limit build on Power to qemu-img and qemu-ga only
@ -162,16 +172,18 @@ Merged patches (7.2.0 rc4):
- 8c6834feb6 Remove opengl display device subpackages (C9S MR 124)
- 0ecc97f29e spec: Add requires for packages with additional virtio-gpu variants (C9S MR 124)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
Merged patches (8.0.0-rc1):
- 7754f6ba78 Minor packaging fixes
- 401af56187 spec: Disable VDUSE
fix
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
.distro/Makefile | 100 +
.distro/Makefile.common | 41 +
.distro/README.tests | 39 +
.distro/modules-load.conf | 4 +
.distro/qemu-guest-agent.service | 1 -
.distro/qemu-kvm.spec.template | 4315 +++++++++++++++++++++++
.distro/qemu-kvm.spec.template | 4528 +++++++++++++++++++++++
.distro/rpminspect.yaml | 6 +-
.distro/scripts/extract_build_cmd.py | 12 +
.distro/scripts/process-patches.sh | 4 +
@ -180,9 +192,8 @@ fix
scripts/qemu-guest-agent/fsfreeze-hook | 2 +-
scripts/systemtap/conf.d/qemu_kvm.conf | 4 +
scripts/systemtap/script.d/qemu_kvm.stp | 1 +
tests/check-block.sh | 2 +
ui/vnc-auth-sasl.c | 2 +-
16 files changed, 4573 insertions(+), 4 deletions(-)
15 files changed, 4784 insertions(+), 4 deletions(-)
create mode 100644 .distro/Makefile
create mode 100644 .distro/Makefile.common
create mode 100644 .distro/README.tests
@ -271,19 +282,6 @@ index 0000000000..c04abf9449
+++ b/scripts/systemtap/script.d/qemu_kvm.stp
@@ -0,0 +1 @@
+probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {}
diff --git a/tests/check-block.sh b/tests/check-block.sh
index 5de2c1ba0b..6af743f441 100755
--- a/tests/check-block.sh
+++ b/tests/check-block.sh
@@ -22,6 +22,8 @@ if [ -z "$(find . -name 'qemu-system-*' -print)" ]; then
skip "No qemu-system binary available ==> Not running the qemu-iotests."
fi
+exit 0
+
cd tests/qemu-iotests
# QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests
diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index 47fdae5b21..2a950caa2a 100644
--- a/ui/vnc-auth-sasl.c
@ -298,5 +296,5 @@ index 47fdae5b21..2a950caa2a 100644
if (saslErr != SASL_OK) {
error_setg(errp, "Failed to initialize SASL auth: %s",
--
2.31.1
2.39.1

View File

@ -1,4 +1,4 @@
From 90366cd2ead5a5301aaceed56477d2e6d9f1b3cd Mon Sep 17 00:00:00 2001
From 63829772dbc2075fc014a9d52e3968735d228018 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 7 Dec 2022 03:05:48 -0500
Subject: Enable/disable devices for RHEL
@ -32,6 +32,11 @@ Rebase notes (7.1.0 rc3):
Rebase notes (7.2.0 rc20):
- Removed disabling a15mpcore.c as no longer needed
Rebase notes (8.0.0-rc1):
- Rename CONFIG_ACPI_X86_ICH to CONFIG_ACPI_ICH9
- Include qemu/error-report.h in hw/display/cirrus_vga.c
- Change virtiofsd dependency version
Merged patches (6.1.0):
- c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak
- 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI
@ -63,7 +68,7 @@ Merged patches (7.1.0 rc0):
hw/arm/meson.build | 2 +-
hw/block/fdc.c | 10 ++
hw/cpu/meson.build | 3 +-
hw/display/cirrus_vga.c | 5 +-
hw/display/cirrus_vga.c | 7 +-
hw/ide/piix.c | 5 +-
hw/input/pckbd.c | 2 +
hw/net/e1000.c | 2 +
@ -73,7 +78,7 @@ Merged patches (7.1.0 rc0):
target/ppc/cpu-models.c | 9 ++
target/s390x/cpu_models_sysemu.c | 3 +
target/s390x/kvm/kvm.c | 8 ++
19 files changed, 283 insertions(+), 13 deletions(-)
19 files changed, 285 insertions(+), 13 deletions(-)
create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak
create mode 100644 configs/devices/rh-virtio.mak
@ -212,7 +217,7 @@ index 0000000000..69a799adbd
+CONFIG_VHOST_USER_FS=y
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
new file mode 100644
index 0000000000..10cb0a14e0
index 0000000000..668b2d0e18
--- /dev/null
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
@@ -0,0 +1,109 @@
@ -226,7 +231,7 @@ index 0000000000..10cb0a14e0
+CONFIG_ACPI_SMBUS=y
+CONFIG_ACPI_VMGENID=y
+CONFIG_ACPI_X86=y
+CONFIG_ACPI_X86_ICH=y
+CONFIG_ACPI_ICH9=y
+CONFIG_AHCI=y
+CONFIG_APIC=y
+CONFIG_APM=y
@ -326,10 +331,10 @@ index 0000000000..10cb0a14e0
+CONFIG_VHOST_USER_VSOCK=y
+CONFIG_VHOST_USER_FS=y
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 92f9f6e000..c5e94c997c 100644
index b545ba0e4f..a41a16cba7 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -30,7 +30,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
@@ -29,7 +29,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c'))
arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c'))
@ -339,7 +344,7 @@ index 92f9f6e000..c5e94c997c 100644
arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c'))
arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c'))
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 64ae4a6899..9b8e782c19 100644
index d7cc4d3ec1..12d0a60905 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -49,6 +49,8 @@
@ -367,7 +372,7 @@ index 64ae4a6899..9b8e782c19 100644
error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'");
return;
diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build
index 9e52fee9e7..87c209a754 100644
index e37490074f..4431e3731c 100644
--- a/hw/cpu/meson.build
+++ b/hw/cpu/meson.build
@@ -1,4 +1,5 @@
@ -375,13 +380,29 @@ index 9e52fee9e7..87c209a754 100644
+#softmmu_ss.add(files('core.c', 'cluster.c'))
+softmmu_ss.add(files('core.c'))
specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c'))
specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c'))
softmmu_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c'))
softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c'))
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index 6e8c747c46..1948ebee8e 100644
index b80f98b6c4..cbde6a8f15 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -2946,7 +2946,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp)
@@ -36,6 +36,7 @@
#include "qemu/module.h"
#include "qemu/units.h"
#include "qemu/log.h"
+#include "qemu/error-report.h"
#include "sysemu/reset.h"
#include "qapi/error.h"
#include "trace.h"
@@ -47,6 +48,7 @@
#include "qom/object.h"
#include "ui/console.h"
+
/*
* TODO:
* - destination write mask support not complete (bits 5..7)
@@ -2946,7 +2948,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp)
PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
int16_t device_id = pc->device_id;
@ -394,10 +415,10 @@ index 6e8c747c46..1948ebee8e 100644
* Also accept 8 MB/16 MB for backward compatibility.
*/
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 267dbf37db..87fcda4062 100644
index 41d60921e3..a4af45b4e8 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -199,7 +199,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data)
@@ -193,7 +193,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data)
k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1;
k->class_id = PCI_CLASS_STORAGE_IDE;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
@ -407,7 +428,7 @@ index 267dbf37db..87fcda4062 100644
}
static const TypeInfo piix3_ide_info = {
@@ -222,6 +223,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data)
@@ -216,6 +217,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data)
k->class_id = PCI_CLASS_STORAGE_IDE;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
dc->hotpluggable = false;
@ -430,10 +451,10 @@ index b92b63bedc..3b6235dde6 100644
static const TypeInfo i8042_info = {
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index e26e0a64c1..41492fae79 100644
index 23d660619f..b75c9aa799 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1824,6 +1824,7 @@ static const E1000Info e1000_devices[] = {
@@ -1805,6 +1805,7 @@ static const E1000Info e1000_devices[] = {
.revision = 0x03,
.phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
},
@ -441,7 +462,7 @@ index e26e0a64c1..41492fae79 100644
{
.name = "e1000-82544gc",
.device_id = E1000_DEV_ID_82544GC_COPPER,
@@ -1836,6 +1837,7 @@ static const E1000Info e1000_devices[] = {
@@ -1817,6 +1818,7 @@ static const E1000Info e1000_devices[] = {
.revision = 0x03,
.phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
},
@ -467,7 +488,7 @@ index 8a4861f45a..fcb5dfe792 100644
DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"),
DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"),
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index 793df42e21..cd3c305471 100644
index 599dc24f0d..905a994c3a 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade
@ -480,10 +501,10 @@ index 793df42e21..cd3c305471 100644
endif
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index 9a2cef7d05..a528ff9a3d 100644
index df0c45e523..c154a4dcf2 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -151,6 +151,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
@@ -155,6 +155,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
/* CPU models. These are not needed for the AArch64 linux-user build. */
#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
@ -491,7 +512,7 @@ index 9a2cef7d05..a528ff9a3d 100644
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
@@ -504,6 +505,7 @@ static void cortex_a9_initfn(Object *obj)
@@ -508,6 +509,7 @@ static void cortex_a9_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41093000;
define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
}
@ -499,7 +520,7 @@ index 9a2cef7d05..a528ff9a3d 100644
#ifndef CONFIG_USER_ONLY
static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -528,6 +530,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
@@ -532,6 +534,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
.access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
};
@ -507,7 +528,7 @@ index 9a2cef7d05..a528ff9a3d 100644
static void cortex_a7_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -576,6 +579,7 @@ static void cortex_a7_initfn(Object *obj)
@@ -580,6 +583,7 @@ static void cortex_a7_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41072000;
define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
}
@ -515,7 +536,7 @@ index 9a2cef7d05..a528ff9a3d 100644
static void cortex_a15_initfn(Object *obj)
{
@@ -624,6 +628,7 @@ static void cortex_a15_initfn(Object *obj)
@@ -628,6 +632,7 @@ static void cortex_a15_initfn(Object *obj)
define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
}
@ -523,7 +544,7 @@ index 9a2cef7d05..a528ff9a3d 100644
static void cortex_m0_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -1065,6 +1070,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
@@ -1110,6 +1115,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
cc->gdb_core_xml_file = "arm-m-profile.xml";
}
@ -531,7 +552,7 @@ index 9a2cef7d05..a528ff9a3d 100644
#ifndef TARGET_AARCH64
/*
@@ -1132,6 +1138,7 @@ static void arm_max_initfn(Object *obj)
@@ -1177,6 +1183,7 @@ static void arm_max_initfn(Object *obj)
#endif /* !TARGET_AARCH64 */
static const ARMCPUInfo arm_tcg_cpus[] = {
@ -539,7 +560,7 @@ index 9a2cef7d05..a528ff9a3d 100644
{ .name = "arm926", .initfn = arm926_initfn },
{ .name = "arm946", .initfn = arm946_initfn },
{ .name = "arm1026", .initfn = arm1026_initfn },
@@ -1147,7 +1154,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
@@ -1192,7 +1199,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "cortex-a7", .initfn = cortex_a7_initfn },
{ .name = "cortex-a8", .initfn = cortex_a8_initfn },
{ .name = "cortex-a9", .initfn = cortex_a9_initfn },
@ -549,7 +570,7 @@ index 9a2cef7d05..a528ff9a3d 100644
{ .name = "cortex-m0", .initfn = cortex_m0_initfn,
.class_init = arm_v7m_class_init },
{ .name = "cortex-m3", .initfn = cortex_m3_initfn,
@@ -1178,6 +1187,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
@@ -1224,6 +1233,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "pxa270-b1", .initfn = pxa270b1_initfn },
{ .name = "pxa270-c0", .initfn = pxa270c0_initfn },
{ .name = "pxa270-c5", .initfn = pxa270c5_initfn },
@ -620,7 +641,7 @@ index 912b037c63..cd3ff700ac 100644
{ NULL, NULL }
};
diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c
index d8a141a023..d086b1c39c 100644
index 63981bf36b..87a4480c05 100644
--- a/target/s390x/cpu_models_sysemu.c
+++ b/target/s390x/cpu_models_sysemu.c
@@ -35,6 +35,9 @@ static void check_unavailable_features(const S390CPUModel *max_model,
@ -653,5 +674,5 @@ index 3ac7ec9acf..97da1a6424 100644
prop.ibc = s390_ibc_from_cpu_model(model);
/* configure cpu features indicated via STFL(e) */
--
2.31.1
2.39.1

View File

@ -1,4 +1,4 @@
From 0208f38671b9de4036c0d56142a7f22e5091bae0 Mon Sep 17 00:00:00 2001
From c13f8e21b32aa06b08847e88080f2fdea5084a9b Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 11 Jan 2019 09:54:45 +0100
Subject: Machine type related general changes
@ -46,28 +46,33 @@ Merged patches (7.1.0 rc0):
Merged patches (7.2.0 rc0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
- 21ed34787b Add 7.2 compat bits for RHEL 9.1 machine type
- e5c8d5d603 virtio-rng-pci: fix migration compat for vectors
- 5a5fa77059 virtio-rng-pci: fix transitional migration compat for vectors
---
hw/acpi/piix4.c | 2 +-
hw/arm/virt.c | 2 +-
hw/core/machine.c | 222 +++++++++++++++++++++++++++++++++++
hw/core/machine.c | 229 +++++++++++++++++++++++++++++++++++
hw/display/vga-isa.c | 2 +-
hw/i386/pc_piix.c | 2 +
hw/i386/pc_q35.c | 2 +
hw/net/rtl8139.c | 4 +-
hw/smbios/smbios.c | 46 +++++++-
hw/smbios/smbios.c | 46 ++++++-
hw/timer/i8254_common.c | 2 +-
hw/usb/hcd-xhci-pci.c | 59 +++++++---
hw/usb/hcd-xhci-pci.c | 59 ++++++---
hw/usb/hcd-xhci-pci.h | 1 +
include/hw/boards.h | 31 +++++
include/hw/firmware/smbios.h | 5 +-
include/hw/i386/pc.h | 3 +
14 files changed, 360 insertions(+), 23 deletions(-)
14 files changed, 367 insertions(+), 23 deletions(-)
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 0a81f1ad93..dbfb362a8f 100644
index 63d2113b86..a24b9aac92 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -248,7 +248,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
@@ -247,7 +247,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
static const VMStateDescription vmstate_acpi = {
.name = "piix4_pm",
.version_id = 3,
@ -77,25 +82,25 @@ index 0a81f1ad93..dbfb362a8f 100644
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState),
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index b871350856..d633300fdc 100644
index ac626b3bef..4a6e89c7bc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1619,7 +1619,7 @@ static void virt_build_smbios(VirtMachineState *vms)
@@ -1629,7 +1629,7 @@ static void virt_build_smbios(VirtMachineState *vms)
smbios_set_defaults("QEMU", product,
vmc->smbios_old_sys_ver ? "1.0" : mc->name, false,
- true, SMBIOS_ENTRY_POINT_TYPE_64);
+ true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64);
smbios_get_tables(MACHINE(vms), NULL, 0,
&smbios_tables, &smbios_tables_len,
/* build the array of physical mem area from base_memmap */
mem_array.address = vms->memmap[VIRT_MEM].base;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 8d34caa31d..9edec1ca05 100644
index cd13b8b0a3..5aa567fad3 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -40,6 +40,228 @@
#include "hw/virtio/virtio-pci.h"
#include "qom/object_interfaces.h"
@@ -46,6 +46,235 @@ GlobalProperty hw_compat_7_2[] = {
};
const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
+/*
+ * RHEL only: machine types for previous major releases are deprecated
@ -111,6 +116,13 @@ index 8d34caa31d..9edec1ca05 100644
+ { "arm-gicv3-common", "force-8-bit-prio", "on" },
+ /* hw_compat_rhel_9_1 from hw_compat_7_0 */
+ { "nvme-ns", "eui64-default", "on"},
+ /* hw_compat_rhel_9_1 from hw_compat_7_1 */
+ { "virtio-device", "queue_reset", "false" },
+ /* hw_compat_rhel_9_1 bz 2155749 */
+ { "virtio-rng-pci", "vectors", "0" },
+ /* hw_compat_rhel_9_1 bz 2162569 */
+ { "virtio-rng-pci-transitional", "vectors", "0" },
+ { "virtio-rng-pci-non-transitional", "vectors", "0" },
+};
+const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1);
+
@ -321,7 +333,7 @@ index 8d34caa31d..9edec1ca05 100644
+
GlobalProperty hw_compat_7_1[] = {
{ "virtio-device", "queue_reset", "false" },
};
{ "virtio-rng-pci", "vectors", "0" },
diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c
index 2a5437d803..0db2c2b2a1 100644
--- a/hw/display/vga-isa.c
@ -336,10 +348,10 @@ index 2a5437d803..0db2c2b2a1 100644
};
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0ad0ed1603..0985ff67d2 100644
index 30eedd62a3..14a794081e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -187,6 +187,8 @@ static void pc_init1(MachineState *machine,
@@ -201,6 +201,8 @@ static void pc_init1(MachineState *machine,
smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)",
mc->name, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
@ -349,10 +361,10 @@ index 0ad0ed1603..0985ff67d2 100644
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index a496bd6e74..ea582254e3 100644
index 797ba347fd..dc0ba5f9e7 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -201,6 +201,8 @@ static void pc_q35_init(MachineState *machine)
@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine)
smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)",
mc->name, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
@ -362,7 +374,7 @@ index a496bd6e74..ea582254e3 100644
}
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 700b1b66b6..13693aeb4f 100644
index 5a5aaf868d..3d473d5869 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque)
@ -385,10 +397,10 @@ index 700b1b66b6..13693aeb4f 100644
VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State),
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index b4243de735..c5ad69237e 100644
index d2007e70fb..319eae9e9d 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -57,6 +57,9 @@ static bool smbios_legacy = true;
@@ -58,6 +58,9 @@ static bool smbios_legacy = true;
static bool smbios_uuid_encoded = true;
/* end: legacy structures & constants for <= 2.0 machines */
@ -398,7 +410,7 @@ index b4243de735..c5ad69237e 100644
uint8_t *smbios_tables;
size_t smbios_tables_len;
@@ -669,7 +672,7 @@ static void smbios_build_type_1_table(void)
@@ -670,7 +673,7 @@ static void smbios_build_type_1_table(void)
static void smbios_build_type_2_table(void)
{
@ -407,7 +419,7 @@ index b4243de735..c5ad69237e 100644
SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer);
SMBIOS_TABLE_SET_STR(2, product_str, type2.product);
@@ -977,7 +980,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features)
@@ -980,7 +983,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features)
void smbios_set_defaults(const char *manufacturer, const char *product,
const char *version, bool legacy_mode,
@ -419,7 +431,7 @@ index b4243de735..c5ad69237e 100644
{
smbios_have_defaults = true;
smbios_legacy = legacy_mode;
@@ -998,11 +1004,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product,
@@ -1001,11 +1007,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product,
g_free(smbios_entries);
}
@ -579,10 +591,10 @@ index 643d4643e4..529bad9366 100644
dc->vmsd = &vmstate_xhci_pci;
set_bit(DEVICE_CATEGORY_USB, dc->categories);
diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h
index c193f79443..086a1feb1e 100644
index 08f70ce97c..1be7527c1b 100644
--- a/hw/usb/hcd-xhci-pci.h
+++ b/hw/usb/hcd-xhci-pci.h
@@ -39,6 +39,7 @@ typedef struct XHCIPciState {
@@ -40,6 +40,7 @@ typedef struct XHCIPciState {
XHCIState xhci;
OnOffAuto msi;
OnOffAuto msix;
@ -591,10 +603,10 @@ index c193f79443..086a1feb1e 100644
#endif
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 90f1dd3aeb..2209d4e416 100644
index 6fbbfd56c8..c5a965d27f 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -454,4 +454,35 @@ extern const size_t hw_compat_2_2_len;
@@ -459,4 +459,35 @@ extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
@ -647,10 +659,10 @@ index 7f3259a630..d24b3ccd32 100644
void smbios_get_tables(MachineState *ms,
const struct smbios_phys_mem_area *mem_array,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index c95333514e..3754eaa97d 100644
index 8206d5405a..908a275736 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -112,6 +112,9 @@ struct PCMachineClass {
@@ -111,6 +111,9 @@ struct PCMachineClass {
bool smbios_defaults;
bool smbios_legacy_mode;
bool smbios_uuid_encoded;
@ -661,5 +673,5 @@ index c95333514e..3754eaa97d 100644
/* RAM / address space compat: */
bool gigabyte_align;
--
2.31.1
2.39.1

View File

@ -1,4 +1,4 @@
From 8501581c99760ed8a800d0c98eeb17a4bf450366 Mon Sep 17 00:00:00 2001
From ec6468b65a3af0e2b84575c9f965f61916d0d8ea Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 12:53:31 +0200
Subject: Add aarch64 machine types
@ -26,6 +26,9 @@ Rebase notes (7.1.0 rc3):
Rebase notes (7.2.0 rc0):
- Disabled cortex-a35
Rebase notes (8.0.0-rc1):
- Moved changed code from target/arm/helper.c to target/arm/arm-qmp-cmds.c
Merged patches (6.2.0):
- 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type
- f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type
@ -49,23 +52,27 @@ Merged patches (7.1.0 rc0):
Merged patches (7.2.0 rc0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
- c1a21266d8 redhat: aarch64: add rhel9.2.0 virt machine type
- d97cd7c513 redhat: fix virt-rhel9.2.0 compat props
---
hw/arm/virt.c | 237 ++++++++++++++++++++++++++++++++-
hw/arm/virt.c | 251 ++++++++++++++++++++++++++++++++-
include/hw/arm/virt.h | 8 ++
target/arm/arm-qmp-cmds.c | 2 +
target/arm/cpu-qom.h | 1 +
target/arm/cpu.c | 5 +
target/arm/cpu.h | 2 +
target/arm/cpu64.c | 16 ++-
target/arm/cpu_tcg.c | 12 +-
target/arm/helper.c | 2 +
tests/qtest/arm-cpu-features.c | 6 +
9 files changed, 277 insertions(+), 12 deletions(-)
9 files changed, 289 insertions(+), 14 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index d633300fdc..dfcab40a73 100644
index 4a6e89c7bc..1ae1654be5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -80,6 +80,7 @@
@@ -81,6 +81,7 @@
#include "hw/char/pl011.h"
#include "qemu/guest-random.h"
@ -73,7 +80,7 @@ index d633300fdc..dfcab40a73 100644
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
void *data) \
@@ -106,7 +107,48 @@
@@ -107,7 +108,48 @@
DEFINE_VIRT_MACHINE_LATEST(major, minor, true)
#define DEFINE_VIRT_MACHINE(major, minor) \
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
@ -123,7 +130,7 @@ index d633300fdc..dfcab40a73 100644
/* Number of external interrupt lines to configure the GIC with */
#define NUM_IRQS 256
@@ -197,15 +239,19 @@ static const int a15irqmap[] = {
@@ -204,16 +246,20 @@ static const int a15irqmap[] = {
};
static const char *valid_cpus[] = {
@ -132,6 +139,7 @@ index d633300fdc..dfcab40a73 100644
ARM_CPU_TYPE_NAME("cortex-a15"),
ARM_CPU_TYPE_NAME("cortex-a35"),
ARM_CPU_TYPE_NAME("cortex-a53"),
ARM_CPU_TYPE_NAME("cortex-a55"),
+#endif /* disabled for RHEL */
ARM_CPU_TYPE_NAME("cortex-a57"),
+#if 0 /* Disabled for Red Hat Enterprise Linux */
@ -143,7 +151,7 @@ index d633300fdc..dfcab40a73 100644
ARM_CPU_TYPE_NAME("host"),
ARM_CPU_TYPE_NAME("max"),
};
@@ -2290,6 +2336,7 @@ static void machvirt_init(MachineState *machine)
@@ -2339,6 +2385,7 @@ static void machvirt_init(MachineState *machine)
qemu_add_machine_init_done_notifier(&vms->machine_done);
}
@ -151,7 +159,7 @@ index d633300fdc..dfcab40a73 100644
static bool virt_get_secure(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2317,6 +2364,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp)
@@ -2366,6 +2413,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp)
vms->virt = value;
}
@ -159,7 +167,25 @@ index d633300fdc..dfcab40a73 100644
static bool virt_get_highmem(Object *obj, Error **errp)
{
@@ -2346,6 +2394,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp)
@@ -2380,7 +2428,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
vms->highmem = value;
}
-
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static bool virt_get_compact_highmem(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2436,7 +2484,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
vms->highmem_mmio = value;
}
-
+#endif /* disabled for RHEL */
static bool virt_get_its(Object *obj, Error **errp)
{
@@ -2452,6 +2500,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp)
vms->its = value;
}
@ -167,7 +193,7 @@ index d633300fdc..dfcab40a73 100644
static bool virt_get_dtb_randomness(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2359,6 +2408,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp)
@@ -2465,6 +2514,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp)
vms->dtb_randomness = value;
}
@ -175,7 +201,7 @@ index d633300fdc..dfcab40a73 100644
static char *virt_get_oem_id(Object *obj, Error **errp)
{
@@ -2442,6 +2492,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp)
@@ -2548,6 +2598,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp)
vms->ras = value;
}
@ -183,7 +209,7 @@ index d633300fdc..dfcab40a73 100644
static bool virt_get_mte(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2455,6 +2506,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp)
@@ -2561,6 +2612,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp)
vms->mte = value;
}
@ -191,7 +217,7 @@ index d633300fdc..dfcab40a73 100644
static char *virt_get_gic_version(Object *obj, Error **errp)
{
@@ -2886,6 +2938,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
@@ -2988,6 +3040,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
return fixed_ipa ? 0 : requested_pa_size;
}
@ -199,7 +225,7 @@ index d633300fdc..dfcab40a73 100644
static void virt_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -3294,3 +3347,185 @@ static void virt_machine_2_6_options(MachineClass *mc)
@@ -3441,3 +3494,195 @@ static void virt_machine_2_6_options(MachineClass *mc)
vmc->no_pmu = true;
}
DEFINE_VIRT_MACHINE(2, 6)
@ -312,6 +338,7 @@ index d633300fdc..dfcab40a73 100644
+
+ /* High memory is enabled by default */
+ vms->highmem = true;
+ vms->highmem_compact = !vmc->no_highmem_compact;
+ vms->gic_version = VIRT_GIC_VERSION_NOSEL;
+
+ vms->highmem_ecam = !vmc->no_highmem_ecam;
@ -374,22 +401,31 @@ index d633300fdc..dfcab40a73 100644
+}
+type_init(rhel_machine_init);
+
+static void rhel920_virt_options(MachineClass *mc)
+{
+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
+}
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
+
+static void rhel900_virt_options(MachineClass *mc)
+{
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
+ rhel920_virt_options(mc);
+
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
+
+ /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
+ vmc->no_tcg_lpa2 = true;
+ /* Compact layout for high memory regions was introduced with 9.2.0 */
+ vmc->no_highmem_compact = true;
+}
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0)
+DEFINE_RHEL_MACHINE(9, 0, 0)
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 6ec479ca2b..22b54ec510 100644
index e1ddbea96b..81c2363a40 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -180,9 +180,17 @@ struct VirtMachineState {
@@ -187,9 +187,17 @@ struct VirtMachineState {
#define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)
@ -407,8 +443,28 @@ index 6ec479ca2b..22b54ec510 100644
void virt_acpi_setup(VirtMachineState *vms);
bool virt_is_acpi_enabled(VirtMachineState *vms);
diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
index c8fa524002..3aa089abf3 100644
--- a/target/arm/arm-qmp-cmds.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
static void arm_cpu_add_definition(gpointer data, gpointer user_data)
{
ObjectClass *oc = data;
+ CPUClass *cc = CPU_CLASS(oc);
CpuDefinitionInfoList **cpu_list = user_data;
CpuDefinitionInfo *info;
const char *typename;
@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data)
info->name = g_strndup(typename,
strlen(typename) - strlen("-" TYPE_ARM_CPU));
info->q_typename = g_strdup(typename);
+ info->deprecated = !!cc->deprecation_note;
QAPI_LIST_PREPEND(*cpu_list, info);
}
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index 64c44cef2d..82e97249bc 100644
index 514c22ced9..f789173451 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo {
@ -420,10 +476,10 @@ index 64c44cef2d..82e97249bc 100644
void arm_cpu_register(const ARMCPUInfo *info);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 38d066c294..a845814bfb 100644
index 5182ed0c91..6740a8b940 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2250,8 +2250,13 @@ static void arm_cpu_instance_init(Object *obj)
@@ -2290,8 +2290,13 @@ static void arm_cpu_instance_init(Object *obj)
static void cpu_register_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
@ -438,7 +494,7 @@ index 38d066c294..a845814bfb 100644
void arm_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 9aeed3c848..f9f504d89e 100644
index c097cae988..829d4a2328 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -34,6 +34,8 @@
@ -451,10 +507,10 @@ index 9aeed3c848..f9f504d89e 100644
#define EXCP_SWI 2 /* software interrupt */
#define EXCP_PREFETCH_ABORT 3
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 3d74f134f5..4b330a52b5 100644
index 0fb07cc7b6..47459627fb 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -36,6 +36,7 @@
@@ -31,6 +31,7 @@
#include "hw/qdev-properties.h"
#include "internals.h"
@ -462,7 +518,7 @@ index 3d74f134f5..4b330a52b5 100644
static void aarch64_a35_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -115,6 +116,7 @@ static void aarch64_a35_initfn(Object *obj)
@@ -110,6 +111,7 @@ static void aarch64_a35_initfn(Object *obj)
/* These values are the same with A53/A57/A72. */
define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
@ -470,7 +526,7 @@ index 3d74f134f5..4b330a52b5 100644
void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
{
@@ -735,6 +737,7 @@ static void aarch64_a57_initfn(Object *obj)
@@ -730,6 +732,7 @@ static void aarch64_a57_initfn(Object *obj)
define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
@ -478,15 +534,15 @@ index 3d74f134f5..4b330a52b5 100644
static void aarch64_a53_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -1033,6 +1036,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj)
/* From D5.1 AArch64 PMU register summary */
cpu->isar.reset_pmcr_el0 = 0x410c3000;
@@ -1164,6 +1167,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj)
define_neoverse_n1_cp_reginfo(cpu);
}
+#endif /* disabled for RHEL */
static void aarch64_host_initfn(Object *obj)
{
@@ -1240,13 +1244,18 @@ static void aarch64_max_initfn(Object *obj)
@@ -1373,14 +1377,19 @@ static void aarch64_max_initfn(Object *obj)
}
static const ARMCPUInfo aarch64_cpus[] = {
@ -498,6 +554,7 @@ index 3d74f134f5..4b330a52b5 100644
+ .deprecation_note = RHEL_CPU_DEPRECATION },
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-a53", .initfn = aarch64_a53_initfn },
{ .name = "cortex-a55", .initfn = aarch64_a55_initfn },
{ .name = "cortex-a72", .initfn = aarch64_a72_initfn },
{ .name = "cortex-a76", .initfn = aarch64_a76_initfn },
{ .name = "a64fx", .initfn = aarch64_a64fx_initfn },
@ -506,7 +563,7 @@ index 3d74f134f5..4b330a52b5 100644
{ .name = "max", .initfn = aarch64_max_initfn },
#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
{ .name = "host", .initfn = aarch64_host_initfn },
@@ -1318,8 +1327,13 @@ static void aarch64_cpu_instance_init(Object *obj)
@@ -1452,8 +1461,13 @@ static void aarch64_cpu_instance_init(Object *obj)
static void cpu_register_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
@ -521,10 +578,10 @@ index 3d74f134f5..4b330a52b5 100644
void aarch64_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index a528ff9a3d..053f70e399 100644
index c154a4dcf2..f29425b656 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -148,10 +148,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
@@ -152,10 +152,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
}
#endif /* !CONFIG_USER_ONLY */
@ -536,7 +593,7 @@ index a528ff9a3d..053f70e399 100644
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
@@ -505,7 +505,6 @@ static void cortex_a9_initfn(Object *obj)
@@ -509,7 +509,6 @@ static void cortex_a9_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41093000;
define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
}
@ -544,7 +601,7 @@ index a528ff9a3d..053f70e399 100644
#ifndef CONFIG_USER_ONLY
static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -530,7 +529,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
@@ -534,7 +533,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
.access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
};
@ -552,7 +609,7 @@ index a528ff9a3d..053f70e399 100644
static void cortex_a7_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -579,7 +577,6 @@ static void cortex_a7_initfn(Object *obj)
@@ -583,7 +581,6 @@ static void cortex_a7_initfn(Object *obj)
cpu->isar.reset_pmcr_el0 = 0x41072000;
define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
}
@ -560,7 +617,7 @@ index a528ff9a3d..053f70e399 100644
static void cortex_a15_initfn(Object *obj)
{
@@ -628,7 +625,6 @@ static void cortex_a15_initfn(Object *obj)
@@ -632,7 +629,6 @@ static void cortex_a15_initfn(Object *obj)
define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
}
@ -568,7 +625,7 @@ index a528ff9a3d..053f70e399 100644
static void cortex_m0_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -1070,7 +1066,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
@@ -1115,7 +1111,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
cc->gdb_core_xml_file = "arm-m-profile.xml";
}
@ -576,7 +633,7 @@ index a528ff9a3d..053f70e399 100644
#ifndef TARGET_AARCH64
/*
@@ -1138,7 +1133,6 @@ static void arm_max_initfn(Object *obj)
@@ -1183,7 +1178,6 @@ static void arm_max_initfn(Object *obj)
#endif /* !TARGET_AARCH64 */
static const ARMCPUInfo arm_tcg_cpus[] = {
@ -584,7 +641,7 @@ index a528ff9a3d..053f70e399 100644
{ .name = "arm926", .initfn = arm926_initfn },
{ .name = "arm946", .initfn = arm946_initfn },
{ .name = "arm1026", .initfn = arm1026_initfn },
@@ -1154,9 +1148,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
@@ -1199,9 +1193,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "cortex-a7", .initfn = cortex_a7_initfn },
{ .name = "cortex-a8", .initfn = cortex_a8_initfn },
{ .name = "cortex-a9", .initfn = cortex_a9_initfn },
@ -594,7 +651,7 @@ index a528ff9a3d..053f70e399 100644
{ .name = "cortex-m0", .initfn = cortex_m0_initfn,
.class_init = arm_v7m_class_init },
{ .name = "cortex-m3", .initfn = cortex_m3_initfn,
@@ -1187,7 +1179,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
@@ -1233,7 +1225,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "pxa270-b1", .initfn = pxa270b1_initfn },
{ .name = "pxa270-c0", .initfn = pxa270c0_initfn },
{ .name = "pxa270-c5", .initfn = pxa270c5_initfn },
@ -602,36 +659,16 @@ index a528ff9a3d..053f70e399 100644
#ifndef TARGET_AARCH64
{ .name = "max", .initfn = arm_max_initfn },
#endif
@@ -1215,3 +1206,4 @@ static void arm_tcg_cpu_register_types(void)
@@ -1261,3 +1252,4 @@ static void arm_tcg_cpu_register_types(void)
type_init(arm_tcg_cpu_register_types)
#endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */
+#endif /* disabled for RHEL */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index d8c8223ec3..ad9d235773 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8476,6 +8476,7 @@ void arm_cpu_list(void)
static void arm_cpu_add_definition(gpointer data, gpointer user_data)
{
ObjectClass *oc = data;
+ CPUClass *cc = CPU_CLASS(oc);
CpuDefinitionInfoList **cpu_list = user_data;
CpuDefinitionInfo *info;
const char *typename;
@@ -8485,6 +8486,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data)
info->name = g_strndup(typename,
strlen(typename) - strlen("-" TYPE_ARM_CPU));
info->q_typename = g_strdup(typename);
+ info->deprecated = !!cc->deprecation_note;
QAPI_LIST_PREPEND(*cpu_list, info);
}
diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
index 5a14527386..a3579fc303 100644
index 1cb08138ad..834497dfec 100644
--- a/tests/qtest/arm-cpu-features.c
+++ b/tests/qtest/arm-cpu-features.c
@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data)
@@ -441,8 +441,10 @@ static void test_query_cpu_model_expansion(const void *data)
assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL);
/* Test expected feature presence/absence for some cpu types */
@ -642,7 +679,7 @@ index 5a14527386..a3579fc303 100644
/* Enabling and disabling pmu should always work. */
assert_has_feature_enabled(qts, "max", "pmu");
@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data)
@@ -459,6 +461,7 @@ static void test_query_cpu_model_expansion(const void *data)
assert_has_feature_enabled(qts, "cortex-a57", "pmu");
assert_has_feature_enabled(qts, "cortex-a57", "aarch64");
@ -650,7 +687,7 @@ index 5a14527386..a3579fc303 100644
assert_has_feature_enabled(qts, "a64fx", "pmu");
assert_has_feature_enabled(qts, "a64fx", "aarch64");
/*
@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data)
@@ -471,6 +474,7 @@ static void test_query_cpu_model_expansion(const void *data)
"{ 'sve384': true }");
assert_error(qts, "a64fx", "cannot enable sve640",
"{ 'sve640': true }");
@ -658,7 +695,7 @@ index 5a14527386..a3579fc303 100644
sve_tests_default(qts, "max");
pauth_tests_default(qts, "max");
@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
@@ -506,9 +510,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
QDict *resp;
char *error;
@ -671,5 +708,5 @@ index 5a14527386..a3579fc303 100644
assert_has_feature_enabled(qts, "host", "aarch64");
--
2.31.1
2.39.1

View File

@ -1,4 +1,4 @@
From 2c523f1b6c9470e1cd517ba99e414cde02727e16 Mon Sep 17 00:00:00 2001
From 401d0ebf1ee959fd944df6b5b4ae9c51c36d1244 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:27:13 +0200
Subject: Add ppc64 machine types
@ -34,10 +34,10 @@ Merged patches (7.1.0 rc0):
8 files changed, 314 insertions(+), 1 deletion(-)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 66b414d2e9..499eb49253 100644
index 4921198b9d..e24b3e22e3 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1633,6 +1633,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
@@ -1634,6 +1634,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
pef_kvm_reset(machine->cgs, &error_fatal);
spapr_caps_apply(spapr);
@ -47,7 +47,7 @@ index 66b414d2e9..499eb49253 100644
first_ppc_cpu = POWERPC_CPU(first_cpu);
if (kvm_enabled() && kvmppc_has_cap_mmu_radix() &&
@@ -3347,6 +3350,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
@@ -3348,6 +3351,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
spapr->host_serial = g_strdup(value);
}
@ -68,7 +68,7 @@ index 66b414d2e9..499eb49253 100644
static void spapr_instance_init(Object *obj)
{
SpaprMachineState *spapr = SPAPR_MACHINE(obj);
@@ -3425,6 +3442,12 @@ static void spapr_instance_init(Object *obj)
@@ -3426,6 +3443,12 @@ static void spapr_instance_init(Object *obj)
spapr_get_host_serial, spapr_set_host_serial);
object_property_set_description(obj, "host-serial",
"Host serial number to advertise in guest device tree");
@ -81,7 +81,7 @@ index 66b414d2e9..499eb49253 100644
}
static void spapr_machine_finalizefn(Object *obj)
@@ -4682,6 +4705,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
@@ -4683,6 +4706,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
vmc->client_architecture_support = spapr_vof_client_architecture_support;
vmc->quiesce = spapr_vof_quiesce;
vmc->setprop = spapr_vof_setprop;
@ -89,15 +89,15 @@ index 66b414d2e9..499eb49253 100644
}
static const TypeInfo spapr_machine_info = {
@@ -4733,6 +4757,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
@@ -4734,6 +4758,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
} \
type_init(spapr_machine_register_##suffix)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
/*
* pseries-7.2
* pseries-8.0
*/
@@ -4882,6 +4907,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc)
@@ -4894,6 +4919,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc)
}
DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
@ -105,7 +105,7 @@ index 66b414d2e9..499eb49253 100644
/*
* pseries-4.0
@@ -4901,6 +4927,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
@@ -4913,6 +4939,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
*nv2atsd = 0;
return true;
}
@ -114,7 +114,7 @@ index 66b414d2e9..499eb49253 100644
static void spapr_machine_4_0_class_options(MachineClass *mc)
{
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
@@ -5228,6 +5256,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc)
@@ -5240,6 +5268,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc)
compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len);
}
DEFINE_SPAPR_MACHINE(2_1, "2.1", false);
@ -375,10 +375,10 @@ index fcb5dfe792..ab8fb5bf62 100644
qdev_unrealize(DEVICE(cpu));
return false;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 04a95669ab..d5f4cf5e03 100644
index 5c8aabd444..04489d5808 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -154,6 +154,7 @@ struct SpaprMachineClass {
@@ -155,6 +155,7 @@ struct SpaprMachineClass {
bool pre_5_2_numa_associativity;
bool pre_6_2_numa_affinity;
@ -386,7 +386,7 @@ index 04a95669ab..d5f4cf5e03 100644
bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
uint64_t *buid, hwaddr *pio,
hwaddr *mmio32, hwaddr *mmio64,
@@ -256,6 +257,9 @@ struct SpaprMachineState {
@@ -257,6 +258,9 @@ struct SpaprMachineState {
/* Set by -boot */
char *boot_device;
@ -434,10 +434,10 @@ index cd3ff700ac..1cb49c8087 100644
{ "405cr", "405crc" },
{ "405gp", "405gpd" },
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 81d4263a07..508fbed90b 100644
index 557d736dab..6646ec1c27 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1467,6 +1467,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch)
@@ -1482,6 +1482,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch)
/* Compatibility modes */
#if defined(TARGET_PPC64)
@ -446,10 +446,10 @@ index 81d4263a07..508fbed90b 100644
uint32_t min_compat_pvr, uint32_t max_compat_pvr);
bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr,
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 7c25348b7b..83671c955f 100644
index 78f6fc50cd..68d06c3f8f 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -89,6 +89,7 @@ static int cap_ppc_nested_kvm_hv;
@@ -88,6 +88,7 @@ static int cap_ppc_nested_kvm_hv;
static int cap_large_decr;
static int cap_fwnmi;
static int cap_rpt_invalidate;
@ -457,7 +457,7 @@ index 7c25348b7b..83671c955f 100644
static uint32_t debug_inst_opcode;
@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
@@ -135,6 +136,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
kvmppc_get_cpu_characteristics(s);
cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
@ -465,7 +465,7 @@ index 7c25348b7b..83671c955f 100644
cap_large_decr = kvmppc_get_dec_bits();
cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI);
/*
@@ -2570,6 +2572,16 @@ int kvmppc_has_cap_rpt_invalidate(void)
@@ -2569,6 +2571,16 @@ int kvmppc_has_cap_rpt_invalidate(void)
return cap_rpt_invalidate;
}
@ -482,7 +482,7 @@ index 7c25348b7b..83671c955f 100644
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
uint32_t host_pvr = mfpvr();
@@ -2970,3 +2982,18 @@ bool kvm_arch_cpu_check_are_resettable(void)
@@ -2969,3 +2981,18 @@ bool kvm_arch_cpu_check_are_resettable(void)
void kvm_arch_accel_class_init(ObjectClass *oc)
{
}
@ -502,10 +502,10 @@ index 7c25348b7b..83671c955f 100644
+ }
+}
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index ee9325bf9a..20dbb95989 100644
index 5fd9753953..b5ebfe2be0 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -40,6 +40,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
@@ -43,6 +43,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
bool radix, bool gtse,
uint64_t proc_tbl);
@ -513,7 +513,7 @@ index ee9325bf9a..20dbb95989 100644
#ifndef CONFIG_USER_ONLY
bool kvmppc_spapr_use_multitce(void);
int kvmppc_spapr_enable_inkernel_multitce(void);
@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void);
@@ -77,6 +78,8 @@ int kvmppc_get_cap_large_decr(void);
int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
int kvmppc_has_cap_rpt_invalidate(void);
int kvmppc_enable_hwrng(void);
@ -522,7 +522,7 @@ index ee9325bf9a..20dbb95989 100644
int kvmppc_put_books_sregs(PowerPCCPU *cpu);
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
void kvmppc_check_papr_resize_hpt(Error **errp);
@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void)
@@ -396,6 +399,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void)
return false;
}
@ -540,5 +540,5 @@ index ee9325bf9a..20dbb95989 100644
{
return -1;
--
2.31.1
2.39.1

View File

@ -1,4 +1,4 @@
From 1973257ed781a93943f27f1518933e8c09c50f88 Mon Sep 17 00:00:00 2001
From 3c7647197729fcd76e219070c6f359bb3667d04d Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:47:32 +0200
Subject: Add s390x machine types
@ -30,45 +30,72 @@ Merged patches (7.1.0 rc0):
Merged patches (7.2.0 rc0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
- 27c188c6a4 redhat: Update s390x machine type compatibility for QEMU 7.2.0 update
- a932b8d429 redhat: Add new rhel-9.2.0 s390x machine type
- ac88104bad s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0
---
hw/s390x/s390-virtio-ccw.c | 108 +++++++++++++++++++++++++++++++
target/s390x/cpu_models.c | 11 ++++
hw/s390x/s390-virtio-ccw.c | 143 +++++++++++++++++++++++++++++++
target/s390x/cpu_models.c | 11 +++
target/s390x/cpu_models.h | 2 +
target/s390x/cpu_models_sysemu.c | 2 +
4 files changed, 123 insertions(+)
4 files changed, 158 insertions(+)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 2e64ffab45..8d5221fbb1 100644
index 503f212a31..dcd3b966b0 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -823,6 +823,7 @@ bool css_migration_enabled(void)
@@ -826,6 +826,7 @@ bool css_migration_enabled(void)
} \
type_init(ccw_machine_register_##suffix)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void ccw_machine_7_2_instance_options(MachineState *machine)
static void ccw_machine_8_0_instance_options(MachineState *machine)
{
}
@@ -1186,6 +1187,113 @@ static void ccw_machine_2_4_class_options(MachineClass *mc)
@@ -1201,6 +1202,148 @@ static void ccw_machine_2_4_class_options(MachineClass *mc)
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
}
DEFINE_CCW_MACHINE(2_4, "2.4", false);
+#endif
+
+
+static void ccw_machine_rhel920_instance_options(MachineState *machine)
+{
+}
+
+static void ccw_machine_rhel920_class_options(MachineClass *mc)
+{
+}
+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
+
+static void ccw_machine_rhel900_instance_options(MachineState *machine)
+{
+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
+
+ ccw_machine_rhel920_instance_options(machine);
+
+ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
+}
+
+static void ccw_machine_rhel900_class_options(MachineClass *mc)
+{
+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "interpret", "off", },
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
+ };
+
+ ccw_machine_rhel920_class_options(mc);
+
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
+ s390mc->max_threads = S390_MAX_CPUS;
+}
+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false);
+
+static void ccw_machine_rhel860_instance_options(MachineState *machine)
+{
@ -78,7 +105,14 @@ index 2e64ffab45..8d5221fbb1 100644
+
+static void ccw_machine_rhel860_class_options(MachineClass *mc)
+{
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "interpret", "on", },
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", },
+ };
+
+ ccw_machine_rhel900_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+
+ /* All RHEL machines for prior major releases are deprecated */
+ mc->deprecation_reason = rhel_old_machine_deprecation;
@ -102,8 +136,14 @@ index 2e64ffab45..8d5221fbb1 100644
+
+static void ccw_machine_rhel850_class_options(MachineClass *mc)
+{
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "interpret", "off", },
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
+ };
+
+ ccw_machine_rhel860_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ mc->smp_props.prefer_sockets = true;
+}
+DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false);
@ -164,10 +204,10 @@ index 2e64ffab45..8d5221fbb1 100644
static void ccw_machine_register_types(void)
{
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index c3a4f80633..739770dc15 100644
index 457b5cb10c..ff6b9463cb 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -45,6 +45,9 @@
@@ -46,6 +46,9 @@
* of a following release have been a superset of the previous release. With
* generation 15 one base feature and one optional feature have been deprecated.
*/
@ -177,7 +217,7 @@ index c3a4f80633..739770dc15 100644
static S390CPUDef s390_cpu_defs[] = {
CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"),
CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"),
@@ -854,22 +857,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data)
@@ -857,22 +860,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data)
static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data)
{
S390CPUClass *xcc = S390_CPU_CLASS(oc);
@ -222,7 +262,7 @@ index fb1adc8b21..d76745afa9 100644
/* CPU model based on a CPU definition */
diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c
index d086b1c39c..1b9cc66405 100644
index 87a4480c05..28c1b0486c 100644
--- a/target/s390x/cpu_models_sysemu.c
+++ b/target/s390x/cpu_models_sysemu.c
@@ -60,6 +60,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque)
@ -242,5 +282,5 @@ index d086b1c39c..1b9cc66405 100644
if (cpu_list_data->model) {
Object *obj;
--
2.31.1
2.39.1

View File

@ -1,4 +1,4 @@
From 0935624ccdddc286d6eeeb0c1b70d78983c21aa2 Mon Sep 17 00:00:00 2001
From 510291040cb280e1f68b793a84ec0f7d1c88aafa Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:10:31 +0200
Subject: Add x86_64 machine types
@ -13,6 +13,9 @@ Rebase notes (6.1.0):
Rebase notes (7.0.0):
- Reset alias for all machine-types except latest one
Rebase notes (8.0.0-rc1):
- remove legacy_no_rng_seed usage (removed upstream)
Merged patches (6.1.0):
- 59c284ad3b x86: Add x86 rhel8.5 machine types
- a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default
@ -39,24 +42,26 @@ Merged patches (7.1.0 rc0):
Merged patches (7.2.0 rc0):
- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts)
Merged patches (8.0.0-rc1):
- f33ca8aed4 x86: rhel 9.2.0 machine type
---
hw/i386/pc.c | 147 ++++++++++++++++++++++-
hw/i386/pc_piix.c | 86 +++++++++++++-
hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++++++++++-
hw/s390x/s390-virtio-ccw.c | 1 +
hw/i386/pc.c | 147 +++++++++++++++++++++-
hw/i386/pc_piix.c | 86 ++++++++++++-
hw/i386/pc_q35.c | 252 ++++++++++++++++++++++++++++++++++++-
include/hw/boards.h | 2 +
include/hw/i386/pc.h | 27 +++++
include/hw/i386/pc.h | 27 ++++
target/i386/cpu.c | 21 ++++
target/i386/kvm/kvm-cpu.c | 1 +
target/i386/kvm/kvm.c | 4 +
tests/qtest/pvpanic-test.c | 5 +-
10 files changed, 521 insertions(+), 7 deletions(-)
9 files changed, 538 insertions(+), 7 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 546b703cb4..c7b1350e64 100644
index 1489abf010..8abb1f872e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -393,6 +393,149 @@ GlobalProperty pc_compat_1_4[] = {
@@ -407,6 +407,149 @@ GlobalProperty pc_compat_1_4[] = {
};
const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4);
@ -206,7 +211,7 @@ index 546b703cb4..c7b1350e64 100644
GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled)
{
GSIState *s;
@@ -1907,6 +2050,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
@@ -1944,6 +2087,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->pvh_enabled = true;
pcmc->kvmclock_create_always = true;
assert(!mc->get_hotplug_handler);
@ -214,7 +219,7 @@ index 546b703cb4..c7b1350e64 100644
mc->get_hotplug_handler = pc_get_hotplug_handler;
mc->hotplug_allowed = pc_hotplug_allowed;
mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
@@ -1917,7 +2061,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
@@ -1954,7 +2098,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
mc->has_hotpluggable_cpus = true;
mc->default_boot_order = "cad";
mc->block_default_type = IF_IDE;
@ -225,10 +230,10 @@ index 546b703cb4..c7b1350e64 100644
mc->wakeup = pc_machine_wakeup;
hc->pre_plug = pc_machine_device_pre_plug_cb;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0985ff67d2..173a1fd10b 100644
index 14a794081e..3e330fd36f 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -53,6 +53,7 @@
@@ -54,6 +54,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/xen.h"
@ -236,7 +241,7 @@ index 0985ff67d2..173a1fd10b 100644
#ifdef CONFIG_XEN
#include <xen/hvm/hvm_info_table.h>
#include "hw/xen/xen_pt.h"
@@ -184,8 +185,8 @@ static void pc_init1(MachineState *machine,
@@ -198,8 +199,8 @@ static void pc_init1(MachineState *machine,
if (pcmc->smbios_defaults) {
MachineClass *mc = MACHINE_GET_CLASS(machine);
/* These values are guest ABI, do not change */
@ -247,7 +252,7 @@ index 0985ff67d2..173a1fd10b 100644
pcmc->smbios_uuid_encoded,
pcmc->smbios_stream_product,
pcmc->smbios_stream_version,
@@ -334,6 +335,7 @@ static void pc_init1(MachineState *machine,
@@ -351,6 +352,7 @@ static void pc_init1(MachineState *machine,
* hw_compat_*, pc_compat_*, or * pc_*_machine_options().
*/
@ -255,7 +260,7 @@ index 0985ff67d2..173a1fd10b 100644
static void pc_compat_2_3_fn(MachineState *machine)
{
X86MachineState *x86ms = X86_MACHINE(machine);
@@ -896,3 +898,83 @@ static void xenfv_3_1_machine_options(MachineClass *m)
@@ -899,3 +901,83 @@ static void xenfv_3_1_machine_options(MachineClass *m)
DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init,
xenfv_3_1_machine_options);
#endif
@ -304,7 +309,7 @@ index 0985ff67d2..173a1fd10b 100644
+ pcmc->kvmclock_create_always = false;
+ /* From pc_i440fx_5_1_machine_options() */
+ pcmc->pci_root_uid = 1;
+ pcmc->legacy_no_rng_seed = true;
+ pcmc->enforce_amd_1tb_hole = false;
+ compat_props_add(m->compat_props, hw_compat_rhel_9_1,
+ hw_compat_rhel_9_1_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_9_0,
@ -340,10 +345,10 @@ index 0985ff67d2..173a1fd10b 100644
+DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760,
+ pc_machine_rhel760_options);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index ea582254e3..97c3630021 100644
index dc0ba5f9e7..98601bb76f 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine)
@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine)
if (pcmc->smbios_defaults) {
/* These values are guest ABI, do not change */
@ -354,7 +359,7 @@ index ea582254e3..97c3630021 100644
pcmc->smbios_uuid_encoded,
pcmc->smbios_stream_product,
pcmc->smbios_stream_version,
@@ -352,6 +352,7 @@ static void pc_q35_init(MachineState *machine)
@@ -354,6 +354,7 @@ static void pc_q35_init(MachineState *machine)
DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn)
@ -362,7 +367,7 @@ index ea582254e3..97c3630021 100644
static void pc_q35_machine_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
@@ -666,3 +667,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
@@ -663,3 +664,250 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL,
pc_q35_2_4_machine_options);
@ -391,6 +396,23 @@ index ea582254e3..97c3630021 100644
+ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len);
+}
+
+static void pc_q35_init_rhel920(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel920_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel_options(m);
+ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
+ pcmc->smbios_stream_product = "RHEL";
+ pcmc->smbios_stream_version = "9.2.0";
+}
+
+DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
+ pc_q35_machine_rhel920_options);
+
+static void pc_q35_init_rhel900(MachineState *machine)
+{
+ pc_q35_init(machine);
@ -399,11 +421,12 @@ index ea582254e3..97c3630021 100644
+static void pc_q35_machine_rhel900_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel_options(m);
+ pc_q35_machine_rhel920_options(m);
+ m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
+ pcmc->smbios_stream_product = "RHEL";
+ pcmc->smbios_stream_version = "9.0.0";
+ pcmc->legacy_no_rng_seed = true;
+ pcmc->enforce_amd_1tb_hole = false;
+ compat_props_add(m->compat_props, hw_compat_rhel_9_1,
+ hw_compat_rhel_9_1_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_9_0,
@ -595,23 +618,11 @@ index ea582254e3..97c3630021 100644
+
+DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760,
+ pc_q35_machine_rhel760_options);
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 8d5221fbb1..ba640e3d9e 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1213,6 +1213,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine)
static void ccw_machine_rhel860_class_options(MachineClass *mc)
{
ccw_machine_rhel900_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len);
/* All RHEL machines for prior major releases are deprecated */
mc->deprecation_reason = rhel_old_machine_deprecation;
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 2209d4e416..fd75f551b1 100644
index c5a965d27f..5e7446ee40 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -266,6 +266,8 @@ struct MachineClass {
@@ -268,6 +268,8 @@ struct MachineClass {
strList *allowed_dynamic_sysbus_devices;
bool auto_enable_numa_with_memhp;
bool auto_enable_numa_with_memdev;
@ -621,12 +632,12 @@ index 2209d4e416..fd75f551b1 100644
bool smbus_no_migration_support;
bool nvdimm_supported;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 3754eaa97d..4266fe2fdb 100644
index 908a275736..4376f64a47 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_5_len;
extern GlobalProperty pc_compat_1_4[];
extern const size_t pc_compat_1_4_len;
@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_4_len;
int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
+extern GlobalProperty pc_rhel_compat[];
+extern const size_t pc_rhel_compat_len;
@ -659,10 +670,10 @@ index 3754eaa97d..4266fe2fdb 100644
static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \
{ \
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 22b681ca37..f7c526cbe6 100644
index 6576287e5b..0ef2bf1b93 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1832,9 +1832,13 @@ static const CPUCaches epyc_milan_cache_info = {
@@ -1834,9 +1834,13 @@ static const CPUCaches epyc_milan_cache_info = {
* PT in VMX operation
*/
@ -676,7 +687,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 0xd,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -1855,6 +1859,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -1857,6 +1861,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "phenom",
@ -684,7 +695,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 16,
@@ -1887,6 +1892,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -1889,6 +1894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "core2duo",
@ -692,7 +703,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -1929,6 +1935,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -1931,6 +1937,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "kvm64",
@ -700,7 +711,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 0xd,
.vendor = CPUID_VENDOR_INTEL,
.family = 15,
@@ -1970,6 +1977,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -1972,6 +1979,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "qemu32",
@ -708,7 +719,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 4,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -1984,6 +1992,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -1986,6 +1994,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "kvm32",
@ -716,7 +727,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 5,
.vendor = CPUID_VENDOR_INTEL,
.family = 15,
@@ -2014,6 +2023,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2016,6 +2025,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "coreduo",
@ -724,7 +735,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2047,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2049,6 +2059,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "486",
@ -732,7 +743,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 1,
.vendor = CPUID_VENDOR_INTEL,
.family = 4,
@@ -2059,6 +2070,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2061,6 +2072,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium",
@ -740,7 +751,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 1,
.vendor = CPUID_VENDOR_INTEL,
.family = 5,
@@ -2071,6 +2083,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2073,6 +2085,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium2",
@ -748,7 +759,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 2,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2083,6 +2096,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2085,6 +2098,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium3",
@ -756,7 +767,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 3,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2095,6 +2109,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2097,6 +2111,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "athlon",
@ -764,7 +775,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 2,
.vendor = CPUID_VENDOR_AMD,
.family = 6,
@@ -2110,6 +2125,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2112,6 +2127,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "n270",
@ -772,7 +783,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2135,6 +2151,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2137,6 +2153,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Conroe",
@ -780,7 +791,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2175,6 +2192,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -2177,6 +2194,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Penryn",
@ -788,7 +799,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -3762,6 +3780,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -3893,6 +3911,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G1",
@ -796,7 +807,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -3782,6 +3801,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -3913,6 +3932,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G2",
@ -804,7 +815,7 @@ index 22b681ca37..f7c526cbe6 100644
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -3804,6 +3824,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
@@ -3935,6 +3955,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G3",
@ -825,10 +836,10 @@ index 7237378a7d..7b8a3d5af0 100644
};
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a213209379..81526a1575 100644
index de531842f6..8d82304609 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3707,6 +3707,7 @@ static int kvm_get_msrs(X86CPU *cpu)
@@ -3822,6 +3822,7 @@ static int kvm_get_msrs(X86CPU *cpu)
struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries;
int ret, i;
uint64_t mtrr_top_bits;
@ -836,7 +847,7 @@ index a213209379..81526a1575 100644
kvm_msr_buf_reset(cpu);
@@ -4062,6 +4063,9 @@ static int kvm_get_msrs(X86CPU *cpu)
@@ -4177,6 +4178,9 @@ static int kvm_get_msrs(X86CPU *cpu)
break;
case MSR_KVM_ASYNC_PF_EN:
env->async_pf_en_msr = msrs[i].data;
@ -847,7 +858,7 @@ index a213209379..81526a1575 100644
case MSR_KVM_ASYNC_PF_INT:
env->async_pf_int_msr = msrs[i].data;
diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c
index bc7b7dfc39..96e6dee3a1 100644
index 78f1cf8186..ac954c9b06 100644
--- a/tests/qtest/pvpanic-test.c
+++ b/tests/qtest/pvpanic-test.c
@@ -17,7 +17,7 @@ static void test_panic_nopause(void)
@ -870,5 +881,5 @@ index bc7b7dfc39..96e6dee3a1 100644
val = qtest_inb(qts, 0x505);
g_assert_cmpuint(val, ==, 3);
--
2.31.1
2.39.1

View File

@ -1,4 +1,4 @@
From badfb1290c8eea8a2e1769b2392c7899d5077698 Mon Sep 17 00:00:00 2001
From 738db8353055eb6fd902513949c6659af8b401d0 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 2 Sep 2020 09:39:41 +0200
Subject: Enable make check
@ -27,28 +27,37 @@ Rebase changes (7.0.0):
Rebase changes (7.1.0 rc0):
- Disable bcm2835-dma-test (added upstream)
Rebase changes (8.0.0-rc1):
- Removed chunks for disabling bios-table-test (protected upstream)
Rebase changes (8.0.0-rc2):
- Disable new qemu-iotests execution
- Revert change in tco qtest (blocking test run)
Merged patches (6.1.0):
- 2f129df7d3 redhat: Enable the 'test-block-iothread' test again
Merged patches (7.1.0 rc0):
- 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57
---
.distro/qemu-kvm.spec.template | 5 ++---
tests/avocado/replay_kernel.py | 2 +-
tests/avocado/reverse_debugging.py | 2 +-
tests/avocado/tcg_plugins.py | 6 +++---
tests/qtest/fuzz-e1000e-test.c | 2 +-
tests/qtest/fuzz-virtio-scsi-test.c | 2 +-
tests/qtest/intel-hda-test.c | 2 +-
tests/qtest/libqos/meson.build | 2 +-
tests/qtest/lpc-ich9-test.c | 2 +-
tests/qtest/meson.build | 7 +------
tests/qtest/usb-hcd-xhci-test.c | 4 ++++
tests/qtest/virtio-net-failover.c | 1 +
12 files changed, 18 insertions(+), 19 deletions(-)
.distro/qemu-kvm.spec.template | 4 ++--
tests/avocado/replay_kernel.py | 2 +-
tests/avocado/reverse_debugging.py | 2 +-
tests/avocado/tcg_plugins.py | 6 ++---
tests/qemu-iotests/meson.build | 34 ++++++++++++++---------------
tests/qtest/fuzz-e1000e-test.c | 2 +-
tests/qtest/fuzz-virtio-scsi-test.c | 2 +-
tests/qtest/intel-hda-test.c | 2 +-
tests/qtest/libqos/meson.build | 2 +-
tests/qtest/lpc-ich9-test.c | 2 +-
tests/qtest/meson.build | 2 --
tests/qtest/tco-test.c | 2 +-
tests/qtest/usb-hcd-xhci-test.c | 4 ++++
tests/qtest/virtio-net-failover.c | 1 +
14 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index 00a26e4a0c..fe5ecf238a 100644
index f13456e1ec..2fee270a42 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -147,7 +147,7 @@ def test_aarch64_virt(self):
@ -61,10 +70,10 @@ index 00a26e4a0c..fe5ecf238a 100644
kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
'/linux/releases/29/Everything/aarch64/os/images/pxeboot'
diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py
index d2921e70c3..66d185ed42 100644
index 680c314cfc..71eccb8fb6 100644
--- a/tests/avocado/reverse_debugging.py
+++ b/tests/avocado/reverse_debugging.py
@@ -198,7 +198,7 @@ def test_aarch64_virt(self):
@@ -206,7 +206,7 @@ def test_aarch64_virt(self):
"""
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
@ -104,6 +113,49 @@ index 642d2e49e3..93b3afd823 100644
"""
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build
index 9735071a29..32002335f4 100644
--- a/tests/qemu-iotests/meson.build
+++ b/tests/qemu-iotests/meson.build
@@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats
check: true,
)
- foreach item: rc.stdout().strip().split()
- args = [qemu_iotests_check_cmd,
- '-tap', '-' + format, item,
- '--source-dir', meson.current_source_dir(),
- '--build-dir', meson.current_build_dir()]
- # Some individual tests take as long as 45 seconds
- # Bump the timeout to 3 minutes for some headroom
- # on slow machines to minimize spurious failures
- test('io-' + format + '-' + item,
- python,
- args: args,
- depends: qemu_iotests_binaries,
- env: qemu_iotests_env,
- protocol: 'tap',
- timeout: 180,
- suite: suites)
- endforeach
+# foreach item: rc.stdout().strip().split()
+# args = [qemu_iotests_check_cmd,
+# '-tap', '-' + format, item,
+# '--source-dir', meson.current_source_dir(),
+# '--build-dir', meson.current_build_dir()]
+# # Some individual tests take as long as 45 seconds
+# # Bump the timeout to 3 minutes for some headroom
+# # on slow machines to minimize spurious failures
+# test('io-' + format + '-' + item,
+# python,
+# args: args,
+# depends: qemu_iotests_binaries,
+# env: qemu_iotests_env,
+# protocol: 'tap',
+# timeout: 180,
+# suite: suites)
+# endforeach
endforeach
diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c
index 5052883fb6..b5286f4b12 100644
--- a/tests/qtest/fuzz-e1000e-test.c
@ -144,10 +196,10 @@ index d4a8db6fd6..1a796ec15a 100644
qtest_outl(s, 0xcf8, 0x80000804);
diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build
index 32f028872c..1e78a1a055 100644
index cc209a8de5..42a7c529c9 100644
--- a/tests/qtest/libqos/meson.build
+++ b/tests/qtest/libqos/meson.build
@@ -43,7 +43,7 @@ libqos_srcs = files(
@@ -44,7 +44,7 @@ libqos_srcs = files(
'virtio-rng.c',
'virtio-scsi.c',
'virtio-serial.c',
@ -170,18 +222,10 @@ index 8ac95b89f7..cd2102555c 100644
qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index c07a5b1a5f..9df3f9f8b9 100644
index 85ea4e8d99..893afc8eeb 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -82,7 +82,6 @@ qtests_i386 = \
config_all_devices.has_key('CONFIG_Q35') and \
config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \
slirp.found() ? ['virtio-net-failover'] : []) + \
- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \
qtests_pci + \
qtests_cxl + \
['fdc-test',
@@ -96,7 +95,6 @@ qtests_i386 = \
@@ -94,7 +94,6 @@ qtests_i386 = \
'drive_del-test',
'tco-test',
'cpu-plug-test',
@ -189,24 +233,7 @@ index c07a5b1a5f..9df3f9f8b9 100644
'vmgenid-test',
'migration-test',
'test-x86-cpuid-compat',
@@ -209,15 +207,13 @@ qtests_arm = \
# TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional
qtests_aarch64 = \
- (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) + \
(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \
(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \
(config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \
['arm-cpu-features',
'numa-test',
'boot-serial-test',
- 'migration-test',
- 'bcm2835-dma-test']
+ 'migration-test']
qtests_s390x = \
(slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \
@@ -225,7 +221,6 @@ qtests_s390x = \
@@ -223,7 +222,6 @@ qtests_s390x = \
(config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \
['boot-serial-test',
'drive_del-test',
@ -214,6 +241,19 @@ index c07a5b1a5f..9df3f9f8b9 100644
'virtio-ccw-test',
'cpu-plug-test',
'migration-test']
diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c
index 0547d41173..3756ce82d8 100644
--- a/tests/qtest/tco-test.c
+++ b/tests/qtest/tco-test.c
@@ -60,7 +60,7 @@ static void test_init(TestData *d)
QTestState *qs;
qs = qtest_initf("-machine q35 %s %s",
- d->noreboot ? "-global ICH9-LPC.noreboot=true" : "",
+ d->noreboot ? "" : "-global ICH9-LPC.noreboot=false",
!d->args ? "" : d->args);
qtest_irq_intercept_in(qs, "ioapic");
diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c
index 10ef9d2a91..3855873050 100644
--- a/tests/qtest/usb-hcd-xhci-test.c
@ -257,5 +297,5 @@ index 4a809590bf..1bf3fa641c 100644
"-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 "
--
2.31.1
2.39.1

View File

@ -1,4 +1,4 @@
From 0804844e4755377be6d2ebad578794ad9f4f3f31 Mon Sep 17 00:00:00 2001
From 34cb4f7ddd762ec46ed1a6a4261aebde39360ca4 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Tue, 3 Dec 2013 20:05:13 +0100
Subject: vfio: cap number of devices that can be assigned
@ -32,7 +32,7 @@ Signed-off-by: Bandan Das <bsd@redhat.com>
2 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 939dcc3d4a..acbc6673ce 100644
index ec9a854361..a779053be3 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -48,6 +48,9 @@
@ -77,7 +77,7 @@ index 939dcc3d4a..acbc6673ce 100644
if (!vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
~vdev->host.slot || ~vdev->host.function)) {
@@ -3293,6 +3317,9 @@ static Property vfio_pci_dev_properties[] = {
@@ -3294,6 +3318,9 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice,
no_geforce_quirks, false),
@ -88,7 +88,7 @@ index 939dcc3d4a..acbc6673ce 100644
false),
DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 7c236a52f4..7b7d036a8f 100644
index 177abcc8fb..45235d38ba 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -140,6 +140,7 @@ struct VFIOPCIDevice {
@ -100,5 +100,5 @@ index 7c236a52f4..7b7d036a8f 100644
uint32_t device_id;
uint32_t sub_vendor_id;
--
2.31.1
2.39.1

View File

@ -1,4 +1,4 @@
From 283a0e258dc2f3b83c58e6f948bafe430cd2c1d5 Mon Sep 17 00:00:00 2001
From 8964a3e8835992442902d35b011a708787366d82 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 4 Dec 2013 18:53:17 +0100
Subject: Add support statement to -help output
@ -21,7 +21,7 @@ Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
1 file changed, 9 insertions(+)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 5115221efe..17188df528 100644
index ea20b23e4c..ad4173138d 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -834,9 +834,17 @@ static void version(void)
@ -51,5 +51,5 @@ index 5115221efe..17188df528 100644
}
--
2.31.1
2.39.1

View File

@ -1,4 +1,4 @@
From d8ded821aa698b3b03bd9089fbd6c2b33da87b9e Mon Sep 17 00:00:00 2001
From 0b72d348fa0714de641ee242e5cee97df006e8fd Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 8 Jul 2020 08:35:50 +0200
Subject: Use qemu-kvm in documentation instead of qemu-system-<arch>
@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644
.. |I2C| replace:: I\ :sup:`2`\ C
.. |I2S| replace:: I\ :sup:`2`\ S
diff --git a/qemu-options.hx b/qemu-options.hx
index 7f99d15b23..ea02ca3a45 100644
index 59bdf67a2c..52b49f1f6a 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3300,11 +3300,11 @@ SRST
@@ -3296,11 +3296,11 @@ SRST
::
@ -57,5 +57,5 @@ index 7f99d15b23..ea02ca3a45 100644
``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
Establish a vhost-vdpa netdev.
--
2.31.1
2.39.1

View File

@ -1,60 +0,0 @@
From 9c6acadb444c9300d7c18b6939ce4f96484aeacc Mon Sep 17 00:00:00 2001
From: David Gibson <dgibson@redhat.com>
Date: Wed, 6 Feb 2019 03:58:56 +0000
Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts
RH-Author: David Gibson <dgibson@redhat.com>
Message-id: <20190206035856.19058-1-dgibson@redhat.com>
Patchwork-id: 84246
O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts
Bugzilla: 1653590
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
RH-Acked-by: Serhii Popovych <spopovyc@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
Most current POWER guests require 64kiB page support, so that's the default
for the cap-hpt-max-pagesize option in qemu which limits available guest
page sizes. We warn if the value is set smaller than that, but don't
outright fail upstream, because we need to allow for the possibility of
guest (and/or host) kernels configured for 4kiB page sizes.
Downstream, however, we simply don't support 4kiB pagesize configured
kernels in guest or host, so we can have qemu simply error out in this
situation.
Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified
it failed immediately with a qemu error
Signed-off-by: David Gibson <dgibson@redhat.com>
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
---
hw/ppc/spapr_caps.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index b4283055c1..59b88aadff 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
uint8_t val, Error **errp)
{
+#if 0 /* disabled for RHEL */
if (val < 12) {
error_setg(errp, "Require at least 4kiB hpt-max-page-size");
return;
} else if (val < 16) {
warn_report("Many guests require at least 64kiB hpt-max-page-size");
}
+#else /* Only page sizes >=64kiB supported for RHEL */
+ if (val < 16) {
+ error_setg(errp, "Require at least 64kiB hpt-max-page-size");
+ return;
+ }
+#endif
spapr_check_pagesize(spapr, qemu_minrampagesize(), errp);
}
--
2.31.1

View File

@ -1,4 +1,4 @@
From 02fde2a0cbd679ebd4104fe5522572c31ec23abd Mon Sep 17 00:00:00 2001
From bd6bcebfd783fa49e283d035d378fb5240423d84 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 20 Aug 2021 18:25:12 +0200
Subject: qcow2: Deprecation warning when opening v2 images rw
@ -44,10 +44,10 @@ Rebase notes (6.1.0):
2 files changed, 7 insertions(+)
diff --git a/block/qcow2.c b/block/qcow2.c
index 4d6666d3ff..d2ba263e9d 100644
index 30fd53fa64..22084730f9 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1336,6 +1336,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
@@ -1337,6 +1337,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
ret = -ENOTSUP;
goto fail;
}
@ -61,7 +61,7 @@ index 4d6666d3ff..d2ba263e9d 100644
s->qcow_version = header.version;
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index cc9f1a5891..6a13757177 100644
index 6b32c7fbfa..6ddda2ee64 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -83,6 +83,7 @@ _filter_qemu()
@ -73,5 +73,5 @@ index cc9f1a5891..6a13757177 100644
}
--
2.31.1
2.39.1

View File

@ -1,7 +1,7 @@
From 48f45171b89b8ed24f2b2484d63b00ea7818b5c3 Mon Sep 17 00:00:00 2001
From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001
From: Kfir Manor <kfir@daynix.com>
Date: Sun, 22 Jan 2023 17:33:07 +0200
Subject: [PATCH 9/9] qga/linux: add usb support to guest-get-fsinfo
Subject: qga/linux: add usb support to guest-get-fsinfo
RH-Author: Kostiantyn Kostiuk <kkostiuk@redhat.com>
RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo
@ -16,15 +16,19 @@ Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.co
Signed-off-by: Kfir Manor <kfir@daynix.com>
Reviewed-by: Konstantin Kostiuk <kkostiuk@redhat.com>
Signed-off-by: Konstantin Kostiuk <kkostiuk@redhat.com>
Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch
Patch-id: 72
Patch-present-in-specfile: True
---
qga/commands-posix.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 32493d6383..f1b2b87c13 100644
index 079689d79a..97754930c1 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -877,7 +877,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
g_str_equal(driver, "sym53c8xx") ||
g_str_equal(driver, "virtio-pci") ||
g_str_equal(driver, "ahci") ||
@ -35,7 +39,7 @@ index 32493d6383..f1b2b87c13 100644
break;
}
@@ -974,6 +976,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
}
} else if (strcmp(driver, "nvme") == 0) {
disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
@ -45,5 +49,5 @@ index 32493d6383..f1b2b87c13 100644
g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
goto cleanup;
--
2.31.1
2.39.1

View File

@ -0,0 +1,110 @@
From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 15 Feb 2023 02:03:17 -0500
Subject: Add RHEL 9.2.0 compat structure
Add the compatibility bits necessary to keep the 9.2.0 machine
types unchanged after the rebase to 8.0.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
Rebase notes (8.0.0 rc4):
- Added migration.x-preempt-pre-7-2 compat
---
hw/arm/virt.c | 1 +
hw/core/machine.c | 10 ++++++++++
hw/i386/pc_piix.c | 2 ++
hw/i386/pc_q35.c | 3 +++
hw/s390x/s390-virtio-ccw.c | 1 +
include/hw/boards.h | 3 +++
6 files changed, 20 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 1ae1654be5..9be53e9355 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init);
static void rhel920_virt_options(MachineClass *mc)
{
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
}
DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 5aa567fad3..0e0120b7f2 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
const char *rhel_old_machine_deprecation =
"machine types for previous major releases are deprecated";
+GlobalProperty hw_compat_rhel_9_2[] = {
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "e1000e", "migrate-timadj", "off" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "virtio-mem", "x-early-migration", "false" },
+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */
+ { "migration", "x-preempt-pre-7-2", "true" },
+};
+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2);
+
/*
* Mostly the same as hw_compat_7_0
*/
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 3e330fd36f..90fb6e2e03 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
/* From pc_i440fx_5_1_machine_options() */
pcmc->pci_root_uid = 1;
pcmc->enforce_amd_1tb_hole = false;
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
+ hw_compat_rhel_9_2_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
hw_compat_rhel_9_1_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 98601bb76f..8945b69175 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
pcmc->smbios_stream_product = "RHEL";
pcmc->smbios_stream_version = "9.2.0";
+
+ compat_props_add(m->compat_props, hw_compat_rhel_9_2,
+ hw_compat_rhel_9_2_len);
}
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index dcd3b966b0..6a0b93c63d 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine)
static void ccw_machine_rhel920_class_options(MachineClass *mc)
{
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len);
}
DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 5e7446ee40..5f08bd7550 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
+extern GlobalProperty hw_compat_rhel_9_2[];
+extern const size_t hw_compat_rhel_9_2_len;
+
extern GlobalProperty hw_compat_rhel_9_1[];
extern const size_t hw_compat_rhel_9_1_len;
--
2.39.1

View File

@ -1,26 +0,0 @@
From 21ed34787b9492c2cfe3d8fc12a32748bcf02307 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 9 Nov 2022 07:08:32 -0500
Subject: Add 7.2 compat bits for RHEL 9.1 machine type
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/core/machine.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 9edec1ca05..3d851d34da 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -54,6 +54,8 @@ GlobalProperty hw_compat_rhel_9_1[] = {
{ "arm-gicv3-common", "force-8-bit-prio", "on" },
/* hw_compat_rhel_9_1 from hw_compat_7_0 */
{ "nvme-ns", "eui64-default", "on"},
+ /* hw_compat_rhel_9_1 from hw_compat_7_1 */
+ { "virtio-device", "queue_reset", "false" },
};
const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1);
--
2.31.1

View File

@ -0,0 +1,76 @@
From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 27 Mar 2023 15:14:03 +0200
Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU
8.0.0 update
Add pc_rhel_9_2_compat based on upstream pc_compat_7_2.
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
hw/i386/pc.c | 6 ++++++
hw/i386/pc_piix.c | 2 ++
hw/i386/pc_q35.c | 2 ++
include/hw/i386/pc.h | 3 +++
4 files changed, 13 insertions(+)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8abb1f872e..f216922cee 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = {
};
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
+GlobalProperty pc_rhel_9_2_compat[] = {
+ /* pc_rhel_9_2_compat from pc_compat_7_2 */
+ { "ICH9-LPC", "noreboot", "true" },
+};
+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat);
+
GlobalProperty pc_rhel_9_0_compat[] = {
/* pc_rhel_9_0_compat from pc_compat_6_2 */
{ "virtio-mem", "unplugged-inaccessible", "off" },
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 90fb6e2e03..fc704d783f 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
hw_compat_rhel_9_2_len);
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
+ pc_rhel_9_2_compat_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
hw_compat_rhel_9_1_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 8945b69175..e97655616a 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m)
compat_props_add(m->compat_props, hw_compat_rhel_9_2,
hw_compat_rhel_9_2_len);
+ compat_props_add(m->compat_props, pc_rhel_9_2_compat,
+ pc_rhel_9_2_compat_len);
}
DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 4376f64a47..d218ad1628 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -296,6 +296,9 @@ int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
extern GlobalProperty pc_rhel_compat[];
extern const size_t pc_rhel_compat_len;
+extern GlobalProperty pc_rhel_9_2_compat[];
+extern const size_t pc_rhel_9_2_compat_len;
+
extern GlobalProperty pc_rhel_9_0_compat[];
extern const size_t pc_rhel_9_0_compat_len;
--
2.39.1

View File

@ -0,0 +1,83 @@
From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Mon, 17 Apr 2023 01:24:18 -0400
Subject: Disable unwanted new devices
QEMU 8.0 adds two new devices that we do not want to support and that
can't be disabled using a configure switch.
1) ide-cf - virtual CompactFlash card
2) i2c-echo - testing echo device
Disable the devices manually by changing code (1) and meson configs (2).
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/ide/qdev.c | 9 +++++++++
hw/misc/meson.build | 3 ++-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 1b3b4da01d..454bfa5783 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp)
ide_dev_initfn(dev, IDE_CD, errp);
}
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
static void ide_cf_realize(IDEDevice *dev, Error **errp)
{
ide_dev_initfn(dev, IDE_CFATA, errp);
}
+#endif
#define DEFINE_IDE_DEV_PROPERTIES() \
DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \
@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = {
.class_init = ide_cd_class_init,
};
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
static Property ide_cf_properties[] = {
DEFINE_IDE_DEV_PROPERTIES(),
DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf),
@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = {
.instance_size = sizeof(IDEDrive),
.class_init = ide_cf_class_init,
};
+#endif
static void ide_device_class_init(ObjectClass *klass, void *data)
{
@@ -396,7 +402,10 @@ static void ide_register_types(void)
type_register_static(&ide_bus_info);
type_register_static(&ide_hd_info);
type_register_static(&ide_cd_info);
+/* Disabled for Red Hat Enterprise Linux */
+#if 0
type_register_static(&ide_cf_info);
+#endif
type_register_static(&ide_device_type_info);
}
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index a40245ad44..9cc5a61ed7 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c'))
softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c'))
-softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
+# Disabled for Red Hat Enterprise Linux
+# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c'))
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c'))
--
2.39.1

View File

@ -1,47 +0,0 @@
From 27c188c6a4cbd908269cf06affd24025708ecb5c Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Thu, 17 Nov 2022 16:47:16 +0100
Subject: redhat: Update s390x machine type compatibility for QEMU 7.2.0 update
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2143585
Upstream Status: n/a (rhel-only)
Add the compatibility handling for the rebase from QEMU 7.1 to 7.2,
i.e. the settings from ccw_machine_7_1_class_options() and
ccw_machine_7_1_instance_options() to the rhel9.1.0 machine type
(earlier settings have been added by previous rebases already).
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
hw/s390x/s390-virtio-ccw.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index ba640e3d9e..97e868ada0 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1195,12 +1195,21 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine)
static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
}
static void ccw_machine_rhel900_class_options(MachineClass *mc)
{
+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "interpret", "off", },
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
+ };
+
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
+ s390mc->max_threads = S390_MAX_CPUS;
}
DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
--
2.31.1

View File

@ -1,43 +0,0 @@
From c1a21266d8bed27f1ef1f705818fde5f9350b73f Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Wed, 23 Nov 2022 14:15:37 +0100
Subject: redhat: aarch64: add rhel9.2.0 virt machine type
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2131982
Upstream: RHEL only
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
hw/arm/virt.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index dfcab40a73..0a94f31dd1 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3518,14 +3518,21 @@ static void rhel_machine_init(void)
}
type_init(rhel_machine_init);
+static void rhel920_virt_options(MachineClass *mc)
+{
+}
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
+
static void rhel900_virt_options(MachineClass *mc)
{
VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+ rhel920_virt_options(mc);
+
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
vmc->no_tcg_lpa2 = true;
}
-DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0)
+DEFINE_RHEL_MACHINE(9, 0, 0)
--
2.31.1

View File

@ -1,62 +0,0 @@
From a932b8d4296066be01613ada84241b501488f99f Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Thu, 17 Nov 2022 17:03:24 +0100
Subject: redhat: Add new rhel-9.2.0 s390x machine type
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2136473
Upstream Status: n/a (rhel-only)
RHEL 9.2 will be an EUS release - we want to have a new machine
type here to make sure that we have a spot where we can wire up
fixes later.
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
hw/s390x/s390-virtio-ccw.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 97e868ada0..aa142a1a4e 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1190,10 +1190,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false);
#endif
+static void ccw_machine_rhel920_instance_options(MachineState *machine)
+{
+}
+
+static void ccw_machine_rhel920_class_options(MachineClass *mc)
+{
+}
+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true);
+
static void ccw_machine_rhel900_instance_options(MachineState *machine)
{
static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
+ ccw_machine_rhel920_instance_options(machine);
+
s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE);
}
@@ -1206,12 +1217,14 @@ static void ccw_machine_rhel900_class_options(MachineClass *mc)
{ TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
};
+ ccw_machine_rhel920_class_options(mc);
+
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
s390mc->max_threads = S390_MAX_CPUS;
}
-DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false);
static void ccw_machine_rhel860_instance_options(MachineState *machine)
{
--
2.31.1

View File

@ -1,75 +0,0 @@
From f33ca8aed4744238230f1f2cc47df77aa4c9e0ac Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Thu, 17 Nov 2022 12:36:30 +0000
Subject: x86: rhel 9.2.0 machine type
Add a 9.2.0 x86 machine type, and fix up the compatibility
for 9.0.0 and older.
pc_compat_7_1 and pc_compat_7_0 are both empty upstream so there's
nothing to do there.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
hw/i386/pc_piix.c | 1 +
hw/i386/pc_q35.c | 21 ++++++++++++++++++++-
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 173a1fd10b..fc06877344 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -944,6 +944,7 @@ static void pc_machine_rhel760_options(MachineClass *m)
/* From pc_i440fx_5_1_machine_options() */
pcmc->pci_root_uid = 1;
pcmc->legacy_no_rng_seed = true;
+ pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
hw_compat_rhel_9_1_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 97c3630021..52cfe3bf45 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -692,6 +692,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m)
compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len);
}
+static void pc_q35_init_rhel920(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel920_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel_options(m);
+ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)";
+ pcmc->smbios_stream_product = "RHEL";
+ pcmc->smbios_stream_version = "9.2.0";
+}
+
+DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920,
+ pc_q35_machine_rhel920_options);
+
static void pc_q35_init_rhel900(MachineState *machine)
{
pc_q35_init(machine);
@@ -700,11 +717,13 @@ static void pc_q35_init_rhel900(MachineState *machine)
static void pc_q35_machine_rhel900_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- pc_q35_machine_rhel_options(m);
+ pc_q35_machine_rhel920_options(m);
m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
pcmc->smbios_stream_product = "RHEL";
pcmc->smbios_stream_version = "9.0.0";
pcmc->legacy_no_rng_seed = true;
+ pcmc->enforce_amd_1tb_hole = false;
compat_props_add(m->compat_props, hw_compat_rhel_9_1,
hw_compat_rhel_9_1_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_0,
--
2.31.1

View File

@ -1,82 +0,0 @@
From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Mon, 16 Jan 2023 07:17:23 -0500
Subject: [PATCH 30/31] KVM: keep track of running ioctls
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 138: accel: introduce accelerator blocker API
RH-Bugzilla: 1979276
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
commit a27dd2de68f37ba96fe164a42121daa5f0750afc
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Fri Nov 11 10:47:57 2022 -0500
KVM: keep track of running ioctls
Using the new accel-blocker API, mark where ioctls are being called
in KVM. Next, we will implement the critical section that will take
care of performing memslots modifications atomically, therefore
preventing any new ioctl from running and allowing the running ones
to finish.
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20221111154758.1372674-3-eesposit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
accel/kvm/kvm-all.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f99b0becd8..ff660fd469 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms)
assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
s->sigmask_len = 8;
+ accel_blocker_init();
#ifdef KVM_CAP_SET_GUEST_DEBUG
QTAILQ_INIT(&s->kvm_sw_breakpoints);
@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
va_end(ap);
trace_kvm_vm_ioctl(type, arg);
+ accel_ioctl_begin();
ret = ioctl(s->vmfd, type, arg);
+ accel_ioctl_end();
if (ret == -1) {
ret = -errno;
}
@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
va_end(ap);
trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
+ accel_cpu_ioctl_begin(cpu);
ret = ioctl(cpu->kvm_fd, type, arg);
+ accel_cpu_ioctl_end(cpu);
if (ret == -1) {
ret = -errno;
}
@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...)
va_end(ap);
trace_kvm_device_ioctl(fd, type, arg);
+ accel_ioctl_begin();
ret = ioctl(fd, type, arg);
+ accel_ioctl_end();
if (ret == -1) {
ret = -errno;
}
--
2.31.1

View File

@ -1,140 +0,0 @@
From 0c19fb7c4a22a30830152b224b2e66963f829a7a Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Thu, 19 Jan 2023 18:24:24 +0100
Subject: [PATCH 19/20] Revert "vhost-user: Introduce nested event loop in
vhost_user_read()"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 146: Fix vhost-user with dpdk
RH-Bugzilla: 2155173
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: Greg Kurz (RH) <gkurz@redhat.com>
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
RH-Commit: [2/2] 9b67041f92f29f70b7ccb41d8087801e4e4e38af (lvivier/qemu-kvm-centos)
This reverts commit a7f523c7d114d445c5d83aecdba3efc038e5a692.
The nested event loop is broken by design. Its only user was removed.
Drop the code as well so that nobody ever tries to use it again.
I had to fix a couple of trivial conflicts around return values because
of 025faa872bcf ("vhost-user: stick to -errno error return convention").
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <20230119172424.478268-3-groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
(cherry picked from commit 4382138f642f69fdbc79ebf4e93d84be8061191f)
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
hw/virtio/vhost-user.c | 65 ++++--------------------------------------
1 file changed, 5 insertions(+), 60 deletions(-)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 0ac00eb901..7cb49c50f9 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -305,19 +305,8 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
return 0;
}
-struct vhost_user_read_cb_data {
- struct vhost_dev *dev;
- VhostUserMsg *msg;
- GMainLoop *loop;
- int ret;
-};
-
-static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
- gpointer opaque)
+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
- struct vhost_user_read_cb_data *data = opaque;
- struct vhost_dev *dev = data->dev;
- VhostUserMsg *msg = data->msg;
struct vhost_user *u = dev->opaque;
CharBackend *chr = u->user->chr;
uint8_t *p = (uint8_t *) msg;
@@ -325,8 +314,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
r = vhost_user_read_header(dev, msg);
if (r < 0) {
- data->ret = r;
- goto end;
+ return r;
}
/* validate message size is sane */
@@ -334,8 +322,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
error_report("Failed to read msg header."
" Size %d exceeds the maximum %zu.", msg->hdr.size,
VHOST_USER_PAYLOAD_SIZE);
- data->ret = -EPROTO;
- goto end;
+ return -EPROTO;
}
if (msg->hdr.size) {
@@ -346,53 +333,11 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
int saved_errno = errno;
error_report("Failed to read msg payload."
" Read %d instead of %d.", r, msg->hdr.size);
- data->ret = r < 0 ? -saved_errno : -EIO;
- goto end;
+ return r < 0 ? -saved_errno : -EIO;
}
}
-end:
- g_main_loop_quit(data->loop);
- return G_SOURCE_REMOVE;
-}
-
-static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
-{
- struct vhost_user *u = dev->opaque;
- CharBackend *chr = u->user->chr;
- GMainContext *prev_ctxt = chr->chr->gcontext;
- GMainContext *ctxt = g_main_context_new();
- GMainLoop *loop = g_main_loop_new(ctxt, FALSE);
- struct vhost_user_read_cb_data data = {
- .dev = dev,
- .loop = loop,
- .msg = msg,
- .ret = 0
- };
-
- /*
- * We want to be able to monitor the slave channel fd while waiting
- * for chr I/O. This requires an event loop, but we can't nest the
- * one to which chr is currently attached : its fd handlers might not
- * be prepared for re-entrancy. So we create a new one and switch chr
- * to use it.
- */
- qemu_chr_be_update_read_handlers(chr->chr, ctxt);
- qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);
-
- g_main_loop_run(loop);
-
- /*
- * Restore the previous event loop context. This also destroys/recreates
- * event sources : this guarantees that all pending events in the original
- * context that have been processed by the nested loop are purged.
- */
- qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
-
- g_main_loop_unref(loop);
- g_main_context_unref(ctxt);
-
- return data.ret;
+ return 0;
}
static int process_message_reply(struct vhost_dev *dev,
--
2.31.1

View File

@ -1,143 +0,0 @@
From 9fb47ad317ad8cdda9960190d499ad6c3a9817f0 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Thu, 19 Jan 2023 18:24:23 +0100
Subject: [PATCH 18/20] Revert "vhost-user: Monitor slave channel in
vhost_user_read()"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 146: Fix vhost-user with dpdk
RH-Bugzilla: 2155173
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: Greg Kurz (RH) <gkurz@redhat.com>
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
RH-Commit: [1/2] c583a7f121ca9c93c9a2ad17bf0ccf5c1241dc99 (lvivier/qemu-kvm-centos)
This reverts commit db8a3772e300c1a656331a92da0785d81667dc81.
Motivation : this is breaking vhost-user with DPDK as reported in [0].
Received unexpected msg type. Expected 22 received 40
Fail to update device iotlb
Received unexpected msg type. Expected 40 received 22
Received unexpected msg type. Expected 22 received 11
Fail to update device iotlb
Received unexpected msg type. Expected 11 received 22
vhost VQ 1 ring restore failed: -71: Protocol error (71)
Received unexpected msg type. Expected 22 received 11
Fail to update device iotlb
Received unexpected msg type. Expected 11 received 22
vhost VQ 0 ring restore failed: -71: Protocol error (71)
unable to start vhost net: 71: falling back on userspace virtio
The failing sequence that leads to the first error is :
- QEMU sends a VHOST_USER_GET_STATUS (40) request to DPDK on the master
socket
- QEMU starts a nested event loop in order to wait for the
VHOST_USER_GET_STATUS response and to be able to process messages from
the slave channel
- DPDK sends a couple of legitimate IOTLB miss messages on the slave
channel
- QEMU processes each IOTLB request and sends VHOST_USER_IOTLB_MSG (22)
updates on the master socket
- QEMU assumes to receive a response for the latest VHOST_USER_IOTLB_MSG
but it gets the response for the VHOST_USER_GET_STATUS instead
The subsequent errors have the same root cause : the nested event loop
breaks the order by design. It lures QEMU to expect responses to the
latest message sent on the master socket to arrive first.
Since this was only needed for DAX enablement which is still not merged
upstream, just drop the code for now. A working solution will have to
be merged later on. Likely protect the master socket with a mutex
and service the slave channel with a separate thread, as discussed with
Maxime in the mail thread below.
[0] https://lore.kernel.org/qemu-devel/43145ede-89dc-280e-b953-6a2b436de395@redhat.com/
Reported-by: Yanghang Liu <yanghliu@redhat.com>
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155173
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <20230119172424.478268-2-groug@kaod.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Maxime Coquelin <maxime.coquelin@redhat.com>
(cherry picked from commit f340a59d5a852d75ae34555723694c7e8eafbd0c)
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
hw/virtio/vhost-user.c | 35 +++--------------------------------
1 file changed, 3 insertions(+), 32 deletions(-)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 8f635844af..0ac00eb901 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -356,35 +356,6 @@ end:
return G_SOURCE_REMOVE;
}
-static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
- gpointer opaque);
-
-/*
- * This updates the read handler to use a new event loop context.
- * Event sources are removed from the previous context : this ensures
- * that events detected in the previous context are purged. They will
- * be re-detected and processed in the new context.
- */
-static void slave_update_read_handler(struct vhost_dev *dev,
- GMainContext *ctxt)
-{
- struct vhost_user *u = dev->opaque;
-
- if (!u->slave_ioc) {
- return;
- }
-
- if (u->slave_src) {
- g_source_destroy(u->slave_src);
- g_source_unref(u->slave_src);
- }
-
- u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
- G_IO_IN | G_IO_HUP,
- slave_read, dev, NULL,
- ctxt);
-}
-
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
struct vhost_user *u = dev->opaque;
@@ -406,7 +377,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
* be prepared for re-entrancy. So we create a new one and switch chr
* to use it.
*/
- slave_update_read_handler(dev, ctxt);
qemu_chr_be_update_read_handlers(chr->chr, ctxt);
qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);
@@ -418,7 +388,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
* context that have been processed by the nested loop are purged.
*/
qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
- slave_update_read_handler(dev, NULL);
g_main_loop_unref(loop);
g_main_context_unref(ctxt);
@@ -1802,7 +1771,9 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev)
return -ECONNREFUSED;
}
u->slave_ioc = ioc;
- slave_update_read_handler(dev, NULL);
+ u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
+ G_IO_IN | G_IO_HUP,
+ slave_read, dev, NULL, NULL);
if (reply_supported) {
msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
--
2.31.1

View File

@ -1,348 +0,0 @@
From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Mon, 16 Jan 2023 07:16:41 -0500
Subject: [PATCH 29/31] accel: introduce accelerator blocker API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 138: accel: introduce accelerator blocker API
RH-Bugzilla: 1979276
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1
Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Fri Nov 11 10:47:56 2022 -0500
accel: introduce accelerator blocker API
This API allows the accelerators to prevent vcpus from issuing
new ioctls while executing a critical section marked with the
accel_ioctl_inhibit_begin/end functions.
Note that all functions submitting ioctls must mark where the
ioctl is being called with accel_{cpu_}ioctl_begin/end().
This API requires the caller to always hold the BQL.
API documentation is in sysemu/accel-blocker.h
Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
(to minimize cache line bouncing) to prevent new ioctls from
running once the critical section starts, and a QemuEvent to wait
until all running ioctls finish.
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20221111154758.1372674-2-eesposit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts:
util/meson.build: "interval-tree.c" does not exist
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++
accel/meson.build | 2 +-
hw/core/cpu-common.c | 2 +
include/hw/core/cpu.h | 3 +
include/sysemu/accel-blocker.h | 56 ++++++++++++
util/meson.build | 2 +-
6 files changed, 217 insertions(+), 2 deletions(-)
create mode 100644 accel/accel-blocker.c
create mode 100644 include/sysemu/accel-blocker.h
diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
new file mode 100644
index 0000000000..1e7f423462
--- /dev/null
+++ b/accel/accel-blocker.c
@@ -0,0 +1,154 @@
+/*
+ * Lock to inhibit accelerator ioctls
+ *
+ * Copyright (c) 2022 Red Hat Inc.
+ *
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+#include "hw/core/cpu.h"
+#include "sysemu/accel-blocker.h"
+
+static QemuLockCnt accel_in_ioctl_lock;
+static QemuEvent accel_in_ioctl_event;
+
+void accel_blocker_init(void)
+{
+ qemu_lockcnt_init(&accel_in_ioctl_lock);
+ qemu_event_init(&accel_in_ioctl_event, false);
+}
+
+void accel_ioctl_begin(void)
+{
+ if (likely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ /* block if lock is taken in accel_ioctl_inhibit_begin() */
+ qemu_lockcnt_inc(&accel_in_ioctl_lock);
+}
+
+void accel_ioctl_end(void)
+{
+ if (likely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ qemu_lockcnt_dec(&accel_in_ioctl_lock);
+ /* change event to SET. If event was BUSY, wake up all waiters */
+ qemu_event_set(&accel_in_ioctl_event);
+}
+
+void accel_cpu_ioctl_begin(CPUState *cpu)
+{
+ if (unlikely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ /* block if lock is taken in accel_ioctl_inhibit_begin() */
+ qemu_lockcnt_inc(&cpu->in_ioctl_lock);
+}
+
+void accel_cpu_ioctl_end(CPUState *cpu)
+{
+ if (unlikely(qemu_mutex_iothread_locked())) {
+ return;
+ }
+
+ qemu_lockcnt_dec(&cpu->in_ioctl_lock);
+ /* change event to SET. If event was BUSY, wake up all waiters */
+ qemu_event_set(&accel_in_ioctl_event);
+}
+
+static bool accel_has_to_wait(void)
+{
+ CPUState *cpu;
+ bool needs_to_wait = false;
+
+ CPU_FOREACH(cpu) {
+ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) {
+ /* exit the ioctl, if vcpu is running it */
+ qemu_cpu_kick(cpu);
+ needs_to_wait = true;
+ }
+ }
+
+ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock);
+}
+
+void accel_ioctl_inhibit_begin(void)
+{
+ CPUState *cpu;
+
+ /*
+ * We allow to inhibit only when holding the BQL, so we can identify
+ * when an inhibitor wants to issue an ioctl easily.
+ */
+ g_assert(qemu_mutex_iothread_locked());
+
+ /* Block further invocations of the ioctls outside the BQL. */
+ CPU_FOREACH(cpu) {
+ qemu_lockcnt_lock(&cpu->in_ioctl_lock);
+ }
+ qemu_lockcnt_lock(&accel_in_ioctl_lock);
+
+ /* Keep waiting while there are running ioctls */
+ while (true) {
+
+ /* Reset event to FREE. */
+ qemu_event_reset(&accel_in_ioctl_event);
+
+ if (accel_has_to_wait()) {
+ /*
+ * If event is still FREE, and there are ioctls still in progress,
+ * wait.
+ *
+ * If an ioctl finishes before qemu_event_wait(), it will change
+ * the event state to SET. This will prevent qemu_event_wait() from
+ * blocking, but it's not a problem because if other ioctls are
+ * still running the loop will iterate once more and reset the event
+ * status to FREE so that it can wait properly.
+ *
+ * If an ioctl finishes while qemu_event_wait() is blocking, then
+ * it will be woken up, but also here the while loop makes sure
+ * to re-enter the wait if there are other running ioctls.
+ */
+ qemu_event_wait(&accel_in_ioctl_event);
+ } else {
+ /* No ioctl is running */
+ return;
+ }
+ }
+}
+
+void accel_ioctl_inhibit_end(void)
+{
+ CPUState *cpu;
+
+ qemu_lockcnt_unlock(&accel_in_ioctl_lock);
+ CPU_FOREACH(cpu) {
+ qemu_lockcnt_unlock(&cpu->in_ioctl_lock);
+ }
+}
+
diff --git a/accel/meson.build b/accel/meson.build
index 259c35c4c8..061332610f 100644
--- a/accel/meson.build
+++ b/accel/meson.build
@@ -1,4 +1,4 @@
-specific_ss.add(files('accel-common.c'))
+specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
softmmu_ss.add(files('accel-softmmu.c'))
user_ss.add(files('accel-user.c'))
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index f9fdd46b9d..8d6a4b1b65 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj)
cpu->nr_threads = 1;
qemu_mutex_init(&cpu->work_mutex);
+ qemu_lockcnt_init(&cpu->in_ioctl_lock);
QSIMPLEQ_INIT(&cpu->work_list);
QTAILQ_INIT(&cpu->breakpoints);
QTAILQ_INIT(&cpu->watchpoints);
@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj)
{
CPUState *cpu = CPU(obj);
+ qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
qemu_mutex_destroy(&cpu->work_mutex);
}
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 8830546121..2417597236 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -398,6 +398,9 @@ struct CPUState {
uint32_t kvm_fetch_index;
uint64_t dirty_pages;
+ /* Used by accel-blocker: CPU is executing an ioctl() */
+ QemuLockCnt in_ioctl_lock;
+
/* Used for events with 'vcpu' and *without* the 'disabled' properties */
DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
new file mode 100644
index 0000000000..72020529ef
--- /dev/null
+++ b/include/sysemu/accel-blocker.h
@@ -0,0 +1,56 @@
+/*
+ * Accelerator blocking API, to prevent new ioctls from starting and wait for
+ * the running ones to finish.
+ * This mechanism differs from pause/resume_all_vcpus() in that it does not
+ * release the BQL.
+ *
+ * Copyright (c) 2022 Red Hat Inc.
+ *
+ * Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef ACCEL_BLOCKER_H
+#define ACCEL_BLOCKER_H
+
+#include "qemu/osdep.h"
+#include "sysemu/cpus.h"
+
+extern void accel_blocker_init(void);
+
+/*
+ * accel_{cpu_}ioctl_begin/end:
+ * Mark when ioctl is about to run or just finished.
+ *
+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
+ * called, preventing new ioctls from running. They will continue only after
+ * accel_ioctl_inhibit_end().
+ */
+extern void accel_ioctl_begin(void);
+extern void accel_ioctl_end(void);
+extern void accel_cpu_ioctl_begin(CPUState *cpu);
+extern void accel_cpu_ioctl_end(CPUState *cpu);
+
+/*
+ * accel_ioctl_inhibit_begin: start critical section
+ *
+ * This function makes sure that:
+ * 1) incoming accel_{cpu_}ioctl_begin() calls block
+ * 2) it waits until all ioctls that were already running reach
+ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary.
+ *
+ * This allows the caller to access shared data or perform operations without
+ * worrying about concurrent vcpu accesses.
+ */
+extern void accel_ioctl_inhibit_begin(void);
+
+/*
+ * accel_ioctl_inhibit_end: end critical section started by
+ * accel_ioctl_inhibit_begin()
+ *
+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
+ */
+extern void accel_ioctl_inhibit_end(void);
+
+#endif /* ACCEL_BLOCKER_H */
diff --git a/util/meson.build b/util/meson.build
index 25b9b61f98..85a5504c4d 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c'))
util_ss.add(files('yank.c'))
util_ss.add(files('int128.c'))
util_ss.add(files('memalign.c'))
+util_ss.add(files('lockcnt.c'))
if have_user
util_ss.add(files('selfmap.c'))
@@ -71,7 +72,6 @@ endif
if have_block or have_ga
util_ss.add(files('aiocb.c', 'async.c'))
util_ss.add(files('base64.c'))
- util_ss.add(files('lockcnt.c'))
util_ss.add(files('main-loop.c'))
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND'])))
--
2.31.1

View File

@ -1,58 +0,0 @@
From ab68e13b7628f2348d41a4518a92508542af712f Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 3 Feb 2023 18:15:10 +0100
Subject: [PATCH 05/20] accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 144: accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
RH-Bugzilla: 2165280
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Gavin Shan <gshan@redhat.com>
RH-Acked-by: Shaoqin Huang <None>
RH-Commit: [1/1] 5b0863c34ba06c01c4e343d1ecd72402779c7de3 (eauger1/centos-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/2165280
Upstream: yes
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=50530041
Test: 'kvm unit test ./run_tests.sh -g debug' does not SIGSEGV anymore
After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization
before registration"), it looks like the CPUJumpCache pointer can be NULL.
This causes a SIGSEGV when running the debug-wp-migration kvm unit test.
In the first place it should be clarified why this TCG code is called
with KVM acceleration. This may hide another bug.
Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration")
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 99ab4d500af638ba3ebb20e8aa89d72201b70860)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
accel/tcg/cputlb.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 6f1c00682b..4244b0e4e3 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -100,9 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
{
- int i, i0 = tb_jmp_cache_hash_page(page_addr);
CPUJumpCache *jc = cpu->tb_jmp_cache;
+ int i, i0;
+ if (unlikely(!jc)) {
+ return;
+ }
+
+ i0 = tb_jmp_cache_hash_page(page_addr);
for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
qatomic_set(&jc->array[i0 + i].tb, NULL);
}
--
2.31.1

View File

@ -1,50 +0,0 @@
From e9a9c0b023ae0dcbb14543b74063cca931d8230f Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 08/12] aio-wait: switch to smp_mb__after_rmw()
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [5/9] a90c96d148fdbec340a45dc6cedf3660d8be2aab (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit b532526a07ef3b903ead2e055fe6cc87b41057a3
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri Mar 3 11:03:52 2023 +0100
aio-wait: switch to smp_mb__after_rmw()
The barrier comes after an atomic increment, so it is enough to use
smp_mb__after_rmw(); this avoids a double barrier on x86 systems.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
include/block/aio-wait.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index dd9a7f6461..da13357bb8 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -85,7 +85,7 @@ extern AioWait global_aio_wait;
/* Increment wait_->num_waiters before evaluating cond. */ \
qatomic_inc(&wait_->num_waiters); \
/* Paired with smp_mb in aio_wait_kick(). */ \
- smp_mb(); \
+ smp_mb__after_rmw(); \
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
while ((cond)) { \
aio_poll(ctx_, true); \
--
2.39.1

View File

@ -1,66 +0,0 @@
From 3d823dda6832b76fd3d776131008107b0b0f7166 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 12/12] async: clarify usage of barriers in the polling case
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [9/9] b4ea298d75a75bb61e07a27d1296e0095fbc2bbf (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit 6229438cca037d42f44a96d38feb15cb102a444f
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon Mar 6 10:43:52 2023 +0100
async: clarify usage of barriers in the polling case
Explain that aio_context_notifier_poll() relies on
aio_notify_accept() to catch all the memory writes that were
done before ctx->notified was set to true.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
util/async.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/util/async.c b/util/async.c
index 37d3e6036d..e0846baf93 100644
--- a/util/async.c
+++ b/util/async.c
@@ -472,8 +472,9 @@ void aio_notify_accept(AioContext *ctx)
qatomic_set(&ctx->notified, false);
/*
- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb
- * in aio_notify.
+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the
+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs
+ * with smp_wmb() in aio_notify.
*/
smp_mb();
}
@@ -496,6 +497,11 @@ static bool aio_context_notifier_poll(void *opaque)
EventNotifier *e = opaque;
AioContext *ctx = container_of(e, AioContext, notifier);
+ /*
+ * No need for load-acquire because we just want to kick the
+ * event loop. aio_notify_accept() takes care of synchronizing
+ * the event loop with the producers.
+ */
return qatomic_read(&ctx->notified);
}
--
2.39.1

View File

@ -1,111 +0,0 @@
From 29bcf843d796ffc2a0906dea947e4cdfe9f7ec60 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 11/12] async: update documentation of the memory barriers
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [8/9] 5ca20e4c8983e0bc1ecee66bead3472777abe4d1 (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit 8dd48650b43dfde4ebea34191ac267e474bcc29e
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon Mar 6 10:15:06 2023 +0100
async: update documentation of the memory barriers
Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)",
2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll()
is happening when the bottom half is enqueued in the bh_list; not
when the flags are set. Update the documentation to match.
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
util/async.c | 33 +++++++++++++++++++--------------
1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/util/async.c b/util/async.c
index 63434ddae4..37d3e6036d 100644
--- a/util/async.c
+++ b/util/async.c
@@ -73,14 +73,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags)
unsigned old_flags;
/*
- * The memory barrier implicit in qatomic_fetch_or makes sure that:
- * 1. idle & any writes needed by the callback are done before the
- * locations are read in the aio_bh_poll.
- * 2. ctx is loaded before the callback has a chance to execute and bh
- * could be freed.
+ * Synchronizes with qatomic_fetch_and() in aio_bh_dequeue(), ensuring that
+ * insertion starts after BH_PENDING is set.
*/
old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags);
+
if (!(old_flags & BH_PENDING)) {
+ /*
+ * At this point the bottom half becomes visible to aio_bh_poll().
+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in
+ * aio_bh_poll(), ensuring that:
+ * 1. any writes needed by the callback are visible from the callback
+ * after aio_bh_dequeue() returns bh.
+ * 2. ctx is loaded before the callback has a chance to execute and bh
+ * could be freed.
+ */
QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next);
}
@@ -106,11 +113,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
QSLIST_REMOVE_HEAD(head, next);
/*
- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory
- * barrier ensures that the callback sees all writes done by the scheduling
- * thread. It also ensures that the scheduling thread sees the cleared
- * flag before bh->cb has run, and thus will call aio_notify again if
- * necessary.
+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that
+ * the removal finishes before BH_PENDING is reset.
*/
*flags = qatomic_fetch_and(&bh->flags,
~(BH_PENDING | BH_SCHEDULED | BH_IDLE));
@@ -157,6 +161,7 @@ int aio_bh_poll(AioContext *ctx)
BHListSlice *s;
int ret = 0;
+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
@@ -446,15 +451,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
void aio_notify(AioContext *ctx)
{
/*
- * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in
- * aio_notify_accept.
+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with
+ * smp_mb() in aio_notify_accept().
*/
smp_wmb();
qatomic_set(&ctx->notified, true);
/*
- * Write ctx->notified before reading ctx->notify_me. Pairs
- * with smp_mb in aio_ctx_prepare or aio_poll.
+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me.
+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll.
*/
smp_mb();
if (qatomic_read(&ctx->notify_me)) {
--
2.39.1

View File

@ -1,250 +0,0 @@
From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:06 +0100
Subject: [PATCH 24/31] block: Call drain callbacks only once
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s)
We only need to call both the BlockDriver's callback and the parent
callbacks when going from undrained to drained or vice versa. A second
drain section doesn't make a difference for the driver or the parent,
they weren't supposed to send new requests before and after the second
drain.
One thing that gets in the way is the 'ignore_bds_parents' parameter in
bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that
bdrv_drain_all_begin() increases bs->quiesce_counter, but does not
quiesce the parent through BdrvChildClass callbacks. If an additional
drain section is started now, bs->quiesce_counter will be non-zero, but
we would still need to quiesce the parent through BdrvChildClass in
order to keep things consistent (and unquiesce it on the matching
bdrv_drained_end(), even though the counter would not reach 0 yet as
long as the bdrv_drain_all() section is still active).
Instead of keeping track of this, let's just get rid of the parameter.
It was introduced in commit 6cd5c9d7b2d as an optimisation so that
during bdrv_drain_all(), we wouldn't recursively drain all parents up to
the root for each node, resulting in quadratic complexity. As it happens,
calling the callbacks only once solves the same problem, so as of this
patch, we'll still have O(n) complexity and ignore_bds_parents is not
needed any more.
This patch only ignores the 'ignore_bds_parents' parameter. It will be
removed in a separate patch.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-12-kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 25 +++++++------------------
block/io.c | 30 ++++++++++++++++++------------
include/block/block_int-common.h | 8 ++++----
tests/unit/test-bdrv-drain.c | 16 ++++++++++------
4 files changed, 39 insertions(+), 40 deletions(-)
diff --git a/block.c b/block.c
index e0e3b21790..5a583e260d 100644
--- a/block.c
+++ b/block.c
@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
{
BlockDriverState *old_bs = child->bs;
int new_bs_quiesce_counter;
- int drain_saldo;
assert(!child->frozen);
assert(old_bs != new_bs);
@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
}
- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
- drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
-
/*
* If the new child node is drained but the old one was not, flush
* all outstanding requests to the old child node.
*/
- while (drain_saldo > 0 && child->klass->drained_begin) {
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+ if (new_bs_quiesce_counter && !child->quiesced_parent) {
bdrv_parent_drained_begin_single(child, true);
- drain_saldo--;
}
if (old_bs) {
@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
if (new_bs) {
assert_bdrv_graph_writable(new_bs);
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
-
- /*
- * Polling in bdrv_parent_drained_begin_single() may have led to the new
- * node's quiesce_counter having been decreased. Not a problem, we just
- * need to recognize this here and then invoke drained_end appropriately
- * more often.
- */
- assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
- drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
-
if (child->klass->attach) {
child->klass->attach(child);
}
@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
/*
* If the old child node was drained but the new one is not, allow
* requests to come in only after the new node has been attached.
+ *
+ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
+ * polls, which could have changed the value.
*/
- while (drain_saldo < 0 && child->klass->drained_end) {
+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+ if (!new_bs_quiesce_counter && child->quiesced_parent) {
bdrv_parent_drained_end_single(child);
- drain_saldo++;
}
}
diff --git a/block/io.c b/block/io.c
index 75224480d0..87d6f22ec4 100644
--- a/block/io.c
+++ b/block/io.c
@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
{
IO_OR_GS_CODE();
- assert(c->parent_quiesce_counter > 0);
- c->parent_quiesce_counter--;
+ assert(c->quiesced_parent);
+ c->quiesced_parent = false;
+
if (c->klass->drained_end) {
c->klass->drained_end(c);
}
@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
AioContext *ctx = bdrv_child_get_parent_aio_context(c);
IO_OR_GS_CODE();
- c->parent_quiesce_counter++;
+
+ assert(!c->quiesced_parent);
+ c->quiesced_parent = true;
+
if (c->klass->drained_begin) {
c->klass->drained_begin(c);
}
@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
/* Stop things in parent-to-child order */
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
aio_disable_external(bdrv_get_aio_context(bs));
- }
- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
- if (bs->drv && bs->drv->bdrv_drain_begin) {
- bs->drv->bdrv_drain_begin(bs);
+ /* TODO Remove ignore_bds_parents, we don't consider it any more */
+ bdrv_parent_drained_begin(bs, parent, false);
+ if (bs->drv && bs->drv->bdrv_drain_begin) {
+ bs->drv->bdrv_drain_begin(bs);
+ }
}
}
@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
assert(bs->quiesce_counter > 0);
/* Re-enable things in child-to-parent order */
- if (bs->drv && bs->drv->bdrv_drain_end) {
- bs->drv->bdrv_drain_end(bs);
- }
- bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
-
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
if (old_quiesce_counter == 1) {
+ if (bs->drv && bs->drv->bdrv_drain_end) {
+ bs->drv->bdrv_drain_end(bs);
+ }
+ /* TODO Remove ignore_bds_parents, we don't consider it any more */
+ bdrv_parent_drained_end(bs, parent, false);
+
aio_enable_external(bdrv_get_aio_context(bs));
}
}
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 791dddfd7d..a6bc6b7fe9 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -980,13 +980,13 @@ struct BdrvChild {
bool frozen;
/*
- * How many times the parent of this child has been drained
+ * True if the parent of this child has been drained by this BdrvChild
* (through klass->drained_*).
- * Usually, this is equal to bs->quiesce_counter (potentially
- * reduced by bdrv_drain_all_count). It may differ while the
+ *
+ * It is generally true if bs->quiesce_counter > 0. It may differ while the
* child is entering or leaving a drained section.
*/
- int parent_quiesce_counter;
+ bool quiesced_parent;
QLIST_ENTRY(BdrvChild) next;
QLIST_ENTRY(BdrvChild) next_parent;
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index dda08de8db..172bc6debc 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
do_drain_begin(drain_type, bs);
- g_assert_cmpint(bs->quiesce_counter, ==, 1);
+ if (drain_type == BDRV_DRAIN_ALL) {
+ g_assert_cmpint(bs->quiesce_counter, ==, 2);
+ } else {
+ g_assert_cmpint(bs->quiesce_counter, ==, 1);
+ }
g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
do_drain_end(drain_type, bs);
@@ -348,8 +352,8 @@ static void test_nested(void)
for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
- int backing_quiesce = (outer != BDRV_DRAIN) +
- (inner != BDRV_DRAIN);
+ int backing_quiesce = (outer == BDRV_DRAIN_ALL) +
+ (inner == BDRV_DRAIN_ALL);
g_assert_cmpint(bs->quiesce_counter, ==, 0);
g_assert_cmpint(backing->quiesce_counter, ==, 0);
@@ -359,10 +363,10 @@ static void test_nested(void)
do_drain_begin(outer, bs);
do_drain_begin(inner, bs);
- g_assert_cmpint(bs->quiesce_counter, ==, 2);
+ g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce);
g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
- g_assert_cmpint(s->drain_count, ==, 2);
- g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
+ g_assert_cmpint(s->drain_count, ==, 1);
+ g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce);
do_drain_end(inner, bs);
do_drain_end(outer, bs);
--
2.31.1

View File

@ -1,298 +0,0 @@
From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:09 +0100
Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s)
In order to make sure that bdrv_replace_child_noperm() doesn't have to
poll any more, get rid of the bdrv_parent_drained_begin_single() call.
This is possible now because we can require that the parent is already
drained through the child in question when the function is called and we
don't call the parent drain callbacks more than once.
The additional drain calls needed in callers cause the test case to run
its code in the drain handler too early (bdrv_attach_child() drains
now), so modify it to only enable the code after the test setup has
completed.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-15-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 103 ++++++++++++++++++++++++++++++-----
block/io.c | 2 +-
include/block/block-io.h | 8 +++
tests/unit/test-bdrv-drain.c | 10 ++++
4 files changed, 108 insertions(+), 15 deletions(-)
diff --git a/block.c b/block.c
index af31a94863..65588d313a 100644
--- a/block.c
+++ b/block.c
@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque)
GLOBAL_STATE_CODE();
/* old_bs reference is transparently moved from @s to @s->child */
+ if (!s->child->bs) {
+ /*
+ * The parents were undrained when removing old_bs from the child. New
+ * requests can't have been made, though, because the child was empty.
+ *
+ * TODO Make bdrv_replace_child_noperm() transactionable to avoid
+ * undraining the parent in the first place. Once this is done, having
+ * new_bs drained when calling bdrv_replace_child_tran() is not a
+ * requirement any more.
+ */
+ bdrv_parent_drained_begin_single(s->child, false);
+ assert(!bdrv_parent_drained_poll_single(s->child));
+ }
+ assert(s->child->quiesced_parent);
bdrv_replace_child_noperm(s->child, s->old_bs);
bdrv_unref(new_bs);
}
@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = {
*
* Note: real unref of old_bs is done only on commit.
*
+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
+ * kept drained until the transaction is completed.
+ *
* The function doesn't update permissions, caller is responsible for this.
*/
static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
Transaction *tran)
{
BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
+
+ assert(child->quiesced_parent);
+ assert(!new_bs || new_bs->quiesce_counter);
+
*s = (BdrvReplaceChildState) {
.child = child,
.old_bs = child->bs,
@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
return permissions[qapi_perm];
}
+/*
+ * Replaces the node that a BdrvChild points to without updating permissions.
+ *
+ * If @new_bs is non-NULL, the parent of @child must already be drained through
+ * @child.
+ *
+ * This function does not poll.
+ */
static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs)
{
@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
int new_bs_quiesce_counter;
assert(!child->frozen);
+
+ /*
+ * If we want to change the BdrvChild to point to a drained node as its new
+ * child->bs, we need to make sure that its new parent is drained, too. In
+ * other words, either child->quiesced_parent must already be true or we must
+ * be able to set it and keep the parent's quiesce_counter consistent with
+ * that, but without polling or starting new requests (this function
+ * guarantees that it doesn't poll, and starting new requests would be
+ * against the invariants of drain sections).
+ *
+ * To keep things simple, we pick the first option (child->quiesced_parent
+ * must already be true). We also generalise the rule a bit to make it
+ * easier to verify in callers and more likely to be covered in test cases:
+ * The parent must be quiesced through this child even if new_bs isn't
+ * currently drained.
+ *
+ * The only exception is for callers that always pass new_bs == NULL. In
+ * this case, we obviously never need to consider the case of a drained
+ * new_bs, so we can keep the callers simpler by allowing them not to drain
+ * the parent.
+ */
+ assert(!new_bs || child->quiesced_parent);
assert(old_bs != new_bs);
GLOBAL_STATE_CODE();
@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
}
- /*
- * If the new child node is drained but the old one was not, flush
- * all outstanding requests to the old child node.
- */
- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
- if (new_bs_quiesce_counter && !child->quiesced_parent) {
- bdrv_parent_drained_begin_single(child, true);
- }
-
if (old_bs) {
if (child->klass->detach) {
child->klass->detach(child);
@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
}
/*
- * If the old child node was drained but the new one is not, allow
- * requests to come in only after the new node has been attached.
- *
- * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
- * polls, which could have changed the value.
+ * If the parent was drained through this BdrvChild previously, but new_bs
+ * is not drained, allow requests to come in only after the new node has
+ * been attached.
*/
new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
if (!new_bs_quiesce_counter && child->quiesced_parent) {
@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
}
bdrv_ref(child_bs);
+ /*
+ * Let every new BdrvChild start with a drained parent. Inserting the child
+ * in the graph with bdrv_replace_child_noperm() will undrain it if
+ * @child_bs is not drained.
+ *
+ * The child was only just created and is not yet visible in global state
+ * until bdrv_replace_child_noperm() inserts it into the graph, so nobody
+ * could have sent requests and polling is not necessary.
+ *
+ * Note that this means that the parent isn't fully drained yet, we only
+ * stop new requests from coming in. This is fine, we don't care about the
+ * old requests here, they are not for this child. If another place enters a
+ * drain section for the same parent, but wants it to be fully quiesced, it
+ * will not run most of the code in .drained_begin() again (which is not
+ * a problem, we already did this), but it will still poll until the parent
+ * is fully quiesced, so it will not be negatively affected either.
+ */
+ bdrv_parent_drained_begin_single(new_child, false);
bdrv_replace_child_noperm(new_child, child_bs);
BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
}
if (child->bs) {
+ BlockDriverState *bs = child->bs;
+ bdrv_drained_begin(bs);
bdrv_replace_child_tran(child, NULL, tran);
+ bdrv_drained_end(bs);
}
tran_add(tran, &bdrv_remove_child_drv, child);
}
+static void undrain_on_clean_cb(void *opaque)
+{
+ bdrv_drained_end(opaque);
+}
+
+static TransactionActionDrv undrain_on_clean = {
+ .clean = undrain_on_clean_cb,
+};
+
static int bdrv_replace_node_noperm(BlockDriverState *from,
BlockDriverState *to,
bool auto_skip, Transaction *tran,
@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
GLOBAL_STATE_CODE();
+ bdrv_drained_begin(from);
+ bdrv_drained_begin(to);
+ tran_add(tran, &undrain_on_clean, from);
+ tran_add(tran, &undrain_on_clean, to);
+
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
assert(c->bs == from);
if (!should_update_child(c, to)) {
diff --git a/block/io.c b/block/io.c
index 5e9150d92c..ae64830eac 100644
--- a/block/io.c
+++ b/block/io.c
@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
}
}
-static bool bdrv_parent_drained_poll_single(BdrvChild *c)
+bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
if (c->klass->drained_poll) {
return c->klass->drained_poll(c);
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 8f5e75756a..65e6d2569b 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
*/
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
+/**
+ * bdrv_parent_drained_poll_single:
+ *
+ * Returns true if there is any pending activity to cease before @c can be
+ * called quiesced, false otherwise.
+ */
+bool bdrv_parent_drained_poll_single(BdrvChild *c);
+
/**
* bdrv_parent_drained_end_single:
*
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 172bc6debc..2686a8acee 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void)
typedef struct BDRVReplaceTestState {
+ bool setup_completed;
bool was_drained;
bool was_undrained;
bool has_read;
@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
{
BDRVReplaceTestState *s = bs->opaque;
+ if (!s->setup_completed) {
+ return;
+ }
+
if (!s->drain_count) {
s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
bdrv_inc_in_flight(bs);
@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs)
{
BDRVReplaceTestState *s = bs->opaque;
+ if (!s->setup_completed) {
+ return;
+ }
+
g_assert(s->drain_count > 0);
if (!--s->drain_count) {
s->was_undrained = true;
@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
bdrv_ref(old_child_bs);
bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
BDRV_CHILD_COW, &error_abort);
+ parent_s->setup_completed = true;
for (i = 0; i < old_drain_count; i++) {
bdrv_drained_begin(old_child_bs);
--
2.31.1

View File

@ -1,54 +0,0 @@
From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:03 +0100
Subject: [PATCH 21/31] block: Don't use subtree drains in
bdrv_drop_intermediate()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s)
Instead of using a subtree drain from the top node (which also drains
child nodes of base that we're not even interested in), use a normal
drain for base, which automatically drains all of the parents, too.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-9-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/block.c b/block.c
index cb5e96b1cf..b3449a312e 100644
--- a/block.c
+++ b/block.c
@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
GLOBAL_STATE_CODE();
bdrv_ref(top);
- bdrv_subtree_drained_begin(top);
+ bdrv_drained_begin(base);
if (!top->drv || !base->drv) {
goto exit;
@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
ret = 0;
exit:
- bdrv_subtree_drained_end(top);
+ bdrv_drained_end(base);
bdrv_unref(top);
return ret;
}
--
2.31.1

View File

@ -1,157 +0,0 @@
From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:02 +0100
Subject: [PATCH 20/31] block: Drain individual nodes during reopen
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s)
bdrv_reopen() and friends use subtree drains as a lazy way of covering
all the nodes they touch. Turns out that this lazy way is a lot more
complicated than just draining the nodes individually, even without
accounting for the additional complexity in the drain mechanism itself.
Simplify the code by switching to draining the individual nodes that are
already managed in the BlockReopenQueue anyway.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-8-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 16 +++++++++-------
block/replication.c | 6 ------
blockdev.c | 13 -------------
3 files changed, 9 insertions(+), 26 deletions(-)
diff --git a/block.c b/block.c
index 46df410b07..cb5e96b1cf 100644
--- a/block.c
+++ b/block.c
@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
* returns a pointer to bs_queue, which is either the newly allocated
* bs_queue, or the existing bs_queue being used.
*
- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
+ * bs is drained here and undrained by bdrv_reopen_queue_free().
*
* To be called with bs->aio_context locked.
*/
@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
int flags;
QemuOpts *opts;
- /* Make sure that the caller remembered to use a drained section. This is
- * important to avoid graph changes between the recursive queuing here and
- * bdrv_reopen_multiple(). */
- assert(bs->quiesce_counter > 0);
GLOBAL_STATE_CODE();
+ bdrv_drained_begin(bs);
+
if (bs_queue == NULL) {
bs_queue = g_new0(BlockReopenQueue, 1);
QTAILQ_INIT(bs_queue);
@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
if (bs_queue) {
BlockReopenQueueEntry *bs_entry, *next;
QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+ AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs);
+
+ aio_context_acquire(ctx);
+ bdrv_drained_end(bs_entry->state.bs);
+ aio_context_release(ctx);
+
qobject_unref(bs_entry->state.explicit_options);
qobject_unref(bs_entry->state.options);
g_free(bs_entry);
@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
GLOBAL_STATE_CODE();
- bdrv_subtree_drained_begin(bs);
queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
if (ctx != qemu_get_aio_context()) {
@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
if (ctx != qemu_get_aio_context()) {
aio_context_acquire(ctx);
}
- bdrv_subtree_drained_end(bs);
return ret;
}
diff --git a/block/replication.c b/block/replication.c
index f1eed25e43..c62f48a874 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
}
- bdrv_subtree_drained_begin(hidden_disk->bs);
- bdrv_subtree_drained_begin(secondary_disk->bs);
-
if (s->orig_hidden_read_only) {
QDict *opts = qdict_new();
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
aio_context_acquire(ctx);
}
}
-
- bdrv_subtree_drained_end(hidden_disk->bs);
- bdrv_subtree_drained_end(secondary_disk->bs);
}
static void backup_job_cleanup(BlockDriverState *bs)
diff --git a/blockdev.c b/blockdev.c
index 3f1dec6242..8ffb3d9537 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3547,8 +3547,6 @@ fail:
void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
{
BlockReopenQueue *queue = NULL;
- GSList *drained = NULL;
- GSList *p;
/* Add each one of the BDS that we want to reopen to the queue */
for (; reopen_list != NULL; reopen_list = reopen_list->next) {
@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
- bdrv_subtree_drained_begin(bs);
queue = bdrv_reopen_queue(queue, bs, qdict, false);
- drained = g_slist_prepend(drained, bs);
aio_context_release(ctx);
}
@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
fail:
bdrv_reopen_queue_free(queue);
- for (p = drained; p; p = p->next) {
- BlockDriverState *bs = p->data;
- AioContext *ctx = bdrv_get_aio_context(bs);
-
- aio_context_acquire(ctx);
- bdrv_subtree_drained_end(bs);
- aio_context_release(ctx);
- }
- g_slist_free(drained);
}
void qmp_blockdev_del(const char *node_name, Error **errp)
--
2.31.1

View File

@ -1,96 +0,0 @@
From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:08 +0100
Subject: [PATCH 26/31] block: Drop out of coroutine in
bdrv_do_drained_begin_quiesce()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [14/16] c9266663b822f703e55b6a07de98ceb56e69e924 (sgarzarella/qemu-kvm-c-9-s)
The next patch adds a parent drain to bdrv_attach_child_common(), which
shouldn't be, but is currently called from coroutines in some cases (e.g.
.bdrv_co_create implementations generally open new nodes). Therefore,
the assertion that we're not in a coroutine doesn't hold true any more.
We could just remove the assertion because there is nothing in the
function that should be in conflict with running in a coroutine, but
just to be on the safe side, we can reverse the caller relationship
between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so
that the latter also just drops out of coroutine context and we can
still be certain in the future that any drain code doesn't run in
coroutines.
As a nice side effect, the structure of bdrv_do_drained_begin() is now
symmetrical with bdrv_do_drained_end().
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-14-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/io.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/block/io.c b/block/io.c
index 2e9503df6a..5e9150d92c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
}
}
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+ bool poll)
{
IO_OR_GS_CODE();
- assert(!qemu_in_coroutine());
+
+ if (qemu_in_coroutine()) {
+ bdrv_co_yield_to_drain(bs, true, parent, poll);
+ return;
+ }
/* Stop things in parent-to-child order */
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
bs->drv->bdrv_drain_begin(bs);
}
}
-}
-
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
- bool poll)
-{
- if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, true, parent, poll);
- return;
- }
-
- bdrv_do_drained_begin_quiesce(bs, parent);
/*
* Wait for drained requests to finish.
@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
}
}
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+{
+ bdrv_do_drained_begin(bs, parent, false);
+}
+
void bdrv_drained_begin(BlockDriverState *bs)
{
IO_OR_GS_CODE();
--
2.31.1

View File

@ -1,67 +0,0 @@
From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:01 +0100
Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s)
Callers don't agree whether bdrv_reopen_queue_child() should be called
with the AioContext lock held or not. Standardise on holding the lock
(as done by QMP blockdev-reopen and the replication block driver) and
fix bdrv_reopen() to do the same.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-7-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/block.c b/block.c
index 7999fd08c5..46df410b07 100644
--- a/block.c
+++ b/block.c
@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
* bs_queue, or the existing bs_queue being used.
*
* bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
+ *
+ * To be called with bs->aio_context locked.
*/
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
return bs_queue;
}
+/* To be called with bs->aio_context locked */
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
QDict *options, bool keep_old_opts)
@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
GLOBAL_STATE_CODE();
bdrv_subtree_drained_begin(bs);
+ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
+
if (ctx != qemu_get_aio_context()) {
aio_context_release(ctx);
}
-
- queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
ret = bdrv_reopen_multiple(queue, errp);
if (ctx != qemu_get_aio_context()) {
--
2.31.1

View File

@ -1,132 +0,0 @@
From 074c89b05dae971c7118cb769fd34e22135c8f4c Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:53 +0200
Subject: [PATCH 06/20] block: Improve empty format-specific info dump
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [1/12] be551e83f426e620e673302198b51368bfd324ce (hreitz/qemu-kvm-c-9-s)
When a block driver supports obtaining format-specific information, but
that object only contains optional fields, it is possible that none of
them are present, so that dump_qobject() (called by
bdrv_image_info_specific_dump()) will not print anything.
The callers of bdrv_image_info_specific_dump() put a header above this
information ("Format specific information:\n"), which will look strange
when there is nothing below. Modify bdrv_image_info_specific_dump() to
print this header instead of its callers, and only if there is indeed
something to be printed.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-2-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 3716470b24f0f63090d59bcf28ad8fe6fb7835bd)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/qapi.c | 41 +++++++++++++++++++++++++++++++++++++----
include/block/qapi.h | 3 ++-
qemu-io-cmds.c | 4 ++--
3 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/block/qapi.c b/block/qapi.c
index cf557e3aea..51202b470a 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -777,7 +777,35 @@ static void dump_qdict(int indentation, QDict *dict)
}
}
-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec)
+/*
+ * Return whether dumping the given QObject with dump_qobject() would
+ * yield an empty dump, i.e. not print anything.
+ */
+static bool qobject_is_empty_dump(const QObject *obj)
+{
+ switch (qobject_type(obj)) {
+ case QTYPE_QNUM:
+ case QTYPE_QSTRING:
+ case QTYPE_QBOOL:
+ return false;
+
+ case QTYPE_QDICT:
+ return qdict_size(qobject_to(QDict, obj)) == 0;
+
+ case QTYPE_QLIST:
+ return qlist_empty(qobject_to(QList, obj));
+
+ default:
+ abort();
+ }
+}
+
+/**
+ * Dumps the given ImageInfoSpecific object in a human-readable form,
+ * prepending an optional prefix if the dump is not empty.
+ */
+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
+ const char *prefix)
{
QObject *obj, *data;
Visitor *v = qobject_output_visitor_new(&obj);
@@ -785,7 +813,12 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec)
visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
visit_complete(v, &obj);
data = qdict_get(qobject_to(QDict, obj), "data");
- dump_qobject(1, data);
+ if (!qobject_is_empty_dump(data)) {
+ if (prefix) {
+ qemu_printf("%s", prefix);
+ }
+ dump_qobject(1, data);
+ }
qobject_unref(obj);
visit_free(v);
}
@@ -866,7 +899,7 @@ void bdrv_image_info_dump(ImageInfo *info)
}
if (info->has_format_specific) {
- qemu_printf("Format specific information:\n");
- bdrv_image_info_specific_dump(info->format_specific);
+ bdrv_image_info_specific_dump(info->format_specific,
+ "Format specific information:\n");
}
}
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 22c7807c89..c09859ea78 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -40,6 +40,7 @@ void bdrv_query_image_info(BlockDriverState *bs,
Error **errp);
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec);
+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
+ const char *prefix);
void bdrv_image_info_dump(ImageInfo *info);
#endif
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 952dc940f1..f4a374528e 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -1825,8 +1825,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv)
return -EIO;
}
if (spec_info) {
- printf("Format specific information:\n");
- bdrv_image_info_specific_dump(spec_info);
+ bdrv_image_info_specific_dump(spec_info,
+ "Format specific information:\n");
qapi_free_ImageInfoSpecific(spec_info);
}
--
2.31.1

View File

@ -1,81 +0,0 @@
From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:00 +0100
Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s)
bdrv_drain_invoke() has now two entirely separate cases that share no
code any more and are selected depending on a bool parameter. Each case
has only one caller. Just inline the function.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-6-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/io.c | 23 ++++++-----------------
1 file changed, 6 insertions(+), 17 deletions(-)
diff --git a/block/io.c b/block/io.c
index f4ca62b034..a25103be6f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -242,21 +242,6 @@ typedef struct {
bool ignore_bds_parents;
} BdrvCoDrainData;
-/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
-{
- if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
- (!begin && !bs->drv->bdrv_drain_end)) {
- return;
- }
-
- if (begin) {
- bs->drv->bdrv_drain_begin(bs);
- } else {
- bs->drv->bdrv_drain_end(bs);
- }
-}
-
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent, bool ignore_bds_parents)
@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
}
bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
- bdrv_drain_invoke(bs, true);
+ if (bs->drv && bs->drv->bdrv_drain_begin) {
+ bs->drv->bdrv_drain_begin(bs);
+ }
}
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
assert(bs->quiesce_counter > 0);
/* Re-enable things in child-to-parent order */
- bdrv_drain_invoke(bs, false);
+ if (bs->drv && bs->drv->bdrv_drain_end) {
+ bs->drv->bdrv_drain_end(bs);
+ }
bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
--
2.31.1

View File

@ -1,433 +0,0 @@
From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:40:59 +0100
Subject: [PATCH 17/31] block: Remove drained_end_counter
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s)
drained_end_counter is unused now, nobody changes its value any more. It
can be removed.
In cases where we had two almost identical functions that only differed
in whether the caller passes drained_end_counter, or whether they would
poll for a local drained_end_counter to reach 0, these become a single
function.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20221118174110.55183-5-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 5 +-
block/block-backend.c | 4 +-
block/io.c | 98 ++++++++------------------------
blockjob.c | 2 +-
include/block/block-io.h | 24 --------
include/block/block_int-common.h | 6 +-
6 files changed, 30 insertions(+), 109 deletions(-)
diff --git a/block.c b/block.c
index 16a62a329c..7999fd08c5 100644
--- a/block.c
+++ b/block.c
@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child)
return bdrv_drain_poll(bs, false, NULL, false);
}
-static void bdrv_child_cb_drained_end(BdrvChild *child,
- int *drained_end_counter)
+static void bdrv_child_cb_drained_end(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
- bdrv_drained_end_no_poll(bs, drained_end_counter);
+ bdrv_drained_end(bs);
}
static int bdrv_child_cb_inactivate(BdrvChild *child)
diff --git a/block/block-backend.c b/block/block-backend.c
index d98a96ff37..feaf2181fa 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
}
static void blk_root_drained_begin(BdrvChild *child);
static bool blk_root_drained_poll(BdrvChild *child);
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
+static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child)
return busy || !!blk->in_flight;
}
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
+static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
assert(blk->quiesce_counter);
diff --git a/block/io.c b/block/io.c
index c2ed4b2af9..f4ca62b034 100644
--- a/block/io.c
+++ b/block/io.c
@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
}
}
-static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
- int *drained_end_counter)
+void bdrv_parent_drained_end_single(BdrvChild *c)
{
+ IO_OR_GS_CODE();
+
assert(c->parent_quiesce_counter > 0);
c->parent_quiesce_counter--;
if (c->klass->drained_end) {
- c->klass->drained_end(c, drained_end_counter);
+ c->klass->drained_end(c);
}
}
-void bdrv_parent_drained_end_single(BdrvChild *c)
-{
- int drained_end_counter = 0;
- AioContext *ctx = bdrv_child_get_parent_aio_context(c);
- IO_OR_GS_CODE();
- bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
- AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0);
-}
-
static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
- bool ignore_bds_parents,
- int *drained_end_counter)
+ bool ignore_bds_parents)
{
BdrvChild *c;
@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
continue;
}
- bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
+ bdrv_parent_drained_end_single(c);
}
}
@@ -249,12 +240,10 @@ typedef struct {
bool poll;
BdrvChild *parent;
bool ignore_bds_parents;
- int *drained_end_counter;
} BdrvCoDrainData;
/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
- int *drained_end_counter)
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
{
if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
(!begin && !bs->drv->bdrv_drain_end)) {
@@ -305,8 +294,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
BdrvChild *parent, bool ignore_bds_parents,
bool poll);
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents,
- int *drained_end_counter);
+ BdrvChild *parent, bool ignore_bds_parents);
static void bdrv_co_drain_bh_cb(void *opaque)
{
@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque)
aio_context_acquire(ctx);
bdrv_dec_in_flight(bs);
if (data->begin) {
- assert(!data->drained_end_counter);
bdrv_do_drained_begin(bs, data->recursive, data->parent,
data->ignore_bds_parents, data->poll);
} else {
assert(!data->poll);
bdrv_do_drained_end(bs, data->recursive, data->parent,
- data->ignore_bds_parents,
- data->drained_end_counter);
+ data->ignore_bds_parents);
}
aio_context_release(ctx);
} else {
@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
bool begin, bool recursive,
BdrvChild *parent,
bool ignore_bds_parents,
- bool poll,
- int *drained_end_counter)
+ bool poll)
{
BdrvCoDrainData data;
Coroutine *self = qemu_coroutine_self();
@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
.parent = parent,
.ignore_bds_parents = ignore_bds_parents,
.poll = poll,
- .drained_end_counter = drained_end_counter,
};
if (bs) {
@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
}
bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
- bdrv_drain_invoke(bs, true, NULL);
+ bdrv_drain_invoke(bs, true);
}
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
- poll, NULL);
+ poll);
return;
}
@@ -461,38 +445,24 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs)
/**
* This function does not poll, nor must any of its recursively called
- * functions. The *drained_end_counter pointee will be incremented
- * once for every background operation scheduled, and decremented once
- * the operation settles. Therefore, the pointer must remain valid
- * until the pointee reaches 0. That implies that whoever sets up the
- * pointee has to poll until it is 0.
- *
- * We use atomic operations to access *drained_end_counter, because
- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
- * @bs may contain nodes in different AioContexts,
- * (2) bdrv_drain_all_end() uses the same counter for all nodes,
- * regardless of which AioContext they are in.
+ * functions.
*/
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents,
- int *drained_end_counter)
+ BdrvChild *parent, bool ignore_bds_parents)
{
BdrvChild *child;
int old_quiesce_counter;
- assert(drained_end_counter != NULL);
-
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
- false, drained_end_counter);
+ false);
return;
}
assert(bs->quiesce_counter > 0);
/* Re-enable things in child-to-parent order */
- bdrv_drain_invoke(bs, false, drained_end_counter);
- bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
- drained_end_counter);
+ bdrv_drain_invoke(bs, false);
+ bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
if (old_quiesce_counter == 1) {
@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
assert(!ignore_bds_parents);
bs->recursive_quiesce_counter--;
QLIST_FOREACH(child, &bs->children, next) {
- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
- drained_end_counter);
+ bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
}
}
}
void bdrv_drained_end(BlockDriverState *bs)
{
- int drained_end_counter = 0;
IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
-}
-
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
-{
- IO_CODE();
- bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
+ bdrv_do_drained_end(bs, false, NULL, false);
}
void bdrv_subtree_drained_end(BlockDriverState *bs)
{
- int drained_end_counter = 0;
IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
+ bdrv_do_drained_end(bs, true, NULL, false);
}
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
- int drained_end_counter = 0;
int i;
IO_OR_GS_CODE();
for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
- bdrv_do_drained_end(child->bs, true, child, false,
- &drained_end_counter);
+ bdrv_do_drained_end(child->bs, true, child, false);
}
-
- BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
}
void bdrv_drain(BlockDriverState *bs)
@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void)
GLOBAL_STATE_CODE();
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
return;
}
@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void)
void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
{
- int drained_end_counter = 0;
GLOBAL_STATE_CODE();
g_assert(bs->quiesce_counter > 0);
g_assert(!bs->refcnt);
while (bs->quiesce_counter) {
- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
+ bdrv_do_drained_end(bs, false, NULL, true);
}
- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
void bdrv_drain_all_end(void)
{
BlockDriverState *bs = NULL;
- int drained_end_counter = 0;
GLOBAL_STATE_CODE();
/*
@@ -680,13 +632,11 @@ void bdrv_drain_all_end(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
+ bdrv_do_drained_end(bs, false, NULL, true);
aio_context_release(aio_context);
}
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
- AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);
-
assert(bdrv_drain_all_count > 0);
bdrv_drain_all_count--;
}
diff --git a/blockjob.c b/blockjob.c
index f51d4e18f3..0ab721e139 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c)
}
}
-static void child_job_drained_end(BdrvChild *c, int *drained_end_counter)
+static void child_job_drained_end(BdrvChild *c)
{
BlockJob *job = c->opaque;
job_resume(&job->job);
diff --git a/include/block/block-io.h b/include/block/block-io.h
index b099d7db45..054e964c9b 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
-/**
- * bdrv_drained_end_no_poll:
- *
- * Same as bdrv_drained_end(), but do not poll for the subgraph to
- * actually become unquiesced. Therefore, no graph changes will occur
- * with this function.
- *
- * *drained_end_counter is incremented for every background operation
- * that is scheduled, and will be decremented for every operation once
- * it settles. The caller must poll until it reaches 0. The counter
- * should be accessed using atomic operations only.
- */
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
-
-
/*
* "I/O or GS" API functions. These functions can run without
* the BQL, but only in one specific iothread/main loop.
@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
* bdrv_parent_drained_end_single:
*
* End a quiesced section for the parent of @c.
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled, which may result in graph changes.
*/
void bdrv_parent_drained_end_single(BdrvChild *c);
@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
* bdrv_drained_end:
*
* End a quiescent section started by bdrv_drained_begin().
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled. On one hand, that may result in graph changes. On
- * the other, this requires that the caller either runs in the main
- * loop; or that all involved nodes (@bs and all of its parents) are
- * in the caller's AioContext.
*/
void bdrv_drained_end(BlockDriverState *bs);
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 40d646d1ed..2b97576f6d 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -939,15 +939,11 @@ struct BdrvChildClass {
* These functions must not change the graph (and therefore also must not
* call aio_poll(), which could change the graph indirectly).
*
- * If drained_end() schedules background operations, it must atomically
- * increment *drained_end_counter for each such operation and atomically
- * decrement it once the operation has settled.
- *
* Note that this can be nested. If drained_begin() was called twice, new
* I/O is allowed only after drained_end() was called twice, too.
*/
void (*drained_begin)(BdrvChild *child);
- void (*drained_end)(BdrvChild *child, int *drained_end_counter);
+ void (*drained_end)(BdrvChild *child);
/*
* Returns whether the parent has pending requests for the child. This
--
2.31.1

View File

@ -1,274 +0,0 @@
From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:07 +0100
Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from
drain_begin/end.
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s)
ignore_bds_parents is now ignored during drain_begin and drain_end, so
we can just remove it there. It is still a valid optimisation for
drain_all in bdrv_drained_poll(), so leave it around there.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-13-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 2 +-
block/io.c | 58 +++++++++++++++-------------------------
include/block/block-io.h | 3 +--
3 files changed, 24 insertions(+), 39 deletions(-)
diff --git a/block.c b/block.c
index 5a583e260d..af31a94863 100644
--- a/block.c
+++ b/block.c
@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c)
static void bdrv_child_cb_drained_begin(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
- bdrv_do_drained_begin_quiesce(bs, NULL, false);
+ bdrv_do_drained_begin_quiesce(bs, NULL);
}
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
diff --git a/block/io.c b/block/io.c
index 87d6f22ec4..2e9503df6a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int64_t bytes, BdrvRequestFlags flags);
-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
- bool ignore_bds_parents)
+static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c, *next;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+ if (c == ignore) {
continue;
}
bdrv_parent_drained_begin_single(c, false);
@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
}
}
-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
- bool ignore_bds_parents)
+static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c;
QLIST_FOREACH(c, &bs->parents, next_parent) {
- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+ if (c == ignore) {
continue;
}
bdrv_parent_drained_end_single(c);
@@ -242,7 +240,6 @@ typedef struct {
bool begin;
bool poll;
BdrvChild *parent;
- bool ignore_bds_parents;
} BdrvCoDrainData;
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
}
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents, bool poll);
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents);
+ bool poll);
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
static void bdrv_co_drain_bh_cb(void *opaque)
{
@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque)
aio_context_acquire(ctx);
bdrv_dec_in_flight(bs);
if (data->begin) {
- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
- data->poll);
+ bdrv_do_drained_begin(bs, data->parent, data->poll);
} else {
assert(!data->poll);
- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
+ bdrv_do_drained_end(bs, data->parent);
}
aio_context_release(ctx);
} else {
@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque)
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
bool begin,
BdrvChild *parent,
- bool ignore_bds_parents,
bool poll)
{
BdrvCoDrainData data;
@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
.done = false,
.begin = begin,
.parent = parent,
- .ignore_bds_parents = ignore_bds_parents,
.poll = poll,
};
@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
}
}
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
- BdrvChild *parent, bool ignore_bds_parents)
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
{
IO_OR_GS_CODE();
assert(!qemu_in_coroutine());
@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
/* Stop things in parent-to-child order */
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
aio_disable_external(bdrv_get_aio_context(bs));
-
- /* TODO Remove ignore_bds_parents, we don't consider it any more */
- bdrv_parent_drained_begin(bs, parent, false);
+ bdrv_parent_drained_begin(bs, parent);
if (bs->drv && bs->drv->bdrv_drain_begin) {
bs->drv->bdrv_drain_begin(bs);
}
@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
}
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents, bool poll)
+ bool poll)
{
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
+ bdrv_co_yield_to_drain(bs, true, parent, poll);
return;
}
- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
+ bdrv_do_drained_begin_quiesce(bs, parent);
/*
* Wait for drained requests to finish.
@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
* nodes.
*/
if (poll) {
- assert(!ignore_bds_parents);
BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
}
}
@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
void bdrv_drained_begin(BlockDriverState *bs)
{
IO_OR_GS_CODE();
- bdrv_do_drained_begin(bs, NULL, false, true);
+ bdrv_do_drained_begin(bs, NULL, true);
}
/**
* This function does not poll, nor must any of its recursively called
* functions.
*/
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents)
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
{
int old_quiesce_counter;
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
+ bdrv_co_yield_to_drain(bs, false, parent, false);
return;
}
assert(bs->quiesce_counter > 0);
@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
if (bs->drv && bs->drv->bdrv_drain_end) {
bs->drv->bdrv_drain_end(bs);
}
- /* TODO Remove ignore_bds_parents, we don't consider it any more */
- bdrv_parent_drained_end(bs, parent, false);
-
+ bdrv_parent_drained_end(bs, parent);
aio_enable_external(bdrv_get_aio_context(bs));
}
}
@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
void bdrv_drained_end(BlockDriverState *bs)
{
IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, NULL, false);
+ bdrv_do_drained_end(bs, NULL);
}
void bdrv_drain(BlockDriverState *bs)
@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void)
GLOBAL_STATE_CODE();
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
+ bdrv_co_yield_to_drain(NULL, true, NULL, true);
return;
}
@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_begin(bs, NULL, true, false);
+ bdrv_do_drained_begin(bs, NULL, false);
aio_context_release(aio_context);
}
@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
g_assert(!bs->refcnt);
while (bs->quiesce_counter) {
- bdrv_do_drained_end(bs, NULL, true);
+ bdrv_do_drained_end(bs, NULL);
}
}
@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_end(bs, NULL, true);
+ bdrv_do_drained_end(bs, NULL);
aio_context_release(aio_context);
}
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 9c36a16a1f..8f5e75756a 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs);
* Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
* running requests to complete.
*/
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
- BdrvChild *parent, bool ignore_bds_parents);
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent);
/**
* bdrv_drained_end:
--
2.31.1

View File

@ -1,106 +0,0 @@
From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:10 +0100
Subject: [PATCH 28/31] block: Remove poll parameter from
bdrv_parent_drained_begin_single()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s)
All callers of bdrv_parent_drained_begin_single() pass poll=false now,
so we don't need the parameter any more.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20221118174110.55183-16-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 4 ++--
block/io.c | 8 ++------
include/block/block-io.h | 5 ++---
3 files changed, 6 insertions(+), 11 deletions(-)
diff --git a/block.c b/block.c
index 65588d313a..0d78711416 100644
--- a/block.c
+++ b/block.c
@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque)
* new_bs drained when calling bdrv_replace_child_tran() is not a
* requirement any more.
*/
- bdrv_parent_drained_begin_single(s->child, false);
+ bdrv_parent_drained_begin_single(s->child);
assert(!bdrv_parent_drained_poll_single(s->child));
}
assert(s->child->quiesced_parent);
@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
* a problem, we already did this), but it will still poll until the parent
* is fully quiesced, so it will not be negatively affected either.
*/
- bdrv_parent_drained_begin_single(new_child, false);
+ bdrv_parent_drained_begin_single(new_child);
bdrv_replace_child_noperm(new_child, child_bs);
BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
diff --git a/block/io.c b/block/io.c
index ae64830eac..38e57d1f67 100644
--- a/block/io.c
+++ b/block/io.c
@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
if (c == ignore) {
continue;
}
- bdrv_parent_drained_begin_single(c, false);
+ bdrv_parent_drained_begin_single(c);
}
}
@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
return busy;
}
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
+void bdrv_parent_drained_begin_single(BdrvChild *c)
{
- AioContext *ctx = bdrv_child_get_parent_aio_context(c);
IO_OR_GS_CODE();
assert(!c->quiesced_parent);
@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
if (c->klass->drained_begin) {
c->klass->drained_begin(c);
}
- if (poll) {
- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
- }
}
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 65e6d2569b..92aaa7c1e9 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
/**
* bdrv_parent_drained_begin_single:
*
- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
- * any pending activity to cease.
+ * Begin a quiesced section for the parent of @c.
*/
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
+void bdrv_parent_drained_begin_single(BdrvChild *c);
/**
* bdrv_parent_drained_poll_single:
--
2.31.1

View File

@ -1,896 +0,0 @@
From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:05 +0100
Subject: [PATCH 23/31] block: Remove subtree drains
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s)
Subtree drains are not used any more. Remove them.
After this, BdrvChildClass.attach/detach() don't poll any more.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-11-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 20 +--
block/io.c | 121 +++-----------
include/block/block-io.h | 18 +--
include/block/block_int-common.h | 1 -
include/block/block_int-io.h | 12 --
tests/unit/test-bdrv-drain.c | 261 ++-----------------------------
6 files changed, 44 insertions(+), 389 deletions(-)
diff --git a/block.c b/block.c
index 5330e89903..e0e3b21790 100644
--- a/block.c
+++ b/block.c
@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
static bool bdrv_child_cb_drained_poll(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
- return bdrv_drain_poll(bs, false, NULL, false);
+ return bdrv_drain_poll(bs, NULL, false);
}
static void bdrv_child_cb_drained_end(BdrvChild *child)
@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child)
assert(!bs->file);
bs->file = child;
}
-
- bdrv_apply_subtree_drain(child, bs);
}
static void bdrv_child_cb_detach(BdrvChild *child)
@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child)
bdrv_backing_detach(child);
}
- bdrv_unapply_subtree_drain(child, bs);
-
assert_bdrv_graph_writable(bs);
QLIST_REMOVE(child, next);
if (child == bs->backing) {
@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
}
if (old_bs) {
- /* Detach first so that the recursive drain sections coming from @child
- * are already gone and we only end the drain sections that came from
- * elsewhere. */
if (child->klass->detach) {
child->klass->detach(child);
}
@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
/*
- * Detaching the old node may have led to the new node's
- * quiesce_counter having been decreased. Not a problem, we
- * just need to recognize this here and then invoke
- * drained_end appropriately more often.
+ * Polling in bdrv_parent_drained_begin_single() may have led to the new
+ * node's quiesce_counter having been decreased. Not a problem, we just
+ * need to recognize this here and then invoke drained_end appropriately
+ * more often.
*/
assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
- /* Attach only after starting new drained sections, so that recursive
- * drain sections coming from @child don't get an extra .drained_begin
- * callback. */
if (child->klass->attach) {
child->klass->attach(child);
}
diff --git a/block/io.c b/block/io.c
index a25103be6f..75224480d0 100644
--- a/block/io.c
+++ b/block/io.c
@@ -236,17 +236,15 @@ typedef struct {
BlockDriverState *bs;
bool done;
bool begin;
- bool recursive;
bool poll;
BdrvChild *parent;
bool ignore_bds_parents;
} BdrvCoDrainData;
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
- BdrvChild *ignore_parent, bool ignore_bds_parents)
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
+ bool ignore_bds_parents)
{
- BdrvChild *child, *next;
IO_OR_GS_CODE();
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
return true;
}
- if (recursive) {
- assert(!ignore_bds_parents);
- QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
- if (bdrv_drain_poll(child->bs, recursive, child, false)) {
- return true;
- }
- }
- }
-
return false;
}
-static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
BdrvChild *ignore_parent)
{
- return bdrv_drain_poll(bs, recursive, ignore_parent, false);
+ return bdrv_drain_poll(bs, ignore_parent, false);
}
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents,
- bool poll);
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents);
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+ bool ignore_bds_parents, bool poll);
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+ bool ignore_bds_parents);
static void bdrv_co_drain_bh_cb(void *opaque)
{
@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque)
aio_context_acquire(ctx);
bdrv_dec_in_flight(bs);
if (data->begin) {
- bdrv_do_drained_begin(bs, data->recursive, data->parent,
- data->ignore_bds_parents, data->poll);
+ bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
+ data->poll);
} else {
assert(!data->poll);
- bdrv_do_drained_end(bs, data->recursive, data->parent,
- data->ignore_bds_parents);
+ bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
}
aio_context_release(ctx);
} else {
@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
}
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
- bool begin, bool recursive,
+ bool begin,
BdrvChild *parent,
bool ignore_bds_parents,
bool poll)
@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
.bs = bs,
.done = false,
.begin = begin,
- .recursive = recursive,
.parent = parent,
.ignore_bds_parents = ignore_bds_parents,
.poll = poll,
@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
}
}
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents,
- bool poll)
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+ bool ignore_bds_parents, bool poll)
{
- BdrvChild *child, *next;
-
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
- poll);
+ bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
return;
}
bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
- if (recursive) {
- assert(!ignore_bds_parents);
- bs->recursive_quiesce_counter++;
- QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
- bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
- false);
- }
- }
-
/*
* Wait for drained requests to finish.
*
@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
*/
if (poll) {
assert(!ignore_bds_parents);
- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
+ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
}
}
void bdrv_drained_begin(BlockDriverState *bs)
{
IO_OR_GS_CODE();
- bdrv_do_drained_begin(bs, false, NULL, false, true);
-}
-
-void bdrv_subtree_drained_begin(BlockDriverState *bs)
-{
- IO_OR_GS_CODE();
- bdrv_do_drained_begin(bs, true, NULL, false, true);
+ bdrv_do_drained_begin(bs, NULL, false, true);
}
/**
* This function does not poll, nor must any of its recursively called
* functions.
*/
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool ignore_bds_parents)
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+ bool ignore_bds_parents)
{
- BdrvChild *child;
int old_quiesce_counter;
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
- false);
+ bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
return;
}
assert(bs->quiesce_counter > 0);
@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
if (old_quiesce_counter == 1) {
aio_enable_external(bdrv_get_aio_context(bs));
}
-
- if (recursive) {
- assert(!ignore_bds_parents);
- bs->recursive_quiesce_counter--;
- QLIST_FOREACH(child, &bs->children, next) {
- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
- }
- }
}
void bdrv_drained_end(BlockDriverState *bs)
{
IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, false, NULL, false);
-}
-
-void bdrv_subtree_drained_end(BlockDriverState *bs)
-{
- IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, true, NULL, false);
-}
-
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
-{
- int i;
- IO_OR_GS_CODE();
-
- for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
- bdrv_do_drained_begin(child->bs, true, child, false, true);
- }
-}
-
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
-{
- int i;
- IO_OR_GS_CODE();
-
- for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
- bdrv_do_drained_end(child->bs, true, child, false);
- }
+ bdrv_do_drained_end(bs, NULL, false);
}
void bdrv_drain(BlockDriverState *bs)
@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void)
while ((bs = bdrv_next_all_states(bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- result |= bdrv_drain_poll(bs, false, NULL, true);
+ result |= bdrv_drain_poll(bs, NULL, true);
aio_context_release(aio_context);
}
@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void)
GLOBAL_STATE_CODE();
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
+ bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
return;
}
@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_begin(bs, false, NULL, true, false);
+ bdrv_do_drained_begin(bs, NULL, true, false);
aio_context_release(aio_context);
}
@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
g_assert(!bs->refcnt);
while (bs->quiesce_counter) {
- bdrv_do_drained_end(bs, false, NULL, true);
+ bdrv_do_drained_end(bs, NULL, true);
}
}
@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_end(bs, false, NULL, true);
+ bdrv_do_drained_end(bs, NULL, true);
aio_context_release(aio_context);
}
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 054e964c9b..9c36a16a1f 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
/**
* bdrv_drain_poll:
*
- * Poll for pending requests in @bs, its parents (except for @ignore_parent),
- * and if @recursive is true its children as well (used for subtree drain).
+ * Poll for pending requests in @bs and its parents (except for @ignore_parent).
*
* If @ignore_bds_parents is true, parents that are BlockDriverStates must
* ignore the drain request because they will be drained separately (used for
@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
*
* This is part of bdrv_drained_begin.
*/
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
- BdrvChild *ignore_parent, bool ignore_bds_parents);
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
+ bool ignore_bds_parents);
/**
* bdrv_drained_begin:
@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs);
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
BdrvChild *parent, bool ignore_bds_parents);
-/**
- * Like bdrv_drained_begin, but recursively begins a quiesced section for
- * exclusive access to all child nodes as well.
- */
-void bdrv_subtree_drained_begin(BlockDriverState *bs);
-
/**
* bdrv_drained_end:
*
@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
*/
void bdrv_drained_end(BlockDriverState *bs);
-/**
- * End a quiescent section started by bdrv_subtree_drained_begin().
- */
-void bdrv_subtree_drained_end(BlockDriverState *bs);
-
#endif /* BLOCK_IO_H */
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 2b97576f6d..791dddfd7d 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -1184,7 +1184,6 @@ struct BlockDriverState {
/* Accessed with atomic ops. */
int quiesce_counter;
- int recursive_quiesce_counter;
unsigned int write_gen; /* Current data generation */
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index 4b0b3e17ef..8bc061ebb8 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs,
*/
void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
-
-/*
- * "I/O or GS" API functions. These functions can run without
- * the BQL, but only in one specific iothread/main loop.
- *
- * See include/block/block-io.h for more information about
- * the "I/O or GS" API.
- */
-
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
-
#endif /* BLOCK_INT_IO_H */
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 695519ee02..dda08de8db 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void))
enum drain_type {
BDRV_DRAIN_ALL,
BDRV_DRAIN,
- BDRV_SUBTREE_DRAIN,
DRAIN_TYPE_MAX,
};
@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
switch (drain_type) {
case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
case BDRV_DRAIN: bdrv_drained_begin(bs); break;
- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break;
default: g_assert_not_reached();
}
}
@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
switch (drain_type) {
case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
case BDRV_DRAIN: bdrv_drained_end(bs); break;
- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break;
default: g_assert_not_reached();
}
}
@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void)
test_drv_cb_common(BDRV_DRAIN, false);
}
-static void test_drv_cb_drain_subtree(void)
-{
- test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
-}
-
static void test_drv_cb_co_drain_all(void)
{
call_in_coroutine(test_drv_cb_drain_all);
@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void)
call_in_coroutine(test_drv_cb_drain);
}
-static void test_drv_cb_co_drain_subtree(void)
-{
- call_in_coroutine(test_drv_cb_drain_subtree);
-}
-
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
{
BlockBackend *blk;
@@ -332,11 +319,6 @@ static void test_quiesce_drain(void)
test_quiesce_common(BDRV_DRAIN, false);
}
-static void test_quiesce_drain_subtree(void)
-{
- test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
-}
-
static void test_quiesce_co_drain_all(void)
{
call_in_coroutine(test_quiesce_drain_all);
@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void)
call_in_coroutine(test_quiesce_drain);
}
-static void test_quiesce_co_drain_subtree(void)
-{
- call_in_coroutine(test_quiesce_drain_subtree);
-}
-
static void test_nested(void)
{
BlockBackend *blk;
@@ -402,158 +379,6 @@ static void test_nested(void)
blk_unref(blk);
}
-static void test_multiparent(void)
-{
- BlockBackend *blk_a, *blk_b;
- BlockDriverState *bs_a, *bs_b, *backing;
- BDRVTestState *a_s, *b_s, *backing_s;
-
- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
- &error_abort);
- a_s = bs_a->opaque;
- blk_insert_bs(blk_a, bs_a, &error_abort);
-
- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
- &error_abort);
- b_s = bs_b->opaque;
- blk_insert_bs(blk_b, bs_b, &error_abort);
-
- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
- backing_s = backing->opaque;
- bdrv_set_backing_hd(bs_a, backing, &error_abort);
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
- g_assert_cmpint(a_s->drain_count, ==, 0);
- g_assert_cmpint(b_s->drain_count, ==, 0);
- g_assert_cmpint(backing_s->drain_count, ==, 0);
-
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
- g_assert_cmpint(backing->quiesce_counter, ==, 1);
- g_assert_cmpint(a_s->drain_count, ==, 1);
- g_assert_cmpint(b_s->drain_count, ==, 1);
- g_assert_cmpint(backing_s->drain_count, ==, 1);
-
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
- g_assert_cmpint(backing->quiesce_counter, ==, 2);
- g_assert_cmpint(a_s->drain_count, ==, 2);
- g_assert_cmpint(b_s->drain_count, ==, 2);
- g_assert_cmpint(backing_s->drain_count, ==, 2);
-
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
- g_assert_cmpint(backing->quiesce_counter, ==, 1);
- g_assert_cmpint(a_s->drain_count, ==, 1);
- g_assert_cmpint(b_s->drain_count, ==, 1);
- g_assert_cmpint(backing_s->drain_count, ==, 1);
-
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
- g_assert_cmpint(a_s->drain_count, ==, 0);
- g_assert_cmpint(b_s->drain_count, ==, 0);
- g_assert_cmpint(backing_s->drain_count, ==, 0);
-
- bdrv_unref(backing);
- bdrv_unref(bs_a);
- bdrv_unref(bs_b);
- blk_unref(blk_a);
- blk_unref(blk_b);
-}
-
-static void test_graph_change_drain_subtree(void)
-{
- BlockBackend *blk_a, *blk_b;
- BlockDriverState *bs_a, *bs_b, *backing;
- BDRVTestState *a_s, *b_s, *backing_s;
-
- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
- &error_abort);
- a_s = bs_a->opaque;
- blk_insert_bs(blk_a, bs_a, &error_abort);
-
- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
- &error_abort);
- b_s = bs_b->opaque;
- blk_insert_bs(blk_b, bs_b, &error_abort);
-
- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
- backing_s = backing->opaque;
- bdrv_set_backing_hd(bs_a, backing, &error_abort);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
- g_assert_cmpint(a_s->drain_count, ==, 0);
- g_assert_cmpint(b_s->drain_count, ==, 0);
- g_assert_cmpint(backing_s->drain_count, ==, 0);
-
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
-
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
- g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
- g_assert_cmpint(backing->quiesce_counter, ==, 5);
- g_assert_cmpint(a_s->drain_count, ==, 5);
- g_assert_cmpint(b_s->drain_count, ==, 5);
- g_assert_cmpint(backing_s->drain_count, ==, 5);
-
- bdrv_set_backing_hd(bs_b, NULL, &error_abort);
- g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
- g_assert_cmpint(backing->quiesce_counter, ==, 3);
- g_assert_cmpint(a_s->drain_count, ==, 3);
- g_assert_cmpint(b_s->drain_count, ==, 2);
- g_assert_cmpint(backing_s->drain_count, ==, 3);
-
- bdrv_set_backing_hd(bs_b, backing, &error_abort);
- g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
- g_assert_cmpint(backing->quiesce_counter, ==, 5);
- g_assert_cmpint(a_s->drain_count, ==, 5);
- g_assert_cmpint(b_s->drain_count, ==, 5);
- g_assert_cmpint(backing_s->drain_count, ==, 5);
-
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
-
- g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
- g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
- g_assert_cmpint(backing->quiesce_counter, ==, 0);
- g_assert_cmpint(a_s->drain_count, ==, 0);
- g_assert_cmpint(b_s->drain_count, ==, 0);
- g_assert_cmpint(backing_s->drain_count, ==, 0);
-
- bdrv_unref(backing);
- bdrv_unref(bs_a);
- bdrv_unref(bs_b);
- blk_unref(blk_a);
- blk_unref(blk_b);
-}
-
static void test_graph_change_drain_all(void)
{
BlockBackend *blk_a, *blk_b;
@@ -773,12 +598,6 @@ static void test_iothread_drain(void)
test_iothread_common(BDRV_DRAIN, 1);
}
-static void test_iothread_drain_subtree(void)
-{
- test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
- test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
-}
-
typedef struct TestBlockJob {
BlockJob common;
@@ -863,7 +682,6 @@ enum test_job_result {
enum test_job_drain_node {
TEST_JOB_DRAIN_SRC,
TEST_JOB_DRAIN_SRC_CHILD,
- TEST_JOB_DRAIN_SRC_PARENT,
};
static void test_blockjob_common_drain_node(enum drain_type drain_type,
@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
case TEST_JOB_DRAIN_SRC_CHILD:
drain_bs = src_backing;
break;
- case TEST_JOB_DRAIN_SRC_PARENT:
- drain_bs = src_overlay;
- break;
default:
g_assert_not_reached();
}
@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
TEST_JOB_DRAIN_SRC);
test_blockjob_common_drain_node(drain_type, use_iothread, result,
TEST_JOB_DRAIN_SRC_CHILD);
- if (drain_type == BDRV_SUBTREE_DRAIN) {
- test_blockjob_common_drain_node(drain_type, use_iothread, result,
- TEST_JOB_DRAIN_SRC_PARENT);
- }
}
static void test_blockjob_drain_all(void)
@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void)
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
}
-static void test_blockjob_drain_subtree(void)
-{
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
-}
-
static void test_blockjob_error_drain_all(void)
{
test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void)
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
}
-static void test_blockjob_error_drain_subtree(void)
-{
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
-}
-
static void test_blockjob_iothread_drain_all(void)
{
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void)
test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
}
-static void test_blockjob_iothread_drain_subtree(void)
-{
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
-}
-
static void test_blockjob_iothread_error_drain_all(void)
{
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void)
test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
}
-static void test_blockjob_iothread_error_drain_subtree(void)
-{
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
-}
-
typedef struct BDRVTestTopState {
BdrvChild *wait_child;
@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
bdrv_drain(child_bs);
bdrv_unref(child_bs);
break;
- case BDRV_SUBTREE_DRAIN:
- /* Would have to ref/unref bs here for !detach_instead_of_delete, but
- * then the whole test becomes pointless because the graph changes
- * don't occur during the drain any more. */
- assert(detach_instead_of_delete);
- bdrv_subtree_drained_begin(bs);
- bdrv_subtree_drained_end(bs);
- break;
case BDRV_DRAIN_ALL:
bdrv_drain_all_begin();
bdrv_drain_all_end();
@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void)
do_test_delete_by_drain(true, BDRV_DRAIN);
}
-static void test_detach_by_drain_subtree(void)
-{
- do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN);
-}
-
struct detach_by_parent_data {
BlockDriverState *parent_b;
@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb)
g_assert(acb != NULL);
/* Drain and check the expected result */
- bdrv_subtree_drained_begin(parent_b);
+ bdrv_drained_begin(parent_b);
+ bdrv_drained_begin(a);
+ bdrv_drained_begin(b);
+ bdrv_drained_begin(c);
g_assert(detach_by_parent_data.child_c != NULL);
@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb)
g_assert(QLIST_NEXT(child_a, next) == NULL);
g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
- g_assert_cmpint(parent_b->quiesce_counter, ==, 1);
+ g_assert_cmpint(parent_b->quiesce_counter, ==, 3);
g_assert_cmpint(a->quiesce_counter, ==, 1);
- g_assert_cmpint(b->quiesce_counter, ==, 0);
+ g_assert_cmpint(b->quiesce_counter, ==, 1);
g_assert_cmpint(c->quiesce_counter, ==, 1);
- bdrv_subtree_drained_end(parent_b);
+ bdrv_drained_end(parent_b);
+ bdrv_drained_end(a);
+ bdrv_drained_end(b);
+ bdrv_drained_end(c);
bdrv_unref(parent_b);
blk_unref(blk);
@@ -2202,70 +1984,47 @@ int main(int argc, char **argv)
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
- g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
- test_drv_cb_drain_subtree);
g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
test_drv_cb_co_drain_all);
g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
- g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
- test_drv_cb_co_drain_subtree);
-
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
- g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
- test_quiesce_drain_subtree);
g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
test_quiesce_co_drain_all);
g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
- g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
- test_quiesce_co_drain_subtree);
g_test_add_func("/bdrv-drain/nested", test_nested);
- g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
- g_test_add_func("/bdrv-drain/graph-change/drain_subtree",
- test_graph_change_drain_subtree);
g_test_add_func("/bdrv-drain/graph-change/drain_all",
test_graph_change_drain_all);
g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
- g_test_add_func("/bdrv-drain/iothread/drain_subtree",
- test_iothread_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
- g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
- test_blockjob_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
test_blockjob_error_drain_all);
g_test_add_func("/bdrv-drain/blockjob/error/drain",
test_blockjob_error_drain);
- g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
- test_blockjob_error_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
test_blockjob_iothread_drain_all);
g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
test_blockjob_iothread_drain);
- g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
- test_blockjob_iothread_drain_subtree);
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
test_blockjob_iothread_error_drain_all);
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
test_blockjob_iothread_error_drain);
- g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
- test_blockjob_iothread_error_drain_subtree);
g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
- g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb);
--
2.31.1

View File

@ -1,302 +0,0 @@
From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:40:58 +0100
Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to
non-coroutine_fn
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s)
Polling during bdrv_drained_end() can be problematic (and in the future,
we may get cases for bdrv_drained_begin() where polling is forbidden,
and we don't care about already in-flight requests, but just want to
prevent new requests from arriving).
The .bdrv_drained_begin/end callbacks running in a coroutine is the only
reason why we have to do this polling, so make them non-coroutine
callbacks again. None of the callers actually yield any more.
This means that bdrv_drained_end() effectively doesn't poll any more,
even if AIO_WAIT_WHILE() loops are still there (their condition is false
from the beginning). This is generally not a problem, but in
test-bdrv-drain, some additional explicit aio_poll() calls need to be
added because the test case wants to verify the final state after BHs
have executed.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-4-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 4 +--
block/io.c | 49 +++++---------------------------
block/qed.c | 6 ++--
block/throttle.c | 8 +++---
include/block/block_int-common.h | 10 ++++---
tests/unit/test-bdrv-drain.c | 18 ++++++------
6 files changed, 32 insertions(+), 63 deletions(-)
diff --git a/block.c b/block.c
index ec184150a2..16a62a329c 100644
--- a/block.c
+++ b/block.c
@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
assert(is_power_of_2(bs->bl.request_alignment));
for (i = 0; i < bs->quiesce_counter; i++) {
- if (drv->bdrv_co_drain_begin) {
- drv->bdrv_co_drain_begin(bs);
+ if (drv->bdrv_drain_begin) {
+ drv->bdrv_drain_begin(bs);
}
}
diff --git a/block/io.c b/block/io.c
index b9424024f9..c2ed4b2af9 100644
--- a/block/io.c
+++ b/block/io.c
@@ -252,55 +252,20 @@ typedef struct {
int *drained_end_counter;
} BdrvCoDrainData;
-static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
-{
- BdrvCoDrainData *data = opaque;
- BlockDriverState *bs = data->bs;
-
- if (data->begin) {
- bs->drv->bdrv_co_drain_begin(bs);
- } else {
- bs->drv->bdrv_co_drain_end(bs);
- }
-
- /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
- qatomic_mb_set(&data->done, true);
- if (!data->begin) {
- qatomic_dec(data->drained_end_counter);
- }
- bdrv_dec_in_flight(bs);
-
- g_free(data);
-}
-
-/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
+/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
int *drained_end_counter)
{
- BdrvCoDrainData *data;
-
- if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
- (!begin && !bs->drv->bdrv_co_drain_end)) {
+ if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
+ (!begin && !bs->drv->bdrv_drain_end)) {
return;
}
- data = g_new(BdrvCoDrainData, 1);
- *data = (BdrvCoDrainData) {
- .bs = bs,
- .done = false,
- .begin = begin,
- .drained_end_counter = drained_end_counter,
- };
-
- if (!begin) {
- qatomic_inc(drained_end_counter);
+ if (begin) {
+ bs->drv->bdrv_drain_begin(bs);
+ } else {
+ bs->drv->bdrv_drain_end(bs);
}
-
- /* Make sure the driver callback completes during the polling phase for
- * drain_begin. */
- bdrv_inc_in_flight(bs);
- data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
- aio_co_schedule(bdrv_get_aio_context(bs), data->co);
}
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
diff --git a/block/qed.c b/block/qed.c
index 013f826c44..c2691a85b1 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s)
assert(!s->allocating_write_reqs_plugged);
if (s->allocating_acb != NULL) {
/* Another allocating write came concurrently. This cannot happen
- * from bdrv_qed_co_drain_begin, but it can happen when the timer runs.
+ * from bdrv_qed_drain_begin, but it can happen when the timer runs.
*/
qemu_co_mutex_unlock(&s->table_lock);
return false;
@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
}
}
-static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
+static void bdrv_qed_drain_begin(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = {
.bdrv_co_check = bdrv_qed_co_check,
.bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
.bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
- .bdrv_co_drain_begin = bdrv_qed_co_drain_begin,
+ .bdrv_drain_begin = bdrv_qed_drain_begin,
};
static void bdrv_qed_init(void)
diff --git a/block/throttle.c b/block/throttle.c
index 131eba3ab4..88851c84f4 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state)
reopen_state->opaque = NULL;
}
-static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
+static void throttle_drain_begin(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
}
}
-static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs)
+static void throttle_drain_end(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
assert(tgm->io_limits_disabled);
@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = {
.bdrv_reopen_commit = throttle_reopen_commit,
.bdrv_reopen_abort = throttle_reopen_abort,
- .bdrv_co_drain_begin = throttle_co_drain_begin,
- .bdrv_co_drain_end = throttle_co_drain_end,
+ .bdrv_drain_begin = throttle_drain_begin,
+ .bdrv_drain_end = throttle_drain_end,
.is_filter = true,
.strong_runtime_opts = throttle_strong_runtime_opts,
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 31ae91e56e..40d646d1ed 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -735,17 +735,19 @@ struct BlockDriver {
void (*bdrv_io_unplug)(BlockDriverState *bs);
/**
- * bdrv_co_drain_begin is called if implemented in the beginning of a
+ * bdrv_drain_begin is called if implemented in the beginning of a
* drain operation to drain and stop any internal sources of requests in
* the driver.
- * bdrv_co_drain_end is called if implemented at the end of the drain.
+ * bdrv_drain_end is called if implemented at the end of the drain.
*
* They should be used by the driver to e.g. manage scheduled I/O
* requests, or toggle an internal state. After the end of the drain new
* requests will continue normally.
+ *
+ * Implementations of both functions must not call aio_poll().
*/
- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
+ void (*bdrv_drain_begin)(BlockDriverState *bs);
+ void (*bdrv_drain_end)(BlockDriverState *bs);
bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 24f34e24ad..695519ee02 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque)
bdrv_dec_in_flight(bs);
}
-static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
+static void bdrv_test_drain_begin(BlockDriverState *bs)
{
BDRVTestState *s = bs->opaque;
s->drain_count++;
@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
}
}
-static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
+static void bdrv_test_drain_end(BlockDriverState *bs)
{
BDRVTestState *s = bs->opaque;
s->drain_count--;
@@ -111,8 +111,8 @@ static BlockDriver bdrv_test = {
.bdrv_close = bdrv_test_close,
.bdrv_co_preadv = bdrv_test_co_preadv,
- .bdrv_co_drain_begin = bdrv_test_co_drain_begin,
- .bdrv_co_drain_end = bdrv_test_co_drain_end,
+ .bdrv_drain_begin = bdrv_test_drain_begin,
+ .bdrv_drain_end = bdrv_test_drain_end,
.bdrv_child_perm = bdrv_default_perms,
@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void)
bdrv_drained_begin(bs_child);
g_assert(!job_has_completed);
bdrv_drained_end(bs_child);
+ aio_poll(qemu_get_aio_context(), false);
g_assert(job_has_completed);
bdrv_unref(bs_parents[0]);
@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void)
g_assert(!job_has_completed);
ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
+ aio_poll(qemu_get_aio_context(), false);
g_assert(ret == 0);
g_assert(job_has_completed);
@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque)
* .was_drained.
* Increment .drain_count.
*/
-static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
+static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
{
BDRVReplaceTestState *s = bs->opaque;
@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque)
* If .drain_count reaches 0 and the node has a backing file, issue a
* read request.
*/
-static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
+static void bdrv_replace_test_drain_end(BlockDriverState *bs)
{
BDRVReplaceTestState *s = bs->opaque;
@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = {
.bdrv_close = bdrv_replace_test_close,
.bdrv_co_preadv = bdrv_replace_test_co_preadv,
- .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin,
- .bdrv_co_drain_end = bdrv_replace_test_co_drain_end,
+ .bdrv_drain_begin = bdrv_replace_test_drain_begin,
+ .bdrv_drain_end = bdrv_replace_test_drain_end,
.bdrv_child_perm = bdrv_default_perms,
};
--
2.31.1

View File

@ -1,246 +0,0 @@
From 54e290df4bc1c9e83be7357caed6a2b1ba4f21f0 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:56 +0200
Subject: [PATCH 09/20] block: Split BlockNodeInfo off of ImageInfo
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [4/12] fc8d69d549bb9a929db218b91697ee3ae95c1ff6 (hreitz/qemu-kvm-c-9-s)
ImageInfo sometimes contains flat information, and sometimes it does
not. Split off a BlockNodeInfo struct, which only contains information
about a single node and has no link to the backing image.
We do this so we can extend BlockNodeInfo to a BlockGraphInfo struct,
which has links to all child nodes, not just the backing node. It would
be strange to base BlockGraphInfo on ImageInfo, because then this
extended struct would have two links to the backing node (one in
BlockGraphInfo as one of all the child links, and one in ImageInfo).
Furthermore, it is quite common to ignore the backing-image field
altogether: bdrv_query_image_info() does not set it, and
bdrv_image_info_dump() does not evaluate it. That signals that we
should have different structs for describing a single node and one that
has a link to the backing image.
Still, bdrv_query_image_info() and bdrv_image_info_dump() are not
changed too much in this patch. Follow-up patches will handle them.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-5-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit a2085f8909377b6df738f6c3f7ee6db4d16da8f7)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/qapi.c | 86 ++++++++++++++++++++++++++++++++------------
include/block/qapi.h | 3 ++
qapi/block-core.json | 24 +++++++++----
3 files changed, 85 insertions(+), 28 deletions(-)
diff --git a/block/qapi.c b/block/qapi.c
index 51202b470a..e5022b4481 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -241,30 +241,18 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs,
}
/**
- * bdrv_query_image_info:
- * @bs: block device to examine
- * @p_info: location to store image information
- * @errp: location to store error information
- *
- * Store "flat" image information in @p_info.
- *
- * "Flat" means it does *not* query backing image information,
- * i.e. (*pinfo)->has_backing_image will be set to false and
- * (*pinfo)->backing_image to NULL even when the image does in fact have
- * a backing image.
- *
- * @p_info will be set only on success. On error, store error in @errp.
+ * Helper function for other query info functions. Store information about @bs
+ * in @info, setting @errp on error.
*/
-void bdrv_query_image_info(BlockDriverState *bs,
- ImageInfo **p_info,
- Error **errp)
+static void bdrv_do_query_node_info(BlockDriverState *bs,
+ BlockNodeInfo *info,
+ Error **errp)
{
int64_t size;
const char *backing_filename;
BlockDriverInfo bdi;
int ret;
Error *err = NULL;
- ImageInfo *info;
aio_context_acquire(bdrv_get_aio_context(bs));
@@ -277,7 +265,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
bdrv_refresh_filename(bs);
- info = g_new0(ImageInfo, 1);
info->filename = g_strdup(bs->filename);
info->format = g_strdup(bdrv_get_format_name(bs));
info->virtual_size = size;
@@ -298,7 +285,6 @@ void bdrv_query_image_info(BlockDriverState *bs,
info->format_specific = bdrv_get_specific_info(bs, &err);
if (err) {
error_propagate(errp, err);
- qapi_free_ImageInfo(info);
goto out;
}
info->has_format_specific = info->format_specific != NULL;
@@ -339,16 +325,72 @@ void bdrv_query_image_info(BlockDriverState *bs,
break;
default:
error_propagate(errp, err);
- qapi_free_ImageInfo(info);
goto out;
}
- *p_info = info;
-
out:
aio_context_release(bdrv_get_aio_context(bs));
}
+/**
+ * bdrv_query_block_node_info:
+ * @bs: block node to examine
+ * @p_info: location to store node information
+ * @errp: location to store error information
+ *
+ * Store image information about @bs in @p_info.
+ *
+ * @p_info will be set only on success. On error, store error in @errp.
+ */
+void bdrv_query_block_node_info(BlockDriverState *bs,
+ BlockNodeInfo **p_info,
+ Error **errp)
+{
+ BlockNodeInfo *info;
+ ERRP_GUARD();
+
+ info = g_new0(BlockNodeInfo, 1);
+ bdrv_do_query_node_info(bs, info, errp);
+ if (*errp) {
+ qapi_free_BlockNodeInfo(info);
+ return;
+ }
+
+ *p_info = info;
+}
+
+/**
+ * bdrv_query_image_info:
+ * @bs: block node to examine
+ * @p_info: location to store image information
+ * @errp: location to store error information
+ *
+ * Store "flat" image information in @p_info.
+ *
+ * "Flat" means it does *not* query backing image information,
+ * i.e. (*pinfo)->has_backing_image will be set to false and
+ * (*pinfo)->backing_image to NULL even when the image does in fact have
+ * a backing image.
+ *
+ * @p_info will be set only on success. On error, store error in @errp.
+ */
+void bdrv_query_image_info(BlockDriverState *bs,
+ ImageInfo **p_info,
+ Error **errp)
+{
+ ImageInfo *info;
+ ERRP_GUARD();
+
+ info = g_new0(ImageInfo, 1);
+ bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp);
+ if (*errp) {
+ qapi_free_ImageInfo(info);
+ return;
+ }
+
+ *p_info = info;
+}
+
/* @p_info will be set only on success. */
static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
Error **errp)
diff --git a/include/block/qapi.h b/include/block/qapi.h
index c09859ea78..c7de4e3fa9 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -35,6 +35,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
int bdrv_query_snapshot_info_list(BlockDriverState *bs,
SnapshotInfoList **p_list,
Error **errp);
+void bdrv_query_block_node_info(BlockDriverState *bs,
+ BlockNodeInfo **p_info,
+ Error **errp);
void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
Error **errp);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 4b9365167f..7720da0498 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -251,7 +251,7 @@
} }
##
-# @ImageInfo:
+# @BlockNodeInfo:
#
# Information about a QEMU image file
#
@@ -279,22 +279,34 @@
#
# @snapshots: list of VM snapshots
#
-# @backing-image: info of the backing image (since 1.6)
-#
# @format-specific: structure supplying additional format-specific
# information (since 1.7)
#
-# Since: 1.3
+# Since: 8.0
##
-{ 'struct': 'ImageInfo',
+{ 'struct': 'BlockNodeInfo',
'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
'*actual-size': 'int', 'virtual-size': 'int',
'*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool',
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
- '*backing-image': 'ImageInfo',
'*format-specific': 'ImageInfoSpecific' } }
+##
+# @ImageInfo:
+#
+# Information about a QEMU image file, and potentially its backing image
+#
+# @backing-image: info of the backing image
+#
+# Since: 1.3
+##
+{ 'struct': 'ImageInfo',
+ 'base': 'BlockNodeInfo',
+ 'data': {
+ '*backing-image': 'ImageInfo'
+ } }
+
##
# @ImageCheck:
#
--
2.31.1

View File

@ -1,70 +0,0 @@
From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vladimir.sementsov-ogievskiy@openvz.org>
Date: Mon, 7 Nov 2022 19:35:56 +0300
Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s)
Drop this simple wrapper used only in one place. We have too many graph
modifying functions even without it.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@openvz.org>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 15 +--------------
1 file changed, 1 insertion(+), 14 deletions(-)
diff --git a/block.c b/block.c
index a18f052374..ec184150a2 100644
--- a/block.c
+++ b/block.c
@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs);
static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
- Transaction *tran);
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue,
@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
tran_add(tran, &bdrv_remove_child_drv, child);
}
-/*
- * A function to remove backing-chain child of @bs if exists: cow child for
- * format nodes (always .backing) and filter child for filters (may be .file or
- * .backing)
- */
-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
- Transaction *tran)
-{
- bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran);
-}
-
static int bdrv_replace_node_noperm(BlockDriverState *from,
BlockDriverState *to,
bool auto_skip, Transaction *tran,
@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from,
}
if (detach_subchain) {
- bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
+ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
}
found = g_hash_table_new(NULL, NULL);
--
2.31.1

View File

@ -1,145 +0,0 @@
From 4af86458d6bea2a6e15fd57d4d4bbe88e35f7e72 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:54 +0200
Subject: [PATCH 07/20] block/file: Add file-specific image info
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [2/12] d8cc351d6c16c41b2000e41dc555f13093a9edce (hreitz/qemu-kvm-c-9-s)
Add some (optional) information that the file driver can provide for
image files, namely the extent size hint.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-3-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 7f36a50ab4e7d39369cac67be4ba9d6ee4081dc0)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/file-posix.c | 30 ++++++++++++++++++++++++++++++
qapi/block-core.json | 26 ++++++++++++++++++++++++--
2 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index b9647c5ffc..df3da79aed 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -3095,6 +3095,34 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
+static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs,
+ Error **errp)
+{
+ ImageInfoSpecificFile *file_info = g_new0(ImageInfoSpecificFile, 1);
+ ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1);
+
+ *spec_info = (ImageInfoSpecific){
+ .type = IMAGE_INFO_SPECIFIC_KIND_FILE,
+ .u.file.data = file_info,
+ };
+
+#ifdef FS_IOC_FSGETXATTR
+ {
+ BDRVRawState *s = bs->opaque;
+ struct fsxattr attr;
+ int ret;
+
+ ret = ioctl(s->fd, FS_IOC_FSGETXATTR, &attr);
+ if (!ret && attr.fsx_extsize != 0) {
+ file_info->has_extent_size_hint = true;
+ file_info->extent_size_hint = attr.fsx_extsize;
+ }
+ }
+#endif
+
+ return spec_info;
+}
+
static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -3328,6 +3356,7 @@ BlockDriver bdrv_file = {
.bdrv_co_truncate = raw_co_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
+ .bdrv_get_specific_info = raw_get_specific_info,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
.bdrv_get_specific_stats = raw_get_specific_stats,
@@ -3700,6 +3729,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_co_truncate = raw_co_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
+ .bdrv_get_specific_info = raw_get_specific_info,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
.bdrv_get_specific_stats = hdev_get_specific_stats,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 95ac4fa634..f5d822cbd6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -139,16 +139,29 @@
'*encryption-format': 'RbdImageEncryptionFormat'
} }
+##
+# @ImageInfoSpecificFile:
+#
+# @extent-size-hint: Extent size hint (if available)
+#
+# Since: 8.0
+##
+{ 'struct': 'ImageInfoSpecificFile',
+ 'data': {
+ '*extent-size-hint': 'size'
+ } }
+
##
# @ImageInfoSpecificKind:
#
# @luks: Since 2.7
# @rbd: Since 6.1
+# @file: Since 8.0
#
# Since: 1.7
##
{ 'enum': 'ImageInfoSpecificKind',
- 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd' ] }
+ 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd', 'file' ] }
##
# @ImageInfoSpecificQCow2Wrapper:
@@ -185,6 +198,14 @@
{ 'struct': 'ImageInfoSpecificRbdWrapper',
'data': { 'data': 'ImageInfoSpecificRbd' } }
+##
+# @ImageInfoSpecificFileWrapper:
+#
+# Since: 8.0
+##
+{ 'struct': 'ImageInfoSpecificFileWrapper',
+ 'data': { 'data': 'ImageInfoSpecificFile' } }
+
##
# @ImageInfoSpecific:
#
@@ -199,7 +220,8 @@
'qcow2': 'ImageInfoSpecificQCow2Wrapper',
'vmdk': 'ImageInfoSpecificVmdkWrapper',
'luks': 'ImageInfoSpecificLUKSWrapper',
- 'rbd': 'ImageInfoSpecificRbdWrapper'
+ 'rbd': 'ImageInfoSpecificRbdWrapper',
+ 'file': 'ImageInfoSpecificFileWrapper'
} }
##
--
2.31.1

View File

@ -1,206 +0,0 @@
From c8c282c2e1d74cfc5de6527f7e20dfc3e76b67ac Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:27:00 +0200
Subject: [PATCH 13/20] block/qapi: Add indentation to bdrv_node_info_dump()
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [8/12] d3a697e81ab9828457198075e5815a592363c725 (hreitz/qemu-kvm-c-9-s)
In order to let qemu-img info present a block graph, add a parameter to
bdrv_node_info_dump() and bdrv_image_info_specific_dump() so that the
information of nodes below the root level can be given an indentation.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-9-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 76c9e9750d1bd580e8ed4465f6be3a986434e7c3)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/monitor/block-hmp-cmds.c | 2 +-
block/qapi.c | 47 +++++++++++++++++++---------------
include/block/qapi.h | 5 ++--
qemu-img.c | 2 +-
qemu-io-cmds.c | 3 ++-
5 files changed, 34 insertions(+), 25 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index aa37faa601..72824d4e2e 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
monitor_printf(mon, "\nImages:\n");
image_info = inserted->image;
while (1) {
- bdrv_node_info_dump(qapi_ImageInfo_base(image_info));
+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0);
if (image_info->has_backing_image) {
image_info = image_info->backing_image;
} else {
diff --git a/block/qapi.c b/block/qapi.c
index f208c21ccf..3e35603f0c 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -915,7 +915,8 @@ static bool qobject_is_empty_dump(const QObject *obj)
* prepending an optional prefix if the dump is not empty.
*/
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
- const char *prefix)
+ const char *prefix,
+ int indentation)
{
QObject *obj, *data;
Visitor *v = qobject_output_visitor_new(&obj);
@@ -925,48 +926,51 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
data = qdict_get(qobject_to(QDict, obj), "data");
if (!qobject_is_empty_dump(data)) {
if (prefix) {
- qemu_printf("%s", prefix);
+ qemu_printf("%*s%s", indentation * 4, "", prefix);
}
- dump_qobject(1, data);
+ dump_qobject(indentation + 1, data);
}
qobject_unref(obj);
visit_free(v);
}
-void bdrv_node_info_dump(BlockNodeInfo *info)
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation)
{
char *size_buf, *dsize_buf;
+ g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, "");
+
if (!info->has_actual_size) {
dsize_buf = g_strdup("unavailable");
} else {
dsize_buf = size_to_str(info->actual_size);
}
size_buf = size_to_str(info->virtual_size);
- qemu_printf("image: %s\n"
- "file format: %s\n"
- "virtual size: %s (%" PRId64 " bytes)\n"
- "disk size: %s\n",
- info->filename, info->format, size_buf,
- info->virtual_size,
- dsize_buf);
+ qemu_printf("%simage: %s\n"
+ "%sfile format: %s\n"
+ "%svirtual size: %s (%" PRId64 " bytes)\n"
+ "%sdisk size: %s\n",
+ ind_s, info->filename,
+ ind_s, info->format,
+ ind_s, size_buf, info->virtual_size,
+ ind_s, dsize_buf);
g_free(size_buf);
g_free(dsize_buf);
if (info->has_encrypted && info->encrypted) {
- qemu_printf("encrypted: yes\n");
+ qemu_printf("%sencrypted: yes\n", ind_s);
}
if (info->has_cluster_size) {
- qemu_printf("cluster_size: %" PRId64 "\n",
- info->cluster_size);
+ qemu_printf("%scluster_size: %" PRId64 "\n",
+ ind_s, info->cluster_size);
}
if (info->has_dirty_flag && info->dirty_flag) {
- qemu_printf("cleanly shut down: no\n");
+ qemu_printf("%scleanly shut down: no\n", ind_s);
}
if (info->has_backing_filename) {
- qemu_printf("backing file: %s", info->backing_filename);
+ qemu_printf("%sbacking file: %s", ind_s, info->backing_filename);
if (!info->has_full_backing_filename) {
qemu_printf(" (cannot determine actual path)");
} else if (strcmp(info->backing_filename,
@@ -975,15 +979,16 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
}
qemu_printf("\n");
if (info->has_backing_filename_format) {
- qemu_printf("backing file format: %s\n",
- info->backing_filename_format);
+ qemu_printf("%sbacking file format: %s\n",
+ ind_s, info->backing_filename_format);
}
}
if (info->has_snapshots) {
SnapshotInfoList *elem;
- qemu_printf("Snapshot list:\n");
+ qemu_printf("%sSnapshot list:\n", ind_s);
+ qemu_printf("%s", ind_s);
bdrv_snapshot_dump(NULL);
qemu_printf("\n");
@@ -1003,6 +1008,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
pstrcpy(sn.name, sizeof(sn.name), elem->value->name);
+ qemu_printf("%s", ind_s);
bdrv_snapshot_dump(&sn);
qemu_printf("\n");
}
@@ -1010,6 +1016,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info)
if (info->has_format_specific) {
bdrv_image_info_specific_dump(info->format_specific,
- "Format specific information:\n");
+ "Format specific information:\n",
+ indentation);
}
}
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 196436020e..38855f2ae9 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -49,6 +49,7 @@ void bdrv_query_block_graph_info(BlockDriverState *bs,
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
- const char *prefix);
-void bdrv_node_info_dump(BlockNodeInfo *info);
+ const char *prefix,
+ int indentation);
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation);
#endif
diff --git a/qemu-img.c b/qemu-img.c
index 3b2ca3bbcb..30b4ea58bb 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2859,7 +2859,7 @@ static void dump_human_image_info_list(BlockNodeInfoList *list)
}
delim = true;
- bdrv_node_info_dump(elem->value);
+ bdrv_node_info_dump(elem->value, 0);
}
}
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index f4a374528e..fdcb89211b 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -1826,7 +1826,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv)
}
if (spec_info) {
bdrv_image_info_specific_dump(spec_info,
- "Format specific information:\n");
+ "Format specific information:\n",
+ 0);
qapi_free_ImageInfoSpecific(spec_info);
}
--
2.31.1

View File

@ -1,155 +0,0 @@
From 0044e3848b02ef6edba5961d1f4b6297d137d207 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:59 +0200
Subject: [PATCH 12/20] block/qapi: Introduce BlockGraphInfo
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [7/12] de47bac372cd552b812c774a2f35f95923af74ff (hreitz/qemu-kvm-c-9-s)
Introduce a new QAPI type BlockGraphInfo and an associated
bdrv_query_block_graph_info() function that recursively gathers
BlockNodeInfo objects through a block graph.
A follow-up patch is going to make "qemu-img info" use this to print
information about all nodes that are (usually implicitly) opened for a
given image file.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-8-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 6cab33997b91eb86e82a6a2ae58a24f835249d4a)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/qapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++
include/block/qapi.h | 3 +++
qapi/block-core.json | 35 ++++++++++++++++++++++++++++++++
3 files changed, 86 insertions(+)
diff --git a/block/qapi.c b/block/qapi.c
index 5d0a8d2ce3..f208c21ccf 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -411,6 +411,54 @@ fail:
qapi_free_ImageInfo(info);
}
+/**
+ * bdrv_query_block_graph_info:
+ * @bs: root node to start from
+ * @p_info: location to store image information
+ * @errp: location to store error information
+ *
+ * Store image information about the graph starting from @bs in @p_info.
+ *
+ * @p_info will be set only on success. On error, store error in @errp.
+ */
+void bdrv_query_block_graph_info(BlockDriverState *bs,
+ BlockGraphInfo **p_info,
+ Error **errp)
+{
+ BlockGraphInfo *info;
+ BlockChildInfoList **children_list_tail;
+ BdrvChild *c;
+ ERRP_GUARD();
+
+ info = g_new0(BlockGraphInfo, 1);
+ bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp);
+ if (*errp) {
+ goto fail;
+ }
+
+ children_list_tail = &info->children;
+
+ QLIST_FOREACH(c, &bs->children, next) {
+ BlockChildInfo *c_info;
+
+ c_info = g_new0(BlockChildInfo, 1);
+ QAPI_LIST_APPEND(children_list_tail, c_info);
+
+ c_info->name = g_strdup(c->name);
+ bdrv_query_block_graph_info(c->bs, &c_info->info, errp);
+ if (*errp) {
+ goto fail;
+ }
+ }
+
+ *p_info = info;
+ return;
+
+fail:
+ assert(*errp != NULL);
+ qapi_free_BlockGraphInfo(info);
+}
+
/* @p_info will be set only on success. */
static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
Error **errp)
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 2174bf8fa2..196436020e 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -43,6 +43,9 @@ void bdrv_query_image_info(BlockDriverState *bs,
bool flat,
bool skip_implicit_filters,
Error **errp);
+void bdrv_query_block_graph_info(BlockDriverState *bs,
+ BlockGraphInfo **p_info,
+ Error **errp);
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 4cf2deeb6c..d703e0fb16 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -307,6 +307,41 @@
'*backing-image': 'ImageInfo'
} }
+##
+# @BlockChildInfo:
+#
+# Information about all nodes in the block graph starting at some node,
+# annotated with information about that node in relation to its parent.
+#
+# @name: Child name of the root node in the BlockGraphInfo struct, in its role
+# as the child of some undescribed parent node
+#
+# @info: Block graph information starting at this node
+#
+# Since: 8.0
+##
+{ 'struct': 'BlockChildInfo',
+ 'data': {
+ 'name': 'str',
+ 'info': 'BlockGraphInfo'
+ } }
+
+##
+# @BlockGraphInfo:
+#
+# Information about all nodes in a block (sub)graph in the form of BlockNodeInfo
+# data.
+# The base BlockNodeInfo struct contains the information for the (sub)graph's
+# root node.
+#
+# @children: Array of links to this node's child nodes' information
+#
+# Since: 8.0
+##
+{ 'struct': 'BlockGraphInfo',
+ 'base': 'BlockNodeInfo',
+ 'data': { 'children': ['BlockChildInfo'] } }
+
##
# @ImageCheck:
#
--
2.31.1

View File

@ -1,197 +0,0 @@
From ae2c3df00d673d436fe4d8ec9103a3b76d7e6233 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:58 +0200
Subject: [PATCH 11/20] block/qapi: Let bdrv_query_image_info() recurse
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [6/12] 451a83fd682cd6dd6026c22974d18c2f12ee06e3 (hreitz/qemu-kvm-c-9-s)
There is no real reason why bdrv_query_image_info() should generally not
recurse. The ImageInfo struct has a pointer to the backing image, so it
should generally be filled, unless the caller explicitly opts out.
This moves the recursing code from bdrv_block_device_info() into
bdrv_query_image_info().
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-7-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 5d8813593f3f673fc96eed199beb35690cc46f58)
Conflicts:
block/qapi.c: Conflicts with
54fde4ff0621c22b15cbaaa3c74301cc0dbd1c9e ("qapi block: Elide
redundant has_FOO in generated C"), which dropped
`has_backing_image`. Without that commit (and 44ea9d9be before it),
we still need to set `has_backing_image` in
`bdrv_query_image_info()`.
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/qapi.c | 94 +++++++++++++++++++++++++++-----------------
include/block/qapi.h | 2 +
2 files changed, 59 insertions(+), 37 deletions(-)
diff --git a/block/qapi.c b/block/qapi.c
index ad88bf9b38..5d0a8d2ce3 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -47,8 +47,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
Error **errp)
{
ImageInfo **p_image_info;
+ ImageInfo *backing_info;
BlockDriverState *bs0, *backing;
BlockDeviceInfo *info;
+ ERRP_GUARD();
if (!bs->drv) {
error_setg(errp, "Block device %s is ejected", bs->node_name);
@@ -149,38 +151,21 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
bs0 = bs;
p_image_info = &info->image;
info->backing_file_depth = 0;
- while (1) {
- Error *local_err = NULL;
- bdrv_query_image_info(bs0, p_image_info, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- qapi_free_BlockDeviceInfo(info);
- return NULL;
- }
-
- /* stop gathering data for flat output */
- if (flat) {
- break;
- }
- if (bs0->drv && bdrv_filter_or_cow_child(bs0)) {
- /*
- * Put any filtered child here (for backwards compatibility to when
- * we put bs0->backing here, which might be any filtered child).
- */
- info->backing_file_depth++;
- bs0 = bdrv_filter_or_cow_bs(bs0);
- (*p_image_info)->has_backing_image = true;
- p_image_info = &((*p_image_info)->backing_image);
- } else {
- break;
- }
+ /*
+ * Skip automatically inserted nodes that the user isn't aware of for
+ * query-block (blk != NULL), but not for query-named-block-nodes
+ */
+ bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp);
+ if (*errp) {
+ qapi_free_BlockDeviceInfo(info);
+ return NULL;
+ }
- /* Skip automatically inserted nodes that the user isn't aware of for
- * query-block (blk != NULL), but not for query-named-block-nodes */
- if (blk) {
- bs0 = bdrv_skip_implicit_filters(bs0);
- }
+ backing_info = info->image->backing_image;
+ while (backing_info) {
+ info->backing_file_depth++;
+ backing_info = backing_info->backing_image;
}
return info;
@@ -363,19 +348,28 @@ void bdrv_query_block_node_info(BlockDriverState *bs,
* bdrv_query_image_info:
* @bs: block node to examine
* @p_info: location to store image information
+ * @flat: skip backing node information
+ * @skip_implicit_filters: skip implicit filters in the backing chain
* @errp: location to store error information
*
- * Store "flat" image information in @p_info.
+ * Store image information in @p_info, potentially recursively covering the
+ * backing chain.
*
- * "Flat" means it does *not* query backing image information,
- * i.e. (*pinfo)->has_backing_image will be set to false and
- * (*pinfo)->backing_image to NULL even when the image does in fact have
- * a backing image.
+ * If @flat is true, do not query backing image information, i.e.
+ * (*p_info)->has_backing_image will be set to false and
+ * (*p_info)->backing_image to NULL even when the image does in fact have a
+ * backing image.
+ *
+ * If @skip_implicit_filters is true, implicit filter nodes in the backing chain
+ * will be skipped when querying backing image information.
+ * (@skip_implicit_filters is ignored when @flat is true.)
*
* @p_info will be set only on success. On error, store error in @errp.
*/
void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
+ bool flat,
+ bool skip_implicit_filters,
Error **errp)
{
ImageInfo *info;
@@ -384,11 +378,37 @@ void bdrv_query_image_info(BlockDriverState *bs,
info = g_new0(ImageInfo, 1);
bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp);
if (*errp) {
- qapi_free_ImageInfo(info);
- return;
+ goto fail;
+ }
+
+ if (!flat) {
+ BlockDriverState *backing;
+
+ /*
+ * Use any filtered child here (for backwards compatibility to when
+ * we always took bs->backing, which might be any filtered child).
+ */
+ backing = bdrv_filter_or_cow_bs(bs);
+ if (skip_implicit_filters) {
+ backing = bdrv_skip_implicit_filters(backing);
+ }
+
+ if (backing) {
+ bdrv_query_image_info(backing, &info->backing_image, false,
+ skip_implicit_filters, errp);
+ if (*errp) {
+ goto fail;
+ }
+ info->has_backing_image = true;
+ }
}
*p_info = info;
+ return;
+
+fail:
+ assert(*errp);
+ qapi_free_ImageInfo(info);
}
/* @p_info will be set only on success. */
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 22198dcd0c..2174bf8fa2 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -40,6 +40,8 @@ void bdrv_query_block_node_info(BlockDriverState *bs,
Error **errp);
void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
+ bool flat,
+ bool skip_implicit_filters,
Error **errp);
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
--
2.31.1

View File

@ -1,99 +0,0 @@
From b952c8f1da6f8597736c0e040565830139369359 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 14 Feb 2023 18:16:21 +0100
Subject: [PATCH] block: temporarily hold the new AioContext of bs_top in
bdrv_append()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 153: block: temporarily hold the new AioContext of bs_top in bdrv_append()
RH-Bugzilla: 2168209
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [1/1] 5b190426d996e8c9f7a781bd97aee8d25756dbd3 (sgarzarella/qemu-kvm-c-9-s)
bdrv_append() is called with bs_top AioContext held, but
bdrv_attach_child_noperm() could change the AioContext of bs_top.
bdrv_replace_node_noperm() calls bdrv_drained_begin() starting from
commit 2398747128 ("block: Don't poll in bdrv_replace_child_noperm()").
bdrv_drained_begin() can call BDRV_POLL_WHILE that assumes the new lock
is taken, so let's temporarily hold the new AioContext to prevent QEMU
from failing in BDRV_POLL_WHILE when it tries to release the wrong
AioContext.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2168209
Reported-by: Aihua Liang <aliang@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-Id: <20230214171621.11574-1-sgarzare@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 60d90bf43c169b9d1dbcb17ed794b7b02c6862b1)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/block.c b/block.c
index 0d78711416..9e1dcb9e47 100644
--- a/block.c
+++ b/block.c
@@ -5275,6 +5275,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
* child.
*
* This function does not create any image files.
+ *
+ * The caller must hold the AioContext lock for @bs_top.
*/
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
Error **errp)
@@ -5282,11 +5284,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
int ret;
BdrvChild *child;
Transaction *tran = tran_new();
+ AioContext *old_context, *new_context = NULL;
GLOBAL_STATE_CODE();
assert(!bs_new->backing);
+ old_context = bdrv_get_aio_context(bs_top);
+
child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
&child_of_bds, bdrv_backing_role(bs_new),
tran, errp);
@@ -5295,6 +5300,19 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
goto out;
}
+ /*
+ * bdrv_attach_child_noperm could change the AioContext of bs_top.
+ * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily
+ * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE
+ * that assumes the new lock is taken.
+ */
+ new_context = bdrv_get_aio_context(bs_top);
+
+ if (old_context != new_context) {
+ aio_context_release(old_context);
+ aio_context_acquire(new_context);
+ }
+
ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
if (ret < 0) {
goto out;
@@ -5306,6 +5324,11 @@ out:
bdrv_refresh_limits(bs_top, NULL, NULL);
+ if (new_context && old_context != new_context) {
+ aio_context_release(new_context);
+ aio_context_acquire(old_context);
+ }
+
return ret;
}
--
2.31.1

View File

@ -1,140 +0,0 @@
From d8caed018afb0f60f449e971398d2a8d6c2992e7 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:55 +0200
Subject: [PATCH 08/20] block/vmdk: Change extent info type
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [3/12] efe50a2797c679ce6bb5faa423047461a34e6792 (hreitz/qemu-kvm-c-9-s)
VMDK's implementation of .bdrv_get_specific_info() returns information
about its extent files, ostensibly in the form of ImageInfo objects.
However, it does not get this information through
bdrv_query_image_info(), but fills only a select few fields with custom
information that does not always match the fields' purposes.
For example, @format, which is supposed to be a block driver name, is
filled with the extent type, e.g. SPARSE or FLAT.
In ImageInfo, @compressed shows whether the data that can be seen in the
image is stored in compressed form or not. For example, a compressed
qcow2 image will store compressed data in its data file, but when
accessing the qcow2 node, you will see normal data. This is not how
VMDK uses the @compressed field for its extent files: Instead, it
signifies whether accessing the extent file will yield compressed data
(which the VMDK driver then (de-)compresses).
Create a new structure to represent the extent information. This allows
us to clarify the fields' meanings, and it clearly shows that these are
not complete ImageInfo objects. (That is, if a user wants an extent
file's ImageInfo object, they will need to query it separately, and will
not get it from ImageInfoSpecificVmdk.extents.)
Note that this removes the last use of ['ImageInfo'] (i.e. an array of
ImageInfo objects), so the QAPI generator will no longer generate
ImageInfoList by default. However, we use it in qemu-img.c, so we need
to create a dummy object to force the generator to create that type,
similarly to DummyForceArrays in machine.json (introduced in commit
9f08c8ec73878122ad4b061ed334f0437afaaa32 ("qapi: Lazy creation of array
types")).
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-4-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 456e75171a85c19a5bfa202eefcbdc4ef1692f05)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/vmdk.c | 8 ++++----
qapi/block-core.json | 38 +++++++++++++++++++++++++++++++++++++-
2 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/block/vmdk.c b/block/vmdk.c
index 26376352b9..4435b9880b 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2901,12 +2901,12 @@ static int vmdk_has_zero_init(BlockDriverState *bs)
return 1;
}
-static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
+static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent)
{
- ImageInfo *info = g_new0(ImageInfo, 1);
+ VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1);
bdrv_refresh_filename(extent->file->bs);
- *info = (ImageInfo){
+ *info = (VmdkExtentInfo){
.filename = g_strdup(extent->file->bs->filename),
.format = g_strdup(extent->type),
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE,
@@ -2985,7 +2985,7 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs,
int i;
BDRVVmdkState *s = bs->opaque;
ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
- ImageInfoList **tail;
+ VmdkExtentInfoList **tail;
*spec_info = (ImageInfoSpecific){
.type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index f5d822cbd6..4b9365167f 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -124,7 +124,33 @@
'create-type': 'str',
'cid': 'int',
'parent-cid': 'int',
- 'extents': ['ImageInfo']
+ 'extents': ['VmdkExtentInfo']
+ } }
+
+##
+# @VmdkExtentInfo:
+#
+# Information about a VMDK extent file
+#
+# @filename: Name of the extent file
+#
+# @format: Extent type (e.g. FLAT or SPARSE)
+#
+# @virtual-size: Number of bytes covered by this extent
+#
+# @cluster-size: Cluster size in bytes (for non-flat extents)
+#
+# @compressed: Whether this extent contains compressed data
+#
+# Since: 8.0
+##
+{ 'struct': 'VmdkExtentInfo',
+ 'data': {
+ 'filename': 'str',
+ 'format': 'str',
+ 'virtual-size': 'int',
+ '*cluster-size': 'int',
+ '*compressed': 'bool'
} }
##
@@ -5754,3 +5780,13 @@
'data': { 'device': 'str', '*id': 'str', '*name': 'str'},
'returns': 'SnapshotInfo',
'allow-preconfig': true }
+
+##
+# @DummyBlockCoreForceArrays:
+#
+# Not used by QMP; hack to let us use ImageInfoList internally
+#
+# Since: 8.0
+##
+{ 'struct': 'DummyBlockCoreForceArrays',
+ 'data': { 'unused-image-info': ['ImageInfo'] } }
--
2.31.1

View File

@ -1,127 +0,0 @@
From b886411a682b56bfe674f0a35d40c67c8e9dc87a Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 21 Feb 2023 16:22:17 -0500
Subject: [PATCH 02/12] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel()
race
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread
RH-Bugzilla: 2155748
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
RH-Commit: [2/3] eeeea43c25d8f4fa84591b05547fb77e4058abff (stefanha/centos-stream-qemu-kvm)
dma_blk_cb() only takes the AioContext lock around ->io_func(). That
means the rest of dma_blk_cb() is not protected. In particular, the
DMAAIOCB field accesses happen outside the lock.
There is a race when the main loop thread holds the AioContext lock and
invokes scsi_device_purge_requests() -> bdrv_aio_cancel() ->
dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb
field determines how cancellation proceeds. If dma_aio_cancel() sees
dbs->acb == NULL while dma_blk_cb() is still running, the request can be
completed twice (-ECANCELED and the actual return value).
The following assertion can occur with virtio-scsi when an IOThread is
used:
../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed.
Fix the race by holding the AioContext across dma_blk_cb(). Now
dma_aio_cancel() under the AioContext lock will not see
inconsistent/intermediate states.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230221212218.1378734-3-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
hw/scsi/scsi-disk.c | 4 +---
softmmu/dma-helpers.c | 12 +++++++-----
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 5327f93f4c..b12d8b0816 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -354,13 +354,12 @@ done:
scsi_req_unref(&r->req);
}
+/* Called with AioContext lock held */
static void scsi_dma_complete(void *opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
-
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret)
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_dma_complete_noio(r, ret);
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
}
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
index 7820fec54c..2463964805 100644
--- a/softmmu/dma-helpers.c
+++ b/softmmu/dma-helpers.c
@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
static void dma_blk_cb(void *opaque, int ret)
{
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
+ AioContext *ctx = dbs->ctx;
dma_addr_t cur_addr, cur_len;
void *mem;
trace_dma_blk_cb(dbs, ret);
+ aio_context_acquire(ctx);
dbs->acb = NULL;
dbs->offset += dbs->iov.size;
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
dma_complete(dbs, ret);
- return;
+ goto out;
}
dma_blk_unmap(dbs);
@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret)
if (dbs->iov.size == 0) {
trace_dma_map_wait(dbs);
- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
cpu_register_map_client(dbs->bh);
- return;
+ goto out;
}
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret)
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
}
- aio_context_acquire(dbs->ctx);
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
dma_blk_cb, dbs, dbs->io_func_opaque);
- aio_context_release(dbs->ctx);
assert(dbs->acb);
+out:
+ aio_context_release(ctx);
}
static void dma_aio_cancel(BlockAIOCB *acb)
--
2.39.1

View File

@ -1,61 +0,0 @@
From 67bbeb056f75adc6c964468d876531ab68366fe0 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 07/12] edu: add smp_mb__after_rmw()
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [4/9] 2ad6fd6cb33fde39d2d017d94c0dde2152ad70c4 (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu Mar 2 11:16:13 2023 +0100
edu: add smp_mb__after_rmw()
Ensure ordering between clearing the COMPUTING flag and checking
IRQFACT, and between setting the IRQFACT flag and checking
COMPUTING. This ensures that no wakeups are lost.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
hw/misc/edu.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index e935c418d4..a1f8bc77e7 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
case 0x20:
if (val & EDU_STATUS_IRQFACT) {
qatomic_or(&edu->status, EDU_STATUS_IRQFACT);
+ /* Order check of the COMPUTING flag after setting IRQFACT. */
+ smp_mb__after_rmw();
} else {
qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT);
}
@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque)
qemu_mutex_unlock(&edu->thr_mutex);
qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING);
+ /* Clear COMPUTING flag before checking IRQFACT. */
+ smp_mb__after_rmw();
+
if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) {
qemu_mutex_lock_iothread();
edu_raise_irq(edu, FACT_IRQ);
--
2.39.1

View File

@ -1,169 +0,0 @@
From 4ab2aff624908e49b099f00609875f4d03e9e1ec Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 6/8] hw/arm/virt: Add 'compact-highmem' property
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [6/8] 781506f3445493f05b511547370b6d88ef092457
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
After the improvement to high memory region address assignment is
applied, the memory layout can be changed, introducing possible
migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region
is disabled or enabled when the optimization is applied or not, with
the following configuration. The configuration is only achievable by
modifying the source code until more properties are added to allow
users to selectively disable those high memory regions.
pa_bits = 40;
vms->highmem_redists = false;
vms->highmem_ecam = false;
vms->highmem_mmio = true;
# qemu-system-aarch64 -accel kvm -cpu host \
-machine virt-7.2,compact-highmem={on, off} \
-m 4G,maxmem=511G -monitor stdio
Region compact-highmem=off compact-highmem=on
----------------------------------------------------------------
MEM [1GB 512GB] [1GB 512GB]
HIGH_GIC_REDISTS2 [512GB 512GB+64MB] [disabled]
HIGH_PCIE_ECAM [512GB+256MB 512GB+512MB] [disabled]
HIGH_PCIE_MMIO [disabled] [512GB 1TB]
In order to keep backwards compatibility, we need to disable the
optimization on machines that are virt-7.1 or earlier. It
means the optimization is enabled by default from virt-7.2. Besides,
'compact-highmem' property is added so that the optimization can be
explicitly enabled or disabled on all machine types by users.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-7-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit f40408a9fe5d1db70a75a33d2b26c8af8a5d57b0)
Signed-off-by: Gavin Shan <gshan@redhat.com>
Conflicts:
hw/arm/virt.c
Comment out the handlers of property 'compact-highmem' since
the property isn't exposed.
---
docs/system/arm/virt.rst | 4 ++++
hw/arm/virt.c | 34 ++++++++++++++++++++++++++++++++++
include/hw/arm/virt.h | 1 +
3 files changed, 39 insertions(+)
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 20442ea2c1..4454706392 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -94,6 +94,10 @@ highmem
address space above 32 bits. The default is ``on`` for machine types
later than ``virt-2.12``.
+compact-highmem
+ Set ``on``/``off`` to enable/disable the compact layout for high memory regions.
+ The default is ``on`` for machine types later than ``virt-7.2``.
+
gic-version
Specify the version of the Generic Interrupt Controller (GIC) to provide.
Valid values are:
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6896e0ca0f..6087511ae9 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -216,6 +216,12 @@ static const MemMapEntry base_memmap[] = {
* Note the extended_memmap is sized so that it eventually also includes the
* base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last
* index of base_memmap).
+ *
+ * The memory map for these Highmem IO Regions can be in legacy or compact
+ * layout, depending on 'compact-highmem' property. With legacy layout, the
+ * PA space for one specific region is always reserved, even if the region
+ * has been disabled or doesn't fit into the PA space. However, the PA space
+ * for the region won't be reserved in these circumstances with compact layout.
*/
static MemMapEntry extended_memmap[] = {
/* Additional 64 MB redist region (can contain up to 512 redistributors) */
@@ -2400,6 +2406,22 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
vms->highmem = value;
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
+static bool virt_get_compact_highmem(Object *obj, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ return vms->highmem_compact;
+}
+
+static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ vms->highmem_compact = value;
+}
+#endif /* disabled for RHEL */
+
static bool virt_get_its(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -3023,6 +3045,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
"Set on/off to enable/disable using "
"physical address space above 32 bits");
+ object_class_property_add_bool(oc, "compact-highmem",
+ virt_get_compact_highmem,
+ virt_set_compact_highmem);
+ object_class_property_set_description(oc, "compact-highmem",
+ "Set on/off to enable/disable compact "
+ "layout for high memory regions");
+
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
virt_set_gic_version);
object_class_property_set_description(oc, "gic-version",
@@ -3107,6 +3136,7 @@ static void virt_instance_init(Object *obj)
/* High memory is enabled by default */
vms->highmem = true;
+ vms->highmem_compact = !vmc->no_highmem_compact;
vms->gic_version = VIRT_GIC_VERSION_NOSEL;
vms->highmem_ecam = !vmc->no_highmem_ecam;
@@ -3176,8 +3206,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2)
static void virt_machine_7_1_options(MachineClass *mc)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
virt_machine_7_2_options(mc);
compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len);
+ /* Compact layout for high memory regions was introduced with 7.2 */
+ vmc->no_highmem_compact = true;
}
DEFINE_VIRT_MACHINE(7, 1)
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 15bd291311..85e7d61868 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -125,6 +125,7 @@ struct VirtMachineClass {
bool no_pmu;
bool claim_edge_triggered_timers;
bool smbios_old_sys_ver;
+ bool no_highmem_compact;
bool no_highmem_ecam;
bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */
bool kvm_no_adjvtime;
--
2.31.1

View File

@ -1,179 +0,0 @@
From 30e86a7c4fbcdc95b74bcb2a15745cb221783091 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 7/8] hw/arm/virt: Add properties to disable high memory
regions
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [7/8] 16f8762393b447a590b31c9e4d8d3c58c6bc9fa8
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
The 3 high memory regions are usually enabled by default, but they may
be not used. For example, VIRT_HIGH_GIC_REDIST2 isn't needed by GICv2.
This leads to waste in the PA space.
Add properties ("highmem-redists", "highmem-ecam", "highmem-mmio") to
allow users to selectively disable them if needed. After that, the high
memory region for GICv3 or GICv4 redistributor can be disabled by user,
the number of maximal supported CPUs needs to be calculated based on
'vms->highmem_redists'. The follow-up error message is also improved
to indicate if the high memory region for GICv3 and GICv4 has been
enabled or not.
Suggested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20221029224307.138822-8-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 6a48c64eec355ab1aff694eb4522d07a8e461368)
Signed-off-by: Gavin Shan <gshan@redhat.com>
Conflicts:
hw/arm/virt.c
Comment out the handlers of the property 'highmem-redists',
'highmem-ecam' and 'highmem-mmio' since they aren't exposed.
---
docs/system/arm/virt.rst | 13 +++++++
hw/arm/virt.c | 75 ++++++++++++++++++++++++++++++++++++++--
2 files changed, 86 insertions(+), 2 deletions(-)
diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 4454706392..188a4f211f 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -98,6 +98,19 @@ compact-highmem
Set ``on``/``off`` to enable/disable the compact layout for high memory regions.
The default is ``on`` for machine types later than ``virt-7.2``.
+highmem-redists
+ Set ``on``/``off`` to enable/disable the high memory region for GICv3 or
+ GICv4 redistributor. The default is ``on``. Setting this to ``off`` will
+ limit the maximum number of CPUs when GICv3 or GICv4 is used.
+
+highmem-ecam
+ Set ``on``/``off`` to enable/disable the high memory region for PCI ECAM.
+ The default is ``on`` for machine types later than ``virt-3.0``.
+
+highmem-mmio
+ Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO.
+ The default is ``on``.
+
gic-version
Specify the version of the Generic Interrupt Controller (GIC) to provide.
Valid values are:
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6087511ae9..304fa0d6e7 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2142,14 +2142,20 @@ static void machvirt_init(MachineState *machine)
if (vms->gic_version == VIRT_GIC_VERSION_2) {
virt_max_cpus = GIC_NCPU;
} else {
- virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST) +
- virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
+ virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST);
+ if (vms->highmem_redists) {
+ virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2);
+ }
}
if (max_cpus > virt_max_cpus) {
error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
"supported by machine 'mach-virt' (%d)",
max_cpus, virt_max_cpus);
+ if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) {
+ error_printf("Try 'highmem-redists=on' for more CPUs\n");
+ }
+
exit(1);
}
@@ -2420,6 +2426,49 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
vms->highmem_compact = value;
}
+
+static bool virt_get_highmem_redists(Object *obj, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ return vms->highmem_redists;
+}
+
+static void virt_set_highmem_redists(Object *obj, bool value, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ vms->highmem_redists = value;
+}
+
+static bool virt_get_highmem_ecam(Object *obj, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ return vms->highmem_ecam;
+}
+
+static void virt_set_highmem_ecam(Object *obj, bool value, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ vms->highmem_ecam = value;
+}
+
+static bool virt_get_highmem_mmio(Object *obj, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ return vms->highmem_mmio;
+}
+
+static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ vms->highmem_mmio = value;
+}
+
#endif /* disabled for RHEL */
static bool virt_get_its(Object *obj, Error **errp)
@@ -3052,6 +3101,28 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
"Set on/off to enable/disable compact "
"layout for high memory regions");
+ object_class_property_add_bool(oc, "highmem-redists",
+ virt_get_highmem_redists,
+ virt_set_highmem_redists);
+ object_class_property_set_description(oc, "highmem-redists",
+ "Set on/off to enable/disable high "
+ "memory region for GICv3 or GICv4 "
+ "redistributor");
+
+ object_class_property_add_bool(oc, "highmem-ecam",
+ virt_get_highmem_ecam,
+ virt_set_highmem_ecam);
+ object_class_property_set_description(oc, "highmem-ecam",
+ "Set on/off to enable/disable high "
+ "memory region for PCI ECAM");
+
+ object_class_property_add_bool(oc, "highmem-mmio",
+ virt_get_highmem_mmio,
+ virt_set_highmem_mmio);
+ object_class_property_set_description(oc, "highmem-mmio",
+ "Set on/off to enable/disable high "
+ "memory region for PCI MMIO");
+
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
virt_set_gic_version);
object_class_property_set_description(oc, "gic-version",
--
2.31.1

View File

@ -1,51 +0,0 @@
From 969ea1ff46b52c5fe6d87f2eeb1625871a2dfb2a Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 8/8] hw/arm/virt: Enable compat high memory region address
assignment for 9.2.0 machine
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [8/8] beda1791c0c35dce5c669efd47685302b8468032
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
Upstream: RHEL only
The compact high memory region address assignment is enabled for 9.2.0,
but it's kept as disabled for 9.0.0, to keep the backwards compatibility
on 9.0.0. Note that these newly added properties ('compact-highmem',
'highmem-redists', 'highmem-ecam', and 'highmem-mmio') in the upstream
aren't exposed for the downstream.
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 304fa0d6e7..e41c0b462c 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3581,6 +3581,7 @@ static void rhel_virt_instance_init(Object *obj)
/* High memory is enabled by default */
vms->highmem = true;
+ vms->highmem_compact = !vmc->no_highmem_compact;
vms->gic_version = VIRT_GIC_VERSION_NOSEL;
vms->highmem_ecam = !vmc->no_highmem_ecam;
@@ -3659,5 +3660,7 @@ static void rhel900_virt_options(MachineClass *mc)
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
vmc->no_tcg_lpa2 = true;
+ /* Compact layout for high memory regions was introduced with 9.2.0 */
+ vmc->no_highmem_compact = true;
}
DEFINE_RHEL_MACHINE(9, 0, 0)
--
2.31.1

View File

@ -1,112 +0,0 @@
From 1c7fad3776a14ca35b24dc2fdb262d4ddf40d6eb Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 5/8] hw/arm/virt: Improve high memory region address
assignment
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [5/8] 4d77fa78b5258a1bd8d30405cec5ba3311d42f92
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
There are three high memory regions, which are VIRT_HIGH_REDIST2,
VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses
are floating on highest RAM address. However, they can be disabled
in several cases.
(1) One specific high memory region is likely to be disabled by
code by toggling vms->highmem_{redists, ecam, mmio}.
(2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is
'virt-2.12' or earlier.
(3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded
on 32-bits system.
(4) One specific high memory region is disabled when it breaks the
PA space limit.
The current implementation of virt_set_{memmap, high_memmap}() isn't
optimized because the high memory region's PA space is always reserved,
regardless of whatever the actual state in the corresponding
vms->highmem_{redists, ecam, mmio} flag. In the code, 'base' and
'vms->highest_gpa' are always increased for case (1), (2) and (3).
It's unnecessary since the assigned PA space for the disabled high
memory region won't be used afterwards.
Improve the address assignment for those three high memory region by
skipping the address assignment for one specific high memory region if
it has been disabled in case (1), (2) and (3). The memory layout may
be changed after the improvement is applied, which leads to potential
migration breakage. So 'vms->highmem_compact' is added to control if
the improvement should be applied. For now, 'vms->highmem_compact' is
set to false, meaning that we don't have memory layout change until it
becomes configurable through property 'compact-highmem' in next patch.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-6-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 4a4ff9edc6a8fdc76082af5b41b059217138c09b)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 15 ++++++++++-----
include/hw/arm/virt.h | 1 +
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6e3b9fc060..6896e0ca0f 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1768,18 +1768,23 @@ static void virt_set_high_memmap(VirtMachineState *vms,
vms->memmap[i].size = region_size;
/*
- * Check each device to see if they fit in the PA space,
- * moving highest_gpa as we go.
+ * Check each device to see if it fits in the PA space,
+ * moving highest_gpa as we go. For compatibility, move
+ * highest_gpa for disabled fitting devices as well, if
+ * the compact layout has been disabled.
*
* For each device that doesn't fit, disable it.
*/
fits = (region_base + region_size) <= BIT_ULL(pa_bits);
- if (fits) {
- vms->highest_gpa = region_base + region_size - 1;
+ *region_enabled &= fits;
+ if (vms->highmem_compact && !*region_enabled) {
+ continue;
}
- *region_enabled &= fits;
base = region_base + region_size;
+ if (fits) {
+ vms->highest_gpa = base - 1;
+ }
}
}
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 22b54ec510..15bd291311 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -144,6 +144,7 @@ struct VirtMachineState {
PFlashCFI01 *flash[2];
bool secure;
bool highmem;
+ bool highmem_compact;
bool highmem_ecam;
bool highmem_mmio;
bool highmem_redists;
--
2.31.1

View File

@ -1,82 +0,0 @@
From 305a369fd18f29914bf96cc181add532d435d8ed Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 3/8] hw/arm/virt: Introduce variable region_base in
virt_set_high_memmap()
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/8] 15de90df217d680ccc858b679898b3993e1c050a
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
This introduces variable 'region_base' for the base address of the
specific high memory region. It's the preparatory work to optimize
high memory region address assignment.
No functional change intended.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-4-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit fa245799b9407fc7b561da185b3d889df5e16a88)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ca098d40b8..ddcf7ee2f8 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1739,15 +1739,15 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
static void virt_set_high_memmap(VirtMachineState *vms,
hwaddr base, int pa_bits)
{
- hwaddr region_size;
+ hwaddr region_base, region_size;
bool fits;
int i;
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
+ region_base = ROUND_UP(base, extended_memmap[i].size);
region_size = extended_memmap[i].size;
- base = ROUND_UP(base, region_size);
- vms->memmap[i].base = base;
+ vms->memmap[i].base = region_base;
vms->memmap[i].size = region_size;
/*
@@ -1756,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms,
*
* For each device that doesn't fit, disable it.
*/
- fits = (base + region_size) <= BIT_ULL(pa_bits);
+ fits = (region_base + region_size) <= BIT_ULL(pa_bits);
if (fits) {
- vms->highest_gpa = base + region_size - 1;
+ vms->highest_gpa = region_base + region_size - 1;
}
switch (i) {
@@ -1773,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
break;
}
- base += region_size;
+ base = region_base + region_size;
}
}
--
2.31.1

View File

@ -1,95 +0,0 @@
From a2ddd68c8365ec602db6b2a9cf83bb441ca701cc Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 4/8] hw/arm/virt: Introduce virt_get_high_memmap_enabled()
helper
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [4/8] 65524de2fc106600bbaff641caa8c4f2f8027114
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
This introduces virt_get_high_memmap_enabled() helper, which returns
the pointer to vms->highmem_{redists, ecam, mmio}. The pointer will
be used in the subsequent patches.
No functional change intended.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-5-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit a5cb1350b19a5c2a58ab4edddf609ed429c13085)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ddcf7ee2f8..6e3b9fc060 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1736,14 +1736,31 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
return arm_cpu_mp_affinity(idx, clustersz);
}
+static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
+ int index)
+{
+ bool *enabled_array[] = {
+ &vms->highmem_redists,
+ &vms->highmem_ecam,
+ &vms->highmem_mmio,
+ };
+
+ assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST ==
+ ARRAY_SIZE(enabled_array));
+ assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array));
+
+ return enabled_array[index - VIRT_LOWMEMMAP_LAST];
+}
+
static void virt_set_high_memmap(VirtMachineState *vms,
hwaddr base, int pa_bits)
{
hwaddr region_base, region_size;
- bool fits;
+ bool *region_enabled, fits;
int i;
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
+ region_enabled = virt_get_high_memmap_enabled(vms, i);
region_base = ROUND_UP(base, extended_memmap[i].size);
region_size = extended_memmap[i].size;
@@ -1761,18 +1778,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
vms->highest_gpa = region_base + region_size - 1;
}
- switch (i) {
- case VIRT_HIGH_GIC_REDIST2:
- vms->highmem_redists &= fits;
- break;
- case VIRT_HIGH_PCIE_ECAM:
- vms->highmem_ecam &= fits;
- break;
- case VIRT_HIGH_PCIE_MMIO:
- vms->highmem_mmio &= fits;
- break;
- }
-
+ *region_enabled &= fits;
base = region_base + region_size;
}
}
--
2.31.1

View File

@ -1,130 +0,0 @@
From 5dff87c5ea60054709021025c9513ec259433ce2 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 1/8] hw/arm/virt: Introduce virt_set_high_memmap() helper
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/8] 5f6ba5af7a2c21d8473c58e088ee99b11336c673
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
This introduces virt_set_high_memmap() helper. The logic of high
memory region address assignment is moved to the helper. The intention
is to make the subsequent optimization for high memory region address
assignment easier.
No functional change intended.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-2-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 4af6b6edece5ef273d29972d53547f823d2bc1c0)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 74 ++++++++++++++++++++++++++++-----------------------
1 file changed, 41 insertions(+), 33 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index bf18838b87..bea5f54720 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1736,6 +1736,46 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
return arm_cpu_mp_affinity(idx, clustersz);
}
+static void virt_set_high_memmap(VirtMachineState *vms,
+ hwaddr base, int pa_bits)
+{
+ int i;
+
+ for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
+ hwaddr size = extended_memmap[i].size;
+ bool fits;
+
+ base = ROUND_UP(base, size);
+ vms->memmap[i].base = base;
+ vms->memmap[i].size = size;
+
+ /*
+ * Check each device to see if they fit in the PA space,
+ * moving highest_gpa as we go.
+ *
+ * For each device that doesn't fit, disable it.
+ */
+ fits = (base + size) <= BIT_ULL(pa_bits);
+ if (fits) {
+ vms->highest_gpa = base + size - 1;
+ }
+
+ switch (i) {
+ case VIRT_HIGH_GIC_REDIST2:
+ vms->highmem_redists &= fits;
+ break;
+ case VIRT_HIGH_PCIE_ECAM:
+ vms->highmem_ecam &= fits;
+ break;
+ case VIRT_HIGH_PCIE_MMIO:
+ vms->highmem_mmio &= fits;
+ break;
+ }
+
+ base += size;
+ }
+}
+
static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
{
MachineState *ms = MACHINE(vms);
@@ -1791,39 +1831,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
/* We know for sure that at least the memory fits in the PA space */
vms->highest_gpa = memtop - 1;
- for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
- hwaddr size = extended_memmap[i].size;
- bool fits;
-
- base = ROUND_UP(base, size);
- vms->memmap[i].base = base;
- vms->memmap[i].size = size;
-
- /*
- * Check each device to see if they fit in the PA space,
- * moving highest_gpa as we go.
- *
- * For each device that doesn't fit, disable it.
- */
- fits = (base + size) <= BIT_ULL(pa_bits);
- if (fits) {
- vms->highest_gpa = base + size - 1;
- }
-
- switch (i) {
- case VIRT_HIGH_GIC_REDIST2:
- vms->highmem_redists &= fits;
- break;
- case VIRT_HIGH_PCIE_ECAM:
- vms->highmem_ecam &= fits;
- break;
- case VIRT_HIGH_PCIE_MMIO:
- vms->highmem_mmio &= fits;
- break;
- }
-
- base += size;
- }
+ virt_set_high_memmap(vms, base, pa_bits);
if (device_memory_size > 0) {
ms->device_memory = g_malloc0(sizeof(*ms->device_memory));
--
2.31.1

View File

@ -1,83 +0,0 @@
From bd5b7edbf8f4425f4b4e0d49a00cbdd48d9c6f48 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 21 Dec 2022 08:48:45 +0800
Subject: [PATCH 2/8] hw/arm/virt: Rename variable size to region_size in
virt_set_high_memmap()
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment
RH-Bugzilla: 2113840
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/8] 1cadf1b00686cceb45821a58fdcb509bc5da335d
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840
This renames variable 'size' to 'region_size' in virt_set_high_memmap().
Its counterpart ('region_base') will be introduced in next patch.
No functional change intended.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Zhenyu Zhang <zhenyzha@redhat.com>
Message-id: 20221029224307.138822-3-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 370bea9d1c78796eec235ed6cb4310f489931a62)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index bea5f54720..ca098d40b8 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1739,15 +1739,16 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
static void virt_set_high_memmap(VirtMachineState *vms,
hwaddr base, int pa_bits)
{
+ hwaddr region_size;
+ bool fits;
int i;
for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
- hwaddr size = extended_memmap[i].size;
- bool fits;
+ region_size = extended_memmap[i].size;
- base = ROUND_UP(base, size);
+ base = ROUND_UP(base, region_size);
vms->memmap[i].base = base;
- vms->memmap[i].size = size;
+ vms->memmap[i].size = region_size;
/*
* Check each device to see if they fit in the PA space,
@@ -1755,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms,
*
* For each device that doesn't fit, disable it.
*/
- fits = (base + size) <= BIT_ULL(pa_bits);
+ fits = (base + region_size) <= BIT_ULL(pa_bits);
if (fits) {
- vms->highest_gpa = base + size - 1;
+ vms->highest_gpa = base + region_size - 1;
}
switch (i) {
@@ -1772,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms,
break;
}
- base += size;
+ base += region_size;
}
}
--
2.31.1

View File

@ -1,59 +0,0 @@
From 8b0c5c6d356fd6cce9092727e20097b70e07bba9 Mon Sep 17 00:00:00 2001
From: Julia Suvorova <jusual@redhat.com>
Date: Thu, 23 Feb 2023 13:57:47 +0100
Subject: [PATCH] hw/smbios: fix field corruption in type 4 table
RH-Author: Julia Suvorova <None>
RH-MergeRequest: 156: hw/smbios: fix field corruption in type 4 table
RH-Bugzilla: 2169904
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Acked-by: Ani Sinha <None>
RH-Commit: [1/1] ee6d9bb6dfa0fb2625915947072cb91a0926c4ec
Since table type 4 of SMBIOS version 2.6 is shorter than 3.0, the
strings which follow immediately after the struct fields have been
overwritten by unconditional filling of later fields such as core_count2.
Make these fields dependent on the SMBIOS version.
Fixes: 05e27d74c7 ("hw/smbios: add core_count2 to smbios table type 4")
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2169904
Signed-off-by: Julia Suvorova <jusual@redhat.com>
Message-Id: <20230223125747.254914-1-jusual@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Ani Sinha <ani@anisinha.ca>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 60d09b8dc7dd4256d664ad680795cb1327805b2b)
---
hw/smbios/smbios.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index c5ad69237e..2d2ece3edb 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -752,14 +752,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance)
t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores;
t->core_enabled = t->core_count;
- t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
-
t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads;
- t->thread_count2 = cpu_to_le16(ms->smp.threads);
t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */
t->processor_family2 = cpu_to_le16(0x01); /* Other */
+ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) {
+ t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
+ t->thread_count2 = cpu_to_le16(ms->smp.threads);
+ }
+
SMBIOS_BUILD_TABLE_POST;
smbios_type4_count++;
}
--
2.31.1

View File

@ -1,64 +0,0 @@
From cadcc1c6a001622d971c86d44925516905e3d104 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 23 Feb 2023 14:59:21 +0800
Subject: [PATCH 8/8] intel-iommu: fail DEVIOTLB_UNMAP without dt mode
RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 157: intel-iommu: fail DEVIOTLB_UNMAP without dt mode
RH-Bugzilla: 2156876
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Commit: [1/1] eb9dbae6140ef4ba10d90b9e66abd75540f6892d (lvivier/qemu-kvm-centos)
Without dt mode, device IOTLB notifier won't work since guest won't
send device IOTLB invalidation descriptor in this case. Let's fail
early instead of misbehaving silently.
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Tested-by: Laurent Vivier <lvivier@redhat.com>
Tested-by: Viktor Prutyanov <viktor@daynix.com>
Buglink: https://bugzilla.redhat.com/2156876
Signed-off-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20230223065924.42503-3-jasowang@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 09adb0e021207b60a0c51a68939b4539d98d3ef3)
Conflict in hw/i386/intel_iommu.c because of missing commit:
4ce27463ccce ("intel-iommu: fail MAP notifier without caching mode")
---
hw/i386/intel_iommu.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a08ee85edf..d2983f40d3 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3179,6 +3179,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
{
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
/* TODO: add support for VFIO and vhost users */
if (s->snoop_control) {
@@ -3186,6 +3187,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
"Snoop Control with vhost or VFIO is not supported");
return -ENOTSUP;
}
+ if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) {
+ error_setg_errno(errp, ENOTSUP,
+ "device %02x.%02x.%x requires device IOTLB mode",
+ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn),
+ PCI_FUNC(vtd_as->devfn));
+ return -ENOTSUP;
+ }
/* Update per-address-space notifier flags */
vtd_as->notifier_flags = new;
--
2.39.1

View File

@ -1,386 +0,0 @@
From 3a29b50036b972caae5bca0e5dfc34d910b1d5e9 Mon Sep 17 00:00:00 2001
From: "manish.mishra" <manish.mishra@nutanix.com>
Date: Tue, 20 Dec 2022 18:44:17 +0000
Subject: [PATCH 6/8] io: Add support for MSG_PEEK for socket channel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders
RH-Bugzilla: 2169732
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Commit: [1/2] 266563f3e387e97ec710d9bc179e5de26dfd09f1 (peterx/qemu-kvm)
MSG_PEEK peeks at the channel: the data is treated as unread and
the next read shall still return this data. This support is
currently added only for the socket class. An extra parameter 'flags'
is added to io_readv calls to pass extra read flags like MSG_PEEK.
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Suggested-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
chardev/char-socket.c | 4 ++--
include/io/channel.h | 6 ++++++
io/channel-buffer.c | 1 +
io/channel-command.c | 1 +
io/channel-file.c | 1 +
io/channel-null.c | 1 +
io/channel-socket.c | 19 ++++++++++++++++++-
io/channel-tls.c | 1 +
io/channel-websock.c | 1 +
io/channel.c | 16 ++++++++++++----
migration/channel-block.c | 1 +
migration/rdma.c | 1 +
scsi/qemu-pr-helper.c | 2 +-
tests/qtest/tpm-emu.c | 2 +-
tests/unit/test-io-channel-socket.c | 1 +
util/vhost-user-server.c | 2 +-
16 files changed, 50 insertions(+), 10 deletions(-)
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 879564aa8a..5afce9a464 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
ret = qio_channel_readv_full(s->ioc, &iov, 1,
&msgfds, &msgfds_num,
- NULL);
+ 0, NULL);
} else {
ret = qio_channel_readv_full(s->ioc, &iov, 1,
NULL, NULL,
- NULL);
+ 0, NULL);
}
if (msgfds_num) {
diff --git a/include/io/channel.h b/include/io/channel.h
index c680ee7480..716235d496 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
+
typedef enum QIOChannelFeature QIOChannelFeature;
enum QIOChannelFeature {
@@ -41,6 +43,7 @@ enum QIOChannelFeature {
QIO_CHANNEL_FEATURE_SHUTDOWN,
QIO_CHANNEL_FEATURE_LISTEN,
QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
};
@@ -114,6 +117,7 @@ struct QIOChannelClass {
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp);
int (*io_close)(QIOChannel *ioc,
Error **errp);
@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc,
* @niov: the length of the @iov array
* @fds: pointer to an array that will received file handles
* @nfds: pointer filled with number of elements in @fds on return
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
* @errp: pointer to a NULL-initialized error object
*
* Read data from the IO channel, storing it in the
@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp);
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
index bf52011be2..8096180f85 100644
--- a/io/channel-buffer.c
+++ b/io/channel-buffer.c
@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
diff --git a/io/channel-command.c b/io/channel-command.c
index 74516252ba..e7edd091af 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
diff --git a/io/channel-file.c b/io/channel-file.c
index b67687c2aa..d76663e6ae 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
diff --git a/io/channel-null.c b/io/channel-null.c
index 75e3781507..4fafdb770d 100644
--- a/io/channel-null.c
+++ b/io/channel-null.c
@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc,
size_t niov,
int **fds G_GNUC_UNUSED,
size_t *nfds G_GNUC_UNUSED,
+ int flags,
Error **errp)
{
QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
diff --git a/io/channel-socket.c b/io/channel-socket.c
index b76dca9cc1..7aca84f61a 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
}
#endif
+ qio_channel_set_feature(QIO_CHANNEL(ioc),
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
+
return 0;
}
@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
}
#endif /* WIN32 */
+ qio_channel_set_feature(QIO_CHANNEL(cioc),
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
+
trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
return cioc;
@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
}
+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+ sflags |= MSG_PEEK;
+ }
+
retry:
ret = recvmsg(sioc->fd, &msg, sflags);
if (ret < 0) {
@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
ssize_t done = 0;
ssize_t i;
+ int sflags = 0;
+
+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+ sflags |= MSG_PEEK;
+ }
for (i = 0; i < niov; i++) {
ssize_t ret;
@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
ret = recv(sioc->fd,
iov[i].iov_base,
iov[i].iov_len,
- 0);
+ sflags);
if (ret < 0) {
if (errno == EAGAIN) {
if (done) {
diff --git a/io/channel-tls.c b/io/channel-tls.c
index 4ce890a538..c730cb8ec5 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
diff --git a/io/channel-websock.c b/io/channel-websock.c
index fb4932ade7..a12acc27cf 100644
--- a/io/channel-websock.c
+++ b/io/channel-websock.c
@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
diff --git a/io/channel.c b/io/channel.c
index 0640941ac5..a8c7f11649 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
return -1;
}
- return klass->io_readv(ioc, iov, niov, fds, nfds, errp);
+ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) &&
+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+ error_setg_errno(errp, EINVAL,
+ "Channel does not support peek read");
+ return -1;
+ }
+
+ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp);
}
@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc,
while ((nlocal_iov > 0) || local_fds) {
ssize_t len;
len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
- local_nfds, errp);
+ local_nfds, 0, errp);
if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) {
qio_channel_yield(ioc, G_IO_IN);
@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc,
size_t niov,
Error **errp)
{
- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp);
+ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp);
}
@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc,
Error **errp)
{
struct iovec iov = { .iov_base = buf, .iov_len = buflen };
- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp);
+ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp);
}
diff --git a/migration/channel-block.c b/migration/channel-block.c
index f4ab53acdb..b7374363c3 100644
--- a/migration/channel-block.c
+++ b/migration/channel-block.c
@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc);
diff --git a/migration/rdma.c b/migration/rdma.c
index 94a55dd95b..d8b4632094 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2854,6 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index 196b78c00d..199227a556 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
iov.iov_base = buf;
iov.iov_len = sz;
n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
- &fds, &nfds, errp);
+ &fds, &nfds, 0, errp);
if (n_read == QIO_CHANNEL_ERR_BLOCK) {
qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c
index 2994d1cf42..3cf1acaf7d 100644
--- a/tests/qtest/tpm-emu.c
+++ b/tests/qtest/tpm-emu.c
@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data)
int *pfd = NULL;
size_t nfd = 0;
- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort);
+ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort);
cmd = be32_to_cpu(cmd);
g_assert_cmpint(cmd, ==, CMD_SET_DATAFD);
g_assert_cmpint(nfd, ==, 1);
diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
index b36a5d972a..b964bb202d 100644
--- a/tests/unit/test-io-channel-socket.c
+++ b/tests/unit/test-io-channel-socket.c
@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void)
G_N_ELEMENTS(iorecv),
&fdrecv,
&nfdrecv,
+ 0,
&error_abort);
g_assert(nfdrecv == G_N_ELEMENTS(fdsend));
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 232984ace6..145eb17c08 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
* qio_channel_readv_full may have short reads, keeping calling it
* until getting VHOST_USER_HDR_SIZE or 0 bytes in total
*/
- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
if (rc < 0) {
if (rc == QIO_CHANNEL_ERR_BLOCK) {
assert(local_err == NULL);
--
2.31.1

View File

@ -1,99 +0,0 @@
From 6727e92a97f8ee9f367a41111bef3f5cad4a479a Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:27:02 +0200
Subject: [PATCH 15/20] iotests/106, 214, 308: Read only one size line
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [10/12] 1554e0a92b92ed101a251478ccae43f45f6e071e (hreitz/qemu-kvm-c-9-s)
These tests read size information (sometimes disk size, sometimes
virtual size) from qemu-img info's output. Once qemu-img starts
printing info about child nodes, we are going to see multiple instances
of that per image, but these tests are only interested in the first one,
so use "head -n 1" to get it.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-11-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 74163adda3101b127943f7cbbf8fcccd2d472426)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
tests/qemu-iotests/106 | 4 ++--
tests/qemu-iotests/214 | 6 ++++--
tests/qemu-iotests/308 | 4 ++--
3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106
index 9d6adb542d..ae0fc46691 100755
--- a/tests/qemu-iotests/106
+++ b/tests/qemu-iotests/106
@@ -66,7 +66,7 @@ for create_mode in off falloc full; do
expected_size=$((expected_size + $GROWTH_SIZE))
fi
- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size')
+ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1)
actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/')
# The actual size may exceed the expected size, depending on the file
@@ -105,7 +105,7 @@ for growth_mode in falloc full; do
_make_test_img -o "extent_size_hint=0" 2G
$QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" +${GROWTH_SIZE}K
- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size')
+ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1)
actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/')
if [ $actual_size -lt $GROWTH_SIZE ]; then
diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214
index c66e246ba2..55ffcd7f44 100755
--- a/tests/qemu-iotests/214
+++ b/tests/qemu-iotests/214
@@ -102,7 +102,8 @@ let data_size="8 * $cluster_size"
$QEMU_IO -c "write -P 0xaa 0 $data_size" "$TEST_IMG" \
2>&1 | _filter_qemu_io | _filter_testdir
sizeA=$($QEMU_IMG info --output=json "$TEST_IMG" |
- sed -n '/"actual-size":/ s/[^0-9]//gp')
+ sed -n '/"actual-size":/ s/[^0-9]//gp' |
+ head -n 1)
_make_test_img 2M -o cluster_size=$cluster_size
echo "Write compressed data:"
@@ -124,7 +125,8 @@ $QEMU_IO -c "write -P 0xcc $offset $data_size" "json:{\
_filter_qemu_io | _filter_testdir
sizeB=$($QEMU_IMG info --output=json "$TEST_IMG" |
- sed -n '/"actual-size":/ s/[^0-9]//gp')
+ sed -n '/"actual-size":/ s/[^0-9]//gp' |
+ head -n 1)
if [ $sizeA -lt $sizeB ]
then
diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308
index bde4aac2fa..09275e9a10 100755
--- a/tests/qemu-iotests/308
+++ b/tests/qemu-iotests/308
@@ -217,12 +217,12 @@ echo
echo '=== Remove export ==='
# Double-check that $EXT_MP appears as a non-empty file (the raw image)
-$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size'
+$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1
fuse_export_del 'export-mp'
# See that the file appears empty again
-$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size'
+$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1
echo
echo '=== Writable export ==='
--
2.31.1

View File

@ -1,171 +0,0 @@
From 3102e62f80757729c97e58e2b3d62a6a9de952a7 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:27:01 +0200
Subject: [PATCH 14/20] iotests: Filter child node information
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [9/12] 0b0a42d54397791f7f149e53c9175b7863707e70 (hreitz/qemu-kvm-c-9-s)
Before we let qemu-img info print child node information, have
common.filter, common.rc, and iotests.py filter it from the test output
so we get as few reference output changes as possible.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-10-hreitz@redhat.com>
Tested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit bcc6777ad6facede73c0cf8b1700045bf4365f7d)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
tests/qemu-iotests/common.filter | 22 ++++++++++++++--------
tests/qemu-iotests/common.rc | 22 ++++++++++++++--------
tests/qemu-iotests/iotests.py | 18 +++++++++++++++---
3 files changed, 43 insertions(+), 19 deletions(-)
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 6a13757177..6ddda2ee64 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -224,6 +224,7 @@ _filter_img_info()
discard=0
regex_json_spec_start='^ *"format-specific": \{'
+ regex_json_child_start='^ *"children": \['
gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
-e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
-e "s#$TEST_DIR#TEST_DIR#g" \
@@ -252,20 +253,25 @@ _filter_img_info()
-e 's/\(compression type: \)\(zlib\|zstd\)/\1COMPRESSION_TYPE/' \
-e "s/uuid: [-a-f0-9]\\+/uuid: 00000000-0000-0000-0000-000000000000/" | \
while IFS='' read -r line; do
- if [[ $format_specific == 1 ]]; then
- discard=0
- elif [[ $line == "Format specific information:" ]]; then
- discard=1
- elif [[ $line =~ $regex_json_spec_start ]]; then
- discard=2
- regex_json_spec_end="^${line%%[^ ]*}\\},? *$"
+ if [[ $discard == 0 ]]; then
+ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then
+ discard=1
+ elif [[ $line =~ "Child node '/" ]]; then
+ discard=1
+ elif [[ $line =~ $regex_json_spec_start ]]; then
+ discard=2
+ regex_json_end="^${line%%[^ ]*}\\},? *$"
+ elif [[ $line =~ $regex_json_child_start ]]; then
+ discard=2
+ regex_json_end="^${line%%[^ ]*}\\],? *$"
+ fi
fi
if [[ $discard == 0 ]]; then
echo "$line"
elif [[ $discard == 1 && ! $line ]]; then
echo
discard=0
- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then
+ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then
discard=0
fi
done
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index db757025cb..f4476b62f7 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -711,6 +711,7 @@ _img_info()
discard=0
regex_json_spec_start='^ *"format-specific": \{'
+ regex_json_child_start='^ *"children": \['
$QEMU_IMG info $QEMU_IMG_EXTRA_ARGS "$@" "$TEST_IMG" 2>&1 | \
sed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
-e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
@@ -721,20 +722,25 @@ _img_info()
-e "/^disk size:/ D" \
-e "/actual-size/ D" | \
while IFS='' read -r line; do
- if [[ $format_specific == 1 ]]; then
- discard=0
- elif [[ $line == "Format specific information:" ]]; then
- discard=1
- elif [[ $line =~ $regex_json_spec_start ]]; then
- discard=2
- regex_json_spec_end="^${line%%[^ ]*}\\},? *$"
+ if [[ $discard == 0 ]]; then
+ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then
+ discard=1
+ elif [[ $line =~ "Child node '/" ]]; then
+ discard=1
+ elif [[ $format_specific == 0 && $line =~ $regex_json_spec_start ]]; then
+ discard=2
+ regex_json_end="^${line%%[^ ]*}\\},? *$"
+ elif [[ $line =~ $regex_json_child_start ]]; then
+ discard=2
+ regex_json_end="^${line%%[^ ]*}\\],? *$"
+ fi
fi
if [[ $discard == 0 ]]; then
echo "$line"
elif [[ $discard == 1 && ! $line ]]; then
echo
discard=0
- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then
+ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then
discard=0
fi
done
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index da7d6637e1..94aeb3f3b2 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -329,7 +329,7 @@ def qemu_img_log(*args: str, check: bool = True
def img_info_log(filename: str, filter_path: Optional[str] = None,
use_image_opts: bool = False, extra_args: Sequence[str] = (),
- check: bool = True,
+ check: bool = True, drop_child_info: bool = True,
) -> None:
args = ['info']
if use_image_opts:
@@ -342,7 +342,7 @@ def img_info_log(filename: str, filter_path: Optional[str] = None,
output = qemu_img(*args, check=check).stdout
if not filter_path:
filter_path = filename
- log(filter_img_info(output, filter_path))
+ log(filter_img_info(output, filter_path, drop_child_info))
def qemu_io_wrap_args(args: Sequence[str]) -> List[str]:
if '-f' in args or '--image-opts' in args:
@@ -642,11 +642,23 @@ def _filter(_key, value):
def filter_generated_node_ids(msg):
return re.sub("#block[0-9]+", "NODE_NAME", msg)
-def filter_img_info(output, filename):
+def filter_img_info(output: str, filename: str,
+ drop_child_info: bool = True) -> str:
lines = []
+ drop_indented = False
for line in output.split('\n'):
if 'disk size' in line or 'actual-size' in line:
continue
+
+ # Drop child node info
+ if drop_indented:
+ if line.startswith(' '):
+ continue
+ drop_indented = False
+ if drop_child_info and "Child node '/" in line:
+ drop_indented = True
+ continue
+
line = line.replace(filename, 'TEST_IMG')
line = filter_testfiles(line)
line = line.replace(imgfmt, 'IMGFMT')
--
2.31.1

View File

@ -1,286 +0,0 @@
From e13fdc97ff05cdee46c112c2dee70b6ef33e7fa7 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Mon, 16 Jan 2023 07:17:31 -0500
Subject: [PATCH 31/31] kvm: Atomic memslot updates
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 138: accel: introduce accelerator blocker API
RH-Bugzilla: 1979276
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/3] 9f03181ebcad2474fbe859acbce7b9891caa216b (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39
Author: David Hildenbrand <david@redhat.com>
Date: Fri Nov 11 10:47:58 2022 -0500
kvm: Atomic memslot updates
If we update an existing memslot (e.g., resize, split), we temporarily
remove the memslot to re-add it immediately afterwards. These updates
are not atomic, especially not for KVM VCPU threads, such that we can
get spurious faults.
Let's inhibit most KVM ioctls while performing relevant updates, such
that we can perform the update just as if it would happen atomically
without additional kernel support.
We capture the add/del changes and apply them in the notifier commit
stage instead. There, we can check for overlaps and perform the ioctl
inhibiting only if really required (-> overlap).
To keep things simple we don't perform additional checks that wouldn't
actually result in an overlap -- such as !RAM memory regions in some
cases (see kvm_set_phys_mem()).
To minimize cache-line bouncing, use a separate indicator
(in_ioctl_lock) per CPU. Also, make sure to hold the kvm_slots_lock
while performing both actions (removing+re-adding).
We have to wait until all IOCTLs have exited and block new ones from
getting executed.
This approach cannot result in a deadlock as long as the inhibitor does
not hold any locks that might hinder an IOCTL from getting finished and
exited - something fairly unusual. The inhibitor will always hold the BQL.
AFAIK, one possible candidate would be userfaultfd. If a page cannot be
placed (e.g., during postcopy), because we're waiting for a lock, or if the
userfaultfd thread cannot process a fault, because it is waiting for a
lock, there could be a deadlock. However, the BQL is not applicable here,
because any other guest memory access while holding the BQL would already
result in a deadlock.
Nothing else in the kernel should block forever and wait for userspace
intervention.
Note: pause_all_vcpus()/resume_all_vcpus() or
start_exclusive()/end_exclusive() cannot be used, as they either drop
the BQL or must be called without the BQL - something inhibitors
cannot handle. We need a low-level locking mechanism that is
deadlock-free even when not releasing the BQL.
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Tested-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Message-Id: <20221111154758.1372674-4-eesposit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
accel/kvm/kvm-all.c | 101 ++++++++++++++++++++++++++++++++++-----
include/sysemu/kvm_int.h | 8 ++++
2 files changed, 98 insertions(+), 11 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index ff660fd469..39ed30ab59 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -31,6 +31,7 @@
#include "sysemu/kvm_int.h"
#include "sysemu/runstate.h"
#include "sysemu/cpus.h"
+#include "sysemu/accel-blocker.h"
#include "qemu/bswap.h"
#include "exec/memory.h"
#include "exec/ram_addr.h"
@@ -46,6 +47,7 @@
#include "sysemu/hw_accel.h"
#include "kvm-cpus.h"
#include "sysemu/dirtylimit.h"
+#include "qemu/range.h"
#include "hw/boards.h"
#include "monitor/stats.h"
@@ -1292,6 +1294,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
kvm_max_slot_size = max_slot_size;
}
+/* Called with KVMMemoryListener.slots_lock held */
static void kvm_set_phys_mem(KVMMemoryListener *kml,
MemoryRegionSection *section, bool add)
{
@@ -1326,14 +1329,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
ram = memory_region_get_ram_ptr(mr) + mr_offset;
ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset;
- kvm_slots_lock();
-
if (!add) {
do {
slot_size = MIN(kvm_max_slot_size, size);
mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
if (!mem) {
- goto out;
+ return;
}
if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
/*
@@ -1371,7 +1372,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
start_addr += slot_size;
size -= slot_size;
} while (size);
- goto out;
+ return;
}
/* register the new slot */
@@ -1396,9 +1397,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
ram += slot_size;
size -= slot_size;
} while (size);
-
-out:
- kvm_slots_unlock();
}
static void *kvm_dirty_ring_reaper_thread(void *data)
@@ -1455,18 +1453,95 @@ static void kvm_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+ KVMMemoryUpdate *update;
+
+ update = g_new0(KVMMemoryUpdate, 1);
+ update->section = *section;
- memory_region_ref(section->mr);
- kvm_set_phys_mem(kml, section, true);
+ QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next);
}
static void kvm_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+ KVMMemoryUpdate *update;
+
+ update = g_new0(KVMMemoryUpdate, 1);
+ update->section = *section;
+
+ QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next);
+}
+
+static void kvm_region_commit(MemoryListener *listener)
+{
+ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener,
+ listener);
+ KVMMemoryUpdate *u1, *u2;
+ bool need_inhibit = false;
+
+ if (QSIMPLEQ_EMPTY(&kml->transaction_add) &&
+ QSIMPLEQ_EMPTY(&kml->transaction_del)) {
+ return;
+ }
+
+ /*
+ * We have to be careful when regions to add overlap with ranges to remove.
+ * We have to simulate atomic KVM memslot updates by making sure no ioctl()
+ * is currently active.
+ *
+ * The lists are order by addresses, so it's easy to find overlaps.
+ */
+ u1 = QSIMPLEQ_FIRST(&kml->transaction_del);
+ u2 = QSIMPLEQ_FIRST(&kml->transaction_add);
+ while (u1 && u2) {
+ Range r1, r2;
+
+ range_init_nofail(&r1, u1->section.offset_within_address_space,
+ int128_get64(u1->section.size));
+ range_init_nofail(&r2, u2->section.offset_within_address_space,
+ int128_get64(u2->section.size));
+
+ if (range_overlaps_range(&r1, &r2)) {
+ need_inhibit = true;
+ break;
+ }
+ if (range_lob(&r1) < range_lob(&r2)) {
+ u1 = QSIMPLEQ_NEXT(u1, next);
+ } else {
+ u2 = QSIMPLEQ_NEXT(u2, next);
+ }
+ }
+
+ kvm_slots_lock();
+ if (need_inhibit) {
+ accel_ioctl_inhibit_begin();
+ }
+
+ /* Remove all memslots before adding the new ones. */
+ while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) {
+ u1 = QSIMPLEQ_FIRST(&kml->transaction_del);
+ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next);
- kvm_set_phys_mem(kml, section, false);
- memory_region_unref(section->mr);
+ kvm_set_phys_mem(kml, &u1->section, false);
+ memory_region_unref(u1->section.mr);
+
+ g_free(u1);
+ }
+ while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) {
+ u1 = QSIMPLEQ_FIRST(&kml->transaction_add);
+ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next);
+
+ memory_region_ref(u1->section.mr);
+ kvm_set_phys_mem(kml, &u1->section, true);
+
+ g_free(u1);
+ }
+
+ if (need_inhibit) {
+ accel_ioctl_inhibit_end();
+ }
+ kvm_slots_unlock();
}
static void kvm_log_sync(MemoryListener *listener,
@@ -1610,8 +1685,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
kml->slots[i].slot = i;
}
+ QSIMPLEQ_INIT(&kml->transaction_add);
+ QSIMPLEQ_INIT(&kml->transaction_del);
+
kml->listener.region_add = kvm_region_add;
kml->listener.region_del = kvm_region_del;
+ kml->listener.commit = kvm_region_commit;
kml->listener.log_start = kvm_log_start;
kml->listener.log_stop = kvm_log_stop;
kml->listener.priority = 10;
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index 3b4adcdc10..60b520a13e 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -12,6 +12,7 @@
#include "exec/memory.h"
#include "qapi/qapi-types-common.h"
#include "qemu/accel.h"
+#include "qemu/queue.h"
#include "sysemu/kvm.h"
typedef struct KVMSlot
@@ -31,10 +32,17 @@ typedef struct KVMSlot
ram_addr_t ram_start_offset;
} KVMSlot;
+typedef struct KVMMemoryUpdate {
+ QSIMPLEQ_ENTRY(KVMMemoryUpdate) next;
+ MemoryRegionSection section;
+} KVMMemoryUpdate;
+
typedef struct KVMMemoryListener {
MemoryListener listener;
KVMSlot *slots;
int as_id;
+ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
+ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del;
} KVMMemoryListener;
#define KVM_MSI_HASHTAB_SIZE 256
--
2.31.1

View File

@ -1,577 +0,0 @@
From cbe35c6a4794107ea1ddecf0b381ba4b1c8799f5 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 7 Feb 2023 15:57:10 -0500
Subject: [PATCH 3/8] linux-headers: Update to v6.1
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 149: Support /dev/userfaultfd
RH-Bugzilla: 2158704
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/3] 15d97026e802a0f01b5f80f81fb4414dc69b2b2d (peterx/qemu-kvm)
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 93e0932b7be2498024cd6ba8446a0fa2cb1769bc)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
include/standard-headers/drm/drm_fourcc.h | 34 ++++-
include/standard-headers/linux/ethtool.h | 63 +++++++-
include/standard-headers/linux/fuse.h | 6 +-
.../linux/input-event-codes.h | 1 +
include/standard-headers/linux/virtio_blk.h | 19 +++
linux-headers/asm-generic/hugetlb_encode.h | 26 ++--
linux-headers/asm-generic/mman-common.h | 2 +
linux-headers/asm-mips/mman.h | 2 +
linux-headers/asm-riscv/kvm.h | 4 +
linux-headers/linux/kvm.h | 1 +
linux-headers/linux/psci.h | 14 ++
linux-headers/linux/userfaultfd.h | 4 +
linux-headers/linux/vfio.h | 142 ++++++++++++++++++
13 files changed, 298 insertions(+), 20 deletions(-)
diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h
index 48b620cbef..b868488f93 100644
--- a/include/standard-headers/drm/drm_fourcc.h
+++ b/include/standard-headers/drm/drm_fourcc.h
@@ -98,18 +98,42 @@ extern "C" {
#define DRM_FORMAT_INVALID 0
/* color index */
+#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */
+#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */
+#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */
#define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */
-/* 8 bpp Red */
+/* 1 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */
+
+/* 8 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */
+
+/* 1 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */
+
+/* 8 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
-/* 10 bpp Red */
+/* 10 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */
-/* 12 bpp Red */
+/* 12 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */
-/* 16 bpp Red */
+/* 16 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */
/* 16 bpp RG */
@@ -204,7 +228,9 @@ extern "C" {
#define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
#define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */
#define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */
#define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */
#define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */
diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h
index 4537da20cc..1dc56cdc0a 100644
--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
@@ -736,6 +736,51 @@ enum ethtool_module_power_mode {
ETHTOOL_MODULE_POWER_MODE_HIGH,
};
+/**
+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE
+ * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are
+ * unknown
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled
+ */
+enum ethtool_podl_pse_admin_state {
+ ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1,
+ ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
+ ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED,
+};
+
+/**
+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE.
+ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is
+ * asserted true when the PoDL PSE state diagram variable mr_pse_enable is
+ * false"
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is
+ * asserted true when either of the PSE state diagram variables
+ * pi_detecting or pi_classifying is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower”
+ * is asserted true when the PoDL PSE state diagram variable pi_powered is
+ * true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted
+ * true when the PoDL PSE state diagram variable pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true
+ * when the logical combination of the PoDL PSE state diagram variables
+ * pi_prebiased*!pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted
+ * true when the PoDL PSE state diagram variable overload_held is true."
+ */
+enum ethtool_podl_pse_pw_d_status {
+ ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR,
+};
+
/**
* struct ethtool_gstrings - string set for data tagging
* @cmd: Command number = %ETHTOOL_GSTRINGS
@@ -1840,6 +1885,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
#define MASTER_SLAVE_STATE_SLAVE 3
#define MASTER_SLAVE_STATE_ERR 4
+/* These are used to throttle the rate of data on the phy interface when the
+ * native speed of the interface is higher than the link speed. These should
+ * not be used for phy interfaces which natively support multiple speeds (e.g.
+ * MII or SGMII).
+ */
+/* No rate matching performed. */
+#define RATE_MATCH_NONE 0
+/* The phy sends pause frames to throttle the MAC. */
+#define RATE_MATCH_PAUSE 1
+/* The phy asserts CRS to prevent the MAC from transmitting. */
+#define RATE_MATCH_CRS 2
+/* The MAC is programmed with a sufficiently-large IPG. */
+#define RATE_MATCH_OPEN_LOOP 3
+
/* Which connector port. */
#define PORT_TP 0x00
#define PORT_AUI 0x01
@@ -2033,8 +2092,8 @@ enum ethtool_reset_flags {
* reported consistently by PHYLIB. Read-only.
* @master_slave_cfg: Master/slave port mode.
* @master_slave_state: Master/slave port state.
+ * @rate_matching: Rate adaptation performed by the PHY
* @reserved: Reserved for future use; see the note on reserved space.
- * @reserved1: Reserved for future use; see the note on reserved space.
* @link_mode_masks: Variable length bitmaps.
*
* If autonegotiation is disabled, the speed and @duplex represent the
@@ -2085,7 +2144,7 @@ struct ethtool_link_settings {
uint8_t transceiver;
uint8_t master_slave_cfg;
uint8_t master_slave_state;
- uint8_t reserved1[1];
+ uint8_t rate_matching;
uint32_t reserved[7];
uint32_t link_mode_masks[];
/* layout of link_mode_masks fields:
diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
index bda06258be..713d259768 100644
--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
@@ -194,6 +194,9 @@
* - add FUSE_SECURITY_CTX init flag
* - add security context to create, mkdir, symlink, and mknod requests
* - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX
+ *
+ * 7.37
+ * - add FUSE_TMPFILE
*/
#ifndef _LINUX_FUSE_H
@@ -225,7 +228,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 36
+#define FUSE_KERNEL_MINOR_VERSION 37
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -533,6 +536,7 @@ enum fuse_opcode {
FUSE_SETUPMAPPING = 48,
FUSE_REMOVEMAPPING = 49,
FUSE_SYNCFS = 50,
+ FUSE_TMPFILE = 51,
/* CUSE specific operations */
CUSE_INIT = 4096,
diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h
index 50790aee5a..815f7a1dff 100644
--- a/include/standard-headers/linux/input-event-codes.h
+++ b/include/standard-headers/linux/input-event-codes.h
@@ -862,6 +862,7 @@
#define ABS_TOOL_WIDTH 0x1c
#define ABS_VOLUME 0x20
+#define ABS_PROFILE 0x21
#define ABS_MISC 0x28
diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h
index 2dcc90826a..e81715cd70 100644
--- a/include/standard-headers/linux/virtio_blk.h
+++ b/include/standard-headers/linux/virtio_blk.h
@@ -40,6 +40,7 @@
#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */
#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */
+#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */
/* Legacy feature bits */
#ifndef VIRTIO_BLK_NO_LEGACY
@@ -119,6 +120,21 @@ struct virtio_blk_config {
uint8_t write_zeroes_may_unmap;
uint8_t unused1[3];
+
+ /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */
+ /*
+ * The maximum secure erase sectors (in 512-byte sectors) for
+ * one segment.
+ */
+ __virtio32 max_secure_erase_sectors;
+ /*
+ * The maximum number of secure erase segments in a
+ * secure erase command.
+ */
+ __virtio32 max_secure_erase_seg;
+ /* Secure erase commands must be aligned to this number of sectors. */
+ __virtio32 secure_erase_sector_alignment;
+
} QEMU_PACKED;
/*
@@ -153,6 +169,9 @@ struct virtio_blk_config {
/* Write zeroes command */
#define VIRTIO_BLK_T_WRITE_ZEROES 13
+/* Secure erase command */
+#define VIRTIO_BLK_T_SECURE_ERASE 14
+
#ifndef VIRTIO_BLK_NO_LEGACY
/* Barrier before this op. */
#define VIRTIO_BLK_T_BARRIER 0x80000000
diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h
index 4f3d5aaa11..de687009bf 100644
--- a/linux-headers/asm-generic/hugetlb_encode.h
+++ b/linux-headers/asm-generic/hugetlb_encode.h
@@ -20,18 +20,18 @@
#define HUGETLB_FLAG_ENCODE_SHIFT 26
#define HUGETLB_FLAG_ENCODE_MASK 0x3f
-#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT)
#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h
index 6c1aa92a92..6ce1f1ceb4 100644
--- a/linux-headers/asm-generic/mman-common.h
+++ b/linux-headers/asm-generic/mman-common.h
@@ -77,6 +77,8 @@
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h
index 1be428663c..c6e1fc77c9 100644
--- a/linux-headers/asm-mips/mman.h
+++ b/linux-headers/asm-mips/mman.h
@@ -103,6 +103,8 @@
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h
index 7351417afd..8985ff234c 100644
--- a/linux-headers/asm-riscv/kvm.h
+++ b/linux-headers/asm-riscv/kvm.h
@@ -48,6 +48,7 @@ struct kvm_sregs {
/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
struct kvm_riscv_config {
unsigned long isa;
+ unsigned long zicbom_block_size;
};
/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_M,
KVM_RISCV_ISA_EXT_SVPBMT,
KVM_RISCV_ISA_EXT_SSTC,
+ KVM_RISCV_ISA_EXT_SVINVAL,
+ KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
+ KVM_RISCV_ISA_EXT_ZICBOM,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index ebdafa576d..b2783c5202 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1175,6 +1175,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
#define KVM_CAP_S390_ZPCI_OP 221
#define KVM_CAP_S390_CPU_TOPOLOGY 222
+#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h
index 213b2a0f70..e60dfd8907 100644
--- a/linux-headers/linux/psci.h
+++ b/linux-headers/linux/psci.h
@@ -48,12 +48,26 @@
#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7)
#define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10)
+#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11)
+#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12)
+#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13)
#define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14)
#define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15)
+#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16)
+#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17)
+
#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18)
+#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19)
+#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19)
+#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12)
+#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13)
#define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14)
+#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16)
+#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17)
+
#define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18)
+#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19)
/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff
diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h
index a3a377cd44..ba5d0df52f 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -12,6 +12,10 @@
#include <linux/types.h>
+/* ioctls for /dev/userfaultfd */
+#define USERFAULTFD_IOC 0xAA
+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00)
+
/*
* If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
* UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index ede44b5572..bee7e42198 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -986,6 +986,148 @@ enum vfio_device_mig_state {
VFIO_DEVICE_STATE_RUNNING_P2P = 5,
};
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power
+ * state with the platform-based power management. Device use of lower power
+ * states depends on factors managed by the runtime power management core,
+ * including system level support and coordinating support among dependent
+ * devices. Enabling device low power entry does not guarantee lower power
+ * usage by the device, nor is a mechanism provided through this feature to
+ * know the current power state of the device. If any device access happens
+ * (either from the host or through the vfio uAPI) when the device is in the
+ * low power state, then the host will move the device out of the low power
+ * state as necessary prior to the access. Once the access is completed, the
+ * device may re-enter the low power state. For single shot low power support
+ * with wake-up notification, see
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd
+ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after
+ * calling LOW_POWER_EXIT.
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3
+
+/*
+ * This device feature has the same behavior as
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user
+ * provides an eventfd for wake-up notification. When the device moves out of
+ * the low power state for the wake-up, the host will not allow the device to
+ * re-enter a low power state without a subsequent user call to one of the low
+ * power entry device feature IOCTLs. Access to mmap'd device regions is
+ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the
+ * low power exit. The low power exit can happen either through LOW_POWER_EXIT
+ * or through any other access (where the wake-up notification has been
+ * generated). The access to mmap'd device regions will not trigger low power
+ * exit.
+ *
+ * The notification through the provided eventfd will be generated only when
+ * the device has entered and is resumed from a low power state after
+ * calling this device feature IOCTL. A device that has not entered low power
+ * state, as managed through the runtime power management core, will not
+ * generate a notification through the provided eventfd on access. Calling the
+ * LOW_POWER_EXIT feature is optional in the case where notification has been
+ * signaled on the provided eventfd that a resume from low power has occurred.
+ */
+struct vfio_device_low_power_entry_with_wakeup {
+ __s32 wakeup_eventfd;
+ __u32 reserved;
+};
+
+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as
+ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features.
+ * This device feature IOCTL may itself generate a wakeup eventfd notification
+ * in the latter case if the device had previously entered a low power state.
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging.
+ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports
+ * DMA logging.
+ *
+ * DMA logging allows a device to internally record what DMAs the device is
+ * initiating and report them back to userspace. It is part of the VFIO
+ * migration infrastructure that allows implementing dirty page tracking
+ * during the pre copy phase of live migration. Only DMA WRITEs are logged,
+ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE.
+ *
+ * When DMA logging is started a range of IOVAs to monitor is provided and the
+ * device can optimize its logging to cover only the IOVA range given. Each
+ * DMA that the device initiates inside the range will be logged by the device
+ * for later retrieval.
+ *
+ * page_size is an input that hints what tracking granularity the device
+ * should try to achieve. If the device cannot do the hinted page size then
+ * it's the driver choice which page size to pick based on its support.
+ * On output the device will return the page size it selected.
+ *
+ * ranges is a pointer to an array of
+ * struct vfio_device_feature_dma_logging_range.
+ *
+ * The core kernel code guarantees to support by minimum num_ranges that fit
+ * into a single kernel page. User space can try higher values but should give
+ * up if the above can't be achieved as of some driver limitations.
+ *
+ * A single call to start device DMA logging can be issued and a matching stop
+ * should follow at the end. Another start is not allowed in the meantime.
+ */
+struct vfio_device_feature_dma_logging_control {
+ __aligned_u64 page_size;
+ __u32 num_ranges;
+ __u32 __reserved;
+ __aligned_u64 ranges;
+};
+
+struct vfio_device_feature_dma_logging_range {
+ __aligned_u64 iova;
+ __aligned_u64 length;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started
+ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START
+ */
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log
+ *
+ * Query the device's DMA log for written pages within the given IOVA range.
+ * During querying the log is cleared for the IOVA range.
+ *
+ * bitmap is a pointer to an array of u64s that will hold the output bitmap
+ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits
+ * is given by:
+ * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64))
+ *
+ * The input page_size can be any power of two value and does not have to
+ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver
+ * will format its internal logging to match the reporting page size, possibly
+ * by replicating bits if the internal page size is lower than requested.
+ *
+ * The LOGGING_REPORT will only set bits in the bitmap and never clear or
+ * perform any initialization of the user provided bitmap.
+ *
+ * If any error is returned userspace should assume that the dirty log is
+ * corrupted. Error recovery is to consider all memory dirty and try to
+ * restart the dirty tracking, or to abort/restart the whole migration.
+ *
+ * If DMA logging is not enabled, an error will be returned.
+ *
+ */
+struct vfio_device_feature_dma_logging_report {
+ __aligned_u64 iova;
+ __aligned_u64 length;
+ __aligned_u64 page_size;
+ __aligned_u64 bitmap;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8
+
/* -------- API for Type1 VFIO IOMMU -------- */
/**
--
2.31.1

View File

@ -1,330 +0,0 @@
From 29eee1fbb84c0e2f0ece9e6d996afa7238ed2912 Mon Sep 17 00:00:00 2001
From: "manish.mishra" <manish.mishra@nutanix.com>
Date: Tue, 20 Dec 2022 18:44:18 +0000
Subject: [PATCH 7/8] migration: check magic value for deciding the mapping of
channels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders
RH-Bugzilla: 2169732
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Commit: [2/2] 4fb9408478923415a91fe0527bf4b1a0f022f329 (peterx/qemu-kvm)
Current logic assumes that channel connections on the destination side are
always established in the same order as on the source, and that the first one
will always be the main channel, followed by the multifd or post-copy
preemption channel. This may not always be true: even if a channel has a
connection established on the source side, it can be in the pending state on
the destination side while a newer connection is established first,
causing out-of-order mapping of channels on the destination side.
Currently, all channels except post-copy preempt send a magic number; this
patch uses that magic number to decide the type of channel. This logic is
applicable only to precopy (multifd) live migration because, as mentioned, the
post-copy preempt channel does not send any magic number. Also, TLS live
migration already does the TLS handshake before creating other channels, so
this issue is not possible with TLS; hence this logic is avoided for TLS
live migrations. This patch uses read peek to check the magic number of
channels so that current data/control stream management remains
unaffected.
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Suggested-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
migration/channel.c | 45 +++++++++++++++++++++++++++++++++
migration/channel.h | 5 ++++
migration/migration.c | 54 ++++++++++++++++++++++++++++------------
migration/multifd.c | 19 +++++++-------
migration/multifd.h | 2 +-
migration/postcopy-ram.c | 5 +---
migration/postcopy-ram.h | 2 +-
7 files changed, 101 insertions(+), 31 deletions(-)
diff --git a/migration/channel.c b/migration/channel.c
index 1b0815039f..ca3319a309 100644
--- a/migration/channel.c
+++ b/migration/channel.c
@@ -92,3 +92,48 @@ void migration_channel_connect(MigrationState *s,
migrate_fd_connect(s, error);
error_free(error);
}
+
+
+/**
+ * @migration_channel_read_peek - Peek at migration channel, without
+ * actually removing it from channel buffer.
+ *
+ * @ioc: the channel object
+ * @buf: the memory region to read data into
+ * @buflen: the number of bytes to read in @buf
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Returns 0 if successful, returns -1 and sets @errp if fails.
+ */
+int migration_channel_read_peek(QIOChannel *ioc,
+ const char *buf,
+ const size_t buflen,
+ Error **errp)
+{
+ ssize_t len = 0;
+ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
+
+ while (true) {
+ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL,
+ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
+
+ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) {
+ error_setg(errp,
+ "Failed to peek at channel");
+ return -1;
+ }
+
+ if (len == buflen) {
+ break;
+ }
+
+ /* 1ms sleep. */
+ if (qemu_in_coroutine()) {
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
+ } else {
+ g_usleep(1000);
+ }
+ }
+
+ return 0;
+}
diff --git a/migration/channel.h b/migration/channel.h
index 67a461c28a..5bdb8208a7 100644
--- a/migration/channel.h
+++ b/migration/channel.h
@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s,
QIOChannel *ioc,
const char *hostname,
Error *error_in);
+
+int migration_channel_read_peek(QIOChannel *ioc,
+ const char *buf,
+ const size_t buflen,
+ Error **errp);
#endif
diff --git a/migration/migration.c b/migration/migration.c
index f485eea5fb..593dbd25de 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -31,6 +31,7 @@
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
+#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
@@ -663,10 +664,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
MigrationIncomingState *mis = migration_incoming_get_current();
- if (multifd_load_setup(errp) != 0) {
- return false;
- }
-
if (!mis->from_src_file) {
mis->from_src_file = f;
}
@@ -733,31 +730,56 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
MigrationIncomingState *mis = migration_incoming_get_current();
Error *local_err = NULL;
- bool start_migration;
QEMUFile *f;
+ bool default_channel = true;
+ uint32_t channel_magic = 0;
+ int ret = 0;
- if (!mis->from_src_file) {
- /* The first connection (multifd may have multiple) */
+ if (migrate_use_multifd() && !migrate_postcopy_ram() &&
+ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+ /*
+ * With multiple channels, it is possible that we receive channels
+ * out of order on destination side, causing incorrect mapping of
+ * source channels on destination side. Check channel MAGIC to
+ * decide type of channel. Please note this is best effort, postcopy
+ * preempt channel does not send any magic number so avoid it for
+ * postcopy live migration. Also tls live migration already does
+ * tls handshake while initializing main channel so with tls this
+ * issue is not possible.
+ */
+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
+ sizeof(channel_magic), &local_err);
+
+ if (ret != 0) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
+ } else {
+ default_channel = !mis->from_src_file;
+ }
+
+ if (multifd_load_setup(errp) != 0) {
+ error_setg(errp, "Failed to setup multifd channels");
+ return;
+ }
+
+ if (default_channel) {
f = qemu_file_new_input(ioc);
if (!migration_incoming_setup(f, errp)) {
return;
}
-
- /*
- * Common migration only needs one channel, so we can start
- * right now. Some features need more than one channel, we wait.
- */
- start_migration = !migration_needs_multiple_sockets();
} else {
/* Multiple connections */
assert(migration_needs_multiple_sockets());
if (migrate_use_multifd()) {
- start_migration = multifd_recv_new_channel(ioc, &local_err);
+ multifd_recv_new_channel(ioc, &local_err);
} else {
assert(migrate_postcopy_preempt());
f = qemu_file_new_input(ioc);
- start_migration = postcopy_preempt_new_channel(mis, f);
+ postcopy_preempt_new_channel(mis, f);
}
if (local_err) {
error_propagate(errp, local_err);
@@ -765,7 +787,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
}
}
- if (start_migration) {
+ if (migration_has_all_channels()) {
/* If it's a recovery, we're done */
if (postcopy_try_recover()) {
return;
diff --git a/migration/multifd.c b/migration/multifd.c
index 509bbbe3bf..c3385529cf 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -1167,9 +1167,14 @@ int multifd_load_setup(Error **errp)
uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
uint8_t i;
- if (!migrate_use_multifd()) {
+ /*
+ * Return successfully if multiFD recv state is already initialised
+ * or multiFD is not enabled.
+ */
+ if (multifd_recv_state || !migrate_use_multifd()) {
return 0;
}
+
if (!migrate_multi_channels_is_allowed()) {
error_setg(errp, "multifd is not supported by current protocol");
return -1;
@@ -1228,11 +1233,9 @@ bool multifd_recv_all_channels_created(void)
/*
* Try to receive all multifd channels to get ready for the migration.
- * - Return true and do not set @errp when correctly receiving all channels;
- * - Return false and do not set @errp when correctly receiving the current one;
- * - Return false and set @errp when failing to receive the current channel.
+ * Sets @errp when failing to receive the current channel.
*/
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
{
MultiFDRecvParams *p;
Error *local_err = NULL;
@@ -1245,7 +1248,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
"failed to receive packet"
" via multifd channel %d: ",
qatomic_read(&multifd_recv_state->count));
- return false;
+ return;
}
trace_multifd_recv_new_channel(id);
@@ -1255,7 +1258,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
id);
multifd_recv_terminate_threads(local_err);
error_propagate(errp, local_err);
- return false;
+ return;
}
p->c = ioc;
object_ref(OBJECT(ioc));
@@ -1266,6 +1269,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
QEMU_THREAD_JOINABLE);
qatomic_inc(&multifd_recv_state->count);
- return qatomic_read(&multifd_recv_state->count) ==
- migrate_multifd_channels();
}
diff --git a/migration/multifd.h b/migration/multifd.h
index 519f498643..913e4ba274 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -18,7 +18,7 @@ void multifd_save_cleanup(void);
int multifd_load_setup(Error **errp);
int multifd_load_cleanup(Error **errp);
bool multifd_recv_all_channels_created(void);
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
void multifd_recv_sync_main(void);
int multifd_send_sync_main(QEMUFile *f);
int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 0c55df0e52..b98e95dab0 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1538,7 +1538,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
}
}
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
{
/*
* The new loading channel has its own threads, so it needs to be
@@ -1547,9 +1547,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
qemu_file_set_blocking(file, true);
mis->postcopy_qemufile_dst = file;
trace_postcopy_preempt_new_channel();
-
- /* Start the migration immediately */
- return true;
}
/*
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 6147bf7d1d..25881c4127 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -190,7 +190,7 @@ enum PostcopyChannels {
RAM_CHANNEL_MAX,
};
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
int postcopy_preempt_setup(MigrationState *s, Error **errp);
int postcopy_preempt_wait_channel(MigrationState *s);
--
2.31.1

View File

@ -1,325 +0,0 @@
From e5834364958a3914d7b8b46b985a1b054728b466 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Thu, 19 Jan 2023 11:16:45 +0100
Subject: [PATCH 2/8] net: stream: add a new option to automatically reconnect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect
RH-Bugzilla: 2169232
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Acked-by: David Gibson (Red Hat) <dgibson@redhat.com>
RH-Commit: [2/2] 9b87647a9ed2e7c1b91bdfa9d0a736e091c892a5 (lvivier/qemu-kvm-centos)
In stream mode, if the server shuts down, there is currently
no way to reconnect the client to a new server without removing
the NIC device and the netdev backend (or rebooting).
This patch introduces a reconnect option that specifies a delay, in
seconds, after which the client tries to reconnect with the same parameters.
Add a new test in qtest to test the reconnect option and the
connect/disconnect events.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit b95c0d4440950fba6dbef0f781962911fa42abdb)
---
net/stream.c | 53 ++++++++++++++++++-
qapi/net.json | 7 ++-
qemu-options.hx | 6 +--
tests/qtest/netdev-socket.c | 101 ++++++++++++++++++++++++++++++++++++
4 files changed, 162 insertions(+), 5 deletions(-)
diff --git a/net/stream.c b/net/stream.c
index 37ff727e0c..9204b4c96e 100644
--- a/net/stream.c
+++ b/net/stream.c
@@ -39,6 +39,8 @@
#include "io/channel-socket.h"
#include "io/net-listener.h"
#include "qapi/qapi-events-net.h"
+#include "qapi/qapi-visit-sockets.h"
+#include "qapi/clone-visitor.h"
typedef struct NetStreamState {
NetClientState nc;
@@ -49,11 +51,15 @@ typedef struct NetStreamState {
guint ioc_write_tag;
SocketReadState rs;
unsigned int send_index; /* number of bytes sent*/
+ uint32_t reconnect;
+ guint timer_tag;
+ SocketAddress *addr;
} NetStreamState;
static void net_stream_listen(QIONetListener *listener,
QIOChannelSocket *cioc,
void *opaque);
+static void net_stream_arm_reconnect(NetStreamState *s);
static gboolean net_stream_writable(QIOChannel *ioc,
GIOCondition condition,
@@ -170,6 +176,7 @@ static gboolean net_stream_send(QIOChannel *ioc,
qemu_set_info_str(&s->nc, "%s", "");
qapi_event_send_netdev_stream_disconnected(s->nc.name);
+ net_stream_arm_reconnect(s);
return G_SOURCE_REMOVE;
}
@@ -187,6 +194,14 @@ static gboolean net_stream_send(QIOChannel *ioc,
static void net_stream_cleanup(NetClientState *nc)
{
NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc);
+ if (s->timer_tag) {
+ g_source_remove(s->timer_tag);
+ s->timer_tag = 0;
+ }
+ if (s->addr) {
+ qapi_free_SocketAddress(s->addr);
+ s->addr = NULL;
+ }
if (s->ioc) {
if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) {
if (s->ioc_read_tag) {
@@ -346,12 +361,37 @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque)
error:
object_unref(OBJECT(s->ioc));
s->ioc = NULL;
+ net_stream_arm_reconnect(s);
+}
+
+static gboolean net_stream_reconnect(gpointer data)
+{
+ NetStreamState *s = data;
+ QIOChannelSocket *sioc;
+
+ s->timer_tag = 0;
+
+ sioc = qio_channel_socket_new();
+ s->ioc = QIO_CHANNEL(sioc);
+ qio_channel_socket_connect_async(sioc, s->addr,
+ net_stream_client_connected, s,
+ NULL, NULL);
+ return G_SOURCE_REMOVE;
+}
+
+static void net_stream_arm_reconnect(NetStreamState *s)
+{
+ if (s->reconnect && s->timer_tag == 0) {
+ s->timer_tag = g_timeout_add_seconds(s->reconnect,
+ net_stream_reconnect, s);
+ }
}
static int net_stream_client_init(NetClientState *peer,
const char *model,
const char *name,
SocketAddress *addr,
+ uint32_t reconnect,
Error **errp)
{
NetStreamState *s;
@@ -364,6 +404,10 @@ static int net_stream_client_init(NetClientState *peer,
s->ioc = QIO_CHANNEL(sioc);
s->nc.link_down = true;
+ s->reconnect = reconnect;
+ if (reconnect) {
+ s->addr = QAPI_CLONE(SocketAddress, addr);
+ }
qio_channel_socket_connect_async(sioc, addr,
net_stream_client_connected, s,
NULL, NULL);
@@ -380,7 +424,14 @@ int net_init_stream(const Netdev *netdev, const char *name,
sock = &netdev->u.stream;
if (!sock->has_server || !sock->server) {
- return net_stream_client_init(peer, "stream", name, sock->addr, errp);
+ return net_stream_client_init(peer, "stream", name, sock->addr,
+ sock->has_reconnect ? sock->reconnect : 0,
+ errp);
+ }
+ if (sock->has_reconnect) {
+ error_setg(errp, "'reconnect' option is incompatible with "
+ "socket in server mode");
+ return -1;
}
return net_stream_server_init(peer, "stream", name, sock->addr, errp);
}
diff --git a/qapi/net.json b/qapi/net.json
index 522ac582ed..d6eb30008b 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -585,6 +585,10 @@
# @addr: socket address to listen on (server=true)
# or connect to (server=false)
# @server: create server socket (default: false)
+# @reconnect: For a client socket, if a socket is disconnected,
+# then attempt a reconnect after the given number of seconds.
+# Setting this to zero disables this function. (default: 0)
+# (since 8.0)
#
# Only SocketAddress types 'unix', 'inet' and 'fd' are supported.
#
@@ -593,7 +597,8 @@
{ 'struct': 'NetdevStreamOptions',
'data': {
'addr': 'SocketAddress',
- '*server': 'bool' } }
+ '*server': 'bool',
+ '*reconnect': 'uint32' } }
##
# @NetdevDgramOptions:
diff --git a/qemu-options.hx b/qemu-options.hx
index ea02ca3a45..48eef4aa2c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2766,9 +2766,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
"-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n"
" configure a network backend to connect to another network\n"
" using an UDP tunnel\n"
- "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n"
- "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n"
- "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n"
+ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n"
+ "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n"
+ "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n"
" configure a network backend to connect to another network\n"
" using a socket connection in stream mode.\n"
"-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n"
diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c
index 6ba256e173..acc32c378b 100644
--- a/tests/qtest/netdev-socket.c
+++ b/tests/qtest/netdev-socket.c
@@ -11,6 +11,10 @@
#include <glib/gstdio.h>
#include "../unit/socket-helpers.h"
#include "libqtest.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/sockets.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qapi-visit-sockets.h"
#define CONNECTION_TIMEOUT 5
@@ -142,6 +146,101 @@ static void test_stream_inet_ipv4(void)
qtest_quit(qts0);
}
+static void wait_stream_connected(QTestState *qts, const char *id,
+ SocketAddress **addr)
+{
+ QDict *resp, *data;
+ QString *qstr;
+ QObject *obj;
+ Visitor *v = NULL;
+
+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED");
+ g_assert_nonnull(resp);
+ data = qdict_get_qdict(resp, "data");
+ g_assert_nonnull(data);
+
+ qstr = qobject_to(QString, qdict_get(data, "netdev-id"));
+ g_assert_nonnull(data);
+
+ g_assert(!strcmp(qstring_get_str(qstr), id));
+
+ obj = qdict_get(data, "addr");
+
+ v = qobject_input_visitor_new(obj);
+ visit_type_SocketAddress(v, NULL, addr, NULL);
+ visit_free(v);
+ qobject_unref(resp);
+}
+
+static void wait_stream_disconnected(QTestState *qts, const char *id)
+{
+ QDict *resp, *data;
+ QString *qstr;
+
+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED");
+ g_assert_nonnull(resp);
+ data = qdict_get_qdict(resp, "data");
+ g_assert_nonnull(data);
+
+ qstr = qobject_to(QString, qdict_get(data, "netdev-id"));
+ g_assert_nonnull(data);
+
+ g_assert(!strcmp(qstring_get_str(qstr), id));
+ qobject_unref(resp);
+}
+
+static void test_stream_inet_reconnect(void)
+{
+ QTestState *qts0, *qts1;
+ int port;
+ SocketAddress *addr;
+
+ port = inet_get_free_port(false);
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=true,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,server=false,id=st0,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,reconnect=1,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+ wait_stream_connected(qts0, "st0", &addr);
+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+ qapi_free_SocketAddress(addr);
+
+ /* kill server */
+ qtest_quit(qts0);
+
+ /* check client has been disconnected */
+ wait_stream_disconnected(qts1, "st0");
+
+ /* restart server */
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=true,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+ /* wait connection events*/
+ wait_stream_connected(qts0, "st0", &addr);
+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+ qapi_free_SocketAddress(addr);
+
+ wait_stream_connected(qts1, "st0", &addr);
+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+ g_assert_cmpint(atoi(addr->u.inet.port), ==, port);
+ qapi_free_SocketAddress(addr);
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+}
+
static void test_stream_inet_ipv6(void)
{
QTestState *qts0, *qts1;
@@ -418,6 +517,8 @@ int main(int argc, char **argv)
#ifndef _WIN32
qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast);
#endif
+ qtest_add_func("/netdev/stream/inet/reconnect",
+ test_stream_inet_reconnect);
}
if (has_ipv6) {
qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6);
--
2.31.1

View File

@ -1,55 +0,0 @@
From 0dd4be411e35f00d006d89a15d9161f5d8783c1d Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 10/12] physmem: add missing memory barrier
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [7/9] ee4875cb8c564f0510e48b00a5d95c0e6ea6301b (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit 33828ca11da08436e1b32f3e79dabce3061a0427
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri Mar 3 14:36:32 2023 +0100
physmem: add missing memory barrier
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
softmmu/physmem.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 1b606a3002..772c9896cd 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -3117,6 +3117,8 @@ void cpu_register_map_client(QEMUBH *bh)
qemu_mutex_lock(&map_client_list_lock);
client->bh = bh;
QLIST_INSERT_HEAD(&map_client_list, client, link);
+ /* Write map_client_list before reading in_use. */
+ smp_mb();
if (!qatomic_read(&bounce.in_use)) {
cpu_notify_map_clients_locked();
}
@@ -3309,6 +3311,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
qemu_vfree(bounce.buffer);
bounce.buffer = NULL;
memory_region_unref(bounce.mr);
+ /* Clear in_use before reading map_client_list. */
qatomic_mb_set(&bounce.in_use, false);
cpu_notify_map_clients();
}
--
2.39.1

View File

@ -1,177 +0,0 @@
From 1fdc864f9ac927f3ea407f35f6771a4b2e8f509f Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 04/12] qatomic: add smp_mb__before/after_rmw()
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [1/9] e8d0b64670bff778d275b1fb477dcee0c109251a (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit ff00bed1897c3d27adc5b0cec6f6eeb5a7d13176
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu Mar 2 11:10:56 2023 +0100
qatomic: add smp_mb__before/after_rmw()
On ARM, seqcst loads and stores (which QEMU does not use) are compiled
respectively as LDAR and STLR instructions. Even though LDAR is
also used for load-acquire operations, it also waits for all STLRs to
leave the store buffer. Thus, LDAR and STLR alone are load-acquire
and store-release operations, but LDAR also provides store-against-load
ordering as long as the previous store is a STLR.
Compare this to ARMv7, where store-release is DMB+STR and load-acquire
is LDR+DMB, but an additional DMB is needed between store-seqcst and
load-seqcst (e.g. DMB+STR+DMB+LDR+DMB); or with x86, where MOV provides
load-acquire and store-release semantics and the two can be reordered.
Likewise, on ARM sequentially consistent read-modify-write operations only
need to use LDAXR and STLXR respectively for the load and the store, while
on x86 they need to use the stronger LOCK prefix.
In a strange twist of events, however, the _stronger_ semantics
of the ARM instructions can end up causing bugs on ARM, not on x86.
The problems occur when seqcst atomics are mixed with relaxed atomics.
QEMU's atomics try to bridge the Linux API (that most of the developers
are familiar with) and the C11 API, and the two have a substantial
difference:
- in Linux, strongly-ordered atomics such as atomic_add_return() affect
the global ordering of _all_ memory operations, including for example
READ_ONCE()/WRITE_ONCE()
- in C11, sequentially consistent atomics (except for seq-cst fences)
only affect the ordering of sequentially consistent operations.
In particular, since relaxed loads are done with LDR on ARM, they are
not ordered against seqcst stores (which are done with STLR).
QEMU implements high-level synchronization primitives with the idea that
the primitives contain the necessary memory barriers, and the callers can
use relaxed atomics (qatomic_read/qatomic_set) or even regular accesses.
This is very much incompatible with the C11 view that seqcst accesses
are only ordered against other seqcst accesses, and requires using seqcst
fences as in the following example:
    qatomic_set(&y, 1);            qatomic_set(&x, 1);
    smp_mb();                      smp_mb();
    ... qatomic_read(&x) ...       ... qatomic_read(&y) ...
When a qatomic_*() read-modify-write operation is used instead of one
or both stores, developers that are more familiar with the Linux API may
be tempted to omit the smp_mb(), which will work on x86 but not on ARM.
This nasty difference between Linux and C11 read-modify-write operations
has already caused issues in util/async.c and more are being found.
Provide something similar to Linux smp_mb__before/after_atomic(); this
has the double function of documenting clearly why there is a memory
barrier, and avoiding a double barrier on x86 and s390x systems.
The new macro can already be put to use in qatomic_mb_set().
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
docs/devel/atomics.rst | 26 +++++++++++++++++++++-----
include/qemu/atomic.h | 17 ++++++++++++++++-
2 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst
index 52baa0736d..10fbfc58bb 100644
--- a/docs/devel/atomics.rst
+++ b/docs/devel/atomics.rst
@@ -25,7 +25,8 @@ provides macros that fall in three camps:
- weak atomic access and manual memory barriers: ``qatomic_read()``,
``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``,
- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``;
+ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``,
+ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``;
- sequentially consistent atomic access: everything else.
@@ -470,7 +471,7 @@ and memory barriers, and the equivalents in QEMU:
sequential consistency.
- in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in
- the total ordering enforced by sequentially-consistent operations.
+ the ordering enforced by read-modify-write operations.
This is because QEMU uses the C11 memory model. The following example
is correct in Linux but not in QEMU:
@@ -486,9 +487,24 @@ and memory barriers, and the equivalents in QEMU:
because the read of ``y`` can be moved (by either the processor or the
compiler) before the write of ``x``.
- Fixing this requires an ``smp_mb()`` memory barrier between the write
- of ``x`` and the read of ``y``. In the common case where only one thread
- writes ``x``, it is also possible to write it like this:
+ Fixing this requires a full memory barrier between the write of ``x`` and
+ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and
+ ``smp_mb__after_rmw()``; they act both as an optimization,
+ avoiding the memory barrier on processors where it is unnecessary,
+ and as a clarification of this corner case of the C11 memory model:
+
+ +--------------------------------+
+ | QEMU (correct) |
+ +================================+
+ | :: |
+ | |
+ | a = qatomic_fetch_add(&x, 2);|
+ | smp_mb__after_rmw(); |
+ | b = qatomic_read(&y); |
+ +--------------------------------+
+
+ In the common case where only one thread writes ``x``, it is also possible
+ to write it like this:
+--------------------------------+
| QEMU (correct) |
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 874134fd19..f85834ee8b 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -245,6 +245,20 @@
#define smp_wmb() smp_mb_release()
#define smp_rmb() smp_mb_acquire()
+/*
+ * SEQ_CST is weaker than the older __sync_* builtins and Linux
+ * kernel read-modify-write atomics. Provide a macro to obtain
+ * the same semantics.
+ */
+#if !defined(QEMU_SANITIZE_THREAD) && \
+ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
+# define smp_mb__before_rmw() signal_barrier()
+# define smp_mb__after_rmw() signal_barrier()
+#else
+# define smp_mb__before_rmw() smp_mb()
+# define smp_mb__after_rmw() smp_mb()
+#endif
+
/* qatomic_mb_read/set semantics map Java volatile variables. They are
* less expensive on some platforms (notably POWER) than fully
* sequentially consistent operations.
@@ -259,7 +273,8 @@
#if !defined(QEMU_SANITIZE_THREAD) && \
(defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
/* This is more efficient than a store plus a fence. */
-# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i))
+# define qatomic_mb_set(ptr, i) \
+ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); })
#else
# define qatomic_mb_set(ptr, i) \
({ qatomic_store_release(ptr, i); smp_mb(); })
--
2.39.1

View File

@ -1,67 +0,0 @@
From 46ead2c391924b68741d6da28f28f909b80f5914 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 12 Jan 2023 20:14:51 +0100
Subject: [PATCH 01/20] qcow2: Fix theoretical corruption in store_bitmap()
error path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image
RH-Bugzilla: 2150180
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [1/4] a6a497947179431567d330d0501247a3749fb9fd (kmwolf/centos-qemu-kvm)
In order to write the bitmap table to the image file, it is converted to
big endian. If the write fails, it is passed to clear_bitmap_table() to
free all of the clusters it had allocated before. However, if we don't
convert it back to native endianness first, we'll free things at a wrong
offset.
In practical terms, the offsets will be so high that we won't actually
free any allocated clusters and will just run into an error; in theory,
however, this can cause image corruption.
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230112191454.169353-2-kwolf@redhat.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit b03dd9613bcf8fe948581b2b3585510cb525c382)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/qcow2-bitmap.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index bcad567c0c..3dff99ba06 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -115,7 +115,7 @@ static int update_header_sync(BlockDriverState *bs)
return bdrv_flush(bs->file->bs);
}
-static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size)
+static inline void bitmap_table_bswap_be(uint64_t *bitmap_table, size_t size)
{
size_t i;
@@ -1401,9 +1401,10 @@ static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp)
goto fail;
}
- bitmap_table_to_be(tb, tb_size);
+ bitmap_table_bswap_be(tb, tb_size);
ret = bdrv_pwrite(bs->file, tb_offset, tb_size * sizeof(tb[0]), tb, 0);
if (ret < 0) {
+ bitmap_table_bswap_be(tb, tb_size);
error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file",
bm_name);
goto fail;
--
2.31.1

View File

@ -1,84 +0,0 @@
From f628a08d20b9ab6be24c2ab18b38a934a314c78b Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:40:56 +0100
Subject: [PATCH 14/31] qed: Don't yield in bdrv_qed_co_drain_begin()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [2/16] f18e9aebb7e04a62e309b656bac8f2ab83df657f (sgarzarella/qemu-kvm-c-9-s)
We want to change .bdrv_co_drained_begin() back to be a non-coroutine
callback, so in preparation, avoid yielding in its implementation.
Because we increase bs->in_flight and bdrv_drained_begin() polls, the
behaviour is unchanged.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-2-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 6d47eb0c8bf2d50682c7dccae74d24104076fe23)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block/qed.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/block/qed.c b/block/qed.c
index 2f36ad342c..013f826c44 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -282,9 +282,8 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s)
qemu_co_mutex_unlock(&s->table_lock);
}
-static void coroutine_fn qed_need_check_timer_entry(void *opaque)
+static void coroutine_fn qed_need_check_timer(BDRVQEDState *s)
{
- BDRVQEDState *s = opaque;
int ret;
trace_qed_need_check_timer_cb(s);
@@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque)
(void) ret;
}
+static void coroutine_fn qed_need_check_timer_entry(void *opaque)
+{
+ BDRVQEDState *s = opaque;
+
+ qed_need_check_timer(opaque);
+ bdrv_dec_in_flight(s->bs);
+}
+
static void qed_need_check_timer_cb(void *opaque)
{
+ BDRVQEDState *s = opaque;
Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque);
+
+ bdrv_inc_in_flight(s->bs);
qemu_coroutine_enter(co);
}
@@ -363,8 +373,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
* header is flushed.
*/
if (s->need_check_timer && timer_pending(s->need_check_timer)) {
+ Coroutine *co;
+
qed_cancel_need_check_timer(s);
- qed_need_check_timer_entry(s);
+ co = qemu_coroutine_create(qed_need_check_timer_entry, s);
+ bdrv_inc_in_flight(bs);
+ aio_co_enter(bdrv_get_aio_context(bs), co);
}
}
--
2.31.1

View File

@ -1,75 +0,0 @@
From 7a9907c65e3e2bbb0c119acdbbeb4381e7f1d902 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 09/12] qemu-coroutine-lock: add smp_mb__after_rmw()
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [6/9] 4b1723b1ad670ec4c85240390b4fc15ff361154f (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit e3a3b6ec8169eab2feb241b4982585001512cd55
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri Mar 3 10:52:59 2023 +0100
qemu-coroutine-lock: add smp_mb__after_rmw()
mutex->from_push and mutex->handoff in qemu-coroutine-lock implement
the familiar pattern:
    write a          write b
    smp_mb()         smp_mb()
    read b           read a
The memory barrier is required by the C memory model even after a
SEQ_CST read-modify-write operation such as QSLIST_INSERT_HEAD_ATOMIC.
Add it and avoid the unclear qatomic_mb_read() operation.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
util/qemu-coroutine-lock.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
index 45c6b57374..c5897bd963 100644
--- a/util/qemu-coroutine-lock.c
+++ b/util/qemu-coroutine-lock.c
@@ -202,10 +202,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx,
trace_qemu_co_mutex_lock_entry(mutex, self);
push_waiter(mutex, &w);
+ /*
+ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set
+ * in qemu_co_mutex_unlock.
+ */
+ smp_mb__after_rmw();
+
/* This is the "Responsibility Hand-Off" protocol; a lock() picks from
* a concurrent unlock() the responsibility of waking somebody up.
*/
- old_handoff = qatomic_mb_read(&mutex->handoff);
+ old_handoff = qatomic_read(&mutex->handoff);
if (old_handoff &&
has_waiters(mutex) &&
qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) {
@@ -304,6 +310,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
}
our_handoff = mutex->sequence;
+ /* Set handoff before checking for waiters. */
qatomic_mb_set(&mutex->handoff, our_handoff);
if (!has_waiters(mutex)) {
/* The concurrent lock has not added itself yet, so it
--
2.39.1

View File

@ -1,197 +0,0 @@
From b1970c733dc46b2a8f648997a7e1c5d12900ff54 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:27:04 +0200
Subject: [PATCH 17/20] qemu-img: Change info key names for protocol nodes
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [12/12] 67c260aaa05466410503fecee6210bf9d47e8c7c (hreitz/qemu-kvm-c-9-s)
Currently, when querying a qcow2 image, qemu-img info reports something
like this:
image: test.qcow2
file format: qcow2
virtual size: 64 MiB (67108864 bytes)
disk size: 196 KiB
cluster_size: 65536
Format specific information:
compat: 1.1
compression type: zlib
lazy refcounts: false
refcount bits: 16
corrupt: false
extended l2: false
Child node '/file':
image: test.qcow2
file format: file
virtual size: 192 KiB (197120 bytes)
disk size: 196 KiB
Format specific information:
extent size hint: 1048576
Notably, the way the keys are named is specific for image files: The
filename is shown under "image", the BDS driver under "file format", and
the BDS length under "virtual size". This does not make much sense for
nodes that are not actually supposed to be guest images, like the /file
child node shown above.
Give bdrv_node_info_dump() a @protocol parameter that gives a hint that
the respective node is probably just used for data storage and does not
necessarily present the data for a VM guest disk. This renames the keys
so that with this patch, the output becomes:
image: test.qcow2
[...]
Child node '/file':
filename: test.qcow2
protocol type: file
file length: 192 KiB (197120 bytes)
disk size: 196 KiB
Format specific information:
extent size hint: 1048576
(Perhaps we should also rename "Format specific information", but I
could not come up with anything better that will not become problematic
if we guess wrong with the protocol "heuristic".)
This change affects iotest 302, which has protocol node information in
its reference output.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-13-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit d570177b50c389f379f93183155a27d44856ab46)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/monitor/block-hmp-cmds.c | 2 +-
block/qapi.c | 39 ++++++++++++++++++++++++++++------
include/block/qapi.h | 2 +-
qemu-img.c | 3 ++-
tests/qemu-iotests/302.out | 6 +++---
5 files changed, 39 insertions(+), 13 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 72824d4e2e..4d83339a5d 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
monitor_printf(mon, "\nImages:\n");
image_info = inserted->image;
while (1) {
- bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0);
+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0, false);
if (image_info->has_backing_image) {
image_info = image_info->backing_image;
} else {
diff --git a/block/qapi.c b/block/qapi.c
index 3e35603f0c..56f398c500 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -934,24 +934,49 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
visit_free(v);
}
-void bdrv_node_info_dump(BlockNodeInfo *info, int indentation)
+/**
+ * Print the given @info object in human-readable form. Every field is indented
+ * using the given @indentation (four spaces per indentation level).
+ *
+ * When using this to print a whole block graph, @protocol can be set to true to
+ * signify that the given information is associated with a protocol node, i.e.
+ * just data storage for an image, such that the data it presents is not really
+ * a full VM disk. If so, several fields change name: For example, "virtual
+ * size" is printed as "file length".
+ * (Consider a qcow2 image, which is represented by a qcow2 node and a file
+ * node. Printing a "virtual size" for the file node does not make sense,
+ * because without the qcow2 node, it is not really a guest disk, so it does not
+ * have a "virtual size". Therefore, we call it "file length" instead.)
+ *
+ * @protocol is ignored when @indentation is 0, because we take that to mean
+ * that the associated node is the root node in the queried block graph, and
+ * thus is always to be interpreted as a standalone guest disk.
+ */
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol)
{
char *size_buf, *dsize_buf;
g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, "");
+ if (indentation == 0) {
+ /* Top level, consider this a normal image */
+ protocol = false;
+ }
+
if (!info->has_actual_size) {
dsize_buf = g_strdup("unavailable");
} else {
dsize_buf = size_to_str(info->actual_size);
}
size_buf = size_to_str(info->virtual_size);
- qemu_printf("%simage: %s\n"
- "%sfile format: %s\n"
- "%svirtual size: %s (%" PRId64 " bytes)\n"
+ qemu_printf("%s%s: %s\n"
+ "%s%s: %s\n"
+ "%s%s: %s (%" PRId64 " bytes)\n"
"%sdisk size: %s\n",
- ind_s, info->filename,
- ind_s, info->format,
- ind_s, size_buf, info->virtual_size,
+ ind_s, protocol ? "filename" : "image", info->filename,
+ ind_s, protocol ? "protocol type" : "file format",
+ info->format,
+ ind_s, protocol ? "file length" : "virtual size",
+ size_buf, info->virtual_size,
ind_s, dsize_buf);
g_free(size_buf);
g_free(dsize_buf);
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 38855f2ae9..26113da21a 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -51,5 +51,5 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
const char *prefix,
int indentation);
-void bdrv_node_info_dump(BlockNodeInfo *info, int indentation);
+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol);
#endif
diff --git a/qemu-img.c b/qemu-img.c
index e281011245..2943625c67 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2853,7 +2853,8 @@ static void dump_human_image_info(BlockGraphInfo *info, int indentation,
{
BlockChildInfoList *children_list;
- bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation);
+ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation,
+ info->children == NULL);
for (children_list = info->children; children_list;
children_list = children_list->next)
diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out
index edfa1c4f05..7b5014cdd8 100644
--- a/tests/qemu-iotests/302.out
+++ b/tests/qemu-iotests/302.out
@@ -5,9 +5,9 @@ file format: raw
virtual size: 448 KiB (458752 bytes)
disk size: unavailable
Child node '/file':
- image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock
- file format: nbd
- virtual size: 448 KiB (458752 bytes)
+ filename: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock
+ protocol type: nbd
+ file length: 448 KiB (458752 bytes)
disk size: unavailable
=== Converted image info ===
--
2.31.1

View File

@ -1,261 +0,0 @@
From ea73e9de42b446ce1049805c23f7706e4f87ed1f Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:27:03 +0200
Subject: [PATCH 16/20] qemu-img: Let info print block graph
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [11/12] 2c1b8a03c918484449e876acf4c6663766848ad8 (hreitz/qemu-kvm-c-9-s)
For every node in the backing chain, collect its BlockGraphInfo struct
using bdrv_query_block_graph_info(). Print all nodes' information,
indenting child nodes and labelling them with a path constructed from
the child names leading to the node from the root (e.g. /file/file).
Note that we open each image with BDRV_O_NO_BACKING, so its backing
child is omitted from this graph, and thus presented in the previous
manner: By simply concatenating all images' information, separated with
blank lines.
This affects two iotests:
- 065: Here we try to get the format node's format specific information.
The pre-patch code does so by taking all lines from "Format specific
information:" until an empty line. This format specific information
is no longer followed by an empty line, though, but by child node
information, so limit the range by "Child node '/file':".
- 302: Calls qemu_img() for qemu-img info directly, which does not
filter the output, so the child node information ends up in the
output.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-12-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit c04d0ab026201d21873a63f768cb69c4554dfec1)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
qapi/block-core.json | 4 +--
qemu-img.c | 69 ++++++++++++++++++++++++++------------
tests/qemu-iotests/065 | 2 +-
tests/qemu-iotests/302.out | 5 +++
4 files changed, 56 insertions(+), 24 deletions(-)
diff --git a/qapi/block-core.json b/qapi/block-core.json
index d703e0fb16..7f331eb8ea 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -5831,9 +5831,9 @@
##
# @DummyBlockCoreForceArrays:
#
-# Not used by QMP; hack to let us use BlockNodeInfoList internally
+# Not used by QMP; hack to let us use BlockGraphInfoList internally
#
# Since: 8.0
##
{ 'struct': 'DummyBlockCoreForceArrays',
- 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } }
+ 'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } }
diff --git a/qemu-img.c b/qemu-img.c
index 30b4ea58bb..e281011245 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs)
g_free(sn_tab);
}
-static void dump_json_block_node_info_list(BlockNodeInfoList *list)
+static void dump_json_block_graph_info_list(BlockGraphInfoList *list)
{
GString *str;
QObject *obj;
Visitor *v = qobject_output_visitor_new(&obj);
- visit_type_BlockNodeInfoList(v, NULL, &list, &error_abort);
+ visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort);
visit_complete(v, &obj);
str = qobject_to_json_pretty(obj, true);
assert(str != NULL);
@@ -2832,13 +2832,13 @@ static void dump_json_block_node_info_list(BlockNodeInfoList *list)
g_string_free(str, true);
}
-static void dump_json_block_node_info(BlockNodeInfo *info)
+static void dump_json_block_graph_info(BlockGraphInfo *info)
{
GString *str;
QObject *obj;
Visitor *v = qobject_output_visitor_new(&obj);
- visit_type_BlockNodeInfo(v, NULL, &info, &error_abort);
+ visit_type_BlockGraphInfo(v, NULL, &info, &error_abort);
visit_complete(v, &obj);
str = qobject_to_json_pretty(obj, true);
assert(str != NULL);
@@ -2848,9 +2848,29 @@ static void dump_json_block_node_info(BlockNodeInfo *info)
g_string_free(str, true);
}
-static void dump_human_image_info_list(BlockNodeInfoList *list)
+static void dump_human_image_info(BlockGraphInfo *info, int indentation,
+ const char *path)
{
- BlockNodeInfoList *elem;
+ BlockChildInfoList *children_list;
+
+ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation);
+
+ for (children_list = info->children; children_list;
+ children_list = children_list->next)
+ {
+ BlockChildInfo *child = children_list->value;
+ g_autofree char *child_path = NULL;
+
+ printf("%*sChild node '%s%s':\n",
+ indentation * 4, "", path, child->name);
+ child_path = g_strdup_printf("%s%s/", path, child->name);
+ dump_human_image_info(child->info, indentation + 1, child_path);
+ }
+}
+
+static void dump_human_image_info_list(BlockGraphInfoList *list)
+{
+ BlockGraphInfoList *elem;
bool delim = false;
for (elem = list; elem; elem = elem->next) {
@@ -2859,7 +2879,7 @@ static void dump_human_image_info_list(BlockNodeInfoList *list)
}
delim = true;
- bdrv_node_info_dump(elem->value, 0);
+ dump_human_image_info(elem->value, 0, "/");
}
}
@@ -2869,7 +2889,7 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b)
}
/**
- * Open an image file chain and return an BlockNodeInfoList
+ * Open an image file chain and return an BlockGraphInfoList
*
* @filename: topmost image filename
* @fmt: topmost image format (may be NULL to autodetect)
@@ -2880,13 +2900,13 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b)
* opening an image file. If there was an error a message will have been
* printed to stderr.
*/
-static BlockNodeInfoList *collect_image_info_list(bool image_opts,
- const char *filename,
- const char *fmt,
- bool chain, bool force_share)
+static BlockGraphInfoList *collect_image_info_list(bool image_opts,
+ const char *filename,
+ const char *fmt,
+ bool chain, bool force_share)
{
- BlockNodeInfoList *head = NULL;
- BlockNodeInfoList **tail = &head;
+ BlockGraphInfoList *head = NULL;
+ BlockGraphInfoList **tail = &head;
GHashTable *filenames;
Error *err = NULL;
@@ -2895,7 +2915,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts,
while (filename) {
BlockBackend *blk;
BlockDriverState *bs;
- BlockNodeInfo *info;
+ BlockGraphInfo *info;
if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
error_report("Backing file '%s' creates an infinite loop.",
@@ -2912,7 +2932,14 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts,
}
bs = blk_bs(blk);
- bdrv_query_block_node_info(bs, &info, &err);
+ /*
+ * Note that the returned BlockGraphInfo object will not have
+ * information about this image's backing node, because we have opened
+ * it with BDRV_O_NO_BACKING. Printing this object will therefore not
+ * duplicate the backing chain information that we obtain by walking
+ * the chain manually here.
+ */
+ bdrv_query_block_graph_info(bs, &info, &err);
if (err) {
error_report_err(err);
blk_unref(blk);
@@ -2945,7 +2972,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts,
return head;
err:
- qapi_free_BlockNodeInfoList(head);
+ qapi_free_BlockGraphInfoList(head);
g_hash_table_destroy(filenames);
return NULL;
}
@@ -2956,7 +2983,7 @@ static int img_info(int argc, char **argv)
OutputFormat output_format = OFORMAT_HUMAN;
bool chain = false;
const char *filename, *fmt, *output;
- BlockNodeInfoList *list;
+ BlockGraphInfoList *list;
bool image_opts = false;
bool force_share = false;
@@ -3035,14 +3062,14 @@ static int img_info(int argc, char **argv)
break;
case OFORMAT_JSON:
if (chain) {
- dump_json_block_node_info_list(list);
+ dump_json_block_graph_info_list(list);
} else {
- dump_json_block_node_info(list->value);
+ dump_json_block_graph_info(list->value);
}
break;
}
- qapi_free_BlockNodeInfoList(list);
+ qapi_free_BlockGraphInfoList(list);
return 0;
}
diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065
index b724c89c7c..b76701c71e 100755
--- a/tests/qemu-iotests/065
+++ b/tests/qemu-iotests/065
@@ -56,7 +56,7 @@ class TestQemuImgInfo(TestImageInfoSpecific):
def test_human(self):
data = qemu_img('info', '--output=human', test_img).stdout.split('\n')
data = data[(data.index('Format specific information:') + 1)
- :data.index('')]
+ :data.index("Child node '/file':")]
for field in data:
self.assertTrue(re.match('^ {4}[^ ]', field) is not None)
data = [line.strip() for line in data]
diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out
index 3e7c281b91..edfa1c4f05 100644
--- a/tests/qemu-iotests/302.out
+++ b/tests/qemu-iotests/302.out
@@ -4,6 +4,11 @@ image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock
file format: raw
virtual size: 448 KiB (458752 bytes)
disk size: unavailable
+Child node '/file':
+ image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock
+ file format: nbd
+ virtual size: 448 KiB (458752 bytes)
+ disk size: unavailable
=== Converted image info ===
image: TEST_IMG
--
2.31.1

View File

@ -1,241 +0,0 @@
From dca4cbe680baff837ca8ac8bd39b77b46af3f64b Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 20 Jun 2022 18:26:57 +0200
Subject: [PATCH 10/20] qemu-img: Use BlockNodeInfo
RH-Author: Hanna Czenczek <hreitz@redhat.com>
RH-MergeRequest: 145: Show protocol-level information in qemu-img info
RH-Bugzilla: 1860292
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [5/12] b599af3ec05951a0ba11d9eae2ee19148d6bf624 (hreitz/qemu-kvm-c-9-s)
qemu-img info never uses ImageInfo's backing-image field, because it
opens the backing chain one by one with BDRV_O_NO_BACKING, and prints
all backing chain nodes' information consecutively. Use BlockNodeInfo
to make it clear that we only print information about a single node, and
that we are not using the backing-image field.
Notably, bdrv_image_info_dump() does not evaluate the backing-image
field, so we can easily make it take a BlockNodeInfo pointer (and
consequently rename it to bdrv_node_info_dump()). It makes more
sense this way, because again, the interface now makes it syntactically
clear that backing-image is ignored by this function.
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220620162704.80987-6-hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit b1f4cd1589a16fec02f264a09bd3560e4ccce3c2)
Signed-off-by: Hanna Czenczek <hreitz@redhat.com>
---
block/monitor/block-hmp-cmds.c | 2 +-
block/qapi.c | 2 +-
include/block/qapi.h | 2 +-
qapi/block-core.json | 4 +--
qemu-img.c | 48 +++++++++++++++++-----------------
5 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index b6135e9bfe..aa37faa601 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
monitor_printf(mon, "\nImages:\n");
image_info = inserted->image;
while (1) {
- bdrv_image_info_dump(image_info);
+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info));
if (image_info->has_backing_image) {
image_info = image_info->backing_image;
} else {
diff --git a/block/qapi.c b/block/qapi.c
index e5022b4481..ad88bf9b38 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -865,7 +865,7 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
visit_free(v);
}
-void bdrv_image_info_dump(ImageInfo *info)
+void bdrv_node_info_dump(BlockNodeInfo *info)
{
char *size_buf, *dsize_buf;
if (!info->has_actual_size) {
diff --git a/include/block/qapi.h b/include/block/qapi.h
index c7de4e3fa9..22198dcd0c 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -45,5 +45,5 @@ void bdrv_query_image_info(BlockDriverState *bs,
void bdrv_snapshot_dump(QEMUSnapshotInfo *sn);
void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec,
const char *prefix);
-void bdrv_image_info_dump(ImageInfo *info);
+void bdrv_node_info_dump(BlockNodeInfo *info);
#endif
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 7720da0498..4cf2deeb6c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -5796,9 +5796,9 @@
##
# @DummyBlockCoreForceArrays:
#
-# Not used by QMP; hack to let us use ImageInfoList internally
+# Not used by QMP; hack to let us use BlockNodeInfoList internally
#
# Since: 8.0
##
{ 'struct': 'DummyBlockCoreForceArrays',
- 'data': { 'unused-image-info': ['ImageInfo'] } }
+ 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } }
diff --git a/qemu-img.c b/qemu-img.c
index 2f85bb7ede..3b2ca3bbcb 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs)
g_free(sn_tab);
}
-static void dump_json_image_info_list(ImageInfoList *list)
+static void dump_json_block_node_info_list(BlockNodeInfoList *list)
{
GString *str;
QObject *obj;
Visitor *v = qobject_output_visitor_new(&obj);
- visit_type_ImageInfoList(v, NULL, &list, &error_abort);
+ visit_type_BlockNodeInfoList(v, NULL, &list, &error_abort);
visit_complete(v, &obj);
str = qobject_to_json_pretty(obj, true);
assert(str != NULL);
@@ -2832,13 +2832,13 @@ static void dump_json_image_info_list(ImageInfoList *list)
g_string_free(str, true);
}
-static void dump_json_image_info(ImageInfo *info)
+static void dump_json_block_node_info(BlockNodeInfo *info)
{
GString *str;
QObject *obj;
Visitor *v = qobject_output_visitor_new(&obj);
- visit_type_ImageInfo(v, NULL, &info, &error_abort);
+ visit_type_BlockNodeInfo(v, NULL, &info, &error_abort);
visit_complete(v, &obj);
str = qobject_to_json_pretty(obj, true);
assert(str != NULL);
@@ -2848,9 +2848,9 @@ static void dump_json_image_info(ImageInfo *info)
g_string_free(str, true);
}
-static void dump_human_image_info_list(ImageInfoList *list)
+static void dump_human_image_info_list(BlockNodeInfoList *list)
{
- ImageInfoList *elem;
+ BlockNodeInfoList *elem;
bool delim = false;
for (elem = list; elem; elem = elem->next) {
@@ -2859,7 +2859,7 @@ static void dump_human_image_info_list(ImageInfoList *list)
}
delim = true;
- bdrv_image_info_dump(elem->value);
+ bdrv_node_info_dump(elem->value);
}
}
@@ -2869,24 +2869,24 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b)
}
/**
- * Open an image file chain and return an ImageInfoList
+ * Open an image file chain and return an BlockNodeInfoList
*
* @filename: topmost image filename
* @fmt: topmost image format (may be NULL to autodetect)
* @chain: true - enumerate entire backing file chain
* false - only topmost image file
*
- * Returns a list of ImageInfo objects or NULL if there was an error opening an
- * image file. If there was an error a message will have been printed to
- * stderr.
+ * Returns a list of BlockNodeInfo objects or NULL if there was an error
+ * opening an image file. If there was an error a message will have been
+ * printed to stderr.
*/
-static ImageInfoList *collect_image_info_list(bool image_opts,
- const char *filename,
- const char *fmt,
- bool chain, bool force_share)
+static BlockNodeInfoList *collect_image_info_list(bool image_opts,
+ const char *filename,
+ const char *fmt,
+ bool chain, bool force_share)
{
- ImageInfoList *head = NULL;
- ImageInfoList **tail = &head;
+ BlockNodeInfoList *head = NULL;
+ BlockNodeInfoList **tail = &head;
GHashTable *filenames;
Error *err = NULL;
@@ -2895,7 +2895,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts,
while (filename) {
BlockBackend *blk;
BlockDriverState *bs;
- ImageInfo *info;
+ BlockNodeInfo *info;
if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
error_report("Backing file '%s' creates an infinite loop.",
@@ -2912,7 +2912,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts,
}
bs = blk_bs(blk);
- bdrv_query_image_info(bs, &info, &err);
+ bdrv_query_block_node_info(bs, &info, &err);
if (err) {
error_report_err(err);
blk_unref(blk);
@@ -2945,7 +2945,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts,
return head;
err:
- qapi_free_ImageInfoList(head);
+ qapi_free_BlockNodeInfoList(head);
g_hash_table_destroy(filenames);
return NULL;
}
@@ -2956,7 +2956,7 @@ static int img_info(int argc, char **argv)
OutputFormat output_format = OFORMAT_HUMAN;
bool chain = false;
const char *filename, *fmt, *output;
- ImageInfoList *list;
+ BlockNodeInfoList *list;
bool image_opts = false;
bool force_share = false;
@@ -3035,14 +3035,14 @@ static int img_info(int argc, char **argv)
break;
case OFORMAT_JSON:
if (chain) {
- dump_json_image_info_list(list);
+ dump_json_block_node_info_list(list);
} else {
- dump_json_image_info(list->value);
+ dump_json_block_node_info(list->value);
}
break;
}
- qapi_free_ImageInfoList(list);
+ qapi_free_BlockNodeInfoList(list);
return 0;
}
--
2.31.1

View File

@ -1,70 +0,0 @@
From d0d3d694b3a8d200442484ae0c9d263e0439cd04 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 12 Jan 2023 20:14:53 +0100
Subject: [PATCH 03/20] qemu-img bitmap: Report errors while closing the image
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image
RH-Bugzilla: 2150180
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [3/4] 4a704fec2e3bcb47b2be1529e27fd1833d58c517 (kmwolf/centos-qemu-kvm)
blk_unref() can't report any errors that happen while closing the image.
For example, if qcow2 hits an -ENOSPC error while writing out dirty
bitmaps when it's closed, it prints error messages to stderr, but
'qemu-img bitmap' won't see any error return value and will therefore
look successful with exit code 0.
In order to fix this, manually inactivate the image first before calling
blk_unref(). This already performs the operations that would be most
likely to fail while closing the image, but it can still return errors.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1330
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230112191454.169353-4-kwolf@redhat.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit c5e477110dcb8ef4642dce399777c3dee68fa96c)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
qemu-img.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/qemu-img.c b/qemu-img.c
index 3cbdda9f76..2f85bb7ede 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4646,6 +4646,7 @@ static int img_bitmap(int argc, char **argv)
QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
ImgBitmapAction *act, *act_next;
const char *op;
+ int inactivate_ret;
QSIMPLEQ_INIT(&actions);
@@ -4830,6 +4831,16 @@ static int img_bitmap(int argc, char **argv)
ret = 0;
out:
+ /*
+ * Manually inactivate the images first because this way we can know whether
+ * an error occurred. blk_unref() doesn't tell us about failures.
+ */
+ inactivate_ret = bdrv_inactivate_all();
+ if (inactivate_ret < 0) {
+ error_report("Error while closing the image: %s", strerror(-inactivate_ret));
+ ret = 1;
+ }
+
blk_unref(src);
blk_unref(blk);
qemu_opts_del(opts);
--
2.31.1

View File

@ -1,67 +0,0 @@
From 2f5369f0effaa23be746f9b5d9f6a0bfc346fb7d Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 12 Jan 2023 20:14:52 +0100
Subject: [PATCH 02/20] qemu-img commit: Report errors while closing the image
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image
RH-Bugzilla: 2150180
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [2/4] faedd43355463b1210a3f21ecd430f478bd06f5a (kmwolf/centos-qemu-kvm)
blk_unref() can't report any errors that happen while closing the image.
For example, if qcow2 hits an -ENOSPC error while writing out dirty
bitmaps when it's closed, it prints error messages to stderr, but
'qemu-img commit' won't see any error return value and will therefore
look successful with exit code 0.
In order to fix this, manually inactivate the image first before calling
blk_unref(). This already performs the operations that would be most
likely to fail while closing the image, but it can still return errors.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230112191454.169353-3-kwolf@redhat.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 44efba2d713aca076c411594d0c1a2b99155eeb3)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
qemu-img.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/qemu-img.c b/qemu-img.c
index a9b3a8103c..3cbdda9f76 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -449,6 +449,11 @@ static BlockBackend *img_open(bool image_opts,
blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
force_share);
}
+
+ if (blk) {
+ blk_set_force_allow_inactivate(blk);
+ }
+
return blk;
}
@@ -1119,6 +1124,14 @@ unref_backing:
done:
qemu_progress_end();
+ /*
+ * Manually inactivate the image first because this way we can know whether
+ * an error occurred. blk_unref() doesn't tell us about failures.
+ */
+ ret = bdrv_inactivate_all();
+ if (ret < 0 && !local_err) {
+ error_setg_errno(&local_err, -ret, "Error while closing the image");
+ }
blk_unref(blk);
if (local_err) {
--
2.31.1

View File

@ -1,166 +0,0 @@
From 06030aa79fcb2d90d6a670e75d959aa0c3204b5c Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Thu, 12 Jan 2023 20:14:54 +0100
Subject: [PATCH 04/20] qemu-iotests: Test qemu-img bitmap/commit exit code on
error
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image
RH-Bugzilla: 2150180
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Commit: [4/4] b96bb671bcfb7ae18015fda14db70f42a83a6ea7 (kmwolf/centos-qemu-kvm)
This tests that when an error happens while writing back bitmaps to the
image file in qcow2_inactivate(), 'qemu-img bitmap/commit' actually
return an error value in their exit code instead of making the operation
look successful to scripts.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230112191454.169353-5-kwolf@redhat.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 07a4e1f8e5418f36424cd57d5d061b090a238c65)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
.../qemu-iotests/tests/qemu-img-close-errors | 96 +++++++++++++++++++
.../tests/qemu-img-close-errors.out | 23 +++++
2 files changed, 119 insertions(+)
create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors
create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out
diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors b/tests/qemu-iotests/tests/qemu-img-close-errors
new file mode 100755
index 0000000000..50bfb6cfa2
--- /dev/null
+++ b/tests/qemu-iotests/tests/qemu-img-close-errors
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+# group: rw auto quick
+#
+# Check that errors while closing the image, in particular writing back dirty
+# bitmaps, are correctly reported with a failing qemu-img exit code.
+#
+# Copyright (C) 2023 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+cd ..
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+size=1G
+
+# The error we are going to use is ENOSPC. Depending on how many bitmaps we
+# create in the backing file (and therefore increase the used up space), we get
+# failures in different places. With a low number, only merging the bitmap
+# fails, whereas with a higher number, already 'qemu-img commit' fails.
+for max_bitmap in 6 7; do
+ echo
+ echo "=== Test with $max_bitmap bitmaps ==="
+
+ TEST_IMG="$TEST_IMG.base" _make_test_img -q $size
+ for i in $(seq 1 $max_bitmap); do
+ $QEMU_IMG bitmap --add "$TEST_IMG.base" "stale-bitmap-$i"
+ done
+
+ # Simulate a block device of 128 MB by resizing the image file accordingly
+ # and then enforcing the size with the raw driver
+ $QEMU_IO -f raw -c "truncate 128M" "$TEST_IMG.base"
+ BASE_JSON='json:{
+ "driver": "qcow2",
+ "file": {
+ "driver": "raw",
+ "size": 134217728,
+ "file": {
+ "driver": "file",
+ "filename":"'"$TEST_IMG.base"'"
+ }
+ }
+ }'
+
+ _make_test_img -q -b "$BASE_JSON" -F $IMGFMT
+ $QEMU_IMG bitmap --add "$TEST_IMG" "good-bitmap"
+
+ $QEMU_IO -c 'write 0 126m' "$TEST_IMG" | _filter_qemu_io
+
+ $QEMU_IMG commit -d "$TEST_IMG" 2>&1 | _filter_generated_node_ids
+ echo "qemu-img commit exit code: ${PIPESTATUS[0]}"
+
+ $QEMU_IMG bitmap --add "$BASE_JSON" "good-bitmap"
+ echo "qemu-img bitmap --add exit code: $?"
+
+ $QEMU_IMG bitmap --merge "good-bitmap" -b "$TEST_IMG" "$BASE_JSON" \
+ "good-bitmap" 2>&1 | _filter_generated_node_ids
+ echo "qemu-img bitmap --merge exit code: ${PIPESTATUS[0]}"
+done
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
+
diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors.out b/tests/qemu-iotests/tests/qemu-img-close-errors.out
new file mode 100644
index 0000000000..1bfe88f176
--- /dev/null
+++ b/tests/qemu-iotests/tests/qemu-img-close-errors.out
@@ -0,0 +1,23 @@
+QA output created by qemu-img-close-errors
+
+=== Test with 6 bitmaps ===
+wrote 132120576/132120576 bytes at offset 0
+126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+qemu-img commit exit code: 0
+qemu-img bitmap --add exit code: 0
+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device
+qemu-img: Error while closing the image: Invalid argument
+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device
+qemu-img bitmap --merge exit code: 1
+
+=== Test with 7 bitmaps ===
+wrote 132120576/132120576 bytes at offset 0
+126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device
+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device
+qemu-img: Error while closing the image: Invalid argument
+qemu-img commit exit code: 1
+qemu-img bitmap --add exit code: 0
+qemu-img bitmap --merge exit code: 0
+*** done
--
2.31.1

View File

@ -1,146 +0,0 @@
From aa61e4c437d29a791ea09a01f7230231f1e53356 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 05/12] qemu-thread-posix: cleanup, fix, document QemuEvent
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [2/9] c3bdf75f884e137c667316aaac96bb4a0b9ec2d9 (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit 9586a1329f5dce6c1d7f4de53cf0536644d7e593
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu Mar 2 11:19:52 2023 +0100
qemu-thread-posix: cleanup, fix, document QemuEvent
QemuEvent is currently broken on ARM due to missing memory barriers
after qatomic_*(). Apart from adding the memory barrier, a closer look
reveals some unpaired memory barriers too. Document more clearly what
is going on.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
util/qemu-thread-posix.c | 69 ++++++++++++++++++++++++++++------------
1 file changed, 49 insertions(+), 20 deletions(-)
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index bae938c670..cc74f4ede0 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -379,13 +379,21 @@ void qemu_event_destroy(QemuEvent *ev)
void qemu_event_set(QemuEvent *ev)
{
- /* qemu_event_set has release semantics, but because it *loads*
+ assert(ev->initialized);
+
+ /*
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
+ *
+ * qemu_event_set has release semantics, but because it *loads*
* ev->value we need a full memory barrier here.
*/
- assert(ev->initialized);
smp_mb();
if (qatomic_read(&ev->value) != EV_SET) {
- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+ int old = qatomic_xchg(&ev->value, EV_SET);
+
+ /* Pairs with memory barrier in kernel futex_wait system call. */
+ smp_mb__after_rmw();
+ if (old == EV_BUSY) {
/* There were waiters, wake them up. */
qemu_futex_wake(ev, INT_MAX);
}
@@ -394,18 +402,19 @@ void qemu_event_set(QemuEvent *ev)
void qemu_event_reset(QemuEvent *ev)
{
- unsigned value;
-
assert(ev->initialized);
- value = qatomic_read(&ev->value);
- smp_mb_acquire();
- if (value == EV_SET) {
- /*
- * If there was a concurrent reset (or even reset+wait),
- * do nothing. Otherwise change EV_SET->EV_FREE.
- */
- qatomic_or(&ev->value, EV_FREE);
- }
+
+ /*
+ * If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ qatomic_or(&ev->value, EV_FREE);
+
+ /*
+ * Order reset before checking the condition in the caller.
+ * Pairs with the first memory barrier in qemu_event_set().
+ */
+ smp_mb__after_rmw();
}
void qemu_event_wait(QemuEvent *ev)
@@ -413,20 +422,40 @@ void qemu_event_wait(QemuEvent *ev)
unsigned value;
assert(ev->initialized);
- value = qatomic_read(&ev->value);
- smp_mb_acquire();
+
+ /*
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
+ * not go down the slow path, so this load-acquire is needed that
+ * synchronizes with the first memory barrier in qemu_event_set().
+ *
+ * If we do go down the slow path, there is no requirement at all: we
+ * might miss a qemu_event_set() here but ultimately the memory barrier in
+ * qemu_futex_wait() will ensure the check is done correctly.
+ */
+ value = qatomic_load_acquire(&ev->value);
if (value != EV_SET) {
if (value == EV_FREE) {
/*
- * Leave the event reset and tell qemu_event_set that there
- * are waiters. No need to retry, because there cannot be
- * a concurrent busy->free transition. After the CAS, the
- * event will be either set or busy.
+ * Leave the event reset and tell qemu_event_set that there are
+ * waiters. No need to retry, because there cannot be a concurrent
+ * busy->free transition. After the CAS, the event will be either
+ * set or busy.
+ *
+ * This cmpxchg doesn't have particular ordering requirements if it
+ * succeeds (moving the store earlier can only cause qemu_event_set()
+ * to issue _more_ wakeups), the failing case needs acquire semantics
+ * like the load above.
*/
if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
return;
}
}
+
+ /*
+ * This is the final check for a concurrent set, so it does need
+ * a smp_mb() pairing with the second barrier of qemu_event_set().
+ * The barrier is inside the FUTEX_WAIT system call.
+ */
qemu_futex_wait(ev, EV_BUSY);
}
}
--
2.39.1

View File

@ -1,162 +0,0 @@
From 02347869410fe53d814487501fb586f7dc614375 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Thu, 9 Mar 2023 08:24:36 -0500
Subject: [PATCH 06/12] qemu-thread-win32: cleanup, fix, document QemuEvent
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw()
RH-Bugzilla: 2175660
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [3/9] d228e9d6a4a75dd1f0a23a6dceaf4fea23d69192 (eesposit/qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660
commit 6c5df4b48f0c52a61342ecb307a43f4c2a3565c4
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu Mar 2 11:22:50 2023 +0100
qemu-thread-win32: cleanup, fix, document QemuEvent
QemuEvent is currently broken on ARM due to missing memory barriers
after qatomic_*(). Apart from adding the memory barrier, a closer look
reveals some unpaired memory barriers that are not really needed and
complicated the functions unnecessarily. Also, it is relying on
a memory barrier in ResetEvent(); the barrier _ought_ to be there
but there is really no documentation about it, so make it explicit.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
util/qemu-thread-win32.c | 82 +++++++++++++++++++++++++++-------------
1 file changed, 56 insertions(+), 26 deletions(-)
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 69db254ac7..a7fe3cc345 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -272,12 +272,20 @@ void qemu_event_destroy(QemuEvent *ev)
void qemu_event_set(QemuEvent *ev)
{
assert(ev->initialized);
- /* qemu_event_set has release semantics, but because it *loads*
+
+ /*
+ * Pairs with both qemu_event_reset() and qemu_event_wait().
+ *
+ * qemu_event_set has release semantics, but because it *loads*
* ev->value we need a full memory barrier here.
*/
smp_mb();
if (qatomic_read(&ev->value) != EV_SET) {
- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+ int old = qatomic_xchg(&ev->value, EV_SET);
+
+ /* Pairs with memory barrier after ResetEvent. */
+ smp_mb__after_rmw();
+ if (old == EV_BUSY) {
/* There were waiters, wake them up. */
SetEvent(ev->event);
}
@@ -286,17 +294,19 @@ void qemu_event_set(QemuEvent *ev)
void qemu_event_reset(QemuEvent *ev)
{
- unsigned value;
-
assert(ev->initialized);
- value = qatomic_read(&ev->value);
- smp_mb_acquire();
- if (value == EV_SET) {
- /* If there was a concurrent reset (or even reset+wait),
- * do nothing. Otherwise change EV_SET->EV_FREE.
- */
- qatomic_or(&ev->value, EV_FREE);
- }
+
+ /*
+ * If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ qatomic_or(&ev->value, EV_FREE);
+
+ /*
+ * Order reset before checking the condition in the caller.
+ * Pairs with the first memory barrier in qemu_event_set().
+ */
+ smp_mb__after_rmw();
}
void qemu_event_wait(QemuEvent *ev)
@@ -304,29 +314,49 @@ void qemu_event_wait(QemuEvent *ev)
unsigned value;
assert(ev->initialized);
- value = qatomic_read(&ev->value);
- smp_mb_acquire();
+
+ /*
+ * qemu_event_wait must synchronize with qemu_event_set even if it does
+ * not go down the slow path, so this load-acquire is needed that
+ * synchronizes with the first memory barrier in qemu_event_set().
+ *
+ * If we do go down the slow path, there is no requirement at all: we
+ * might miss a qemu_event_set() here but ultimately the smp_mb() after
+ * ResetEvent() below will ensure the check is done correctly.
+ */
+ value = qatomic_load_acquire(&ev->value);
if (value != EV_SET) {
if (value == EV_FREE) {
- /* qemu_event_set is not yet going to call SetEvent, but we are
- * going to do another check for EV_SET below when setting EV_BUSY.
- * At that point it is safe to call WaitForSingleObject.
+ /*
+ * Here the underlying kernel event is reset, but qemu_event_set is
+ * not yet going to call SetEvent. However, there will be another
+ * check for EV_SET below when setting EV_BUSY. At that point it
+ * is safe to call WaitForSingleObject.
*/
ResetEvent(ev->event);
- /* Tell qemu_event_set that there are waiters. No need to retry
- * because there cannot be a concurrent busy->free transition.
- * After the CAS, the event will be either set or busy.
+ /*
+ * It is not clear whether ResetEvent provides this barrier; kernel
+ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry!
+ */
+ smp_mb();
+
+ /*
+ * Leave the event reset and tell qemu_event_set that there are
+ * waiters. No need to retry, because there cannot be a concurrent
+ * busy->free transition. After the CAS, the event will be either
+ * set or busy.
*/
if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
- value = EV_SET;
- } else {
- value = EV_BUSY;
+ return;
}
}
- if (value == EV_BUSY) {
- WaitForSingleObject(ev->event, INFINITE);
- }
+
+ /*
+ * ev->value is now EV_BUSY. Since we didn't observe EV_SET,
+ * qemu_event_set() must observe EV_BUSY and call SetEvent().
+ */
+ WaitForSingleObject(ev->event, INFINITE);
}
}
--
2.39.1

View File

@ -1,43 +0,0 @@
From 546e4213c4e8a7b2e369315a71bc9aec091eed6e Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Mon, 19 Dec 2022 10:30:26 +0100
Subject: redhat: fix virt-rhel9.2.0 compat props
RH-Author: Cornelia Huck <cohuck@redhat.com>
RH-MergeRequest: 127: redhat: fix virt-rhel9.2.0 compat props
RH-Bugzilla: 2154640
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Gavin Shan <gshan@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/1] 49635fdc1d9a934ece78abd160b07c19909f876a (cohuck/qemu-kvm-c9s)
We need to include arm_rhel_compat props in the latest machine.
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
hw/arm/virt.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 0a94f31dd1..bf18838b87 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3520,6 +3520,7 @@ type_init(rhel_machine_init);
static void rhel920_virt_options(MachineClass *mc)
{
+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
}
DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0)
@@ -3529,7 +3530,6 @@ static void rhel900_virt_options(MachineClass *mc)
rhel920_virt_options(mc);
- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len);
/* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
--
2.38.1

View File

@ -1,125 +0,0 @@
From ed90f91b61844abd2dff2eb970f721a6cf072235 Mon Sep 17 00:00:00 2001
From: Matthew Rosato <mjrosato@linux.ibm.com>
Date: Fri, 28 Oct 2022 15:47:57 -0400
Subject: [PATCH 6/9] s390x/pci: coalesce unmap operations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset
RH-Bugzilla: 2163701
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/3] 80c3a2c1d720057ae2a80b338ea06c9c6c804532 (clegoate/qemu-kvm-c9s)
Currently, each unmapped page is handled as an individual iommu
region notification. Attempt to group contiguous unmap operations
into fewer notifications to reduce overhead.
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Message-Id: <20221028194758.204007-3-mjrosato@linux.ibm.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit ef536007c3301bbd6a787e4c2210ea289adaa6f0)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/s390x/s390-pci-inst.c | 51 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 51 insertions(+)
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 7cc4bcf850..66e764f901 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -640,6 +640,8 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu,
}
g_hash_table_remove(iommu->iotlb, &entry->iova);
inc_dma_avail(iommu);
+ /* Don't notify the iommu yet, maybe we can bundle contiguous unmaps */
+ goto out;
} else {
if (cache) {
if (cache->perm == entry->perm &&
@@ -663,15 +665,44 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu,
dec_dma_avail(iommu);
}
+ /*
+ * All associated iotlb entries have already been cleared, trigger the
+ * unmaps.
+ */
memory_region_notify_iommu(&iommu->iommu_mr, 0, event);
out:
return iommu->dma_limit ? iommu->dma_limit->avail : 1;
}
+static void s390_pci_batch_unmap(S390PCIIOMMU *iommu, uint64_t iova,
+ uint64_t len)
+{
+ uint64_t remain = len, start = iova, end = start + len - 1, mask, size;
+ IOMMUTLBEvent event = {
+ .type = IOMMU_NOTIFIER_UNMAP,
+ .entry = {
+ .target_as = &address_space_memory,
+ .translated_addr = 0,
+ .perm = IOMMU_NONE,
+ },
+ };
+
+ while (remain >= TARGET_PAGE_SIZE) {
+ mask = dma_aligned_pow2_mask(start, end, 64);
+ size = mask + 1;
+ event.entry.iova = start;
+ event.entry.addr_mask = mask;
+ memory_region_notify_iommu(&iommu->iommu_mr, 0, event);
+ start += size;
+ remain -= size;
+ }
+}
+
int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
{
CPUS390XState *env = &cpu->env;
+ uint64_t iova, coalesce = 0;
uint32_t fh;
uint16_t error = 0;
S390PCIBusDevice *pbdev;
@@ -742,6 +773,21 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
break;
}
+ /*
+ * If this is an unmap of a PTE, let's try to coalesce multiple unmaps
+ * into as few notifier events as possible.
+ */
+ if (entry.perm == IOMMU_NONE && entry.len == TARGET_PAGE_SIZE) {
+ if (coalesce == 0) {
+ iova = entry.iova;
+ }
+ coalesce += entry.len;
+ } else if (coalesce > 0) {
+ /* Unleash the coalesced unmap before processing a new map */
+ s390_pci_batch_unmap(iommu, iova, coalesce);
+ coalesce = 0;
+ }
+
start += entry.len;
while (entry.iova < start && entry.iova < end) {
if (dma_avail > 0 || entry.perm == IOMMU_NONE) {
@@ -759,6 +805,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra)
}
}
}
+ if (coalesce) {
+ /* Unleash the coalesced unmap before finishing rpcit */
+ s390_pci_batch_unmap(iommu, iova, coalesce);
+ coalesce = 0;
+ }
if (again && dma_avail > 0)
goto retry;
err:
--
2.31.1

View File

@ -1,147 +0,0 @@
From 1ed1f8fc20a4883bc0bc1f58d299b0278abc5442 Mon Sep 17 00:00:00 2001
From: Matthew Rosato <mjrosato@linux.ibm.com>
Date: Fri, 9 Dec 2022 14:57:00 -0500
Subject: [PATCH 8/9] s390x/pci: reset ISM passthrough devices on shutdown and
system reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset
RH-Bugzilla: 2163701
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/3] c531352b9d57f51ba938d4c46ee19a5706ade697 (clegoate/qemu-kvm-c9s)
ISM device firmware stores unique state information that
can cause a wholesale unmap of the associated IOMMU (e.g. when
we get a termination signal for QEMU) to trigger firmware errors
because firmware believes we are attempting to invalidate entries
that are still in-use by the guest OS (when in fact that guest is
in the process of being terminated or rebooted).
To alleviate this, register both a shutdown notifier (for unexpected
termination cases e.g. virsh destroy) as well as a reset callback
(for cases like guest OS reboot). For each of these scenarios, trigger
PCI device reset; this is enough to indicate to firmware that the IOMMU
is no longer in-use by the guest OS, making it safe to invalidate any
associated IOMMU entries.
Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X")
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Message-Id: <20221209195700.263824-1-mjrosato@linux.ibm.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
[thuth: Adjusted the hunk in s390-pci-vfio.c due to different context]
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 03451953c79e6b31f7860ee0c35b28e181d573c1)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++
hw/s390x/s390-pci-vfio.c | 2 ++
include/hw/s390x/s390-pci-bus.h | 5 +++++
3 files changed, 35 insertions(+)
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 977e7daa15..02751f3597 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -24,6 +24,8 @@
#include "hw/pci/msi.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
#ifndef DEBUG_S390PCI_BUS
#define DEBUG_S390PCI_BUS 0
@@ -150,10 +152,30 @@ out:
psccb->header.response_code = cpu_to_be16(rc);
}
+static void s390_pci_shutdown_notifier(Notifier *n, void *opaque)
+{
+ S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice,
+ shutdown_notifier);
+
+ pci_device_reset(pbdev->pdev);
+}
+
+static void s390_pci_reset_cb(void *opaque)
+{
+ S390PCIBusDevice *pbdev = opaque;
+
+ pci_device_reset(pbdev->pdev);
+}
+
static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev)
{
HotplugHandler *hotplug_ctrl;
+ if (pbdev->pft == ZPCI_PFT_ISM) {
+ notifier_remove(&pbdev->shutdown_notifier);
+ qemu_unregister_reset(s390_pci_reset_cb, pbdev);
+ }
+
/* Unplug the PCI device */
if (pbdev->pdev) {
DeviceState *pdev = DEVICE(pbdev->pdev);
@@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
pbdev->fh |= FH_SHM_VFIO;
pbdev->forwarding_assist = false;
}
+ /* Register shutdown notifier and reset callback for ISM devices */
+ if (pbdev->pft == ZPCI_PFT_ISM) {
+ pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier;
+ qemu_register_shutdown_notifier(&pbdev->shutdown_notifier);
+ qemu_register_reset(s390_pci_reset_cb, pbdev);
+ }
} else {
pbdev->fh |= FH_SHM_EMUL;
/* Always intercept emulated devices */
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index f7bf36cec8..f51190d466 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -124,6 +124,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
/* The following values remain 0 until we support other FMB formats */
pbdev->zpci_fn.fmbl = 0;
pbdev->zpci_fn.pft = 0;
+ /* Store function type separately for type-specific behavior */
+ pbdev->pft = cap->pft;
/*
* If appropriate, reduce the size of the supported DMA aperture reported
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
index 1c46e3a269..e0a9f9385b 100644
--- a/include/hw/s390x/s390-pci-bus.h
+++ b/include/hw/s390x/s390-pci-bus.h
@@ -39,6 +39,9 @@
#define UID_CHECKING_ENABLED 0x01
#define ZPCI_DTSM 0x40
+/* zPCI Function Types */
+#define ZPCI_PFT_ISM 5
+
OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE)
OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS)
OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE)
@@ -344,6 +347,7 @@ struct S390PCIBusDevice {
uint16_t noi;
uint16_t maxstbl;
uint8_t sum;
+ uint8_t pft;
S390PCIGroup *pci_group;
ClpRspQueryPci zpci_fn;
S390MsixInfo msix;
@@ -352,6 +356,7 @@ struct S390PCIBusDevice {
MemoryRegion msix_notify_mr;
IndAddr *summary_ind;
IndAddr *indicator;
+ Notifier shutdown_notifier;
bool pci_unplug_request_processed;
bool unplug_requested;
bool interp;
--
2.31.1

View File

@ -1,91 +0,0 @@
From ee69c8c57fe62fc200f749c4ce3927c88803644d Mon Sep 17 00:00:00 2001
From: Matthew Rosato <mjrosato@linux.ibm.com>
Date: Fri, 28 Oct 2022 15:47:58 -0400
Subject: [PATCH 7/9] s390x/pci: shrink DMA aperture to be bound by vfio DMA
limit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset
RH-Bugzilla: 2163701
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/3] 0956bbb4773dd0085f6aed59d6284c704b4fed3b (clegoate/qemu-kvm-c9s)
Currently, s390x-pci performs accounting against the vfio DMA
limit and triggers the guest to clean up mappings when the limit
is reached. Let's go a step further and also limit the size of
the supported DMA aperture reported to the guest based upon the
initial vfio DMA limit reported for the container (if less
than the size reported by the firmware/host zPCI layer). This
avoids processing sections of the guest DMA table during global
refresh that, for common use cases, will never be used anyway, and
makes exhausting the vfio DMA limit due to mismatch between guest
aperture size and host limit far less likely and more indicative
of an error.
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Message-Id: <20221028194758.204007-4-mjrosato@linux.ibm.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit df202e3ff3fccb49868e08f20d0bda86cb953fbe)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/s390x/s390-pci-vfio.c | 11 +++++++++++
include/hw/s390x/s390-pci-bus.h | 1 +
2 files changed, 12 insertions(+)
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index 5f0adb0b4a..f7bf36cec8 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -84,6 +84,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
cnt->users = 1;
cnt->avail = avail;
QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link);
+ pbdev->iommu->max_dma_limit = avail;
return cnt;
}
@@ -103,6 +104,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
struct vfio_info_cap_header *hdr;
struct vfio_device_info_cap_zpci_base *cap;
VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ uint64_t vfio_size;
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
@@ -122,6 +124,15 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
/* The following values remain 0 until we support other FMB formats */
pbdev->zpci_fn.fmbl = 0;
pbdev->zpci_fn.pft = 0;
+
+ /*
+ * If appropriate, reduce the size of the supported DMA aperture reported
+ * to the guest based upon the vfio DMA limit.
+ */
+ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
+ if (vfio_size < (cap->end_dma - cap->start_dma + 1)) {
+ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1;
+ }
}
static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info,
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
index 0605fcea24..1c46e3a269 100644
--- a/include/hw/s390x/s390-pci-bus.h
+++ b/include/hw/s390x/s390-pci-bus.h
@@ -278,6 +278,7 @@ struct S390PCIIOMMU {
uint64_t g_iota;
uint64_t pba;
uint64_t pal;
+ uint64_t max_dma_limit;
GHashTable *iotlb;
S390PCIDMACount *dma_limit;
};
--
2.31.1

View File

@ -1,109 +0,0 @@
From 9452246e59a5f16f44fdf9a7d514b947faf1d5fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Mon, 16 Jan 2023 18:46:05 +0100
Subject: [PATCH 5/9] s390x/pv: Implement a CGS check helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 139: s390x/pv: Implement a CGS check helper
RH-Bugzilla: 2122523
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [1/1] 8551ce772b10de653b4e1c8be60aae60ec98b421 (clegoate/qemu-kvm-c9s)
When a protected VM is started with the maximum number of CPUs (248),
the service call providing information on the CPUs requires more
buffer space than allocated and QEMU disgracefully aborts :
LOADPARM=[........]
Using virtio-blk.
Using SCSI scheme.
...................................................................................
qemu-system-s390x: KVM_S390_MEM_OP failed: Argument list too long
When protected virtualization is initialized, compute the maximum
number of vCPUs supported by the machine and return useful information
to the user before the machine starts in case of error.
Suggested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Message-Id: <20230116174607.2459498-2-clg@kaod.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 75d7150c636569f6687f7e70a33be893be43eb5f)
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/s390x/pv.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
index 8dfe92d8df..8a1c71436b 100644
--- a/hw/s390x/pv.c
+++ b/hw/s390x/pv.c
@@ -20,6 +20,7 @@
#include "exec/confidential-guest-support.h"
#include "hw/s390x/ipl.h"
#include "hw/s390x/pv.h"
+#include "hw/s390x/sclp.h"
#include "target/s390x/kvm/kvm_s390x.h"
static bool info_valid;
@@ -249,6 +250,41 @@ struct S390PVGuestClass {
ConfidentialGuestSupportClass parent_class;
};
+/*
+ * If protected virtualization is enabled, the amount of data that the
+ * Read SCP Info Service Call can use is limited to one page. The
+ * available space also depends on the Extended-Length SCCB (ELS)
+ * feature which can take more buffer space to store feature
+ * information. This impacts the maximum number of CPUs supported in
+ * the machine.
+ */
+static uint32_t s390_pv_get_max_cpus(void)
+{
+ int offset_cpu = s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) ?
+ offsetof(ReadInfo, entries) : SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET;
+
+ return (TARGET_PAGE_SIZE - offset_cpu) / sizeof(CPUEntry);
+}
+
+static bool s390_pv_check_cpus(Error **errp)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ uint32_t pv_max_cpus = s390_pv_get_max_cpus();
+
+ if (ms->smp.max_cpus > pv_max_cpus) {
+ error_setg(errp, "Protected VMs support a maximum of %d CPUs",
+ pv_max_cpus);
+ return false;
+ }
+
+ return true;
+}
+
+static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp)
+{
+ return s390_pv_check_cpus(errp);
+}
+
int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) {
@@ -261,6 +297,10 @@ int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
return -1;
}
+ if (!s390_pv_guest_check(cgs, errp)) {
+ return -1;
+ }
+
cgs->ready = true;
return 0;
--
2.31.1

View File

@ -1,70 +0,0 @@
From 51fcf352a97f2e99a6a3fb8ae663b45436304120 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
Date: Tue, 10 Jan 2023 14:25:34 +0100
Subject: [PATCH 11/31] s390x/s390-virtio-ccw: Activate zPCI features on
s390-ccw-virtio-rhel8.6.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Cédric Le Goater <clg@redhat.com>
RH-MergeRequest: 133: s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0
RH-Bugzilla: 2159408
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/1] 1ed82e56fe74a283a1726c4893dc3387e645072c (clegoate/qemu-kvm-c9s)
commit c7b14d3af7 ("s390x/s390-virtio-ccw: Switch off zPCI enhancements
on older machines") activated zPCI enhancement features (interpretation
and forward assist) silently on the s390-ccw-virtio-rhel8.6.0 machine
for RHEL8.8. It didn't seem to be a problem since migration is not
possible but it broke LEAPP upgrade to RHEL9 when the machine is
defined with a passthrough device. Activate the zPCI features also on
RHEL9.2 for the machines to be alike in both latest RHEL distros.
Upstream Status: RHEL-only
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2159408
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
hw/s390x/s390-virtio-ccw.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index aa142a1a4e..4cdd59c394 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1234,8 +1234,14 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine)
static void ccw_machine_rhel860_class_options(MachineClass *mc)
{
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "interpret", "on", },
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", },
+ };
+
ccw_machine_rhel900_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
/* All RHEL machines for prior major releases are deprecated */
mc->deprecation_reason = rhel_old_machine_deprecation;
@@ -1259,8 +1265,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine)
static void ccw_machine_rhel850_class_options(MachineClass *mc)
{
+ static GlobalProperty compat[] = {
+ { TYPE_S390_PCI_DEVICE, "interpret", "off", },
+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
+ };
+
ccw_machine_rhel860_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
mc->smp_props.prefer_sockets = true;
}
DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false);
--
2.31.1

View File

@ -1,176 +0,0 @@
From 0a4f5bcc2a6f8ac31431e971c1dce9e6ab2191c2 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 21 Feb 2023 16:22:16 -0500
Subject: [PATCH 01/12] scsi: protect req->aiocb with AioContext lock
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread
RH-Bugzilla: 2155748
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
RH-Commit: [1/3] 61727297bd31dfe18220b61f1d265ced0649c60d (stefanha/centos-stream-qemu-kvm)
If requests are being processed in the IOThread when a SCSIDevice is
unplugged, scsi_device_purge_requests() -> scsi_req_cancel_async() races
with I/O completion callbacks. Both threads load and store req->aiocb.
This can lead to assert(r->req.aiocb == NULL) failures and undefined
behavior.
Protect r->req.aiocb with the AioContext lock to prevent the race.
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230221212218.1378734-2-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 7b7fc3d0102dafe8eb44802493036a526e921a71)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
hw/scsi/scsi-disk.c | 23 ++++++++++++++++-------
hw/scsi/scsi-generic.c | 11 ++++++-----
2 files changed, 22 insertions(+), 12 deletions(-)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index e493c28814..5327f93f4c 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -273,9 +273,11 @@ static void scsi_aio_complete(void *opaque, int ret)
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
@@ -357,10 +359,11 @@ static void scsi_dma_complete(void *opaque, int ret)
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
@@ -393,10 +396,11 @@ static void scsi_read_complete(void *opaque, int ret)
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
@@ -446,10 +450,11 @@ static void scsi_do_read_cb(void *opaque, int ret)
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert (r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
@@ -530,10 +535,11 @@ static void scsi_write_complete(void * opaque, int ret)
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert (r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
} else {
@@ -1737,10 +1743,11 @@ static void scsi_unmap_complete(void *opaque, int ret)
SCSIDiskReq *r = data->r;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
if (scsi_disk_req_check_error(r, ret, true)) {
scsi_req_unref(&r->req);
g_free(data);
@@ -1816,9 +1823,11 @@ static void scsi_write_same_complete(void *opaque, int ret)
SCSIDiskReq *r = data->r;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
+
if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 92cce20a4d..ac9fa662b4 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -111,10 +111,11 @@ static void scsi_command_complete(void *opaque, int ret)
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
SCSIDevice *s = r->req.dev;
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
scsi_command_complete_noio(r, ret);
aio_context_release(blk_get_aio_context(s->conf.blk));
}
@@ -269,11 +270,11 @@ static void scsi_read_complete(void * opaque, int ret)
SCSIDevice *s = r->req.dev;
int len;
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
-
if (ret || r->req.io_canceled) {
scsi_command_complete_noio(r, ret);
goto done;
@@ -386,11 +387,11 @@ static void scsi_write_complete(void * opaque, int ret)
trace_scsi_generic_write_complete(ret);
+ aio_context_acquire(blk_get_aio_context(s->conf.blk));
+
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
-
if (ret || r->req.io_canceled) {
scsi_command_complete_noio(r, ret);
goto done;
--
2.39.1

View File

@ -1,159 +0,0 @@
From 5defda06ec4c24818a34126c5048be5e274b63f5 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:41:04 +0100
Subject: [PATCH 22/31] stream: Replace subtree drain with a single node drain
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [10/16] a93250b1f6ef296e903df0ba5d8b29bc2ed540a8 (sgarzarella/qemu-kvm-c-9-s)
The subtree drain was introduced in commit b1e1af394d9 as a way to avoid
graph changes between finding the base node and changing the block graph
as necessary on completion of the image streaming job.
The block graph could change between these two points because
bdrv_set_backing_hd() first drains the parent node, which involves
polling and can do anything.
Subtree draining was an imperfect way to make this less likely (because
with it, fewer callbacks are called during this window). Everyone agreed
that it's not really the right solution, and it was only committed as a
stopgap solution.
This replaces the subtree drain with a solution that simply drains the
parent node before we try to find the base node, and then calls a version
of bdrv_set_backing_hd() that doesn't drain, but just asserts that the
parent node is already drained.
This way, any graph changes caused by draining happen before we start
looking at the graph and things stay consistent between finding the base
node and changing the graph.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-10-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 92140b9f3f07d80e2c27edcc6e32f392be2135e6)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
block.c | 17 ++++++++++++++---
block/stream.c | 26 ++++++++++++++++----------
include/block/block-global-state.h | 3 +++
3 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/block.c b/block.c
index b3449a312e..5330e89903 100644
--- a/block.c
+++ b/block.c
@@ -3403,14 +3403,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs,
return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
}
-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
- Error **errp)
+int bdrv_set_backing_hd_drained(BlockDriverState *bs,
+ BlockDriverState *backing_hd,
+ Error **errp)
{
int ret;
Transaction *tran = tran_new();
GLOBAL_STATE_CODE();
- bdrv_drained_begin(bs);
+ assert(bs->quiesce_counter > 0);
ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
if (ret < 0) {
@@ -3420,7 +3421,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
ret = bdrv_refresh_perms(bs, errp);
out:
tran_finalize(tran, ret);
+ return ret;
+}
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp)
+{
+ int ret;
+ GLOBAL_STATE_CODE();
+
+ bdrv_drained_begin(bs);
+ ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
bdrv_drained_end(bs);
return ret;
diff --git a/block/stream.c b/block/stream.c
index 694709bd25..8744ad103f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -64,13 +64,16 @@ static int stream_prepare(Job *job)
bdrv_cor_filter_drop(s->cor_filter_bs);
s->cor_filter_bs = NULL;
- bdrv_subtree_drained_begin(s->above_base);
+ /*
+ * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain
+ * already here and use bdrv_set_backing_hd_drained() instead because
+ * the polling during drained_begin() might change the graph, and if we do
+ * this only later, we may end up working with the wrong base node (or it
+ * might even have gone away by the time we want to use it).
+ */
+ bdrv_drained_begin(unfiltered_bs);
base = bdrv_filter_or_cow_bs(s->above_base);
- if (base) {
- bdrv_ref(base);
- }
-
unfiltered_base = bdrv_skip_filters(base);
if (bdrv_cow_child(unfiltered_bs)) {
@@ -82,7 +85,13 @@ static int stream_prepare(Job *job)
}
}
- bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
+ bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err);
+
+ /*
+ * This call will do I/O, so the graph can change again from here on.
+ * We have already completed the graph change, so we are not in danger
+ * of operating on the wrong node any more if this happens.
+ */
ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false);
if (local_err) {
error_report_err(local_err);
@@ -92,10 +101,7 @@ static int stream_prepare(Job *job)
}
out:
- if (base) {
- bdrv_unref(base);
- }
- bdrv_subtree_drained_end(s->above_base);
+ bdrv_drained_end(unfiltered_bs);
return ret;
}
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index c7bd4a2088..00e0cf8aea 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename,
BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp);
+int bdrv_set_backing_hd_drained(BlockDriverState *bs,
+ BlockDriverState *backing_hd,
+ Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
BlockDriverState *bdrv_open(const char *filename, const char *reference,
--
2.31.1

View File

@ -1,144 +0,0 @@
From e419493e6ec188461aa6f06c1b1cdc8a698859df Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Sat, 14 Jan 2023 15:21:03 -1000
Subject: [PATCH 6/8] target/i386: Fix 32-bit AD[CO]X insns in 64-bit mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions
RH-Bugzilla: 2173590
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Bandan Das <None>
RH-Commit: [6/7] 0fa4d3858319d4f877a5b3f31776121a72e2c57a (bonzini/rhel-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590
Upstream-Status: merged
Failure to truncate the inputs results in garbage for the carry-out.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1373
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230115012103.3131796-1-richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6fbef9426bac7184b5d5887589d8386e732865eb)
---
target/i386/tcg/emit.c.inc | 2 +
tests/tcg/x86_64/Makefile.target | 3 ++
tests/tcg/x86_64/adox.c | 69 ++++++++++++++++++++++++++++++++
3 files changed, 74 insertions(+)
create mode 100644 tests/tcg/x86_64/adox.c
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 0d7c6e80ae..e61ae9a2e9 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1037,6 +1037,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
#ifdef TARGET_X86_64
case MO_32:
/* If TL is 64-bit just do everything in 64-bit arithmetic. */
+ tcg_gen_ext32u_tl(s->T0, s->T0);
+ tcg_gen_ext32u_tl(s->T1, s->T1);
tcg_gen_add_i64(s->T0, s->T0, s->T1);
tcg_gen_add_i64(s->T0, s->T0, carry_in);
tcg_gen_shri_i64(carry_out, s->T0, 32);
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
index 4eac78293f..e64aab1b81 100644
--- a/tests/tcg/x86_64/Makefile.target
+++ b/tests/tcg/x86_64/Makefile.target
@@ -12,11 +12,14 @@ ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET))
X86_64_TESTS += vsyscall
X86_64_TESTS += noexec
X86_64_TESTS += cmpxchg
+X86_64_TESTS += adox
TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
else
TESTS=$(MULTIARCH_TESTS)
endif
+adox: CFLAGS=-O2
+
run-test-i386-ssse3: QEMU_OPTS += -cpu max
run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max
diff --git a/tests/tcg/x86_64/adox.c b/tests/tcg/x86_64/adox.c
new file mode 100644
index 0000000000..36be644c8b
--- /dev/null
+++ b/tests/tcg/x86_64/adox.c
@@ -0,0 +1,69 @@
+/* See if ADOX gives expected results */
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+static uint64_t adoxq(bool *c_out, uint64_t a, uint64_t b, bool c)
+{
+ asm ("addl $0x7fffffff, %k1\n\t"
+ "adoxq %2, %0\n\t"
+ "seto %b1"
+ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c));
+ *c_out = c;
+ return a;
+}
+
+static uint64_t adoxl(bool *c_out, uint64_t a, uint64_t b, bool c)
+{
+ asm ("addl $0x7fffffff, %k1\n\t"
+ "adoxl %k2, %k0\n\t"
+ "seto %b1"
+ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c));
+ *c_out = c;
+ return a;
+}
+
+int main()
+{
+ uint64_t r;
+ bool c;
+
+ r = adoxq(&c, 0, 0, 0);
+ assert(r == 0);
+ assert(c == 0);
+
+ r = adoxl(&c, 0, 0, 0);
+ assert(r == 0);
+ assert(c == 0);
+
+ r = adoxl(&c, 0x100000000, 0, 0);
+ assert(r == 0);
+ assert(c == 0);
+
+ r = adoxq(&c, 0, 0, 1);
+ assert(r == 1);
+ assert(c == 0);
+
+ r = adoxl(&c, 0, 0, 1);
+ assert(r == 1);
+ assert(c == 0);
+
+ r = adoxq(&c, -1, -1, 0);
+ assert(r == -2);
+ assert(c == 1);
+
+ r = adoxl(&c, -1, -1, 0);
+ assert(r == 0xfffffffe);
+ assert(c == 1);
+
+ r = adoxq(&c, -1, -1, 1);
+ assert(r == -1);
+ assert(c == 1);
+
+ r = adoxl(&c, -1, -1, 1);
+ assert(r == 0xffffffff);
+ assert(c == 1);
+
+ return 0;
+}
--
2.39.1

View File

@ -1,110 +0,0 @@
From a019c203f0148e5fbb20e102a17453806f5296b6 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Sat, 14 Jan 2023 13:05:42 -1000
Subject: [PATCH 3/8] target/i386: Fix BEXTR instruction
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions
RH-Bugzilla: 2173590
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Bandan Das <None>
RH-Commit: [3/7] bd1e3b26c72d7152b44be2d34308fd40dc106424 (bonzini/rhel-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590
Upstream-Status: merged
There were two problems here: not limiting the input to operand bits,
and not correctly handling large extraction length.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1372
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230114230542.3116013-3-richard.henderson@linaro.org>
Cc: qemu-stable@nongnu.org
Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b14c0098975264ed03144f145bca0179a6763a07)
---
target/i386/tcg/emit.c.inc | 22 +++++++++++-----------
tests/tcg/i386/test-i386-bmi2.c | 12 ++++++++++++
2 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 7037ff91c6..99f6ba6e19 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1078,30 +1078,30 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
- TCGv bound, zero;
+ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
+ TCGv zero = tcg_constant_tl(0);
+ TCGv mone = tcg_constant_tl(-1);
/*
* Extract START, and shift the operand.
* Shifts larger than operand size get zeros.
*/
tcg_gen_ext8u_tl(s->A0, s->T1);
+ if (TARGET_LONG_BITS == 64 && ot == MO_32) {
+ tcg_gen_ext32u_tl(s->T0, s->T0);
+ }
tcg_gen_shr_tl(s->T0, s->T0, s->A0);
- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
- zero = tcg_constant_tl(0);
tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);
/*
- * Extract the LEN into a mask. Lengths larger than
- * operand size get all ones.
+ * Extract the LEN into an inverse mask. Lengths larger than
+ * operand size get all zeros, length 0 gets all ones.
*/
tcg_gen_extract_tl(s->A0, s->T1, 8, 8);
- tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound);
-
- tcg_gen_movi_tl(s->T1, 1);
- tcg_gen_shl_tl(s->T1, s->T1, s->A0);
- tcg_gen_subi_tl(s->T1, s->T1, 1);
- tcg_gen_and_tl(s->T0, s->T0, s->T1);
+ tcg_gen_shl_tl(s->T1, mone, s->A0);
+ tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero);
+ tcg_gen_andc_tl(s->T0, s->T0, s->T1);
gen_op_update1_cc(s);
set_cc_op(s, CC_OP_LOGICB + ot);
diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c
index 3c3ef85513..982d4abda4 100644
--- a/tests/tcg/i386/test-i386-bmi2.c
+++ b/tests/tcg/i386/test-i386-bmi2.c
@@ -99,6 +99,9 @@ int main(int argc, char *argv[]) {
result = bextrq(mask, 0x10f8);
assert(result == 0);
+ result = bextrq(0xfedcba9876543210ull, 0x7f00);
+ assert(result == 0xfedcba9876543210ull);
+
result = blsiq(0x30);
assert(result == 0x10);
@@ -164,6 +167,15 @@ int main(int argc, char *argv[]) {
result = bextrl(mask, 0x1038);
assert(result == 0);
+ result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018);
+ assert(result == 0x5a);
+
+ result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00);
+ assert(result == 0x76543210u);
+
+ result = bextrl(-1, 0);
+ assert(result == 0);
+
result = blsil(0xffff);
assert(result == 1);
--
2.39.1

View File

@ -1,77 +0,0 @@
From d49e5d193dfccf6f5cfa98ccce5bd491478d563d Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Sat, 14 Jan 2023 13:32:06 -1000
Subject: [PATCH 7/8] target/i386: Fix BZHI instruction
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions
RH-Bugzilla: 2173590
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Bandan Das <None>
RH-Commit: [7/7] ad6b343c09c0304ac32cc68670c49d1fc12d8cf8 (bonzini/rhel-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590
Upstream-Status: merged
We did not correctly handle N >= operand size.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1374
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230114233206.3118472-1-richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9ad2ba6e8e7fc195d0dd0b76ab38bd2fceb1bdd4)
---
target/i386/tcg/emit.c.inc | 14 +++++++-------
tests/tcg/i386/test-i386-bmi2.c | 3 +++
2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index e61ae9a2e9..0d01e13002 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1147,20 +1147,20 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
- TCGv bound;
+ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
+ TCGv zero = tcg_constant_tl(0);
+ TCGv mone = tcg_constant_tl(-1);
- tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
+ tcg_gen_ext8u_tl(s->T1, s->T1);
/*
* Note that since we're using BMILG (in order to get O
* cleared) we need to store the inverse into C.
*/
- tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound);
- tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1);
+ tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound);
- tcg_gen_movi_tl(s->A0, -1);
- tcg_gen_shl_tl(s->A0, s->A0, s->T1);
+ tcg_gen_shl_tl(s->A0, mone, s->T1);
+ tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero);
tcg_gen_andc_tl(s->T0, s->T0, s->A0);
gen_op_update1_cc(s);
diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c
index 982d4abda4..0244df7987 100644
--- a/tests/tcg/i386/test-i386-bmi2.c
+++ b/tests/tcg/i386/test-i386-bmi2.c
@@ -123,6 +123,9 @@ int main(int argc, char *argv[]) {
result = bzhiq(mask, 0x1f);
assert(result == (mask & ~(-1 << 30)));
+ result = bzhiq(mask, 0x40);
+ assert(result == mask);
+
result = rorxq(0x2132435465768798, 8);
assert(result == 0x9821324354657687);
--
2.39.1

View File

@ -1,60 +0,0 @@
From cb2b591e1677db2837810eaedac534a7ff3a7b1c Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Sat, 14 Jan 2023 08:06:01 -1000
Subject: [PATCH 4/8] target/i386: Fix C flag for BLSI, BLSMSK, BLSR
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions
RH-Bugzilla: 2173590
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Bandan Das <None>
RH-Commit: [4/7] 173e23c492c830da6c5a4be0cfc20a69ac655b59 (bonzini/rhel-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590
Upstream-Status: merged
We forgot to set cc_src, which is used for computing C.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1370
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230114180601.2993644-1-richard.henderson@linaro.org>
Cc: qemu-stable@nongnu.org
Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 99282098dc74c2055bde5652bde6cf0067d0c370)
---
target/i386/tcg/emit.c.inc | 3 +++
1 file changed, 3 insertions(+)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 99f6ba6e19..4d7702c106 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1111,6 +1111,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
+ tcg_gen_mov_tl(cpu_cc_src, s->T0);
tcg_gen_neg_tl(s->T1, s->T0);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
@@ -1121,6 +1122,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
{
MemOp ot = decode->op[0].ot;
+ tcg_gen_mov_tl(cpu_cc_src, s->T0);
tcg_gen_subi_tl(s->T1, s->T0, 1);
tcg_gen_xor_tl(s->T0, s->T0, s->T1);
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
@@ -1131,6 +1133,7 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
+ tcg_gen_mov_tl(cpu_cc_src, s->T0);
tcg_gen_subi_tl(s->T1, s->T0, 1);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
--
2.39.1

View File

@ -1,205 +0,0 @@
From 54d3e58aabf9716f9a07aeb7044d7b7997e28123 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 31 Jan 2023 09:48:03 +0100
Subject: [PATCH 5/8] target/i386: fix ADOX followed by ADCX
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions
RH-Bugzilla: 2173590
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Bandan Das <None>
RH-Commit: [5/7] 64dbe4e602f08e4a88fdeacee5a8993ca4383563 (bonzini/rhel-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590
Upstream-Status: merged
When ADCX is followed by ADOX or vice versa, the second instruction's
carry comes from EFLAGS and the condition codes use the CC_OP_ADCOX
operation. Retrieving the carry from EFLAGS is handled by this bit
of gen_ADCOX:
tcg_gen_extract_tl(carry_in, cpu_cc_src,
ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);
Unfortunately, in this case cc_op has been overwritten by the previous
"if" statement to CC_OP_ADCOX. This works by chance when the first
instruction is ADCX; however, if the first instruction is ADOX,
ADCX will incorrectly take its carry from OF instead of CF.
Fix by moving the computation of the new cc_op at the end of the function.
The included exhaustive test case fails without this patch and passes
afterwards.
Because ADCX/ADOX need not be invoked through the VEX prefix, this
regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement
0x0f 0x38, add AVX", 2022-10-18). However, the mistake happened a
little earlier, when BMI instructions were rewritten using the new
decoder framework.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1471
Reported-by: Paul Jolly <https://gitlab.com/myitcv>
Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18)
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 60c7dd22e1383754d5f150bc9f7c2785c662a7b6)
---
target/i386/tcg/emit.c.inc | 20 +++++----
tests/tcg/i386/Makefile.target | 6 ++-
tests/tcg/i386/test-i386-adcox.c | 75 ++++++++++++++++++++++++++++++++
3 files changed, 91 insertions(+), 10 deletions(-)
create mode 100644 tests/tcg/i386/test-i386-adcox.c
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 4d7702c106..0d7c6e80ae 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1015,6 +1015,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq)
static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
{
+ int opposite_cc_op;
TCGv carry_in = NULL;
TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
TCGv zero;
@@ -1022,14 +1023,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
/* Re-use the carry-out from a previous round. */
carry_in = carry_out;
- cc_op = s->cc_op;
- } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) {
- /* Merge with the carry-out from the opposite instruction. */
- cc_op = CC_OP_ADCOX;
- }
-
- /* If we don't have a carry-in, get it out of EFLAGS. */
- if (!carry_in) {
+ } else {
+ /* We don't have a carry-in, get it out of EFLAGS. */
if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
gen_compute_eflags(s);
}
@@ -1053,7 +1048,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
break;
}
- set_cc_op(s, cc_op);
+
+ opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX;
+ if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) {
+ /* Merge with the carry-out from the opposite instruction. */
+ set_cc_op(s, CC_OP_ADCOX);
+ } else {
+ set_cc_op(s, cc_op);
+ }
}
static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index 81831cafbc..bafd8c2180 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -14,7 +14,7 @@ config-cc.mak: Makefile
I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
ALL_X86_TESTS=$(I386_SRCS:.c=)
SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx
-X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
+X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2
run-test-i386-bmi2: QEMU_OPTS += -cpu max
run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max
+test-i386-adcox: CFLAGS=-O2
+run-test-i386-adcox: QEMU_OPTS += -cpu max
+run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max
+
#
# hello-i386 is a barebones app
#
diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c
new file mode 100644
index 0000000000..16169efff8
--- /dev/null
+++ b/tests/tcg/i386/test-i386-adcox.c
@@ -0,0 +1,75 @@
+/* See if ADCX and ADOX instructions give expected results */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define CC_C 1
+#define CC_O (1 << 11)
+
+#ifdef __x86_64__
+#define REG uint64_t
+#else
+#define REG uint32_t
+#endif
+
+void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
+{
+ REG flags;
+ REG out_adcx, out_adox;
+
+ asm("pushf; pop %0" : "=r"(flags));
+ flags &= ~(CC_C | CC_O);
+ flags |= (in_c ? CC_C : 0);
+ flags |= (in_o ? CC_O : 0);
+
+ out_adcx = adcx_operand;
+ out_adox = adox_operand;
+ asm("push %0; popf;"
+ "adox %3, %2;"
+ "adcx %3, %1;"
+ "pushf; pop %0"
+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
+
+ assert(out_adcx == in_c + adcx_operand - 1);
+ assert(out_adox == in_o + adox_operand - 1);
+ assert(!!(flags & CC_C) == (in_c || adcx_operand));
+ assert(!!(flags & CC_O) == (in_o || adox_operand));
+}
+
+void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
+{
+ REG flags;
+ REG out_adcx, out_adox;
+
+ asm("pushf; pop %0" : "=r"(flags));
+ flags &= ~(CC_C | CC_O);
+ flags |= (in_c ? CC_C : 0);
+ flags |= (in_o ? CC_O : 0);
+
+ out_adcx = adcx_operand;
+ out_adox = adox_operand;
+ asm("push %0; popf;"
+ "adcx %3, %1;"
+ "adox %3, %2;"
+ "pushf; pop %0"
+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
+
+ assert(out_adcx == in_c + adcx_operand - 1);
+ assert(out_adox == in_o + adox_operand - 1);
+ assert(!!(flags & CC_C) == (in_c || adcx_operand));
+ assert(!!(flags & CC_O) == (in_o || adox_operand));
+}
+
+int main(int argc, char *argv[]) {
+ /* try all combinations of input CF, input OF, CF from op1+op2, OF from op2+op1 */
+ int i;
+ for (i = 0; i <= 15; i++) {
+ printf("%d\n", i);
+ test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
+ test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
+ }
+ return 0;
+}
+
--
2.39.1

View File

@ -1,77 +0,0 @@
From f4ddcdd2395e0944c20f6683c66068ed0ac7d757 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 7 Jan 2023 18:14:20 +0100
Subject: [PATCH 1/8] target/i386: fix operand size of unary SSE operations
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions
RH-Bugzilla: 2173590
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Bandan Das <None>
RH-Commit: [1/7] 7041f3e30e19add6bd8e5355d8bebf92390a5c2e (bonzini/rhel-qemu-kvm)
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590
Upstream-Status: merged
VRCPSS, VRSQRTSS and VCVTSx2Sx have a 32-bit or 64-bit memory operand,
which is represented in the decoding tables by X86_VEX_REPScalar. Add it
to the tables, and make validate_vex() handle the case of an instruction
that is in exception type 4 without the REP prefix and exception type 5
with it; this is the case of VRCP and VRSQRT.
Reported-by: yongwoo <https://gitlab.com/yongwoo36>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1377
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3d304620ec6c95f31db17acc132f42f243369299)
---
target/i386/tcg/decode-new.c.inc | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 80c579164f..d5fd8d965c 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -105,6 +105,7 @@
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
+#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
@@ -839,8 +840,8 @@ static const X86OpEntry opcodes_0F[256] = {
[0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66),
[0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
- [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3),
- [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3),
+ [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3),
+ [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3),
[0x54] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66), /* vand */
[0x55] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 p_00_66), /* vandn */
[0x56] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 p_00_66), /* vor */
@@ -878,7 +879,7 @@ static const X86OpEntry opcodes_0F[256] = {
[0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
[0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
- [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex3 p_00_66_f3_f2),
+ [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
[0x5b] = X86_OP_GROUP0(0F5B),
[0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
[0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
@@ -1447,9 +1448,9 @@ static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
* Instructions which differ between 00/66 and F2/F3 in the
* exception classification and the size of the memory operand.
*/
- assert(e->vex_class == 1 || e->vex_class == 2);
+ assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4);
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
- e->vex_class = 3;
+ e->vex_class = e->vex_class < 4 ? 3 : 5;
if (s->vex_l) {
goto illegal;
}
--
2.39.1

View File

@ -1,50 +0,0 @@
From b330bf0a2ad5af73d3c62997f7f0fa5b61f1796b Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Tue, 14 Feb 2023 14:48:37 +0100
Subject: [PATCH 8/8] target/s390x/arch_dump: Fix memory corruption in
s390x_write_elf64_notes()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 152: Fix memory corruption in s390x_write_elf64_notes()
RH-Bugzilla: 2168172
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Commit: [1/1] 37a2c997b2c8b7524e0b6299891bf3ea7c9a46d0 (thuth/qemu-kvm-cs9)
Bugzilla: https://bugzilla.redhat.com/2168172
Upstream-Status: Posted (and reviewed, but not merged yet)
"note_size" can be smaller than sizeof(note), so unconditionally calling
memset(notep, 0, sizeof(note)) could cause a memory corruption here in
case notep has been allocated dynamically, thus let's use note_size as
length argument for memset() instead.
Fixes: 113d8f4e95 ("s390x: pv: Add dump support")
Message-Id: <20230214141056.680969-1-thuth@redhat.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
target/s390x/arch_dump.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index a2329141e8..a7c44ba49d 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -248,7 +248,7 @@ static int s390x_write_elf64_notes(const char *note_name,
notep = g_malloc(note_size);
}
- memset(notep, 0, sizeof(note));
+ memset(notep, 0, note_size);
/* Setup note header data */
notep->hdr.n_descsz = cpu_to_be32(content_size);
--
2.31.1

View File

@ -1,153 +0,0 @@
From 093c4a6834f3ec5a05390a3630ae4edec80885b8 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 18 Nov 2022 18:40:57 +0100
Subject: [PATCH 15/31] test-bdrv-drain: Don't yield in
.bdrv_co_drained_begin/end()
RH-Author: Stefano Garzarella <sgarzare@redhat.com>
RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
RH-Bugzilla: 2155112
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Commit: [3/16] 5282d3e13cb85dfb480edb11b7eb2769248465df (sgarzarella/qemu-kvm-c-9-s)
We want to change .bdrv_co_drained_begin/end() back to be non-coroutine
callbacks, so in preparation, avoid yielding in their implementation.
This does almost the same as the existing logic in bdrv_drain_invoke(),
by creating and entering coroutines internally. However, since the test
case is by far the heaviest user of coroutine code in drain callbacks,
it is preferable to have the complexity in the test case rather than the
drain core, which is already complicated enough without this.
The behaviour for bdrv_drain_begin() is unchanged because we increase
bs->in_flight and this is still polled. However, bdrv_drain_end()
doesn't wait for the spawned coroutine to complete any more. This is
fine, we don't rely on bdrv_drain_end() restarting all operations
immediately before the next aio_poll().
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20221118174110.55183-3-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 7bce1c299834557bffd92294608ea528648cfe75)
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
tests/unit/test-bdrv-drain.c | 64 ++++++++++++++++++++++++++----------
1 file changed, 46 insertions(+), 18 deletions(-)
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 09dc4a4891..24f34e24ad 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -38,12 +38,22 @@ typedef struct BDRVTestState {
bool sleep_in_drain_begin;
} BDRVTestState;
+static void coroutine_fn sleep_in_drain_begin(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+ bdrv_dec_in_flight(bs);
+}
+
static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
{
BDRVTestState *s = bs->opaque;
s->drain_count++;
if (s->sleep_in_drain_begin) {
- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+ Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs);
+ bdrv_inc_in_flight(bs);
+ aio_co_enter(bdrv_get_aio_context(bs), co);
}
}
@@ -1916,6 +1926,21 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
return 0;
}
+static void coroutine_fn bdrv_replace_test_drain_co(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ BDRVReplaceTestState *s = bs->opaque;
+
+ /* Keep waking io_co up until it is done */
+ while (s->io_co) {
+ aio_co_wake(s->io_co);
+ s->io_co = NULL;
+ qemu_coroutine_yield();
+ }
+ s->drain_co = NULL;
+ bdrv_dec_in_flight(bs);
+}
+
/**
* If .drain_count is 0, wake up .io_co if there is one; and set
* .was_drained.
@@ -1926,20 +1951,27 @@ static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
BDRVReplaceTestState *s = bs->opaque;
if (!s->drain_count) {
- /* Keep waking io_co up until it is done */
- s->drain_co = qemu_coroutine_self();
- while (s->io_co) {
- aio_co_wake(s->io_co);
- s->io_co = NULL;
- qemu_coroutine_yield();
- }
- s->drain_co = NULL;
-
+ s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
+ bdrv_inc_in_flight(bs);
+ aio_co_enter(bdrv_get_aio_context(bs), s->drain_co);
s->was_drained = true;
}
s->drain_count++;
}
+static void coroutine_fn bdrv_replace_test_read_entry(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ char data;
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
+ int ret;
+
+ /* Queue a read request post-drain */
+ ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
+ g_assert(ret >= 0);
+ bdrv_dec_in_flight(bs);
+}
+
/**
* Reduce .drain_count, set .was_undrained once it reaches 0.
* If .drain_count reaches 0 and the node has a backing file, issue a
@@ -1951,17 +1983,13 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
g_assert(s->drain_count > 0);
if (!--s->drain_count) {
- int ret;
-
s->was_undrained = true;
if (bs->backing) {
- char data;
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
-
- /* Queue a read request post-drain */
- ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
- g_assert(ret >= 0);
+ Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry,
+ bs);
+ bdrv_inc_in_flight(bs);
+ aio_co_enter(bdrv_get_aio_context(bs), co);
}
}
}
--
2.31.1

Some files were not shown because too many files have changed in this diff Show More