diff --git a/.gitignore b/.gitignore index ded41b6..49421e7 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/qemu-8.2.0.tar.xz +SOURCES/qemu-9.0.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata index 4a22f24..437fb86 100644 --- a/.qemu-kvm.metadata +++ b/.qemu-kvm.metadata @@ -1 +1 @@ -1615e59b1bd68324e0819245fe003e33c14a52f9 SOURCES/qemu-8.2.0.tar.xz +6699bb03d6da21159b89668bca01c6c958b95d07 SOURCES/qemu-9.0.0.tar.xz diff --git a/SOURCES/0004-Initial-redhat-build.patch b/SOURCES/0004-Initial-redhat-build.patch index a63b5c3..49991a2 100644 --- a/SOURCES/0004-Initial-redhat-build.patch +++ b/SOURCES/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From faae70a870156f86a5cf55ca967b15d7612941ff Mon Sep 17 00:00:00 2001 +From ea7dff3dbf979d7d8a85a16cf5187235143e1048 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. 
-This rebase is based on qemu-kvm-8.1.0-5.el9 +This rebase is based on qemu-kvm-8.2.0-11.el9 Signed-off-by: Miroslav Rezanina -- @@ -83,6 +83,12 @@ Rebase changes (8.2.0): - Added --disable-plugins configure option - Fixing frh.py strings +Rebase notes (9.0.0): +- Fixed qemu-kvm binary location change +- Remove hppa-firmware64.img +- Package stp files for utilities +- Download subprojects on local build + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -193,14 +199,17 @@ Merged patches (8.1.0): Merged patches (8.2.0): - cd9efa221d Enable qemu-kvm-device-usb-redirec for aarch64 +Merged patches (9.0.0 rc0): +- 25de053dbf spec: Enable zstd + Signed-off-by: Miroslav Rezanina --- - .distro/Makefile | 100 + + .distro/Makefile | 101 + .distro/Makefile.common | 42 + .distro/README.tests | 39 + .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 4909 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 5170 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + .distro/scripts/frh.py | 4 +- @@ -211,7 +220,7 @@ Signed-off-by: Miroslav Rezanina scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + ui/vnc-auth-sasl.c | 2 +- - 16 files changed, 5168 insertions(+), 6 deletions(-) + 16 files changed, 5430 insertions(+), 6 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests diff --git a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch index 97c53b4..61e84a1 100644 --- a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch +++ b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 048067b4618ba1fa7c8c517185d4cd3a675eba72 Mon Sep 17 00:00:00 2001 +From 780c39975b059deaee106775b6e3a240155acea3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 7 
Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL @@ -47,6 +47,12 @@ Rebase notes (8.2.0): - Disable new neoverse-v2 - Removed CONFIG_OPENGL from x86_64 config file +Rebase notes (9.0.0 rc0): +- Split CONFIG_IDE_QDEV to CONFIG_IDE_DEV and CONFIG_IDE_BUS (upstream change) + +Rebase notes (9.0.0 rc1): +- Do not compile armv7 cpu types + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -74,36 +80,41 @@ Merged patches (8.1.0): Merged patches (8.2.0): - b29f66431f Enable igb on x86_64 + +Merged patches (9.0.0 rc0): +- 3889ede5d9 Compile IOMMUFD on x86_64 +- 0beb18451f Compile IOMMUFD on s390x +- 2b4b13f70d Compile IOMMUFD object on aarch64 --- .distro/qemu-kvm.spec.template | 18 +-- - .../aarch64-softmmu/aarch64-rh-devices.mak | 41 +++++++ + .../aarch64-softmmu/aarch64-rh-devices.mak | 42 +++++++ .../ppc64-softmmu/ppc64-rh-devices.mak | 37 ++++++ configs/devices/rh-virtio.mak | 10 ++ - .../s390x-softmmu/s390x-rh-devices.mak | 18 +++ - .../x86_64-softmmu/x86_64-rh-devices.mak | 110 ++++++++++++++++++ + .../s390x-softmmu/s390x-rh-devices.mak | 19 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 112 ++++++++++++++++++ hw/arm/virt.c | 2 + hw/block/fdc.c | 10 ++ hw/cpu/meson.build | 3 +- hw/cxl/meson.build | 3 +- hw/display/cirrus_vga.c | 4 + hw/ide/piix.c | 5 +- - hw/ide/qdev.c | 9 ++ hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/meson.build | 2 +- - hw/virtio/meson.build | 5 +- + hw/virtio/meson.build | 6 +- target/arm/arm-qmp-cmds.c | 2 + target/arm/cpu.c | 4 + target/arm/cpu.h | 3 + target/arm/cpu64.c | 12 +- target/arm/tcg/cpu32.c | 2 + target/arm/tcg/cpu64.c | 8 ++ + target/arm/tcg/meson.build | 4 +- target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ tests/qtest/arm-cpu-features.c | 4 + - 28 files changed, 323 insertions(+), 15 deletions(-) + 28 files changed, 321 insertions(+), 
17 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak @@ -112,10 +123,10 @@ Merged patches (8.2.0): diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..aec1831199 +index 0000000000..b0191d3c69 --- /dev/null +++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -0,0 +1,41 @@ +@@ -0,0 +1,42 @@ +include ../rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y @@ -157,6 +168,7 @@ index 0000000000..aec1831199 +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak new file mode 100644 index 0000000000..dbb7d30829 @@ -218,10 +230,10 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_SERIAL=y diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak new file mode 100644 -index 0000000000..69a799adbd +index 0000000000..24cf6dbd03 --- /dev/null +++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak -@@ -0,0 +1,18 @@ +@@ -0,0 +1,19 @@ +include ../rh-virtio.mak + +CONFIG_PCI=y @@ -240,12 +252,13 @@ index 0000000000..69a799adbd +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..ce5be73633 +index 0000000000..d60ff1bcfc --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -0,0 +1,110 @@ +@@ -0,0 +1,112 @@ +include ../rh-virtio.mak + +CONFIG_ACPI=y @@ -283,7 +296,8 @@ index 0000000000..ce5be73633 +CONFIG_IDE_CORE=y +CONFIG_IDE_PCI=y +CONFIG_IDE_PIIX=y -+CONFIG_IDE_QDEV=y ++CONFIG_IDE_DEV=y 
++CONFIG_IDE_BUS=y +CONFIG_IGB_PCI_EXPRESS=y +CONFIG_IOAPIC=y +CONFIG_IOH3420=y @@ -356,28 +370,29 @@ index 0000000000..ce5be73633 +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index be2856c018..af9ea4dd1c 100644 +index a9a913aead..6c6d155002 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -205,6 +205,7 @@ static const int a15irqmap[] = { - }; - - static const char *valid_cpus[] = { +@@ -2954,6 +2954,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + static const char * const valid_cpu_types[] = { +#if 0 /* Disabled for Red Hat Enterprise Linux */ #ifdef CONFIG_TCG - ARM_CPU_TYPE_NAME("cortex-a7"), - ARM_CPU_TYPE_NAME("cortex-a15"), -@@ -219,6 +220,7 @@ static const char *valid_cpus[] = { - ARM_CPU_TYPE_NAME("neoverse-n2"), - #endif - ARM_CPU_TYPE_NAME("cortex-a53"), + ARM_CPU_TYPE_NAME("cortex-a7"), + ARM_CPU_TYPE_NAME("cortex-a15"), +@@ -2971,6 +2972,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + #endif /* CONFIG_TCG */ + #ifdef TARGET_AARCH64 + ARM_CPU_TYPE_NAME("cortex-a53"), +#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("cortex-a57"), - ARM_CPU_TYPE_NAME("host"), - ARM_CPU_TYPE_NAME("max"), + ARM_CPU_TYPE_NAME("cortex-a57"), + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + ARM_CPU_TYPE_NAME("host"), diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index d7cc4d3ec1..12d0a60905 100644 +index 6dd94e98bc..a05757fc9a 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -49,6 +49,8 @@ @@ -405,7 +420,7 @@ index d7cc4d3ec1..12d0a60905 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index 6d319947ca..91962fd863 100644 +index 38cdcfbe57..e588ecfd42 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build @@ -1,4 +1,5 @@ @@ -416,7 +431,7 @@ index 
6d319947ca..91962fd863 100644 system_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) system_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build -index ea0aebf6e3..6878f06974 100644 +index 3e375f61a9..613adb3ebb 100644 --- a/hw/cxl/meson.build +++ b/hw/cxl/meson.build @@ -6,7 +6,8 @@ system_ss.add(when: 'CONFIG_CXL', @@ -430,7 +445,7 @@ index ea0aebf6e3..6878f06974 100644 if_false: files( 'cxl-host-stubs.c', diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index b80f98b6c4..0370cf8a64 100644 +index 150883a971..497365bd80 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -36,6 +36,7 @@ @@ -452,10 +467,10 @@ index b80f98b6c4..0370cf8a64 100644 * Follow real hardware, cirrus card emulated has 4 MB video memory. * Also accept 8 MB/16 MB for backward compatibility. diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 4e5e12935f..03ca06bb17 100644 +index 80efc633d3..9cb82b8eea 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -190,7 +190,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -191,7 +191,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -465,7 +480,7 @@ index 4e5e12935f..03ca06bb17 100644 } static const TypeInfo piix3_ide_info = { -@@ -214,6 +215,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -215,6 +216,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -474,57 +489,11 @@ index 4e5e12935f..03ca06bb17 100644 } static const TypeInfo piix4_ide_info = { -diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c -index 1b3b4da01d..454bfa5783 100644 ---- a/hw/ide/qdev.c -+++ b/hw/ide/qdev.c -@@ -283,10 +283,13 @@ static void 
ide_cd_realize(IDEDevice *dev, Error **errp) - ide_dev_initfn(dev, IDE_CD, errp); - } - -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - static void ide_cf_realize(IDEDevice *dev, Error **errp) - { - ide_dev_initfn(dev, IDE_CFATA, errp); - } -+#endif - - #define DEFINE_IDE_DEV_PROPERTIES() \ - DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ -@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = { - .class_init = ide_cd_class_init, - }; - -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - static Property ide_cf_properties[] = { - DEFINE_IDE_DEV_PROPERTIES(), - DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf), -@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = { - .instance_size = sizeof(IDEDrive), - .class_init = ide_cf_class_init, - }; -+#endif - - static void ide_device_class_init(ObjectClass *klass, void *data) - { -@@ -396,7 +402,10 @@ static void ide_register_types(void) - type_register_static(&ide_bus_info); - type_register_static(&ide_hd_info); - type_register_static(&ide_cd_info); -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - type_register_static(&ide_cf_info); -+#endif - type_register_static(&ide_device_type_info); - } - diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index b92b63bedc..3b6235dde6 100644 +index 74f10b640f..2e85ecf476 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -957,6 +957,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -952,6 +952,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_kbd_isa; adevc->build_dev_aml = i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -534,7 +503,7 @@ index b92b63bedc..3b6235dde6 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 8ffe1077f1..b3dfeeca4f 100644 +index 43f3a4a701..267f182883 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1746,6 +1746,7 @@ static const E1000Info e1000_devices[] = { @@ -554,10 +523,10 @@ index 8ffe1077f1..b3dfeeca4f 
100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 91fae56573..33e0c8724c 100644 +index e7c9edd033..3b0a47a28c 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -386,10 +386,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -389,10 +389,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -565,16 +534,16 @@ index 91fae56573..33e0c8724c 100644 DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), - DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5p_v2.1"), +#endif DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), - DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7p_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index e94149ebde..4a8adbf3dc 100644 +index aac3bb35f2..5411ff35df 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build -@@ -52,7 +52,7 @@ system_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reader +@@ -55,7 +55,7 @@ system_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reader if cacard.found() usbsmartcard_ss = ss.source_set() usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', @@ -584,26 +553,34 @@ index e94149ebde..4a8adbf3dc 100644 endif diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build -index c0055a7832..12e1d6c67e 100644 +index d7f18c96e6..aaabbb8b0b 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build -@@ -17,8 +17,9 @@ if have_vhost - if have_vhost_user - # fixme - this really should be generic - specific_virtio_ss.add(files('vhost-user.c')) +@@ -20,7 +20,8 @@ if have_vhost + system_virtio_ss.add(files('vhost-user-base.c')) + + # MMIO Stubs - system_virtio_ss.add(files('vhost-user-device.c')) -- system_virtio_ss.add(when: 
'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) +# Disabled for 8.2.0 rebase for RHEL 9.4.0 +# system_virtio_ss.add(files('vhost-user-device.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) +@@ -28,7 +29,8 @@ if have_vhost + system_virtio_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input.c')) + + # PCI Stubs +- system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) ++# Disabled for 8.2.0 rebase for RHEL 9.4.0 +# system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) - endif - if have_vhost_vdpa - system_virtio_ss.add(files('vhost-vdpa.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], + if_true: files('vhost-user-gpio-pci.c')) + system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c -index b53d5efe13..64989a02d1 100644 +index 3cc8cc738b..6f21fea1f5 100644 --- a/target/arm/arm-qmp-cmds.c +++ b/target/arm/arm-qmp-cmds.c -@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, +@@ -223,6 +223,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, static void arm_cpu_add_definition(gpointer data, gpointer user_data) { ObjectClass *oc = data; @@ -611,23 +588,23 @@ index b53d5efe13..64989a02d1 100644 CpuDefinitionInfoList **cpu_list = user_data; CpuDefinitionInfo *info; const char *typename; -@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); +@@ -231,6 +232,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer 
user_data) + info = g_malloc0(sizeof(*info)); + info->name = cpu_model_from_type(typename); info->q_typename = g_strdup(typename); + info->deprecated = !!cc->deprecation_note; QAPI_LIST_PREPEND(*cpu_list, info); } diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index efb22a87f9..a32521ada9 100644 +index ab8d007a86..e5dce20f19 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2524,6 +2524,10 @@ static void cpu_register_class_init(ObjectClass *oc, void *data) +@@ -2546,6 +2546,10 @@ static void cpu_register_class_init(ObjectClass *oc, void *data) acc->info = data; cc->gdb_core_xml_file = "arm-core.xml"; -+ ++ + if (acc->info->deprecation_note) { + cc->deprecation_note = acc->info->deprecation_note; + } @@ -635,10 +612,10 @@ index efb22a87f9..a32521ada9 100644 void arm_cpu_register(const ARMCPUInfo *info) diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index a0282e0d28..7e0f0dfea7 100644 +index bc0c84873f..e9472c8bb8 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h -@@ -34,6 +34,8 @@ +@@ -37,6 +37,8 @@ #define KVM_HAVE_MCE_INJECTION 1 #endif @@ -647,7 +624,7 @@ index a0282e0d28..7e0f0dfea7 100644 #define EXCP_UDEF 1 /* undefined instruction */ #define EXCP_SWI 2 /* software interrupt */ #define EXCP_PREFETCH_ABORT 3 -@@ -1120,6 +1122,7 @@ typedef struct ARMCPUInfo { +@@ -1092,6 +1094,7 @@ typedef struct ARMCPUInfo { const char *name; void (*initfn)(Object *obj); void (*class_init)(ObjectClass *oc, void *data); @@ -656,7 +633,7 @@ index a0282e0d28..7e0f0dfea7 100644 /** diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 1e9c6c85ae..10be900803 100644 +index 985b1efe16..46a4e80171 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -648,6 +648,7 @@ static void aarch64_a57_initfn(Object *obj) @@ -688,7 +665,7 @@ index 1e9c6c85ae..10be900803 100644 { .name = "max", .initfn = aarch64_max_initfn }, #if defined(CONFIG_KVM) || defined(CONFIG_HVF) { .name = "host", .initfn = aarch64_host_initfn }, -@@ -815,8 +820,13 @@ static void 
aarch64_cpu_instance_init(Object *obj) +@@ -814,8 +819,13 @@ static void aarch64_cpu_instance_init(Object *obj) static void cpu_register_class_init(ObjectClass *oc, void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); @@ -703,24 +680,24 @@ index 1e9c6c85ae..10be900803 100644 void aarch64_cpu_register(const ARMCPUInfo *info) diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c -index d9e0e2a4dd..c5c639a6ea 100644 +index de8f2be941..8896295ae3 100644 --- a/target/arm/tcg/cpu32.c +++ b/target/arm/tcg/cpu32.c -@@ -98,6 +98,7 @@ void aa32_max_features(ARMCPU *cpu) +@@ -92,6 +92,7 @@ void aa32_max_features(ARMCPU *cpu) + cpu->isar.id_dfr1 = t; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ /* CPU models. These are not needed for the AArch64 linux-user build. */ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if !defined(CONFIG_USER_ONLY) - static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) - { -@@ -1189,3 +1190,4 @@ static void arm_tcg_cpu_register_types(void) +@@ -1037,3 +1038,4 @@ static void arm_tcg_cpu_register_types(void) type_init(arm_tcg_cpu_register_types) #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ +#endif /* disabled for RHEL */ diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c -index fcda99e158..bd5a993ff8 100644 +index 9f7a9f3d2c..7ec6851c9c 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -29,6 +29,7 @@ @@ -755,7 +732,7 @@ index fcda99e158..bd5a993ff8 100644 /* * -cpu max: a CPU with as many features enabled as our emulation supports. 
-@@ -1259,6 +1263,7 @@ void aarch64_max_tcg_initfn(Object *obj) +@@ -1271,6 +1275,7 @@ void aarch64_max_tcg_initfn(Object *obj) qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); } @@ -763,7 +740,7 @@ index fcda99e158..bd5a993ff8 100644 static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, -@@ -1270,14 +1275,17 @@ static const ARMCPUInfo aarch64_cpus[] = { +@@ -1282,14 +1287,17 @@ static const ARMCPUInfo aarch64_cpus[] = { { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn }, { .name = "neoverse-n2", .initfn = aarch64_neoverse_n2_initfn }, }; @@ -781,8 +758,20 @@ index fcda99e158..bd5a993ff8 100644 } type_init(aarch64_cpu_register_types) +diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build +index 3b1a9f0fc5..6898b4de6f 100644 +--- a/target/arm/tcg/meson.build ++++ b/target/arm/tcg/meson.build +@@ -56,5 +56,5 @@ arm_system_ss.add(files( + 'psci.c', + )) + +-arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) +-arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) ++#arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) ++#arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 7dbb47de64..69fddb05bc 100644 +index f2301b43f7..f77ebfcc81 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -806,13 +795,13 @@ index 7dbb47de64..69fddb05bc 100644 POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, @@ -718,6 +721,7 @@ "PowerPC 970MP v1.1") - POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + POWERPC_DEF("power5p_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, "POWER5+ v2.1") +#endif POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") - POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -898,12 +902,15 @@ PowerPCCPUAlias 
ppc_cpu_aliases[] = { + POWERPC_DEF("power7p_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, +@@ -894,13 +898,16 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -822,13 +811,14 @@ index 7dbb47de64..69fddb05bc 100644 { "970", "970_v2.2" }, { "970fx", "970fx_v3.1" }, { "970mp", "970mp_v1.1" }, - { "power5+", "power5+_v2.1" }, + { "power5+", "power5p_v2.1" }, + { "power5+_v2.1", "power5p_v2.1" }, { "power5gs", "power5+_v2.1" }, +#endif { "power7", "power7_v2.3" }, - { "power7+", "power7+_v2.1" }, - { "power8e", "power8e_v2.1" }, -@@ -913,12 +920,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power7+", "power7p_v2.1" }, + { "power7+_v2.1", "power7p_v2.1" }, +@@ -911,12 +918,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power10", "power10_v2.0" }, #endif @@ -844,10 +834,10 @@ index 7dbb47de64..69fddb05bc 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index 63981bf36b..87a4480c05 100644 +index 2d99218069..0728bfcc20 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c -@@ -35,6 +35,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +@@ -34,6 +34,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -858,10 +848,10 @@ index 63981bf36b..87a4480c05 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 33ab3551f4..912e493951 100644 +index 4ce809c5d4..55fb4855b1 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c -@@ -2567,6 +2567,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2565,6 +2565,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU 
models"); return; } @@ -877,10 +867,10 @@ index 33ab3551f4..912e493951 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c -index a8a4c668ad..2458cc527c 100644 +index 9d6e6190d5..f822526acb 100644 --- a/tests/qtest/arm-cpu-features.c +++ b/tests/qtest/arm-cpu-features.c -@@ -451,8 +451,10 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -452,8 +452,10 @@ static void test_query_cpu_model_expansion(const void *data) assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); /* Test expected feature presence/absence for some cpu types */ @@ -891,7 +881,7 @@ index a8a4c668ad..2458cc527c 100644 /* Enabling and disabling pmu should always work. */ assert_has_feature_enabled(qts, "max", "pmu"); -@@ -469,6 +471,7 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -470,6 +472,7 @@ static void test_query_cpu_model_expansion(const void *data) assert_has_feature_enabled(qts, "cortex-a57", "pmu"); assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); @@ -899,7 +889,7 @@ index a8a4c668ad..2458cc527c 100644 assert_has_feature_enabled(qts, "a64fx", "pmu"); assert_has_feature_enabled(qts, "a64fx", "aarch64"); /* -@@ -481,6 +484,7 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -482,6 +485,7 @@ static void test_query_cpu_model_expansion(const void *data) "{ 'sve384': true }"); assert_error(qts, "a64fx", "cannot enable sve640", "{ 'sve640': true }"); diff --git a/SOURCES/0006-Machine-type-related-general-changes.patch b/SOURCES/0006-Machine-type-related-general-changes.patch index 4a4c6fb..e0c3795 100644 --- a/SOURCES/0006-Machine-type-related-general-changes.patch +++ b/SOURCES/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From d9ff466c980d219ebf230ea24becce294c196f1f Mon Sep 17 00:00:00 2001 +From 8e6a30073f9c1a5d6294b2d16556522453e227e7 Mon Sep 17 00:00:00 2001 From: 
Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -26,6 +26,12 @@ Rebase notes (7.1.0): Rebase notes (8.1.0): - Do not modify unused vga-isa.c +Rebase notes (9.0.0 rc0): +- Updated smsbios handling + +Rebase notes (9.0.0 rc4): +- Moving downstream compat changes + Merged patches (6.1.0): - f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 - 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 @@ -61,24 +67,27 @@ Merged patches (8.1.0): Merged patches (8.2.0): - 4ee284aca9 Add machine types compat bits. (partial) + +Merged patches (9.0.0 rc0): +- 4b8fe42abc virtio-mem: default-enable "dynamic-memslots" --- hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 267 +++++++++++++++++++++++++++++++++++ - hw/i386/pc_piix.c | 2 + - hw/i386/pc_q35.c | 2 + + hw/core/machine.c | 269 +++++++++++++++++++++++++++++++++++ + hw/i386/fw_cfg.c | 3 +- hw/net/rtl8139.c | 4 +- hw/smbios/smbios.c | 46 +++++- hw/timer/i8254_common.c | 2 +- hw/usb/hcd-xhci-pci.c | 59 ++++++-- hw/usb/hcd-xhci-pci.h | 1 + + hw/virtio/virtio-mem.c | 3 +- include/hw/boards.h | 40 ++++++ - include/hw/firmware/smbios.h | 5 +- + include/hw/firmware/smbios.h | 4 +- include/hw/i386/pc.h | 3 + - 13 files changed, 413 insertions(+), 22 deletions(-) + 13 files changed, 414 insertions(+), 24 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index dd523d2e4c..5050c0ba97 100644 +index debe1adb84..e8ddcd716e 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -245,7 +245,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) @@ -88,28 +97,28 @@ index dd523d2e4c..5050c0ba97 100644 - .minimum_version_id = 3, + .minimum_version_id = 2, .post_load = vmstate_acpi_post_load, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index af9ea4dd1c..62f0f7d4d6 100644 +index 
6c6d155002..36e9b4b4e9 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1638,7 +1638,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1651,7 +1651,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, - vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, -- true, SMBIOS_ENTRY_POINT_TYPE_64); -+ true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); + vmc->smbios_old_sys_ver ? "1.0" : mc->name, +- true); ++ true, NULL, NULL); /* build the array of physical mem area from base_memmap */ mem_array.address = vms->memmap[VIRT_MEM].base; diff --git a/hw/core/machine.c b/hw/core/machine.c -index 0c17398141..446601ee30 100644 +index 37ede0e7d4..695cb89a46 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -57,6 +57,273 @@ GlobalProperty hw_compat_7_2[] = { +@@ -296,6 +296,275 @@ GlobalProperty hw_compat_2_1[] = { }; - const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1); +/* + * RHEL only: machine types for previous major releases are deprecated @@ -132,6 +141,8 @@ index 0c17398141..446601ee30 100644 + { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, + /* hw_compat_rhel_9_4 from hw_compat_8_1 */ + { "igb", "x-pcie-flr-init", "off" }, ++ /* hw_compat_rhel_9_4 jira RHEL-24045 */ ++ { "virtio-mem", "dynamic-memslots", "off" }, +}; +const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); + @@ -378,37 +389,25 @@ index 0c17398141..446601ee30 100644 +}; +const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + - GlobalProperty hw_compat_7_1[] = { - { "virtio-device", "queue_reset", "false" }, - { "virtio-rng-pci", "vectors", "0" }, -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index eace854335..2a9f465619 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -238,6 +238,8 @@ static void pc_init1(MachineState *machine, - smbios_set_defaults("QEMU", mc->desc, - mc->name, pcmc->smbios_legacy_mode, - 
pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - pcms->smbios_entry_point_type); - } - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 4f3e5412f6..912cb0c0dc 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -206,6 +206,8 @@ static void pc_q35_init(MachineState *machine) - smbios_set_defaults("QEMU", mc->desc, - mc->name, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - pcms->smbios_entry_point_type); + MachineState *current_machine; + + static char *machine_get_kernel(Object *obj, Error **errp) +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index d802d2787f..c7aa39a13e 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -64,7 +64,8 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ + smbios_set_defaults("QEMU", mc->desc, mc->name, +- pcmc->smbios_uuid_encoded); ++ pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, pcmc->smbios_stream_version); } + /* tell smbios about cpuid version and features */ diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 4af8c66266..7dc12907ab 100644 +index 897c86ec41..2d0db43f49 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3169,7 +3169,7 @@ static int rtl8139_pre_save(void *opaque) @@ -431,20 +430,21 @@ index 4af8c66266..7dc12907ab 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 2a90601ac5..7bde23e59d 100644 +index eed5787b15..68608a3403 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -58,6 +58,9 @@ static bool smbios_legacy = true; - static bool smbios_uuid_encoded = true; - /* end: legacy structures & constants for <= 2.0 machines */ +@@ -39,6 +39,10 @@ size_t usr_blobs_len; + static unsigned usr_table_max; + static unsigned usr_table_cnt; +/* Set to true for modern Windows 10 
HardwareID-6 compat */ +static bool smbios_type2_required; + - ++ uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -670,7 +673,7 @@ static void smbios_build_type_1_table(void) + unsigned smbios_table_max; +@@ -629,7 +633,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { @@ -453,21 +453,17 @@ index 2a90601ac5..7bde23e59d 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -985,7 +988,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -1018,16 +1022,52 @@ void smbios_set_default_processor_family(uint16_t processor_family) void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type) + const char *version, +- bool uuid_encoded) + bool uuid_encoded, + const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type) ++ const char *stream_version) { smbios_have_defaults = true; - smbios_legacy = legacy_mode; -@@ -1006,11 +1012,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - g_free(smbios_entries); - } + smbios_uuid_encoded = uuid_encoded; + /* + * If @stream_product & @stream_version are non-NULL, then @@ -494,12 +490,12 @@ index 2a90601ac5..7bde23e59d 100644 + * + * We get 'System Manufacturer' and 'Baseboard Manufacturer' + */ - SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type1.product, product); - SMBIOS_SET_DEFAULT(type1.version, version); -+ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); + SMBIOS_SET_DEFAULT(smbios_type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(smbios_type1.product, product); + SMBIOS_SET_DEFAULT(smbios_type1.version, version); ++ SMBIOS_SET_DEFAULT(smbios_type1.family, "Red Hat Enterprise Linux"); + if (stream_version != NULL) { -+ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ 
SMBIOS_SET_DEFAULT(smbios_type1.sku, stream_version); + } SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type2.product, product); @@ -513,20 +509,20 @@ index 2a90601ac5..7bde23e59d 100644 SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index b25da448c8..0331e84398 100644 +index 28fdabc321..bad13ec224 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -229,7 +229,7 @@ static const VMStateDescription vmstate_pit_common = { .pre_save = pit_dispatch_pre_save, .post_load = pit_dispatch_post_load, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { - VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), + VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c -index 643d4643e4..529bad9366 100644 +index 4423983308..43b4b71fdf 100644 --- a/hw/usb/hcd-xhci-pci.c +++ b/hw/usb/hcd-xhci-pci.c @@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) @@ -636,11 +632,26 @@ index 08f70ce97c..1be7527c1b 100644 } XHCIPciState; #endif +diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c +index ffd119ebac..0e2be2219c 100644 +--- a/hw/virtio/virtio-mem.c ++++ b/hw/virtio/virtio-mem.c +@@ -1694,8 +1694,9 @@ static Property virtio_mem_properties[] = { + #endif + DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, + early_migration, true), ++ /* RHEL: default-enable "dynamic-memslots" (jira RHEL-24045) */ + DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM, +- dynamic_memslots, false), ++ dynamic_memslots, true), + DEFINE_PROP_END_OF_LIST(), + }; + diff --git a/include/hw/boards.h b/include/hw/boards.h -index 
da85f86efb..4a21eddbf9 100644 +index 8b8f6d5c00..0466f9d0f3 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -503,4 +503,44 @@ extern const size_t hw_compat_2_2_len; +@@ -512,4 +512,44 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -686,26 +697,25 @@ index da85f86efb..4a21eddbf9 100644 +extern const char *rhel_old_machine_deprecation; #endif diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 7f3259a630..d24b3ccd32 100644 +index 8d3fb2fb3b..d9d6d7a169 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h -@@ -294,7 +294,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); +@@ -332,7 +332,9 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); void smbios_set_cpuid(uint32_t version, uint32_t features); void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type); + const char *version, +- bool uuid_encoded); + bool uuid_encoded, + const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type); - uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); ++ const char *stream_version); + void smbios_set_default_processor_family(uint16_t processor_family); + uint8_t *smbios_get_table_legacy(size_t *length, Error **errp); void smbios_get_tables(MachineState *ms, - const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index a10ceeabbf..037942d233 100644 +index 27a68071d7..ebd8f973f2 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -113,6 +113,9 @@ struct PCMachineClass { +@@ -112,6 +112,9 @@ struct PCMachineClass { bool smbios_legacy_mode; bool smbios_uuid_encoded; SmbiosEntryPointType default_smbios_ep_type; diff --git a/SOURCES/0007-Add-aarch64-machine-types.patch b/SOURCES/0007-Add-aarch64-machine-types.patch index 
fde7982..a556bb2 100644 --- a/SOURCES/0007-Add-aarch64-machine-types.patch +++ b/SOURCES/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 23f614ab0b79ec1c6f65a7f0d6993bfdfc53fd23 Mon Sep 17 00:00:00 2001 +From cf398296f3fcee185a00f23de5deae57c97d648e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -30,6 +30,9 @@ Rebase notes (8.0.0): Rebase notes (8.1.0): - Added setting default_nic +Rebase notes (9.0.0 rc0): +- call arm_virt_compat_set on rhel type class_init + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -64,34 +67,67 @@ Merged patches (8.1.0): Merged patches (8.2.0): - 4ee284aca9 Add machine types compat bits. (partial) + +Merged patches (9.0.0 rc0): +- 117068376a hw/arm/virt: Fix compats +- 8bcccfabc4 hw/arm/virt: Add properties to disable high memory regions +- 0005a8b93a hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types --- - hw/arm/virt.c | 250 +++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 299 +++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 8 ++ - 2 files changed, 257 insertions(+), 1 deletion(-) + 2 files changed, 306 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 62f0f7d4d6..c541efee5e 100644 +index 36e9b4b4e9..22bc345137 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -82,6 +82,7 @@ - #include "hw/char/pl011.h" - #include "qemu/guest-random.h" +@@ -101,6 +101,7 @@ static void arm_virt_compat_set(MachineClass *mc) + arm_virt_compat_len); + } +#if 0 /* Disabled for Red Hat Enterprise Linux */ #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -108,7 +109,48 @@ +@@ -128,7 +129,63 @@ static void arm_virt_compat_set(MachineClass *mc) DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define 
DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +#endif /* disabled for RHEL */ ++ ++/* ++ * This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. They may be overriden by older machine compats. ++ * ++ * virtio-net-pci variant romfiles are not needed because edk2 does ++ * fully support the pxe boot. Besides virtio romfiles are not shipped ++ * on rhel/aarch64. ++ */ ++GlobalProperty arm_rhel_compat[] = { ++ {"virtio-net-pci", "romfile", "" }, ++ {"virtio-net-pci-transitional", "romfile", "" }, ++ {"virtio-net-pci-non-transitional", "romfile", "" }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); ++/* ++ * This cannot be called from the rhel_virt_class_init() because ++ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new() ++ * only is called on virt-rhelm.n.s non abstract class init. ++ */ ++static void arm_rhel_compat_set(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, ++ arm_rhel_compat_len); ++} ++ +#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ + static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ + void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ ++ arm_rhel_compat_set(mc); \ + rhel##m##n##s##_virt_options(mc); \ + mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \ + if (latest) { \ @@ -114,23 +150,10 @@ index 62f0f7d4d6..c541efee5e 100644 + DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) +#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ + DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) -+ -+/* This variable is for changes to properties that are RHEL specific, -+ * different to the current upstream and to be applied to the latest -+ * machine type. 
-+ */ -+GlobalProperty arm_rhel_compat[] = { -+ { -+ .driver = "virtio-net-pci", -+ .property = "romfile", -+ .value = "", -+ }, -+}; -+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -2341,6 +2383,7 @@ static void machvirt_init(MachineState *machine) +@@ -2355,6 +2412,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -138,7 +161,7 @@ index 62f0f7d4d6..c541efee5e 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2368,6 +2411,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2382,6 +2440,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -146,7 +169,7 @@ index 62f0f7d4d6..c541efee5e 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2383,6 +2427,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) +@@ -2397,6 +2456,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) vms->highmem = value; } @@ -154,16 +177,23 @@ index 62f0f7d4d6..c541efee5e 100644 static bool virt_get_compact_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2438,7 +2483,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) +@@ -2410,6 +2470,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) - vms->highmem_mmio = value; + vms->highmem_compact = value; } -- +#endif /* disabled for RHEL */ + static bool virt_get_highmem_redists(Object *obj, Error **errp) + { +@@ -2453,7 +2514,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) + vms->highmem_mmio = value; + } + +- static bool virt_get_its(Object *obj, Error **errp) { -@@ -2454,6 +2499,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) + VirtMachineState *vms = 
VIRT_MACHINE(obj); +@@ -2468,6 +2528,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) vms->its = value; } @@ -171,7 +201,7 @@ index 62f0f7d4d6..c541efee5e 100644 static bool virt_get_dtb_randomness(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2467,6 +2513,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +@@ -2481,6 +2542,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) vms->dtb_randomness = value; } @@ -179,7 +209,7 @@ index 62f0f7d4d6..c541efee5e 100644 static char *virt_get_oem_id(Object *obj, Error **errp) { -@@ -2550,6 +2597,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2564,6 +2626,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -187,7 +217,7 @@ index 62f0f7d4d6..c541efee5e 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2563,6 +2611,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2577,6 +2640,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -195,7 +225,7 @@ index 62f0f7d4d6..c541efee5e 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2935,6 +2984,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2949,6 +3013,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 
0 : requested_pa_size; } @@ -203,7 +233,7 @@ index 62f0f7d4d6..c541efee5e 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3405,3 +3455,201 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3463,3 +3528,235 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -213,6 +243,7 @@ index 62f0f7d4d6..c541efee5e 100644 +{ + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ arm_virt_compat_set(mc); + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; @@ -258,6 +289,28 @@ index 62f0f7d4d6..c541efee5e 100644 + "Set on/off to enable/disable using " + "physical address space above 32 bits"); + ++ object_class_property_add_bool(oc, "highmem-redists", ++ virt_get_highmem_redists, ++ virt_set_highmem_redists); ++ object_class_property_set_description(oc, "highmem-redists", ++ "Set on/off to enable/disable high " ++ "memory region for GICv3 or GICv4 " ++ "redistributor"); ++ ++ object_class_property_add_bool(oc, "highmem-ecam", ++ virt_get_highmem_ecam, ++ virt_set_highmem_ecam); ++ object_class_property_set_description(oc, "highmem-ecam", ++ "Set on/off to enable/disable high " ++ "memory region for PCI ECAM"); ++ ++ object_class_property_add_bool(oc, "highmem-mmio", ++ virt_get_highmem_mmio, ++ virt_set_highmem_mmio); ++ object_class_property_set_description(oc, "highmem-mmio", ++ "Set on/off to enable/disable high " ++ "memory region for PCI MMIO"); ++ + object_class_property_add_str(oc, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_class_property_set_description(oc, "gic-version", @@ -382,14 +435,24 @@ index 62f0f7d4d6..c541efee5e 100644 +} +type_init(rhel_machine_init); + ++static void rhel940_virt_options(MachineClass *mc) ++{ ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) ++ +static void rhel920_virt_options(MachineClass *mc) +{ -+ compat_props_add(mc->compat_props, 
arm_rhel_compat, arm_rhel_compat_len); ++ rhel940_virt_options(mc); ++ + compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); ++ ++ /* RHEL 9.4 is the first supported release */ ++ mc->deprecation_reason = ++ "machine types for versions prior to 9.4 are deprecated"; +} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) ++DEFINE_RHEL_MACHINE(9, 2, 0) + +static void rhel900_virt_options(MachineClass *mc) +{ @@ -398,6 +461,7 @@ index 62f0f7d4d6..c541efee5e 100644 + rhel920_virt_options(mc); + + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; @@ -406,10 +470,10 @@ index 62f0f7d4d6..c541efee5e 100644 +} +DEFINE_RHEL_MACHINE(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index f69239850e..7b8abe5645 100644 +index bb486d36b1..237fc77bda 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -177,9 +177,17 @@ struct VirtMachineState { +@@ -179,9 +179,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) diff --git a/SOURCES/0008-Add-ppc64-machine-types.patch b/SOURCES/0008-Add-ppc64-machine-types.patch index a269adb..87fcb3a 100644 --- a/SOURCES/0008-Add-ppc64-machine-types.patch +++ b/SOURCES/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From d03cff85f5f1b69b1a66011ebaa974ece81d31bc Mon Sep 17 00:00:00 2001 +From fb905dbe5b51ed899062ef99a2dd7f238d3e3384 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -34,20 +34,20 @@ Merged patches (7.1.0): 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index df09aa9d6a..ff459e1a46 100644 +index e9bc97fee0..a258d81846 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1689,6 +1689,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) - +@@ -1718,6 +1718,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); + spapr_nested_reset(spapr); + if (spapr->svm_allowed) { + kvmppc_svm_allow(&error_fatal); + } first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3397,6 +3400,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3421,6 +3424,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -68,7 +68,7 @@ index df09aa9d6a..ff459e1a46 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3475,6 +3492,12 @@ static void spapr_instance_init(Object *obj) +@@ -3499,6 +3516,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -81,7 +81,7 @@ index df09aa9d6a..ff459e1a46 100644 } static void 
spapr_machine_finalizefn(Object *obj) -@@ -4734,6 +4757,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4754,6 +4777,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -89,15 +89,15 @@ index df09aa9d6a..ff459e1a46 100644 } static const TypeInfo spapr_machine_info = { -@@ -4785,6 +4809,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4805,6 +4829,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-8.2 + * pseries-9.0 */ -@@ -4967,6 +4992,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4998,6 +5023,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -105,7 +105,7 @@ index df09aa9d6a..ff459e1a46 100644 /* * pseries-4.0 -@@ -4982,6 +5008,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -5013,6 +5039,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, } return true; } @@ -114,7 +114,7 @@ index df09aa9d6a..ff459e1a46 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5306,6 +5334,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5338,6 +5366,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -337,7 +337,7 @@ index df09aa9d6a..ff459e1a46 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 33e0c8724c..9d01663f43 100644 +index 3b0a47a28c..375e0c8e45 100644 --- a/hw/ppc/spapr_cpu_core.c +++ 
b/hw/ppc/spapr_cpu_core.c @@ -25,6 +25,7 @@ @@ -348,7 +348,7 @@ index 33e0c8724c..9d01663f43 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -261,6 +262,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -264,6 +265,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); @@ -356,7 +356,7 @@ index 33e0c8724c..9d01663f43 100644 if (!qdev_realize(DEVICE(cpu), NULL, errp)) { return false; -@@ -277,6 +279,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -280,6 +282,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, /* Set time-base frequency to 512 MHz. vhyp must be set first. */ cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); @@ -375,10 +375,10 @@ index 33e0c8724c..9d01663f43 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index e91791a1a9..1951d8a2a0 100644 +index 4aaf23d28f..3233c54d11 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -154,6 +154,7 @@ struct SpaprMachineClass { +@@ -157,6 +157,7 @@ struct SpaprMachineClass { bool pre_5_2_numa_associativity; bool pre_6_2_numa_affinity; @@ -386,7 +386,7 @@ index e91791a1a9..1951d8a2a0 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -256,6 +257,9 @@ struct SpaprMachineState { +@@ -259,6 +260,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -422,10 +422,10 @@ index ebef2cccec..ff2c00c60e 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 69fddb05bc..64a05aaef3 100644 +index f77ebfcc81..18e9422006 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c -@@ -748,6 +748,7 @@ +@@ -744,6 +744,7 @@ /* PowerPC CPU 
aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -434,10 +434,10 @@ index 69fddb05bc..64a05aaef3 100644 { "405cr", "405crc" }, { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index f8101ffa29..e799a2bee6 100644 +index 67e6b2effd..11187aeb93 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1635,6 +1635,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1655,6 +1655,7 @@ static inline int ppc_env_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -446,7 +446,7 @@ index f8101ffa29..e799a2bee6 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 9b1abe2fc4..56f1c46e8e 100644 +index 8231feb2d4..59f640cf7b 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -89,6 +89,7 @@ static int cap_large_decr; @@ -465,7 +465,7 @@ index 9b1abe2fc4..56f1c46e8e 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2579,6 +2581,16 @@ bool kvmppc_supports_ail_3(void) +@@ -2564,6 +2566,16 @@ bool kvmppc_supports_ail_3(void) return cap_ail_mode_3; } @@ -482,7 +482,7 @@ index 9b1abe2fc4..56f1c46e8e 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2979,3 +2991,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2964,3 +2976,18 @@ bool kvm_arch_cpu_check_are_resettable(void) void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/SOURCES/0009-Add-s390x-machine-types.patch b/SOURCES/0009-Add-s390x-machine-types.patch index c3b9936..b9709f1 100644 --- a/SOURCES/0009-Add-s390x-machine-types.patch +++ b/SOURCES/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 3623043d4a923bf9f541d439c76e7874cf0fa81d Mon Sep 17 00:00:00 2001 +From 04178c77cfe188b4eed9c08a0bf66842e61fe5dc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 
13:47:32 +0200 Subject: Add s390x machine types @@ -49,18 +49,18 @@ Merged patches (8.2.0): 4 files changed, 174 insertions(+) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 7262725d2e..984891b82a 100644 +index b1dcb3857f..ff753a29e0 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -855,6 +855,7 @@ bool css_migration_enabled(void) +@@ -859,6 +859,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_8_2_instance_options(MachineState *machine) + static void ccw_machine_9_0_instance_options(MachineState *machine) { } -@@ -1256,6 +1257,164 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1272,6 +1273,164 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); @@ -226,7 +226,7 @@ index 7262725d2e..984891b82a 100644 static void ccw_machine_register_types(void) { diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index a63d990e4e..198b81f2c0 100644 +index 8ed3bb6a27..370b3b3065 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -46,6 +46,9 @@ @@ -239,7 +239,7 @@ index a63d990e4e..198b81f2c0 100644 static S390CPUDef s390_cpu_defs[] = { CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), -@@ -856,22 +859,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) +@@ -866,22 +869,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -284,10 +284,10 @@ index d7b8912989..1a806a97c4 100644 /* CPU model based on a CPU definition */ diff --git a/target/s390x/cpu_models_sysemu.c 
b/target/s390x/cpu_models_sysemu.c -index 87a4480c05..28c1b0486c 100644 +index 0728bfcc20..ca2e5d91e2 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c -@@ -60,6 +60,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) +@@ -59,6 +59,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) CpuDefinitionInfo *info; char *name = g_strdup(object_class_get_name(klass)); S390CPUClass *scc = S390_CPU_CLASS(klass); @@ -295,7 +295,7 @@ index 87a4480c05..28c1b0486c 100644 /* strip off the -s390x-cpu */ g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; -@@ -69,6 +70,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) +@@ -68,6 +69,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) info->migration_safe = scc->is_migration_safe; info->q_static = scc->is_static; info->q_typename = g_strdup(object_class_get_name(klass)); diff --git a/SOURCES/0010-Add-x86_64-machine-types.patch b/SOURCES/0010-Add-x86_64-machine-types.patch index d24bb57..83ee4a3 100644 --- a/SOURCES/0010-Add-x86_64-machine-types.patch +++ b/SOURCES/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From b432505cb28bc3b9b0c1849210ac6c63bca3fe37 Mon Sep 17 00:00:00 2001 +From 3c88acb005806ad2386ab6c94a8831151f624738 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -57,23 +57,40 @@ Merged patches (8.1.0): Merged patches (8.2.0): - 4ee284aca9 Add machine types compat bits. 
(partial) - 719e2ac147 Fix x86 machine type compatibility for qemu-kvm 8.1.0 + +Merged patches (9.0.0 rc0): +- 9149e2bc8f x86: rhel 9.2.0 machine type compat fix --- + hw/i386/fw_cfg.c | 2 +- hw/i386/pc.c | 159 ++++++++++++++++++++- - hw/i386/pc_piix.c | 112 ++++++++++++++- - hw/i386/pc_q35.c | 285 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 109 ++++++++++++++ + hw/i386/pc_q35.c | 285 +++++++++++++++++++++++++++++++++++++ include/hw/boards.h | 2 + include/hw/i386/pc.h | 33 +++++ target/i386/cpu.c | 21 +++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 9 files changed, 615 insertions(+), 7 deletions(-) + 10 files changed, 617 insertions(+), 4 deletions(-) +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index c7aa39a13e..283c3f4c16 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -63,7 +63,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", mc->desc, mc->name, ++ smbios_set_defaults("Red Hat", "KVM", mc->desc, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, pcmc->smbios_stream_version); + } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 29b9964733..a1faa9e92c 100644 +index 5c21b0c4db..4a154c1a9a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -323,6 +323,161 @@ GlobalProperty pc_compat_2_0[] = { +@@ -326,6 +326,161 @@ GlobalProperty pc_compat_2_0[] = { }; const size_t pc_compat_2_0_len = G_N_ELEMENTS(pc_compat_2_0); @@ -235,15 +252,15 @@ index 29b9964733..a1faa9e92c 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1826,6 +1981,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->kvmclock_create_always = true; +@@ -1813,6 +1968,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->resizable_acpi_blob = true; + x86mc->apic_xrupt_override = true; 
assert(!mc->get_hotplug_handler); + mc->async_pf_vmexit_disable = false; mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1836,7 +1992,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1823,7 +1979,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -254,10 +271,10 @@ index 29b9964733..a1faa9e92c 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 2a9f465619..44038391fb 100644 +index 18ba076609..a647262d63 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -53,6 +53,7 @@ +@@ -52,6 +52,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -265,18 +282,7 @@ index 2a9f465619..44038391fb 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -235,8 +236,8 @@ static void pc_init1(MachineState *machine, - if (pcmc->smbios_defaults) { - MachineClass *mc = MACHINE_GET_CLASS(machine); - /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", mc->desc, -- mc->name, pcmc->smbios_legacy_mode, -+ smbios_set_defaults("Red Hat", "KVM", -+ mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, - pcmc->smbios_stream_product, - pcmc->smbios_stream_version, -@@ -453,6 +454,7 @@ static void pc_set_south_bridge(Object *obj, int value, Error **errp) +@@ -422,6 +423,7 @@ static void pc_set_south_bridge(Object *obj, int value, Error **errp) * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
*/ @@ -284,7 +290,7 @@ index 2a9f465619..44038391fb 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -970,3 +972,109 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -951,3 +953,110 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -314,8 +320,7 @@ index 2a9f465619..44038391fb 100644 + +static void pc_init_rhel760(MachineState *machine) +{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); ++ pc_init1(machine, TYPE_I440FX_PCI_DEVICE); +} + +static void pc_machine_rhel760_options(MachineClass *m) @@ -339,6 +344,8 @@ index 2a9f465619..44038391fb 100644 + pcmc->enforce_amd_1tb_hole = false; + /* From pc_i440fx_8_0_machine_options() */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* From pc_i440fx_8_1_machine_options() */ ++ pcmc->broken_32bit_mem_addr_check = true; + /* Introduced in QEMU 8.2 */ + pcmc->default_south_bridge = TYPE_PIIX3_DEVICE; + @@ -395,21 +402,10 @@ index 2a9f465619..44038391fb 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 912cb0c0dc..6387df97c8 100644 +index c7bc8a2041..e872dc7e46 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -203,8 +203,8 @@ static void pc_q35_init(MachineState *machine) - - if (pcmc->smbios_defaults) { - /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", mc->desc, -- mc->name, pcmc->smbios_legacy_mode, -+ smbios_set_defaults("Red Hat", "KVM", -+ mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, - pcmc->smbios_stream_product, - pcmc->smbios_stream_version, -@@ -363,6 +363,7 @@ static void pc_q35_init(MachineState *machine) +@@ -341,6 +341,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, 
optionfn) @@ -417,7 +413,7 @@ index 912cb0c0dc..6387df97c8 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -699,3 +700,283 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -693,3 +694,287 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -444,6 +440,8 @@ index 912cb0c0dc..6387df97c8 100644 + m->alias = "q35"; + m->max_cpus = 710; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ compat_props_add(m->compat_props, ++ pc_q35_compat_defaults, pc_q35_compat_defaults_len); +} + +static void pc_q35_init_rhel940(MachineState *machine) @@ -480,6 +478,8 @@ index 912cb0c0dc..6387df97c8 100644 + + /* From pc_q35_8_0_machine_options() */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* From pc_q35_8_1_machine_options() */ ++ pcmc->broken_32bit_mem_addr_check = true; + + compat_props_add(m->compat_props, hw_compat_rhel_9_4, + hw_compat_rhel_9_4_len); @@ -702,10 +702,10 @@ index 912cb0c0dc..6387df97c8 100644 +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 4a21eddbf9..4edfdb0ddb 100644 +index 0466f9d0f3..46b8725c41 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -277,6 +277,8 @@ struct MachineClass { +@@ -283,6 +283,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -715,12 +715,12 @@ index 4a21eddbf9..4edfdb0ddb 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 037942d233..37644ede7e 100644 +index ebd8f973f2..a984c951ad 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -314,6 +314,39 @@ extern const size_t pc_compat_1_4_len; - - int pc_machine_kvm_type(MachineState 
*machine, const char *vm_type); +@@ -291,6 +291,39 @@ extern const size_t pc_compat_2_1_len; + extern GlobalProperty pc_compat_2_0[]; + extern const size_t pc_compat_2_0_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; @@ -759,7 +759,7 @@ index 037942d233..37644ede7e 100644 static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ { \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index cd16cb893d..93203d9b91 100644 +index 33760a2ee1..be7b0663cd 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2190,9 +2190,13 @@ static const CPUCaches epyc_genoa_cache_info = { @@ -925,10 +925,10 @@ index 9c791b7b05..b91af5051f 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 4ce80555b4..9d41edf01e 100644 +index e68cbe9293..739f33db47 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3711,6 +3711,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3715,6 +3715,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -936,7 +936,7 @@ index 4ce80555b4..9d41edf01e 100644 kvm_msr_buf_reset(cpu); -@@ -4065,6 +4066,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4069,6 +4070,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; diff --git a/SOURCES/0011-Enable-make-check.patch b/SOURCES/0011-Enable-make-check.patch index 54015c0..502bc67 100644 --- a/SOURCES/0011-Enable-make-check.patch +++ b/SOURCES/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 66a0510405e5142a1f9e38e0770aa0f10aed3e03 Mon Sep 17 00:00:00 2001 +From 5768cf6811842e5c59da3b752f60659a9d6b5ba1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -63,10 +63,10 @@ Merged patches (8.1.0): 13 files changed, 33 insertions(+), 30 deletions(-) diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py 
-index c37afa662c..61c95a2198 100644 +index 10d99403a4..c3422ea1e4 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py -@@ -153,7 +153,7 @@ def test_aarch64_virt(self): +@@ -166,7 +166,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -76,7 +76,7 @@ index c37afa662c..61c95a2198 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index 4cce5a5598..e9248a04a2 100644 +index 92855a02a5..87822074b6 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py @@ -230,7 +230,7 @@ def test_aarch64_virt(self): @@ -120,7 +120,7 @@ index 15fd87b2c1..f0d9d89c93 100644 kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build -index 53847cb98f..a2abdb650e 100644 +index fad340ad59..3c0d5241f6 100644 --- a/tests/qemu-iotests/meson.build +++ b/tests/qemu-iotests/meson.build @@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats @@ -163,7 +163,7 @@ index 53847cb98f..a2abdb650e 100644 +# endforeach endforeach diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py -index 3ff38f2661..cab9a2bd6c 100644 +index 588f30a4f1..3929a3634f 100644 --- a/tests/qemu-iotests/testenv.py +++ b/tests/qemu-iotests/testenv.py @@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, @@ -216,7 +216,7 @@ index 663bb6c485..2efc43e3f7 100644 "-device intel-hda,id=" HDA_ID CODEC_DEVICES); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index 90aae42a22..9bc4e41af0 100644 +index 3aed6efcb8..119613237e 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build @@ -44,7 +44,7 @@ libqos_srcs = files( @@ -242,10 +242,10 @@ index 
8ac95b89f7..cd2102555c 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 47dabf91d0..0bdfa3a821 100644 +index 36c5c13a7b..a2887d6057 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -97,7 +97,6 @@ qtests_i386 = \ +@@ -101,7 +101,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -254,7 +254,7 @@ index 47dabf91d0..0bdfa3a821 100644 'migration-test', 'test-x86-cpuid-compat', diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c -index 0d40bc1f2d..4c633c1584 100644 +index 73dfabc272..a9dd304781 100644 --- a/tests/qtest/virtio-net-failover.c +++ b/tests/qtest/virtio-net-failover.c @@ -26,6 +26,7 @@ diff --git a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 8222efd..e8bf13a 100644 --- a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From a27cfa0b407bd806ce389a7c69d0130bcfd35244 Mon Sep 17 00:00:00 2001 +From e06a905d726fc20ea6bd95dff1bd0ffe97ebb202 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -27,7 +27,7 @@ to 64 as some usecases require more than 32 devices. 
Signed-off-by: Bandan Das -Rebase changes (231025): +Rebase changes (8.2.0): - Update to upstream changes --- hw/vfio/pci.c | 31 ++++++++++++++++++++++++++++++- @@ -35,10 +35,10 @@ Rebase changes (231025): 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index c62c02f7b6..ec98080f28 100644 +index 64780d1b79..57ac63c10c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -48,6 +48,9 @@ +@@ -50,6 +50,9 @@ /* Protected by BQL */ static KVMRouteChange vfio_route_change; @@ -48,15 +48,14 @@ index c62c02f7b6..ec98080f28 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); -@@ -3076,14 +3079,37 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - { +@@ -2946,13 +2949,36 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ERRP_GUARD(); VFIOPCIDevice *vdev = VFIO_PCI(pdev); VFIODevice *vbasedev = &vdev->vbasedev; + VFIODevice *vbasedev_iter; + VFIOGroup *group; char *tmp, *subsys; Error *err = NULL; - struct stat st; - int i, ret; + int ret, i = 0; bool is_mdev; @@ -84,10 +83,10 @@ index c62c02f7b6..ec98080f28 100644 + return; + } + - if (!vbasedev->sysfsdev) { + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3501,6 +3527,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3370,6 +3396,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -98,7 +97,7 @@ index c62c02f7b6..ec98080f28 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index fba8737ab2..eb74d9de2d 100644 +index 6e64a2654e..b7de39c010 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ 
-142,6 +142,7 @@ struct VFIOPCIDevice { diff --git a/SOURCES/0013-Add-support-statement-to-help-output.patch b/SOURCES/0013-Add-support-statement-to-help-output.patch index bc5d9b4..0644440 100644 --- a/SOURCES/0013-Add-support-statement-to-help-output.patch +++ b/SOURCES/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 424f14d123fe1043518758605d94ed5ba50e52ad Mon Sep 17 00:00:00 2001 +From b467dc6a24ef41fa574260429807711f6802a54d Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,10 +21,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/system/vl.c b/system/vl.c -index 2bcd9efb9a..93635ffc5b 100644 +index c644222982..03c3b0aa94 100644 --- a/system/vl.c +++ b/system/vl.c -@@ -870,9 +870,17 @@ static void version(void) +@@ -869,9 +869,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index 2bcd9efb9a..93635ffc5b 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", g_get_prgname()); -@@ -898,6 +906,7 @@ static void help(int exitcode) +@@ -897,6 +905,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 7fa10b5..04adb4a 100644 --- a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From c683ff4a770b77dbe707413840918a46f67fa825 Mon Sep 17 00:00:00 2001 +From 20cc3a6d9bce3e40d165f865b5e398c300cae7bf Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. 
|I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index 42fd09e4de..557118cb1f 100644 +index 8ce85d4559..4fc27ee2e2 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3469,11 +3469,11 @@ SRST +@@ -3493,11 +3493,11 @@ SRST :: diff --git a/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index 667d431..8518918 100644 --- a/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From 776bff1be5e98982a9bbc8345ff27274ff5b8c0f Mon Sep 17 00:00:00 2001 +From 2f9fdd21ecf2810d0d83a8125ce0cc1e75dbb13a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,7 +44,7 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 13e032bd5e..7968735346 100644 +index 956128b409..0e8b2f7518 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1358,6 +1358,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, diff --git a/SOURCES/0016-Add-upstream-compatibility-bits.patch b/SOURCES/0016-Add-upstream-compatibility-bits.patch new file mode 100644 index 0000000..3efa22c --- /dev/null +++ b/SOURCES/0016-Add-upstream-compatibility-bits.patch @@ -0,0 +1,121 @@ +From 59470e8ab849f22b407f55292e540e16a8cad01a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 20 Mar 2024 05:34:32 -0400 +Subject: Add upstream compatibility bits + +Adding new compats structure for changes introduced during rebase to QEMU 9.0.0. 
+ +Signed-off-by: Miroslav Rezanina + +--- + +Rebase notes (9.0.0 rc2): +- Add aw-bits setting for aarch compat record (overwritten for 9.4 and older) +--- + hw/arm/virt.c | 3 +++ + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 3 ++- + hw/i386/pc_q35.c | 3 +++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 6 files changed, 22 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 22bc345137..f1af9495c6 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -144,6 +144,8 @@ GlobalProperty arm_rhel_compat[] = { + {"virtio-net-pci", "romfile", "" }, + {"virtio-net-pci-transitional", "romfile", "" }, + {"virtio-net-pci-non-transitional", "romfile", "" }, ++ /* arm_rhel_compat from arm_virt_compat, added for 9.0.0 rebase */ ++ { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" }, + }; + const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + +@@ -3728,6 +3730,7 @@ type_init(rhel_machine_init); + + static void rhel940_virt_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_5, hw_compat_rhel_9_5_len); + } + DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 695cb89a46..0f256d9633 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -302,6 +302,16 @@ const size_t hw_compat_2_1_len = G_N_ELEMENTS(hw_compat_2_1); + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_9_5[] = { ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { "migration", "zero-page-detection", "legacy"}, ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" }, ++}; ++const size_t hw_compat_rhel_9_5_len = G_N_ELEMENTS(hw_compat_rhel_9_5); ++ + GlobalProperty hw_compat_rhel_9_4[] = { + /* hw_compat_rhel_9_4 from hw_compat_8_0 */ + { TYPE_VIRTIO_NET, 
"host_uso", "off"}, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index a647262d63..6b260682eb 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1015,7 +1015,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + object_class_property_set_description(oc, "x-south-bridge", + "Use a different south bridge than PIIX3"); + +- ++ compat_props_add(m->compat_props, hw_compat_rhel_9_5, ++ hw_compat_rhel_9_5_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_4, + hw_compat_rhel_9_4_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_3, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index e872dc7e46..2b54944c0f 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -733,6 +733,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_5, ++ hw_compat_rhel_9_5_len); + } + + DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index ff753a29e0..9ad54682c6 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1282,6 +1282,7 @@ static void ccw_machine_rhel940_instance_options(MachineState *machine) + + static void ccw_machine_rhel940_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_5, hw_compat_rhel_9_5_len); + } + DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 46b8725c41..cca62f906b 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -514,6 +514,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_5[]; ++extern const size_t hw_compat_rhel_9_5_len; ++ + extern GlobalProperty 
hw_compat_rhel_9_4[]; + extern const size_t hw_compat_rhel_9_4_len; + +-- +2.39.3 + diff --git a/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch b/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch deleted file mode 100644 index 4e62baa..0000000 --- a/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 3b9b38339346ebfaf3e8ddf0822eba1cc9e78408 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Thu, 14 Dec 2023 04:42:01 -0500 -Subject: Introduce RHEL 9.4.0 qemu-kvm machine type for aarch64 - -Jira: https://issues.redhat.com/browse/RHEL-17168 - -Adding new machine type to support enabling new features. - -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index c541efee5e..0b17c94ad7 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3630,14 +3630,21 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - -+static void rhel940_virt_options(MachineClass *mc) -+{ -+} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) -+ - static void rhel920_virt_options(MachineClass *mc) - { -+ rhel940_virt_options(mc); -+ - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); - } --DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) -+DEFINE_RHEL_MACHINE(9, 2, 0) - - static void rhel900_virt_options(MachineClass *mc) - { --- -2.39.3 - diff --git a/SOURCES/0017-x86-rhel-9.4.0-machine-type-compat-fix.patch b/SOURCES/0017-x86-rhel-9.4.0-machine-type-compat-fix.patch new file mode 100644 index 0000000..5befe68 --- /dev/null +++ b/SOURCES/0017-x86-rhel-9.4.0-machine-type-compat-fix.patch @@ -0,0 +1,30 @@ +From 
ba574acacf679850e337ec2d5e7836b8277cf393 Mon Sep 17 00:00:00 2001 +From: Sebastian Ott +Date: Thu, 18 Apr 2024 15:04:28 +0200 +Subject: x86: rhel 9.4.0 machine type compat fix + +Fix up the compatibility for 9.4.0. Ensure that pc-q35-rhel9.4.0 +still uses SMBIOS 3.X by default. + +Signed-off-by: Sebastian Ott +--- + hw/i386/pc_q35.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 2b54944c0f..2f11f9af7d 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -734,6 +734,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; + ++ /* From pc_q35_8_2_machine_options() - use SMBIOS 3.X by default */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; ++ + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch b/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch deleted file mode 100644 index ed776c0..0000000 --- a/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 363d6aedc82314a70bdfbe9fa23b7e8fdda50138 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 11 Jan 2024 12:26:19 -0500 -Subject: [PATCH 066/101] Compile IOMMUFD object on aarch64 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [65/67] 9358030fdd499c5fe122dee3bb4f114966fac9c2 (eauger1/centos-qemu-kvm) - -Upstream: RHEL only - -Compiles the IOMMUFD object on aarch64 to be able to use -the IOMMUFD VFIO backend. 
- -Signed-off-by: Eric Auger ---- - configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -index aec1831199..b0191d3c69 100644 ---- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -39,3 +39,4 @@ CONFIG_PXB=y - CONFIG_VHOST_VSOCK=y - CONFIG_VHOST_USER_VSOCK=y - CONFIG_VHOST_USER_FS=y -+CONFIG_IOMMUFD=y --- -2.39.3 - diff --git a/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch b/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch deleted file mode 100644 index 9a98477..0000000 --- a/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch +++ /dev/null @@ -1,37 +0,0 @@ -From c1e9ddf8d0ea6d358fcaa5cacd3a91920f36e73b Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 11 Jan 2024 12:33:17 -0500 -Subject: [PATCH 067/101] Compile IOMMUFD on s390x -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [66/67] d3004aafca2bb76d817ac99c3d65973b8fbd4557 (eauger1/centos-qemu-kvm) - -Upstream: RHEL only - -Compiles the IOMMUFD object on s390x to be able to use -the IOMMUFD VFIO backend. 
- -Signed-off-by: Eric Auger ---- - configs/devices/s390x-softmmu/s390x-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak -index 69a799adbd..24cf6dbd03 100644 ---- a/configs/devices/s390x-softmmu/s390x-rh-devices.mak -+++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak -@@ -16,3 +16,4 @@ CONFIG_WDT_DIAG288=y - CONFIG_VHOST_VSOCK=y - CONFIG_VHOST_USER_VSOCK=y - CONFIG_VHOST_USER_FS=y -+CONFIG_IOMMUFD=y --- -2.39.3 - diff --git a/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch b/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch deleted file mode 100644 index a3eb40e..0000000 --- a/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch +++ /dev/null @@ -1,37 +0,0 @@ -From be2c3d9bbee1bdec061c901f507bc999fa40a53e Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 11 Jan 2024 12:34:44 -0500 -Subject: [PATCH 068/101] Compile IOMMUFD on x86_64 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [67/67] 411d48a5cc7ce1f05be793fd9a89c143ce34c91a (eauger1/centos-qemu-kvm) - -Upstream: RHEL only - -Compiles the IOMMUFD object on s390x to be able to use -the IOMMUFD VFIO backend. 
- -Signed-off-by: Eric Auger ---- - configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -index ce5be73633..ba41108e0c 100644 ---- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -108,3 +108,4 @@ CONFIG_SGX=y - CONFIG_VHOST_VSOCK=y - CONFIG_VHOST_USER_VSOCK=y - CONFIG_VHOST_USER_FS=y -+CONFIG_IOMMUFD=y --- -2.39.3 - diff --git a/SOURCES/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch b/SOURCES/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch new file mode 100644 index 0000000..65da2cc --- /dev/null +++ b/SOURCES/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch @@ -0,0 +1,139 @@ +From 93ea86ac8849ad9ca365b1646313dde9a34ba59c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:03 -0500 +Subject: [PATCH 031/100] HostMem: Add mechanism to opt in kvm guest memfd via + MachineState + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [31/91] 43ce32aef954479cdb736301d1adcb919602c321 (bonzini/rhel-qemu-kvm) + +Add a new member "guest_memfd" to memory backends. When it's set +to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm +guest_memfd will be allocated during RAMBlock allocation. + +Memory backend's @guest_memfd is wired with @require_guest_memfd +field of MachineState. It avoid looking up the machine in phymem.c. + +MachineState::require_guest_memfd is supposed to be set by any VMs +that requires KVM guest memfd as private memory, e.g., TDX VM. 
+ +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Message-ID: <20240320083945.991426-8-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 37662d85b0b7dded0ebdf6747bef6c3bb7ed6a0c) +Signed-off-by: Paolo Bonzini +--- + backends/hostmem-file.c | 1 + + backends/hostmem-memfd.c | 1 + + backends/hostmem-ram.c | 1 + + backends/hostmem.c | 1 + + hw/core/machine.c | 5 +++++ + include/hw/boards.h | 2 ++ + include/sysemu/hostmem.h | 1 + + 7 files changed, 12 insertions(+) + +diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c +index ac3e433cbd..3c69db7946 100644 +--- a/backends/hostmem-file.c ++++ b/backends/hostmem-file.c +@@ -85,6 +85,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + ram_flags |= fb->readonly ? RAM_READONLY_FD : 0; + ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; + ram_flags |= fb->is_pmem ? RAM_PMEM : 0; + ram_flags |= RAM_NAMED_FILE; + return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, +diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c +index 3923ea9364..745ead0034 100644 +--- a/backends/hostmem-memfd.c ++++ b/backends/hostmem-memfd.c +@@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + name = host_memory_backend_get_name(backend); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? 
RAM_GUEST_MEMFD : 0; + return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, + backend->size, ram_flags, fd, 0, errp); + } +diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c +index d121249f0f..f7d81af783 100644 +--- a/backends/hostmem-ram.c ++++ b/backends/hostmem-ram.c +@@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + name = host_memory_backend_get_name(backend); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; + return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), + name, backend->size, + ram_flags, errp); +diff --git a/backends/hostmem.c b/backends/hostmem.c +index 81a72ce40b..eb9682b4a8 100644 +--- a/backends/hostmem.c ++++ b/backends/hostmem.c +@@ -277,6 +277,7 @@ static void host_memory_backend_init(Object *obj) + /* TODO: convert access to globals to compat properties */ + backend->merge = machine_mem_merge(machine); + backend->dump = machine_dump_guest_core(machine); ++ backend->guest_memfd = machine_require_guest_memfd(machine); + backend->reserve = true; + backend->prealloc_threads = machine->smp.cpus; + } +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 92609aae27..07b994e136 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1480,6 +1480,11 @@ bool machine_mem_merge(MachineState *machine) + return machine->mem_merge; + } + ++bool machine_require_guest_memfd(MachineState *machine) ++{ ++ return machine->require_guest_memfd; ++} ++ + static char *cpu_slot_to_string(const CPUArchId *cpu) + { + GString *s = g_string_new(NULL); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index cca62f906b..815a1c4b26 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine); + int machine_phandle_start(MachineState *machine); + bool machine_dump_guest_core(MachineState *machine); 
+ bool machine_mem_merge(MachineState *machine); ++bool machine_require_guest_memfd(MachineState *machine); + HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); + void machine_set_cpu_numa_node(MachineState *machine, + const CpuInstanceProperties *props, +@@ -372,6 +373,7 @@ struct MachineState { + char *dt_compatible; + bool dump_guest_core; + bool mem_merge; ++ bool require_guest_memfd; + bool usb; + bool usb_disabled; + char *firmware; +diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h +index 0e411aaa29..04b884bf42 100644 +--- a/include/sysemu/hostmem.h ++++ b/include/sysemu/hostmem.h +@@ -74,6 +74,7 @@ struct HostMemoryBackend { + uint64_t size; + bool merge, dump, use_canonical_path; + bool prealloc, is_mapped, share, reserve; ++ bool guest_memfd; + uint32_t prealloc_threads; + ThreadContext *prealloc_context; + DECLARE_BITMAP(host_nodes, MAX_NODES + 1); +-- +2.39.3 + diff --git a/SOURCES/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch b/SOURCES/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch new file mode 100644 index 0000000..aaedcf4 --- /dev/null +++ b/SOURCES/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch @@ -0,0 +1,203 @@ +From c46ac3db0a4db60e667edeabc9ed451c6e8e0ccf Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 14:41:33 -0400 +Subject: [PATCH 020/100] KVM: remove kvm_arch_cpu_check_are_resettable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [20/91] d7745bd1a0ed1b215847f150f4a1bb2e912beabc (bonzini/rhel-qemu-kvm) + +Board reset requires writing a fresh CPU state. As far as KVM is +concerned, the only thing that blocks reset is that CPU state is +encrypted; therefore, kvm_cpus_are_resettable() can simply check +if that is the case. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit a99c0c66ebe7d8db3af6f16689ade9375247e43e) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-accel-ops.c | 2 +- + accel/kvm/kvm-all.c | 5 ----- + include/sysemu/kvm.h | 10 ---------- + target/arm/kvm.c | 5 ----- + target/i386/kvm/kvm.c | 5 ----- + target/loongarch/kvm/kvm.c | 5 ----- + target/mips/kvm.c | 5 ----- + target/ppc/kvm.c | 5 ----- + target/riscv/kvm/kvm-cpu.c | 5 ----- + target/s390x/kvm/kvm.c | 5 ----- + 10 files changed, 1 insertion(+), 51 deletions(-) + +diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c +index b3c946dc4b..74e3c5785b 100644 +--- a/accel/kvm/kvm-accel-ops.c ++++ b/accel/kvm/kvm-accel-ops.c +@@ -82,7 +82,7 @@ static bool kvm_vcpu_thread_is_idle(CPUState *cpu) + + static bool kvm_cpus_are_resettable(void) + { +- return !kvm_enabled() || kvm_cpu_check_are_resettable(); ++ return !kvm_enabled() || !kvm_state->guest_state_protected; + } + + #ifdef KVM_CAP_SET_GUEST_DEBUG +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index ec0f6df7c5..b51e09a583 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2696,11 +2696,6 @@ void kvm_flush_coalesced_mmio_buffer(void) + s->coalesced_flush_in_progress = false; + } + +-bool kvm_cpu_check_are_resettable(void) +-{ +- return kvm_arch_cpu_check_are_resettable(); +-} +- + static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + { + if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 302e8f6f1e..54f4d83a37 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -525,16 +525,6 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); + /* Notify resamplefd for EOI of specific interrupts. 
*/ + void kvm_resample_fd_notify(int gsi); + +-/** +- * kvm_cpu_check_are_resettable - return whether CPUs can be reset +- * +- * Returns: true: CPUs are resettable +- * false: CPUs are not resettable +- */ +-bool kvm_cpu_check_are_resettable(void); +- +-bool kvm_arch_cpu_check_are_resettable(void); +- + bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index ab85d628a8..21ebbf3b8f 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -1598,11 +1598,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) + return (data - 32) & 0xffff; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index e271652620..a12207a8ee 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -5623,11 +5623,6 @@ bool kvm_has_waitpkg(void) + return has_msr_umwait; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return !sev_es_enabled(); +-} +- + #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 + + void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index d630cc39cb..8224d94333 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -733,11 +733,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) + return true; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { + int ret = 0; +diff --git a/target/mips/kvm.c b/target/mips/kvm.c +index 6c52e59f55..a631ab544f 100644 +--- a/target/mips/kvm.c ++++ b/target/mips/kvm.c +@@ -1273,11 +1273,6 @@ int kvm_arch_get_default_type(MachineState *machine) + return -1; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + void 
kvm_arch_accel_class_init(ObjectClass *oc) + { + } +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index 59f640cf7b..9d9d9f0d79 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -2968,11 +2968,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) + } + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + void kvm_arch_accel_class_init(ObjectClass *oc) + { + } +diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c +index 6a6c6cae80..49d2f3ad58 100644 +--- a/target/riscv/kvm/kvm-cpu.c ++++ b/target/riscv/kvm/kvm-cpu.c +@@ -1475,11 +1475,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level) + } + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + static int aia_mode; + + static const char *kvm_aia_mode_str(uint64_t mode) +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 55fb4855b1..4db59658e1 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2630,11 +2630,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) + kvm_s390_vcpu_interrupt(cpu, &irq); + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + int kvm_s390_get_zpci_op(void) + { + return cap_zpci_op; +-- +2.39.3 + diff --git a/SOURCES/kvm-KVM-track-whether-guest-state-is-encrypted.patch b/SOURCES/kvm-KVM-track-whether-guest-state-is-encrypted.patch new file mode 100644 index 0000000..7cdab60 --- /dev/null +++ b/SOURCES/kvm-KVM-track-whether-guest-state-is-encrypted.patch @@ -0,0 +1,127 @@ +From 50399796da938c4ea7c69058fde84695bce9d794 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 14:41:10 -0400 +Subject: [PATCH 019/100] KVM: track whether guest state is encrypted +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov 
+RH-Commit: [19/91] 685b9c54d43d0043d15c33d13afc3a420cbe139b (bonzini/rhel-qemu-kvm) + +So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the +guest state is encrypted, in which case they do nothing. For the new +API using VM types, instead, the ioctls will fail which is a safer and +more robust approach. + +The new API will be the only one available for SEV-SNP and TDX, but it +is also usable for SEV and SEV-ES. In preparation for that, require +architecture-specific KVM code to communicate the point at which guest +state is protected (which must be after kvm_cpu_synchronize_post_init(), +though that might change in the future in order to suppor migration). +From that point, skip reading registers so that cpu->vcpu_dirty is +never true: if it ever becomes true, kvm_arch_put_registers() will +fail miserably. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5c3131c392f84c660033d511ec39872d8beb4b1e) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 17 ++++++++++++++--- + include/sysemu/kvm.h | 2 ++ + include/sysemu/kvm_int.h | 1 + + target/i386/sev.c | 1 + + 4 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 931f74256e..ec0f6df7c5 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2703,7 +2703,7 @@ bool kvm_cpu_check_are_resettable(void) + + static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + { +- if (!cpu->vcpu_dirty) { ++ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { + int ret = kvm_arch_get_registers(cpu); + if (ret) { + error_report("Failed to get registers: %s", strerror(-ret)); +@@ -2717,7 +2717,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + + void kvm_cpu_synchronize_state(CPUState *cpu) + { +- if (!cpu->vcpu_dirty) { ++ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { + run_on_cpu(cpu, do_kvm_cpu_synchronize_state, 
RUN_ON_CPU_NULL); + } + } +@@ -2752,7 +2752,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) + + void kvm_cpu_synchronize_post_init(CPUState *cpu) + { +- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); ++ if (!kvm_state->guest_state_protected) { ++ /* ++ * This runs before the machine_init_done notifiers, and is the last ++ * opportunity to synchronize the state of confidential guests. ++ */ ++ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); ++ } + } + + static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) +@@ -4099,3 +4105,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) + query_stats_schema_vcpu(first_cpu, &stats_args); + } + } ++ ++void kvm_mark_guest_state_protected(void) ++{ ++ kvm_state->guest_state_protected = true; ++} +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index fad9a7e8ff..302e8f6f1e 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -539,6 +539,8 @@ bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); + ++void kvm_mark_guest_state_protected(void); ++ + /** + * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page + * reported for the VM. +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 882e37e12c..3496be7997 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -87,6 +87,7 @@ struct KVMState + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; + bool sync_mmu; ++ bool guest_state_protected; + uint64_t manual_dirty_log_protect; + /* The man page (and posix) say ioctl numbers are signed int, but + * they're not. 
Linux, glibc and *BSD all treat ioctl numbers as +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b8f79d34d1..c49a8fd55e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -755,6 +755,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + if (ret) { + exit(1); + } ++ kvm_mark_guest_state_protected(); + } + + /* query the measurement blob length */ +-- +2.39.3 + diff --git a/SOURCES/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch b/SOURCES/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch new file mode 100644 index 0000000..8e47872 --- /dev/null +++ b/SOURCES/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch @@ -0,0 +1,329 @@ +From f4b01d645926faab2cab86fadb7398c26d6b8285 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:02 -0500 +Subject: [PATCH 028/100] RAMBlock: Add support of KVM private guest memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [28/91] 95fdf196afcb67113834c20fa354ee1397411bfd (bonzini/rhel-qemu-kvm) + +Add KVM guest_memfd support to RAMBlock so both normal hva based memory +and kvm guest memfd based private memory can be associated in one RAMBlock. + +Introduce new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to +create private guest_memfd during RAMBlock setup. + +Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd +is more flexible and extensible than simply relying on the VM type because +in the future we may have the case that not all the memory of a VM need +guest memfd. As a benefit, it also avoid getting MachineState in memory +subsystem. + +Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of +confidential guests, such as TDX VM. How and when to set it for memory +backends will be implemented in the following patches. 
+ +Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has +KVM guest_memfd allocated. + +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Message-ID: <20240320083945.991426-7-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 15f7a80c49cb3637f62fa37fa4a17da913bd91ff) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++ + accel/stubs/kvm-stub.c | 5 +++++ + include/exec/memory.h | 20 +++++++++++++++++--- + include/exec/ram_addr.h | 2 +- + include/exec/ramblock.h | 1 + + include/sysemu/kvm.h | 2 ++ + system/memory.c | 5 +++++ + system/physmem.c | 34 +++++++++++++++++++++++++++++++--- + 8 files changed, 90 insertions(+), 7 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 272e945f52..a7b9a127dd 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -92,6 +92,7 @@ static bool kvm_has_guest_debug; + static int kvm_sstep_flags; + static bool kvm_immediate_exit; + static uint64_t kvm_supported_memory_attributes; ++static bool kvm_guest_memfd_supported; + static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { +@@ -2419,6 +2420,11 @@ static int kvm_init(MachineState *ms) + } + + kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); ++ kvm_guest_memfd_supported = ++ kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && ++ kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && ++ (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); ++ + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); + s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); + +@@ -4138,3 +4144,25 @@ void kvm_mark_guest_state_protected(void) + { + kvm_state->guest_state_protected = true; + } ++ ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) ++{ ++ int fd; ++ struct kvm_create_guest_memfd guest_memfd = { ++ .size = size, ++ .flags = flags, ++ }; ++ ++ 
if (!kvm_guest_memfd_supported) { ++ error_setg(errp, "KVM does not support guest_memfd"); ++ return -1; ++ } ++ ++ fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd); ++ if (fd < 0) { ++ error_setg_errno(errp, errno, "Error creating KVM guest_memfd"); ++ return -1; ++ } ++ ++ return fd; ++} +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index ca38172884..8e0eb22e61 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void) + { + return false; + } ++ ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) ++{ ++ return -ENOSYS; ++} +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 8626a355b3..679a847685 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent { + /* RAM FD is opened read-only */ + #define RAM_READONLY_FD (1 << 11) + ++/* RAM can be private that has kvm guest memfd backend */ ++#define RAM_GUEST_MEMFD (1 << 12) ++ + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, + IOMMUNotifierFlag flags, + hwaddr start, hwaddr end, +@@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr, + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. +- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. ++ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE, ++ * RAM_GUEST_MEMFD. + * @errp: pointer to Error*, to store an error if it happens. + * + * Note that this function does not do anything to cause the data in the +@@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr, + * (getpagesize()) will be used. + * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @path: the path in which to allocate the RAM. + * @offset: offset within the file referenced by path + * @errp: pointer to Error*, to store an error if it happens. +@@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr, + * @size: size of the region. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @fd: the fd to mmap. + * @offset: offset within the file referenced by fd + * @errp: pointer to Error*, to store an error if it happens. +@@ -1722,6 +1726,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) + */ + bool memory_region_is_protected(MemoryRegion *mr); + ++/** ++ * memory_region_has_guest_memfd: check whether a memory region has guest_memfd ++ * associated ++ * ++ * Returns %true if a memory region's ram_block has valid guest_memfd assigned. ++ * ++ * @mr: the memory region being queried ++ */ ++bool memory_region_has_guest_memfd(MemoryRegion *mr); ++ + /** + * memory_region_get_iommu: check whether a memory region is an iommu + * +diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h +index de45ba7bc9..07c8f86375 100644 +--- a/include/exec/ram_addr.h ++++ b/include/exec/ram_addr.h +@@ -110,7 +110,7 @@ long qemu_maxrampagesize(void); + * @mr: the memory region where the ram block is + * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @mem_path or @fd: specify the backing file or device + * @offset: Offset into target file + * @errp: pointer to Error*, to store an error if it happens +diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h +index 848915ea5b..459c8917de 100644 +--- a/include/exec/ramblock.h ++++ b/include/exec/ramblock.h +@@ -41,6 +41,7 @@ struct RAMBlock { + QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; + int fd; + uint64_t fd_offset; ++ int guest_memfd; + size_t page_size; + /* dirty bitmap used during migration */ + unsigned long *bmap; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index f114ff6986..9e4ab7ae89 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -537,6 +537,8 @@ void kvm_mark_guest_state_protected(void); + */ + bool kvm_hwpoisoned_mem(void); + ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); ++ + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); + int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + +diff --git a/system/memory.c b/system/memory.c +index a229a79988..c756950c0c 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr) + return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); + } + ++bool memory_region_has_guest_memfd(MemoryRegion *mr) ++{ ++ return mr->ram_block && mr->ram_block->guest_memfd >= 0; ++} ++ + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) + { + uint8_t mask = mr->dirty_log_mask; +diff --git a/system/physmem.c b/system/physmem.c +index a4fe3d2bf8..f5dfa20e57 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1808,6 +1808,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + const bool shared = qemu_ram_is_shared(new_block); + RAMBlock *block; + RAMBlock *last_block = 
NULL; ++ bool free_on_error = false; + ram_addr_t old_ram_size, new_ram_size; + Error *err = NULL; + +@@ -1837,6 +1838,19 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + return; + } + memory_try_enable_merging(new_block->host, new_block->max_length); ++ free_on_error = true; ++ } ++ } ++ ++ if (new_block->flags & RAM_GUEST_MEMFD) { ++ assert(kvm_enabled()); ++ assert(new_block->guest_memfd < 0); ++ ++ new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, ++ 0, errp); ++ if (new_block->guest_memfd < 0) { ++ qemu_mutex_unlock_ramlist(); ++ goto out_free; + } + } + +@@ -1888,6 +1902,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + ram_block_notify_add(new_block->host, new_block->used_length, + new_block->max_length); + } ++ return; ++ ++out_free: ++ if (free_on_error) { ++ qemu_anon_ram_free(new_block->host, new_block->max_length); ++ new_block->host = NULL; ++ } + } + + #ifdef CONFIG_POSIX +@@ -1902,7 +1923,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + /* Just support these ram flags by now. 
*/ + assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | + RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | +- RAM_READONLY_FD)) == 0); ++ RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0); + + if (xen_enabled()) { + error_setg(errp, "-mem-path not supported with Xen"); +@@ -1939,6 +1960,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + new_block->used_length = size; + new_block->max_length = size; + new_block->flags = ram_flags; ++ new_block->guest_memfd = -1; + new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, + errp); + if (!new_block->host) { +@@ -2018,7 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, + int align; + + assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | +- RAM_NORESERVE)) == 0); ++ RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); + assert(!host ^ (ram_flags & RAM_PREALLOC)); + + align = qemu_real_host_page_size(); +@@ -2033,6 +2055,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, + new_block->max_length = max_size; + assert(max_size >= size); + new_block->fd = -1; ++ new_block->guest_memfd = -1; + new_block->page_size = qemu_real_host_page_size(); + new_block->host = host; + new_block->flags = ram_flags; +@@ -2055,7 +2078,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, + RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, + MemoryRegion *mr, Error **errp) + { +- assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); ++ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); + return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); + } + +@@ -2083,6 +2106,11 @@ static void reclaim_ramblock(RAMBlock *block) + } else { + qemu_anon_ram_free(block->host, block->max_length); + } ++ ++ if (block->guest_memfd >= 0) { ++ close(block->guest_memfd); ++ } ++ + g_free(block); + } + +-- +2.39.3 + diff --git 
a/SOURCES/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch b/SOURCES/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch new file mode 100644 index 0000000..04a5fbf --- /dev/null +++ b/SOURCES/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch @@ -0,0 +1,82 @@ +From bd289293604d6f33e9fb89196f0b19117ce81f89 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 20 Mar 2024 17:45:29 +0100 +Subject: [PATCH 032/100] RAMBlock: make guest_memfd require uncoordinated + discard + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [32/91] 0c005849026c334737b88cbd20a0ac237dfca37e (bonzini/rhel-qemu-kvm) + +Some subsystems like VFIO might disable ram block discard, but guest_memfd +uses discard operations to implement conversions between private and +shared memory. Because of this, sequences like the following can result +in stale IOMMU mappings: + +1. allocate shared page +2. convert page shared->private +3. discard shared page +4. convert page private->shared +5. allocate shared page +6. issue DMA operations against that shared page + +This is not a use-after-free, because after step 3 VFIO is still pinning +the page. However, DMA operations in step 6 will hit the old mapping +that was allocated in step 1. + +Address this by taking ram_block_discard_is_enabled() into account when +deciding whether or not to discard pages. + +Since kvm_convert_memory()/guest_memfd doesn't implement a +RamDiscardManager handler to convey and replay discard operations, +this is a case of uncoordinated discard, which is blocked/released +by ram_block_discard_require(). Interestingly, this function had +no use so far. 
+ +Alternative approaches would be to block discard of shared pages, but +this would cause guests to consume twice the memory if they use VFIO; +or to implement a RamDiscardManager and only block uncoordinated +discard, i.e. use ram_block_coordinated_discard_require(). + +[Commit message mostly by Michael Roth ] + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 852f0048f3ea9f14de18eb279a99fccb6d250e8f) +Signed-off-by: Paolo Bonzini +--- + system/physmem.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/system/physmem.c b/system/physmem.c +index f5dfa20e57..5ebcf5be11 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1846,6 +1846,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + assert(kvm_enabled()); + assert(new_block->guest_memfd < 0); + ++ if (ram_block_discard_require(true) < 0) { ++ error_setg_errno(errp, errno, ++ "cannot set up private guest memory: discard currently blocked"); ++ error_append_hint(errp, "Are you using assigned devices?\n"); ++ goto out_free; ++ } ++ + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, + 0, errp); + if (new_block->guest_memfd < 0) { +@@ -2109,6 +2116,7 @@ static void reclaim_ramblock(RAMBlock *block) + + if (block->guest_memfd >= 0) { + close(block->guest_memfd); ++ ram_block_discard_require(false); + } + + g_free(block); +-- +2.39.3 + diff --git a/SOURCES/kvm-Revert-monitor-use-aio_co_reschedule_self.patch b/SOURCES/kvm-Revert-monitor-use-aio_co_reschedule_self.patch new file mode 100644 index 0000000..32e792e --- /dev/null +++ b/SOURCES/kvm-Revert-monitor-use-aio_co_reschedule_self.patch @@ -0,0 +1,67 @@ +From d4e6f7105b00ba2536d5d733b7c03116f28ce116 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 6 May 2024 15:06:21 -0400 +Subject: [PATCH 2/5] Revert "monitor: use aio_co_reschedule_self()" + +RH-Author: Kevin Wolf +RH-MergeRequest: 248: Revert "monitor: use aio_co_reschedule_self()" +RH-Jira: RHEL-34618 RHEL-38697 +RH-Acked-by: Stefan 
Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] b6a2ebd4a69dbcd2bd56c61e7c747f8f8f42337e (kmwolf/centos-qemu-kvm) + +Commit 1f25c172f837 ("monitor: use aio_co_reschedule_self()") was a code +cleanup that uses aio_co_reschedule_self() instead of open coding +coroutine rescheduling. + +Bug RHEL-34618 was reported and Kevin Wolf identified +the root cause. I missed that aio_co_reschedule_self() -> +qemu_get_current_aio_context() only knows about +qemu_aio_context/IOThread AioContexts and not about iohandler_ctx. It +does not function correctly when going back from the iohandler_ctx to +qemu_aio_context. + +Go back to open coding the AioContext transitions to avoid this bug. + +This reverts commit 1f25c172f83704e350c0829438d832384084a74d. + +Cc: qemu-stable@nongnu.org +Buglink: https://issues.redhat.com/browse/RHEL-34618 +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240506190622.56095-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 719c6819ed9a9838520fa732f9861918dc693bda) +Signed-off-by: Kevin Wolf +--- + qapi/qmp-dispatch.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index f3488afeef..176b549473 100644 +--- a/qapi/qmp-dispatch.c ++++ b/qapi/qmp-dispatch.c +@@ -212,7 +212,8 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + * executing the command handler so that it can make progress if it + * involves an AIO_WAIT_WHILE(). + */ +- aio_co_reschedule_self(qemu_get_aio_context()); ++ aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); ++ qemu_coroutine_yield(); + } + + monitor_set_cur(qemu_coroutine_self(), cur_mon); +@@ -226,7 +227,9 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + * Move back to iohandler_ctx so that nested event loops for + * qemu_aio_context don't start new monitor commands. 
+ */ +- aio_co_reschedule_self(iohandler_get_aio_context()); ++ aio_co_schedule(iohandler_get_aio_context(), ++ qemu_coroutine_self()); ++ qemu_coroutine_yield(); + } + } else { + /* +-- +2.39.3 + diff --git a/SOURCES/kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch b/SOURCES/kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch new file mode 100644 index 0000000..96756df --- /dev/null +++ b/SOURCES/kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch @@ -0,0 +1,38 @@ +From bcbc897cb19b3a6523de611f48f6bac6cea16c97 Mon Sep 17 00:00:00 2001 +From: Sebastian Ott +Date: Thu, 2 May 2024 13:17:03 +0200 +Subject: [PATCH 2/2] Revert "x86: rhel 9.4.0 machine type compat fix" + +RH-Author: Sebastian Ott +RH-MergeRequest: 237: Revert "x86: rhel 9.4.0 machine type compat fix" +RH-Jira: RHEL-30362 +RH-Acked-by: Ani Sinha +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 858ec153e65e96c39ca4db17ed93fd58c77dc2eb (seott1/cos-qemu-kvm) + +This reverts commit c46e44f0f4e861fe412ce679b0b0204881c1c2f5. + +pc-q35-rhel9.4.0 and newer should stay with SMBIOS_ENTRY_POINT_TYPE_AUTO. 
+ +Signed-off-by: Sebastian Ott +--- + hw/i386/pc_q35.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 2f11f9af7d..2b54944c0f 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -734,9 +734,6 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; + +- /* From pc_q35_8_2_machine_options() - use SMBIOS 3.X by default */ +- pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; +- + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch b/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch deleted file mode 100644 index f30b81f..0000000 --- a/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 6b5cfed21e20b372090046a934387255ff4bda58 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:01 -0500 -Subject: [PATCH 084/101] aio: make aio_context_acquire()/aio_context_release() - a no-op - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [15/26] 723dcada900aaf08862e8221921be22506b561a8 (kmwolf/centos-qemu-kvm) - -aio_context_acquire()/aio_context_release() has been replaced by -fine-grained locking to protect state shared by multiple threads. The -AioContext lock still plays the role of balancing locking in -AIO_WAIT_WHILE() and many functions in QEMU either require that the -AioContext lock is held or not held for this reason. In other words, the -AioContext lock is purely there for consistency with itself and serves -no real purpose anymore. 
- -Stop actually acquiring/releasing the lock in -aio_context_acquire()/aio_context_release() so that subsequent patches -can remove callers across the codebase incrementally. - -I have performed "make check" and qemu-iotests stress tests across -x86-64, ppc64le, and aarch64 to confirm that there are no failures as a -result of eliminating the lock. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Acked-by: Kevin Wolf -Message-ID: <20231205182011.1976568-5-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - util/async.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/util/async.c b/util/async.c -index 8f90ddc304..04ee83d220 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -725,12 +725,12 @@ void aio_context_unref(AioContext *ctx) - - void aio_context_acquire(AioContext *ctx) - { -- qemu_rec_mutex_lock(&ctx->lock); -+ /* TODO remove this function */ - } - - void aio_context_release(AioContext *ctx) - { -- qemu_rec_mutex_unlock(&ctx->lock); -+ /* TODO remove this function */ - } - - QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext) --- -2.39.3 - diff --git a/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch b/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch deleted file mode 100644 index a64e246..0000000 --- a/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 14913d8970090c8914dc19dad14f3b9f91985ec3 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:07 -0500 -Subject: [PATCH 090/101] aio: remove - aio_context_acquire()/aio_context_release() API - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [21/26] 4b6d4afcac79d3248a6722b063b5fc777dc418df (kmwolf/centos-qemu-kvm) - -Delete these functions because nothing calls these functions anymore. 
- -I introduced these APIs in commit 98563fc3ec44 ("aio: add -aio_context_acquire() and aio_context_release()") in 2014. It's with a -sigh of relief that I delete these APIs almost 10 years later. - -Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an -understanding of where the code needed to go in order to remove the -limitations that the original dataplane and the IOThread/AioContext -approach that followed it. - -Emanuele Giuseppe Esposito had the splendid determination to convert -large parts of the codebase so that they no longer needed the AioContext -lock. This was a painstaking process, both in the actual code changes -required and the iterations of code review that Emanuele eked out of -Kevin and me over many months. - -Kevin Wolf tackled multitudes of graph locking conversions to protect -in-flight I/O from run-time changes to the block graph as well as the -clang Thread Safety Analysis annotations that allow the compiler to -check whether the graph lock is being used correctly. - -And me, well, I'm just here to add some pizzazz to the QEMU multi-queue -block layer :). Thank you to everyone who helped with this effort, -including Eric Blake, code reviewer extraordinaire, and others who I've -forgotten to mention. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-11-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - include/block/aio.h | 17 ----------------- - util/async.c | 10 ---------- - 2 files changed, 27 deletions(-) - -diff --git a/include/block/aio.h b/include/block/aio.h -index f08b358077..af05512a7d 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -278,23 +278,6 @@ void aio_context_ref(AioContext *ctx); - */ - void aio_context_unref(AioContext *ctx); - --/* Take ownership of the AioContext. 
If the AioContext will be shared between -- * threads, and a thread does not want to be interrupted, it will have to -- * take ownership around calls to aio_poll(). Otherwise, aio_poll() -- * automatically takes care of calling aio_context_acquire and -- * aio_context_release. -- * -- * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A -- * thread still has to call those to avoid being interrupted by the guest. -- * -- * Bottom halves, timers and callbacks can be created or removed without -- * acquiring the AioContext. -- */ --void aio_context_acquire(AioContext *ctx); -- --/* Relinquish ownership of the AioContext. */ --void aio_context_release(AioContext *ctx); -- - /** - * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will - * run only once and as soon as possible. -diff --git a/util/async.c b/util/async.c -index dfd44ef612..460529057c 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -719,16 +719,6 @@ void aio_context_unref(AioContext *ctx) - g_source_unref(&ctx->source); - } - --void aio_context_acquire(AioContext *ctx) --{ -- /* TODO remove this function */ --} -- --void aio_context_release(AioContext *ctx) --{ -- /* TODO remove this function */ --} -- - QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext) - - AioContext *qemu_get_current_aio_context(void) --- -2.39.3 - diff --git a/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch b/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch deleted file mode 100644 index 7f95b67..0000000 --- a/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch +++ /dev/null @@ -1,81 +0,0 @@ -From e1e2f3972065c4b5d6fcf37e0e1c4fb92a0d5260 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:06 -0500 -Subject: [PATCH 089/101] aio-wait: draw equivalence between AIO_WAIT_WHILE() - and AIO_WAIT_WHILE_UNLOCKED() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 
-RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [20/26] 20e49777869714c99769263103f1b0c2c370cfcd (kmwolf/centos-qemu-kvm) - -Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and -AIO_WAIT_WHILE_UNLOCKED() are equivalent. - -A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED(). - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-10-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - include/block/aio-wait.h | 16 ++++------------ - 1 file changed, 4 insertions(+), 12 deletions(-) - -diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h -index 5449b6d742..157f105916 100644 ---- a/include/block/aio-wait.h -+++ b/include/block/aio-wait.h -@@ -63,9 +63,6 @@ extern AioWait global_aio_wait; - * @ctx: the aio context, or NULL if multiple aio contexts (for which the - * caller does not hold a lock) are involved in the polling condition. - * @cond: wait while this conditional expression is true -- * @unlock: whether to unlock and then lock again @ctx. This applies -- * only when waiting for another AioContext from the main loop. -- * Otherwise it's ignored. - * - * Wait while a condition is true. Use this to implement synchronous - * operations that require event loop activity. -@@ -78,7 +75,7 @@ extern AioWait global_aio_wait; - * wait on conditions between two IOThreads since that could lead to deadlock, - * go via the main loop instead. 
- */ --#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({ \ -+#define AIO_WAIT_WHILE_INTERNAL(ctx, cond) ({ \ - bool waited_ = false; \ - AioWait *wait_ = &global_aio_wait; \ - AioContext *ctx_ = (ctx); \ -@@ -95,13 +92,7 @@ extern AioWait global_aio_wait; - assert(qemu_get_current_aio_context() == \ - qemu_get_aio_context()); \ - while ((cond)) { \ -- if (unlock && ctx_) { \ -- aio_context_release(ctx_); \ -- } \ - aio_poll(qemu_get_aio_context(), true); \ -- if (unlock && ctx_) { \ -- aio_context_acquire(ctx_); \ -- } \ - waited_ = true; \ - } \ - } \ -@@ -109,10 +100,11 @@ extern AioWait global_aio_wait; - waited_; }) - - #define AIO_WAIT_WHILE(ctx, cond) \ -- AIO_WAIT_WHILE_INTERNAL(ctx, cond, true) -+ AIO_WAIT_WHILE_INTERNAL(ctx, cond) - -+/* TODO replace this with AIO_WAIT_WHILE() in a future patch */ - #define AIO_WAIT_WHILE_UNLOCKED(ctx, cond) \ -- AIO_WAIT_WHILE_INTERNAL(ctx, cond, false) -+ AIO_WAIT_WHILE_INTERNAL(ctx, cond) - - /** - * aio_wait_kick: --- -2.39.3 - diff --git a/SOURCES/kvm-aio-warn-about-iohandler_ctx-special-casing.patch b/SOURCES/kvm-aio-warn-about-iohandler_ctx-special-casing.patch new file mode 100644 index 0000000..a0d9d31 --- /dev/null +++ b/SOURCES/kvm-aio-warn-about-iohandler_ctx-special-casing.patch @@ -0,0 +1,64 @@ +From 0e3934e89ad1dda21681f64ff38da69b07d1b531 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 6 May 2024 15:06:22 -0400 +Subject: [PATCH 3/5] aio: warn about iohandler_ctx special casing + +RH-Author: Kevin Wolf +RH-MergeRequest: 248: Revert "monitor: use aio_co_reschedule_self()" +RH-Jira: RHEL-34618 RHEL-38697 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] cc316d70b2c187ee0412d6560ca1a03e381a69c1 (kmwolf/centos-qemu-kvm) + +The main loop has two AioContexts: qemu_aio_context and iohandler_ctx. +The main loop runs them both, but nested aio_poll() calls on +qemu_aio_context exclude iohandler_ctx. 
+ +Which one should qemu_get_current_aio_context() return when called from +the main loop? Document that it's always qemu_aio_context. + +This has subtle effects on functions that use +qemu_get_current_aio_context(). For example, aio_co_reschedule_self() +does not work when moving from iohandler_ctx to qemu_aio_context because +qemu_get_current_aio_context() does not differentiate these two +AioContexts. + +Document this in order to reduce the chance of future bugs. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240506190622.56095-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit e669e800fc9ef8806af5c5578249ab758a4f8a5a) +Signed-off-by: Kevin Wolf +--- + include/block/aio.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/include/block/aio.h b/include/block/aio.h +index 8378553eb9..4ee81936ed 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -629,6 +629,9 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co); + * + * Move the currently running coroutine to new_ctx. If the coroutine is already + * running in new_ctx, do nothing. ++ * ++ * Note that this function cannot reschedule from iohandler_ctx to ++ * qemu_aio_context. + */ + void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx); + +@@ -661,6 +664,9 @@ void aio_co_enter(AioContext *ctx, Coroutine *co); + * If called from an IOThread this will be the IOThread's AioContext. If + * called from the main thread or with the "big QEMU lock" taken it + * will be the main loop AioContext. ++ * ++ * Note that the return value is never the main loop's iohandler_ctx and the ++ * return value is the main loop AioContext instead. 
+ */ + AioContext *qemu_get_current_aio_context(void); + +-- +2.39.3 + diff --git a/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch b/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch deleted file mode 100644 index 898e35b..0000000 --- a/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch +++ /dev/null @@ -1,476 +0,0 @@ -From 0d8255c98b3ef6f603ff0279592d3e91de26de0e Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 21 Nov 2023 16:44:00 +0800 -Subject: [PATCH 021/101] backends/iommufd: Introduce the iommufd object -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [20/67] 8a56344ab4a2126f248bfa492ccddd19265f39be (eauger1/centos-qemu-kvm) - -Introduce an iommufd object which allows the interaction -with the host /dev/iommu device. - -The /dev/iommu can have been already pre-opened outside of qemu, -in which case the fd can be passed directly along with the -iommufd object: - -This allows the iommufd object to be shared accross several -subsystems (VFIO, VDPA, ...). For example, libvirt would open -the /dev/iommu once. - -If no fd is passed along with the iommufd object, the /dev/iommu -is opened by the qemu code. 
- -Suggested-by: Alex Williamson -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be) -Signed-off-by: Eric Auger ---- - MAINTAINERS | 8 ++ - backends/Kconfig | 4 + - backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++ - backends/meson.build | 1 + - backends/trace-events | 10 ++ - include/sysemu/iommufd.h | 38 ++++++ - qapi/qom.json | 19 +++ - qemu-options.hx | 12 ++ - 8 files changed, 337 insertions(+) - create mode 100644 backends/iommufd.c - create mode 100644 include/sysemu/iommufd.h - -diff --git a/MAINTAINERS b/MAINTAINERS -index 695e0bd34f..a5a446914a 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c - F: docs/system/s390x/vfio-ap.rst - L: qemu-s390x@nongnu.org - -+iommufd -+M: Yi Liu -+M: Eric Auger -+M: Zhenzhong Duan -+S: Supported -+F: backends/iommufd.c -+F: include/sysemu/iommufd.h -+ - vhost - M: Michael S. Tsirkin - S: Supported -diff --git a/backends/Kconfig b/backends/Kconfig -index f35abc1609..2cb23f62fa 100644 ---- a/backends/Kconfig -+++ b/backends/Kconfig -@@ -1 +1,5 @@ - source tpm/Kconfig -+ -+config IOMMUFD -+ bool -+ depends on VFIO -diff --git a/backends/iommufd.c b/backends/iommufd.c -new file mode 100644 -index 0000000000..ba58a0eb0d ---- /dev/null -+++ b/backends/iommufd.c -@@ -0,0 +1,245 @@ -+/* -+ * iommufd container backend -+ * -+ * Copyright (C) 2023 Intel Corporation. -+ * Copyright Red Hat, Inc. 
2023 -+ * -+ * Authors: Yi Liu -+ * Eric Auger -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include "sysemu/iommufd.h" -+#include "qapi/error.h" -+#include "qapi/qmp/qerror.h" -+#include "qemu/module.h" -+#include "qom/object_interfaces.h" -+#include "qemu/error-report.h" -+#include "monitor/monitor.h" -+#include "trace.h" -+#include -+#include -+ -+static void iommufd_backend_init(Object *obj) -+{ -+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); -+ -+ be->fd = -1; -+ be->users = 0; -+ be->owned = true; -+ qemu_mutex_init(&be->lock); -+} -+ -+static void iommufd_backend_finalize(Object *obj) -+{ -+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); -+ -+ if (be->owned) { -+ close(be->fd); -+ be->fd = -1; -+ } -+} -+ -+static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) -+{ -+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); -+ int fd = -1; -+ -+ fd = monitor_fd_param(monitor_cur(), str, errp); -+ if (fd == -1) { -+ error_prepend(errp, "Could not parse remote object fd %s:", str); -+ return; -+ } -+ qemu_mutex_lock(&be->lock); -+ be->fd = fd; -+ be->owned = false; -+ qemu_mutex_unlock(&be->lock); -+ trace_iommu_backend_set_fd(be->fd); -+} -+ -+static bool iommufd_backend_can_be_deleted(UserCreatable *uc) -+{ -+ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); -+ -+ return !be->users; -+} -+ -+static void iommufd_backend_class_init(ObjectClass *oc, void *data) -+{ -+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); -+ -+ ucc->can_be_deleted = iommufd_backend_can_be_deleted; -+ -+ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); -+} -+ -+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) -+{ -+ int fd, ret = 0; -+ -+ qemu_mutex_lock(&be->lock); -+ if (be->users == UINT32_MAX) { -+ error_setg(errp, "too many connections"); -+ ret = -E2BIG; -+ goto out; -+ } -+ if (be->owned && !be->users) { -+ fd = qemu_open_old("/dev/iommu", O_RDWR); -+ if (fd < 0) { -+ error_setg_errno(errp, errno, 
"/dev/iommu opening failed"); -+ ret = fd; -+ goto out; -+ } -+ be->fd = fd; -+ } -+ be->users++; -+out: -+ trace_iommufd_backend_connect(be->fd, be->owned, -+ be->users, ret); -+ qemu_mutex_unlock(&be->lock); -+ return ret; -+} -+ -+void iommufd_backend_disconnect(IOMMUFDBackend *be) -+{ -+ qemu_mutex_lock(&be->lock); -+ if (!be->users) { -+ goto out; -+ } -+ be->users--; -+ if (!be->users && be->owned) { -+ close(be->fd); -+ be->fd = -1; -+ } -+out: -+ trace_iommufd_backend_disconnect(be->fd, be->users); -+ qemu_mutex_unlock(&be->lock); -+} -+ -+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, -+ Error **errp) -+{ -+ int ret, fd = be->fd; -+ struct iommu_ioas_alloc alloc_data = { -+ .size = sizeof(alloc_data), -+ .flags = 0, -+ }; -+ -+ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); -+ if (ret) { -+ error_setg_errno(errp, errno, "Failed to allocate ioas"); -+ return ret; -+ } -+ -+ *ioas_id = alloc_data.out_ioas_id; -+ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); -+ -+ return ret; -+} -+ -+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) -+{ -+ int ret, fd = be->fd; -+ struct iommu_destroy des = { -+ .size = sizeof(des), -+ .id = id, -+ }; -+ -+ ret = ioctl(fd, IOMMU_DESTROY, &des); -+ trace_iommufd_backend_free_id(fd, id, ret); -+ if (ret) { -+ error_report("Failed to free id: %u %m", id); -+ } -+} -+ -+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, -+ ram_addr_t size, void *vaddr, bool readonly) -+{ -+ int ret, fd = be->fd; -+ struct iommu_ioas_map map = { -+ .size = sizeof(map), -+ .flags = IOMMU_IOAS_MAP_READABLE | -+ IOMMU_IOAS_MAP_FIXED_IOVA, -+ .ioas_id = ioas_id, -+ .__reserved = 0, -+ .user_va = (uintptr_t)vaddr, -+ .iova = iova, -+ .length = size, -+ }; -+ -+ if (!readonly) { -+ map.flags |= IOMMU_IOAS_MAP_WRITEABLE; -+ } -+ -+ ret = ioctl(fd, IOMMU_IOAS_MAP, &map); -+ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size, -+ vaddr, readonly, ret); -+ if (ret) { -+ ret = -errno; 
-+ -+ /* TODO: Not support mapping hardware PCI BAR region for now. */ -+ if (errno == EFAULT) { -+ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?"); -+ } else { -+ error_report("IOMMU_IOAS_MAP failed: %m"); -+ } -+ } -+ return ret; -+} -+ -+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, -+ hwaddr iova, ram_addr_t size) -+{ -+ int ret, fd = be->fd; -+ struct iommu_ioas_unmap unmap = { -+ .size = sizeof(unmap), -+ .ioas_id = ioas_id, -+ .iova = iova, -+ .length = size, -+ }; -+ -+ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); -+ /* -+ * IOMMUFD takes mapping as some kind of object, unmapping -+ * nonexistent mapping is treated as deleting a nonexistent -+ * object and return ENOENT. This is different from legacy -+ * backend which allows it. vIOMMU may trigger a lot of -+ * redundant unmapping, to avoid flush the log, treat them -+ * as succeess for IOMMUFD just like legacy backend. -+ */ -+ if (ret && errno == ENOENT) { -+ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret); -+ ret = 0; -+ } else { -+ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret); -+ } -+ -+ if (ret) { -+ ret = -errno; -+ error_report("IOMMU_IOAS_UNMAP failed: %m"); -+ } -+ return ret; -+} -+ -+static const TypeInfo iommufd_backend_info = { -+ .name = TYPE_IOMMUFD_BACKEND, -+ .parent = TYPE_OBJECT, -+ .instance_size = sizeof(IOMMUFDBackend), -+ .instance_init = iommufd_backend_init, -+ .instance_finalize = iommufd_backend_finalize, -+ .class_size = sizeof(IOMMUFDBackendClass), -+ .class_init = iommufd_backend_class_init, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_USER_CREATABLE }, -+ { } -+ } -+}; -+ -+static void register_types(void) -+{ -+ type_register_static(&iommufd_backend_info); -+} -+ -+type_init(register_types); -diff --git a/backends/meson.build b/backends/meson.build -index 914c7c4afb..9a5cea480d 100644 ---- a/backends/meson.build -+++ b/backends/meson.build -@@ -20,6 +20,7 @@ if have_vhost_user - system_ss.add(when: 
'CONFIG_VIRTIO', if_true: files('vhost-user.c')) - endif - system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) -+system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c')) - if have_vhost_user_crypto - system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) - endif -diff --git a/backends/trace-events b/backends/trace-events -index 652eb76a57..d45c6e31a6 100644 ---- a/backends/trace-events -+++ b/backends/trace-events -@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void) - dbus_vmstate_post_load(int version_id) "version_id: %d" - dbus_vmstate_loading(const char *id) "id: %s" - dbus_vmstate_saving(const char *id) "id: %s" -+ -+# iommufd.c -+iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" -+iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" -+iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" -+iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" -+iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" -+iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" -+iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" -+iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" -diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h -new file mode 100644 -index 0000000000..9c5524b0ed ---- /dev/null -+++ b/include/sysemu/iommufd.h -@@ -0,0 +1,38 @@ -+#ifndef SYSEMU_IOMMUFD_H -+#define SYSEMU_IOMMUFD_H -+ -+#include "qom/object.h" -+#include "qemu/thread.h" -+#include "exec/hwaddr.h" -+#include 
"exec/cpu-common.h" -+ -+#define TYPE_IOMMUFD_BACKEND "iommufd" -+OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) -+ -+struct IOMMUFDBackendClass { -+ ObjectClass parent_class; -+}; -+ -+struct IOMMUFDBackend { -+ Object parent; -+ -+ /*< protected >*/ -+ int fd; /* /dev/iommu file descriptor */ -+ bool owned; /* is the /dev/iommu opened internally */ -+ QemuMutex lock; -+ uint32_t users; -+ -+ /*< public >*/ -+}; -+ -+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); -+void iommufd_backend_disconnect(IOMMUFDBackend *be); -+ -+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, -+ Error **errp); -+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); -+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, -+ ram_addr_t size, void *vaddr, bool readonly); -+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, -+ hwaddr iova, ram_addr_t size); -+#endif -diff --git a/qapi/qom.json b/qapi/qom.json -index c53ef978ff..95516ba325 100644 ---- a/qapi/qom.json -+++ b/qapi/qom.json -@@ -794,6 +794,23 @@ - { 'struct': 'VfioUserServerProperties', - 'data': { 'socket': 'SocketAddress', 'device': 'str' } } - -+## -+# @IOMMUFDProperties: -+# -+# Properties for iommufd objects. -+# -+# @fd: file descriptor name previously passed via 'getfd' command, -+# which represents a pre-opened /dev/iommu. This allows the -+# iommufd object to be shared accross several subsystems -+# (VFIO, VDPA, ...), and the file descriptor to be shared -+# with other process, e.g. DPDK. 
(default: QEMU opens -+# /dev/iommu by itself) -+# -+# Since: 9.0 -+## -+{ 'struct': 'IOMMUFDProperties', -+ 'data': { '*fd': 'str' } } -+ - ## - # @RngProperties: - # -@@ -934,6 +951,7 @@ - 'input-barrier', - { 'name': 'input-linux', - 'if': 'CONFIG_LINUX' }, -+ 'iommufd', - 'iothread', - 'main-loop', - { 'name': 'memory-backend-epc', -@@ -1003,6 +1021,7 @@ - 'input-barrier': 'InputBarrierProperties', - 'input-linux': { 'type': 'InputLinuxProperties', - 'if': 'CONFIG_LINUX' }, -+ 'iommufd': 'IOMMUFDProperties', - 'iothread': 'IothreadProperties', - 'main-loop': 'MainLoopProperties', - 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', -diff --git a/qemu-options.hx b/qemu-options.hx -index 557118cb1f..0814f43066 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -5224,6 +5224,18 @@ SRST - - The ``share`` boolean option is on by default with memfd. - -+ ``-object iommufd,id=id[,fd=fd]`` -+ Creates an iommufd backend which allows control of DMA mapping -+ through the ``/dev/iommu`` device. -+ -+ The ``id`` parameter is a unique ID which frontends (such as -+ vfio-pci of vdpa) will use to connect with the iommufd backend. -+ -+ The ``fd`` parameter is an optional pre-opened file descriptor -+ resulting from ``/dev/iommu`` opening. Usually the iommufd is shared -+ across all subsystems, bringing the benefit of centralized -+ reference counting. -+ - ``-object rng-builtin,id=id`` - Creates a random number generator backend which obtains entropy - from QEMU builtin functions. 
The ``id`` parameter is a unique ID --- -2.39.3 - diff --git a/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch b/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch deleted file mode 100644 index 5ee365b..0000000 --- a/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch +++ /dev/null @@ -1,47 +0,0 @@ -From da9a24793e876f6f2727d57f939d882be26a47b8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Fri, 22 Dec 2023 08:55:23 +0100 -Subject: [PATCH 064/101] backends/iommufd: Remove check on number of backend - users -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [63/67] ac4d4589d1f2de5ac3f0adfd8d1f27dbf6bbfdee (eauger1/centos-qemu-kvm) - -QOM already has a ref count on objects and it will assert much -earlier, when INT_MAX is reached. 
- -Reviewed-by: Eric Auger -Reviewed-by: Zhenzhong Duan -Signed-off-by: Cédric Le Goater -(cherry picked from commit c2ab3a6f7411c895e538e8350fee8948ac07c1a0) -Signed-off-by: Eric Auger ---- - backends/iommufd.c | 5 ----- - 1 file changed, 5 deletions(-) - -diff --git a/backends/iommufd.c b/backends/iommufd.c -index ba58a0eb0d..393c0d9a37 100644 ---- a/backends/iommufd.c -+++ b/backends/iommufd.c -@@ -80,11 +80,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) - int fd, ret = 0; - - qemu_mutex_lock(&be->lock); -- if (be->users == UINT32_MAX) { -- error_setg(errp, "too many connections"); -- ret = -E2BIG; -- goto out; -- } - if (be->owned && !be->users) { - fd = qemu_open_old("/dev/iommu", O_RDWR); - if (fd < 0) { --- -2.39.3 - diff --git a/SOURCES/kvm-backends-iommufd-Remove-mutex.patch b/SOURCES/kvm-backends-iommufd-Remove-mutex.patch deleted file mode 100644 index 83878d5..0000000 --- a/SOURCES/kvm-backends-iommufd-Remove-mutex.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 92aff3cc1a412de01e9563802fa48848eae5283f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Thu, 21 Dec 2023 16:58:41 +0100 -Subject: [PATCH 065/101] backends/iommufd: Remove mutex -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [64/67] 65518432b18f18ceadafe1b0698cdaa962e84f61 (eauger1/centos-qemu-kvm) - -Coverity reports a concurrent data access violation because be->users -is being accessed in iommufd_backend_can_be_deleted() without holding -the mutex. - -However, these routines are called from the QEMU main thread when a -device is created. In this case, the code paths should be protected by -the BQL lock and it should be safe to drop the IOMMUFD backend mutex. -Simply remove it. 
- -Fixes: CID 1531550 -Fixes: CID 1531549 -Reviewed-by: Eric Auger -Reviewed-by: Zhenzhong Duan -Signed-off-by: Cédric Le Goater -(cherry picked from commit 19368b1905b4b917e915526fcbd5bfa3f7439451) -Signed-off-by: Eric Auger ---- - backends/iommufd.c | 7 ------- - include/sysemu/iommufd.h | 2 -- - 2 files changed, 9 deletions(-) - -diff --git a/backends/iommufd.c b/backends/iommufd.c -index 393c0d9a37..1ef683c7b0 100644 ---- a/backends/iommufd.c -+++ b/backends/iommufd.c -@@ -29,7 +29,6 @@ static void iommufd_backend_init(Object *obj) - be->fd = -1; - be->users = 0; - be->owned = true; -- qemu_mutex_init(&be->lock); - } - - static void iommufd_backend_finalize(Object *obj) -@@ -52,10 +51,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) - error_prepend(errp, "Could not parse remote object fd %s:", str); - return; - } -- qemu_mutex_lock(&be->lock); - be->fd = fd; - be->owned = false; -- qemu_mutex_unlock(&be->lock); - trace_iommu_backend_set_fd(be->fd); - } - -@@ -79,7 +76,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) - { - int fd, ret = 0; - -- qemu_mutex_lock(&be->lock); - if (be->owned && !be->users) { - fd = qemu_open_old("/dev/iommu", O_RDWR); - if (fd < 0) { -@@ -93,13 +89,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) - out: - trace_iommufd_backend_connect(be->fd, be->owned, - be->users, ret); -- qemu_mutex_unlock(&be->lock); - return ret; - } - - void iommufd_backend_disconnect(IOMMUFDBackend *be) - { -- qemu_mutex_lock(&be->lock); - if (!be->users) { - goto out; - } -@@ -110,7 +104,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) - } - out: - trace_iommufd_backend_disconnect(be->fd, be->users); -- qemu_mutex_unlock(&be->lock); - } - - int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, -diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h -index 9c5524b0ed..9af27ebd6c 100644 ---- a/include/sysemu/iommufd.h -+++ b/include/sysemu/iommufd.h -@@ 
-2,7 +2,6 @@ - #define SYSEMU_IOMMUFD_H - - #include "qom/object.h" --#include "qemu/thread.h" - #include "exec/hwaddr.h" - #include "exec/cpu-common.h" - -@@ -19,7 +18,6 @@ struct IOMMUFDBackend { - /*< protected >*/ - int fd; /* /dev/iommu file descriptor */ - bool owned; /* is the /dev/iommu opened internally */ -- QemuMutex lock; - uint32_t users; - - /*< public >*/ --- -2.39.3 - diff --git a/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch b/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch new file mode 100644 index 0000000..8db6199 --- /dev/null +++ b/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch @@ -0,0 +1,252 @@ +From 2ee645a339e9ef9cd92620a8b784d18d512326be Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:56:02 +0200 +Subject: [PATCH 4/4] block: Parse filenames only when explicitly requested + +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) +RH-Jira: RHEL-35611 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake +RH-Commit: [4/4] f44c2941d4419e60f16dea3e9adca164e75aa78d + +When handling image filenames from legacy options such as -drive or from +tools, these filenames are parsed for protocol prefixes, including for +the json:{} pseudo-protocol. + +This behaviour is intended for filenames that come directly from the +command line and for backing files, which may come from the image file +itself. Higher level management tools generally take care to verify that +untrusted images don't contain a bad (or any) backing file reference; +'qemu-img info' is a suitable tool for this. + +However, for other files that can be referenced in images, such as +qcow2 data files or VMDK extents, the string from the image file is +usually not verified by management tools - and 'qemu-img info' wouldn't +be suitable because in contrast to backing files, it already opens these +other referenced files. 
So here the string should be interpreted as a +literal local filename. More complex configurations need to be specified +explicitly on the command line or in QMP. + +This patch changes bdrv_open_inherit() so that it only parses filenames +if a new parameter parse_filename is true. It is set for the top level +in bdrv_open(), for the file child and for the backing file child. All +other callers pass false and disable filename parsing this way. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek +--- + block.c | 90 ++++++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 57 insertions(+), 33 deletions(-) + +diff --git a/block.c b/block.c +index 468cf5e67d..50bdd197b7 100644 +--- a/block.c ++++ b/block.c +@@ -86,6 +86,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, + BlockDriverState *parent, + const BdrvChildClass *child_class, + BdrvChildRole child_role, ++ bool parse_filename, + Error **errp); + + static bool bdrv_recurse_has_child(BlockDriverState *bs, +@@ -2058,7 +2059,8 @@ static void parse_json_protocol(QDict *options, const char **pfilename, + * block driver has been specified explicitly. 
+ */ + static int bdrv_fill_options(QDict **options, const char *filename, +- int *flags, Error **errp) ++ int *flags, bool allow_parse_filename, ++ Error **errp) + { + const char *drvname; + bool protocol = *flags & BDRV_O_PROTOCOL; +@@ -2100,7 +2102,7 @@ static int bdrv_fill_options(QDict **options, const char *filename, + if (protocol && filename) { + if (!qdict_haskey(*options, "filename")) { + qdict_put_str(*options, "filename", filename); +- parse_filename = true; ++ parse_filename = allow_parse_filename; + } else { + error_setg(errp, "Can't specify 'file' and 'filename' options at " + "the same time"); +@@ -3663,7 +3665,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + } + + backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, +- &child_of_bds, bdrv_backing_role(bs), errp); ++ &child_of_bds, bdrv_backing_role(bs), true, ++ errp); + if (!backing_hd) { + bs->open_flags |= BDRV_O_NO_BACKING; + error_prepend(errp, "Could not open backing file: "); +@@ -3697,7 +3700,8 @@ free_exit: + static BlockDriverState * + bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, + BlockDriverState *parent, const BdrvChildClass *child_class, +- BdrvChildRole child_role, bool allow_none, Error **errp) ++ BdrvChildRole child_role, bool allow_none, ++ bool parse_filename, Error **errp) + { + BlockDriverState *bs = NULL; + QDict *image_options; +@@ -3728,7 +3732,8 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, + } + + bs = bdrv_open_inherit(filename, reference, image_options, 0, +- parent, child_class, child_role, errp); ++ parent, child_class, child_role, parse_filename, ++ errp); + if (!bs) { + goto done; + } +@@ -3738,6 +3743,33 @@ done: + return bs; + } + ++static BdrvChild *bdrv_open_child_common(const char *filename, ++ QDict *options, const char *bdref_key, ++ BlockDriverState *parent, ++ const BdrvChildClass *child_class, ++ BdrvChildRole child_role, ++ bool 
allow_none, bool parse_filename, ++ Error **errp) ++{ ++ BlockDriverState *bs; ++ BdrvChild *child; ++ ++ GLOBAL_STATE_CODE(); ++ ++ bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, ++ child_role, allow_none, parse_filename, errp); ++ if (bs == NULL) { ++ return NULL; ++ } ++ ++ bdrv_graph_wrlock(); ++ child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, ++ errp); ++ bdrv_graph_wrunlock(); ++ ++ return child; ++} ++ + /* + * Opens a disk image whose options are given as BlockdevRef in another block + * device's options. +@@ -3761,27 +3793,15 @@ BdrvChild *bdrv_open_child(const char *filename, + BdrvChildRole child_role, + bool allow_none, Error **errp) + { +- BlockDriverState *bs; +- BdrvChild *child; +- +- GLOBAL_STATE_CODE(); +- +- bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, +- child_role, allow_none, errp); +- if (bs == NULL) { +- return NULL; +- } +- +- bdrv_graph_wrlock(); +- child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, +- errp); +- bdrv_graph_wrunlock(); +- +- return child; ++ return bdrv_open_child_common(filename, options, bdref_key, parent, ++ child_class, child_role, allow_none, false, ++ errp); + } + + /* +- * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. ++ * This does mostly the same as bdrv_open_child(), but for opening the primary ++ * child of a node. A notable difference from bdrv_open_child() is that it ++ * enables filename parsing for protocol names (including json:). + * + * @parent can move to a different AioContext in this function. + */ +@@ -3796,8 +3816,8 @@ int bdrv_open_file_child(const char *filename, + role = parent->drv->is_filter ? 
+ (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE; + +- if (!bdrv_open_child(filename, options, bdref_key, parent, +- &child_of_bds, role, false, errp)) ++ if (!bdrv_open_child_common(filename, options, bdref_key, parent, ++ &child_of_bds, role, false, true, errp)) + { + return -EINVAL; + } +@@ -3842,7 +3862,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) + + } + +- bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); ++ bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, false, ++ errp); + obj = NULL; + qobject_unref(obj); + visit_free(v); +@@ -3932,7 +3953,7 @@ static BlockDriverState * no_coroutine_fn + bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + int flags, BlockDriverState *parent, + const BdrvChildClass *child_class, BdrvChildRole child_role, +- Error **errp) ++ bool parse_filename, Error **errp) + { + int ret; + BlockBackend *file = NULL; +@@ -3980,9 +4001,11 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + } + + /* json: syntax counts as explicit options, as if in the QDict */ +- parse_json_protocol(options, &filename, &local_err); +- if (local_err) { +- goto fail; ++ if (parse_filename) { ++ parse_json_protocol(options, &filename, &local_err); ++ if (local_err) { ++ goto fail; ++ } + } + + bs->explicit_options = qdict_clone_shallow(options); +@@ -4007,7 +4030,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + parent->open_flags, parent->options); + } + +- ret = bdrv_fill_options(&options, filename, &flags, &local_err); ++ ret = bdrv_fill_options(&options, filename, &flags, parse_filename, ++ &local_err); + if (ret < 0) { + goto fail; + } +@@ -4076,7 +4100,7 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + + file_bs = bdrv_open_child_bs(filename, options, "file", bs, + &child_of_bds, BDRV_CHILD_IMAGE, +- true, &local_err); ++ true, true, 
&local_err); + if (local_err) { + goto fail; + } +@@ -4225,7 +4249,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, + GLOBAL_STATE_CODE(); + + return bdrv_open_inherit(filename, reference, options, flags, NULL, +- NULL, 0, errp); ++ NULL, 0, true, errp); + } + + /* Return true if the NULL-terminated @list contains @str */ +-- +2.39.3 + diff --git a/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch b/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch deleted file mode 100644 index 155fa19..0000000 --- a/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch +++ /dev/null @@ -1,104 +0,0 @@ -From afa842e9fdf6e1d6e5d5785679a22779632142bd Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 2 Feb 2024 15:47:54 +0100 -Subject: [PATCH 03/22] block-backend: Allow concurrent context changes - -RH-Author: Hanna Czenczek -RH-MergeRequest: 222: Allow concurrent BlockBackend context changes -RH-Jira: RHEL-24593 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Commit: [1/2] 9e1b535f60f7afa94a0817dc3e71136e41631c71 (hreitz/qemu-kvm-c-9-s) - -Since AioContext locks have been removed, a BlockBackend's AioContext -may really change at any time (only exception is that it is often -confined to a drained section, as noted in this patch). Therefore, -blk_get_aio_context() cannot rely on its root node's context always -matching that of the BlockBackend. - -In practice, whether they match does not matter anymore anyway: Requests -can be sent to BDSs from any context, so anyone who requests the BB's -context should have no reason to require the root node to have the same -context. Therefore, we can and should remove the assertion to that -effect. - -In addition, because the context can be set and queried from different -threads concurrently, it has to be accessed with atomic operations. 
- -Buglink: https://issues.redhat.com/browse/RHEL-19381 -Suggested-by: Kevin Wolf -Signed-off-by: Hanna Czenczek -Message-ID: <20240202144755.671354-2-hreitz@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit ad893672027ffe26db498947d70cde6d4f58a111) ---- - block/block-backend.c | 22 +++++++++++----------- - 1 file changed, 11 insertions(+), 11 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 209eb07528..9c4de79e6b 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -44,7 +44,7 @@ struct BlockBackend { - char *name; - int refcnt; - BdrvChild *root; -- AioContext *ctx; -+ AioContext *ctx; /* access with atomic operations only */ - DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ - QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ - QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ -@@ -2414,22 +2414,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) - } - } - -+/** -+ * Return BB's current AioContext. Note that this context may change -+ * concurrently at any time, with one exception: If the BB has a root node -+ * attached, its context will only change through bdrv_try_change_aio_context(), -+ * which creates a drained section. Therefore, incrementing such a BB's -+ * in-flight counter will prevent its context from changing. 
-+ */ - AioContext *blk_get_aio_context(BlockBackend *blk) - { -- BlockDriverState *bs; - IO_CODE(); - - if (!blk) { - return qemu_get_aio_context(); - } - -- bs = blk_bs(blk); -- if (bs) { -- AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); -- assert(ctx == blk->ctx); -- } -- -- return blk->ctx; -+ return qatomic_read(&blk->ctx); - } - - int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, -@@ -2442,7 +2442,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, - GLOBAL_STATE_CODE(); - - if (!bs) { -- blk->ctx = new_context; -+ qatomic_set(&blk->ctx, new_context); - return 0; - } - -@@ -2471,7 +2471,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque) - AioContext *new_context = s->new_ctx; - ThrottleGroupMember *tgm = &blk->public.throttle_group_member; - -- blk->ctx = new_context; -+ qatomic_set(&blk->ctx, new_context); - if (tgm->throttle_state) { - throttle_group_detach_aio_context(tgm); - throttle_group_attach_aio_context(tgm, new_context); --- -2.39.3 - diff --git a/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch b/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch deleted file mode 100644 index df764fb..0000000 --- a/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch +++ /dev/null @@ -1,69 +0,0 @@ -From b1a68aebadecd7d339cf5eaffeda15099c998528 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 12 Sep 2023 19:10:37 -0400 -Subject: [PATCH 095/101] block-coroutine-wrapper: use - qemu_get_current_aio_context() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [26/26] cde767bcdc626e90721792e3889952057a548ac5 (kmwolf/centos-qemu-kvm) - -Use qemu_get_current_aio_context() in mixed wrappers and coroutine -wrappers so that code runs in the caller's AioContext instead of moving -to the BlockDriverState's AioContext. 
This change is necessary for the -multi-queue block layer where any thread can call into the block layer. - -Most wrappers are IO_CODE where it's safe to use the current AioContext -nowadays. BlockDrivers and the core block layer use their own locks and -no longer depend on the AioContext lock for thread-safety. - -The bdrv_create() wrapper invokes GLOBAL_STATE code. Using the current -AioContext is safe because this code is only called with the BQL held -from the main loop thread. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20230912231037.826804-6-stefanha@redhat.com> -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - scripts/block-coroutine-wrapper.py | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py -index c9c09fcacd..dbbde99e39 100644 ---- a/scripts/block-coroutine-wrapper.py -+++ b/scripts/block-coroutine-wrapper.py -@@ -92,8 +92,6 @@ def __init__(self, wrapper_type: str, return_type: str, name: str, - f"{self.name}") - self.target_name = f'{subsystem}_{subname}' - -- self.ctx = self.gen_ctx() -- - self.get_result = 's->ret = ' - self.ret = 'return s.ret;' - self.co_ret = 'return ' -@@ -167,7 +165,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str: - {func.co_ret}{name}({ func.gen_list('{name}') }); - }} else {{ - {struct_name} s = {{ -- .poll_state.ctx = {func.ctx}, -+ .poll_state.ctx = qemu_get_current_aio_context(), - .poll_state.in_progress = true, - - { func.gen_block(' .{name} = {name},') } -@@ -191,7 +189,7 @@ def create_co_wrapper(func: FuncDecl) -> str: - {func.return_type} {func.name}({ func.gen_list('{decl}') }) - {{ - {struct_name} s = {{ -- .poll_state.ctx = {func.ctx}, -+ .poll_state.ctx = qemu_get_current_aio_context(), - .poll_state.in_progress = true, - - { func.gen_block(' .{name} = {name},') } --- -2.39.3 - diff --git a/SOURCES/kvm-block-crypto-create-ciphers-on-demand.patch 
b/SOURCES/kvm-block-crypto-create-ciphers-on-demand.patch new file mode 100644 index 0000000..c2b9c47 --- /dev/null +++ b/SOURCES/kvm-block-crypto-create-ciphers-on-demand.patch @@ -0,0 +1,330 @@ +From a67edfb4b591acdffc5b4987601a30224376996f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 27 May 2024 11:58:50 -0400 +Subject: [PATCH 4/5] block/crypto: create ciphers on demand +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 251: block/crypto: create ciphers on demand +RH-Jira: RHEL-36159 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] 22a4c87fef774cad98a6f5a79f27df50a208013d (stefanha/centos-stream-qemu-kvm) + +Ciphers are pre-allocated by qcrypto_block_init_cipher() depending on +the given number of threads. The -device +virtio-blk-pci,iothread-vq-mapping= feature allows users to assign +multiple IOThreads to a virtio-blk device, but the association between +the virtio-blk device and the block driver happens after the block +driver is already open. + +When the number of threads given to qcrypto_block_init_cipher() is +smaller than the actual number of threads at runtime, the +block->n_free_ciphers > 0 assertion in qcrypto_block_pop_cipher() can +fail. + +Get rid of qcrypto_block_init_cipher() n_thread's argument and allocate +ciphers on demand. + +Reported-by: Qing Wang +Buglink: https://issues.redhat.com/browse/RHEL-36159 +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240527155851.892885-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Acked-by: Daniel P. 
Berrangé +Signed-off-by: Kevin Wolf +(cherry picked from commit af206c284e4c1b17cdfb0f17e898b288c0fc1751) +Signed-off-by: Stefan Hajnoczi +--- + crypto/block-luks.c | 3 +- + crypto/block-qcow.c | 2 +- + crypto/block.c | 111 ++++++++++++++++++++++++++------------------ + crypto/blockpriv.h | 12 +++-- + 4 files changed, 78 insertions(+), 50 deletions(-) + +diff --git a/crypto/block-luks.c b/crypto/block-luks.c +index 3ee928fb5a..3357852c0a 100644 +--- a/crypto/block-luks.c ++++ b/crypto/block-luks.c +@@ -1262,7 +1262,6 @@ qcrypto_block_luks_open(QCryptoBlock *block, + luks->cipher_mode, + masterkey, + luks->header.master_key_len, +- n_threads, + errp) < 0) { + goto fail; + } +@@ -1456,7 +1455,7 @@ qcrypto_block_luks_create(QCryptoBlock *block, + /* Setup the block device payload encryption objects */ + if (qcrypto_block_init_cipher(block, luks_opts.cipher_alg, + luks_opts.cipher_mode, masterkey, +- luks->header.master_key_len, 1, errp) < 0) { ++ luks->header.master_key_len, errp) < 0) { + goto error; + } + +diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c +index 4d7cf36a8f..02305058e3 100644 +--- a/crypto/block-qcow.c ++++ b/crypto/block-qcow.c +@@ -75,7 +75,7 @@ qcrypto_block_qcow_init(QCryptoBlock *block, + ret = qcrypto_block_init_cipher(block, QCRYPTO_CIPHER_ALG_AES_128, + QCRYPTO_CIPHER_MODE_CBC, + keybuf, G_N_ELEMENTS(keybuf), +- n_threads, errp); ++ errp); + if (ret < 0) { + ret = -ENOTSUP; + goto fail; +diff --git a/crypto/block.c b/crypto/block.c +index 506ea1d1a3..ba6d1cebc7 100644 +--- a/crypto/block.c ++++ b/crypto/block.c +@@ -20,6 +20,7 @@ + + #include "qemu/osdep.h" + #include "qapi/error.h" ++#include "qemu/lockable.h" + #include "blockpriv.h" + #include "block-qcow.h" + #include "block-luks.h" +@@ -57,6 +58,8 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + { + QCryptoBlock *block = g_new0(QCryptoBlock, 1); + ++ qemu_mutex_init(&block->mutex); ++ + block->format = options->format; + + if (options->format >= 
G_N_ELEMENTS(qcrypto_block_drivers) || +@@ -76,8 +79,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + return NULL; + } + +- qemu_mutex_init(&block->mutex); +- + return block; + } + +@@ -92,6 +93,8 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options, + { + QCryptoBlock *block = g_new0(QCryptoBlock, 1); + ++ qemu_mutex_init(&block->mutex); ++ + block->format = options->format; + + if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) || +@@ -111,8 +114,6 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options, + return NULL; + } + +- qemu_mutex_init(&block->mutex); +- + return block; + } + +@@ -227,37 +228,42 @@ QCryptoCipher *qcrypto_block_get_cipher(QCryptoBlock *block) + * This function is used only in test with one thread (it's safe to skip + * pop/push interface), so it's enough to assert it here: + */ +- assert(block->n_ciphers <= 1); +- return block->ciphers ? block->ciphers[0] : NULL; ++ assert(block->max_free_ciphers <= 1); ++ return block->free_ciphers ? 
block->free_ciphers[0] : NULL; + } + + +-static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block) ++static QCryptoCipher *qcrypto_block_pop_cipher(QCryptoBlock *block, ++ Error **errp) + { +- QCryptoCipher *cipher; +- +- qemu_mutex_lock(&block->mutex); +- +- assert(block->n_free_ciphers > 0); +- block->n_free_ciphers--; +- cipher = block->ciphers[block->n_free_ciphers]; +- +- qemu_mutex_unlock(&block->mutex); ++ /* Usually there is a free cipher available */ ++ WITH_QEMU_LOCK_GUARD(&block->mutex) { ++ if (block->n_free_ciphers > 0) { ++ block->n_free_ciphers--; ++ return block->free_ciphers[block->n_free_ciphers]; ++ } ++ } + +- return cipher; ++ /* Otherwise allocate a new cipher */ ++ return qcrypto_cipher_new(block->alg, block->mode, block->key, ++ block->nkey, errp); + } + + + static void qcrypto_block_push_cipher(QCryptoBlock *block, + QCryptoCipher *cipher) + { +- qemu_mutex_lock(&block->mutex); ++ QEMU_LOCK_GUARD(&block->mutex); + +- assert(block->n_free_ciphers < block->n_ciphers); +- block->ciphers[block->n_free_ciphers] = cipher; +- block->n_free_ciphers++; ++ if (block->n_free_ciphers == block->max_free_ciphers) { ++ block->max_free_ciphers++; ++ block->free_ciphers = g_renew(QCryptoCipher *, ++ block->free_ciphers, ++ block->max_free_ciphers); ++ } + +- qemu_mutex_unlock(&block->mutex); ++ block->free_ciphers[block->n_free_ciphers] = cipher; ++ block->n_free_ciphers++; + } + + +@@ -265,24 +271,31 @@ int qcrypto_block_init_cipher(QCryptoBlock *block, + QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, size_t nkey, +- size_t n_threads, Error **errp) ++ Error **errp) + { +- size_t i; ++ QCryptoCipher *cipher; + +- assert(!block->ciphers && !block->n_ciphers && !block->n_free_ciphers); ++ assert(!block->free_ciphers && !block->max_free_ciphers && ++ !block->n_free_ciphers); + +- block->ciphers = g_new0(QCryptoCipher *, n_threads); ++ /* Stash away cipher parameters for qcrypto_block_pop_cipher() */ ++ block->alg = alg; 
++ block->mode = mode; ++ block->key = g_memdup2(key, nkey); ++ block->nkey = nkey; + +- for (i = 0; i < n_threads; i++) { +- block->ciphers[i] = qcrypto_cipher_new(alg, mode, key, nkey, errp); +- if (!block->ciphers[i]) { +- qcrypto_block_free_cipher(block); +- return -1; +- } +- block->n_ciphers++; +- block->n_free_ciphers++; ++ /* ++ * Create a new cipher to validate the parameters now. This reduces the ++ * chance of cipher creation failing at I/O time. ++ */ ++ cipher = qcrypto_block_pop_cipher(block, errp); ++ if (!cipher) { ++ g_free(block->key); ++ block->key = NULL; ++ return -1; + } + ++ qcrypto_block_push_cipher(block, cipher); + return 0; + } + +@@ -291,19 +304,23 @@ void qcrypto_block_free_cipher(QCryptoBlock *block) + { + size_t i; + +- if (!block->ciphers) { ++ g_free(block->key); ++ block->key = NULL; ++ ++ if (!block->free_ciphers) { + return; + } + +- assert(block->n_ciphers == block->n_free_ciphers); ++ /* All popped ciphers were eventually pushed back */ ++ assert(block->n_free_ciphers == block->max_free_ciphers); + +- for (i = 0; i < block->n_ciphers; i++) { +- qcrypto_cipher_free(block->ciphers[i]); ++ for (i = 0; i < block->max_free_ciphers; i++) { ++ qcrypto_cipher_free(block->free_ciphers[i]); + } + +- g_free(block->ciphers); +- block->ciphers = NULL; +- block->n_ciphers = block->n_free_ciphers = 0; ++ g_free(block->free_ciphers); ++ block->free_ciphers = NULL; ++ block->max_free_ciphers = block->n_free_ciphers = 0; + } + + QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block) +@@ -311,7 +328,7 @@ QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block) + /* ivgen should be accessed under mutex. 
However, this function is used only + * in test with one thread, so it's enough to assert it here: + */ +- assert(block->n_ciphers <= 1); ++ assert(block->max_free_ciphers <= 1); + return block->ivgen; + } + +@@ -446,7 +463,10 @@ int qcrypto_block_decrypt_helper(QCryptoBlock *block, + Error **errp) + { + int ret; +- QCryptoCipher *cipher = qcrypto_block_pop_cipher(block); ++ QCryptoCipher *cipher = qcrypto_block_pop_cipher(block, errp); ++ if (!cipher) { ++ return -1; ++ } + + ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen, + &block->mutex, sectorsize, offset, buf, +@@ -465,7 +485,10 @@ int qcrypto_block_encrypt_helper(QCryptoBlock *block, + Error **errp) + { + int ret; +- QCryptoCipher *cipher = qcrypto_block_pop_cipher(block); ++ QCryptoCipher *cipher = qcrypto_block_pop_cipher(block, errp); ++ if (!cipher) { ++ return -1; ++ } + + ret = do_qcrypto_block_cipher_encdec(cipher, block->niv, block->ivgen, + &block->mutex, sectorsize, offset, buf, +diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h +index 836f3b4726..4bf6043d5d 100644 +--- a/crypto/blockpriv.h ++++ b/crypto/blockpriv.h +@@ -32,8 +32,14 @@ struct QCryptoBlock { + const QCryptoBlockDriver *driver; + void *opaque; + +- QCryptoCipher **ciphers; +- size_t n_ciphers; ++ /* Cipher parameters */ ++ QCryptoCipherAlgorithm alg; ++ QCryptoCipherMode mode; ++ uint8_t *key; ++ size_t nkey; ++ ++ QCryptoCipher **free_ciphers; ++ size_t max_free_ciphers; + size_t n_free_ciphers; + QCryptoIVGen *ivgen; + QemuMutex mutex; +@@ -130,7 +136,7 @@ int qcrypto_block_init_cipher(QCryptoBlock *block, + QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, size_t nkey, +- size_t n_threads, Error **errp); ++ Error **errp); + + void qcrypto_block_free_cipher(QCryptoBlock *block); + +-- +2.39.3 + diff --git a/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch b/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch deleted file mode 100644 index 
1783a64..0000000 --- a/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch +++ /dev/null @@ -1,217 +0,0 @@ -From 25cce5df341861e8ba8ec57722558e2dee3ce56a Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 14 Sep 2023 10:00:58 -0400 -Subject: [PATCH 073/101] block/file-posix: set up Linux AIO and io_uring in - the current thread - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [4/26] 74c7daf805daefe706378308c3afeb28d861164b (kmwolf/centos-qemu-kvm) - -The file-posix block driver currently only sets up Linux AIO and -io_uring in the BDS's AioContext. In the multi-queue block layer we must -be able to submit I/O requests in AioContexts that do not have Linux AIO -and io_uring set up yet since any thread can call into the block driver. - -Set up Linux AIO and io_uring for the current AioContext during request -submission. We lose the ability to return an error from -.bdrv_file_open() when Linux AIO and io_uring setup fails (e.g. due to -resource limits). Instead the user only gets warnings and we fall back -to aio=threads. This is still better than a fatal error after startup. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20230914140101.1065008-2-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf ---- - block/file-posix.c | 103 ++++++++++++++++++++++----------------------- - 1 file changed, 51 insertions(+), 52 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index b862406c71..35684f7e21 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -712,17 +712,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, - - #ifdef CONFIG_LINUX_AIO - /* Currently Linux does AIO only for files opened with O_DIRECT */ -- if (s->use_linux_aio) { -- if (!(s->open_flags & O_DIRECT)) { -- error_setg(errp, "aio=native was specified, but it requires " -- "cache.direct=on, which was not specified."); -- ret = -EINVAL; -- goto fail; -- } -- if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) { -- error_prepend(errp, "Unable to use native AIO: "); -- goto fail; -- } -+ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) { -+ error_setg(errp, "aio=native was specified, but it requires " -+ "cache.direct=on, which was not specified."); -+ ret = -EINVAL; -+ goto fail; - } - #else - if (s->use_linux_aio) { -@@ -733,14 +727,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, - } - #endif /* !defined(CONFIG_LINUX_AIO) */ - --#ifdef CONFIG_LINUX_IO_URING -- if (s->use_linux_io_uring) { -- if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) { -- error_prepend(errp, "Unable to use io_uring: "); -- goto fail; -- } -- } --#else -+#ifndef CONFIG_LINUX_IO_URING - if (s->use_linux_io_uring) { - error_setg(errp, "aio=io_uring was specified, but is not supported " - "in this build."); -@@ -2444,6 +2431,48 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) - return true; - } - -+#ifdef CONFIG_LINUX_IO_URING -+static inline bool raw_check_linux_io_uring(BDRVRawState *s) -+{ -+ Error *local_err = NULL; -+ AioContext *ctx; -+ 
-+ if (!s->use_linux_io_uring) { -+ return false; -+ } -+ -+ ctx = qemu_get_current_aio_context(); -+ if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) { -+ error_reportf_err(local_err, "Unable to use linux io_uring, " -+ "falling back to thread pool: "); -+ s->use_linux_io_uring = false; -+ return false; -+ } -+ return true; -+} -+#endif -+ -+#ifdef CONFIG_LINUX_AIO -+static inline bool raw_check_linux_aio(BDRVRawState *s) -+{ -+ Error *local_err = NULL; -+ AioContext *ctx; -+ -+ if (!s->use_linux_aio) { -+ return false; -+ } -+ -+ ctx = qemu_get_current_aio_context(); -+ if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) { -+ error_reportf_err(local_err, "Unable to use Linux AIO, " -+ "falling back to thread pool: "); -+ s->use_linux_aio = false; -+ return false; -+ } -+ return true; -+} -+#endif -+ - static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, - uint64_t bytes, QEMUIOVector *qiov, int type) - { -@@ -2474,13 +2503,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, - if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) { - type |= QEMU_AIO_MISALIGNED; - #ifdef CONFIG_LINUX_IO_URING -- } else if (s->use_linux_io_uring) { -+ } else if (raw_check_linux_io_uring(s)) { - assert(qiov->size == bytes); - ret = luring_co_submit(bs, s->fd, offset, qiov, type); - goto out; - #endif - #ifdef CONFIG_LINUX_AIO -- } else if (s->use_linux_aio) { -+ } else if (raw_check_linux_aio(s)) { - assert(qiov->size == bytes); - ret = laio_co_submit(s->fd, offset, qiov, type, - s->aio_max_batch); -@@ -2567,39 +2596,13 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) - }; - - #ifdef CONFIG_LINUX_IO_URING -- if (s->use_linux_io_uring) { -+ if (raw_check_linux_io_uring(s)) { - return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH); - } - #endif - return raw_thread_pool_submit(handle_aiocb_flush, &acb); - } - --static void raw_aio_attach_aio_context(BlockDriverState *bs, -- AioContext 
*new_context) --{ -- BDRVRawState __attribute__((unused)) *s = bs->opaque; --#ifdef CONFIG_LINUX_AIO -- if (s->use_linux_aio) { -- Error *local_err = NULL; -- if (!aio_setup_linux_aio(new_context, &local_err)) { -- error_reportf_err(local_err, "Unable to use native AIO, " -- "falling back to thread pool: "); -- s->use_linux_aio = false; -- } -- } --#endif --#ifdef CONFIG_LINUX_IO_URING -- if (s->use_linux_io_uring) { -- Error *local_err = NULL; -- if (!aio_setup_linux_io_uring(new_context, &local_err)) { -- error_reportf_err(local_err, "Unable to use linux io_uring, " -- "falling back to thread pool: "); -- s->use_linux_io_uring = false; -- } -- } --#endif --} -- - static void raw_close(BlockDriverState *bs) - { - BDRVRawState *s = bs->opaque; -@@ -3896,7 +3899,6 @@ BlockDriver bdrv_file = { - .bdrv_co_copy_range_from = raw_co_copy_range_from, - .bdrv_co_copy_range_to = raw_co_copy_range_to, - .bdrv_refresh_limits = raw_refresh_limits, -- .bdrv_attach_aio_context = raw_aio_attach_aio_context, - - .bdrv_co_truncate = raw_co_truncate, - .bdrv_co_getlength = raw_co_getlength, -@@ -4266,7 +4268,6 @@ static BlockDriver bdrv_host_device = { - .bdrv_co_copy_range_from = raw_co_copy_range_from, - .bdrv_co_copy_range_to = raw_co_copy_range_to, - .bdrv_refresh_limits = raw_refresh_limits, -- .bdrv_attach_aio_context = raw_aio_attach_aio_context, - - .bdrv_co_truncate = raw_co_truncate, - .bdrv_co_getlength = raw_co_getlength, -@@ -4402,7 +4403,6 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_co_flush_to_disk = raw_co_flush_to_disk, - .bdrv_refresh_limits = cdrom_refresh_limits, -- .bdrv_attach_aio_context = raw_aio_attach_aio_context, - - .bdrv_co_truncate = raw_co_truncate, - .bdrv_co_getlength = raw_co_getlength, -@@ -4528,7 +4528,6 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_co_flush_to_disk = raw_co_flush_to_disk, - .bdrv_refresh_limits = cdrom_refresh_limits, -- .bdrv_attach_aio_context = 
raw_aio_attach_aio_context, - - .bdrv_co_truncate = raw_co_truncate, - .bdrv_co_getlength = raw_co_getlength, --- -2.39.3 - diff --git a/SOURCES/kvm-block-remove-AioContext-locking.patch b/SOURCES/kvm-block-remove-AioContext-locking.patch deleted file mode 100644 index 5bcd859..0000000 --- a/SOURCES/kvm-block-remove-AioContext-locking.patch +++ /dev/null @@ -1,4438 +0,0 @@ -From df1400991580e8a60d711079865b56ed95830b28 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:03 -0500 -Subject: [PATCH 086/101] block: remove AioContext locking - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [17/26] b29c3ac7ea91ca356335ba047c66187317c482f9 (kmwolf/centos-qemu-kvm) - -This is the big patch that removes -aio_context_acquire()/aio_context_release() from the block layer and -affected block layer users. - -There isn't a clean way to split this patch and the reviewers are likely -the same group of people, so I decided to do it in one patch. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Reviewed-by: Paul Durrant -Message-ID: <20231205182011.1976568-7-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - block.c | 234 +--------------------- - block/block-backend.c | 14 -- - block/copy-before-write.c | 22 +-- - block/export/export.c | 22 +-- - block/io.c | 45 +---- - block/mirror.c | 19 -- - block/monitor/bitmap-qmp-cmds.c | 20 +- - block/monitor/block-hmp-cmds.c | 29 --- - block/qapi-sysemu.c | 27 +-- - block/qapi.c | 18 +- - block/raw-format.c | 5 - - block/replication.c | 58 +----- - block/snapshot.c | 22 +-- - block/write-threshold.c | 6 - - blockdev.c | 307 +++++------------------------ - blockjob.c | 18 -- - hw/block/dataplane/virtio-blk.c | 10 - - hw/block/dataplane/xen-block.c | 17 +- - hw/block/virtio-blk.c | 13 -- - hw/core/qdev-properties-system.c | 9 - - include/block/block-global-state.h | 9 +- - include/block/block-io.h | 3 +- - include/block/snapshot.h | 2 - - job.c | 16 -- - migration/block.c | 34 +--- - migration/migration-hmp-cmds.c | 3 - - migration/savevm.c | 22 --- - net/colo-compare.c | 2 - - qemu-img.c | 4 - - qemu-io.c | 10 +- - qemu-nbd.c | 2 - - replay/replay-debugging.c | 4 - - scripts/block-coroutine-wrapper.py | 3 - - tests/tsan/suppressions.tsan | 1 - - tests/unit/test-bdrv-drain.c | 51 +---- - tests/unit/test-bdrv-graph-mod.c | 6 - - tests/unit/test-block-iothread.c | 31 --- - tests/unit/test-blockjob.c | 137 ------------- - tests/unit/test-replication.c | 11 -- - util/async.c | 4 - - util/vhost-user-server.c | 3 - - 41 files changed, 104 insertions(+), 1169 deletions(-) - -diff --git a/block.c b/block.c -index 25e1ebc606..91ace5d2d5 100644 ---- a/block.c -+++ b/block.c -@@ -1625,7 +1625,6 @@ static int no_coroutine_fn GRAPH_UNLOCKED - bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, - QDict *options, int open_flags, Error **errp) - { -- AioContext *ctx; - Error *local_err = NULL; - int i, ret; - 
GLOBAL_STATE_CODE(); -@@ -1673,21 +1672,15 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, - bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF; - bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF; - -- /* Get the context after .bdrv_open, it can change the context */ -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); -- - ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); - if (ret < 0) { - error_setg_errno(errp, -ret, "Could not refresh total sector count"); -- aio_context_release(ctx); - return ret; - } - - bdrv_graph_rdlock_main_loop(); - bdrv_refresh_limits(bs, NULL, &local_err); - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(ctx); - - if (local_err) { - error_propagate(errp, local_err); -@@ -3062,7 +3055,7 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - Transaction *tran, Error **errp) - { - BdrvChild *new_child; -- AioContext *parent_ctx, *new_child_ctx; -+ AioContext *parent_ctx; - AioContext *child_ctx = bdrv_get_aio_context(child_bs); - - assert(child_class->get_parent_desc); -@@ -3114,12 +3107,6 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - } - } - -- new_child_ctx = bdrv_get_aio_context(child_bs); -- if (new_child_ctx != child_ctx) { -- aio_context_release(child_ctx); -- aio_context_acquire(new_child_ctx); -- } -- - bdrv_ref(child_bs); - /* - * Let every new BdrvChild start with a drained parent. 
Inserting the child -@@ -3149,11 +3136,6 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - }; - tran_add(tran, &bdrv_attach_child_common_drv, s); - -- if (new_child_ctx != child_ctx) { -- aio_context_release(new_child_ctx); -- aio_context_acquire(child_ctx); -- } -- - return new_child; - } - -@@ -3605,7 +3587,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, - int ret = 0; - bool implicit_backing = false; - BlockDriverState *backing_hd; -- AioContext *backing_hd_ctx; - QDict *options; - QDict *tmp_parent_options = NULL; - Error *local_err = NULL; -@@ -3691,11 +3672,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, - - /* Hook up the backing file link; drop our reference, bs owns the - * backing_hd reference now */ -- backing_hd_ctx = bdrv_get_aio_context(backing_hd); -- aio_context_acquire(backing_hd_ctx); - ret = bdrv_set_backing_hd(bs, backing_hd, errp); - bdrv_unref(backing_hd); -- aio_context_release(backing_hd_ctx); - - if (ret < 0) { - goto free_exit; -@@ -3780,7 +3758,6 @@ BdrvChild *bdrv_open_child(const char *filename, - { - BlockDriverState *bs; - BdrvChild *child; -- AioContext *ctx; - - GLOBAL_STATE_CODE(); - -@@ -3791,11 +3768,8 @@ BdrvChild *bdrv_open_child(const char *filename, - } - - bdrv_graph_wrlock(); -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, - errp); -- aio_context_release(ctx); - bdrv_graph_wrunlock(); - - return child; -@@ -3881,7 +3855,6 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, - int64_t total_size; - QemuOpts *opts = NULL; - BlockDriverState *bs_snapshot = NULL; -- AioContext *ctx = bdrv_get_aio_context(bs); - int ret; - - GLOBAL_STATE_CODE(); -@@ -3890,9 +3863,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, - instead of opening 'filename' directly */ - - /* Get the required size from the image */ -- 
aio_context_acquire(ctx); - total_size = bdrv_getlength(bs); -- aio_context_release(ctx); - - if (total_size < 0) { - error_setg_errno(errp, -total_size, "Could not get image size"); -@@ -3927,10 +3898,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, - goto out; - } - -- aio_context_acquire(ctx); - ret = bdrv_append(bs_snapshot, bs, errp); -- aio_context_release(ctx); -- - if (ret < 0) { - bs_snapshot = NULL; - goto out; -@@ -3974,7 +3942,6 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, - Error *local_err = NULL; - QDict *snapshot_options = NULL; - int snapshot_flags = 0; -- AioContext *ctx = qemu_get_aio_context(); - - assert(!child_class || !flags); - assert(!child_class == !parent); -@@ -4115,12 +4082,10 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, - /* Not requesting BLK_PERM_CONSISTENT_READ because we're only - * looking at the header to guess the image format. This works even - * in cases where a guest would not see a consistent state. */ -- ctx = bdrv_get_aio_context(file_bs); -- aio_context_acquire(ctx); -+ AioContext *ctx = bdrv_get_aio_context(file_bs); - file = blk_new(ctx, 0, BLK_PERM_ALL); - blk_insert_bs(file, file_bs, &local_err); - bdrv_unref(file_bs); -- aio_context_release(ctx); - - if (local_err) { - goto fail; -@@ -4167,13 +4132,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, - goto fail; - } - -- /* The AioContext could have changed during bdrv_open_common() */ -- ctx = bdrv_get_aio_context(bs); -- - if (file) { -- aio_context_acquire(ctx); - blk_unref(file); -- aio_context_release(ctx); - file = NULL; - } - -@@ -4231,16 +4191,13 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, - * (snapshot_bs); thus, we have to drop the strong reference to bs - * (which we obtained by calling bdrv_new()). bs will not be deleted, - * though, because the overlay still has a reference to it. 
*/ -- aio_context_acquire(ctx); - bdrv_unref(bs); -- aio_context_release(ctx); - bs = snapshot_bs; - } - - return bs; - - fail: -- aio_context_acquire(ctx); - blk_unref(file); - qobject_unref(snapshot_options); - qobject_unref(bs->explicit_options); -@@ -4249,14 +4206,11 @@ fail: - bs->options = NULL; - bs->explicit_options = NULL; - bdrv_unref(bs); -- aio_context_release(ctx); - error_propagate(errp, local_err); - return NULL; - - close_and_fail: -- aio_context_acquire(ctx); - bdrv_unref(bs); -- aio_context_release(ctx); - qobject_unref(snapshot_options); - qobject_unref(options); - error_propagate(errp, local_err); -@@ -4540,12 +4494,7 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) - if (bs_queue) { - BlockReopenQueueEntry *bs_entry, *next; - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { -- AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); -- -- aio_context_acquire(ctx); - bdrv_drained_end(bs_entry->state.bs); -- aio_context_release(ctx); -- - qobject_unref(bs_entry->state.explicit_options); - qobject_unref(bs_entry->state.options); - g_free(bs_entry); -@@ -4577,7 +4526,6 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - { - int ret = -1; - BlockReopenQueueEntry *bs_entry, *next; -- AioContext *ctx; - Transaction *tran = tran_new(); - g_autoptr(GSList) refresh_list = NULL; - -@@ -4586,10 +4534,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - GLOBAL_STATE_CODE(); - - QTAILQ_FOREACH(bs_entry, bs_queue, entry) { -- ctx = bdrv_get_aio_context(bs_entry->state.bs); -- aio_context_acquire(ctx); - ret = bdrv_flush(bs_entry->state.bs); -- aio_context_release(ctx); - if (ret < 0) { - error_setg_errno(errp, -ret, "Error flushing drive"); - goto abort; -@@ -4598,10 +4543,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - - QTAILQ_FOREACH(bs_entry, bs_queue, entry) { - assert(bs_entry->state.bs->quiesce_counter > 0); -- ctx = bdrv_get_aio_context(bs_entry->state.bs); -- 
aio_context_acquire(ctx); - ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp); -- aio_context_release(ctx); - if (ret < 0) { - goto abort; - } -@@ -4644,10 +4586,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - * to first element. - */ - QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { -- ctx = bdrv_get_aio_context(bs_entry->state.bs); -- aio_context_acquire(ctx); - bdrv_reopen_commit(&bs_entry->state); -- aio_context_release(ctx); - } - - bdrv_graph_wrlock(); -@@ -4658,10 +4597,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - BlockDriverState *bs = bs_entry->state.bs; - - if (bs->drv->bdrv_reopen_commit_post) { -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - bs->drv->bdrv_reopen_commit_post(&bs_entry->state); -- aio_context_release(ctx); - } - } - -@@ -4675,10 +4611,7 @@ abort: - - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - if (bs_entry->prepared) { -- ctx = bdrv_get_aio_context(bs_entry->state.bs); -- aio_context_acquire(ctx); - bdrv_reopen_abort(&bs_entry->state); -- aio_context_release(ctx); - } - } - -@@ -4691,24 +4624,13 @@ cleanup: - int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - Error **errp) - { -- AioContext *ctx = bdrv_get_aio_context(bs); - BlockReopenQueue *queue; -- int ret; - - GLOBAL_STATE_CODE(); - - queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); - -- if (ctx != qemu_get_aio_context()) { -- aio_context_release(ctx); -- } -- ret = bdrv_reopen_multiple(queue, errp); -- -- if (ctx != qemu_get_aio_context()) { -- aio_context_acquire(ctx); -- } -- -- return ret; -+ return bdrv_reopen_multiple(queue, errp); - } - - int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, -@@ -4760,7 +4682,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, - const char *child_name = is_backing ? 
"backing" : "file"; - QObject *value; - const char *str; -- AioContext *ctx, *old_ctx; - bool has_child; - int ret; - -@@ -4844,13 +4765,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, - bdrv_drained_begin(old_child_bs); - } - -- old_ctx = bdrv_get_aio_context(bs); -- ctx = bdrv_get_aio_context(new_child_bs); -- if (old_ctx != ctx) { -- aio_context_release(old_ctx); -- aio_context_acquire(ctx); -- } -- - bdrv_graph_rdunlock_main_loop(); - bdrv_graph_wrlock(); - -@@ -4859,11 +4773,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, - - bdrv_graph_wrunlock(); - -- if (old_ctx != ctx) { -- aio_context_release(ctx); -- aio_context_acquire(old_ctx); -- } -- - if (old_child_bs) { - bdrv_drained_end(old_child_bs); - bdrv_unref(old_child_bs); -@@ -5537,7 +5446,6 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - int ret; - BdrvChild *child; - Transaction *tran = tran_new(); -- AioContext *old_context, *new_context = NULL; - - GLOBAL_STATE_CODE(); - -@@ -5545,21 +5453,8 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - assert(!bs_new->backing); - bdrv_graph_rdunlock_main_loop(); - -- old_context = bdrv_get_aio_context(bs_top); - bdrv_drained_begin(bs_top); -- -- /* -- * bdrv_drained_begin() requires that only the AioContext of the drained -- * node is locked, and at this point it can still differ from the AioContext -- * of bs_top. -- */ -- new_context = bdrv_get_aio_context(bs_new); -- aio_context_release(old_context); -- aio_context_acquire(new_context); - bdrv_drained_begin(bs_new); -- aio_context_release(new_context); -- aio_context_acquire(old_context); -- new_context = NULL; - - bdrv_graph_wrlock(); - -@@ -5571,18 +5466,6 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - goto out; - } - -- /* -- * bdrv_attach_child_noperm could change the AioContext of bs_top and -- * bs_new, but at least they are in the same AioContext now. 
This is the -- * AioContext that we need to lock for the rest of the function. -- */ -- new_context = bdrv_get_aio_context(bs_top); -- -- if (old_context != new_context) { -- aio_context_release(old_context); -- aio_context_acquire(new_context); -- } -- - ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); - if (ret < 0) { - goto out; -@@ -5598,11 +5481,6 @@ out: - bdrv_drained_end(bs_top); - bdrv_drained_end(bs_new); - -- if (new_context && old_context != new_context) { -- aio_context_release(new_context); -- aio_context_acquire(old_context); -- } -- - return ret; - } - -@@ -5697,12 +5575,8 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, - - GLOBAL_STATE_CODE(); - -- aio_context_release(ctx); -- aio_context_acquire(qemu_get_aio_context()); - new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, - errp); -- aio_context_release(qemu_get_aio_context()); -- aio_context_acquire(ctx); - assert(bdrv_get_aio_context(bs) == ctx); - - options = NULL; /* bdrv_new_open_driver() eats options */ -@@ -7037,12 +6911,9 @@ void bdrv_activate_all(Error **errp) - GRAPH_RDLOCK_GUARD_MAINLOOP(); - - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { -- AioContext *aio_context = bdrv_get_aio_context(bs); - int ret; - -- aio_context_acquire(aio_context); - ret = bdrv_activate(bs, errp); -- aio_context_release(aio_context); - if (ret < 0) { - bdrv_next_cleanup(&it); - return; -@@ -7137,20 +7008,10 @@ int bdrv_inactivate_all(void) - BlockDriverState *bs = NULL; - BdrvNextIterator it; - int ret = 0; -- GSList *aio_ctxs = NULL, *ctx; - - GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); - -- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { -- AioContext *aio_context = bdrv_get_aio_context(bs); -- -- if (!g_slist_find(aio_ctxs, aio_context)) { -- aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); -- aio_context_acquire(aio_context); -- } -- } -- - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { - /* Nodes with BDS 
parents are covered by recursion from the last - * parent that gets inactivated. Don't inactivate them a second -@@ -7161,17 +7022,10 @@ int bdrv_inactivate_all(void) - ret = bdrv_inactivate_recurse(bs); - if (ret < 0) { - bdrv_next_cleanup(&it); -- goto out; -+ break; - } - } - --out: -- for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { -- AioContext *aio_context = ctx->data; -- aio_context_release(aio_context); -- } -- g_slist_free(aio_ctxs); -- - return ret; - } - -@@ -7257,11 +7111,8 @@ void bdrv_unref(BlockDriverState *bs) - static void bdrv_schedule_unref_bh(void *opaque) - { - BlockDriverState *bs = opaque; -- AioContext *ctx = bdrv_get_aio_context(bs); - -- aio_context_acquire(ctx); - bdrv_unref(bs); -- aio_context_release(ctx); - } - - /* -@@ -7398,8 +7249,6 @@ void bdrv_img_create(const char *filename, const char *fmt, - return; - } - -- aio_context_acquire(qemu_get_aio_context()); -- - /* Create parameter list */ - create_opts = qemu_opts_append(create_opts, drv->create_opts); - create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -@@ -7549,7 +7398,6 @@ out: - qemu_opts_del(opts); - qemu_opts_free(create_opts); - error_propagate(errp, local_err); -- aio_context_release(qemu_get_aio_context()); - } - - AioContext *bdrv_get_aio_context(BlockDriverState *bs) -@@ -7585,29 +7433,12 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) - - void coroutine_fn bdrv_co_lock(BlockDriverState *bs) - { -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- /* In the main thread, bs->aio_context won't change concurrently */ -- assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -- -- /* -- * We're in coroutine context, so we already hold the lock of the main -- * loop AioContext. Don't lock it twice to avoid deadlocks. 
-- */ -- assert(qemu_in_coroutine()); -- if (ctx != qemu_get_aio_context()) { -- aio_context_acquire(ctx); -- } -+ /* TODO removed in next patch */ - } - - void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) - { -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- assert(qemu_in_coroutine()); -- if (ctx != qemu_get_aio_context()) { -- aio_context_release(ctx); -- } -+ /* TODO removed in next patch */ - } - - static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) -@@ -7728,21 +7559,8 @@ static void bdrv_set_aio_context_commit(void *opaque) - BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; - BlockDriverState *bs = (BlockDriverState *) state->bs; - AioContext *new_context = state->new_ctx; -- AioContext *old_context = bdrv_get_aio_context(bs); - -- /* -- * Take the old AioContex when detaching it from bs. -- * At this point, new_context lock is already acquired, and we are now -- * also taking old_context. This is safe as long as bdrv_detach_aio_context -- * does not call AIO_POLL_WHILE(). -- */ -- if (old_context != qemu_get_aio_context()) { -- aio_context_acquire(old_context); -- } - bdrv_detach_aio_context(bs); -- if (old_context != qemu_get_aio_context()) { -- aio_context_release(old_context); -- } - bdrv_attach_aio_context(bs, new_context); - } - -@@ -7827,7 +7645,6 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - Transaction *tran; - GHashTable *visited; - int ret; -- AioContext *old_context = bdrv_get_aio_context(bs); - GLOBAL_STATE_CODE(); - - /* -@@ -7857,34 +7674,7 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - return -EPERM; - } - -- /* -- * Release old AioContext, it won't be needed anymore, as all -- * bdrv_drained_begin() have been called already. 
-- */ -- if (qemu_get_aio_context() != old_context) { -- aio_context_release(old_context); -- } -- -- /* -- * Acquire new AioContext since bdrv_drained_end() is going to be called -- * after we switched all nodes in the new AioContext, and the function -- * assumes that the lock of the bs is always taken. -- */ -- if (qemu_get_aio_context() != ctx) { -- aio_context_acquire(ctx); -- } -- - tran_commit(tran); -- -- if (qemu_get_aio_context() != ctx) { -- aio_context_release(ctx); -- } -- -- /* Re-acquire the old AioContext, since the caller takes and releases it. */ -- if (qemu_get_aio_context() != old_context) { -- aio_context_acquire(old_context); -- } -- - return 0; - } - -@@ -8006,7 +7796,6 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, - const char *node_name, Error **errp) - { - BlockDriverState *to_replace_bs = bdrv_find_node(node_name); -- AioContext *aio_context; - - GLOBAL_STATE_CODE(); - -@@ -8015,12 +7804,8 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, - return NULL; - } - -- aio_context = bdrv_get_aio_context(to_replace_bs); -- aio_context_acquire(aio_context); -- - if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { -- to_replace_bs = NULL; -- goto out; -+ return NULL; - } - - /* We don't want arbitrary node of the BDS chain to be replaced only the top -@@ -8033,12 +7818,9 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, - "because it cannot be guaranteed that doing so would not " - "lead to an abrupt change of visible data", - node_name, parent_bs->node_name); -- to_replace_bs = NULL; -- goto out; -+ return NULL; - } - --out: -- aio_context_release(aio_context); - return to_replace_bs; - } - -diff --git a/block/block-backend.c b/block/block-backend.c -index abac4e0235..f412bed274 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -429,7 +429,6 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, - { - BlockBackend *blk; - 
BlockDriverState *bs; -- AioContext *ctx; - uint64_t perm = 0; - uint64_t shared = BLK_PERM_ALL; - -@@ -459,23 +458,18 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, - shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; - } - -- aio_context_acquire(qemu_get_aio_context()); - bs = bdrv_open(filename, reference, options, flags, errp); -- aio_context_release(qemu_get_aio_context()); - if (!bs) { - return NULL; - } - - /* bdrv_open() could have moved bs to a different AioContext */ -- ctx = bdrv_get_aio_context(bs); - blk = blk_new(bdrv_get_aio_context(bs), perm, shared); - blk->perm = perm; - blk->shared_perm = shared; - -- aio_context_acquire(ctx); - blk_insert_bs(blk, bs, errp); - bdrv_unref(bs); -- aio_context_release(ctx); - - if (!blk->root) { - blk_unref(blk); -@@ -577,13 +571,9 @@ void blk_remove_all_bs(void) - GLOBAL_STATE_CODE(); - - while ((blk = blk_all_next(blk)) != NULL) { -- AioContext *ctx = blk_get_aio_context(blk); -- -- aio_context_acquire(ctx); - if (blk->root) { - blk_remove_bs(blk); - } -- aio_context_release(ctx); - } - } - -@@ -2736,20 +2726,16 @@ int blk_commit_all(void) - GRAPH_RDLOCK_GUARD_MAINLOOP(); - - while ((blk = blk_all_next(blk)) != NULL) { -- AioContext *aio_context = blk_get_aio_context(blk); - BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk)); - -- aio_context_acquire(aio_context); - if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) { - int ret; - - ret = bdrv_commit(unfiltered_bs); - if (ret < 0) { -- aio_context_release(aio_context); - return ret; - } - } -- aio_context_release(aio_context); - } - return 0; - } -diff --git a/block/copy-before-write.c b/block/copy-before-write.c -index 13972879b1..0842a1a6df 100644 ---- a/block/copy-before-write.c -+++ b/block/copy-before-write.c -@@ -412,7 +412,6 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, - int64_t cluster_size; - g_autoptr(BlockdevOptions) full_opts = NULL; - BlockdevOptionsCbw *opts; -- 
AioContext *ctx; - int ret; - - full_opts = cbw_parse_options(options, errp); -@@ -435,15 +434,11 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, - - GRAPH_RDLOCK_GUARD_MAINLOOP(); - -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); -- - if (opts->bitmap) { - bitmap = block_dirty_bitmap_lookup(opts->bitmap->node, - opts->bitmap->name, NULL, errp); - if (!bitmap) { -- ret = -EINVAL; -- goto out; -+ return -EINVAL; - } - } - s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error : -@@ -461,24 +456,21 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, - s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp); - if (!s->bcs) { - error_prepend(errp, "Cannot create block-copy-state: "); -- ret = -EINVAL; -- goto out; -+ return -EINVAL; - } - - cluster_size = block_copy_cluster_size(s->bcs); - - s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); - if (!s->done_bitmap) { -- ret = -EINVAL; -- goto out; -+ return -EINVAL; - } - bdrv_disable_dirty_bitmap(s->done_bitmap); - - /* s->access_bitmap starts equal to bcs bitmap */ - s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); - if (!s->access_bitmap) { -- ret = -EINVAL; -- goto out; -+ return -EINVAL; - } - bdrv_disable_dirty_bitmap(s->access_bitmap); - bdrv_dirty_bitmap_merge_internal(s->access_bitmap, -@@ -487,11 +479,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, - - qemu_co_mutex_init(&s->lock); - QLIST_INIT(&s->frozen_read_reqs); -- -- ret = 0; --out: -- aio_context_release(ctx); -- return ret; -+ return 0; - } - - static void cbw_close(BlockDriverState *bs) -diff --git a/block/export/export.c b/block/export/export.c -index a8f274e526..6d51ae8ed7 100644 ---- a/block/export/export.c -+++ b/block/export/export.c -@@ -114,7 +114,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) - } - - ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - - if 
(export->iothread) { - IOThread *iothread; -@@ -133,8 +132,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) - set_context_errp = fixed_iothread ? errp : NULL; - ret = bdrv_try_change_aio_context(bs, new_ctx, NULL, set_context_errp); - if (ret == 0) { -- aio_context_release(ctx); -- aio_context_acquire(new_ctx); - ctx = new_ctx; - } else if (fixed_iothread) { - goto fail; -@@ -191,8 +188,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) - assert(exp->blk != NULL); - - QLIST_INSERT_HEAD(&block_exports, exp, next); -- -- aio_context_release(ctx); - return exp; - - fail: -@@ -200,7 +195,6 @@ fail: - blk_set_dev_ops(blk, NULL, NULL); - blk_unref(blk); - } -- aio_context_release(ctx); - if (exp) { - g_free(exp->id); - g_free(exp); -@@ -218,9 +212,6 @@ void blk_exp_ref(BlockExport *exp) - static void blk_exp_delete_bh(void *opaque) - { - BlockExport *exp = opaque; -- AioContext *aio_context = exp->ctx; -- -- aio_context_acquire(aio_context); - - assert(exp->refcount == 0); - QLIST_REMOVE(exp, next); -@@ -230,8 +221,6 @@ static void blk_exp_delete_bh(void *opaque) - qapi_event_send_block_export_deleted(exp->id); - g_free(exp->id); - g_free(exp); -- -- aio_context_release(aio_context); - } - - void blk_exp_unref(BlockExport *exp) -@@ -249,22 +238,16 @@ void blk_exp_unref(BlockExport *exp) - * connections and other internally held references start to shut down. When - * the function returns, there may still be active references while the export - * is in the process of shutting down. -- * -- * Acquires exp->ctx internally. Callers must *not* hold the lock. - */ - void blk_exp_request_shutdown(BlockExport *exp) - { -- AioContext *aio_context = exp->ctx; -- -- aio_context_acquire(aio_context); -- - /* - * If the user doesn't own the export any more, it is already shutting - * down. We must not call .request_shutdown and decrease the refcount a - * second time. 
- */ - if (!exp->user_owned) { -- goto out; -+ return; - } - - exp->drv->request_shutdown(exp); -@@ -272,9 +255,6 @@ void blk_exp_request_shutdown(BlockExport *exp) - assert(exp->user_owned); - exp->user_owned = false; - blk_exp_unref(exp); -- --out: -- aio_context_release(aio_context); - } - - /* -diff --git a/block/io.c b/block/io.c -index 7e62fabbf5..8fa7670571 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -294,8 +294,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) - BlockDriverState *bs = data->bs; - - if (bs) { -- AioContext *ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { - bdrv_do_drained_begin(bs, data->parent, data->poll); -@@ -303,7 +301,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) - assert(!data->poll); - bdrv_do_drained_end(bs, data->parent); - } -- aio_context_release(ctx); - } else { - assert(data->begin); - bdrv_drain_all_begin(); -@@ -320,8 +317,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - { - BdrvCoDrainData data; - Coroutine *self = qemu_coroutine_self(); -- AioContext *ctx = bdrv_get_aio_context(bs); -- AioContext *co_ctx = qemu_coroutine_get_aio_context(self); - - /* Calling bdrv_drain() from a BH ensures the current coroutine yields and - * other coroutines run if they were queued by aio_co_enter(). */ -@@ -340,17 +335,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bdrv_inc_in_flight(bs); - } - -- /* -- * Temporarily drop the lock across yield or we would get deadlocks. -- * bdrv_co_drain_bh_cb() reaquires the lock as needed. -- * -- * When we yield below, the lock for the current context will be -- * released, so if this is actually the lock that protects bs, don't drop -- * it a second time. 
-- */ -- if (ctx != co_ctx) { -- aio_context_release(ctx); -- } - replay_bh_schedule_oneshot_event(qemu_get_aio_context(), - bdrv_co_drain_bh_cb, &data); - -@@ -358,11 +342,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - /* If we are resumed from some other event (such as an aio completion or a - * timer callback), it is a bug in the caller that should be fixed. */ - assert(data.done); -- -- /* Reacquire the AioContext of bs if we dropped it */ -- if (ctx != co_ctx) { -- aio_context_acquire(ctx); -- } - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -@@ -478,13 +457,12 @@ static bool bdrv_drain_all_poll(void) - GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); - -- /* bdrv_drain_poll() can't make changes to the graph and we are holding the -- * main AioContext lock, so iterating bdrv_next_all_states() is safe. */ -+ /* -+ * bdrv_drain_poll() can't make changes to the graph and we hold the BQL, -+ * so iterating bdrv_next_all_states() is safe. -+ */ - while ((bs = bdrv_next_all_states(bs))) { -- AioContext *aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - result |= bdrv_drain_poll(bs, NULL, true); -- aio_context_release(aio_context); - } - - return result; -@@ -525,11 +503,7 @@ void bdrv_drain_all_begin_nopoll(void) - /* Quiesce all nodes, without polling in-flight requests yet. The graph - * cannot change during this loop. 
*/ - while ((bs = bdrv_next_all_states(bs))) { -- AioContext *aio_context = bdrv_get_aio_context(bs); -- -- aio_context_acquire(aio_context); - bdrv_do_drained_begin(bs, NULL, false); -- aio_context_release(aio_context); - } - } - -@@ -588,11 +562,7 @@ void bdrv_drain_all_end(void) - } - - while ((bs = bdrv_next_all_states(bs))) { -- AioContext *aio_context = bdrv_get_aio_context(bs); -- -- aio_context_acquire(aio_context); - bdrv_do_drained_end(bs, NULL); -- aio_context_release(aio_context); - } - - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -@@ -2368,15 +2338,10 @@ int bdrv_flush_all(void) - } - - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { -- AioContext *aio_context = bdrv_get_aio_context(bs); -- int ret; -- -- aio_context_acquire(aio_context); -- ret = bdrv_flush(bs); -+ int ret = bdrv_flush(bs); - if (ret < 0 && !result) { - result = ret; - } -- aio_context_release(aio_context); - } - - return result; -diff --git a/block/mirror.c b/block/mirror.c -index 51f9e2f17c..5145eb53e1 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -662,7 +662,6 @@ static int mirror_exit_common(Job *job) - MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); - BlockJob *bjob = &s->common; - MirrorBDSOpaque *bs_opaque; -- AioContext *replace_aio_context = NULL; - BlockDriverState *src; - BlockDriverState *target_bs; - BlockDriverState *mirror_top_bs; -@@ -677,7 +676,6 @@ static int mirror_exit_common(Job *job) - } - s->prepared = true; - -- aio_context_acquire(qemu_get_aio_context()); - bdrv_graph_rdlock_main_loop(); - - mirror_top_bs = s->mirror_top_bs; -@@ -742,11 +740,6 @@ static int mirror_exit_common(Job *job) - } - bdrv_graph_rdunlock_main_loop(); - -- if (s->to_replace) { -- replace_aio_context = bdrv_get_aio_context(s->to_replace); -- aio_context_acquire(replace_aio_context); -- } -- - if (s->should_complete && !abort) { - BlockDriverState *to_replace = s->to_replace ?: src; - bool ro = bdrv_is_read_only(to_replace); -@@ 
-785,9 +778,6 @@ static int mirror_exit_common(Job *job) - error_free(s->replace_blocker); - bdrv_unref(s->to_replace); - } -- if (replace_aio_context) { -- aio_context_release(replace_aio_context); -- } - g_free(s->replaces); - - /* -@@ -811,8 +801,6 @@ static int mirror_exit_common(Job *job) - bdrv_unref(mirror_top_bs); - bdrv_unref(src); - -- aio_context_release(qemu_get_aio_context()); -- - return ret; - } - -@@ -1191,24 +1179,17 @@ static void mirror_complete(Job *job, Error **errp) - - /* block all operations on to_replace bs */ - if (s->replaces) { -- AioContext *replace_aio_context; -- - s->to_replace = bdrv_find_node(s->replaces); - if (!s->to_replace) { - error_setg(errp, "Node name '%s' not found", s->replaces); - return; - } - -- replace_aio_context = bdrv_get_aio_context(s->to_replace); -- aio_context_acquire(replace_aio_context); -- - /* TODO Translate this into child freeze system. */ - error_setg(&s->replace_blocker, - "block device is in use by block-job-complete"); - bdrv_op_block_all(s->to_replace, s->replace_blocker); - bdrv_ref(s->to_replace); -- -- aio_context_release(replace_aio_context); - } - - s->should_complete = true; -diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c -index 70d01a3776..a738e7bbf7 100644 ---- a/block/monitor/bitmap-qmp-cmds.c -+++ b/block/monitor/bitmap-qmp-cmds.c -@@ -95,7 +95,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -- AioContext *aio_context; - - if (!name || name[0] == '\0') { - error_setg(errp, "Bitmap name cannot be empty"); -@@ -107,14 +106,11 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - if (has_granularity) { - if (granularity < 512 || !is_power_of_2(granularity)) { - error_setg(errp, "Granularity must be power of 2 " - "and at least 512"); -- goto out; -+ return; - } - } 
else { - /* Default to cluster size, if available: */ -@@ -132,12 +128,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - if (persistent && - !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) - { -- goto out; -+ return; - } - - bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); - if (bitmap == NULL) { -- goto out; -+ return; - } - - if (disabled) { -@@ -145,9 +141,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - } - - bdrv_dirty_bitmap_set_persistence(bitmap, persistent); -- --out: -- aio_context_release(aio_context); - } - - BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, -@@ -157,7 +150,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -- AioContext *aio_context; - - GLOBAL_STATE_CODE(); - -@@ -166,19 +158,14 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, - return NULL; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, - errp)) { -- aio_context_release(aio_context); - return NULL; - } - - if (bdrv_dirty_bitmap_get_persistence(bitmap) && - bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) - { -- aio_context_release(aio_context); - return NULL; - } - -@@ -190,7 +177,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, - *bitmap_bs = bs; - } - -- aio_context_release(aio_context); - return release ? 
NULL : bitmap; - } - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index c729cbf1eb..bdbb5cb141 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -141,7 +141,6 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) - const char *id = qdict_get_str(qdict, "id"); - BlockBackend *blk; - BlockDriverState *bs; -- AioContext *aio_context; - Error *local_err = NULL; - - GLOBAL_STATE_CODE(); -@@ -168,14 +167,10 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) - return; - } - -- aio_context = blk_get_aio_context(blk); -- aio_context_acquire(aio_context); -- - bs = blk_bs(blk); - if (bs) { - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) { - error_report_err(local_err); -- aio_context_release(aio_context); - return; - } - -@@ -196,8 +191,6 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) - } else { - blk_unref(blk); - } -- -- aio_context_release(aio_context); - } - - void hmp_commit(Monitor *mon, const QDict *qdict) -@@ -213,7 +206,6 @@ void hmp_commit(Monitor *mon, const QDict *qdict) - ret = blk_commit_all(); - } else { - BlockDriverState *bs; -- AioContext *aio_context; - - blk = blk_by_name(device); - if (!blk) { -@@ -222,18 +214,13 @@ void hmp_commit(Monitor *mon, const QDict *qdict) - } - - bs = bdrv_skip_implicit_filters(blk_bs(blk)); -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - if (!blk_is_available(blk)) { - error_report("Device '%s' has no medium", device); -- aio_context_release(aio_context); - return; - } - - ret = bdrv_commit(bs); -- -- aio_context_release(aio_context); - } - if (ret < 0) { - error_report("'commit' error for '%s': %s", device, strerror(-ret)); -@@ -560,7 +547,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) - BlockBackend *blk = NULL; - BlockDriverState *bs = NULL; - BlockBackend *local_blk = NULL; -- AioContext *ctx = NULL; - bool qdev = qdict_get_try_bool(qdict, "qdev", false); - const char 
*device = qdict_get_str(qdict, "device"); - const char *command = qdict_get_str(qdict, "command"); -@@ -582,9 +568,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) - } - } - -- ctx = blk ? blk_get_aio_context(blk) : bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); -- - if (bs) { - blk = local_blk = blk_new(bdrv_get_aio_context(bs), 0, BLK_PERM_ALL); - ret = blk_insert_bs(blk, bs, &err); -@@ -622,11 +605,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) - - fail: - blk_unref(local_blk); -- -- if (ctx) { -- aio_context_release(ctx); -- } -- - hmp_handle_error(mon, err); - } - -@@ -882,7 +860,6 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) - int nb_sns, i; - int total; - int *global_snapshots; -- AioContext *aio_context; - - typedef struct SnapshotEntry { - QEMUSnapshotInfo sn; -@@ -909,11 +886,8 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) - error_report_err(err); - return; - } -- aio_context = bdrv_get_aio_context(bs); - -- aio_context_acquire(aio_context); - nb_sns = bdrv_snapshot_list(bs, &sn_tab); -- aio_context_release(aio_context); - - if (nb_sns < 0) { - monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns); -@@ -924,9 +898,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) - int bs1_nb_sns = 0; - ImageEntry *ie; - SnapshotEntry *se; -- AioContext *ctx = bdrv_get_aio_context(bs1); - -- aio_context_acquire(ctx); - if (bdrv_can_snapshot(bs1)) { - sn = NULL; - bs1_nb_sns = bdrv_snapshot_list(bs1, &sn); -@@ -944,7 +916,6 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) - } - g_free(sn); - } -- aio_context_release(ctx); - } - - if (no_snapshot) { -diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c -index 1618cd225a..e4282631d2 100644 ---- a/block/qapi-sysemu.c -+++ b/block/qapi-sysemu.c -@@ -174,7 +174,6 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) - { - BlockBackend *blk; - BlockDriverState *bs; -- AioContext *aio_context; - bool 
has_attached_device; - - GLOBAL_STATE_CODE(); -@@ -204,13 +203,10 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - bdrv_graph_rdlock_main_loop(); - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) { - bdrv_graph_rdunlock_main_loop(); -- goto out; -+ return; - } - bdrv_graph_rdunlock_main_loop(); - -@@ -223,9 +219,6 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) - * value passed here (i.e. false). */ - blk_dev_change_media_cb(blk, false, &error_abort); - } -- --out: -- aio_context_release(aio_context); - } - - void qmp_blockdev_remove_medium(const char *id, Error **errp) -@@ -237,7 +230,6 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, - BlockDriverState *bs, Error **errp) - { - Error *local_err = NULL; -- AioContext *ctx; - bool has_device; - int ret; - -@@ -259,11 +251,7 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, - return; - } - -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - ret = blk_insert_bs(blk, bs, errp); -- aio_context_release(ctx); -- - if (ret < 0) { - return; - } -@@ -374,9 +362,7 @@ void qmp_blockdev_change_medium(const char *device, - qdict_put_str(options, "driver", format); - } - -- aio_context_acquire(qemu_get_aio_context()); - medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp); -- aio_context_release(qemu_get_aio_context()); - - if (!medium_bs) { - goto fail; -@@ -437,20 +423,16 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) - ThrottleConfig cfg; - BlockDriverState *bs; - BlockBackend *blk; -- AioContext *aio_context; - - blk = qmp_get_blk(arg->device, arg->id, errp); - if (!blk) { - return; - } - -- aio_context = blk_get_aio_context(blk); -- aio_context_acquire(aio_context); -- - bs = blk_bs(blk); - if (!bs) { - error_setg(errp, "Device has no medium"); -- goto out; -+ return; - } - - 
throttle_config_init(&cfg); -@@ -505,7 +487,7 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) - } - - if (!throttle_is_valid(&cfg, errp)) { -- goto out; -+ return; - } - - if (throttle_enabled(&cfg)) { -@@ -522,9 +504,6 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) - /* If all throttling settings are set to 0, disable I/O limits */ - blk_io_limits_disable(blk); - } -- --out: -- aio_context_release(aio_context); - } - - void qmp_block_latency_histogram_set( -diff --git a/block/qapi.c b/block/qapi.c -index 82a30b38fe..9e806fa230 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -234,13 +234,11 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) - int ret; - Error *err = NULL; - -- aio_context_acquire(bdrv_get_aio_context(bs)); -- - size = bdrv_getlength(bs); - if (size < 0) { - error_setg_errno(errp, -size, "Can't get image size '%s'", - bs->exact_filename); -- goto out; -+ return; - } - - bdrv_refresh_filename(bs); -@@ -265,7 +263,7 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) - info->format_specific = bdrv_get_specific_info(bs, &err); - if (err) { - error_propagate(errp, err); -- goto out; -+ return; - } - backing_filename = bs->backing_file; - if (backing_filename[0] != '\0') { -@@ -300,11 +298,8 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) - break; - default: - error_propagate(errp, err); -- goto out; -+ return; - } -- --out: -- aio_context_release(bdrv_get_aio_context(bs)); - } - - /** -@@ -709,15 +704,10 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, - /* Just to be safe if query_nodes is not always initialized */ - if (has_query_nodes && query_nodes) { - for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) { -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- aio_context_acquire(ctx); - QAPI_LIST_APPEND(tail, bdrv_query_bds_stats(bs, false)); -- aio_context_release(ctx); - } - } 
else { - for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { -- AioContext *ctx = blk_get_aio_context(blk); - BlockStats *s; - char *qdev; - -@@ -725,7 +715,6 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, - continue; - } - -- aio_context_acquire(ctx); - s = bdrv_query_bds_stats(blk_bs(blk), true); - s->device = g_strdup(blk_name(blk)); - -@@ -737,7 +726,6 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, - } - - bdrv_query_blk_stats(s->stats, blk); -- aio_context_release(ctx); - - QAPI_LIST_APPEND(tail, s); - } -diff --git a/block/raw-format.c b/block/raw-format.c -index 1111dffd54..ac7e8495f6 100644 ---- a/block/raw-format.c -+++ b/block/raw-format.c -@@ -470,7 +470,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) - { - BDRVRawState *s = bs->opaque; -- AioContext *ctx; - bool has_size; - uint64_t offset, size; - BdrvChildRole file_role; -@@ -522,11 +521,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, - bs->file->bs->filename); - } - -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - ret = raw_apply_options(bs, s, offset, has_size, size, errp); -- aio_context_release(ctx); -- - if (ret < 0) { - return ret; - } -diff --git a/block/replication.c b/block/replication.c -index 424b537ff7..ca6bd0a720 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -394,14 +394,7 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, - } - - if (reopen_queue) { -- AioContext *ctx = bdrv_get_aio_context(bs); -- if (ctx != qemu_get_aio_context()) { -- aio_context_release(ctx); -- } - bdrv_reopen_multiple(reopen_queue, errp); -- if (ctx != qemu_get_aio_context()) { -- aio_context_acquire(ctx); -- } - } - } - -@@ -462,14 +455,11 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - BlockDriverState *top_bs; - BdrvChild *active_disk, *hidden_disk, *secondary_disk; - int64_t active_length, hidden_length, disk_length; 
-- AioContext *aio_context; - Error *local_err = NULL; - BackupPerf perf = { .use_copy_range = true, .max_workers = 1 }; - - GLOBAL_STATE_CODE(); - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - s = bs->opaque; - - if (s->stage == BLOCK_REPLICATION_DONE || -@@ -479,20 +469,17 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - * Ignore the request because the secondary side of replication - * doesn't have to do anything anymore. - */ -- aio_context_release(aio_context); - return; - } - - if (s->stage != BLOCK_REPLICATION_NONE) { - error_setg(errp, "Block replication is running or done"); -- aio_context_release(aio_context); - return; - } - - if (s->mode != mode) { - error_setg(errp, "The parameter mode's value is invalid, needs %d," - " but got %d", s->mode, mode); -- aio_context_release(aio_context); - return; - } - -@@ -505,7 +492,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (!active_disk || !active_disk->bs || !active_disk->bs->backing) { - error_setg(errp, "Active disk doesn't have backing file"); - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(aio_context); - return; - } - -@@ -513,7 +499,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (!hidden_disk->bs || !hidden_disk->bs->backing) { - error_setg(errp, "Hidden disk doesn't have backing file"); - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(aio_context); - return; - } - -@@ -521,7 +506,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) { - error_setg(errp, "The secondary disk doesn't have block backend"); - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(aio_context); - return; - } - bdrv_graph_rdunlock_main_loop(); -@@ -534,7 +518,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - active_length != hidden_length || 
hidden_length != disk_length) { - error_setg(errp, "Active disk, hidden disk, secondary disk's length" - " are not the same"); -- aio_context_release(aio_context); - return; - } - -@@ -546,7 +529,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - !hidden_disk->bs->drv->bdrv_make_empty) { - error_setg(errp, - "Active disk or hidden disk doesn't support make_empty"); -- aio_context_release(aio_context); - bdrv_graph_rdunlock_main_loop(); - return; - } -@@ -556,7 +538,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - reopen_backing_file(bs, true, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- aio_context_release(aio_context); - return; - } - -@@ -569,7 +550,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (local_err) { - error_propagate(errp, local_err); - bdrv_graph_wrunlock(); -- aio_context_release(aio_context); - return; - } - -@@ -580,7 +560,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (local_err) { - error_propagate(errp, local_err); - bdrv_graph_wrunlock(); -- aio_context_release(aio_context); - return; - } - -@@ -594,7 +573,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - error_setg(errp, "No top_bs or it is invalid"); - bdrv_graph_wrunlock(); - reopen_backing_file(bs, false, NULL); -- aio_context_release(aio_context); - return; - } - bdrv_op_block_all(top_bs, s->blocker); -@@ -612,13 +590,11 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (local_err) { - error_propagate(errp, local_err); - backup_job_cleanup(bs); -- aio_context_release(aio_context); - return; - } - job_start(&s->backup_job->job); - break; - default: -- aio_context_release(aio_context); - abort(); - } - -@@ -629,18 +605,12 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - } - - s->error = 0; -- aio_context_release(aio_context); - } - - static 
void replication_do_checkpoint(ReplicationState *rs, Error **errp) - { - BlockDriverState *bs = rs->opaque; -- BDRVReplicationState *s; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- s = bs->opaque; -+ BDRVReplicationState *s = bs->opaque; - - if (s->stage == BLOCK_REPLICATION_DONE || - s->stage == BLOCK_REPLICATION_FAILOVER) { -@@ -649,38 +619,28 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp) - * Ignore the request because the secondary side of replication - * doesn't have to do anything anymore. - */ -- aio_context_release(aio_context); - return; - } - - if (s->mode == REPLICATION_MODE_SECONDARY) { - secondary_do_checkpoint(bs, errp); - } -- aio_context_release(aio_context); - } - - static void replication_get_error(ReplicationState *rs, Error **errp) - { - BlockDriverState *bs = rs->opaque; -- BDRVReplicationState *s; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- s = bs->opaque; -+ BDRVReplicationState *s = bs->opaque; - - if (s->stage == BLOCK_REPLICATION_NONE) { - error_setg(errp, "Block replication is not running"); -- aio_context_release(aio_context); - return; - } - - if (s->error) { - error_setg(errp, "I/O error occurred"); -- aio_context_release(aio_context); - return; - } -- aio_context_release(aio_context); - } - - static void replication_done(void *opaque, int ret) -@@ -708,12 +668,7 @@ static void replication_done(void *opaque, int ret) - static void replication_stop(ReplicationState *rs, bool failover, Error **errp) - { - BlockDriverState *bs = rs->opaque; -- BDRVReplicationState *s; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- s = bs->opaque; -+ BDRVReplicationState *s = bs->opaque; - - if (s->stage == BLOCK_REPLICATION_DONE || - s->stage == BLOCK_REPLICATION_FAILOVER) { -@@ -722,13 +677,11 @@ static void 
replication_stop(ReplicationState *rs, bool failover, Error **errp) - * Ignore the request because the secondary side of replication - * doesn't have to do anything anymore. - */ -- aio_context_release(aio_context); - return; - } - - if (s->stage != BLOCK_REPLICATION_RUNNING) { - error_setg(errp, "Block replication is not running"); -- aio_context_release(aio_context); - return; - } - -@@ -744,15 +697,12 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) - * disk, secondary disk in backup_job_completed(). - */ - if (s->backup_job) { -- aio_context_release(aio_context); - job_cancel_sync(&s->backup_job->job, true); -- aio_context_acquire(aio_context); - } - - if (!failover) { - secondary_do_checkpoint(bs, errp); - s->stage = BLOCK_REPLICATION_DONE; -- aio_context_release(aio_context); - return; - } - -@@ -765,10 +715,8 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) - bdrv_graph_rdunlock_main_loop(); - break; - default: -- aio_context_release(aio_context); - abort(); - } -- aio_context_release(aio_context); - } - - static const char *const replication_strong_runtime_opts[] = { -diff --git a/block/snapshot.c b/block/snapshot.c -index e486d3e205..a28f2b039f 100644 ---- a/block/snapshot.c -+++ b/block/snapshot.c -@@ -525,9 +525,7 @@ static bool GRAPH_RDLOCK bdrv_all_snapshots_includes_bs(BlockDriverState *bs) - return bdrv_has_blk(bs) || QLIST_EMPTY(&bs->parents); - } - --/* Group operations. All block drivers are involved. -- * These functions will properly handle dataplane (take aio_context_acquire -- * when appropriate for appropriate block drivers) */ -+/* Group operations. All block drivers are involved. 
*/ - - bool bdrv_all_can_snapshot(bool has_devices, strList *devices, - Error **errp) -@@ -545,14 +543,11 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - bool ok = true; - -- aio_context_acquire(ctx); - if (devices || bdrv_all_snapshots_includes_bs(bs)) { - ok = bdrv_can_snapshot(bs); - } -- aio_context_release(ctx); - if (!ok) { - error_setg(errp, "Device '%s' is writable but does not support " - "snapshots", bdrv_get_device_or_node_name(bs)); -@@ -582,18 +577,15 @@ int bdrv_all_delete_snapshot(const char *name, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - QEMUSnapshotInfo sn1, *snapshot = &sn1; - int ret = 0; - -- aio_context_acquire(ctx); - if ((devices || bdrv_all_snapshots_includes_bs(bs)) && - bdrv_snapshot_find(bs, snapshot, name) >= 0) - { - ret = bdrv_snapshot_delete(bs, snapshot->id_str, - snapshot->name, errp); - } -- aio_context_release(ctx); - if (ret < 0) { - error_prepend(errp, "Could not delete snapshot '%s' on '%s': ", - name, bdrv_get_device_or_node_name(bs)); -@@ -628,17 +620,14 @@ int bdrv_all_goto_snapshot(const char *name, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - bool all_snapshots_includes_bs; - -- aio_context_acquire(ctx); - bdrv_graph_rdlock_main_loop(); - all_snapshots_includes_bs = bdrv_all_snapshots_includes_bs(bs); - bdrv_graph_rdunlock_main_loop(); - - ret = (devices || all_snapshots_includes_bs) ? 
- bdrv_snapshot_goto(bs, name, errp) : 0; -- aio_context_release(ctx); - if (ret < 0) { - bdrv_graph_rdlock_main_loop(); - error_prepend(errp, "Could not load snapshot '%s' on '%s': ", -@@ -670,15 +659,12 @@ int bdrv_all_has_snapshot(const char *name, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - QEMUSnapshotInfo sn; - int ret = 0; - -- aio_context_acquire(ctx); - if (devices || bdrv_all_snapshots_includes_bs(bs)) { - ret = bdrv_snapshot_find(bs, &sn, name); - } -- aio_context_release(ctx); - if (ret < 0) { - if (ret == -ENOENT) { - return 0; -@@ -715,10 +701,8 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - int ret = 0; - -- aio_context_acquire(ctx); - if (bs == vm_state_bs) { - sn->vm_state_size = vm_state_size; - ret = bdrv_snapshot_create(bs, sn); -@@ -726,7 +710,6 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, - sn->vm_state_size = 0; - ret = bdrv_snapshot_create(bs, sn); - } -- aio_context_release(ctx); - if (ret < 0) { - error_setg(errp, "Could not create snapshot '%s' on '%s'", - sn->name, bdrv_get_device_or_node_name(bs)); -@@ -757,13 +740,10 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs, - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; -- AioContext *ctx = bdrv_get_aio_context(bs); - bool found = false; - -- aio_context_acquire(ctx); - found = (devices || bdrv_all_snapshots_includes_bs(bs)) && - bdrv_can_snapshot(bs); -- aio_context_release(ctx); - - if (vmstate_bs) { - if (g_str_equal(vmstate_bs, -diff --git a/block/write-threshold.c b/block/write-threshold.c -index 76d8885677..56fe88de81 100644 ---- a/block/write-threshold.c -+++ b/block/write-threshold.c -@@ -33,7 +33,6 @@ void qmp_block_set_write_threshold(const char *node_name, - Error **errp) - { - 
BlockDriverState *bs; -- AioContext *aio_context; - - bs = bdrv_find_node(node_name); - if (!bs) { -@@ -41,12 +40,7 @@ void qmp_block_set_write_threshold(const char *node_name, - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - bdrv_write_threshold_set(bs, threshold_bytes); -- -- aio_context_release(aio_context); - } - - void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset, -diff --git a/blockdev.c b/blockdev.c -index 9e1381169d..5d8b3a23eb 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -662,7 +662,6 @@ err_no_opts: - /* Takes the ownership of bs_opts */ - BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) - { -- BlockDriverState *bs; - int bdrv_flags = 0; - - GLOBAL_STATE_CODE(); -@@ -677,11 +676,7 @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) - bdrv_flags |= BDRV_O_INACTIVE; - } - -- aio_context_acquire(qemu_get_aio_context()); -- bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); -- aio_context_release(qemu_get_aio_context()); -- -- return bs; -+ return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); - } - - void blockdev_close_all_bdrv_states(void) -@@ -690,11 +685,7 @@ void blockdev_close_all_bdrv_states(void) - - GLOBAL_STATE_CODE(); - QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) { -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- aio_context_acquire(ctx); - bdrv_unref(bs); -- aio_context_release(ctx); - } - } - -@@ -1048,7 +1039,6 @@ fail: - static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) - { - BlockDriverState *bs; -- AioContext *aio_context; - - GRAPH_RDLOCK_GUARD_MAINLOOP(); - -@@ -1062,16 +1052,11 @@ static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) - return NULL; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - if (!bdrv_is_inserted(bs)) { - error_setg(errp, "Device has no medium"); - bs = NULL; - } - -- 
aio_context_release(aio_context); -- - return bs; - } - -@@ -1141,7 +1126,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, - Error **errp) - { - BlockDriverState *bs; -- AioContext *aio_context; - QEMUSnapshotInfo sn; - Error *local_err = NULL; - SnapshotInfo *info = NULL; -@@ -1154,39 +1138,35 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, - if (!bs) { - return NULL; - } -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - if (!id && !name) { - error_setg(errp, "Name or id must be provided"); -- goto out_aio_context; -+ return NULL; - } - - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) { -- goto out_aio_context; -+ return NULL; - } - - ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- goto out_aio_context; -+ return NULL; - } - if (!ret) { - error_setg(errp, - "Snapshot with id '%s' and name '%s' does not exist on " - "device '%s'", - STR_OR_NULL(id), STR_OR_NULL(name), device); -- goto out_aio_context; -+ return NULL; - } - - bdrv_snapshot_delete(bs, id, name, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- goto out_aio_context; -+ return NULL; - } - -- aio_context_release(aio_context); -- - info = g_new0(SnapshotInfo, 1); - info->id = g_strdup(sn.id_str); - info->name = g_strdup(sn.name); -@@ -1201,10 +1181,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, - } - - return info; -- --out_aio_context: -- aio_context_release(aio_context); -- return NULL; - } - - /* internal snapshot private data */ -@@ -1232,7 +1208,6 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, - bool ret; - int64_t rt; - InternalSnapshotState *state = g_new0(InternalSnapshotState, 1); -- AioContext *aio_context; - int ret1; - - GLOBAL_STATE_CODE(); -@@ -1248,33 +1223,30 @@ static void 
internal_snapshot_action(BlockdevSnapshotInternal *internal, - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - state->bs = bs; - - /* Paired with .clean() */ - bdrv_drained_begin(bs); - - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) { -- goto out; -+ return; - } - - if (bdrv_is_read_only(bs)) { - error_setg(errp, "Device '%s' is read only", device); -- goto out; -+ return; - } - - if (!bdrv_can_snapshot(bs)) { - error_setg(errp, "Block format '%s' used by device '%s' " - "does not support internal snapshots", - bs->drv->format_name, device); -- goto out; -+ return; - } - - if (!strlen(name)) { - error_setg(errp, "Name is empty"); -- goto out; -+ return; - } - - /* check whether a snapshot with name exist */ -@@ -1282,12 +1254,12 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, - &local_err); - if (local_err) { - error_propagate(errp, local_err); -- goto out; -+ return; - } else if (ret) { - error_setg(errp, - "Snapshot with name '%s' already exists on device '%s'", - name, device); -- goto out; -+ return; - } - - /* 3. take the snapshot */ -@@ -1308,14 +1280,11 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, - error_setg_errno(errp, -ret1, - "Failed to create snapshot '%s' on device '%s'", - name, device); -- goto out; -+ return; - } - - /* 4. 
succeed, mark a snapshot is created */ - state->created = true; -- --out: -- aio_context_release(aio_context); - } - - static void internal_snapshot_abort(void *opaque) -@@ -1323,7 +1292,6 @@ static void internal_snapshot_abort(void *opaque) - InternalSnapshotState *state = opaque; - BlockDriverState *bs = state->bs; - QEMUSnapshotInfo *sn = &state->sn; -- AioContext *aio_context; - Error *local_error = NULL; - - GLOBAL_STATE_CODE(); -@@ -1333,9 +1301,6 @@ static void internal_snapshot_abort(void *opaque) - return; - } - -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); -- - if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) { - error_reportf_err(local_error, - "Failed to delete snapshot with id '%s' and " -@@ -1343,25 +1308,17 @@ static void internal_snapshot_abort(void *opaque) - sn->id_str, sn->name, - bdrv_get_device_name(bs)); - } -- -- aio_context_release(aio_context); - } - - static void internal_snapshot_clean(void *opaque) - { - g_autofree InternalSnapshotState *state = opaque; -- AioContext *aio_context; - - if (!state->bs) { - return; - } - -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); -- - bdrv_drained_end(state->bs); -- -- aio_context_release(aio_context); - } - - /* external snapshot private data */ -@@ -1395,7 +1352,6 @@ static void external_snapshot_action(TransactionAction *action, - /* File name of the new image (for 'blockdev-snapshot-sync') */ - const char *new_image_file; - ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1); -- AioContext *aio_context; - uint64_t perm, shared; - - /* TODO We'll eventually have to take a writer lock in this function */ -@@ -1435,26 +1391,23 @@ static void external_snapshot_action(TransactionAction *action, - return; - } - -- aio_context = bdrv_get_aio_context(state->old_bs); -- aio_context_acquire(aio_context); -- - /* Paired with .clean() */ - bdrv_drained_begin(state->old_bs); - - if 
(!bdrv_is_inserted(state->old_bs)) { - error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); -- goto out; -+ return; - } - - if (bdrv_op_is_blocked(state->old_bs, - BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, errp)) { -- goto out; -+ return; - } - - if (!bdrv_is_read_only(state->old_bs)) { - if (bdrv_flush(state->old_bs)) { - error_setg(errp, QERR_IO_ERROR); -- goto out; -+ return; - } - } - -@@ -1466,13 +1419,13 @@ static void external_snapshot_action(TransactionAction *action, - - if (node_name && !snapshot_node_name) { - error_setg(errp, "New overlay node-name missing"); -- goto out; -+ return; - } - - if (snapshot_node_name && - bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) { - error_setg(errp, "New overlay node-name already in use"); -- goto out; -+ return; - } - - flags = state->old_bs->open_flags; -@@ -1485,20 +1438,18 @@ static void external_snapshot_action(TransactionAction *action, - int64_t size = bdrv_getlength(state->old_bs); - if (size < 0) { - error_setg_errno(errp, -size, "bdrv_getlength failed"); -- goto out; -+ return; - } - bdrv_refresh_filename(state->old_bs); - -- aio_context_release(aio_context); - bdrv_img_create(new_image_file, format, - state->old_bs->filename, - state->old_bs->drv->format_name, - NULL, size, flags, false, &local_err); -- aio_context_acquire(aio_context); - - if (local_err) { - error_propagate(errp, local_err); -- goto out; -+ return; - } - } - -@@ -1508,20 +1459,15 @@ static void external_snapshot_action(TransactionAction *action, - } - qdict_put_str(options, "driver", format); - } -- aio_context_release(aio_context); - -- aio_context_acquire(qemu_get_aio_context()); - state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags, - errp); -- aio_context_release(qemu_get_aio_context()); - - /* We will manually add the backing_hd field to the bs later */ - if (!state->new_bs) { - return; - } - -- aio_context_acquire(aio_context); -- - /* - * Allow attaching a backing file to an overlay that's already in use 
only - * if the parents don't assume that they are already seeing a valid image. -@@ -1530,41 +1476,34 @@ static void external_snapshot_action(TransactionAction *action, - bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); - if (perm & BLK_PERM_CONSISTENT_READ) { - error_setg(errp, "The overlay is already in use"); -- goto out; -+ return; - } - - if (state->new_bs->drv->is_filter) { - error_setg(errp, "Filters cannot be used as overlays"); -- goto out; -+ return; - } - - if (bdrv_cow_child(state->new_bs)) { - error_setg(errp, "The overlay already has a backing image"); -- goto out; -+ return; - } - - if (!state->new_bs->drv->supports_backing) { - error_setg(errp, "The overlay does not support backing images"); -- goto out; -+ return; - } - - ret = bdrv_append(state->new_bs, state->old_bs, errp); - if (ret < 0) { -- goto out; -+ return; - } - state->overlay_appended = true; -- --out: -- aio_context_release(aio_context); - } - - static void external_snapshot_commit(void *opaque) - { - ExternalSnapshotState *state = opaque; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(state->old_bs); -- aio_context_acquire(aio_context); - - /* We don't need (or want) to use the transactional - * bdrv_reopen_multiple() across all the entries at once, because we -@@ -1572,8 +1511,6 @@ static void external_snapshot_commit(void *opaque) - if (!qatomic_read(&state->old_bs->copy_on_read)) { - bdrv_reopen_set_read_only(state->old_bs, true, NULL); - } -- -- aio_context_release(aio_context); - } - - static void external_snapshot_abort(void *opaque) -@@ -1586,7 +1523,6 @@ static void external_snapshot_abort(void *opaque) - int ret; - - aio_context = bdrv_get_aio_context(state->old_bs); -- aio_context_acquire(aio_context); - - bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() - close state->old_bs; we need it */ -@@ -1599,15 +1535,9 @@ static void external_snapshot_abort(void *opaque) - */ - tmp_context = bdrv_get_aio_context(state->old_bs); - if 
(aio_context != tmp_context) { -- aio_context_release(aio_context); -- aio_context_acquire(tmp_context); -- - ret = bdrv_try_change_aio_context(state->old_bs, - aio_context, NULL, NULL); - assert(ret == 0); -- -- aio_context_release(tmp_context); -- aio_context_acquire(aio_context); - } - - bdrv_drained_begin(state->new_bs); -@@ -1617,8 +1547,6 @@ static void external_snapshot_abort(void *opaque) - bdrv_drained_end(state->new_bs); - - bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ -- -- aio_context_release(aio_context); - } - } - } -@@ -1626,19 +1554,13 @@ static void external_snapshot_abort(void *opaque) - static void external_snapshot_clean(void *opaque) - { - g_autofree ExternalSnapshotState *state = opaque; -- AioContext *aio_context; - - if (!state->old_bs) { - return; - } - -- aio_context = bdrv_get_aio_context(state->old_bs); -- aio_context_acquire(aio_context); -- - bdrv_drained_end(state->old_bs); - bdrv_unref(state->new_bs); -- -- aio_context_release(aio_context); - } - - typedef struct DriveBackupState { -@@ -1670,7 +1592,6 @@ static void drive_backup_action(DriveBackup *backup, - BlockDriverState *target_bs; - BlockDriverState *source = NULL; - AioContext *aio_context; -- AioContext *old_context; - const char *format; - QDict *options; - Error *local_err = NULL; -@@ -1698,7 +1619,6 @@ static void drive_backup_action(DriveBackup *backup, - } - - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - state->bs = bs; - /* Paired with .clean() */ -@@ -1713,7 +1633,7 @@ static void drive_backup_action(DriveBackup *backup, - bdrv_graph_rdlock_main_loop(); - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { - bdrv_graph_rdunlock_main_loop(); -- goto out; -+ return; - } - - flags = bs->open_flags | BDRV_O_RDWR; -@@ -1744,7 +1664,7 @@ static void drive_backup_action(DriveBackup *backup, - size = bdrv_getlength(bs); - if (size < 0) { - error_setg_errno(errp, -size, "bdrv_getlength failed"); -- goto out; 
-+ return; - } - - if (backup->mode != NEW_IMAGE_MODE_EXISTING) { -@@ -1770,7 +1690,7 @@ static void drive_backup_action(DriveBackup *backup, - - if (local_err) { - error_propagate(errp, local_err); -- goto out; -+ return; - } - - options = qdict_new(); -@@ -1779,30 +1699,18 @@ static void drive_backup_action(DriveBackup *backup, - if (format) { - qdict_put_str(options, "driver", format); - } -- aio_context_release(aio_context); - -- aio_context_acquire(qemu_get_aio_context()); - target_bs = bdrv_open(backup->target, NULL, options, flags, errp); -- aio_context_release(qemu_get_aio_context()); -- - if (!target_bs) { - return; - } - -- /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ -- old_context = bdrv_get_aio_context(target_bs); -- aio_context_acquire(old_context); -- - ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); - if (ret < 0) { - bdrv_unref(target_bs); -- aio_context_release(old_context); - return; - } - -- aio_context_release(old_context); -- aio_context_acquire(aio_context); -- - if (set_backing_hd) { - if (bdrv_set_backing_hd(target_bs, source, errp) < 0) { - goto unref; -@@ -1815,22 +1723,14 @@ static void drive_backup_action(DriveBackup *backup, - - unref: - bdrv_unref(target_bs); --out: -- aio_context_release(aio_context); - } - - static void drive_backup_commit(void *opaque) - { - DriveBackupState *state = opaque; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); - - assert(state->job); - job_start(&state->job->job); -- -- aio_context_release(aio_context); - } - - static void drive_backup_abort(void *opaque) -@@ -1845,18 +1745,12 @@ static void drive_backup_abort(void *opaque) - static void drive_backup_clean(void *opaque) - { - g_autofree DriveBackupState *state = opaque; -- AioContext *aio_context; - - if (!state->bs) { - return; - } - -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); -- - 
bdrv_drained_end(state->bs); -- -- aio_context_release(aio_context); - } - - typedef struct BlockdevBackupState { -@@ -1881,7 +1775,6 @@ static void blockdev_backup_action(BlockdevBackup *backup, - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -- AioContext *old_context; - int ret; - - tran_add(tran, &blockdev_backup_drv, state); -@@ -1898,17 +1791,12 @@ static void blockdev_backup_action(BlockdevBackup *backup, - - /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ - aio_context = bdrv_get_aio_context(bs); -- old_context = bdrv_get_aio_context(target_bs); -- aio_context_acquire(old_context); - - ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); - if (ret < 0) { -- aio_context_release(old_context); - return; - } - -- aio_context_release(old_context); -- aio_context_acquire(aio_context); - state->bs = bs; - - /* Paired with .clean() */ -@@ -1917,22 +1805,14 @@ static void blockdev_backup_action(BlockdevBackup *backup, - state->job = do_backup_common(qapi_BlockdevBackup_base(backup), - bs, target_bs, aio_context, - block_job_txn, errp); -- -- aio_context_release(aio_context); - } - - static void blockdev_backup_commit(void *opaque) - { - BlockdevBackupState *state = opaque; -- AioContext *aio_context; -- -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); - - assert(state->job); - job_start(&state->job->job); -- -- aio_context_release(aio_context); - } - - static void blockdev_backup_abort(void *opaque) -@@ -1947,18 +1827,12 @@ static void blockdev_backup_abort(void *opaque) - static void blockdev_backup_clean(void *opaque) - { - g_autofree BlockdevBackupState *state = opaque; -- AioContext *aio_context; - - if (!state->bs) { - return; - } - -- aio_context = bdrv_get_aio_context(state->bs); -- aio_context_acquire(aio_context); -- - bdrv_drained_end(state->bs); -- -- aio_context_release(aio_context); - } - - typedef struct BlockDirtyBitmapState { -@@ 
-2454,7 +2328,6 @@ void qmp_block_stream(const char *job_id, const char *device, - } - - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - bdrv_graph_rdlock_main_loop(); - if (base) { -@@ -2521,7 +2394,7 @@ void qmp_block_stream(const char *job_id, const char *device, - if (!base_bs && backing_file) { - error_setg(errp, "backing file specified, but streaming the " - "entire chain"); -- goto out; -+ return; - } - - if (has_auto_finalize && !auto_finalize) { -@@ -2536,18 +2409,14 @@ void qmp_block_stream(const char *job_id, const char *device, - filter_node_name, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- goto out; -+ return; - } - - trace_qmp_block_stream(bs); -- --out: -- aio_context_release(aio_context); - return; - - out_rdlock: - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(aio_context); - } - - void qmp_block_commit(const char *job_id, const char *device, -@@ -2606,10 +2475,9 @@ void qmp_block_commit(const char *job_id, const char *device, - } - - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) { -- goto out; -+ return; - } - - /* default top_bs is the active layer */ -@@ -2617,16 +2485,16 @@ void qmp_block_commit(const char *job_id, const char *device, - - if (top_node && top) { - error_setg(errp, "'top-node' and 'top' are mutually exclusive"); -- goto out; -+ return; - } else if (top_node) { - top_bs = bdrv_lookup_bs(NULL, top_node, errp); - if (top_bs == NULL) { -- goto out; -+ return; - } - if (!bdrv_chain_contains(bs, top_bs)) { - error_setg(errp, "'%s' is not in this backing file chain", - top_node); -- goto out; -+ return; - } - } else if (top) { - /* This strcmp() is just a shortcut, there is no need to -@@ -2640,35 +2508,35 @@ void qmp_block_commit(const char *job_id, const char *device, - - if (top_bs == NULL) { - error_setg(errp, "Top image file %s not found", top ? 
top : "NULL"); -- goto out; -+ return; - } - - assert(bdrv_get_aio_context(top_bs) == aio_context); - - if (base_node && base) { - error_setg(errp, "'base-node' and 'base' are mutually exclusive"); -- goto out; -+ return; - } else if (base_node) { - base_bs = bdrv_lookup_bs(NULL, base_node, errp); - if (base_bs == NULL) { -- goto out; -+ return; - } - if (!bdrv_chain_contains(top_bs, base_bs)) { - error_setg(errp, "'%s' is not in this backing file chain", - base_node); -- goto out; -+ return; - } - } else if (base) { - base_bs = bdrv_find_backing_image(top_bs, base); - if (base_bs == NULL) { - error_setg(errp, "Can't find '%s' in the backing chain", base); -- goto out; -+ return; - } - } else { - base_bs = bdrv_find_base(top_bs); - if (base_bs == NULL) { - error_setg(errp, "There is no backimg image"); -- goto out; -+ return; - } - } - -@@ -2678,14 +2546,14 @@ void qmp_block_commit(const char *job_id, const char *device, - iter = bdrv_filter_or_cow_bs(iter)) - { - if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { -- goto out; -+ return; - } - } - - /* Do not allow attempts to commit an image into itself */ - if (top_bs == base_bs) { - error_setg(errp, "cannot commit an image into itself"); -- goto out; -+ return; - } - - /* -@@ -2708,7 +2576,7 @@ void qmp_block_commit(const char *job_id, const char *device, - error_setg(errp, "'backing-file' specified, but 'top' has a " - "writer on it"); - } -- goto out; -+ return; - } - if (!job_id) { - /* -@@ -2724,7 +2592,7 @@ void qmp_block_commit(const char *job_id, const char *device, - } else { - BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); - if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { -- goto out; -+ return; - } - commit_start(job_id, bs, base_bs, top_bs, job_flags, - speed, on_error, backing_file, -@@ -2732,11 +2600,8 @@ void qmp_block_commit(const char *job_id, const char *device, - } - if (local_err != NULL) { - error_propagate(errp, local_err); -- goto out; 
-+ return; - } -- --out: -- aio_context_release(aio_context); - } - - /* Common QMP interface for drive-backup and blockdev-backup */ -@@ -2985,8 +2850,6 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - - if (replaces) { - BlockDriverState *to_replace_bs; -- AioContext *aio_context; -- AioContext *replace_aio_context; - int64_t bs_size, replace_size; - - bs_size = bdrv_getlength(bs); -@@ -3000,19 +2863,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- replace_aio_context = bdrv_get_aio_context(to_replace_bs); -- /* -- * bdrv_getlength() is a co-wrapper and uses AIO_WAIT_WHILE. Be sure not -- * to acquire the same AioContext twice. -- */ -- if (replace_aio_context != aio_context) { -- aio_context_acquire(replace_aio_context); -- } - replace_size = bdrv_getlength(to_replace_bs); -- if (replace_aio_context != aio_context) { -- aio_context_release(replace_aio_context); -- } - - if (replace_size < 0) { - error_setg_errno(errp, -replace_size, -@@ -3041,7 +2892,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - BlockDriverState *bs; - BlockDriverState *target_backing_bs, *target_bs; - AioContext *aio_context; -- AioContext *old_context; - BlockMirrorBackingMode backing_mode; - Error *local_err = NULL; - QDict *options = NULL; -@@ -3064,7 +2914,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - } - - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - if (!arg->has_mode) { - arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -@@ -3088,14 +2937,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - size = bdrv_getlength(bs); - if (size < 0) { - error_setg_errno(errp, -size, "bdrv_getlength failed"); -- goto out; -+ return; - } - - if (arg->replaces) { - if (!arg->node_name) { - error_setg(errp, "a node-name must be provided when replacing a" - " named node of the graph"); -- goto out; -+ return; - } - 
} - -@@ -3143,7 +2992,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - - if (local_err) { - error_propagate(errp, local_err); -- goto out; -+ return; - } - - options = qdict_new(); -@@ -3153,15 +3002,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - if (format) { - qdict_put_str(options, "driver", format); - } -- aio_context_release(aio_context); - - /* Mirroring takes care of copy-on-write using the source's backing - * file. - */ -- aio_context_acquire(qemu_get_aio_context()); - target_bs = bdrv_open(arg->target, NULL, options, flags, errp); -- aio_context_release(qemu_get_aio_context()); -- - if (!target_bs) { - return; - } -@@ -3173,20 +3018,12 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - bdrv_graph_rdunlock_main_loop(); - - -- /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ -- old_context = bdrv_get_aio_context(target_bs); -- aio_context_acquire(old_context); -- - ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); - if (ret < 0) { - bdrv_unref(target_bs); -- aio_context_release(old_context); - return; - } - -- aio_context_release(old_context); -- aio_context_acquire(aio_context); -- - blockdev_mirror_common(arg->job_id, bs, target_bs, - arg->replaces, arg->sync, - backing_mode, zero_target, -@@ -3202,8 +3039,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - arg->has_auto_dismiss, arg->auto_dismiss, - errp); - bdrv_unref(target_bs); --out: -- aio_context_release(aio_context); - } - - void qmp_blockdev_mirror(const char *job_id, -@@ -3226,7 +3061,6 @@ void qmp_blockdev_mirror(const char *job_id, - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -- AioContext *old_context; - BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; - bool zero_target; - int ret; -@@ -3243,18 +3077,11 @@ void qmp_blockdev_mirror(const char *job_id, - - zero_target = (sync == MIRROR_SYNC_MODE_FULL); - -- /* Honor bdrv_try_change_aio_context() 
context acquisition requirements. */ -- old_context = bdrv_get_aio_context(target_bs); - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(old_context); - - ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); -- -- aio_context_release(old_context); -- aio_context_acquire(aio_context); -- - if (ret < 0) { -- goto out; -+ return; - } - - blockdev_mirror_common(job_id, bs, target_bs, -@@ -3269,8 +3096,6 @@ void qmp_blockdev_mirror(const char *job_id, - has_auto_finalize, auto_finalize, - has_auto_dismiss, auto_dismiss, - errp); --out: -- aio_context_release(aio_context); - } - - /* -@@ -3433,7 +3258,6 @@ void qmp_change_backing_file(const char *device, - Error **errp) - { - BlockDriverState *bs = NULL; -- AioContext *aio_context; - BlockDriverState *image_bs = NULL; - Error *local_err = NULL; - bool ro; -@@ -3444,9 +3268,6 @@ void qmp_change_backing_file(const char *device, - return; - } - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- - bdrv_graph_rdlock_main_loop(); - - image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err); -@@ -3485,7 +3306,7 @@ void qmp_change_backing_file(const char *device, - - if (ro) { - if (bdrv_reopen_set_read_only(image_bs, false, errp) != 0) { -- goto out; -+ return; - } - } - -@@ -3503,14 +3324,10 @@ void qmp_change_backing_file(const char *device, - if (ro) { - bdrv_reopen_set_read_only(image_bs, true, errp); - } -- --out: -- aio_context_release(aio_context); - return; - - out_rdlock: - bdrv_graph_rdunlock_main_loop(); -- aio_context_release(aio_context); - } - - void qmp_blockdev_add(BlockdevOptions *options, Error **errp) -@@ -3550,7 +3367,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - for (; reopen_list != NULL; reopen_list = reopen_list->next) { - BlockdevOptions *options = reopen_list->value; - BlockDriverState *bs; -- AioContext *ctx; - QObject *obj; - Visitor *v; - QDict *qdict; -@@ -3578,12 +3394,7 @@ void 
qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - - qdict_flatten(qdict); - -- ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); -- - queue = bdrv_reopen_queue(queue, bs, qdict, false); -- -- aio_context_release(ctx); - } - - /* Perform the reopen operation */ -@@ -3596,7 +3407,6 @@ fail: - - void qmp_blockdev_del(const char *node_name, Error **errp) - { -- AioContext *aio_context; - BlockDriverState *bs; - - GLOBAL_STATE_CODE(); -@@ -3611,30 +3421,25 @@ void qmp_blockdev_del(const char *node_name, Error **errp) - error_setg(errp, "Node %s is in use", node_name); - return; - } -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) { -- goto out; -+ return; - } - - if (!QTAILQ_IN_USE(bs, monitor_list)) { - error_setg(errp, "Node %s is not owned by the monitor", - bs->node_name); -- goto out; -+ return; - } - - if (bs->refcnt > 1) { - error_setg(errp, "Block device %s is in use", - bdrv_get_device_or_node_name(bs)); -- goto out; -+ return; - } - - QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list); - bdrv_unref(bs); -- --out: -- aio_context_release(aio_context); - } - - static BdrvChild * GRAPH_RDLOCK -@@ -3724,7 +3529,6 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp) - void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, - bool has_force, bool force, Error **errp) - { -- AioContext *old_context; - AioContext *new_context; - BlockDriverState *bs; - -@@ -3756,12 +3560,7 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, - new_context = qemu_get_aio_context(); - } - -- old_context = bdrv_get_aio_context(bs); -- aio_context_acquire(old_context); -- - bdrv_try_change_aio_context(bs, new_context, NULL, errp); -- -- aio_context_release(old_context); - } - - QemuOptsList qemu_common_drive_opts = { -diff --git a/blockjob.c b/blockjob.c -index 7310412313..d5f29e14af 100644 ---- a/blockjob.c -+++ 
b/blockjob.c -@@ -198,9 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) - * one to make sure that such a concurrent access does not attempt - * to process an already freed BdrvChild. - */ -- aio_context_release(job->job.aio_context); - bdrv_graph_wrlock(); -- aio_context_acquire(job->job.aio_context); - while (job->nodes) { - GSList *l = job->nodes; - BdrvChild *c = l->data; -@@ -234,28 +232,12 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, - uint64_t perm, uint64_t shared_perm, Error **errp) - { - BdrvChild *c; -- AioContext *ctx = bdrv_get_aio_context(bs); -- bool need_context_ops; - GLOBAL_STATE_CODE(); - - bdrv_ref(bs); - -- need_context_ops = ctx != job->job.aio_context; -- -- if (need_context_ops) { -- if (job->job.aio_context != qemu_get_aio_context()) { -- aio_context_release(job->job.aio_context); -- } -- aio_context_acquire(ctx); -- } - c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job, - errp); -- if (need_context_ops) { -- aio_context_release(ctx); -- if (job->job.aio_context != qemu_get_aio_context()) { -- aio_context_acquire(job->job.aio_context); -- } -- } - if (c == NULL) { - return -EPERM; - } -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index f83bb0f116..7bbbd981ad 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -124,7 +124,6 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - VirtIOBlockDataPlane *s = vblk->dataplane; - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -- AioContext *old_context; - unsigned i; - unsigned nvqs = s->conf->num_queues; - Error *local_err = NULL; -@@ -178,10 +177,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - - trace_virtio_blk_data_plane_start(s); - -- old_context = blk_get_aio_context(s->conf->conf.blk); -- aio_context_acquire(old_context); - r = blk_set_aio_context(s->conf->conf.blk, s->ctx, 
&local_err); -- aio_context_release(old_context); - if (r < 0) { - error_report_err(local_err); - goto fail_aio_context; -@@ -208,13 +204,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - - /* Get this show started by hooking up our callbacks */ - if (!blk_in_drain(s->conf->conf.blk)) { -- aio_context_acquire(s->ctx); - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); - - virtio_queue_aio_attach_host_notifier(vq, s->ctx); - } -- aio_context_release(s->ctx); - } - return 0; - -@@ -314,8 +308,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - */ - vblk->dataplane_started = false; - -- aio_context_acquire(s->ctx); -- - /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ - blk_drain(s->conf->conf.blk); - -@@ -325,8 +317,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - */ - blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); - -- aio_context_release(s->ctx); -- - /* Clean up guest notifier (irq) */ - k->set_guest_notifiers(qbus->parent, nvqs, false); - -diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c -index c4bb28c66f..98501e6885 100644 ---- a/hw/block/dataplane/xen-block.c -+++ b/hw/block/dataplane/xen-block.c -@@ -260,8 +260,6 @@ static void xen_block_complete_aio(void *opaque, int ret) - XenBlockRequest *request = opaque; - XenBlockDataPlane *dataplane = request->dataplane; - -- aio_context_acquire(dataplane->ctx); -- - if (ret != 0) { - error_report("%s I/O error", - request->req.operation == BLKIF_OP_READ ? 
-@@ -273,10 +271,10 @@ static void xen_block_complete_aio(void *opaque, int ret) - if (request->presync) { - request->presync = 0; - xen_block_do_aio(request); -- goto done; -+ return; - } - if (request->aio_inflight > 0) { -- goto done; -+ return; - } - - switch (request->req.operation) { -@@ -318,9 +316,6 @@ static void xen_block_complete_aio(void *opaque, int ret) - if (dataplane->more_work) { - qemu_bh_schedule(dataplane->bh); - } -- --done: -- aio_context_release(dataplane->ctx); - } - - static bool xen_block_split_discard(XenBlockRequest *request, -@@ -601,9 +596,7 @@ static void xen_block_dataplane_bh(void *opaque) - { - XenBlockDataPlane *dataplane = opaque; - -- aio_context_acquire(dataplane->ctx); - xen_block_handle_requests(dataplane); -- aio_context_release(dataplane->ctx); - } - - static bool xen_block_dataplane_event(void *opaque) -@@ -703,10 +696,8 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) - xen_block_dataplane_detach(dataplane); - } - -- aio_context_acquire(dataplane->ctx); - /* Xen doesn't have multiple users for nodes, so this can't fail */ - blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(dataplane->ctx); - - /* - * Now that the context has been moved onto the main thread, cancel -@@ -752,7 +743,6 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, - { - ERRP_GUARD(); - XenDevice *xendev = dataplane->xendev; -- AioContext *old_context; - unsigned int ring_size; - unsigned int i; - -@@ -836,11 +826,8 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, - goto stop; - } - -- old_context = blk_get_aio_context(dataplane->blk); -- aio_context_acquire(old_context); - /* If other users keep the BlockBackend in the iothread, that's ok */ - blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); -- aio_context_release(old_context); - - if (!blk_in_drain(dataplane->blk)) { - xen_block_dataplane_attach(dataplane); -diff --git a/hw/block/virtio-blk.c 
b/hw/block/virtio-blk.c -index e110f9718b..ec9ed09a6a 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1210,17 +1210,13 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, - static void virtio_blk_reset(VirtIODevice *vdev) - { - VirtIOBlock *s = VIRTIO_BLK(vdev); -- AioContext *ctx; - VirtIOBlockReq *req; - - /* Dataplane has stopped... */ - assert(!s->dataplane_started); - - /* ...but requests may still be in flight. */ -- ctx = blk_get_aio_context(s->blk); -- aio_context_acquire(ctx); - blk_drain(s->blk); -- aio_context_release(ctx); - - /* We drop queued requests after blk_drain() because blk_drain() itself can - * produce them. */ -@@ -1250,10 +1246,6 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) - uint64_t capacity; - int64_t length; - int blk_size = conf->logical_block_size; -- AioContext *ctx; -- -- ctx = blk_get_aio_context(s->blk); -- aio_context_acquire(ctx); - - blk_get_geometry(s->blk, &capacity); - memset(&blkcfg, 0, sizeof(blkcfg)); -@@ -1277,7 +1269,6 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) - * per track (cylinder). - */ - length = blk_getlength(s->blk); -- aio_context_release(ctx); - if (length > 0 && length / conf->heads / conf->secs % blk_size) { - blkcfg.geometry.sectors = conf->secs & ~s->sector_mask; - } else { -@@ -1344,9 +1335,7 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) - - memcpy(&blkcfg, config, s->config_size); - -- aio_context_acquire(blk_get_aio_context(s->blk)); - blk_set_enable_write_cache(s->blk, blkcfg.wce != 0); -- aio_context_release(blk_get_aio_context(s->blk)); - } - - static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, -@@ -1414,11 +1403,9 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) - * s->blk would erroneously be placed in writethrough mode. 
- */ - if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { -- aio_context_acquire(blk_get_aio_context(s->blk)); - blk_set_enable_write_cache(s->blk, - virtio_vdev_has_feature(vdev, - VIRTIO_BLK_F_WCE)); -- aio_context_release(blk_get_aio_context(s->blk)); - } - } - -diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c -index 1473ab3d5e..73cced4626 100644 ---- a/hw/core/qdev-properties-system.c -+++ b/hw/core/qdev-properties-system.c -@@ -120,9 +120,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, - "node"); - } - -- aio_context_acquire(ctx); - blk_replace_bs(blk, bs, errp); -- aio_context_release(ctx); - return; - } - -@@ -148,10 +146,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, - 0, BLK_PERM_ALL); - blk_created = true; - -- aio_context_acquire(ctx); - ret = blk_insert_bs(blk, bs, errp); -- aio_context_release(ctx); -- - if (ret < 0) { - goto fail; - } -@@ -207,12 +202,8 @@ static void release_drive(Object *obj, const char *name, void *opaque) - BlockBackend **ptr = object_field_prop_ptr(obj, prop); - - if (*ptr) { -- AioContext *ctx = blk_get_aio_context(*ptr); -- -- aio_context_acquire(ctx); - blockdev_auto_del(*ptr); - blk_detach_dev(*ptr, dev); -- aio_context_release(ctx); - } - } - -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index 6b21fbc73f..0327f1c605 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -31,11 +31,10 @@ - /* - * Global state (GS) API. These functions run under the BQL. - * -- * If a function modifies the graph, it also uses drain and/or -- * aio_context_acquire/release to be sure it has unique access. -- * aio_context locking is needed together with BQL because of -- * the thread-safe I/O API that concurrently runs and accesses -- * the graph without the BQL. 
-+ * If a function modifies the graph, it also uses the graph lock to be sure it -+ * has unique access. The graph lock is needed together with BQL because of the -+ * thread-safe I/O API that concurrently runs and accesses the graph without -+ * the BQL. - * - * It is important to note that not all of these functions are - * necessarily limited to running under the BQL, but they would -diff --git a/include/block/block-io.h b/include/block/block-io.h -index f8729ccc55..8eb39a858b 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -31,8 +31,7 @@ - - /* - * I/O API functions. These functions are thread-safe, and therefore -- * can run in any thread as long as the thread has called -- * aio_context_acquire/release(). -+ * can run in any thread. - * - * These functions can only call functions from I/O and Common categories, - * but can be invoked by GS, "I/O or GS" and I/O APIs. -diff --git a/include/block/snapshot.h b/include/block/snapshot.h -index d49c5599d9..304cc6ea61 100644 ---- a/include/block/snapshot.h -+++ b/include/block/snapshot.h -@@ -86,8 +86,6 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, - - /* - * Group operations. All block drivers are involved. -- * These functions will properly handle dataplane (take aio_context_acquire -- * when appropriate for appropriate block drivers - */ - - bool bdrv_all_can_snapshot(bool has_devices, strList *devices, -diff --git a/job.c b/job.c -index 99a2e54b54..660ce22c56 100644 ---- a/job.c -+++ b/job.c -@@ -464,12 +464,8 @@ void job_unref_locked(Job *job) - assert(!job->txn); - - if (job->driver->free) { -- AioContext *aio_context = job->aio_context; - job_unlock(); -- /* FIXME: aiocontext lock is required because cb calls blk_unref */ -- aio_context_acquire(aio_context); - job->driver->free(job); -- aio_context_release(aio_context); - job_lock(); - } - -@@ -840,12 +836,10 @@ static void job_clean(Job *job) - - /* - * Called with job_mutex held, but releases it temporarily. 
-- * Takes AioContext lock internally to invoke a job->driver callback. - */ - static int job_finalize_single_locked(Job *job) - { - int job_ret; -- AioContext *ctx = job->aio_context; - - assert(job_is_completed_locked(job)); - -@@ -854,7 +848,6 @@ static int job_finalize_single_locked(Job *job) - - job_ret = job->ret; - job_unlock(); -- aio_context_acquire(ctx); - - if (!job_ret) { - job_commit(job); -@@ -867,7 +860,6 @@ static int job_finalize_single_locked(Job *job) - job->cb(job->opaque, job_ret); - } - -- aio_context_release(ctx); - job_lock(); - - /* Emit events only if we actually started */ -@@ -886,17 +878,13 @@ static int job_finalize_single_locked(Job *job) - - /* - * Called with job_mutex held, but releases it temporarily. -- * Takes AioContext lock internally to invoke a job->driver callback. - */ - static void job_cancel_async_locked(Job *job, bool force) - { -- AioContext *ctx = job->aio_context; - GLOBAL_STATE_CODE(); - if (job->driver->cancel) { - job_unlock(); -- aio_context_acquire(ctx); - force = job->driver->cancel(job, force); -- aio_context_release(ctx); - job_lock(); - } else { - /* No .cancel() means the job will behave as if force-cancelled */ -@@ -931,7 +919,6 @@ static void job_cancel_async_locked(Job *job, bool force) - - /* - * Called with job_mutex held, but releases it temporarily. -- * Takes AioContext lock internally to invoke a job->driver callback. 
- */ - static void job_completed_txn_abort_locked(Job *job) - { -@@ -979,15 +966,12 @@ static void job_completed_txn_abort_locked(Job *job) - static int job_prepare_locked(Job *job) - { - int ret; -- AioContext *ctx = job->aio_context; - - GLOBAL_STATE_CODE(); - - if (job->ret == 0 && job->driver->prepare) { - job_unlock(); -- aio_context_acquire(ctx); - ret = job->driver->prepare(job); -- aio_context_release(ctx); - job_lock(); - job->ret = ret; - job_update_rc_locked(job); -diff --git a/migration/block.c b/migration/block.c -index a15f9bddcb..6ec6a1d6e6 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -66,7 +66,7 @@ typedef struct BlkMigDevState { - /* Protected by block migration lock. */ - int64_t completed_sectors; - -- /* During migration this is protected by iothread lock / AioContext. -+ /* During migration this is protected by bdrv_dirty_bitmap_lock(). - * Allocation and free happen during setup and cleanup respectively. - */ - BdrvDirtyBitmap *dirty_bitmap; -@@ -101,7 +101,7 @@ typedef struct BlkMigState { - int prev_progress; - int bulk_completed; - -- /* Lock must be taken _inside_ the iothread lock and any AioContexts. */ -+ /* Lock must be taken _inside_ the iothread lock. 
*/ - QemuMutex lock; - } BlkMigState; - -@@ -270,7 +270,6 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) - - if (bmds->shared_base) { - qemu_mutex_lock_iothread(); -- aio_context_acquire(blk_get_aio_context(bb)); - /* Skip unallocated sectors; intentionally treats failure or - * partial sector as an allocated sector */ - while (cur_sector < total_sectors && -@@ -281,7 +280,6 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) - } - cur_sector += count >> BDRV_SECTOR_BITS; - } -- aio_context_release(blk_get_aio_context(bb)); - qemu_mutex_unlock_iothread(); - } - -@@ -313,21 +311,16 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) - block_mig_state.submitted++; - blk_mig_unlock(); - -- /* We do not know if bs is under the main thread (and thus does -- * not acquire the AioContext when doing AIO) or rather under -- * dataplane. Thus acquire both the iothread mutex and the -- * AioContext. -- * -- * This is ugly and will disappear when we make bdrv_* thread-safe, -- * without the need to acquire the AioContext. -+ /* -+ * The migration thread does not have an AioContext. Lock the BQL so that -+ * I/O runs in the main loop AioContext (see -+ * qemu_get_current_aio_context()). - */ - qemu_mutex_lock_iothread(); -- aio_context_acquire(blk_get_aio_context(bmds->blk)); - bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector * BDRV_SECTOR_SIZE, - nr_sectors * BDRV_SECTOR_SIZE); - blk->aiocb = blk_aio_preadv(bb, cur_sector * BDRV_SECTOR_SIZE, &blk->qiov, - 0, blk_mig_read_cb, blk); -- aio_context_release(blk_get_aio_context(bmds->blk)); - qemu_mutex_unlock_iothread(); - - bmds->cur_sector = cur_sector + nr_sectors; -@@ -512,7 +505,7 @@ static void blk_mig_reset_dirty_cursor(void) - } - } - --/* Called with iothread lock and AioContext taken. */ -+/* Called with iothread lock taken. 
*/ - - static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, - int is_async) -@@ -606,9 +599,7 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async) - int ret = 1; - - QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { -- aio_context_acquire(blk_get_aio_context(bmds->blk)); - ret = mig_save_device_dirty(f, bmds, is_async); -- aio_context_release(blk_get_aio_context(bmds->blk)); - if (ret <= 0) { - break; - } -@@ -666,9 +657,9 @@ static int64_t get_remaining_dirty(void) - int64_t dirty = 0; - - QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { -- aio_context_acquire(blk_get_aio_context(bmds->blk)); -+ bdrv_dirty_bitmap_lock(bmds->dirty_bitmap); - dirty += bdrv_get_dirty_count(bmds->dirty_bitmap); -- aio_context_release(blk_get_aio_context(bmds->blk)); -+ bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap); - } - - return dirty; -@@ -681,7 +672,6 @@ static void block_migration_cleanup_bmds(void) - { - BlkMigDevState *bmds; - BlockDriverState *bs; -- AioContext *ctx; - - unset_dirty_tracking(); - -@@ -693,13 +683,7 @@ static void block_migration_cleanup_bmds(void) - bdrv_op_unblock_all(bs, bmds->blocker); - } - error_free(bmds->blocker); -- -- /* Save ctx, because bmds->blk can disappear during blk_unref. 
*/ -- ctx = blk_get_aio_context(bmds->blk); -- aio_context_acquire(ctx); - blk_unref(bmds->blk); -- aio_context_release(ctx); -- - g_free(bmds->blk_name); - g_free(bmds->aio_bitmap); - g_free(bmds); -diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c -index 86ae832176..99710c8ffb 100644 ---- a/migration/migration-hmp-cmds.c -+++ b/migration/migration-hmp-cmds.c -@@ -852,14 +852,11 @@ static void vm_completion(ReadLineState *rs, const char *str) - - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { - SnapshotInfoList *snapshots, *snapshot; -- AioContext *ctx = bdrv_get_aio_context(bs); - bool ok = false; - -- aio_context_acquire(ctx); - if (bdrv_can_snapshot(bs)) { - ok = bdrv_query_snapshot_info_list(bs, &snapshots, NULL) == 0; - } -- aio_context_release(ctx); - if (!ok) { - continue; - } -diff --git a/migration/savevm.c b/migration/savevm.c -index eec5503a42..1b9ab7b8ee 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -3049,7 +3049,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - int saved_vm_running; - uint64_t vm_state_size; - g_autoptr(GDateTime) now = g_date_time_new_now_local(); -- AioContext *aio_context; - - GLOBAL_STATE_CODE(); - -@@ -3092,7 +3091,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - if (bs == NULL) { - return false; - } -- aio_context = bdrv_get_aio_context(bs); - - saved_vm_running = runstate_is_running(); - -@@ -3101,8 +3099,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - - bdrv_drain_all_begin(); - -- aio_context_acquire(aio_context); -- - memset(sn, 0, sizeof(*sn)); - - /* fill auxiliary fields */ -@@ -3139,14 +3135,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - goto the_end; - } - -- /* The bdrv_all_create_snapshot() call that follows acquires the AioContext -- * for itself. 
BDRV_POLL_WHILE() does not support nested locking because -- * it only releases the lock once. Therefore synchronous I/O will deadlock -- * unless we release the AioContext before bdrv_all_create_snapshot(). -- */ -- aio_context_release(aio_context); -- aio_context = NULL; -- - ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, - has_devices, devices, errp); - if (ret < 0) { -@@ -3157,10 +3145,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - ret = 0; - - the_end: -- if (aio_context) { -- aio_context_release(aio_context); -- } -- - bdrv_drain_all_end(); - - if (saved_vm_running) { -@@ -3258,7 +3242,6 @@ bool load_snapshot(const char *name, const char *vmstate, - QEMUSnapshotInfo sn; - QEMUFile *f; - int ret; -- AioContext *aio_context; - MigrationIncomingState *mis = migration_incoming_get_current(); - - if (!bdrv_all_can_snapshot(has_devices, devices, errp)) { -@@ -3278,12 +3261,9 @@ bool load_snapshot(const char *name, const char *vmstate, - if (!bs_vm_state) { - return false; - } -- aio_context = bdrv_get_aio_context(bs_vm_state); - - /* Don't even try to load empty VM states */ -- aio_context_acquire(aio_context); - ret = bdrv_snapshot_find(bs_vm_state, &sn, name); -- aio_context_release(aio_context); - if (ret < 0) { - return false; - } else if (sn.vm_state_size == 0) { -@@ -3320,10 +3300,8 @@ bool load_snapshot(const char *name, const char *vmstate, - ret = -EINVAL; - goto err_drain; - } -- aio_context_acquire(aio_context); - ret = qemu_loadvm_state(f); - migration_incoming_state_destroy(); -- aio_context_release(aio_context); - - bdrv_drain_all_end(); - -diff --git a/net/colo-compare.c b/net/colo-compare.c -index 7f9e6f89ce..f2dfc0ebdc 100644 ---- a/net/colo-compare.c -+++ b/net/colo-compare.c -@@ -1439,12 +1439,10 @@ static void colo_compare_finalize(Object *obj) - qemu_bh_delete(s->event_bh); - - AioContext *ctx = iothread_get_aio_context(s->iothread); -- aio_context_acquire(ctx); - AIO_WAIT_WHILE(ctx, 
!s->out_sendco.done); - if (s->notify_dev) { - AIO_WAIT_WHILE(ctx, !s->notify_sendco.done); - } -- aio_context_release(ctx); - - /* Release all unhandled packets after compare thead exited */ - g_queue_foreach(&s->conn_list, colo_flush_packets, s); -diff --git a/qemu-img.c b/qemu-img.c -index 5a77f67719..7668f86769 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -960,7 +960,6 @@ static int img_commit(int argc, char **argv) - Error *local_err = NULL; - CommonBlockJobCBInfo cbi; - bool image_opts = false; -- AioContext *aio_context; - int64_t rate_limit = 0; - - fmt = NULL; -@@ -1078,12 +1077,9 @@ static int img_commit(int argc, char **argv) - .bs = bs, - }; - -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); - commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit, - BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, - &cbi, false, &local_err); -- aio_context_release(aio_context); - if (local_err) { - goto done; - } -diff --git a/qemu-io.c b/qemu-io.c -index 050c70835f..6cb1e00385 100644 ---- a/qemu-io.c -+++ b/qemu-io.c -@@ -414,15 +414,7 @@ static void prep_fetchline(void *opaque) - - static int do_qemuio_command(const char *cmd) - { -- int ret; -- AioContext *ctx = -- qemuio_blk ? 
blk_get_aio_context(qemuio_blk) : qemu_get_aio_context(); -- -- aio_context_acquire(ctx); -- ret = qemuio_command(qemuio_blk, cmd); -- aio_context_release(ctx); -- -- return ret; -+ return qemuio_command(qemuio_blk, cmd); - } - - static int command_loop(void) -diff --git a/qemu-nbd.c b/qemu-nbd.c -index 186e6468b1..bac0b5e3ec 100644 ---- a/qemu-nbd.c -+++ b/qemu-nbd.c -@@ -1123,9 +1123,7 @@ int main(int argc, char **argv) - qdict_put_str(raw_opts, "file", bs->node_name); - qdict_put_int(raw_opts, "offset", dev_offset); - -- aio_context_acquire(qemu_get_aio_context()); - bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal); -- aio_context_release(qemu_get_aio_context()); - - blk_remove_bs(blk); - blk_insert_bs(blk, bs, &error_fatal); -diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c -index 3e60549a4a..82c66fff26 100644 ---- a/replay/replay-debugging.c -+++ b/replay/replay-debugging.c -@@ -144,7 +144,6 @@ static char *replay_find_nearest_snapshot(int64_t icount, - char *ret = NULL; - int rv; - int nb_sns, i; -- AioContext *aio_context; - - *snapshot_icount = -1; - -@@ -152,11 +151,8 @@ static char *replay_find_nearest_snapshot(int64_t icount, - if (!bs) { - goto fail; - } -- aio_context = bdrv_get_aio_context(bs); - -- aio_context_acquire(aio_context); - nb_sns = bdrv_snapshot_list(bs, &sn_tab); -- aio_context_release(aio_context); - - for (i = 0; i < nb_sns; i++) { - rv = bdrv_all_has_snapshot(sn_tab[i].name, false, NULL, NULL); -diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py -index 38364fa557..c9c09fcacd 100644 ---- a/scripts/block-coroutine-wrapper.py -+++ b/scripts/block-coroutine-wrapper.py -@@ -278,12 +278,9 @@ def gen_no_co_wrapper(func: FuncDecl) -> str: - static void {name}_bh(void *opaque) - {{ - {struct_name} *s = opaque; -- AioContext *ctx = {func.gen_ctx('s->')}; - - {graph_lock} -- aio_context_acquire(ctx); - {func.get_result}{name}({ func.gen_list('s->{name}') }); -- aio_context_release(ctx); 
- {graph_unlock} - - aio_co_wake(s->co); -diff --git a/tests/tsan/suppressions.tsan b/tests/tsan/suppressions.tsan -index d9a002a2ef..b3ef59c27c 100644 ---- a/tests/tsan/suppressions.tsan -+++ b/tests/tsan/suppressions.tsan -@@ -4,7 +4,6 @@ - - # TSan reports a double lock on RECURSIVE mutexes. - # Since the recursive lock is intentional, we choose to ignore it. --mutex:aio_context_acquire - mutex:pthread_mutex_lock - - # TSan reports a race between pthread_mutex_init() and -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index d9754dfebc..17830a69c1 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -179,13 +179,7 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) - - static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs) - { -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_acquire(bdrv_get_aio_context(bs)); -- } - do_drain_begin(drain_type, bs); -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_release(bdrv_get_aio_context(bs)); -- } - } - - static BlockBackend * no_coroutine_fn test_setup(void) -@@ -209,13 +203,7 @@ static BlockBackend * no_coroutine_fn test_setup(void) - - static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs) - { -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_acquire(bdrv_get_aio_context(bs)); -- } - do_drain_end(drain_type, bs); -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_release(bdrv_get_aio_context(bs)); -- } - } - - /* -@@ -520,12 +508,8 @@ static void test_iothread_main_thread_bh(void *opaque) - { - struct test_iothread_data *data = opaque; - -- /* Test that the AioContext is not yet locked in a random BH that is -- * executed during drain, otherwise this would deadlock. 
*/ -- aio_context_acquire(bdrv_get_aio_context(data->bs)); - bdrv_flush(data->bs); - bdrv_dec_in_flight(data->bs); /* incremented by test_iothread_common() */ -- aio_context_release(bdrv_get_aio_context(data->bs)); - } - - /* -@@ -567,7 +551,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - blk_set_disable_request_queuing(blk, true); - - blk_set_aio_context(blk, ctx_a, &error_abort); -- aio_context_acquire(ctx_a); - - s->bh_indirection_ctx = ctx_b; - -@@ -582,8 +565,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - g_assert(acb != NULL); - g_assert_cmpint(aio_ret, ==, -EINPROGRESS); - -- aio_context_release(ctx_a); -- - data = (struct test_iothread_data) { - .bs = bs, - .drain_type = drain_type, -@@ -592,10 +573,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - - switch (drain_thread) { - case 0: -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_acquire(ctx_a); -- } -- - /* - * Increment in_flight so that do_drain_begin() waits for - * test_iothread_main_thread_bh(). 
This prevents the race between -@@ -613,20 +590,10 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - do_drain_begin(drain_type, bs); - g_assert_cmpint(bs->in_flight, ==, 0); - -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_release(ctx_a); -- } - qemu_event_wait(&done_event); -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_acquire(ctx_a); -- } - - g_assert_cmpint(aio_ret, ==, 0); - do_drain_end(drain_type, bs); -- -- if (drain_type != BDRV_DRAIN_ALL) { -- aio_context_release(ctx_a); -- } - break; - case 1: - co = qemu_coroutine_create(test_iothread_drain_co_entry, &data); -@@ -637,9 +604,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - g_assert_not_reached(); - } - -- aio_context_acquire(ctx_a); - blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(ctx_a); - - bdrv_unref(bs); - blk_unref(blk); -@@ -757,7 +722,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - BlockJob *job; - TestBlockJob *tjob; - IOThread *iothread = NULL; -- AioContext *ctx; - int ret; - - src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, -@@ -787,11 +751,11 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - } - - if (use_iothread) { -+ AioContext *ctx; -+ - iothread = iothread_new(); - ctx = iothread_get_aio_context(iothread); - blk_set_aio_context(blk_src, ctx, &error_abort); -- } else { -- ctx = qemu_get_aio_context(); - } - - target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, -@@ -800,7 +764,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - blk_insert_bs(blk_target, target, &error_abort); - blk_set_allow_aio_context_change(blk_target, true); - -- aio_context_acquire(ctx); - tjob = block_job_create("job0", &test_job_driver, NULL, src, - 0, BLK_PERM_ALL, - 0, 0, NULL, NULL, &error_abort); -@@ -821,7 +784,6 @@ static void test_blockjob_common_drain_node(enum drain_type 
drain_type, - tjob->prepare_ret = -EIO; - break; - } -- aio_context_release(ctx); - - job_start(&job->job); - -@@ -912,12 +874,10 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - } - g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO)); - -- aio_context_acquire(ctx); - if (use_iothread) { - blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort); - assert(blk_get_aio_context(blk_target) == qemu_get_aio_context()); - } -- aio_context_release(ctx); - - blk_unref(blk_src); - blk_unref(blk_target); -@@ -1401,9 +1361,7 @@ static void test_append_to_drained(void) - g_assert_cmpint(base_s->drain_count, ==, 1); - g_assert_cmpint(base->in_flight, ==, 0); - -- aio_context_acquire(qemu_get_aio_context()); - bdrv_append(overlay, base, &error_abort); -- aio_context_release(qemu_get_aio_context()); - - g_assert_cmpint(base->in_flight, ==, 0); - g_assert_cmpint(overlay->in_flight, ==, 0); -@@ -1438,16 +1396,11 @@ static void test_set_aio_context(void) - - bdrv_drained_begin(bs); - bdrv_try_change_aio_context(bs, ctx_a, NULL, &error_abort); -- -- aio_context_acquire(ctx_a); - bdrv_drained_end(bs); - - bdrv_drained_begin(bs); - bdrv_try_change_aio_context(bs, ctx_b, NULL, &error_abort); -- aio_context_release(ctx_a); -- aio_context_acquire(ctx_b); - bdrv_try_change_aio_context(bs, qemu_get_aio_context(), NULL, &error_abort); -- aio_context_release(ctx_b); - bdrv_drained_end(bs); - - bdrv_unref(bs); -diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c -index 8ee6ef38d8..cafc023db4 100644 ---- a/tests/unit/test-bdrv-graph-mod.c -+++ b/tests/unit/test-bdrv-graph-mod.c -@@ -142,10 +142,8 @@ static void test_update_perm_tree(void) - BDRV_CHILD_DATA, &error_abort); - bdrv_graph_wrunlock(); - -- aio_context_acquire(qemu_get_aio_context()); - ret = bdrv_append(filter, bs, NULL); - g_assert_cmpint(ret, <, 0); -- aio_context_release(qemu_get_aio_context()); - - bdrv_unref(filter); - blk_unref(root); -@@ -211,9 
+209,7 @@ static void test_should_update_child(void) - bdrv_attach_child(filter, target, "target", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); - bdrv_graph_wrunlock(); -- aio_context_acquire(qemu_get_aio_context()); - bdrv_append(filter, bs, &error_abort); -- aio_context_release(qemu_get_aio_context()); - - bdrv_graph_rdlock_main_loop(); - g_assert(target->backing->bs == bs); -@@ -440,9 +436,7 @@ static void test_append_greedy_filter(void) - &error_abort); - bdrv_graph_wrunlock(); - -- aio_context_acquire(qemu_get_aio_context()); - bdrv_append(fl, base, &error_abort); -- aio_context_release(qemu_get_aio_context()); - bdrv_unref(fl); - bdrv_unref(top); - } -diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c -index 9b15d2768c..3766d5de6b 100644 ---- a/tests/unit/test-block-iothread.c -+++ b/tests/unit/test-block-iothread.c -@@ -483,7 +483,6 @@ static void test_sync_op(const void *opaque) - bdrv_graph_rdunlock_main_loop(); - - blk_set_aio_context(blk, ctx, &error_abort); -- aio_context_acquire(ctx); - if (t->fn) { - t->fn(c); - } -@@ -491,7 +490,6 @@ static void test_sync_op(const void *opaque) - t->blkfn(blk); - } - blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(ctx); - - bdrv_unref(bs); - blk_unref(blk); -@@ -576,9 +574,7 @@ static void test_attach_blockjob(void) - aio_poll(qemu_get_aio_context(), false); - } - -- aio_context_acquire(ctx); - blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(ctx); - - tjob->n = 0; - while (tjob->n == 0) { -@@ -595,9 +591,7 @@ static void test_attach_blockjob(void) - WITH_JOB_LOCK_GUARD() { - job_complete_sync_locked(&tjob->common.job, &error_abort); - } -- aio_context_acquire(ctx); - blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(ctx); - - bdrv_unref(bs); - blk_unref(blk); -@@ -654,9 +648,7 @@ static void test_propagate_basic(void) - - /* Switch the AioContext back */ - main_ctx = 
qemu_get_aio_context(); -- aio_context_acquire(ctx); - blk_set_aio_context(blk, main_ctx, &error_abort); -- aio_context_release(ctx); - g_assert(blk_get_aio_context(blk) == main_ctx); - g_assert(bdrv_get_aio_context(bs_a) == main_ctx); - g_assert(bdrv_get_aio_context(bs_verify) == main_ctx); -@@ -732,9 +724,7 @@ static void test_propagate_diamond(void) - - /* Switch the AioContext back */ - main_ctx = qemu_get_aio_context(); -- aio_context_acquire(ctx); - blk_set_aio_context(blk, main_ctx, &error_abort); -- aio_context_release(ctx); - g_assert(blk_get_aio_context(blk) == main_ctx); - g_assert(bdrv_get_aio_context(bs_verify) == main_ctx); - g_assert(bdrv_get_aio_context(bs_a) == main_ctx); -@@ -764,13 +754,11 @@ static void test_propagate_mirror(void) - &error_abort); - - /* Start a mirror job */ -- aio_context_acquire(main_ctx); - mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, - MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false, - BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, - false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, - &error_abort); -- aio_context_release(main_ctx); - - WITH_JOB_LOCK_GUARD() { - job = job_get_locked("job0"); -@@ -785,9 +773,7 @@ static void test_propagate_mirror(void) - g_assert(job->aio_context == ctx); - - /* Change the AioContext of target */ -- aio_context_acquire(ctx); - bdrv_try_change_aio_context(target, main_ctx, NULL, &error_abort); -- aio_context_release(ctx); - g_assert(bdrv_get_aio_context(src) == main_ctx); - g_assert(bdrv_get_aio_context(target) == main_ctx); - g_assert(bdrv_get_aio_context(filter) == main_ctx); -@@ -805,10 +791,8 @@ static void test_propagate_mirror(void) - g_assert(bdrv_get_aio_context(filter) == main_ctx); - - /* ...unless we explicitly allow it */ -- aio_context_acquire(ctx); - blk_set_allow_aio_context_change(blk, true); - bdrv_try_change_aio_context(target, ctx, NULL, &error_abort); -- aio_context_release(ctx); - - g_assert(blk_get_aio_context(blk) == ctx); - 
g_assert(bdrv_get_aio_context(src) == ctx); -@@ -817,10 +801,8 @@ static void test_propagate_mirror(void) - - job_cancel_sync_all(); - -- aio_context_acquire(ctx); - blk_set_aio_context(blk, main_ctx, &error_abort); - bdrv_try_change_aio_context(target, main_ctx, NULL, &error_abort); -- aio_context_release(ctx); - - blk_unref(blk); - bdrv_unref(src); -@@ -836,7 +818,6 @@ static void test_attach_second_node(void) - BlockDriverState *bs, *filter; - QDict *options; - -- aio_context_acquire(main_ctx); - blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); - bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); - blk_insert_bs(blk, bs, &error_abort); -@@ -846,15 +827,12 @@ static void test_attach_second_node(void) - qdict_put_str(options, "file", "base"); - - filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); -- aio_context_release(main_ctx); - - g_assert(blk_get_aio_context(blk) == ctx); - g_assert(bdrv_get_aio_context(bs) == ctx); - g_assert(bdrv_get_aio_context(filter) == ctx); - -- aio_context_acquire(ctx); - blk_set_aio_context(blk, main_ctx, &error_abort); -- aio_context_release(ctx); - g_assert(blk_get_aio_context(blk) == main_ctx); - g_assert(bdrv_get_aio_context(bs) == main_ctx); - g_assert(bdrv_get_aio_context(filter) == main_ctx); -@@ -868,11 +846,9 @@ static void test_attach_preserve_blk_ctx(void) - { - IOThread *iothread = iothread_new(); - AioContext *ctx = iothread_get_aio_context(iothread); -- AioContext *main_ctx = qemu_get_aio_context(); - BlockBackend *blk; - BlockDriverState *bs; - -- aio_context_acquire(main_ctx); - blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); - bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); - bs->total_sectors = 65536 / BDRV_SECTOR_SIZE; -@@ -881,25 +857,18 @@ static void test_attach_preserve_blk_ctx(void) - blk_insert_bs(blk, bs, &error_abort); - g_assert(blk_get_aio_context(blk) == ctx); - g_assert(bdrv_get_aio_context(bs) == ctx); -- aio_context_release(main_ctx); - - 
/* Remove the node again */ -- aio_context_acquire(ctx); - blk_remove_bs(blk); -- aio_context_release(ctx); - g_assert(blk_get_aio_context(blk) == ctx); - g_assert(bdrv_get_aio_context(bs) == qemu_get_aio_context()); - - /* Re-attach the node */ -- aio_context_acquire(main_ctx); - blk_insert_bs(blk, bs, &error_abort); -- aio_context_release(main_ctx); - g_assert(blk_get_aio_context(blk) == ctx); - g_assert(bdrv_get_aio_context(bs) == ctx); - -- aio_context_acquire(ctx); - blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); -- aio_context_release(ctx); - bdrv_unref(bs); - blk_unref(blk); - } -diff --git a/tests/unit/test-blockjob.c b/tests/unit/test-blockjob.c -index a130f6fefb..fe3e0d2d38 100644 ---- a/tests/unit/test-blockjob.c -+++ b/tests/unit/test-blockjob.c -@@ -228,7 +228,6 @@ static void cancel_common(CancelJob *s) - BlockJob *job = &s->common; - BlockBackend *blk = s->blk; - JobStatus sts = job->job.status; -- AioContext *ctx = job->job.aio_context; - - job_cancel_sync(&job->job, true); - WITH_JOB_LOCK_GUARD() { -@@ -240,9 +239,7 @@ static void cancel_common(CancelJob *s) - job_unref_locked(&job->job); - } - -- aio_context_acquire(ctx); - destroy_blk(blk); -- aio_context_release(ctx); - - } - -@@ -391,132 +388,6 @@ static void test_cancel_concluded(void) - cancel_common(s); - } - --/* (See test_yielding_driver for the job description) */ --typedef struct YieldingJob { -- BlockJob common; -- bool should_complete; --} YieldingJob; -- --static void yielding_job_complete(Job *job, Error **errp) --{ -- YieldingJob *s = container_of(job, YieldingJob, common.job); -- s->should_complete = true; -- job_enter(job); --} -- --static int coroutine_fn yielding_job_run(Job *job, Error **errp) --{ -- YieldingJob *s = container_of(job, YieldingJob, common.job); -- -- job_transition_to_ready(job); -- -- while (!s->should_complete) { -- job_yield(job); -- } -- -- return 0; --} -- --/* -- * This job transitions immediately to the READY state, and then -- * yields 
until it is to complete. -- */ --static const BlockJobDriver test_yielding_driver = { -- .job_driver = { -- .instance_size = sizeof(YieldingJob), -- .free = block_job_free, -- .user_resume = block_job_user_resume, -- .run = yielding_job_run, -- .complete = yielding_job_complete, -- }, --}; -- --/* -- * Test that job_complete_locked() works even on jobs that are in a paused -- * state (i.e., STANDBY). -- * -- * To do this, run YieldingJob in an IO thread, get it into the READY -- * state, then have a drained section. Before ending the section, -- * acquire the context so the job will not be entered and will thus -- * remain on STANDBY. -- * -- * job_complete_locked() should still work without error. -- * -- * Note that on the QMP interface, it is impossible to lock an IO -- * thread before a drained section ends. In practice, the -- * bdrv_drain_all_end() and the aio_context_acquire() will be -- * reversed. However, that makes for worse reproducibility here: -- * Sometimes, the job would no longer be in STANDBY then but already -- * be started. We cannot prevent that, because the IO thread runs -- * concurrently. We can only prevent it by taking the lock before -- * ending the drained section, so we do that. -- * -- * (You can reverse the order of operations and most of the time the -- * test will pass, but sometimes the assert(status == STANDBY) will -- * fail.) 
-- */ --static void test_complete_in_standby(void) --{ -- BlockBackend *blk; -- IOThread *iothread; -- AioContext *ctx; -- Job *job; -- BlockJob *bjob; -- -- /* Create a test drive, move it to an IO thread */ -- blk = create_blk(NULL); -- iothread = iothread_new(); -- -- ctx = iothread_get_aio_context(iothread); -- blk_set_aio_context(blk, ctx, &error_abort); -- -- /* Create our test job */ -- bjob = mk_job(blk, "job", &test_yielding_driver, true, -- JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS); -- job = &bjob->job; -- assert_job_status_is(job, JOB_STATUS_CREATED); -- -- /* Wait for the job to become READY */ -- job_start(job); -- /* -- * Here we are waiting for the status to change, so don't bother -- * protecting the read every time. -- */ -- AIO_WAIT_WHILE_UNLOCKED(ctx, job->status != JOB_STATUS_READY); -- -- /* Begin the drained section, pausing the job */ -- bdrv_drain_all_begin(); -- assert_job_status_is(job, JOB_STATUS_STANDBY); -- -- /* Lock the IO thread to prevent the job from being run */ -- aio_context_acquire(ctx); -- /* This will schedule the job to resume it */ -- bdrv_drain_all_end(); -- aio_context_release(ctx); -- -- WITH_JOB_LOCK_GUARD() { -- /* But the job cannot run, so it will remain on standby */ -- assert(job->status == JOB_STATUS_STANDBY); -- -- /* Even though the job is on standby, this should work */ -- job_complete_locked(job, &error_abort); -- -- /* The test is done now, clean up. 
*/ -- job_finish_sync_locked(job, NULL, &error_abort); -- assert(job->status == JOB_STATUS_PENDING); -- -- job_finalize_locked(job, &error_abort); -- assert(job->status == JOB_STATUS_CONCLUDED); -- -- job_dismiss_locked(&job, &error_abort); -- } -- -- aio_context_acquire(ctx); -- destroy_blk(blk); -- aio_context_release(ctx); -- iothread_join(iothread); --} -- - int main(int argc, char **argv) - { - qemu_init_main_loop(&error_abort); -@@ -531,13 +402,5 @@ int main(int argc, char **argv) - g_test_add_func("/blockjob/cancel/standby", test_cancel_standby); - g_test_add_func("/blockjob/cancel/pending", test_cancel_pending); - g_test_add_func("/blockjob/cancel/concluded", test_cancel_concluded); -- -- /* -- * This test is flaky and sometimes fails in CI and otherwise: -- * don't run unless user opts in via environment variable. -- */ -- if (getenv("QEMU_TEST_FLAKY_TESTS")) { -- g_test_add_func("/blockjob/complete_in_standby", test_complete_in_standby); -- } - return g_test_run(); - } -diff --git a/tests/unit/test-replication.c b/tests/unit/test-replication.c -index afff908d77..5d2003b8ce 100644 ---- a/tests/unit/test-replication.c -+++ b/tests/unit/test-replication.c -@@ -199,17 +199,13 @@ static BlockBackend *start_primary(void) - static void teardown_primary(void) - { - BlockBackend *blk; -- AioContext *ctx; - - /* remove P_ID */ - blk = blk_by_name(P_ID); - assert(blk); - -- ctx = blk_get_aio_context(blk); -- aio_context_acquire(ctx); - monitor_remove_blk(blk); - blk_unref(blk); -- aio_context_release(ctx); - } - - static void test_primary_read(void) -@@ -345,27 +341,20 @@ static void teardown_secondary(void) - { - /* only need to destroy two BBs */ - BlockBackend *blk; -- AioContext *ctx; - - /* remove S_LOCAL_DISK_ID */ - blk = blk_by_name(S_LOCAL_DISK_ID); - assert(blk); - -- ctx = blk_get_aio_context(blk); -- aio_context_acquire(ctx); - monitor_remove_blk(blk); - blk_unref(blk); -- aio_context_release(ctx); - - /* remove S_ID */ - blk = blk_by_name(S_ID); - 
assert(blk); - -- ctx = blk_get_aio_context(blk); -- aio_context_acquire(ctx); - monitor_remove_blk(blk); - blk_unref(blk); -- aio_context_release(ctx); - } - - static void test_secondary_read(void) -diff --git a/util/async.c b/util/async.c -index 04ee83d220..dfd44ef612 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -562,12 +562,10 @@ static void co_schedule_bh_cb(void *opaque) - Coroutine *co = QSLIST_FIRST(&straight); - QSLIST_REMOVE_HEAD(&straight, co_scheduled_next); - trace_aio_co_schedule_bh_cb(ctx, co); -- aio_context_acquire(ctx); - - /* Protected by write barrier in qemu_aio_coroutine_enter */ - qatomic_set(&co->scheduled, NULL); - qemu_aio_coroutine_enter(ctx, co); -- aio_context_release(ctx); - } - } - -@@ -707,9 +705,7 @@ void aio_co_enter(AioContext *ctx, Coroutine *co) - assert(self != co); - QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next); - } else { -- aio_context_acquire(ctx); - qemu_aio_coroutine_enter(ctx, co); -- aio_context_release(ctx); - } - } - -diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c -index a9a48fffb8..3bfb1ad3ec 100644 ---- a/util/vhost-user-server.c -+++ b/util/vhost-user-server.c -@@ -360,10 +360,7 @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc, - - qio_channel_set_follow_coroutine_ctx(server->ioc, true); - -- /* Attaching the AioContext starts the vu_client_trip coroutine */ -- aio_context_acquire(server->ctx); - vhost_user_server_attach_aio_context(server, server->ctx); -- aio_context_release(server->ctx); - } - - /* server->ctx acquired by caller */ --- -2.39.3 - diff --git a/SOURCES/kvm-block-remove-bdrv_co_lock.patch b/SOURCES/kvm-block-remove-bdrv_co_lock.patch deleted file mode 100644 index b219c1c..0000000 --- a/SOURCES/kvm-block-remove-bdrv_co_lock.patch +++ /dev/null @@ -1,97 +0,0 @@ -From d0514c7d5d6cc1aa140119c95d5ea2c1591b01e9 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:04 -0500 -Subject: [PATCH 087/101] block: remove 
bdrv_co_lock() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [18/26] a303f861ea5e84d8e89fd51e530fd0cb2da17b89 (kmwolf/centos-qemu-kvm) - -The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops. -Remove them. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231205182011.1976568-8-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - block.c | 10 ---------- - blockdev.c | 5 ----- - include/block/block-global-state.h | 14 -------------- - 3 files changed, 29 deletions(-) - -diff --git a/block.c b/block.c -index 91ace5d2d5..434b7f4d72 100644 ---- a/block.c -+++ b/block.c -@@ -7431,16 +7431,6 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) - bdrv_dec_in_flight(bs); - } - --void coroutine_fn bdrv_co_lock(BlockDriverState *bs) --{ -- /* TODO removed in next patch */ --} -- --void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) --{ -- /* TODO removed in next patch */ --} -- - static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) - { - GLOBAL_STATE_CODE(); -diff --git a/blockdev.c b/blockdev.c -index 5d8b3a23eb..3a5e7222ec 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2264,18 +2264,13 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, - return; - } - -- bdrv_co_lock(bs); - bdrv_drained_begin(bs); -- bdrv_co_unlock(bs); - - old_ctx = bdrv_co_enter(bs); - blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); - bdrv_co_leave(bs, old_ctx); - -- bdrv_co_lock(bs); - bdrv_drained_end(bs); -- bdrv_co_unlock(bs); -- - blk_co_unref(blk); - } - -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index 0327f1c605..4ec0b217f0 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -267,20 +267,6 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char 
*tag); - int bdrv_debug_resume(BlockDriverState *bs, const char *tag); - bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); - --/** -- * Locks the AioContext of @bs if it's not the current AioContext. This avoids -- * double locking which could lead to deadlocks: This is a coroutine_fn, so we -- * know we already own the lock of the current AioContext. -- * -- * May only be called in the main thread. -- */ --void coroutine_fn bdrv_co_lock(BlockDriverState *bs); -- --/** -- * Unlocks the AioContext of @bs if it's not the current AioContext. -- */ --void coroutine_fn bdrv_co_unlock(BlockDriverState *bs); -- - bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); --- -2.39.3 - diff --git a/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch b/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch deleted file mode 100644 index d6670c1..0000000 --- a/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch +++ /dev/null @@ -1,411 +0,0 @@ -From dc4eb64185957a01948217814478abc450ce5f26 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:11 -0500 -Subject: [PATCH 094/101] block: remove outdated AioContext locking comments - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [25/26] 395e18fb40d28d4bc961acee1a00da7f60748076 (kmwolf/centos-qemu-kvm) - -The AioContext lock no longer exists. - -There is one noteworthy change: - - - * More specifically, these functions use BDRV_POLL_WHILE(bs), which - - * requires the caller to be either in the main thread and hold - - * the BlockdriverState (bs) AioContext lock, or directly in the - - * home thread that runs the bs AioContext. Calling them from - - * another thread in another AioContext would cause deadlocks. 
- + * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires - + * the caller to be either in the main thread or directly in the home thread - + * that runs the bs AioContext. Calling them from another thread in another - + * AioContext would cause deadlocks. - -I am not sure whether deadlocks are still possible. Maybe they have just -moved to the fine-grained locks that have replaced the AioContext. Since -I am not sure if the deadlocks are gone, I have kept the substance -unchanged and just removed mention of the AioContext. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-15-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - block.c | 73 ++++++---------------------- - block/block-backend.c | 8 --- - block/export/vhost-user-blk-server.c | 4 -- - include/block/block-common.h | 3 -- - include/block/block-io.h | 9 ++-- - include/block/block_int-common.h | 2 - - tests/qemu-iotests/202 | 2 +- - tests/qemu-iotests/203 | 3 +- - 8 files changed, 22 insertions(+), 82 deletions(-) - -diff --git a/block.c b/block.c -index 434b7f4d72..a097772238 100644 ---- a/block.c -+++ b/block.c -@@ -1616,11 +1616,6 @@ out: - g_free(gen_node_name); - } - --/* -- * The caller must always hold @bs AioContext lock, because this function calls -- * bdrv_refresh_total_sectors() which polls when called from non-coroutine -- * context. -- */ - static int no_coroutine_fn GRAPH_UNLOCKED - bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, - QDict *options, int open_flags, Error **errp) -@@ -2901,7 +2896,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) - * Replaces the node that a BdrvChild points to without updating permissions. - * - * If @new_bs is non-NULL, the parent of @child must already be drained through -- * @child and the caller must hold the AioContext lock for @new_bs. -+ * @child. 
- */ - static void GRAPH_WRLOCK - bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) -@@ -3041,9 +3036,8 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { - * - * Returns new created child. - * -- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and -- * @child_bs can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * Both @parent_bs and @child_bs can move to a different AioContext in this -+ * function. - */ - static BdrvChild * GRAPH_WRLOCK - bdrv_attach_child_common(BlockDriverState *child_bs, -@@ -3142,9 +3136,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - /* - * Function doesn't update permissions, caller is responsible for this. - * -- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and -- * @child_bs can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * Both @parent_bs and @child_bs can move to a different AioContext in this -+ * function. - * - * After calling this function, the transaction @tran may only be completed - * while holding a writer lock for the graph. -@@ -3184,9 +3177,6 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs, - * - * On failure NULL is returned, errp is set and the reference to - * child_bs is also dropped. -- * -- * The caller must hold the AioContext lock @child_bs, but not that of @ctx -- * (unless @child_bs is already in @ctx). - */ - BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - const char *child_name, -@@ -3226,9 +3216,6 @@ out: - * - * On failure NULL is returned, errp is set and the reference to - * child_bs is also dropped. -- * -- * If @parent_bs and @child_bs are in different AioContexts, the caller must -- * hold the AioContext lock for @child_bs, but not for @parent_bs. 
- */ - BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, -@@ -3418,9 +3405,8 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) - * - * Function doesn't update permissions, caller is responsible for this. - * -- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and -- * @child_bs can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * Both @parent_bs and @child_bs can move to a different AioContext in this -+ * function. - * - * After calling this function, the transaction @tran may only be completed - * while holding a writer lock for the graph. -@@ -3513,9 +3499,8 @@ out: - } - - /* -- * The caller must hold the AioContext lock for @backing_hd. Both @bs and -- * @backing_hd can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * Both @bs and @backing_hd can move to a different AioContext in this -+ * function. - * - * If a backing child is already present (i.e. we're detaching a node), that - * child node must be drained. -@@ -3574,8 +3559,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - * itself, all options starting with "${bdref_key}." are considered part of the - * BlockdevRef. - * -- * The caller must hold the main AioContext lock. -- * - * TODO Can this be unified with bdrv_open_image()? - */ - int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, -@@ -3745,9 +3728,7 @@ done: - * - * The BlockdevRef will be removed from the options QDict. - * -- * The caller must hold the lock of the main AioContext and no other AioContext. -- * @parent can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * @parent can move to a different AioContext in this function. 
- */ - BdrvChild *bdrv_open_child(const char *filename, - QDict *options, const char *bdref_key, -@@ -3778,9 +3759,7 @@ BdrvChild *bdrv_open_child(const char *filename, - /* - * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. - * -- * The caller must hold the lock of the main AioContext and no other AioContext. -- * @parent can move to a different AioContext in this function. Callers must -- * make sure that their AioContext locking is still correct after this. -+ * @parent can move to a different AioContext in this function. - */ - int bdrv_open_file_child(const char *filename, - QDict *options, const char *bdref_key, -@@ -3923,8 +3902,6 @@ out: - * The reference parameter may be used to specify an existing block device which - * should be opened. If specified, neither options nor a filename may be given, - * nor can an existing BDS be reused (that is, *pbs has to be NULL). -- * -- * The caller must always hold the main AioContext lock. - */ - static BlockDriverState * no_coroutine_fn - bdrv_open_inherit(const char *filename, const char *reference, QDict *options, -@@ -4217,7 +4194,6 @@ close_and_fail: - return NULL; - } - --/* The caller must always hold the main AioContext lock. */ - BlockDriverState *bdrv_open(const char *filename, const char *reference, - QDict *options, int flags, Error **errp) - { -@@ -4665,10 +4641,7 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, - * - * Return 0 on success, otherwise return < 0 and set @errp. - * -- * The caller must hold the AioContext lock of @reopen_state->bs. - * @reopen_state->bs can move to a different AioContext in this function. -- * Callers must make sure that their AioContext locking is still correct after -- * this. 
- */ - static int GRAPH_UNLOCKED - bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, -@@ -4801,8 +4774,6 @@ out_rdlock: - * It is the responsibility of the caller to then call the abort() or - * commit() for any other BDS that have been left in a prepare() state - * -- * The caller must hold the AioContext lock of @reopen_state->bs. -- * - * After calling this function, the transaction @change_child_tran may only be - * completed while holding a writer lock for the graph. - */ -@@ -5437,8 +5408,6 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) - * child. - * - * This function does not create any image files. -- * -- * The caller must hold the AioContext lock for @bs_top. - */ - int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - Error **errp) -@@ -5545,9 +5514,8 @@ static void bdrv_delete(BlockDriverState *bs) - * after the call (even on failure), so if the caller intends to reuse the - * dictionary, it needs to use qobject_ref() before calling bdrv_open. - * -- * The caller holds the AioContext lock for @bs. It must make sure that @bs -- * stays in the same AioContext, i.e. @options must not refer to nodes in a -- * different AioContext. -+ * The caller must make sure that @bs stays in the same AioContext, i.e. -+ * @options must not refer to nodes in a different AioContext. - */ - BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, - int flags, Error **errp) -@@ -7565,10 +7533,6 @@ static TransactionActionDrv set_aio_context = { - * - * Must be called from the main AioContext. - * -- * The caller must own the AioContext lock for the old AioContext of bs, but it -- * must not own the AioContext lock for new_context (unless new_context is the -- * same as the current context of bs). -- * - * @visited will accumulate all visited BdrvChild objects. The caller is - * responsible for freeing the list afterwards. 
- */ -@@ -7621,13 +7585,6 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, - * - * If ignore_child is not NULL, that child (and its subgraph) will not - * be touched. -- * -- * This function still requires the caller to take the bs current -- * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE -- * assumes the lock is always held if bs is in another AioContext. -- * For the same reason, it temporarily also holds the new AioContext, since -- * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too. -- * Therefore the new AioContext lock must not be taken by the caller. - */ - int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp) -@@ -7653,8 +7610,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - - /* - * Linear phase: go through all callbacks collected in the transaction. -- * Run all callbacks collected in the recursion to switch all nodes -- * AioContext lock (transaction commit), or undo all changes done in the -+ * Run all callbacks collected in the recursion to switch every node's -+ * AioContext (transaction commit), or undo all changes done in the - * recursion (transaction abort). - */ - -diff --git a/block/block-backend.c b/block/block-backend.c -index f412bed274..209eb07528 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -390,8 +390,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) - * Both sets of permissions can be changed later using blk_set_perm(). - * - * Return the new BlockBackend on success, null on failure. -- * -- * Callers must hold the AioContext lock of @bs. 
- */ - BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, - uint64_t shared_perm, Error **errp) -@@ -416,8 +414,6 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, - * Just as with bdrv_open(), after having called this function the reference to - * @options belongs to the block layer (even on failure). - * -- * Called without holding an AioContext lock. -- * - * TODO: Remove @filename and @flags; it should be possible to specify a whole - * BDS tree just by specifying the @options QDict (or @reference, - * alternatively). At the time of adding this function, this is not possible, -@@ -872,8 +868,6 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) - - /* - * Disassociates the currently associated BlockDriverState from @blk. -- * -- * The caller must hold the AioContext lock for the BlockBackend. - */ - void blk_remove_bs(BlockBackend *blk) - { -@@ -915,8 +909,6 @@ void blk_remove_bs(BlockBackend *blk) - - /* - * Associates a new BlockDriverState with @blk. -- * -- * Callers must hold the AioContext lock of @bs. 
- */ - int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) - { -diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c -index 16f48388d3..50c358e8cd 100644 ---- a/block/export/vhost-user-blk-server.c -+++ b/block/export/vhost-user-blk-server.c -@@ -278,7 +278,6 @@ static void vu_blk_exp_resize(void *opaque) - vu_config_change_msg(&vexp->vu_server.vu_dev); - } - --/* Called with vexp->export.ctx acquired */ - static void vu_blk_drained_begin(void *opaque) - { - VuBlkExport *vexp = opaque; -@@ -287,7 +286,6 @@ static void vu_blk_drained_begin(void *opaque) - vhost_user_server_detach_aio_context(&vexp->vu_server); - } - --/* Called with vexp->export.blk AioContext acquired */ - static void vu_blk_drained_end(void *opaque) - { - VuBlkExport *vexp = opaque; -@@ -300,8 +298,6 @@ static void vu_blk_drained_end(void *opaque) - * Ensures that bdrv_drained_begin() waits until in-flight requests complete - * and the server->co_trip coroutine has terminated. It will be restarted in - * vhost_user_server_attach_aio_context(). -- * -- * Called with vexp->export.ctx acquired. - */ - static bool vu_blk_drained_poll(void *opaque) - { -diff --git a/include/block/block-common.h b/include/block/block-common.h -index d7599564db..a846023a09 100644 ---- a/include/block/block-common.h -+++ b/include/block/block-common.h -@@ -70,9 +70,6 @@ - * automatically takes the graph rdlock when calling the wrapped function. In - * the same way, no_co_wrapper_bdrv_wrlock functions automatically take the - * graph wrlock. -- * -- * If the first parameter of the function is a BlockDriverState, BdrvChild or -- * BlockBackend pointer, the AioContext lock for it is taken in the wrapper. 
- */ - #define no_co_wrapper - #define no_co_wrapper_bdrv_rdlock -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 8eb39a858b..b49e0537dd 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -332,11 +332,10 @@ bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, - * "I/O or GS" API functions. These functions can run without - * the BQL, but only in one specific iothread/main loop. - * -- * More specifically, these functions use BDRV_POLL_WHILE(bs), which -- * requires the caller to be either in the main thread and hold -- * the BlockdriverState (bs) AioContext lock, or directly in the -- * home thread that runs the bs AioContext. Calling them from -- * another thread in another AioContext would cause deadlocks. -+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires -+ * the caller to be either in the main thread or directly in the home thread -+ * that runs the bs AioContext. Calling them from another thread in another -+ * AioContext would cause deadlocks. - * - * Therefore, these functions are not proper I/O, because they - * can't run in *any* iothreads, but only in a specific one. -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 4e31d161c5..151279d481 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -1192,8 +1192,6 @@ struct BlockDriverState { - /* The error object in use for blocking operations on backing_hd */ - Error *backing_blocker; - -- /* Protected by AioContext lock */ -- - /* - * If we are reading a disk image, give its size in sectors. - * Generally read-only; it is written to by load_snapshot and -diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202 -index b784dcd791..13304242e5 100755 ---- a/tests/qemu-iotests/202 -+++ b/tests/qemu-iotests/202 -@@ -21,7 +21,7 @@ - # Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a - # single IOThread completes successfully. 
This particular command triggered a - # hang due to recursive AioContext locking and BDRV_POLL_WHILE(). Protect --# against regressions. -+# against regressions even though the AioContext lock no longer exists. - - import iotests - -diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203 -index ab80fd0e44..1ba878522b 100755 ---- a/tests/qemu-iotests/203 -+++ b/tests/qemu-iotests/203 -@@ -21,7 +21,8 @@ - # Check that QMP 'migrate' with multiple drives on a single IOThread completes - # successfully. This particular command triggered a hang in the source QEMU - # process due to recursive AioContext locking in bdrv_invalidate_all() and --# BDRV_POLL_WHILE(). -+# BDRV_POLL_WHILE(). Protect against regressions even though the AioContext -+# lock no longer exists. - - import iotests - --- -2.39.3 - diff --git a/SOURCES/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch b/SOURCES/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch new file mode 100644 index 0000000..785b437 --- /dev/null +++ b/SOURCES/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch @@ -0,0 +1,90 @@ +From 0f0a3a860a07addea21a0282556a5022b9cb8b2c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:35 -0500 +Subject: [PATCH 011/100] confidential guest support: Add kvm_init() and + kvm_reset() in class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [11/91] 21d2178178bf181a8e4d0b051f64bd983f0d0cf1 (bonzini/rhel-qemu-kvm) + +Different confidential VMs in different architectures all have the same +needs to do their specific initialization (and maybe resetting) stuffs +with KVM. Currently each of them exposes individual *_kvm_init() +functions and let machine code or kvm code to call it. 
+ +To facilitate the introduction of confidential guest technology from +different x86 vendors, add two virtual functions, kvm_init() and kvm_reset() +in ConfidentialGuestSupportClass, and expose two helpers functions for +invodking them. + +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 41a605944e3fecae43ca18ded95ec31f28e0c7fe) +Signed-off-by: Paolo Bonzini +--- + include/exec/confidential-guest-support.h | 34 ++++++++++++++++++++++- + 1 file changed, 33 insertions(+), 1 deletion(-) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index ba2dd4b5df..e5b188cffb 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -23,7 +23,10 @@ + #include "qom/object.h" + + #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" +-OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT) ++OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, ++ ConfidentialGuestSupportClass, ++ CONFIDENTIAL_GUEST_SUPPORT) ++ + + struct ConfidentialGuestSupport { + Object parent; +@@ -55,8 +58,37 @@ struct ConfidentialGuestSupport { + + typedef struct ConfidentialGuestSupportClass { + ObjectClass parent; ++ ++ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); ++ int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp); + } ConfidentialGuestSupportClass; + ++static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs, ++ Error **errp) ++{ ++ ConfidentialGuestSupportClass *klass; ++ ++ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); ++ if (klass->kvm_init) { ++ return klass->kvm_init(cgs, errp); ++ } ++ ++ return 0; ++} ++ ++static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs, ++ Error **errp) ++{ ++ ConfidentialGuestSupportClass *klass; ++ ++ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); ++ if 
(klass->kvm_reset) { ++ return klass->kvm_reset(cgs, errp); ++ } ++ ++ return 0; ++} ++ + #endif /* !CONFIG_USER_ONLY */ + + #endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch b/SOURCES/kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch new file mode 100644 index 0000000..b5fcef5 --- /dev/null +++ b/SOURCES/kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch @@ -0,0 +1,228 @@ +From 117486e0820f135f191e19f8ebb8838a98b121c6 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 27 May 2024 11:58:51 -0400 +Subject: [PATCH 5/5] crypto/block: drop qcrypto_block_open() n_threads + argument +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 251: block/crypto: create ciphers on demand +RH-Jira: RHEL-36159 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] 68290935b174b1f2b76aa857a926da9011e54abe (stefanha/centos-stream-qemu-kvm) + +The n_threads argument is no longer used since the previous commit. +Remove it. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240527155851.892885-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Acked-by: Daniel P. 
Berrangé +Signed-off-by: Kevin Wolf +(cherry picked from commit 3ab0f063e58ed9224237d69c4211ca83335164c4) +Signed-off-by: Stefan Hajnoczi +--- + block/crypto.c | 1 - + block/qcow.c | 2 +- + block/qcow2.c | 5 ++--- + crypto/block-luks.c | 1 - + crypto/block-qcow.c | 6 ++---- + crypto/block.c | 3 +-- + crypto/blockpriv.h | 1 - + include/crypto/block.h | 2 -- + tests/unit/test-crypto-block.c | 4 ---- + 9 files changed, 6 insertions(+), 19 deletions(-) + +diff --git a/block/crypto.c b/block/crypto.c +index 21eed909c1..4eed3ffa6a 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -363,7 +363,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, + block_crypto_read_func, + bs, + cflags, +- 1, + errp); + + if (!crypto->block) { +diff --git a/block/qcow.c b/block/qcow.c +index ca8e1d5ec8..c2f89db055 100644 +--- a/block/qcow.c ++++ b/block/qcow.c +@@ -211,7 +211,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(crypto_opts, "encrypt.", +- NULL, NULL, cflags, 1, errp); ++ NULL, NULL, cflags, errp); + if (!s->crypto) { + ret = -EINVAL; + goto fail; +diff --git a/block/qcow2.c b/block/qcow2.c +index 0e8b2f7518..0ebd455dc8 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -321,7 +321,7 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, + } + s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", + qcow2_crypto_hdr_read_func, +- bs, cflags, QCOW2_MAX_THREADS, errp); ++ bs, cflags, errp); + if (!s->crypto) { + return -EINVAL; + } +@@ -1707,8 +1707,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", +- NULL, NULL, cflags, +- QCOW2_MAX_THREADS, errp); ++ NULL, NULL, cflags, errp); + if (!s->crypto) { + ret = -EINVAL; + goto fail; +diff --git a/crypto/block-luks.c b/crypto/block-luks.c +index 3357852c0a..5b777c15d3 100644 
+--- a/crypto/block-luks.c ++++ b/crypto/block-luks.c +@@ -1189,7 +1189,6 @@ qcrypto_block_luks_open(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp) + { + QCryptoBlockLUKS *luks = NULL; +diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c +index 02305058e3..42e9556e42 100644 +--- a/crypto/block-qcow.c ++++ b/crypto/block-qcow.c +@@ -44,7 +44,6 @@ qcrypto_block_qcow_has_format(const uint8_t *buf G_GNUC_UNUSED, + static int + qcrypto_block_qcow_init(QCryptoBlock *block, + const char *keysecret, +- size_t n_threads, + Error **errp) + { + char *password; +@@ -100,7 +99,6 @@ qcrypto_block_qcow_open(QCryptoBlock *block, + QCryptoBlockReadFunc readfunc G_GNUC_UNUSED, + void *opaque G_GNUC_UNUSED, + unsigned int flags, +- size_t n_threads, + Error **errp) + { + if (flags & QCRYPTO_BLOCK_OPEN_NO_IO) { +@@ -115,7 +113,7 @@ qcrypto_block_qcow_open(QCryptoBlock *block, + return -1; + } + return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, +- n_threads, errp); ++ errp); + } + } + +@@ -135,7 +133,7 @@ qcrypto_block_qcow_create(QCryptoBlock *block, + return -1; + } + /* QCow2 has no special header, since everything is hardwired */ +- return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, 1, errp); ++ return qcrypto_block_qcow_init(block, options->u.qcow.key_secret, errp); + } + + +diff --git a/crypto/block.c b/crypto/block.c +index ba6d1cebc7..3bcc4270c3 100644 +--- a/crypto/block.c ++++ b/crypto/block.c +@@ -53,7 +53,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp) + { + QCryptoBlock *block = g_new0(QCryptoBlock, 1); +@@ -73,7 +72,7 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + block->driver = qcrypto_block_drivers[options->format]; + + if (block->driver->open(block, options, optprefix, +- readfunc, opaque, flags, 
n_threads, errp) < 0) ++ readfunc, opaque, flags, errp) < 0) + { + g_free(block); + return NULL; +diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h +index 4bf6043d5d..b8f77cb5eb 100644 +--- a/crypto/blockpriv.h ++++ b/crypto/blockpriv.h +@@ -59,7 +59,6 @@ struct QCryptoBlockDriver { + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp); + + int (*create)(QCryptoBlock *block, +diff --git a/include/crypto/block.h b/include/crypto/block.h +index 92e823c9f2..5b5d039800 100644 +--- a/include/crypto/block.h ++++ b/include/crypto/block.h +@@ -76,7 +76,6 @@ typedef enum { + * @readfunc: callback for reading data from the volume + * @opaque: data to pass to @readfunc + * @flags: bitmask of QCryptoBlockOpenFlags values +- * @n_threads: allow concurrent I/O from up to @n_threads threads + * @errp: pointer to a NULL-initialized error object + * + * Create a new block encryption object for an existing +@@ -113,7 +112,6 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, + QCryptoBlockReadFunc readfunc, + void *opaque, + unsigned int flags, +- size_t n_threads, + Error **errp); + + typedef enum { +diff --git a/tests/unit/test-crypto-block.c b/tests/unit/test-crypto-block.c +index 6cfc817a92..42cfab6067 100644 +--- a/tests/unit/test-crypto-block.c ++++ b/tests/unit/test-crypto-block.c +@@ -303,7 +303,6 @@ static void test_block(gconstpointer opaque) + test_block_read_func, + &header, + 0, +- 1, + NULL); + g_assert(blk == NULL); + +@@ -312,7 +311,6 @@ static void test_block(gconstpointer opaque) + test_block_read_func, + &header, + QCRYPTO_BLOCK_OPEN_NO_IO, +- 1, + &error_abort); + + g_assert(qcrypto_block_get_cipher(blk) == NULL); +@@ -327,7 +325,6 @@ static void test_block(gconstpointer opaque) + test_block_read_func, + &header, + 0, +- 1, + &error_abort); + g_assert(blk); + +@@ -384,7 +381,6 @@ test_luks_bad_header(gconstpointer data) + test_block_read_func, + &buf, + 0, +- 1, + &err); + g_assert(!blk); 
+ g_assert(err); +-- +2.39.3 + diff --git a/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch b/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch deleted file mode 100644 index 735f2a3..0000000 --- a/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch +++ /dev/null @@ -1,75 +0,0 @@ -From ac9dc8ea241ef6d3a0447d696620d4d4053b71bf Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 4 Dec 2023 11:42:59 -0500 -Subject: [PATCH 080/101] dma-helpers: don't lock AioContext in dma_blk_cb() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [11/26] a8580463ba6aee4ca248c0b947b9e72bd9e87aab (kmwolf/centos-qemu-kvm) - -Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs -dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb() -to avoid a race with scsi_device_purge_requests() running in the main -loop thread. - -The SCSI code no longer calls dma_aio_cancel() from the main loop thread -while I/O is running in the IOThread AioContext. Therefore it is no -longer necessary to take this lock to protect DMAAIOCB fields. The -->cb() function also does not require the lock because blk_aio_*() and -friends do not need the AioContext lock. - -Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't -rely on it taking the AioContext lock, so this change is safe. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Message-ID: <20231204164259.1515217-5-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - system/dma-helpers.c | 7 ++----- - 1 file changed, 2 insertions(+), 5 deletions(-) - -diff --git a/system/dma-helpers.c b/system/dma-helpers.c -index 36211acc7e..528117f256 100644 ---- a/system/dma-helpers.c -+++ b/system/dma-helpers.c -@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret) - - trace_dma_blk_cb(dbs, ret); - -- aio_context_acquire(ctx); - dbs->acb = NULL; - dbs->offset += dbs->iov.size; - - if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { - dma_complete(dbs, ret); -- goto out; -+ return; - } - dma_blk_unmap(dbs); - -@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret) - trace_dma_map_wait(dbs); - dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); - cpu_register_map_client(dbs->bh); -- goto out; -+ return; - } - - if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { -@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret) - dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, - dma_blk_cb, dbs, dbs->io_func_opaque); - assert(dbs->acb); --out: -- aio_context_release(ctx); - } - - static void dma_aio_cancel(BlockAIOCB *acb) --- -2.39.3 - diff --git a/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch b/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch deleted file mode 100644 index dbe48d7..0000000 --- a/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch +++ /dev/null @@ -1,228 +0,0 @@ -From 71aa0219f7c84cbf175eb2a091d48d5fd5daa40b Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:26 +0800 -Subject: [PATCH 047/101] docs/devel: Add VFIO iommufd backend documentation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 
-RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [46/67] 6cf49d00e87788f894d690a985bb6798eae24505 (eauger1/centos-qemu-kvm) - -Suggested-by: Cédric Le Goater -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Zhenzhong Duan -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 98dad2b01931f6064c6c4b48ca3c2a1d9f542cd8) -Signed-off-by: Eric Auger ---- - MAINTAINERS | 1 + - docs/devel/index-internals.rst | 1 + - docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++ - 3 files changed, 168 insertions(+) - create mode 100644 docs/devel/vfio-iommufd.rst - -diff --git a/MAINTAINERS b/MAINTAINERS -index ca70bb4e64..0ddb20a35f 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -2176,6 +2176,7 @@ F: backends/iommufd.c - F: include/sysemu/iommufd.h - F: include/qemu/chardev_open.h - F: util/chardev_open.c -+F: docs/devel/vfio-iommufd.rst - - vhost - M: Michael S. Tsirkin -diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst -index 6f81df92bc..3def4a138b 100644 ---- a/docs/devel/index-internals.rst -+++ b/docs/devel/index-internals.rst -@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them. - s390-dasd-ipl - tracing - vfio-migration -+ vfio-iommufd - writing-monitor-commands - virtio-backends -diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst -new file mode 100644 -index 0000000000..3d1c11f175 ---- /dev/null -+++ b/docs/devel/vfio-iommufd.rst -@@ -0,0 +1,166 @@ -+=============================== -+IOMMUFD BACKEND usage with VFIO -+=============================== -+ -+(Same meaning for backend/container/BE) -+ -+With the introduction of iommufd, the Linux kernel provides a generic -+interface for user space drivers to propagate their DMA mappings to kernel -+for assigned devices. While the legacy kernel interface is group-centric, -+the new iommufd interface is device-centric, relying on device fd and iommufd. 
-+ -+To support both interfaces in the QEMU VFIO device, introduce a base container -+to abstract the common part of VFIO legacy and iommufd container. So that the -+generic VFIO code can use either container. -+ -+The base container implements generic functions such as memory_listener and -+address space management whereas the derived container implements callbacks -+specific to either legacy or iommufd. Each container has its own way to setup -+secure context and dma management interface. The below diagram shows how it -+looks like with both containers. -+ -+:: -+ -+ VFIO AddressSpace/Memory -+ +-------+ +----------+ +-----+ +-----+ -+ | pci | | platform | | ap | | ccw | -+ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+ -+ | | | | | AddressSpace | -+ | | | | +------------+---------+ -+ +---V-----------V-----------V--------V----+ / -+ | VFIOAddressSpace | <------------+ -+ | | | MemoryListener -+ | VFIOContainerBase list | -+ +-------+----------------------------+----+ -+ | | -+ | | -+ +-------V------+ +--------V----------+ -+ | iommufd | | vfio legacy | -+ | container | | container | -+ +-------+------+ +--------+----------+ -+ | | -+ | /dev/iommu | /dev/vfio/vfio -+ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id -+ Userspace | | -+ ============+============================+=========================== -+ Kernel | device fd | -+ +---------------+ | group/container fd -+ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU) -+ | ATTACH_IOAS) | | device fd -+ | | | -+ | +-------V------------V-----------------+ -+ iommufd | | vfio | -+ (map/unmap | +---------+--------------------+-------+ -+ ioas_copy) | | | map/unmap -+ | | | -+ +------V------+ +-----V------+ +------V--------+ -+ | iommfd core | | device | | vfio iommu | -+ +-------------+ +------------+ +---------------+ -+ -+* Secure Context setup -+ -+ - iommufd BE: uses device fd and iommufd to setup secure context -+ (bind_iommufd, attach_ioas) -+ - vfio legacy BE: uses group fd and container fd to 
setup secure context -+ (set_container, set_iommu) -+ -+* Device access -+ -+ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX`` -+ - vfio legacy BE: device fd is retrieved from group fd ioctl -+ -+* DMA Mapping flow -+ -+ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener -+ 2. VFIO populates DMA map/unmap via the container BEs -+ * iommufd BE: uses iommufd -+ * vfio legacy BE: uses container fd -+ -+Example configuration -+===================== -+ -+Step 1: configure the host device -+--------------------------------- -+ -+It's exactly same as the VFIO device with legacy VFIO container. -+ -+Step 2: configure QEMU -+---------------------- -+ -+Interactions with the ``/dev/iommu`` are abstracted by a new iommufd -+object (compiled in with the ``CONFIG_IOMMUFD`` option). -+ -+Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must -+be linked with an iommufd object. It gets a new optional property -+named iommufd which allows to pass an iommufd object. Take ``vfio-pci`` -+device for example: -+ -+.. code-block:: bash -+ -+ -object iommufd,id=iommufd0 -+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0 -+ -+Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a -+management layer. In such a case the fd is passed, the fd supports a -+string naming the fd or a number, for example: -+ -+.. code-block:: bash -+ -+ -object iommufd,id=iommufd0,fd=22 -+ -device vfio-pci,iommufd=iommufd0,fd=23 -+ -+If the ``fd`` property is not passed, the fd is opened by QEMU. -+ -+If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd -+is not used and the user gets the behavior based on the legacy VFIO -+container: -+ -+.. code-block:: bash -+ -+ -device vfio-pci,host=0000:02:00.0 -+ -+Supported platform -+================== -+ -+Supports x86, ARM and s390x currently. 
-+ -+Caveats -+======= -+ -+Dirty page sync -+--------------- -+ -+Dirty page sync with iommufd backend is unsupported yet, live migration is -+disabled by default. But it can be force enabled like below, low efficient -+though. -+ -+.. code-block:: bash -+ -+ -object iommufd,id=iommufd0 -+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on -+ -+P2P DMA -+------- -+ -+PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI -+BAR region yet. Below warning shows for assigned PCI device, it's not a bug. -+ -+.. code-block:: none -+ -+ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR? -+ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address) -+ -+FD passing with mdev -+-------------------- -+ -+``vfio-pci`` device checks sysfsdev property to decide if backend is a mdev. -+If FD passing is used, there is no way to know that and the mdev is treated -+like a real PCI device. There is an error as below if user wants to enable -+RAM discarding for mdev. -+ -+.. code-block:: none -+ -+ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices -+ -+``vfio-ap`` and ``vfio-ccw`` devices don't have same issue as their backend -+devices are always mdev and RAM discarding is force enabled. 
--- -2.39.3 - diff --git a/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch b/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch deleted file mode 100644 index 80adc69..0000000 --- a/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch +++ /dev/null @@ -1,98 +0,0 @@ -From fc69df3a70bed5722643cc16828ca20beae3a20d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:08 -0500 -Subject: [PATCH 091/101] docs: remove AioContext lock from IOThread docs - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [22/26] ab89cda483e74ded983d26e1c6e50217405e0a55 (kmwolf/centos-qemu-kvm) - -Encourage the use of locking primitives and stop mentioning the -AioContext lock since it is being removed. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-12-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - docs/devel/multiple-iothreads.txt | 47 +++++++++++-------------------- - 1 file changed, 16 insertions(+), 31 deletions(-) - -diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt -index a3e949f6b3..4865196bde 100644 ---- a/docs/devel/multiple-iothreads.txt -+++ b/docs/devel/multiple-iothreads.txt -@@ -88,27 +88,18 @@ loop, depending on which AioContext instance the caller passes in. - - How to synchronize with an IOThread - ----------------------------------- --AioContext is not thread-safe so some rules must be followed when using file --descriptors, event notifiers, timers, or BHs across threads: -+Variables that can be accessed by multiple threads require some form of -+synchronization such as qemu_mutex_lock(), rcu_read_lock(), etc. - --1. AioContext functions can always be called safely. They handle their --own locking internally. -- --2. 
Other threads wishing to access the AioContext must use --aio_context_acquire()/aio_context_release() for mutual exclusion. Once the --context is acquired no other thread can access it or run event loop iterations --in this AioContext. -- --Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls. --Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro --used in the block layer and can lead to hangs. -- --There is currently no lock ordering rule if a thread needs to acquire multiple --AioContexts simultaneously. Therefore, it is only safe for code holding the --QEMU global mutex to acquire other AioContexts. -+AioContext functions like aio_set_fd_handler(), aio_set_event_notifier(), -+aio_bh_new(), and aio_timer_new() are thread-safe. They can be used to trigger -+activity in an IOThread. - - Side note: the best way to schedule a function call across threads is to call --aio_bh_schedule_oneshot(). No acquire/release or locking is needed. -+aio_bh_schedule_oneshot(). -+ -+The main loop thread can wait synchronously for a condition using -+AIO_WAIT_WHILE(). - - AioContext and the block layer - ------------------------------ -@@ -124,22 +115,16 @@ Block layer code must therefore expect to run in an IOThread and avoid using - old APIs that implicitly use the main loop. See the "How to program for - IOThreads" above for information on how to do that. - --If main loop code such as a QMP function wishes to access a BlockDriverState --it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure --that callbacks in the IOThread do not run in parallel. -- - Code running in the monitor typically needs to ensure that past - requests from the guest are completed. When a block device is running - in an IOThread, the IOThread can also process requests from the guest - (via ioeventfd). To achieve both objects, wrap the code between - bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained --section". 
The functions must be called between aio_context_acquire() --and aio_context_release(). You can freely release and re-acquire the --AioContext within a drained section. -- --Long-running jobs (usually in the form of coroutines) are best scheduled in --the BlockDriverState's AioContext to avoid the need to acquire/release around --each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier, --or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends, --can be used to get a notification whenever bdrv_try_change_aio_context() moves a -+section". -+ -+Long-running jobs (usually in the form of coroutines) are often scheduled in -+the BlockDriverState's AioContext. The functions -+bdrv_add/remove_aio_context_notifier, or alternatively -+blk_add/remove_aio_context_notifier if you use BlockBackends, can be used to -+get a notification whenever bdrv_try_change_aio_context() moves a - BlockDriverState to a different AioContext. --- -2.39.3 - diff --git a/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch b/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch deleted file mode 100644 index 2fff9ba..0000000 --- a/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch +++ /dev/null @@ -1,1190 +0,0 @@ -From 57d96b5774fab588c6bb6812ef8ef281ffe018d7 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:02 -0500 -Subject: [PATCH 085/101] graph-lock: remove AioContext locking - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [16/26] 9575a8b834aaaa03abaf869e96f0808172e87824 (kmwolf/centos-qemu-kvm) - -Stop acquiring/releasing the AioContext lock in -bdrv_graph_wrlock()/bdrv_graph_unlock() since the lock no longer has any -effect. - -The distinction between bdrv_graph_wrunlock() and -bdrv_graph_wrunlock_ctx() becomes meaningless and they can be collapsed -into one function. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Message-ID: <20231205182011.1976568-6-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - block.c | 50 +++++++++++++++--------------- - block/backup.c | 4 +-- - block/blklogwrites.c | 8 ++--- - block/blkverify.c | 4 +-- - block/block-backend.c | 11 +++---- - block/commit.c | 16 +++++----- - block/graph-lock.c | 44 ++------------------------ - block/mirror.c | 22 ++++++------- - block/qcow2.c | 4 +-- - block/quorum.c | 8 ++--- - block/replication.c | 14 ++++----- - block/snapshot.c | 4 +-- - block/stream.c | 12 +++---- - block/vmdk.c | 20 ++++++------ - blockdev.c | 8 ++--- - blockjob.c | 12 +++---- - include/block/graph-lock.h | 21 ++----------- - scripts/block-coroutine-wrapper.py | 4 +-- - tests/unit/test-bdrv-drain.c | 40 ++++++++++++------------ - tests/unit/test-bdrv-graph-mod.c | 20 ++++++------ - 20 files changed, 133 insertions(+), 193 deletions(-) - -diff --git a/block.c b/block.c -index bfb0861ec6..25e1ebc606 100644 ---- a/block.c -+++ b/block.c -@@ -1708,12 +1708,12 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, - open_failed: - bs->drv = NULL; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - if (bs->file != NULL) { - bdrv_unref_child(bs, bs->file); - assert(!bs->file); - } -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - g_free(bs->opaque); - bs->opaque = NULL; -@@ -3575,9 +3575,9 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - - bdrv_ref(drain_bs); - bdrv_drained_begin(drain_bs); -- bdrv_graph_wrlock(backing_hd); -+ bdrv_graph_wrlock(); - ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); -- bdrv_graph_wrunlock(backing_hd); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(drain_bs); - bdrv_unref(drain_bs); - -@@ -3790,13 +3790,13 @@ BdrvChild *bdrv_open_child(const char *filename, - return NULL; - } - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - ctx = 
bdrv_get_aio_context(bs); - aio_context_acquire(ctx); - child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, - errp); - aio_context_release(ctx); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - return child; - } -@@ -4650,9 +4650,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - aio_context_release(ctx); - } - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - tran_commit(tran); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { - BlockDriverState *bs = bs_entry->state.bs; -@@ -4669,9 +4669,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) - goto cleanup; - - abort: -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - tran_abort(tran); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - if (bs_entry->prepared) { -@@ -4852,12 +4852,12 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, - } - - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(new_child_bs); -+ bdrv_graph_wrlock(); - - ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, - tran, errp); - -- bdrv_graph_wrunlock_ctx(ctx); -+ bdrv_graph_wrunlock(); - - if (old_ctx != ctx) { - aio_context_release(ctx); -@@ -5209,14 +5209,14 @@ static void bdrv_close(BlockDriverState *bs) - bs->drv = NULL; - } - -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - bdrv_unref_child(bs, child); - } - - assert(!bs->backing); - assert(!bs->file); -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - - g_free(bs->opaque); - bs->opaque = NULL; -@@ -5509,9 +5509,9 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) - bdrv_graph_rdunlock_main_loop(); - - bdrv_drained_begin(child_bs); -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - ret = bdrv_replace_node_common(bs, child_bs, true, true, errp); -- bdrv_graph_wrunlock(bs); 
-+ bdrv_graph_wrunlock(); - bdrv_drained_end(child_bs); - - return ret; -@@ -5561,7 +5561,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - aio_context_acquire(old_context); - new_context = NULL; - -- bdrv_graph_wrlock(bs_top); -+ bdrv_graph_wrlock(); - - child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", - &child_of_bds, bdrv_backing_role(bs_new), -@@ -5593,7 +5593,7 @@ out: - tran_finalize(tran, ret); - - bdrv_refresh_limits(bs_top, NULL, NULL); -- bdrv_graph_wrunlock(bs_top); -+ bdrv_graph_wrunlock(); - - bdrv_drained_end(bs_top); - bdrv_drained_end(bs_new); -@@ -5620,7 +5620,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, - bdrv_ref(old_bs); - bdrv_drained_begin(old_bs); - bdrv_drained_begin(new_bs); -- bdrv_graph_wrlock(new_bs); -+ bdrv_graph_wrlock(); - - bdrv_replace_child_tran(child, new_bs, tran); - -@@ -5631,7 +5631,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, - - tran_finalize(tran, ret); - -- bdrv_graph_wrunlock(new_bs); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(old_bs); - bdrv_drained_end(new_bs); - bdrv_unref(old_bs); -@@ -5718,9 +5718,9 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, - bdrv_ref(bs); - bdrv_drained_begin(bs); - bdrv_drained_begin(new_node_bs); -- bdrv_graph_wrlock(new_node_bs); -+ bdrv_graph_wrlock(); - ret = bdrv_replace_node(bs, new_node_bs, errp); -- bdrv_graph_wrunlock(new_node_bs); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(new_node_bs); - bdrv_drained_end(bs); - bdrv_unref(bs); -@@ -5975,7 +5975,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - - bdrv_ref(top); - bdrv_drained_begin(base); -- bdrv_graph_wrlock(base); -+ bdrv_graph_wrlock(); - - if (!top->drv || !base->drv) { - goto exit_wrlock; -@@ -6015,7 +6015,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - * That's a FIXME. 
- */ - bdrv_replace_node_common(top, base, false, false, &local_err); -- bdrv_graph_wrunlock(base); -+ bdrv_graph_wrunlock(); - - if (local_err) { - error_report_err(local_err); -@@ -6052,7 +6052,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - goto exit; - - exit_wrlock: -- bdrv_graph_wrunlock(base); -+ bdrv_graph_wrunlock(); - exit: - bdrv_drained_end(base); - bdrv_unref(top); -diff --git a/block/backup.c b/block/backup.c -index 8aae5836d7..ec29d6b810 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -496,10 +496,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - block_copy_set_speed(bcs, speed); - - /* Required permissions are taken by copy-before-write filter target */ -- bdrv_graph_wrlock(target); -+ bdrv_graph_wrlock(); - block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, - &error_abort); -- bdrv_graph_wrunlock(target); -+ bdrv_graph_wrunlock(); - - return &job->common; - -diff --git a/block/blklogwrites.c b/block/blklogwrites.c -index 3678f6cf42..7207b2e757 100644 ---- a/block/blklogwrites.c -+++ b/block/blklogwrites.c -@@ -251,9 +251,9 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, - ret = 0; - fail_log: - if (ret < 0) { -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->log_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - s->log_file = NULL; - } - fail: -@@ -265,10 +265,10 @@ static void blk_log_writes_close(BlockDriverState *bs) - { - BDRVBlkLogWritesState *s = bs->opaque; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->log_file); - s->log_file = NULL; -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - } - - static int64_t coroutine_fn GRAPH_RDLOCK -diff --git a/block/blkverify.c b/block/blkverify.c -index 9b17c46644..ec45d8335e 100644 ---- a/block/blkverify.c -+++ b/block/blkverify.c -@@ -151,10 +151,10 @@ static void blkverify_close(BlockDriverState *bs) - { 
- BDRVBlkverifyState *s = bs->opaque; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->test_file); - s->test_file = NULL; -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - } - - static int64_t coroutine_fn GRAPH_RDLOCK -diff --git a/block/block-backend.c b/block/block-backend.c -index ec21148806..abac4e0235 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -889,7 +889,6 @@ void blk_remove_bs(BlockBackend *blk) - { - ThrottleGroupMember *tgm = &blk->public.throttle_group_member; - BdrvChild *root; -- AioContext *ctx; - - GLOBAL_STATE_CODE(); - -@@ -919,10 +918,9 @@ void blk_remove_bs(BlockBackend *blk) - root = blk->root; - blk->root = NULL; - -- ctx = bdrv_get_aio_context(root->bs); -- bdrv_graph_wrlock(root->bs); -+ bdrv_graph_wrlock(); - bdrv_root_unref_child(root); -- bdrv_graph_wrunlock_ctx(ctx); -+ bdrv_graph_wrunlock(); - } - - /* -@@ -933,16 +931,15 @@ void blk_remove_bs(BlockBackend *blk) - int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) - { - ThrottleGroupMember *tgm = &blk->public.throttle_group_member; -- AioContext *ctx = bdrv_get_aio_context(bs); - - GLOBAL_STATE_CODE(); - bdrv_ref(bs); -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - blk->root = bdrv_root_attach_child(bs, "root", &child_root, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - blk->perm, blk->shared_perm, - blk, errp); -- bdrv_graph_wrunlock_ctx(ctx); -+ bdrv_graph_wrunlock(); - if (blk->root == NULL) { - return -EPERM; - } -diff --git a/block/commit.c b/block/commit.c -index 69cc75be0c..1dd7a65ffb 100644 ---- a/block/commit.c -+++ b/block/commit.c -@@ -100,9 +100,9 @@ static void commit_abort(Job *job) - bdrv_graph_rdunlock_main_loop(); - - bdrv_drained_begin(commit_top_backing_bs); -- bdrv_graph_wrlock(commit_top_backing_bs); -+ bdrv_graph_wrlock(); - bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort); -- bdrv_graph_wrunlock(commit_top_backing_bs); -+ bdrv_graph_wrunlock(); - 
bdrv_drained_end(commit_top_backing_bs); - - bdrv_unref(s->commit_top_bs); -@@ -339,7 +339,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, - * this is the responsibility of the interface (i.e. whoever calls - * commit_start()). - */ -- bdrv_graph_wrlock(top); -+ bdrv_graph_wrlock(); - s->base_overlay = bdrv_find_overlay(top, base); - assert(s->base_overlay); - -@@ -370,19 +370,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, - ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - iter_shared_perms, errp); - if (ret < 0) { -- bdrv_graph_wrunlock(top); -+ bdrv_graph_wrunlock(); - goto fail; - } - } - - if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) { -- bdrv_graph_wrunlock(top); -+ bdrv_graph_wrunlock(); - goto fail; - } - s->chain_frozen = true; - - ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); -- bdrv_graph_wrunlock(top); -+ bdrv_graph_wrunlock(); - - if (ret < 0) { - goto fail; -@@ -434,9 +434,9 @@ fail: - * otherwise this would fail because of lack of permissions. */ - if (commit_top_bs) { - bdrv_drained_begin(top); -- bdrv_graph_wrlock(top); -+ bdrv_graph_wrlock(); - bdrv_replace_node(commit_top_bs, top, &error_abort); -- bdrv_graph_wrunlock(top); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(top); - } - } -diff --git a/block/graph-lock.c b/block/graph-lock.c -index 079e878d9b..c81162b147 100644 ---- a/block/graph-lock.c -+++ b/block/graph-lock.c -@@ -106,27 +106,12 @@ static uint32_t reader_count(void) - return rd; - } - --void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs) -+void no_coroutine_fn bdrv_graph_wrlock(void) - { -- AioContext *ctx = NULL; -- - GLOBAL_STATE_CODE(); - assert(!qatomic_read(&has_writer)); - assert(!qemu_in_coroutine()); - -- /* -- * Release only non-mainloop AioContext. The mainloop often relies on the -- * BQL and doesn't lock the main AioContext before doing things. 
-- */ -- if (bs) { -- ctx = bdrv_get_aio_context(bs); -- if (ctx != qemu_get_aio_context()) { -- aio_context_release(ctx); -- } else { -- ctx = NULL; -- } -- } -- - /* Make sure that constantly arriving new I/O doesn't cause starvation */ - bdrv_drain_all_begin_nopoll(); - -@@ -155,27 +140,13 @@ void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs) - } while (reader_count() >= 1); - - bdrv_drain_all_end(); -- -- if (ctx) { -- aio_context_acquire(bdrv_get_aio_context(bs)); -- } - } - --void no_coroutine_fn bdrv_graph_wrunlock_ctx(AioContext *ctx) -+void no_coroutine_fn bdrv_graph_wrunlock(void) - { - GLOBAL_STATE_CODE(); - assert(qatomic_read(&has_writer)); - -- /* -- * Release only non-mainloop AioContext. The mainloop often relies on the -- * BQL and doesn't lock the main AioContext before doing things. -- */ -- if (ctx && ctx != qemu_get_aio_context()) { -- aio_context_release(ctx); -- } else { -- ctx = NULL; -- } -- - WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) { - /* - * No need for memory barriers, this works in pair with -@@ -197,17 +168,6 @@ void no_coroutine_fn bdrv_graph_wrunlock_ctx(AioContext *ctx) - * progress. - */ - aio_bh_poll(qemu_get_aio_context()); -- -- if (ctx) { -- aio_context_acquire(ctx); -- } --} -- --void no_coroutine_fn bdrv_graph_wrunlock(BlockDriverState *bs) --{ -- AioContext *ctx = bs ? bdrv_get_aio_context(bs) : NULL; -- -- bdrv_graph_wrunlock_ctx(ctx); - } - - void coroutine_fn bdrv_graph_co_rdlock(void) -diff --git a/block/mirror.c b/block/mirror.c -index cd9d3ad4a8..51f9e2f17c 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -764,7 +764,7 @@ static int mirror_exit_common(Job *job) - * check for an op blocker on @to_replace, and we have our own - * there. 
- */ -- bdrv_graph_wrlock(target_bs); -+ bdrv_graph_wrlock(); - if (bdrv_recurse_can_replace(src, to_replace)) { - bdrv_replace_node(to_replace, target_bs, &local_err); - } else { -@@ -773,7 +773,7 @@ static int mirror_exit_common(Job *job) - "would not lead to an abrupt change of visible data", - to_replace->node_name, target_bs->node_name); - } -- bdrv_graph_wrunlock(target_bs); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(to_replace); - if (local_err) { - error_report_err(local_err); -@@ -796,9 +796,9 @@ static int mirror_exit_common(Job *job) - * valid. - */ - block_job_remove_all_bdrv(bjob); -- bdrv_graph_wrlock(mirror_top_bs); -+ bdrv_graph_wrlock(); - bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort); -- bdrv_graph_wrunlock(mirror_top_bs); -+ bdrv_graph_wrunlock(); - - bdrv_drained_end(target_bs); - bdrv_unref(target_bs); -@@ -1914,13 +1914,13 @@ static BlockJob *mirror_start_job( - */ - bdrv_disable_dirty_bitmap(s->dirty_bitmap); - -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - ret = block_job_add_bdrv(&s->common, "source", bs, 0, - BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | - BLK_PERM_CONSISTENT_READ, - errp); - if (ret < 0) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - goto fail; - } - -@@ -1965,17 +1965,17 @@ static BlockJob *mirror_start_job( - ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - iter_shared_perms, errp); - if (ret < 0) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - goto fail; - } - } - - if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - goto fail; - } - } -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - - QTAILQ_INIT(&s->ops_in_flight); - -@@ -2001,12 +2001,12 @@ fail: - - bs_opaque->stop = true; - bdrv_drained_begin(bs); -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - assert(mirror_top_bs->backing->bs == bs); - bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, - 
&error_abort); - bdrv_replace_node(mirror_top_bs, bs, &error_abort); -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(bs); - - bdrv_unref(mirror_top_bs); -diff --git a/block/qcow2.c b/block/qcow2.c -index 7968735346..d91b7b91d3 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -2813,9 +2813,9 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file) - if (close_data_file && has_data_file(bs)) { - GLOBAL_STATE_CODE(); - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->data_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - s->data_file = NULL; - bdrv_graph_rdlock_main_loop(); - } -diff --git a/block/quorum.c b/block/quorum.c -index 505b8b3e18..db8fe891c4 100644 ---- a/block/quorum.c -+++ b/block/quorum.c -@@ -1037,14 +1037,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, - - close_exit: - /* cleanup on error */ -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - for (i = 0; i < s->num_children; i++) { - if (!opened[i]) { - continue; - } - bdrv_unref_child(bs, s->children[i]); - } -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - g_free(s->children); - g_free(opened); - exit: -@@ -1057,11 +1057,11 @@ static void quorum_close(BlockDriverState *bs) - BDRVQuorumState *s = bs->opaque; - int i; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - for (i = 0; i < s->num_children; i++) { - bdrv_unref_child(bs, s->children[i]); - } -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - g_free(s->children); - } -diff --git a/block/replication.c b/block/replication.c -index 5ded5f1ca9..424b537ff7 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -560,7 +560,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - return; - } - -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - - bdrv_ref(hidden_disk->bs); - s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk", -@@ 
-568,7 +568,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - &local_err); - if (local_err) { - error_propagate(errp, local_err); -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - aio_context_release(aio_context); - return; - } -@@ -579,7 +579,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - BDRV_CHILD_DATA, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - aio_context_release(aio_context); - return; - } -@@ -592,7 +592,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (!top_bs || !bdrv_is_root_node(top_bs) || - !check_top_bs(top_bs, bs)) { - error_setg(errp, "No top_bs or it is invalid"); -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - reopen_backing_file(bs, false, NULL); - aio_context_release(aio_context); - return; -@@ -600,7 +600,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - bdrv_op_block_all(top_bs, s->blocker); - bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker); - -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - - s->backup_job = backup_job_create( - NULL, s->secondary_disk->bs, s->hidden_disk->bs, -@@ -691,12 +691,12 @@ static void replication_done(void *opaque, int ret) - if (ret == 0) { - s->stage = BLOCK_REPLICATION_DONE; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->secondary_disk); - s->secondary_disk = NULL; - bdrv_unref_child(bs, s->hidden_disk); - s->hidden_disk = NULL; -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - s->error = 0; - } else { -diff --git a/block/snapshot.c b/block/snapshot.c -index ec8cf4810b..e486d3e205 100644 ---- a/block/snapshot.c -+++ b/block/snapshot.c -@@ -290,9 +290,9 @@ int bdrv_snapshot_goto(BlockDriverState *bs, - } - - /* .bdrv_open() will re-attach it */ -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, fallback); -- 
bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); - open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err); -diff --git a/block/stream.c b/block/stream.c -index 01fe7c0f16..048c2d282f 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -99,9 +99,9 @@ static int stream_prepare(Job *job) - } - } - -- bdrv_graph_wrlock(s->target_bs); -+ bdrv_graph_wrlock(); - bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); -- bdrv_graph_wrunlock(s->target_bs); -+ bdrv_graph_wrunlock(); - - /* - * This call will do I/O, so the graph can change again from here on. -@@ -366,10 +366,10 @@ void stream_start(const char *job_id, BlockDriverState *bs, - * already have our own plans. Also don't allow resize as the image size is - * queried only at the job start and then cached. - */ -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - if (block_job_add_bdrv(&s->common, "active node", bs, 0, - basic_flags | BLK_PERM_WRITE, errp)) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - goto fail; - } - -@@ -389,11 +389,11 @@ void stream_start(const char *job_id, BlockDriverState *bs, - ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, - basic_flags, errp); - if (ret < 0) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - goto fail; - } - } -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - - s->base_overlay = base_overlay; - s->above_base = above_base; -diff --git a/block/vmdk.c b/block/vmdk.c -index d6971c7067..bf78e12383 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -272,7 +272,7 @@ static void vmdk_free_extents(BlockDriverState *bs) - BDRVVmdkState *s = bs->opaque; - VmdkExtent *e; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - for (i = 0; i < s->num_extents; i++) { - e = &s->extents[i]; - g_free(e->l1_table); -@@ -283,7 +283,7 @@ static void vmdk_free_extents(BlockDriverState *bs) - bdrv_unref_child(bs, e->file); - } - } -- bdrv_graph_wrunlock(NULL); -+ 
bdrv_graph_wrunlock(); - - g_free(s->extents); - } -@@ -1247,9 +1247,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - 0, 0, 0, 0, 0, &extent, errp); - if (ret < 0) { - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - bdrv_graph_rdlock_main_loop(); - goto out; - } -@@ -1266,9 +1266,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - g_free(buf); - if (ret) { - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - bdrv_graph_rdlock_main_loop(); - goto out; - } -@@ -1277,9 +1277,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); - if (ret) { - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - bdrv_graph_rdlock_main_loop(); - goto out; - } -@@ -1287,9 +1287,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - } else { - error_setg(errp, "Unsupported extent type '%s'", type); - bdrv_graph_rdunlock_main_loop(); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - bdrv_graph_rdlock_main_loop(); - ret = -ENOTSUP; - goto out; -diff --git a/blockdev.c b/blockdev.c -index c91f49e7b6..9e1381169d 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1611,9 +1611,9 @@ static void external_snapshot_abort(void *opaque) - } - - bdrv_drained_begin(state->new_bs); -- bdrv_graph_wrlock(state->old_bs); -+ bdrv_graph_wrlock(); - bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); -- bdrv_graph_wrunlock(state->old_bs); -+ 
bdrv_graph_wrunlock(); - bdrv_drained_end(state->new_bs); - - bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ -@@ -3657,7 +3657,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, - BlockDriverState *parent_bs, *new_bs = NULL; - BdrvChild *p_child; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - - parent_bs = bdrv_lookup_bs(parent, parent, errp); - if (!parent_bs) { -@@ -3693,7 +3693,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, - } - - out: -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - } - - BlockJobInfoList *qmp_query_block_jobs(Error **errp) -diff --git a/blockjob.c b/blockjob.c -index b7a29052b9..7310412313 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -199,7 +199,7 @@ void block_job_remove_all_bdrv(BlockJob *job) - * to process an already freed BdrvChild. - */ - aio_context_release(job->job.aio_context); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - aio_context_acquire(job->job.aio_context); - while (job->nodes) { - GSList *l = job->nodes; -@@ -212,7 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) - - g_slist_free_1(l); - } -- bdrv_graph_wrunlock_ctx(job->job.aio_context); -+ bdrv_graph_wrunlock(); - } - - bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) -@@ -514,7 +514,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - int ret; - GLOBAL_STATE_CODE(); - -- bdrv_graph_wrlock(bs); -+ bdrv_graph_wrlock(); - - if (job_id == NULL && !(flags & JOB_INTERNAL)) { - job_id = bdrv_get_device_name(bs); -@@ -523,7 +523,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - job = job_create(job_id, &driver->job_driver, txn, bdrv_get_aio_context(bs), - flags, cb, opaque, errp); - if (job == NULL) { -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - return NULL; - } - -@@ -563,11 +563,11 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - goto fail; - } - -- bdrv_graph_wrunlock(bs); 
-+ bdrv_graph_wrunlock(); - return job; - - fail: -- bdrv_graph_wrunlock(bs); -+ bdrv_graph_wrunlock(); - job_early_fail(&job->job); - return NULL; - } -diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h -index 22b5db1ed9..d7545e82d0 100644 ---- a/include/block/graph-lock.h -+++ b/include/block/graph-lock.h -@@ -110,34 +110,17 @@ void unregister_aiocontext(AioContext *ctx); - * - * The wrlock can only be taken from the main loop, with BQL held, as only the - * main loop is allowed to modify the graph. -- * -- * If @bs is non-NULL, its AioContext is temporarily released. -- * -- * This function polls. Callers must not hold the lock of any AioContext other -- * than the current one and the one of @bs. - */ - void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA --bdrv_graph_wrlock(BlockDriverState *bs); -+bdrv_graph_wrlock(void); - - /* - * bdrv_graph_wrunlock: - * Write finished, reset global has_writer to 0 and restart - * all readers that are waiting. -- * -- * If @bs is non-NULL, its AioContext is temporarily released. -- */ --void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA --bdrv_graph_wrunlock(BlockDriverState *bs); -- --/* -- * bdrv_graph_wrunlock_ctx: -- * Write finished, reset global has_writer to 0 and restart -- * all readers that are waiting. -- * -- * If @ctx is non-NULL, its lock is temporarily released. 
- */ - void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA --bdrv_graph_wrunlock_ctx(AioContext *ctx); -+bdrv_graph_wrunlock(void); - - /* - * bdrv_graph_co_rdlock: -diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py -index a38e5833fb..38364fa557 100644 ---- a/scripts/block-coroutine-wrapper.py -+++ b/scripts/block-coroutine-wrapper.py -@@ -261,8 +261,8 @@ def gen_no_co_wrapper(func: FuncDecl) -> str: - graph_lock=' bdrv_graph_rdlock_main_loop();' - graph_unlock=' bdrv_graph_rdunlock_main_loop();' - elif func.graph_wrlock: -- graph_lock=' bdrv_graph_wrlock(NULL);' -- graph_unlock=' bdrv_graph_wrunlock(NULL);' -+ graph_lock=' bdrv_graph_wrlock();' -+ graph_unlock=' bdrv_graph_wrunlock();' - - return f"""\ - /* -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 704d1a3f36..d9754dfebc 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -807,9 +807,9 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - tjob->bs = src; - job = &tjob->common; - -- bdrv_graph_wrlock(target); -+ bdrv_graph_wrlock(); - block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); -- bdrv_graph_wrunlock(target); -+ bdrv_graph_wrunlock(); - - switch (result) { - case TEST_JOB_SUCCESS: -@@ -991,11 +991,11 @@ static void bdrv_test_top_close(BlockDriverState *bs) - { - BdrvChild *c, *next_c; - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { - bdrv_unref_child(bs, c); - } -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - } - - static int coroutine_fn GRAPH_RDLOCK -@@ -1085,10 +1085,10 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, - - null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &error_abort); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); -- 
bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - /* This child will be the one to pass to requests through to, and - * it will stall until a drain occurs */ -@@ -1096,21 +1096,21 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, - &error_abort); - child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; - /* Takes our reference to child_bs */ -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", - &child_of_bds, - BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, - &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - /* This child is just there to be deleted - * (for detach_instead_of_delete == true) */ - null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &error_abort); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA, - &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - blk_insert_bs(blk, bs, &error_abort); -@@ -1193,14 +1193,14 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque) - - bdrv_dec_in_flight(data->child_b->bs); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_unref_child(data->parent_b, data->child_b); - - bdrv_ref(data->c); - data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C", - &child_of_bds, BDRV_CHILD_DATA, - &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - } - - static void coroutine_mixed_fn detach_by_parent_aio_cb(void *opaque, int ret) -@@ -1298,7 +1298,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) - /* Set child relationships */ - bdrv_ref(b); - bdrv_ref(a); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); - child_a = bdrv_attach_child(parent_b, a, "PB-A", 
&child_of_bds, -@@ -1308,7 +1308,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) - bdrv_attach_child(parent_a, a, "PA-A", - by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class, - BDRV_CHILD_DATA, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - g_assert_cmpint(parent_a->refcnt, ==, 1); - g_assert_cmpint(parent_b->refcnt, ==, 1); -@@ -1727,7 +1727,7 @@ static void test_drop_intermediate_poll(void) - * Establish the chain last, so the chain links are the first - * elements in the BDS.parents lists - */ -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - for (i = 0; i < 3; i++) { - if (i) { - /* Takes the reference to chain[i - 1] */ -@@ -1735,7 +1735,7 @@ static void test_drop_intermediate_poll(void) - &chain_child_class, BDRV_CHILD_COW, &error_abort); - } - } -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - job = block_job_create("job", &test_simple_job_driver, NULL, job_node, - 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); -@@ -1982,10 +1982,10 @@ static void do_test_replace_child_mid_drain(int old_drain_count, - new_child_bs->total_sectors = 1; - - bdrv_ref(old_child_bs); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, - BDRV_CHILD_COW, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - parent_s->setup_completed = true; - - for (i = 0; i < old_drain_count; i++) { -@@ -2016,9 +2016,9 @@ static void do_test_replace_child_mid_drain(int old_drain_count, - g_assert(parent_bs->quiesce_counter == old_drain_count); - bdrv_drained_begin(old_child_bs); - bdrv_drained_begin(new_child_bs); -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_replace_node(old_child_bs, new_child_bs, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - bdrv_drained_end(new_child_bs); - bdrv_drained_end(old_child_bs); - g_assert(parent_bs->quiesce_counter == new_drain_count); -diff --git 
a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c -index 074adcbb93..8ee6ef38d8 100644 ---- a/tests/unit/test-bdrv-graph-mod.c -+++ b/tests/unit/test-bdrv-graph-mod.c -@@ -137,10 +137,10 @@ static void test_update_perm_tree(void) - - blk_insert_bs(root, bs, &error_abort); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(filter, bs, "child", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - aio_context_acquire(qemu_get_aio_context()); - ret = bdrv_append(filter, bs, NULL); -@@ -206,11 +206,11 @@ static void test_should_update_child(void) - - bdrv_set_backing_hd(target, bs, &error_abort); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - g_assert(target->backing->bs == bs); - bdrv_attach_child(filter, target, "target", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - aio_context_acquire(qemu_get_aio_context()); - bdrv_append(filter, bs, &error_abort); - aio_context_release(qemu_get_aio_context()); -@@ -248,7 +248,7 @@ static void test_parallel_exclusive_write(void) - bdrv_ref(base); - bdrv_ref(fl1); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(top, fl1, "backing", &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - &error_abort); -@@ -260,7 +260,7 @@ static void test_parallel_exclusive_write(void) - &error_abort); - - bdrv_replace_node(fl1, fl2, &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - bdrv_drained_end(fl2); - bdrv_drained_end(fl1); -@@ -367,7 +367,7 @@ static void test_parallel_perm_update(void) - */ - bdrv_ref(base); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(top, ws, "file", &child_of_bds, BDRV_CHILD_DATA, - &error_abort); - c_fl1 = bdrv_attach_child(ws, fl1, "first", &child_of_bds, -@@ -380,7 +380,7 @@ static void test_parallel_perm_update(void) - bdrv_attach_child(fl2, base, "backing", 
&child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - /* Select fl1 as first child to be active */ - s->selected = c_fl1; -@@ -434,11 +434,11 @@ static void test_append_greedy_filter(void) - BlockDriverState *base = no_perm_node("base"); - BlockDriverState *fl = exclusive_writer_node("fl1"); - -- bdrv_graph_wrlock(NULL); -+ bdrv_graph_wrlock(); - bdrv_attach_child(top, base, "backing", &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - &error_abort); -- bdrv_graph_wrunlock(NULL); -+ bdrv_graph_wrunlock(); - - aio_context_acquire(qemu_get_aio_context()); - bdrv_append(fl, base, &error_abort); --- -2.39.3 - diff --git a/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch b/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch deleted file mode 100644 index 4fb4844..0000000 --- a/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch +++ /dev/null @@ -1,94 +0,0 @@ -From a5b4eec5f456b1ca3fe753e1d76f96cf3f8914ef Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Wed, 17 Jan 2024 14:55:53 +0100 -Subject: [PATCH 01/22] hv-balloon: use get_min_alignment() to express 32 GiB - alignment - -RH-Author: David Hildenbrand -RH-MergeRequest: 221: memory-device: reintroduce memory region size check -RH-Jira: RHEL-20341 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Igor Mammedov -RH-Commit: [1/2] cbe092fe549552928270892253b31cd8fe199825 - -https://issues.redhat.com/browse/RHEL-20341 - -Let's implement the get_min_alignment() callback for memory devices, and -copy for the device memory region the alignment of the host memory -region. This mimics what virtio-mem does, and allows for re-introducing -proper alignment checks for the memory region size (where we don't care -about additional device requirements) in memory device core. - -Message-ID: <20240117135554.787344-2-david@redhat.com> -Reviewed-by: Maciej S. 
Szmigiero -Signed-off-by: David Hildenbrand -(cherry picked from commit f77c5f38f49c71bc14cf1019ac92b0b95f572414) -Signed-off-by: David Hildenbrand ---- - hw/hyperv/hv-balloon.c | 37 +++++++++++++++++++++---------------- - 1 file changed, 21 insertions(+), 16 deletions(-) - -diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c -index 66f297c1d7..0829c495b0 100644 ---- a/hw/hyperv/hv-balloon.c -+++ b/hw/hyperv/hv-balloon.c -@@ -1476,22 +1476,7 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon) - balloon->mr = g_new0(MemoryRegion, 1); - memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON, - memory_region_size(hostmem_mr)); -- -- /* -- * The VM can indicate an alignment up to 32 GiB. Memory device core can -- * usually only handle/guarantee 1 GiB alignment. The user will have to -- * specify a larger maxmem eventually. -- * -- * The memory device core will warn the user in case maxmem might have to be -- * increased and will fail plugging the device if there is not sufficient -- * space after alignment. -- * -- * TODO: we could do the alignment ourselves in a slightly bigger region. -- * But this feels better, although the warning might be annoying. Maybe -- * we can optimize that in the future (e.g., with such a device on the -- * cmdline place/size the device memory region differently. -- */ -- balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr)); -+ balloon->mr->align = memory_region_get_alignment(hostmem_mr); - } - - static void hv_balloon_free_mr(HvBalloon *balloon) -@@ -1653,6 +1638,25 @@ static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md, - return balloon->mr; - } - -+static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md) -+{ -+ /* -+ * The VM can indicate an alignment up to 32 GiB. Memory device core can -+ * usually only handle/guarantee 1 GiB alignment. The user will have to -+ * specify a larger maxmem eventually. 
-+ * -+ * The memory device core will warn the user in case maxmem might have to be -+ * increased and will fail plugging the device if there is not sufficient -+ * space after alignment. -+ * -+ * TODO: we could do the alignment ourselves in a slightly bigger region. -+ * But this feels better, although the warning might be annoying. Maybe -+ * we can optimize that in the future (e.g., with such a device on the -+ * cmdline place/size the device memory region differently. -+ */ -+ return 32 * GiB; -+} -+ - static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md, - MemoryDeviceInfo *info) - { -@@ -1765,5 +1769,6 @@ static void hv_balloon_class_init(ObjectClass *klass, void *data) - mdc->get_memory_region = hv_balloon_md_get_memory_region; - mdc->decide_memslots = hv_balloon_decide_memslots; - mdc->get_memslots = hv_balloon_get_memslots; -+ mdc->get_min_alignment = hv_balloon_md_get_min_alignment; - mdc->fill_device_info = hv_balloon_md_fill_device_info; - } --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch b/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch deleted file mode 100644 index 84f6108..0000000 --- a/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch +++ /dev/null @@ -1,42 +0,0 @@ -From ceaee9c4372bbdc4196cb6808515047388f7aa26 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 21 Nov 2023 16:44:18 +0800 -Subject: [PATCH 039/101] hw/arm: Activate IOMMUFD for virt machines -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [38/67] 0a059ae661616e95eb8455e17f35774495cae8e7 (eauger1/centos-qemu-kvm) - -Signed-off-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen 
-Signed-off-by: Cédric Le Goater -(cherry picked from commit 0970238343af45a8b547695bfc22f18d4eb7da7e) -Signed-off-by: Eric Auger ---- - hw/arm/Kconfig | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig -index 3ada335a24..660f49db49 100644 ---- a/hw/arm/Kconfig -+++ b/hw/arm/Kconfig -@@ -8,6 +8,7 @@ config ARM_VIRT - imply TPM_TIS_SYSBUS - imply TPM_TIS_I2C - imply NVDIMM -+ imply IOMMUFD - select ARM_GIC - select ACPI - select ARM_SMMUV3 --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch b/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch deleted file mode 100644 index 76ab341..0000000 --- a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch +++ /dev/null @@ -1,88 +0,0 @@ -From e670722b9a6460d41497688d820d5a9a9b51d8e9 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 9 Jan 2024 11:36:42 +1000 -Subject: [PATCH 001/101] hw/arm/virt: Add properties to disable high memory - regions - -RH-Author: Gavin Shan -RH-MergeRequest: 210: hw/arm/virt: Add properties to disable high memory regions -RH-Jira: RHEL-19738 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Auger -RH-Commit: [1/1] 4097ba5133a67126e30b84202cb40df4e019c5f4 - -Upstream: RHEL-only -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=57927352 - -There are 3 high memory regions for GICv3 or GICv4 redistributor, PCI -ECAM and PCI MMIO. Each of them has a property introduced by upstream -commit 6a48c64eec ("hw/arm/virt: Add properties to disable high memory -regions") so that the corresponding high memory region can be disabled. - -It's notable that another property ("compact-highmem") introduced by -upstream commit f40408a9fe ("hw/arm/virt: Add 'compact-highmem' property") -so that the compact high memory region layout during assignment can be -disabled, compatible to the old machine types. 
However, we don't have -the compatible issue since the compact high memory region layout is -always kept as disabled until RHEL9.2.0 machine type and onwards. - -Expose those 3 properties: "highmem-redists", "highmem-ecam" and -"highmem-mmio". The property "compact-highmem" is kept as hidden. - -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 24 +++++++++++++++++++++++- - 1 file changed, 23 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 5cab00b4cd..60f117f0d2 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2456,6 +2456,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) - - vms->highmem_compact = value; - } -+#endif /* disabled for RHEL */ - - static bool virt_get_highmem_redists(Object *obj, Error **errp) - { -@@ -2498,7 +2499,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) - - vms->highmem_mmio = value; - } --#endif /* disabled for RHEL */ - - static bool virt_get_its(Object *obj, Error **errp) - { -@@ -3521,6 +3521,28 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Set on/off to enable/disable using " - "physical address space above 32 bits"); - -+ object_class_property_add_bool(oc, "highmem-redists", -+ virt_get_highmem_redists, -+ virt_set_highmem_redists); -+ object_class_property_set_description(oc, "highmem-redists", -+ "Set on/off to enable/disable high " -+ "memory region for GICv3 or GICv4 " -+ "redistributor"); -+ -+ object_class_property_add_bool(oc, "highmem-ecam", -+ virt_get_highmem_ecam, -+ virt_set_highmem_ecam); -+ object_class_property_set_description(oc, "highmem-ecam", -+ "Set on/off to enable/disable high " -+ "memory region for PCI ECAM"); -+ -+ object_class_property_add_bool(oc, "highmem-mmio", -+ virt_get_highmem_mmio, -+ virt_set_highmem_mmio); -+ object_class_property_set_description(oc, "highmem-mmio", -+ "Set on/off to enable/disable high " -+ "memory region for PCI MMIO"); -+ - object_class_property_add_str(oc, 
"gic-version", virt_get_gic_version, - virt_set_gic_version); - object_class_property_set_description(oc, "gic-version", --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch b/SOURCES/kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch new file mode 100644 index 0000000..29991d5 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch @@ -0,0 +1,120 @@ +From 41c4083269ec772b406c6c57b496ca2011f928c7 Mon Sep 17 00:00:00 2001 +From: Zhenyu Zhang +Date: Tue, 9 Jul 2024 23:08:59 -0400 +Subject: [PATCH 2/2] hw/arm/virt: Avoid unexpected warning from Linux guest on + host with Fujitsu CPUs + +RH-Author: zhenyzha +RH-MergeRequest: 256: hw/arm/virt: Avoid unexpected warning from Linux guest on host with Fujitsu CPUs +RH-Jira: RHEL-39936 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Acked-by: Cornelia Huck +RH-Commit: [1/1] fdf156fd05b219a06e2e2ca409fff0f728c1e2cf (zhenyzha/qemu-kvm) + +JIRA: https://issues.redhat.com/browse/RHEL-39936 + +Multiple warning messages and corresponding backtraces are observed when Linux +guest is booted on the host with Fujitsu CPUs. One of them is shown as below. 
+ +[ 0.032443] ------------[ cut here ]------------ +[ 0.032446] uart-pl011 9000000.pl011: ARCH_DMA_MINALIGN smaller than +CTR_EL0.CWG (128 < 256) +[ 0.032454] WARNING: CPU: 0 PID: 1 at arch/arm64/mm/dma-mapping.c:54 +arch_setup_dma_ops+0xbc/0xcc +[ 0.032470] Modules linked in: +[ 0.032475] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-452.el9.aarch64 +[ 0.032481] Hardware name: linux,dummy-virt (DT) +[ 0.032484] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 0.032490] pc : arch_setup_dma_ops+0xbc/0xcc +[ 0.032496] lr : arch_setup_dma_ops+0xbc/0xcc +[ 0.032501] sp : ffff80008003b860 +[ 0.032503] x29: ffff80008003b860 x28: 0000000000000000 x27: ffffaae4b949049c +[ 0.032510] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 +[ 0.032517] x23: 0000000000000100 x22: 0000000000000000 x21: 0000000000000000 +[ 0.032523] x20: 0000000100000000 x19: ffff2f06c02ea400 x18: ffffffffffffffff +[ 0.032529] x17: 00000000208a5f76 x16: 000000006589dbcb x15: ffffaae4ba071c89 +[ 0.032535] x14: 0000000000000000 x13: ffffaae4ba071c84 x12: 455f525443206e61 +[ 0.032541] x11: 68742072656c6c61 x10: 0000000000000029 x9 : ffffaae4b7d21da4 +[ 0.032547] x8 : 0000000000000029 x7 : 4c414e494d5f414d x6 : 0000000000000029 +[ 0.032553] x5 : 000000000000000f x4 : ffffaae4b9617a00 x3 : 0000000000000001 +[ 0.032558] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff2f06c029be40 +[ 0.032564] Call trace: +[ 0.032566] arch_setup_dma_ops+0xbc/0xcc +[ 0.032572] of_dma_configure_id+0x138/0x300 +[ 0.032591] amba_dma_configure+0x34/0xc0 +[ 0.032600] really_probe+0x78/0x3dc +[ 0.032614] __driver_probe_device+0x108/0x160 +[ 0.032619] driver_probe_device+0x44/0x114 +[ 0.032624] __device_attach_driver+0xb8/0x14c +[ 0.032629] bus_for_each_drv+0x88/0xe4 +[ 0.032634] __device_attach+0xb0/0x1e0 +[ 0.032638] device_initial_probe+0x18/0x20 +[ 0.032643] bus_probe_device+0xa8/0xb0 +[ 0.032648] device_add+0x4b4/0x6c0 +[ 0.032652] amba_device_try_add.part.0+0x48/0x360 +[ 0.032657] 
amba_device_add+0x104/0x144 +[ 0.032662] of_amba_device_create.isra.0+0x100/0x1c4 +[ 0.032666] of_platform_bus_create+0x294/0x35c +[ 0.032669] of_platform_populate+0x5c/0x150 +[ 0.032672] of_platform_default_populate_init+0xd0/0xec +[ 0.032697] do_one_initcall+0x4c/0x2e0 +[ 0.032701] do_initcalls+0x100/0x13c +[ 0.032707] kernel_init_freeable+0x1c8/0x21c +[ 0.032712] kernel_init+0x28/0x140 +[ 0.032731] ret_from_fork+0x10/0x20 +[ 0.032735] ---[ end trace 0000000000000000 ]--- + +In Linux, a check is applied to every device which is exposed through +device-tree node. The warning message is raised when the device isn't +DMA coherent and the cache line size is larger than ARCH_DMA_MINALIGN +(128 bytes). The cache line is sorted from CTR_EL0[CWG], which corresponds +to 256 bytes on the guest CPUs. The DMA coherent capability is claimed +through 'dma-coherent' in their device-tree nodes or parent nodes. +This happens even when the device doesn't implement or use DMA at all, +for legacy reasons. + +Fix the issue by adding 'dma-coherent' property to the device-tree root +node, meaning all devices are capable of DMA coherent by default. +This both suppresses the spurious kernel warnings and also guards +against possible future QEMU bugs where we add a DMA-capable device +and forget to mark it as dma-coherent. 
+ +Signed-off-by: Zhenyu Zhang +Reviewed-by: Gavin Shan +Reviewed-by: Donald Dutile +Message-id: 20240612020506.307793-1-zhenyzha@redhat.com +[PMM: tweaked commit message] +Signed-off-by: Peter Maydell +(cherry picked from commit dda533087ad5559674ff486e7031c88dc01e0abd) +Signed-off-by: Zhenyu Zhang +--- + hw/arm/virt.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 3f0496cdb9..6ece67f11d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -330,6 +330,17 @@ static void create_fdt(VirtMachineState *vms) + qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x2); + qemu_fdt_setprop_string(fdt, "/", "model", "linux,dummy-virt"); + ++ /* ++ * For QEMU, all DMA is coherent. Advertising this in the root node ++ * has two benefits: ++ * ++ * - It avoids potential bugs where we forget to mark a DMA ++ * capable device as being dma-coherent ++ * - It avoids spurious warnings from the Linux kernel about ++ * devices which can't do DMA at all ++ */ ++ qemu_fdt_setprop(fdt, "/", "dma-coherent", NULL, 0); ++ + /* /chosen must exist for load_dtb to fill in necessary properties later */ + qemu_fdt_add_subnode(fdt, "/chosen"); + if (vms->dtb_randomness) { +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Fix-compats.patch b/SOURCES/kvm-hw-arm-virt-Fix-compats.patch deleted file mode 100644 index 7e3af18..0000000 --- a/SOURCES/kvm-hw-arm-virt-Fix-compats.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 3f58194f8642a71c47d91d3c00a34faf44ea2c11 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 3 Jan 2024 05:57:38 -0500 -Subject: [PATCH] hw/arm/virt: Fix compats - -RH-Author: Eric Auger -RH-MergeRequest: 209: hw/arm/virt: Fix compats -RH-Jira: RHEL-17168 -RH-Acked-by: Gavin Shan -RH-Acked-by: Sebastian Ott -RH-Commit: [1/1] bcdf6493bbd6d7b52b0b88ff44441d22aeddfde2 (eauger1/centos-qemu-kvm) - -arm_rhel_compat is not added for virt-rhel9.4.0 machine causing -the efi-virtio.rom to be looked for when instantiating a virtio-net-pci 
-device and it won't be found since not shipped on ARM. This is a -regression compared to 9.2. - -Actually we do not need any rom file for any virtio-net-pci variant -because edk2 already brings the functionality. So for 9.4 onwards, we -want to set romfiles to "" for all of them. - -However at the moment we apply arm_rhel_compat from the latest -rhel*_virt_options(). This is not aligned with the generic compat -usage which sets compats for a given machine type to accomodate for -changes that occured after its advent. Here we are somehow abusing -the compat infra to set general driver options that should apply for -all machines. On top of that this is really error prone and we have -forgotten to add arm_rhel_compat several times in the past. - -So let's introduce set_arm_rhel_compat() being called before any -*virt_options in the non abstract machine class. That way the setting -will apply to any machine type without any need to add it in any -future machine types. - -For < 9.4 machines we don't really care keeping non void romfiles -for transitional and non transitional devices because anyway this was -not working. So let's keep things simple and apply the new defaults for -all RHEL9 machine types. - -Finally, to follow the generic pattern we should set hw_compat_rhel_9_0 -in 9.0 machine as it is done on x86 or ccw. This has no consequence on -aarch64 because it only contains x86 stuff but that helps understanding -the consistency. 
- -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 43 +++++++++++++++++++++++++++++-------------- - 1 file changed, 29 insertions(+), 14 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 0b17c94ad7..5cab00b4cd 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -111,11 +111,39 @@ - DEFINE_VIRT_MACHINE_LATEST(major, minor, false) - #endif /* disabled for RHEL */ - -+/* -+ * This variable is for changes to properties that are RHEL specific, -+ * different to the current upstream and to be applied to the latest -+ * machine type. They may be overriden by older machine compats. -+ * -+ * virtio-net-pci variant romfiles are not needed because edk2 does -+ * fully support the pxe boot. Besides virtio romfiles are not shipped -+ * on rhel/aarch64. -+ */ -+GlobalProperty arm_rhel_compat[] = { -+ {"virtio-net-pci", "romfile", "" }, -+ {"virtio-net-pci-transitional", "romfile", "" }, -+ {"virtio-net-pci-non-transitional", "romfile", "" }, -+}; -+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); -+ -+/* -+ * This cannot be called from the rhel_virt_class_init() because -+ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new() -+ * only is called on virt-rhelm.n.s non abstract class init. -+ */ -+static void arm_rhel_compat_set(MachineClass *mc) -+{ -+ compat_props_add(mc->compat_props, arm_rhel_compat, -+ arm_rhel_compat_len); -+} -+ - #define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ - static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ - void *data) \ - { \ - MachineClass *mc = MACHINE_CLASS(oc); \ -+ arm_rhel_compat_set(mc); \ - rhel##m##n##s##_virt_options(mc); \ - mc->desc = "RHEL " # m "." # n "." 
# s " ARM Virtual Machine"; \ - if (latest) { \ -@@ -139,19 +167,6 @@ - #define DEFINE_RHEL_MACHINE(major, minor, subminor) \ - DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) - --/* This variable is for changes to properties that are RHEL specific, -- * different to the current upstream and to be applied to the latest -- * machine type. -- */ --GlobalProperty arm_rhel_compat[] = { -- { -- .driver = "virtio-net-pci", -- .property = "romfile", -- .value = "", -- }, --}; --const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); -- - /* Number of external interrupt lines to configure the GIC with */ - #define NUM_IRQS 256 - -@@ -3639,7 +3654,6 @@ static void rhel920_virt_options(MachineClass *mc) - { - rhel940_virt_options(mc); - -- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); -@@ -3653,6 +3667,7 @@ static void rhel900_virt_options(MachineClass *mc) - rhel920_virt_options(mc); - - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ - vmc->no_tcg_lpa2 = true; --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch b/SOURCES/kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch new file mode 100644 index 0000000..8128a4e --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch @@ -0,0 +1,59 @@ +From e3360c415f7de923d27c3167260a93cb679afabe Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 6 May 2024 15:09:43 +0200 +Subject: [PATCH 1/2] hw/arm/virt: Fix spurious call to arm_virt_compat_set() + 
+RH-Author: Eric Auger +RH-MergeRequest: 238: hw/arm/virt: Fix spurious call to arm_virt_compat_set() +RH-Jira: RHEL-34945 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Gavin Shan +RH-Commit: [1/1] a858a3e1dff12b28e14f7e4bd2b896a9f06eacbb (eauger1/centos-qemu-kvm) + +JIRA: https://issues.redhat.com/browse/RHEL-34945 +Status: RHEL-only + +Downstream, we apply arm_rhel_compat in place of arm_virt_compat. +This is done though arm_rhel_compat_set() transparently called in +DEFINE_RHEL_MACHINE_LATEST(). So there is no need to call +arm_virt_compat_set() in rhel_machine_class_init(). Besides +this triggers a "GLib: g_ptr_array_add: assertion 'rarray' failed" +warning. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f1af9495c6..3f0496cdb9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -85,6 +85,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static GlobalProperty arm_virt_compat[] = { + { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "48" }, + }; +@@ -101,7 +102,6 @@ static void arm_virt_compat_set(MachineClass *mc) + arm_virt_compat_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -3536,7 +3536,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); +- arm_virt_compat_set(mc); + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch b/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch deleted file mode 100644 index 4770a58..0000000 --- 
a/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 4c1d07995a7afb6fae68a7e7a8b6b6c94fa0a7bb Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Mon, 12 Feb 2024 10:37:54 +0100 -Subject: [PATCH 5/6] hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types - -RH-Author: Cornelia Huck -RH-MergeRequest: 225: hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types -RH-Jira: RHEL-24988 -RH-Acked-by: Sebastian Ott -RH-Acked-by: Eric Auger -RH-Commit: [1/1] f15579db44808fa8a2d7bc01b3915aa59c064411 (cohuck/qemu-kvm-c9s) - -Jira: https://issues.redhat.com/browse/RHEL-24988 -Upstream: RHEL only - -We do not plan to support any machine types prior to 9.4.0; leave them -in, but mark as deprecated. - -Signed-off-by: Cornelia Huck ---- - hw/arm/virt.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 60f117f0d2..943c563391 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3679,6 +3679,10 @@ static void rhel920_virt_options(MachineClass *mc) - compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); -+ -+ /* RHEL 9.4 is the first supported release */ -+ mc->deprecation_reason = -+ "machine types for versions prior to 9.4 are deprecated"; - } - DEFINE_RHEL_MACHINE(9, 2, 0) - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch b/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch deleted file mode 100644 index 81c20e5..0000000 --- a/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 7a6be312c11911bdd2ce82566be22a3e014947c2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 21 Nov 2023 16:44:20 +0800 -Subject: [PATCH 041/101] hw/i386: Activate IOMMUFD for q35 machines 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [40/67] b15764ab24fd57389a8d219736613484acd7d29e (eauger1/centos-qemu-kvm) - -Signed-off-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 64ad06f6eba66c514477f490bcba409439a480d8) -Signed-off-by: Eric Auger ---- - hw/i386/Kconfig | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig -index 55850791df..a1846be6f7 100644 ---- a/hw/i386/Kconfig -+++ b/hw/i386/Kconfig -@@ -95,6 +95,7 @@ config Q35 - imply E1000E_PCI_EXPRESS - imply VMPORT - imply VMMOUSE -+ imply IOMMUFD - select PC_PCI - select PC_ACPI - select PCI_EXPRESS_Q35 --- -2.39.3 - diff --git a/SOURCES/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch b/SOURCES/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch new file mode 100644 index 0000000..ee2f88e --- /dev/null +++ b/SOURCES/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch @@ -0,0 +1,73 @@ +From e74980be81d641736ea9d44d0fe9af02af63a220 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:40 -0500 +Subject: [PATCH 083/100] hw/i386: Add support for loading BIOS using + guest_memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [83/91] 7b77d212ef7d83b66ad9d8348179ee84e64fb911 (bonzini/rhel-qemu-kvm) + +When guest_memfd is enabled, the BIOS is generally part of the initial +encrypted guest image and will be accessed as private guest memory. Add +the necessary changes to set up the associated RAM region with a +guest_memfd backend to allow for this. 
+ +Current support centers around using -bios to load the BIOS data. +Support for loading the BIOS via pflash requires additional enablement +since those interfaces rely on the use of ROM memory regions which make +use of the KVM_MEM_READONLY memslot flag, which is not supported for +guest_memfd-backed memslots. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-29-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit fc7a69e177e4ba26d11fcf47b853f85115b35a11) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86-common.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +index 35fe6eabea..6cbb76c25c 100644 +--- a/hw/i386/x86-common.c ++++ b/hw/i386/x86-common.c +@@ -969,8 +969,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + (bios_size % 65536) != 0) { + goto bios_error; + } +- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, +- &error_fatal); ++ if (machine_require_guest_memfd(MACHINE(x86ms))) { ++ memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios", ++ bios_size, &error_fatal); ++ } else { ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", ++ bios_size, &error_fatal); ++ } + if (sev_enabled()) { + /* + * The concept of a "reset" simply doesn't exist for +@@ -991,9 +996,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + } + g_free(filename); + +- /* map the last 128KB of the BIOS in ISA space */ +- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, +- !isapc_ram_fw); ++ if (!machine_require_guest_memfd(MACHINE(x86ms))) { ++ /* map the last 128KB of the BIOS in ISA space */ ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); ++ } + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +-- +2.39.3 + diff --git 
a/SOURCES/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch b/SOURCES/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch new file mode 100644 index 0000000..1fafe03 --- /dev/null +++ b/SOURCES/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch @@ -0,0 +1,106 @@ +From c1e615d6b8f609b72a94ffe6d31a9848a41744ef Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Tue, 30 Apr 2024 17:06:39 +0200 +Subject: [PATCH 038/100] hw/i386: Have x86_bios_rom_init() take + X86MachineState rather than MachineState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [38/91] 59f388b1dffc5d0aa2f0fff768194d755bc3efbb (bonzini/rhel-qemu-kvm) + +The function creates and leaks two MemoryRegion objects regarding the BIOS which +will be moved into X86MachineState in the next steps to avoid the leakage. + +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240430150643.111976-3-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 848351840148f8c3b53ddf6210194506547d3ffd) +Signed-off-by: Paolo Bonzini +--- + hw/i386/microvm.c | 2 +- + hw/i386/pc_sysfw.c | 4 ++-- + hw/i386/x86.c | 4 ++-- + include/hw/i386/x86.h | 2 +- + 4 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c +index 61a772dfe6..fec63cacfa 100644 +--- a/hw/i386/microvm.c ++++ b/hw/i386/microvm.c +@@ -278,7 +278,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) + default_firmware = x86_machine_is_acpi_enabled(x86ms) + ? 
MICROVM_BIOS_FILENAME + : MICROVM_QBOOT_FILENAME; +- x86_bios_rom_init(MACHINE(mms), default_firmware, get_system_memory(), true); ++ x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true); + } + + static void microvm_memory_init(MicrovmMachineState *mms) +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 3efabbbab2..ef7dea9798 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -206,7 +206,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; + + if (!pcmc->pci_enabled) { +- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, true); ++ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true); + return; + } + +@@ -227,7 +227,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + + if (!pflash_blk[0]) { + /* Machine property pflash0 not set, use ROM mode */ +- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, false); ++ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false); + } else { + if (kvm_enabled() && !kvm_readonly_mem_enabled()) { + /* +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 2a4f3ee285..6d3c72f124 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1128,7 +1128,7 @@ void x86_load_linux(X86MachineState *x86ms, + nb_option_roms++; + } + +-void x86_bios_rom_init(MachineState *ms, const char *default_firmware, ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw) + { + const char *bios_name; +@@ -1138,7 +1138,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + ssize_t ret; + + /* BIOS load */ +- bios_name = ms->firmware ?: default_firmware; ++ bios_name = MACHINE(x86ms)->firmware ?: default_firmware; + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = get_image_size(filename); +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index 4dc30dcb4d..cb07618d19 100644 +--- 
a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -116,7 +116,7 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + +-void x86_bios_rom_init(MachineState *ms, const char *default_firmware, ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw); + + void x86_load_linux(X86MachineState *x86ms, +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch b/SOURCES/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch new file mode 100644 index 0000000..a789fb7 --- /dev/null +++ b/SOURCES/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch @@ -0,0 +1,51 @@ +From 7bb1f124413891bc5d2187f12cd19da6e794904b Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 3 Apr 2024 10:59:53 -0400 +Subject: [PATCH 010/100] hw/i386/acpi: Set PCAT_COMPAT bit only when pic is + not disabled + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [10/91] 62110e4bf52cb3e106c8d2a902bbd31548beba00 (bonzini/rhel-qemu-kvm) + +A value 1 of PCAT_COMPAT (bit 0) of MADT.Flags indicates that the system +also has a PC-AT-compatible dual-8259 setup, i.e., the PIC. When PIC +is not enabled (pic=off) for x86 machine, the PCAT_COMPAT bit needs to +be cleared. The PIC probe should then print: + + [ 0.155970] Using NULL legacy PIC + +However, no such log printed in guest kernel unless PCAT_COMPAT is +cleared. 
+ +Signed-off-by: Xiaoyao Li +Message-ID: <20240403145953.3082491-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 292dd287e78e0cbafde9d1522c729349d132d844) +Signed-off-by: Paolo Bonzini +--- + hw/i386/acpi-common.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c +index 20f19269da..0cc2919bb8 100644 +--- a/hw/i386/acpi-common.c ++++ b/hw/i386/acpi-common.c +@@ -107,7 +107,9 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, + acpi_table_begin(&table, table_data); + /* Local APIC Address */ + build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4); +- build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ ++ /* Flags. bit 0: PCAT_COMPAT */ ++ build_append_int_noprefix(table_data, ++ x86ms->pic != ON_OFF_AUTO_OFF ? 1 : 0 , 4); + + for (i = 0; i < apic_ids->len; i++) { + pc_madt_cpu_entry(i, apic_ids, table_data, false); +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch b/SOURCES/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch new file mode 100644 index 0000000..021db3d --- /dev/null +++ b/SOURCES/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch @@ -0,0 +1,164 @@ +From fd6de3c5e97bdf13a39342fc71815a20c66867ae Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:07 +0200 +Subject: [PATCH 043/100] hw/i386/pc_sysfw: Alias rather than copy isa-bios + region + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [43/91] f64dab2a091838a10a9b94e3d09ea11432b0809f (bonzini/rhel-qemu-kvm) + +In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped +to the top of the 4G memory boundary. Do the same in the -pflash case, but only +for new machine versions for migration compatibility. 
This establishes common +behavior and makes pflash commands work in the "isa-bios" region which some +real-world legacy bioses rely on. + +Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash +case will now also point to encrypted memory, just like it already does in the +-bios case. + +When running `info mtree` before and after this commit with +`qemu-system-x86_64 -S -drive \ +if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running +`diff -u before.mtree after.mtree` results in the following changes in the +memory tree: + +| --- before.mtree +| +++ after.mtree +| @@ -71,7 +71,7 @@ +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff +| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff +| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff +| @@ -108,7 +108,7 @@ +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff +| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff +| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 
00000000000c4000-00000000000c7fff +| @@ -131,11 +131,14 @@ +| memory-region: pc.ram +| 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram +| +| +memory-region: system.flash0 +| + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 +| + +| memory-region: pci +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| +| memory-region: smram +| 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff + +Note that in both cases the "system" memory region contains the entry + + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 + +but the "system.flash0" memory region only appears standalone when "isa-bios" is +an alias. + +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-7-shentey@gmail.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a44ea3fa7f2aa1d809fdca1b84a52695b53d8ad0) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 1 + + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + hw/i386/pc_sysfw.c | 8 +++++++- + include/hw/i386/pc.h | 1 + + 5 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 1a34bc4522..660a59c63b 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1967,6 +1967,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->has_reserved_memory = true; + pcmc->enforce_aligned_dimm = true; + pcmc->enforce_amd_1tb_hole = true; ++ pcmc->isa_bios_alias = true; + /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported + * to be used at the moment, 32K should be enough for a while. 
*/ + pcmc->acpi_data_size = 0x20000 + 0x8000; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index bef3e8b73e..dbb7f2ed17 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -975,6 +975,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + m->alias = "pc"; + m->is_default = 1; + m->smp_props.prefer_sockets = true; ++ pcmc->isa_bios_alias = false; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index dedc86eec9..f9900ad798 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -735,6 +735,7 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; ++ pcmc->isa_bios_alias = false; + + compat_props_add(m->compat_props, pc_rhel_9_5_compat, + pc_rhel_9_5_compat_len); +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 82d37cb376..ac88ad4eb9 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -135,6 +135,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + MemoryRegion *rom_memory) + { + X86MachineState *x86ms = X86_MACHINE(pcms); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + hwaddr total_size = 0; + int i; + BlockBackend *blk; +@@ -184,7 +185,12 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ if (pcmc->isa_bios_alias) { ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, ++ true); ++ } else { ++ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ } + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 467e7fb52f..3f53ec73ac 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -122,6 +122,7 @@ struct PCMachineClass { + bool enforce_aligned_dimm; + bool 
broken_reserved_end; + bool enforce_amd_1tb_hole; ++ bool isa_bios_alias; + + /* generate legacy CPU hotplug AML */ + bool legacy_cpu_hotplug; +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch b/SOURCES/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch new file mode 100644 index 0000000..4188fd3 --- /dev/null +++ b/SOURCES/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch @@ -0,0 +1,53 @@ +From 9bf1d368c4b53139db39649833d475e097fc98d1 Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Mon, 22 Apr 2024 22:06:22 +0200 +Subject: [PATCH 039/100] hw/i386/pc_sysfw: Remove unused parameter from + pc_isa_bios_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [39/91] c0019dc2706a8e3f40486fd4a4c0dd1fbe23237b (bonzini/rhel-qemu-kvm) + +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240422200625.2768-2-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit f4b63768b91811cdcf1fb7b270587123251dfea5) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index ef7dea9798..59c7a81692 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -41,8 +41,7 @@ + #define FLASH_SECTOR_SIZE 4096 + + static void pc_isa_bios_init(MemoryRegion *rom_memory, +- MemoryRegion *flash_mem, +- int ram_size) ++ MemoryRegion *flash_mem) + { + int isa_bios_size; + MemoryRegion *isa_bios; +@@ -186,7 +185,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(rom_memory, flash_mem, size); ++ 
pc_isa_bios_init(rom_memory, flash_mem); + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch b/SOURCES/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch new file mode 100644 index 0000000..a543c79 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch @@ -0,0 +1,158 @@ +From e6472ff46cbed97c2a238a8ef7d321351931333a Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:30 -0500 +Subject: [PATCH 070/100] hw/i386/sev: Add function to get SEV metadata from + OVMF header + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [70/91] ba818dade96119c8a51ca1fb222f4f69e2752396 (bonzini/rhel-qemu-kvm) + +A recent version of OVMF expanded the reset vector GUID list to add +SEV-specific metadata GUID. The SEV metadata describes the reserved +memory regions such as the secrets and CPUID page used during the SEV-SNP +guest launch. + +The pc_system_get_ovmf_sev_metadata_ptr() is used to retieve the SEV +metadata pointer from the OVMF GUID list. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-19-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit f3c30c575d34122573b7370a7da5ca3a27dde481) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 4 ++++ + include/hw/i386/pc.h | 26 ++++++++++++++++++++++++++ + target/i386/sev-sysemu-stub.c | 4 ++++ + target/i386/sev.c | 32 ++++++++++++++++++++++++++++++++ + target/i386/sev.h | 2 ++ + 5 files changed, 68 insertions(+) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index ac88ad4eb9..9b8671c441 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -260,6 +260,10 @@ void x86_firmware_configure(void *ptr, int size) + pc_system_parse_ovmf_flash(ptr, size); + + if (sev_enabled()) { ++ ++ /* Copy the SEV metadata table (if it exists) */ ++ pc_system_parse_sev_metadata(ptr, size); ++ + ret = sev_es_save_reset_vector(ptr, size); + if (ret) { + error_report("failed to locate and/or save reset vector"); +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 3f53ec73ac..94b49310f5 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -167,6 +167,32 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); + #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" + #define PCI_HOST_PROP_SMM_RANGES "smm-ranges" + ++typedef enum { ++ SEV_DESC_TYPE_UNDEF, ++ /* The section contains the region that must be validated by the VMM. 
*/ ++ SEV_DESC_TYPE_SNP_SEC_MEM, ++ /* The section contains the SNP secrets page */ ++ SEV_DESC_TYPE_SNP_SECRETS, ++ /* The section contains address that can be used as a CPUID page */ ++ SEV_DESC_TYPE_CPUID, ++ ++} ovmf_sev_metadata_desc_type; ++ ++typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc { ++ uint32_t base; ++ uint32_t len; ++ ovmf_sev_metadata_desc_type type; ++} OvmfSevMetadataDesc; ++ ++typedef struct __attribute__((__packed__)) OvmfSevMetadata { ++ uint8_t signature[4]; ++ uint32_t len; ++ uint32_t version; ++ uint32_t num_desc; ++ OvmfSevMetadataDesc descs[]; ++} OvmfSevMetadata; ++ ++OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void); + + void pc_pci_as_mapping_init(MemoryRegion *system_memory, + MemoryRegion *pci_address_space); +diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c +index 96e1c15cc3..fc1c57c411 100644 +--- a/target/i386/sev-sysemu-stub.c ++++ b/target/i386/sev-sysemu-stub.c +@@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) + { + monitor_printf(mon, "SEV is not available in this QEMU\n"); + } ++ ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) ++{ ++} +diff --git a/target/i386/sev.c b/target/i386/sev.c +index e84e4395a5..17281bb2c7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -597,6 +597,38 @@ SevCapability *qmp_query_sev_capabilities(Error **errp) + return sev_get_capabilities(errp); + } + ++static OvmfSevMetadata *ovmf_sev_metadata_table; ++ ++#define OVMF_SEV_META_DATA_GUID "dc886566-984a-4798-A75e-5585a7bf67cc" ++typedef struct __attribute__((__packed__)) OvmfSevMetadataOffset { ++ uint32_t offset; ++} OvmfSevMetadataOffset; ++ ++OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void) ++{ ++ return ovmf_sev_metadata_table; ++} ++ ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) ++{ ++ OvmfSevMetadata *metadata; ++ OvmfSevMetadataOffset *data; ++ ++ if 
(!pc_system_ovmf_table_find(OVMF_SEV_META_DATA_GUID, (uint8_t **)&data, ++ NULL)) { ++ return; ++ } ++ ++ metadata = (OvmfSevMetadata *)(flash_ptr + flash_size - data->offset); ++ if (memcmp(metadata->signature, "ASEV", 4) != 0 || ++ metadata->len < sizeof(OvmfSevMetadata) || ++ metadata->len > flash_size - data->offset) { ++ return; ++ } ++ ++ ovmf_sev_metadata_table = g_memdup2(metadata, metadata->len); ++} ++ + static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + Error **errp) + { +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 5dc4767b1e..cc12824dd6 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -66,4 +66,6 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); + void sev_es_set_reset_vector(CPUState *cpu); + ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size); ++ + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch b/SOURCES/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch new file mode 100644 index 0000000..c5a7a28 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch @@ -0,0 +1,165 @@ +From 226cf6c3d3e2fd1a35422043dbe0b73d1216df83 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:36 -0500 +Subject: [PATCH 073/100] hw/i386/sev: Add support to encrypt BIOS when SEV-SNP + is enabled + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [73/91] 844afd322c12c3e8992cf6ec692c94e70747bd0c (bonzini/rhel-qemu-kvm) + +As with SEV, an SNP guest requires that the BIOS be part of the initial +encrypted/measured guest payload. Extend sev_encrypt_flash() to handle +the SNP case and plumb through the GPA of the BIOS location since this +is needed for SNP. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-25-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 77d1abd91e5352ad30ae2f83790f95fa6a3c0b6b) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 12 +++++++----- + hw/i386/x86-common.c | 2 +- + include/hw/i386/x86.h | 2 +- + target/i386/sev-sysemu-stub.c | 2 +- + target/i386/sev.c | 5 +++-- + target/i386/sev.h | 2 +- + 6 files changed, 14 insertions(+), 11 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 9b8671c441..7cdbafc8d2 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -148,6 +148,8 @@ static void pc_system_flash_map(PCMachineState *pcms, + assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled); + + for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) { ++ hwaddr gpa; ++ + system_flash = pcms->flash[i]; + blk = pflash_cfi01_get_blk(system_flash); + if (!blk) { +@@ -177,11 +179,11 @@ static void pc_system_flash_map(PCMachineState *pcms, + } + + total_size += size; ++ gpa = 0x100000000ULL - total_size; /* where the flash is mapped */ + qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks", + size / FLASH_SECTOR_SIZE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal); +- sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, +- 0x100000000ULL - total_size); ++ sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa); + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +@@ -196,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + if (sev_enabled()) { + flash_ptr = memory_region_get_ram_ptr(flash_mem); + flash_size = memory_region_size(flash_mem); +- x86_firmware_configure(flash_ptr, flash_size); ++ x86_firmware_configure(gpa, flash_ptr, flash_size); + } + } + } +@@ -249,7 +251,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + pc_system_flash_cleanup_unused(pcms); + } + +-void x86_firmware_configure(void *ptr, int size) 
++void x86_firmware_configure(hwaddr gpa, void *ptr, int size) + { + int ret; + +@@ -270,6 +272,6 @@ void x86_firmware_configure(void *ptr, int size) + exit(1); + } + +- sev_encrypt_flash(ptr, size, &error_fatal); ++ sev_encrypt_flash(gpa, ptr, size, &error_fatal); + } + } +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +index 67b03c913a..35fe6eabea 100644 +--- a/hw/i386/x86-common.c ++++ b/hw/i386/x86-common.c +@@ -981,7 +981,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + */ + void *ptr = memory_region_get_ram_ptr(&x86ms->bios); + load_image_size(filename, ptr, bios_size); +- x86_firmware_configure(ptr, bios_size); ++ x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size); + } else { + memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index b006f16b8d..d43cb3908e 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -154,6 +154,6 @@ void ioapic_init_gsi(GSIState *gsi_state, Object *parent); + DeviceState *ioapic_init_secondary(GSIState *gsi_state); + + /* pc_sysfw.c */ +-void x86_firmware_configure(void *ptr, int size); ++void x86_firmware_configure(hwaddr gpa, void *ptr, int size); + + #endif +diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c +index fc1c57c411..d5bf886e79 100644 +--- a/target/i386/sev-sysemu-stub.c ++++ b/target/i386/sev-sysemu-stub.c +@@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, + error_setg(errp, "SEV is not available in this QEMU"); + } + +-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) ++int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + g_assert_not_reached(); + } +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 06401f0526..7b5c4b4874 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1484,7 
+1484,7 @@ static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + int +-sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) ++sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + +@@ -1841,7 +1841,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + +- if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { + ret = false; + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index cc12824dd6..858005a119 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -59,7 +59,7 @@ uint32_t sev_get_cbit_position(void); + uint32_t sev_get_reduced_phys_bits(void); + bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); + +-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); ++int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp); + int sev_inject_launch_secret(const char *hdr, const char *secret, + uint64_t gpa, Error **errp); + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch b/SOURCES/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch new file mode 100644 index 0000000..050a522 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch @@ -0,0 +1,123 @@ +From a20b2e3e52b9589ac1abc8b9b818d526c86368cf Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:39 -0500 +Subject: [PATCH 082/100] hw/i386/sev: Use guest_memfd for legacy ROMs + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: 
[82/91] a591e85e00c353009803b143c80852b8c9b1f15e (bonzini/rhel-qemu-kvm) + +Current SNP guest kernels will attempt to access these regions with +with C-bit set, so guest_memfd is needed to handle that. Otherwise, +kvm_convert_memory() will fail when the guest kernel tries to access it +and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges +to private. + +Whether guests should actually try to access ROM regions in this way (or +need to deal with legacy ROM regions at all), is a separate issue to be +addressed on kernel side, but current SNP guest kernels will exhibit +this behavior and so this handling is needed to allow QEMU to continue +running existing SNP guest kernels. + +Signed-off-by: Michael Roth +[pankaj: Added sev_snp_enabled() check] +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-28-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 413a67450750e0459efeffc3db3ba9759c3e381c) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 14 ++++++++++---- + hw/i386/pc_sysfw.c | 19 +++++++++++++------ + 2 files changed, 23 insertions(+), 10 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 0aca0cc79e..b25d075b59 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -62,6 +62,7 @@ + #include "hw/mem/memory-device.h" + #include "e820_memory_layout.h" + #include "trace.h" ++#include "sev.h" + #include CONFIG_DEVICES + + #ifdef CONFIG_XEN_EMU +@@ -1173,10 +1174,15 @@ void pc_memory_init(PCMachineState *pcms, + pc_system_firmware_init(pcms, rom_memory); + + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); +- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, +- &error_fatal); +- if (pcmc->pci_enabled) { +- memory_region_set_readonly(option_rom_mr, true); ++ if (machine_require_guest_memfd(machine)) { ++ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", ++ PC_ROM_SIZE, &error_fatal); ++ } else { ++ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, ++ 
&error_fatal); ++ if (pcmc->pci_enabled) { ++ memory_region_set_readonly(option_rom_mr, true); ++ } + } + memory_region_add_subregion_overlap(rom_memory, + PC_ROM_MIN_VGA, +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 7cdbafc8d2..ef80281d28 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -40,8 +40,8 @@ + + #define FLASH_SECTOR_SIZE 4096 + +-static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, +- MemoryRegion *flash_mem) ++static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios, ++ MemoryRegion *rom_memory, MemoryRegion *flash_mem) + { + int isa_bios_size; + uint64_t flash_size; +@@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(flash_size, 128 * KiB); +- memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, +- &error_fatal); ++ if (machine_require_guest_memfd(MACHINE(pcms))) { ++ memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios", ++ isa_bios_size, &error_fatal); ++ } else { ++ memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, ++ &error_fatal); ++ } + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, + isa_bios, +@@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size), + isa_bios_size); + +- memory_region_set_readonly(isa_bios, true); ++ if (!machine_require_guest_memfd(current_machine)) { ++ memory_region_set_readonly(isa_bios, true); ++ } + } + + static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms, +@@ -191,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, + true); + } else { +- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem); + } + + /* Encrypt the 
pflash boot ROM */ +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch b/SOURCES/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch new file mode 100644 index 0000000..7b03cb4 --- /dev/null +++ b/SOURCES/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch @@ -0,0 +1,58 @@ +From 4331180aa09e44550ff8de781c618bae5e99bb70 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Tue, 9 Apr 2024 18:07:43 -0500 +Subject: [PATCH 025/100] hw/i386/sev: Use legacy SEV VM types for older + machine types + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [25/91] 8c73cd312736ccb0818b4d3216fd13712f21f3c9 (bonzini/rhel-qemu-kvm) + +Newer 9.1 machine types will default to using the KVM_SEV_INIT2 API for +creating SEV/SEV-ES going forward. However, this API results in guest +measurement changes which are generally not expected for users of these +older guest types and can cause disruption if they switch to a newer +QEMU/kernel version. Avoid this by continuing to use the older +KVM_SEV_INIT/KVM_SEV_ES_INIT APIs for older machine types. 
+ +Signed-off-by: Michael Roth +Message-ID: <20240409230743.962513-4-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit ea7fbd37537b3a598335c21ccb2ea674630fc810) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 1 + + target/i386/sev.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b9fde3cec1..1a34bc4522 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -351,6 +351,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + GlobalProperty pc_rhel_9_5_compat[] = { + /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ + { TYPE_X86_CPU, "guest-phys-bits", "0" }, ++ { "sev-guest", "legacy-vm-type", "true" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index f4ee317cb0..d30b68c11e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1417,6 +1417,7 @@ sev_guest_instance_init(Object *obj) + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); ++ object_apply_compat_props(obj); + } + + /* sev guest info */ +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-split-x86.c-in-multiple-parts.patch b/SOURCES/kvm-hw-i386-split-x86.c-in-multiple-parts.patch new file mode 100644 index 0000000..40ca52b --- /dev/null +++ b/SOURCES/kvm-hw-i386-split-x86.c-in-multiple-parts.patch @@ -0,0 +1,2301 @@ +From bf2206fae2e640da9de7fc0648b4b90ad3ddfbe3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:41 +0200 +Subject: [PATCH 046/100] hw/i386: split x86.c in multiple parts + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [46/91] 3d6e8364aa9b691c25bdcf54a30b116da5d33874 (bonzini/rhel-qemu-kvm) + +Keep the basic X86MachineState definition in x86.c. 
Move out functions that +are only needed by other files: x86-common.c for the pc and microvm machines, +x86-cpu.c for those used by accelerator code. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-11-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b061f0598b9231f7992aff4fcdf3f336f9747d11) +Signed-off-by: Paolo Bonzini +--- + hw/i386/meson.build | 4 +- + hw/i386/x86-common.c | 1007 +++++++++++++++++++++++++++++++++++++++ + hw/i386/x86-cpu.c | 97 ++++ + hw/i386/x86.c | 1052 +---------------------------------------- + include/hw/i386/x86.h | 6 +- + 5 files changed, 1113 insertions(+), 1053 deletions(-) + create mode 100644 hw/i386/x86-common.c + create mode 100644 hw/i386/x86-cpu.c + +diff --git a/hw/i386/meson.build b/hw/i386/meson.build +index d9da676038..3437da0aad 100644 +--- a/hw/i386/meson.build ++++ b/hw/i386/meson.build +@@ -4,6 +4,7 @@ i386_ss.add(files( + 'e820_memory_layout.c', + 'multiboot.c', + 'x86.c', ++ 'x86-cpu.c', + )) + + i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c')) +@@ -12,7 +13,7 @@ i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), + i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), + if_false: files('amd_iommu-stub.c')) + i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) +-i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) ++i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) + i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) + i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c')) + i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c')) +@@ -22,6 +23,7 @@ i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'), + + i386_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-common.c')) + i386_ss.add(when: 'CONFIG_PC', if_true: files( ++ 'x86-common.c', + 'pc.c', 
+ 'pc_sysfw.c', + 'acpi-build.c', +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +new file mode 100644 +index 0000000000..67b03c913a +--- /dev/null ++++ b/hw/i386/x86-common.c +@@ -0,0 +1,1007 @@ ++/* ++ * Copyright (c) 2003-2004 Fabrice Bellard ++ * Copyright (c) 2019, 2024 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "qemu/osdep.h" ++#include "qemu/error-report.h" ++#include "qemu/cutils.h" ++#include "qemu/units.h" ++#include "qemu/datadir.h" ++#include "qapi/error.h" ++#include "sysemu/numa.h" ++#include "sysemu/sysemu.h" ++#include "sysemu/xen.h" ++#include "trace.h" ++ ++#include "hw/i386/x86.h" ++#include "target/i386/cpu.h" ++#include "hw/rtc/mc146818rtc.h" ++#include "target/i386/sev.h" ++ ++#include "hw/acpi/cpu_hotplug.h" ++#include "hw/irq.h" ++#include "hw/loader.h" ++#include "multiboot.h" ++#include "elf.h" ++#include "standard-headers/asm-x86/bootparam.h" ++#include CONFIG_DEVICES ++#include "kvm/kvm_i386.h" ++ ++#ifdef CONFIG_XEN_EMU ++#include "hw/xen/xen.h" ++#include "hw/i386/kvm/xen_evtchn.h" ++#endif ++ ++/* Physical Address of PVH entry point read from kernel ELF NOTE */ ++static size_t pvh_start_addr; ++ ++static void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) ++{ ++ Object *cpu = object_new(MACHINE(x86ms)->cpu_type); ++ ++ if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { ++ goto out; ++ } ++ qdev_realize(DEVICE(cpu), NULL, errp); ++ ++out: ++ object_unref(cpu); ++} ++ ++void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) ++{ ++ int i; ++ const CPUArchIdList *possible_cpus; ++ MachineState *ms = MACHINE(x86ms); ++ MachineClass *mc = MACHINE_GET_CLASS(x86ms); ++ ++ x86_cpu_set_default_version(default_cpu_version); ++ ++ /* ++ * Calculates the limit to CPU APIC ID values ++ * ++ * Limit for the APIC ID value, so that all ++ * CPU APIC IDs are < x86ms->apic_id_limit. ++ * ++ * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). ++ */ ++ x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, ++ ms->smp.max_cpus - 1) + 1; ++ ++ /* ++ * Can we support APIC ID 255 or higher? With KVM, that requires ++ * both in-kernel lapic and X2APIC userspace API. 
++ * ++ * kvm_enabled() must go first to ensure that kvm_* references are ++ * not emitted for the linker to consume (kvm_enabled() is ++ * a literal `0` in configurations where kvm_* aren't defined) ++ */ ++ if (kvm_enabled() && x86ms->apic_id_limit > 255 && ++ kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { ++ error_report("current -smp configuration requires kernel " ++ "irqchip and X2APIC API support."); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (kvm_enabled()) { ++ kvm_set_max_apic_id(x86ms->apic_id_limit); ++ } ++ ++ if (!kvm_irqchip_in_kernel()) { ++ apic_set_max_apic_id(x86ms->apic_id_limit); ++ } ++ ++ possible_cpus = mc->possible_cpu_arch_ids(ms); ++ for (i = 0; i < ms->smp.cpus; i++) { ++ x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); ++ } ++} ++ ++void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) ++{ ++ MC146818RtcState *rtc = MC146818_RTC(s); ++ ++ if (cpus_count > 0xff) { ++ /* ++ * If the number of CPUs can't be represented in 8 bits, the ++ * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just ++ * to make old BIOSes fail more predictably. ++ */ ++ mc146818rtc_set_cmos_data(rtc, 0x5f, 0); ++ } else { ++ mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); ++ } ++} ++ ++static int x86_apic_cmp(const void *a, const void *b) ++{ ++ CPUArchId *apic_a = (CPUArchId *)a; ++ CPUArchId *apic_b = (CPUArchId *)b; ++ ++ return apic_a->arch_id - apic_b->arch_id; ++} ++ ++/* ++ * returns pointer to CPUArchId descriptor that matches CPU's apic_id ++ * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no ++ * entry corresponding to CPU's apic_id returns NULL. 
++ */ ++static CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) ++{ ++ CPUArchId apic_id, *found_cpu; ++ ++ apic_id.arch_id = id; ++ found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, ++ ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), ++ x86_apic_cmp); ++ if (found_cpu && idx) { ++ *idx = found_cpu - ms->possible_cpus->cpus; ++ } ++ return found_cpu; ++} ++ ++void x86_cpu_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ CPUArchId *found_cpu; ++ Error *local_err = NULL; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ if (x86ms->acpi_dev) { ++ hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ /* increment the number of CPUs */ ++ x86ms->boot_cpus++; ++ if (x86ms->rtc) { ++ x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); ++ } ++ if (x86ms->fw_cfg) { ++ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); ++ } ++ ++ found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); ++ found_cpu->cpu = CPU(dev); ++out: ++ error_propagate(errp, local_err); ++} ++ ++void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ int idx = -1; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ if (!x86ms->acpi_dev) { ++ error_setg(errp, "CPU hot unplug not supported without ACPI"); ++ return; ++ } ++ ++ x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); ++ assert(idx != -1); ++ if (idx == 0) { ++ error_setg(errp, "Boot CPU is unpluggable"); ++ return; ++ } ++ ++ hotplug_handler_unplug_request(x86ms->acpi_dev, dev, ++ errp); ++} ++ ++void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ CPUArchId *found_cpu; ++ Error *local_err = NULL; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ hotplug_handler_unplug(x86ms->acpi_dev, dev, 
&local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); ++ found_cpu->cpu = NULL; ++ qdev_unrealize(dev); ++ ++ /* decrement the number of CPUs */ ++ x86ms->boot_cpus--; ++ /* Update the number of CPUs in CMOS */ ++ x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); ++ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); ++ out: ++ error_propagate(errp, local_err); ++} ++ ++void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ int idx; ++ CPUState *cs; ++ CPUArchId *cpu_slot; ++ X86CPUTopoIDs topo_ids; ++ X86CPU *cpu = X86_CPU(dev); ++ CPUX86State *env = &cpu->env; ++ MachineState *ms = MACHINE(hotplug_dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ unsigned int smp_cores = ms->smp.cores; ++ unsigned int smp_threads = ms->smp.threads; ++ X86CPUTopoInfo topo_info; ++ ++ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { ++ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", ++ ms->cpu_type); ++ return; ++ } ++ ++ if (x86ms->acpi_dev) { ++ Error *local_err = NULL; ++ ++ hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, ++ &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ } ++ ++ init_topo_info(&topo_info, x86ms); ++ ++ env->nr_dies = ms->smp.dies; ++ ++ /* ++ * If APIC ID is not set, ++ * set it based on socket/die/core/thread properties. ++ */ ++ if (cpu->apic_id == UNASSIGNED_APIC_ID) { ++ int max_socket = (ms->smp.max_cpus - 1) / ++ smp_threads / smp_cores / ms->smp.dies; ++ ++ /* ++ * die-id was optional in QEMU 4.0 and older, so keep it optional ++ * if there's only one die per socket. 
++ */ ++ if (cpu->die_id < 0 && ms->smp.dies == 1) { ++ cpu->die_id = 0; ++ } ++ ++ if (cpu->socket_id < 0) { ++ error_setg(errp, "CPU socket-id is not set"); ++ return; ++ } else if (cpu->socket_id > max_socket) { ++ error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", ++ cpu->socket_id, max_socket); ++ return; ++ } ++ if (cpu->die_id < 0) { ++ error_setg(errp, "CPU die-id is not set"); ++ return; ++ } else if (cpu->die_id > ms->smp.dies - 1) { ++ error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", ++ cpu->die_id, ms->smp.dies - 1); ++ return; ++ } ++ if (cpu->core_id < 0) { ++ error_setg(errp, "CPU core-id is not set"); ++ return; ++ } else if (cpu->core_id > (smp_cores - 1)) { ++ error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", ++ cpu->core_id, smp_cores - 1); ++ return; ++ } ++ if (cpu->thread_id < 0) { ++ error_setg(errp, "CPU thread-id is not set"); ++ return; ++ } else if (cpu->thread_id > (smp_threads - 1)) { ++ error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", ++ cpu->thread_id, smp_threads - 1); ++ return; ++ } ++ ++ topo_ids.pkg_id = cpu->socket_id; ++ topo_ids.die_id = cpu->die_id; ++ topo_ids.core_id = cpu->core_id; ++ topo_ids.smt_id = cpu->thread_id; ++ cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); ++ } ++ ++ cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); ++ if (!cpu_slot) { ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ error_setg(errp, ++ "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" ++ " APIC ID %" PRIu32 ", valid index range 0:%d", ++ topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, ++ cpu->apic_id, ms->possible_cpus->len - 1); ++ return; ++ } ++ ++ if (cpu_slot->cpu) { ++ error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", ++ idx, cpu->apic_id); ++ return; ++ } ++ ++ /* if 'address' properties socket-id/core-id/thread-id are not set, set them ++ * so that 
machine_query_hotpluggable_cpus would show correct values ++ */ ++ /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() ++ * once -smp refactoring is complete and there will be CPU private ++ * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { ++ error_setg(errp, "property socket-id: %u doesn't match set apic-id:" ++ " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, ++ topo_ids.pkg_id); ++ return; ++ } ++ cpu->socket_id = topo_ids.pkg_id; ++ ++ if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { ++ error_setg(errp, "property die-id: %u doesn't match set apic-id:" ++ " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); ++ return; ++ } ++ cpu->die_id = topo_ids.die_id; ++ ++ if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { ++ error_setg(errp, "property core-id: %u doesn't match set apic-id:" ++ " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, ++ topo_ids.core_id); ++ return; ++ } ++ cpu->core_id = topo_ids.core_id; ++ ++ if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { ++ error_setg(errp, "property thread-id: %u doesn't match set apic-id:" ++ " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, ++ topo_ids.smt_id); ++ return; ++ } ++ cpu->thread_id = topo_ids.smt_id; ++ ++ /* ++ * kvm_enabled() must go first to ensure that kvm_* references are ++ * not emitted for the linker to consume (kvm_enabled() is ++ * a literal `0` in configurations where kvm_* aren't defined) ++ */ ++ if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && ++ !kvm_hv_vpindex_settable()) { ++ error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); ++ return; ++ } ++ ++ cs = CPU(cpu); ++ cs->cpu_index = idx; ++ ++ numa_cpu_pre_plug(cpu_slot, dev, errp); ++} ++ ++static long get_file_size(FILE *f) ++{ ++ long where, size; ++ ++ /* XXX: on 
Unix systems, using fstat() probably makes more sense */ ++ ++ where = ftell(f); ++ fseek(f, 0, SEEK_END); ++ size = ftell(f); ++ fseek(f, where, SEEK_SET); ++ ++ return size; ++} ++ ++void gsi_handler(void *opaque, int n, int level) ++{ ++ GSIState *s = opaque; ++ ++ trace_x86_gsi_interrupt(n, level); ++ switch (n) { ++ case 0 ... ISA_NUM_IRQS - 1: ++ if (s->i8259_irq[n]) { ++ /* Under KVM, Kernel will forward to both PIC and IOAPIC */ ++ qemu_set_irq(s->i8259_irq[n], level); ++ } ++ /* fall through */ ++ case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: ++#ifdef CONFIG_XEN_EMU ++ /* ++ * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC ++ * routing actually works properly under Xen). And then to ++ * *either* the PIRQ handling or the I/OAPIC depending on ++ * whether the former wants it. ++ */ ++ if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { ++ break; ++ } ++#endif ++ qemu_set_irq(s->ioapic_irq[n], level); ++ break; ++ case IO_APIC_SECONDARY_IRQBASE ++ ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: ++ qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); ++ break; ++ } ++} ++ ++void ioapic_init_gsi(GSIState *gsi_state, Object *parent) ++{ ++ DeviceState *dev; ++ SysBusDevice *d; ++ unsigned int i; ++ ++ assert(parent); ++ if (kvm_ioapic_in_kernel()) { ++ dev = qdev_new(TYPE_KVM_IOAPIC); ++ } else { ++ dev = qdev_new(TYPE_IOAPIC); ++ } ++ object_property_add_child(parent, "ioapic", OBJECT(dev)); ++ d = SYS_BUS_DEVICE(dev); ++ sysbus_realize_and_unref(d, &error_fatal); ++ sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); ++ ++ for (i = 0; i < IOAPIC_NUM_PINS; i++) { ++ gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); ++ } ++} ++ ++DeviceState *ioapic_init_secondary(GSIState *gsi_state) ++{ ++ DeviceState *dev; ++ SysBusDevice *d; ++ unsigned int i; ++ ++ dev = qdev_new(TYPE_IOAPIC); ++ d = SYS_BUS_DEVICE(dev); ++ sysbus_realize_and_unref(d, &error_fatal); ++ sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); ++ ++ for (i = 
0; i < IOAPIC_NUM_PINS; i++) { ++ gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); ++ } ++ return dev; ++} ++ ++/* ++ * The entry point into the kernel for PVH boot is different from ++ * the native entry point. The PVH entry is defined by the x86/HVM ++ * direct boot ABI and is available in an ELFNOTE in the kernel binary. ++ * ++ * This function is passed to load_elf() when it is called from ++ * load_elfboot() which then additionally checks for an ELF Note of ++ * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to ++ * parse the PVH entry address from the ELF Note. ++ * ++ * Due to trickery in elf_opts.h, load_elf() is actually available as ++ * load_elf32() or load_elf64() and this routine needs to be able ++ * to deal with being called as 32 or 64 bit. ++ * ++ * The address of the PVH entry point is saved to the 'pvh_start_addr' ++ * global variable. (although the entry point is 32-bit, the kernel ++ * binary can be either 32-bit or 64-bit). ++ */ ++static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) ++{ ++ size_t *elf_note_data_addr; ++ ++ /* Check if ELF Note header passed in is valid */ ++ if (arg1 == NULL) { ++ return 0; ++ } ++ ++ if (is64) { ++ struct elf64_note *nhdr64 = (struct elf64_note *)arg1; ++ uint64_t nhdr_size64 = sizeof(struct elf64_note); ++ uint64_t phdr_align = *(uint64_t *)arg2; ++ uint64_t nhdr_namesz = nhdr64->n_namesz; ++ ++ elf_note_data_addr = ++ ((void *)nhdr64) + nhdr_size64 + ++ QEMU_ALIGN_UP(nhdr_namesz, phdr_align); ++ ++ pvh_start_addr = *elf_note_data_addr; ++ } else { ++ struct elf32_note *nhdr32 = (struct elf32_note *)arg1; ++ uint32_t nhdr_size32 = sizeof(struct elf32_note); ++ uint32_t phdr_align = *(uint32_t *)arg2; ++ uint32_t nhdr_namesz = nhdr32->n_namesz; ++ ++ elf_note_data_addr = ++ ((void *)nhdr32) + nhdr_size32 + ++ QEMU_ALIGN_UP(nhdr_namesz, phdr_align); ++ ++ pvh_start_addr = *(uint32_t *)elf_note_data_addr; ++ } ++ ++ return pvh_start_addr; ++} ++ ++static bool 
load_elfboot(const char *kernel_filename, ++ int kernel_file_size, ++ uint8_t *header, ++ size_t pvh_xen_start_addr, ++ FWCfgState *fw_cfg) ++{ ++ uint32_t flags = 0; ++ uint32_t mh_load_addr = 0; ++ uint32_t elf_kernel_size = 0; ++ uint64_t elf_entry; ++ uint64_t elf_low, elf_high; ++ int kernel_size; ++ ++ if (ldl_p(header) != 0x464c457f) { ++ return false; /* no elfboot */ ++ } ++ ++ bool elf_is64 = header[EI_CLASS] == ELFCLASS64; ++ flags = elf_is64 ? ++ ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; ++ ++ if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ ++ error_report("elfboot unsupported flags = %x", flags); ++ exit(1); ++ } ++ ++ uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; ++ kernel_size = load_elf(kernel_filename, read_pvh_start_addr, ++ NULL, &elf_note_type, &elf_entry, ++ &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, ++ 0, 0); ++ ++ if (kernel_size < 0) { ++ error_report("Error while loading elf kernel"); ++ exit(1); ++ } ++ mh_load_addr = elf_low; ++ elf_kernel_size = elf_high - elf_low; ++ ++ if (pvh_start_addr == 0) { ++ error_report("Error loading uncompressed kernel without PVH ELF Note"); ++ exit(1); ++ } ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); ++ ++ return true; ++} ++ ++void x86_load_linux(X86MachineState *x86ms, ++ FWCfgState *fw_cfg, ++ int acpi_data_size, ++ bool pvh_enabled) ++{ ++ bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; ++ uint16_t protocol; ++ int setup_size, kernel_size, cmdline_size; ++ int dtb_size, setup_data_offset; ++ uint32_t initrd_max; ++ uint8_t header[8192], *setup, *kernel; ++ hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; ++ FILE *f; ++ char *vmode; ++ MachineState *machine = MACHINE(x86ms); ++ struct setup_data *setup_data; ++ const char *kernel_filename = machine->kernel_filename; ++ const 
char *initrd_filename = machine->initrd_filename; ++ const char *dtb_filename = machine->dtb; ++ const char *kernel_cmdline = machine->kernel_cmdline; ++ SevKernelLoaderContext sev_load_ctx = {}; ++ ++ /* Align to 16 bytes as a paranoia measure */ ++ cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; ++ ++ /* load the kernel header */ ++ f = fopen(kernel_filename, "rb"); ++ if (!f) { ++ fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", ++ kernel_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ kernel_size = get_file_size(f); ++ if (!kernel_size || ++ fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != ++ MIN(ARRAY_SIZE(header), kernel_size)) { ++ fprintf(stderr, "qemu: could not load kernel '%s': %s\n", ++ kernel_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ /* kernel protocol version */ ++ if (ldl_p(header + 0x202) == 0x53726448) { ++ protocol = lduw_p(header + 0x206); ++ } else { ++ /* ++ * This could be a multiboot kernel. If it is, let's stop treating it ++ * like a Linux kernel. ++ * Note: some multiboot images could be in the ELF format (the same of ++ * PVH), so we try multiboot first since we check the multiboot magic ++ * header before to load it. ++ */ ++ if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, ++ kernel_cmdline, kernel_size, header)) { ++ return; ++ } ++ /* ++ * Check if the file is an uncompressed kernel file (ELF) and load it, ++ * saving the PVH entry point used by the x86/HVM direct boot ABI. ++ * If load_elfboot() is successful, populate the fw_cfg info. 
++ */ ++ if (pvh_enabled && ++ load_elfboot(kernel_filename, kernel_size, ++ header, pvh_start_addr, fw_cfg)) { ++ fclose(f); ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, ++ strlen(kernel_cmdline) + 1); ++ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, ++ header, sizeof(header)); ++ ++ /* load initrd */ ++ if (initrd_filename) { ++ GMappedFile *mapped_file; ++ gsize initrd_size; ++ gchar *initrd_data; ++ GError *gerr = NULL; ++ ++ mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); ++ if (!mapped_file) { ++ fprintf(stderr, "qemu: error reading initrd %s: %s\n", ++ initrd_filename, gerr->message); ++ exit(1); ++ } ++ x86ms->initrd_mapped_file = mapped_file; ++ ++ initrd_data = g_mapped_file_get_contents(mapped_file); ++ initrd_size = g_mapped_file_get_length(mapped_file); ++ initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; ++ if (initrd_size >= initrd_max) { ++ fprintf(stderr, "qemu: initrd is too large, cannot support." 
++ "(max: %"PRIu32", need %"PRId64")\n", ++ initrd_max, (uint64_t)initrd_size); ++ exit(1); ++ } ++ ++ initrd_addr = (initrd_max - initrd_size) & ~4095; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, ++ initrd_size); ++ } ++ ++ option_rom[nb_option_roms].bootindex = 0; ++ option_rom[nb_option_roms].name = "pvh.bin"; ++ nb_option_roms++; ++ ++ return; ++ } ++ protocol = 0; ++ } ++ ++ if (protocol < 0x200 || !(header[0x211] & 0x01)) { ++ /* Low kernel */ ++ real_addr = 0x90000; ++ cmdline_addr = 0x9a000 - cmdline_size; ++ prot_addr = 0x10000; ++ } else if (protocol < 0x202) { ++ /* High but ancient kernel */ ++ real_addr = 0x90000; ++ cmdline_addr = 0x9a000 - cmdline_size; ++ prot_addr = 0x100000; ++ } else { ++ /* High and recent kernel */ ++ real_addr = 0x10000; ++ cmdline_addr = 0x20000; ++ prot_addr = 0x100000; ++ } ++ ++ /* highest address for loading the initrd */ ++ if (protocol >= 0x20c && ++ lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { ++ /* ++ * Linux has supported initrd up to 4 GB for a very long time (2007, ++ * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), ++ * though it only sets initrd_max to 2 GB to "work around bootloader ++ * bugs". Luckily, QEMU firmware(which does something like bootloader) ++ * has supported this. ++ * ++ * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can ++ * be loaded into any address. ++ * ++ * In addition, initrd_max is uint32_t simply because QEMU doesn't ++ * support the 64-bit boot protocol (specifically the ext_ramdisk_image ++ * field). ++ * ++ * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
++ */ ++ initrd_max = UINT32_MAX; ++ } else if (protocol >= 0x203) { ++ initrd_max = ldl_p(header + 0x22c); ++ } else { ++ initrd_max = 0x37ffffff; ++ } ++ ++ if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { ++ initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; ++ } ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); ++ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); ++ sev_load_ctx.cmdline_data = (char *)kernel_cmdline; ++ sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; ++ ++ if (protocol >= 0x202) { ++ stl_p(header + 0x228, cmdline_addr); ++ } else { ++ stw_p(header + 0x20, 0xA33F); ++ stw_p(header + 0x22, cmdline_addr - real_addr); ++ } ++ ++ /* handle vga= parameter */ ++ vmode = strstr(kernel_cmdline, "vga="); ++ if (vmode) { ++ unsigned int video_mode; ++ const char *end; ++ int ret; ++ /* skip "vga=" */ ++ vmode += 4; ++ if (!strncmp(vmode, "normal", 6)) { ++ video_mode = 0xffff; ++ } else if (!strncmp(vmode, "ext", 3)) { ++ video_mode = 0xfffe; ++ } else if (!strncmp(vmode, "ask", 3)) { ++ video_mode = 0xfffd; ++ } else { ++ ret = qemu_strtoui(vmode, &end, 0, &video_mode); ++ if (ret != 0 || (*end && *end != ' ')) { ++ fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); ++ exit(1); ++ } ++ } ++ stw_p(header + 0x1fa, video_mode); ++ } ++ ++ /* loader type */ ++ /* ++ * High nybble = B reserved for QEMU; low nybble is revision number. ++ * If this code is substantially changed, you may want to consider ++ * incrementing the revision. 
++ */ ++ if (protocol >= 0x200) { ++ header[0x210] = 0xB0; ++ } ++ /* heap */ ++ if (protocol >= 0x201) { ++ header[0x211] |= 0x80; /* CAN_USE_HEAP */ ++ stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); ++ } ++ ++ /* load initrd */ ++ if (initrd_filename) { ++ GMappedFile *mapped_file; ++ gsize initrd_size; ++ gchar *initrd_data; ++ GError *gerr = NULL; ++ ++ if (protocol < 0x200) { ++ fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); ++ exit(1); ++ } ++ ++ mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); ++ if (!mapped_file) { ++ fprintf(stderr, "qemu: error reading initrd %s: %s\n", ++ initrd_filename, gerr->message); ++ exit(1); ++ } ++ x86ms->initrd_mapped_file = mapped_file; ++ ++ initrd_data = g_mapped_file_get_contents(mapped_file); ++ initrd_size = g_mapped_file_get_length(mapped_file); ++ if (initrd_size >= initrd_max) { ++ fprintf(stderr, "qemu: initrd is too large, cannot support." ++ "(max: %"PRIu32", need %"PRId64")\n", ++ initrd_max, (uint64_t)initrd_size); ++ exit(1); ++ } ++ ++ initrd_addr = (initrd_max - initrd_size) & ~4095; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); ++ sev_load_ctx.initrd_data = initrd_data; ++ sev_load_ctx.initrd_size = initrd_size; ++ ++ stl_p(header + 0x218, initrd_addr); ++ stl_p(header + 0x21c, initrd_size); ++ } ++ ++ /* load kernel and setup */ ++ setup_size = header[0x1f1]; ++ if (setup_size == 0) { ++ setup_size = 4; ++ } ++ setup_size = (setup_size + 1) * 512; ++ if (setup_size > kernel_size) { ++ fprintf(stderr, "qemu: invalid kernel header\n"); ++ exit(1); ++ } ++ kernel_size -= setup_size; ++ ++ setup = g_malloc(setup_size); ++ kernel = g_malloc(kernel_size); ++ fseek(f, 0, SEEK_SET); ++ if (fread(setup, 1, setup_size, f) != setup_size) { ++ fprintf(stderr, "fread() failed\n"); ++ exit(1); ++ } ++ if (fread(kernel, 1, 
kernel_size, f) != kernel_size) { ++ fprintf(stderr, "fread() failed\n"); ++ exit(1); ++ } ++ fclose(f); ++ ++ /* append dtb to kernel */ ++ if (dtb_filename) { ++ if (protocol < 0x209) { ++ fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); ++ exit(1); ++ } ++ ++ dtb_size = get_image_size(dtb_filename); ++ if (dtb_size <= 0) { ++ fprintf(stderr, "qemu: error reading dtb %s: %s\n", ++ dtb_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); ++ kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; ++ kernel = g_realloc(kernel, kernel_size); ++ ++ stq_p(header + 0x250, prot_addr + setup_data_offset); ++ ++ setup_data = (struct setup_data *)(kernel + setup_data_offset); ++ setup_data->next = 0; ++ setup_data->type = cpu_to_le32(SETUP_DTB); ++ setup_data->len = cpu_to_le32(dtb_size); ++ ++ load_image_size(dtb_filename, setup_data->data, dtb_size); ++ } ++ ++ /* ++ * If we're starting an encrypted VM, it will be OVMF based, which uses the ++ * efi stub for booting and doesn't require any values to be placed in the ++ * kernel header. We therefore don't update the header so the hash of the ++ * kernel on the other side of the fw_cfg interface matches the hash of the ++ * file the user passed in. 
++ */ ++ if (!sev_enabled()) { ++ memcpy(setup, header, MIN(sizeof(header), setup_size)); ++ } ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); ++ sev_load_ctx.kernel_data = (char *)kernel; ++ sev_load_ctx.kernel_size = kernel_size; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); ++ sev_load_ctx.setup_data = (char *)setup; ++ sev_load_ctx.setup_size = setup_size; ++ ++ if (sev_enabled()) { ++ sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); ++ } ++ ++ option_rom[nb_option_roms].bootindex = 0; ++ option_rom[nb_option_roms].name = "linuxboot.bin"; ++ if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { ++ option_rom[nb_option_roms].name = "linuxboot_dma.bin"; ++ } ++ nb_option_roms++; ++} ++ ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only) ++{ ++ uint64_t bios_size = memory_region_size(bios); ++ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); ++ ++ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ bios_size - isa_bios_size, isa_bios_size); ++ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, ++ isa_bios, 1); ++ memory_region_set_readonly(isa_bios, read_only); ++} ++ ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, ++ MemoryRegion *rom_memory, bool isapc_ram_fw) ++{ ++ const char *bios_name; ++ char *filename; ++ int bios_size; ++ ssize_t ret; ++ ++ /* BIOS load */ ++ bios_name = MACHINE(x86ms)->firmware ?: default_firmware; ++ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); ++ if (filename) { ++ bios_size = get_image_size(filename); ++ } else { ++ bios_size = -1; ++ } ++ if (bios_size <= 0 || ++ (bios_size % 65536) != 0) { ++ goto 
bios_error; ++ } ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, ++ &error_fatal); ++ if (sev_enabled()) { ++ /* ++ * The concept of a "reset" simply doesn't exist for ++ * confidential computing guests, we have to destroy and ++ * re-launch them instead. So there is no need to register ++ * the firmware as rom to properly re-initialize on reset. ++ * Just go for a straight file load instead. ++ */ ++ void *ptr = memory_region_get_ram_ptr(&x86ms->bios); ++ load_image_size(filename, ptr, bios_size); ++ x86_firmware_configure(ptr, bios_size); ++ } else { ++ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); ++ ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); ++ if (ret != 0) { ++ goto bios_error; ++ } ++ } ++ g_free(filename); ++ ++ /* map the last 128KB of the BIOS in ISA space */ ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); ++ ++ /* map all the bios at the top of memory */ ++ memory_region_add_subregion(rom_memory, ++ (uint32_t)(-bios_size), ++ &x86ms->bios); ++ return; ++ ++bios_error: ++ fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); ++ exit(1); ++} +diff --git a/hw/i386/x86-cpu.c b/hw/i386/x86-cpu.c +new file mode 100644 +index 0000000000..ab2920522d +--- /dev/null ++++ b/hw/i386/x86-cpu.c +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 2003-2004 Fabrice Bellard ++ * Copyright (c) 2019, 2024 Red Hat, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "qemu/osdep.h" ++#include "sysemu/whpx.h" ++#include "sysemu/cpu-timers.h" ++#include "trace.h" ++ ++#include "hw/i386/x86.h" ++#include "target/i386/cpu.h" ++#include "hw/intc/i8259.h" ++#include "hw/irq.h" ++#include "sysemu/kvm.h" ++ ++/* TSC handling */ ++uint64_t cpu_get_tsc(CPUX86State *env) ++{ ++ return cpus_get_elapsed_ticks(); ++} ++ ++/* IRQ handling */ ++static void pic_irq_request(void *opaque, int irq, int level) ++{ ++ CPUState *cs = first_cpu; ++ X86CPU *cpu = X86_CPU(cs); ++ ++ trace_x86_pic_interrupt(irq, level); ++ if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && ++ !whpx_apic_in_platform()) { ++ CPU_FOREACH(cs) { ++ cpu = X86_CPU(cs); ++ if (apic_accept_pic_intr(cpu->apic_state)) { ++ apic_deliver_pic_intr(cpu->apic_state, level); ++ } ++ } ++ } else { ++ if (level) { ++ cpu_interrupt(cs, CPU_INTERRUPT_HARD); ++ } else { ++ cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ } ++ } ++} ++ ++qemu_irq x86_allocate_cpu_irq(void) ++{ ++ return qemu_allocate_irq(pic_irq_request, NULL, 0); ++} ++ ++int cpu_get_pic_interrupt(CPUX86State *env) ++{ ++ X86CPU *cpu = env_archcpu(env); ++ int intno; ++ ++ if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { ++ intno = apic_get_interrupt(cpu->apic_state); ++ if (intno >= 0) { ++ return intno; ++ } ++ /* read the irq from the PIC */ ++ if (!apic_accept_pic_intr(cpu->apic_state)) { ++ return -1; ++ } ++ } ++ ++ intno = pic_read_irq(isa_pic); ++ return intno; ++} ++ ++DeviceState *cpu_get_current_apic(void) ++{ ++ if (current_cpu) { ++ X86CPU *cpu = X86_CPU(current_cpu); ++ return cpu->apic_state; ++ } else { ++ return NULL; ++ } ++} +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index fcef652c1e..0b5cc59956 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -22,52 +22,25 @@ + */ + #include "qemu/osdep.h" + #include "qemu/error-report.h" +-#include "qemu/option.h" +-#include "qemu/cutils.h" + #include "qemu/units.h" +-#include "qemu/datadir.h" + #include "qapi/error.h" 
+ #include "qapi/qapi-visit-common.h" +-#include "qapi/clone-visitor.h" + #include "qapi/qapi-visit-machine.h" + #include "qapi/visitor.h" + #include "sysemu/qtest.h" +-#include "sysemu/whpx.h" + #include "sysemu/numa.h" +-#include "sysemu/replay.h" +-#include "sysemu/sysemu.h" +-#include "sysemu/cpu-timers.h" +-#include "sysemu/xen.h" + #include "trace.h" + ++#include "hw/acpi/aml-build.h" + #include "hw/i386/x86.h" +-#include "target/i386/cpu.h" + #include "hw/i386/topology.h" +-#include "hw/i386/fw_cfg.h" +-#include "hw/intc/i8259.h" +-#include "hw/rtc/mc146818rtc.h" +-#include "target/i386/sev.h" + +-#include "hw/acpi/cpu_hotplug.h" +-#include "hw/irq.h" + #include "hw/nmi.h" +-#include "hw/loader.h" +-#include "multiboot.h" +-#include "elf.h" +-#include "standard-headers/asm-x86/bootparam.h" +-#include CONFIG_DEVICES + #include "kvm/kvm_i386.h" + +-#ifdef CONFIG_XEN_EMU +-#include "hw/xen/xen.h" +-#include "hw/i386/kvm/xen_evtchn.h" +-#endif + +-/* Physical Address of PVH entry point read from kernel ELF NOTE */ +-static size_t pvh_start_addr; +- +-static void init_topo_info(X86CPUTopoInfo *topo_info, +- const X86MachineState *x86ms) ++void init_topo_info(X86CPUTopoInfo *topo_info, ++ const X86MachineState *x86ms) + { + MachineState *ms = MACHINE(x86ms); + +@@ -94,355 +67,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + return x86_apicid_from_cpu_idx(&topo_info, cpu_index); + } + +- +-void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) +-{ +- Object *cpu = object_new(MACHINE(x86ms)->cpu_type); +- +- if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { +- goto out; +- } +- qdev_realize(DEVICE(cpu), NULL, errp); +- +-out: +- object_unref(cpu); +-} +- +-void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) +-{ +- int i; +- const CPUArchIdList *possible_cpus; +- MachineState *ms = MACHINE(x86ms); +- MachineClass *mc = MACHINE_GET_CLASS(x86ms); +- +- x86_cpu_set_default_version(default_cpu_version); +- 
+- /* +- * Calculates the limit to CPU APIC ID values +- * +- * Limit for the APIC ID value, so that all +- * CPU APIC IDs are < x86ms->apic_id_limit. +- * +- * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). +- */ +- x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, +- ms->smp.max_cpus - 1) + 1; +- +- /* +- * Can we support APIC ID 255 or higher? With KVM, that requires +- * both in-kernel lapic and X2APIC userspace API. +- * +- * kvm_enabled() must go first to ensure that kvm_* references are +- * not emitted for the linker to consume (kvm_enabled() is +- * a literal `0` in configurations where kvm_* aren't defined) +- */ +- if (kvm_enabled() && x86ms->apic_id_limit > 255 && +- kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { +- error_report("current -smp configuration requires kernel " +- "irqchip and X2APIC API support."); +- exit(EXIT_FAILURE); +- } +- +- if (kvm_enabled()) { +- kvm_set_max_apic_id(x86ms->apic_id_limit); +- } +- +- if (!kvm_irqchip_in_kernel()) { +- apic_set_max_apic_id(x86ms->apic_id_limit); +- } +- +- possible_cpus = mc->possible_cpu_arch_ids(ms); +- for (i = 0; i < ms->smp.cpus; i++) { +- x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); +- } +-} +- +-void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) +-{ +- MC146818RtcState *rtc = MC146818_RTC(s); +- +- if (cpus_count > 0xff) { +- /* +- * If the number of CPUs can't be represented in 8 bits, the +- * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just +- * to make old BIOSes fail more predictably. 
+- */ +- mc146818rtc_set_cmos_data(rtc, 0x5f, 0); +- } else { +- mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); +- } +-} +- +-static int x86_apic_cmp(const void *a, const void *b) +-{ +- CPUArchId *apic_a = (CPUArchId *)a; +- CPUArchId *apic_b = (CPUArchId *)b; +- +- return apic_a->arch_id - apic_b->arch_id; +-} +- +-/* +- * returns pointer to CPUArchId descriptor that matches CPU's apic_id +- * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no +- * entry corresponding to CPU's apic_id returns NULL. +- */ +-CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) +-{ +- CPUArchId apic_id, *found_cpu; +- +- apic_id.arch_id = id; +- found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, +- ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), +- x86_apic_cmp); +- if (found_cpu && idx) { +- *idx = found_cpu - ms->possible_cpus->cpus; +- } +- return found_cpu; +-} +- +-void x86_cpu_plug(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- CPUArchId *found_cpu; +- Error *local_err = NULL; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- if (x86ms->acpi_dev) { +- hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); +- if (local_err) { +- goto out; +- } +- } +- +- /* increment the number of CPUs */ +- x86ms->boot_cpus++; +- if (x86ms->rtc) { +- x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); +- } +- if (x86ms->fw_cfg) { +- fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +- } +- +- found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); +- found_cpu->cpu = CPU(dev); +-out: +- error_propagate(errp, local_err); +-} +- +-void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- int idx = -1; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- if (!x86ms->acpi_dev) { +- error_setg(errp, "CPU hot unplug not supported without ACPI"); +- return; +- } +- +- 
x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); +- assert(idx != -1); +- if (idx == 0) { +- error_setg(errp, "Boot CPU is unpluggable"); +- return; +- } +- +- hotplug_handler_unplug_request(x86ms->acpi_dev, dev, +- errp); +-} +- +-void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- CPUArchId *found_cpu; +- Error *local_err = NULL; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err); +- if (local_err) { +- goto out; +- } +- +- found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); +- found_cpu->cpu = NULL; +- qdev_unrealize(dev); +- +- /* decrement the number of CPUs */ +- x86ms->boot_cpus--; +- /* Update the number of CPUs in CMOS */ +- x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); +- fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +- out: +- error_propagate(errp, local_err); +-} +- +-void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- int idx; +- CPUState *cs; +- CPUArchId *cpu_slot; +- X86CPUTopoIDs topo_ids; +- X86CPU *cpu = X86_CPU(dev); +- CPUX86State *env = &cpu->env; +- MachineState *ms = MACHINE(hotplug_dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- unsigned int smp_cores = ms->smp.cores; +- unsigned int smp_threads = ms->smp.threads; +- X86CPUTopoInfo topo_info; +- +- if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { +- error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", +- ms->cpu_type); +- return; +- } +- +- if (x86ms->acpi_dev) { +- Error *local_err = NULL; +- +- hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, +- &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- return; +- } +- } +- +- init_topo_info(&topo_info, x86ms); +- +- env->nr_dies = ms->smp.dies; +- +- /* +- * If APIC ID is not set, +- * set it based on socket/die/core/thread properties. 
+- */ +- if (cpu->apic_id == UNASSIGNED_APIC_ID) { +- int max_socket = (ms->smp.max_cpus - 1) / +- smp_threads / smp_cores / ms->smp.dies; +- +- /* +- * die-id was optional in QEMU 4.0 and older, so keep it optional +- * if there's only one die per socket. +- */ +- if (cpu->die_id < 0 && ms->smp.dies == 1) { +- cpu->die_id = 0; +- } +- +- if (cpu->socket_id < 0) { +- error_setg(errp, "CPU socket-id is not set"); +- return; +- } else if (cpu->socket_id > max_socket) { +- error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", +- cpu->socket_id, max_socket); +- return; +- } +- if (cpu->die_id < 0) { +- error_setg(errp, "CPU die-id is not set"); +- return; +- } else if (cpu->die_id > ms->smp.dies - 1) { +- error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", +- cpu->die_id, ms->smp.dies - 1); +- return; +- } +- if (cpu->core_id < 0) { +- error_setg(errp, "CPU core-id is not set"); +- return; +- } else if (cpu->core_id > (smp_cores - 1)) { +- error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", +- cpu->core_id, smp_cores - 1); +- return; +- } +- if (cpu->thread_id < 0) { +- error_setg(errp, "CPU thread-id is not set"); +- return; +- } else if (cpu->thread_id > (smp_threads - 1)) { +- error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", +- cpu->thread_id, smp_threads - 1); +- return; +- } +- +- topo_ids.pkg_id = cpu->socket_id; +- topo_ids.die_id = cpu->die_id; +- topo_ids.core_id = cpu->core_id; +- topo_ids.smt_id = cpu->thread_id; +- cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); +- } +- +- cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); +- if (!cpu_slot) { +- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); +- error_setg(errp, +- "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" +- " APIC ID %" PRIu32 ", valid index range 0:%d", +- topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, +- cpu->apic_id, ms->possible_cpus->len - 1); +- return; 
+- } +- +- if (cpu_slot->cpu) { +- error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", +- idx, cpu->apic_id); +- return; +- } +- +- /* if 'address' properties socket-id/core-id/thread-id are not set, set them +- * so that machine_query_hotpluggable_cpus would show correct values +- */ +- /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() +- * once -smp refactoring is complete and there will be CPU private +- * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ +- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); +- if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { +- error_setg(errp, "property socket-id: %u doesn't match set apic-id:" +- " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, +- topo_ids.pkg_id); +- return; +- } +- cpu->socket_id = topo_ids.pkg_id; +- +- if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { +- error_setg(errp, "property die-id: %u doesn't match set apic-id:" +- " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); +- return; +- } +- cpu->die_id = topo_ids.die_id; +- +- if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { +- error_setg(errp, "property core-id: %u doesn't match set apic-id:" +- " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, +- topo_ids.core_id); +- return; +- } +- cpu->core_id = topo_ids.core_id; +- +- if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { +- error_setg(errp, "property thread-id: %u doesn't match set apic-id:" +- " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, +- topo_ids.smt_id); +- return; +- } +- cpu->thread_id = topo_ids.smt_id; +- +- /* +- * kvm_enabled() must go first to ensure that kvm_* references are +- * not emitted for the linker to consume (kvm_enabled() is +- * a literal `0` in configurations where kvm_* aren't defined) +- */ +- if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && +- !kvm_hv_vpindex_settable()) { +- error_setg(errp, "kernel 
doesn't allow setting HyperV VP_INDEX"); +- return; +- } +- +- cs = CPU(cpu); +- cs->cpu_index = idx; +- +- numa_cpu_pre_plug(cpu_slot, dev, errp); +-} +- + static CpuInstanceProperties + x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + { +@@ -528,676 +152,6 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) + } + } + +-static long get_file_size(FILE *f) +-{ +- long where, size; +- +- /* XXX: on Unix systems, using fstat() probably makes more sense */ +- +- where = ftell(f); +- fseek(f, 0, SEEK_END); +- size = ftell(f); +- fseek(f, where, SEEK_SET); +- +- return size; +-} +- +-/* TSC handling */ +-uint64_t cpu_get_tsc(CPUX86State *env) +-{ +- return cpus_get_elapsed_ticks(); +-} +- +-/* IRQ handling */ +-static void pic_irq_request(void *opaque, int irq, int level) +-{ +- CPUState *cs = first_cpu; +- X86CPU *cpu = X86_CPU(cs); +- +- trace_x86_pic_interrupt(irq, level); +- if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && +- !whpx_apic_in_platform()) { +- CPU_FOREACH(cs) { +- cpu = X86_CPU(cs); +- if (apic_accept_pic_intr(cpu->apic_state)) { +- apic_deliver_pic_intr(cpu->apic_state, level); +- } +- } +- } else { +- if (level) { +- cpu_interrupt(cs, CPU_INTERRUPT_HARD); +- } else { +- cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); +- } +- } +-} +- +-qemu_irq x86_allocate_cpu_irq(void) +-{ +- return qemu_allocate_irq(pic_irq_request, NULL, 0); +-} +- +-int cpu_get_pic_interrupt(CPUX86State *env) +-{ +- X86CPU *cpu = env_archcpu(env); +- int intno; +- +- if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { +- intno = apic_get_interrupt(cpu->apic_state); +- if (intno >= 0) { +- return intno; +- } +- /* read the irq from the PIC */ +- if (!apic_accept_pic_intr(cpu->apic_state)) { +- return -1; +- } +- } +- +- intno = pic_read_irq(isa_pic); +- return intno; +-} +- +-DeviceState *cpu_get_current_apic(void) +-{ +- if (current_cpu) { +- X86CPU *cpu = X86_CPU(current_cpu); +- return cpu->apic_state; +- } else { +- return 
NULL; +- } +-} +- +-void gsi_handler(void *opaque, int n, int level) +-{ +- GSIState *s = opaque; +- +- trace_x86_gsi_interrupt(n, level); +- switch (n) { +- case 0 ... ISA_NUM_IRQS - 1: +- if (s->i8259_irq[n]) { +- /* Under KVM, Kernel will forward to both PIC and IOAPIC */ +- qemu_set_irq(s->i8259_irq[n], level); +- } +- /* fall through */ +- case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: +-#ifdef CONFIG_XEN_EMU +- /* +- * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC +- * routing actually works properly under Xen). And then to +- * *either* the PIRQ handling or the I/OAPIC depending on +- * whether the former wants it. +- */ +- if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { +- break; +- } +-#endif +- qemu_set_irq(s->ioapic_irq[n], level); +- break; +- case IO_APIC_SECONDARY_IRQBASE +- ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: +- qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); +- break; +- } +-} +- +-void ioapic_init_gsi(GSIState *gsi_state, Object *parent) +-{ +- DeviceState *dev; +- SysBusDevice *d; +- unsigned int i; +- +- assert(parent); +- if (kvm_ioapic_in_kernel()) { +- dev = qdev_new(TYPE_KVM_IOAPIC); +- } else { +- dev = qdev_new(TYPE_IOAPIC); +- } +- object_property_add_child(parent, "ioapic", OBJECT(dev)); +- d = SYS_BUS_DEVICE(dev); +- sysbus_realize_and_unref(d, &error_fatal); +- sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); +- +- for (i = 0; i < IOAPIC_NUM_PINS; i++) { +- gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); +- } +-} +- +-DeviceState *ioapic_init_secondary(GSIState *gsi_state) +-{ +- DeviceState *dev; +- SysBusDevice *d; +- unsigned int i; +- +- dev = qdev_new(TYPE_IOAPIC); +- d = SYS_BUS_DEVICE(dev); +- sysbus_realize_and_unref(d, &error_fatal); +- sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); +- +- for (i = 0; i < IOAPIC_NUM_PINS; i++) { +- gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); +- } +- return dev; +-} +- +-/* +- * The entry point into the kernel for PVH 
boot is different from +- * the native entry point. The PVH entry is defined by the x86/HVM +- * direct boot ABI and is available in an ELFNOTE in the kernel binary. +- * +- * This function is passed to load_elf() when it is called from +- * load_elfboot() which then additionally checks for an ELF Note of +- * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to +- * parse the PVH entry address from the ELF Note. +- * +- * Due to trickery in elf_opts.h, load_elf() is actually available as +- * load_elf32() or load_elf64() and this routine needs to be able +- * to deal with being called as 32 or 64 bit. +- * +- * The address of the PVH entry point is saved to the 'pvh_start_addr' +- * global variable. (although the entry point is 32-bit, the kernel +- * binary can be either 32-bit or 64-bit). +- */ +-static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) +-{ +- size_t *elf_note_data_addr; +- +- /* Check if ELF Note header passed in is valid */ +- if (arg1 == NULL) { +- return 0; +- } +- +- if (is64) { +- struct elf64_note *nhdr64 = (struct elf64_note *)arg1; +- uint64_t nhdr_size64 = sizeof(struct elf64_note); +- uint64_t phdr_align = *(uint64_t *)arg2; +- uint64_t nhdr_namesz = nhdr64->n_namesz; +- +- elf_note_data_addr = +- ((void *)nhdr64) + nhdr_size64 + +- QEMU_ALIGN_UP(nhdr_namesz, phdr_align); +- +- pvh_start_addr = *elf_note_data_addr; +- } else { +- struct elf32_note *nhdr32 = (struct elf32_note *)arg1; +- uint32_t nhdr_size32 = sizeof(struct elf32_note); +- uint32_t phdr_align = *(uint32_t *)arg2; +- uint32_t nhdr_namesz = nhdr32->n_namesz; +- +- elf_note_data_addr = +- ((void *)nhdr32) + nhdr_size32 + +- QEMU_ALIGN_UP(nhdr_namesz, phdr_align); +- +- pvh_start_addr = *(uint32_t *)elf_note_data_addr; +- } +- +- return pvh_start_addr; +-} +- +-static bool load_elfboot(const char *kernel_filename, +- int kernel_file_size, +- uint8_t *header, +- size_t pvh_xen_start_addr, +- FWCfgState *fw_cfg) +-{ +- uint32_t flags = 0; +- uint32_t 
mh_load_addr = 0; +- uint32_t elf_kernel_size = 0; +- uint64_t elf_entry; +- uint64_t elf_low, elf_high; +- int kernel_size; +- +- if (ldl_p(header) != 0x464c457f) { +- return false; /* no elfboot */ +- } +- +- bool elf_is64 = header[EI_CLASS] == ELFCLASS64; +- flags = elf_is64 ? +- ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; +- +- if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ +- error_report("elfboot unsupported flags = %x", flags); +- exit(1); +- } +- +- uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; +- kernel_size = load_elf(kernel_filename, read_pvh_start_addr, +- NULL, &elf_note_type, &elf_entry, +- &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, +- 0, 0); +- +- if (kernel_size < 0) { +- error_report("Error while loading elf kernel"); +- exit(1); +- } +- mh_load_addr = elf_low; +- elf_kernel_size = elf_high - elf_low; +- +- if (pvh_start_addr == 0) { +- error_report("Error loading uncompressed kernel without PVH ELF Note"); +- exit(1); +- } +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); +- +- return true; +-} +- +-void x86_load_linux(X86MachineState *x86ms, +- FWCfgState *fw_cfg, +- int acpi_data_size, +- bool pvh_enabled) +-{ +- bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; +- uint16_t protocol; +- int setup_size, kernel_size, cmdline_size; +- int dtb_size, setup_data_offset; +- uint32_t initrd_max; +- uint8_t header[8192], *setup, *kernel; +- hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; +- FILE *f; +- char *vmode; +- MachineState *machine = MACHINE(x86ms); +- struct setup_data *setup_data; +- const char *kernel_filename = machine->kernel_filename; +- const char *initrd_filename = machine->initrd_filename; +- const char *dtb_filename = machine->dtb; +- const char *kernel_cmdline = machine->kernel_cmdline; +- SevKernelLoaderContext 
sev_load_ctx = {}; +- +- /* Align to 16 bytes as a paranoia measure */ +- cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; +- +- /* load the kernel header */ +- f = fopen(kernel_filename, "rb"); +- if (!f) { +- fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", +- kernel_filename, strerror(errno)); +- exit(1); +- } +- +- kernel_size = get_file_size(f); +- if (!kernel_size || +- fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != +- MIN(ARRAY_SIZE(header), kernel_size)) { +- fprintf(stderr, "qemu: could not load kernel '%s': %s\n", +- kernel_filename, strerror(errno)); +- exit(1); +- } +- +- /* kernel protocol version */ +- if (ldl_p(header + 0x202) == 0x53726448) { +- protocol = lduw_p(header + 0x206); +- } else { +- /* +- * This could be a multiboot kernel. If it is, let's stop treating it +- * like a Linux kernel. +- * Note: some multiboot images could be in the ELF format (the same of +- * PVH), so we try multiboot first since we check the multiboot magic +- * header before to load it. +- */ +- if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, +- kernel_cmdline, kernel_size, header)) { +- return; +- } +- /* +- * Check if the file is an uncompressed kernel file (ELF) and load it, +- * saving the PVH entry point used by the x86/HVM direct boot ABI. +- * If load_elfboot() is successful, populate the fw_cfg info. 
+- */ +- if (pvh_enabled && +- load_elfboot(kernel_filename, kernel_size, +- header, pvh_start_addr, fw_cfg)) { +- fclose(f); +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, +- strlen(kernel_cmdline) + 1); +- fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, +- header, sizeof(header)); +- +- /* load initrd */ +- if (initrd_filename) { +- GMappedFile *mapped_file; +- gsize initrd_size; +- gchar *initrd_data; +- GError *gerr = NULL; +- +- mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); +- if (!mapped_file) { +- fprintf(stderr, "qemu: error reading initrd %s: %s\n", +- initrd_filename, gerr->message); +- exit(1); +- } +- x86ms->initrd_mapped_file = mapped_file; +- +- initrd_data = g_mapped_file_get_contents(mapped_file); +- initrd_size = g_mapped_file_get_length(mapped_file); +- initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; +- if (initrd_size >= initrd_max) { +- fprintf(stderr, "qemu: initrd is too large, cannot support." 
+- "(max: %"PRIu32", need %"PRId64")\n", +- initrd_max, (uint64_t)initrd_size); +- exit(1); +- } +- +- initrd_addr = (initrd_max - initrd_size) & ~4095; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, +- initrd_size); +- } +- +- option_rom[nb_option_roms].bootindex = 0; +- option_rom[nb_option_roms].name = "pvh.bin"; +- nb_option_roms++; +- +- return; +- } +- protocol = 0; +- } +- +- if (protocol < 0x200 || !(header[0x211] & 0x01)) { +- /* Low kernel */ +- real_addr = 0x90000; +- cmdline_addr = 0x9a000 - cmdline_size; +- prot_addr = 0x10000; +- } else if (protocol < 0x202) { +- /* High but ancient kernel */ +- real_addr = 0x90000; +- cmdline_addr = 0x9a000 - cmdline_size; +- prot_addr = 0x100000; +- } else { +- /* High and recent kernel */ +- real_addr = 0x10000; +- cmdline_addr = 0x20000; +- prot_addr = 0x100000; +- } +- +- /* highest address for loading the initrd */ +- if (protocol >= 0x20c && +- lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { +- /* +- * Linux has supported initrd up to 4 GB for a very long time (2007, +- * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), +- * though it only sets initrd_max to 2 GB to "work around bootloader +- * bugs". Luckily, QEMU firmware(which does something like bootloader) +- * has supported this. +- * +- * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can +- * be loaded into any address. +- * +- * In addition, initrd_max is uint32_t simply because QEMU doesn't +- * support the 64-bit boot protocol (specifically the ext_ramdisk_image +- * field). +- * +- * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
+- */ +- initrd_max = UINT32_MAX; +- } else if (protocol >= 0x203) { +- initrd_max = ldl_p(header + 0x22c); +- } else { +- initrd_max = 0x37ffffff; +- } +- +- if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { +- initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; +- } +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); +- fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); +- sev_load_ctx.cmdline_data = (char *)kernel_cmdline; +- sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; +- +- if (protocol >= 0x202) { +- stl_p(header + 0x228, cmdline_addr); +- } else { +- stw_p(header + 0x20, 0xA33F); +- stw_p(header + 0x22, cmdline_addr - real_addr); +- } +- +- /* handle vga= parameter */ +- vmode = strstr(kernel_cmdline, "vga="); +- if (vmode) { +- unsigned int video_mode; +- const char *end; +- int ret; +- /* skip "vga=" */ +- vmode += 4; +- if (!strncmp(vmode, "normal", 6)) { +- video_mode = 0xffff; +- } else if (!strncmp(vmode, "ext", 3)) { +- video_mode = 0xfffe; +- } else if (!strncmp(vmode, "ask", 3)) { +- video_mode = 0xfffd; +- } else { +- ret = qemu_strtoui(vmode, &end, 0, &video_mode); +- if (ret != 0 || (*end && *end != ' ')) { +- fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); +- exit(1); +- } +- } +- stw_p(header + 0x1fa, video_mode); +- } +- +- /* loader type */ +- /* +- * High nybble = B reserved for QEMU; low nybble is revision number. +- * If this code is substantially changed, you may want to consider +- * incrementing the revision. 
+- */ +- if (protocol >= 0x200) { +- header[0x210] = 0xB0; +- } +- /* heap */ +- if (protocol >= 0x201) { +- header[0x211] |= 0x80; /* CAN_USE_HEAP */ +- stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); +- } +- +- /* load initrd */ +- if (initrd_filename) { +- GMappedFile *mapped_file; +- gsize initrd_size; +- gchar *initrd_data; +- GError *gerr = NULL; +- +- if (protocol < 0x200) { +- fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); +- exit(1); +- } +- +- mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); +- if (!mapped_file) { +- fprintf(stderr, "qemu: error reading initrd %s: %s\n", +- initrd_filename, gerr->message); +- exit(1); +- } +- x86ms->initrd_mapped_file = mapped_file; +- +- initrd_data = g_mapped_file_get_contents(mapped_file); +- initrd_size = g_mapped_file_get_length(mapped_file); +- if (initrd_size >= initrd_max) { +- fprintf(stderr, "qemu: initrd is too large, cannot support." +- "(max: %"PRIu32", need %"PRId64")\n", +- initrd_max, (uint64_t)initrd_size); +- exit(1); +- } +- +- initrd_addr = (initrd_max - initrd_size) & ~4095; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); +- sev_load_ctx.initrd_data = initrd_data; +- sev_load_ctx.initrd_size = initrd_size; +- +- stl_p(header + 0x218, initrd_addr); +- stl_p(header + 0x21c, initrd_size); +- } +- +- /* load kernel and setup */ +- setup_size = header[0x1f1]; +- if (setup_size == 0) { +- setup_size = 4; +- } +- setup_size = (setup_size + 1) * 512; +- if (setup_size > kernel_size) { +- fprintf(stderr, "qemu: invalid kernel header\n"); +- exit(1); +- } +- kernel_size -= setup_size; +- +- setup = g_malloc(setup_size); +- kernel = g_malloc(kernel_size); +- fseek(f, 0, SEEK_SET); +- if (fread(setup, 1, setup_size, f) != setup_size) { +- fprintf(stderr, "fread() failed\n"); +- exit(1); +- } +- if (fread(kernel, 1, 
kernel_size, f) != kernel_size) { +- fprintf(stderr, "fread() failed\n"); +- exit(1); +- } +- fclose(f); +- +- /* append dtb to kernel */ +- if (dtb_filename) { +- if (protocol < 0x209) { +- fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); +- exit(1); +- } +- +- dtb_size = get_image_size(dtb_filename); +- if (dtb_size <= 0) { +- fprintf(stderr, "qemu: error reading dtb %s: %s\n", +- dtb_filename, strerror(errno)); +- exit(1); +- } +- +- setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); +- kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; +- kernel = g_realloc(kernel, kernel_size); +- +- stq_p(header + 0x250, prot_addr + setup_data_offset); +- +- setup_data = (struct setup_data *)(kernel + setup_data_offset); +- setup_data->next = 0; +- setup_data->type = cpu_to_le32(SETUP_DTB); +- setup_data->len = cpu_to_le32(dtb_size); +- +- load_image_size(dtb_filename, setup_data->data, dtb_size); +- } +- +- /* +- * If we're starting an encrypted VM, it will be OVMF based, which uses the +- * efi stub for booting and doesn't require any values to be placed in the +- * kernel header. We therefore don't update the header so the hash of the +- * kernel on the other side of the fw_cfg interface matches the hash of the +- * file the user passed in. 
+- */ +- if (!sev_enabled()) { +- memcpy(setup, header, MIN(sizeof(header), setup_size)); +- } +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); +- sev_load_ctx.kernel_data = (char *)kernel; +- sev_load_ctx.kernel_size = kernel_size; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); +- sev_load_ctx.setup_data = (char *)setup; +- sev_load_ctx.setup_size = setup_size; +- +- if (sev_enabled()) { +- sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); +- } +- +- option_rom[nb_option_roms].bootindex = 0; +- option_rom[nb_option_roms].name = "linuxboot.bin"; +- if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { +- option_rom[nb_option_roms].name = "linuxboot_dma.bin"; +- } +- nb_option_roms++; +-} +- +-void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, +- MemoryRegion *bios, bool read_only) +-{ +- uint64_t bios_size = memory_region_size(bios); +- uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); +- +- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, +- bios_size - isa_bios_size, isa_bios_size); +- memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, +- isa_bios, 1); +- memory_region_set_readonly(isa_bios, read_only); +-} +- +-void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, +- MemoryRegion *rom_memory, bool isapc_ram_fw) +-{ +- const char *bios_name; +- char *filename; +- int bios_size; +- ssize_t ret; +- +- /* BIOS load */ +- bios_name = MACHINE(x86ms)->firmware ?: default_firmware; +- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); +- if (filename) { +- bios_size = get_image_size(filename); +- } else { +- bios_size = -1; +- } +- if (bios_size <= 0 || +- (bios_size % 65536) != 0) { +- goto 
bios_error; +- } +- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, +- &error_fatal); +- if (sev_enabled()) { +- /* +- * The concept of a "reset" simply doesn't exist for +- * confidential computing guests, we have to destroy and +- * re-launch them instead. So there is no need to register +- * the firmware as rom to properly re-initialize on reset. +- * Just go for a straight file load instead. +- */ +- void *ptr = memory_region_get_ram_ptr(&x86ms->bios); +- load_image_size(filename, ptr, bios_size); +- x86_firmware_configure(ptr, bios_size); +- } else { +- memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); +- ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); +- if (ret != 0) { +- goto bios_error; +- } +- } +- g_free(filename); +- +- /* map the last 128KB of the BIOS in ISA space */ +- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, +- !isapc_ram_fw); +- +- /* map all the bios at the top of memory */ +- memory_region_add_subregion(rom_memory, +- (uint32_t)(-bios_size), +- &x86ms->bios); +- return; +- +-bios_error: +- fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); +- exit(1); +-} +- + bool x86_machine_is_smm_enabled(const X86MachineState *x86ms) + { + bool smm_available = false; +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index c2062db13f..b006f16b8d 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -21,6 +21,7 @@ + #include "exec/memory.h" + + #include "hw/boards.h" ++#include "hw/i386/topology.h" + #include "hw/intc/ioapic.h" + #include "hw/isa/isa.h" + #include "qom/object.h" +@@ -109,12 +110,11 @@ struct X86MachineState { + #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") + OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) + +-uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, ++void init_topo_info(X86CPUTopoInfo *topo_info, const X86MachineState *x86ms); ++uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + unsigned 
int cpu_index); + +-void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); + void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); +-CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); + void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); + void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch b/SOURCES/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch new file mode 100644 index 0000000..38fd870 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch @@ -0,0 +1,133 @@ +From ebf08d2a822576acfa60fbd5f552d26de1e4c4be Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:04 +0200 +Subject: [PATCH 040/100] hw/i386/x86: Don't leak "isa-bios" memory regions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [40/91] bb595357c6cc2d5a80bf3873853c69553c5feee5 (bonzini/rhel-qemu-kvm) + +Fix the leaking in x86_bios_rom_init() and pc_isa_bios_init() by adding an +"isa_bios" attribute to X86MachineState. 
+ +Suggested-by: Philippe Mathieu-Daudé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-4-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 32d3ee87a17fc91e981a23dba94855bff89f5920) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 7 +++---- + hw/i386/x86.c | 9 ++++----- + include/hw/i386/x86.h | 7 +++++++ + 3 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 59c7a81692..82d37cb376 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -40,11 +40,10 @@ + + #define FLASH_SECTOR_SIZE 4096 + +-static void pc_isa_bios_init(MemoryRegion *rom_memory, ++static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + MemoryRegion *flash_mem) + { + int isa_bios_size; +- MemoryRegion *isa_bios; + uint64_t flash_size; + void *flash_ptr, *isa_bios_ptr; + +@@ -52,7 +51,6 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(flash_size, 128 * KiB); +- isa_bios = g_malloc(sizeof(*isa_bios)); + memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, + &error_fatal); + memory_region_add_subregion_overlap(rom_memory, +@@ -136,6 +134,7 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms) + static void pc_system_flash_map(PCMachineState *pcms, + MemoryRegion *rom_memory) + { ++ X86MachineState *x86ms = X86_MACHINE(pcms); + hwaddr total_size = 0; + int i; + BlockBackend *blk; +@@ -185,7 +184,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(rom_memory, flash_mem); ++ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 6d3c72f124..457e8a34a5 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c 
+@@ -1133,7 +1133,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + { + const char *bios_name; + char *filename; +- MemoryRegion *bios, *isa_bios; ++ MemoryRegion *bios; + int bios_size, isa_bios_size; + ssize_t ret; + +@@ -1173,14 +1173,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(bios_size, 128 * KiB); +- isa_bios = g_malloc(sizeof(*isa_bios)); +- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, +- isa_bios, ++ &x86ms->isa_bios, + 1); +- memory_region_set_readonly(isa_bios, !isapc_ram_fw); ++ memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index cb07618d19..a07de79167 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -18,6 +18,7 @@ + #define HW_I386_X86_H + + #include "exec/hwaddr.h" ++#include "exec/memory.h" + + #include "hw/boards.h" + #include "hw/intc/ioapic.h" +@@ -52,6 +53,12 @@ struct X86MachineState { + GMappedFile *initrd_mapped_file; + HotplugHandler *acpi_dev; + ++ /* ++ * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address ++ * boundary. 
++ */ ++ MemoryRegion isa_bios; ++ + /* RAM information (sizes, addresses, configuration): */ + ram_addr_t below_4g_mem_size, above_4g_mem_size; + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch b/SOURCES/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch new file mode 100644 index 0000000..7a61f95 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch @@ -0,0 +1,105 @@ +From e1f2265b5f6bf5b63bf3808bb540888f3cf8badb Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:05 +0200 +Subject: [PATCH 041/100] hw/i386/x86: Don't leak "pc.bios" memory region +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [41/91] a9cd61d8d240134c09c46e244efb89217cadf60c (bonzini/rhel-qemu-kvm) + +Fix the leaking in x86_bios_rom_init() by adding a "bios" attribute to +X86MachineState. Note that it is only used in the -bios case. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-5-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 865d95321ffc8d9941e33000b10140550f094556) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 13 ++++++------- + include/hw/i386/x86.h | 6 ++++++ + 2 files changed, 12 insertions(+), 7 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 457e8a34a5..29167de97d 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1133,7 +1133,6 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + { + const char *bios_name; + char *filename; +- MemoryRegion *bios; + int bios_size, isa_bios_size; + ssize_t ret; + +@@ -1149,8 +1148,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + (bios_size % 65536) != 0) { + goto bios_error; + } +- bios = g_malloc(sizeof(*bios)); +- memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, ++ &error_fatal); + if (sev_enabled()) { + /* + * The concept of a "reset" simply doesn't exist for +@@ -1159,11 +1158,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + * the firmware as rom to properly re-initialize on reset. + * Just go for a straight file load instead. 
+ */ +- void *ptr = memory_region_get_ram_ptr(bios); ++ void *ptr = memory_region_get_ram_ptr(&x86ms->bios); + load_image_size(filename, ptr, bios_size); + x86_firmware_configure(ptr, bios_size); + } else { +- memory_region_set_readonly(bios, !isapc_ram_fw); ++ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + goto bios_error; +@@ -1173,7 +1172,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(bios_size, 128 * KiB); +- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios, ++ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, +@@ -1184,7 +1183,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, + (uint32_t)(-bios_size), +- bios); ++ &x86ms->bios); + return; + + bios_error: +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index a07de79167..55c6809ae0 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -53,6 +53,12 @@ struct X86MachineState { + GMappedFile *initrd_mapped_file; + HotplugHandler *acpi_dev; + ++ /* ++ * Map the whole BIOS just underneath the 4 GiB address boundary. Only used ++ * in the ROM (-bios) case. ++ */ ++ MemoryRegion bios; ++ + /* + * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address + * boundary. 
+-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch b/SOURCES/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch new file mode 100644 index 0000000..b9c18e7 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch @@ -0,0 +1,69 @@ +From b9d0c78f04160fbc1eee6cfd94b17f1133a35d83 Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Tue, 30 Apr 2024 17:06:38 +0200 +Subject: [PATCH 037/100] hw/i386/x86: Eliminate two if statements in + x86_bios_rom_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [37/91] 1ef6a13214e85f6ef773f5c894c720f20330912b (bonzini/rhel-qemu-kvm) + +Given that memory_region_set_readonly() is a no-op when the readonlyness is +already as requested it is possible to simplify the pattern + + if (condition) { + foo(true); + } + +to + + foo(condition); + +which is shorter and allows to see the invariant of the code more easily. 
+ +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240430150643.111976-2-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 014dbdac8798799d081abc9dff3e4876ca54f49e) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 3d5b51e92d..2a4f3ee285 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1163,9 +1163,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + load_image_size(filename, ptr, bios_size); + x86_firmware_configure(ptr, bios_size); + } else { +- if (!isapc_ram_fw) { +- memory_region_set_readonly(bios, true); +- } ++ memory_region_set_readonly(bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + goto bios_error; +@@ -1182,9 +1180,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + 0x100000 - isa_bios_size, + isa_bios, + 1); +- if (!isapc_ram_fw) { +- memory_region_set_readonly(isa_bios, true); +- } ++ memory_region_set_readonly(isa_bios, !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch b/SOURCES/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch new file mode 100644 index 0000000..6ce9c72 --- /dev/null +++ b/SOURCES/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch @@ -0,0 +1,98 @@ +From 1baf67564d4227d6ba98923217a15814c438c32b Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:06 +0200 +Subject: [PATCH 042/100] hw/i386/x86: Extract x86_isa_bios_init() from + x86_bios_rom_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 
+RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [42/91] 1db417a5995480924f7fd0661a306f2d2bfa0a77 (bonzini/rhel-qemu-kvm) + +The function is inspired by pc_isa_bios_init() and should eventually replace it. +Using x86_isa_bios_init() rather than pc_isa_bios_init() fixes pflash commands +to work in the isa-bios region. + +While at it convert the magic number 0x100000 (== 1MiB) to increase readability. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-6-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 5c5ffec12c30d2017cbdee6798f54d8fad3f9656) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 25 ++++++++++++++++--------- + include/hw/i386/x86.h | 2 ++ + 2 files changed, 18 insertions(+), 9 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 29167de97d..c61f4ebfa6 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1128,12 +1128,25 @@ void x86_load_linux(X86MachineState *x86ms, + nb_option_roms++; + } + ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only) ++{ ++ uint64_t bios_size = memory_region_size(bios); ++ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); ++ ++ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ bios_size - isa_bios_size, isa_bios_size); ++ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, ++ isa_bios, 1); ++ memory_region_set_readonly(isa_bios, read_only); ++} ++ + void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw) + { + const char *bios_name; + char *filename; +- int bios_size, isa_bios_size; ++ int bios_size; + ssize_t ret; + + /* BIOS load */ +@@ -1171,14 +1184,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + g_free(filename); + + /* map the last 128KB of the BIOS in ISA space */ +- 
isa_bios_size = MIN(bios_size, 128 * KiB); +- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios, +- bios_size - isa_bios_size, isa_bios_size); +- memory_region_add_subregion_overlap(rom_memory, +- 0x100000 - isa_bios_size, +- &x86ms->isa_bios, +- 1); +- memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw); ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index 55c6809ae0..d7b7d3f3ce 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -129,6 +129,8 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only); + void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw); + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch b/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch deleted file mode 100644 index f850765..0000000 --- a/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 84f378c41832602dcf9bad6167b1f532c7c53e37 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 21 Nov 2023 15:03:55 +0100 -Subject: [PATCH 048/101] hw/ppc/Kconfig: Imply VFIO_PCI -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [47/67] c1a40cdab9bf62b16cb428d57a20b3e0eaa6de38 (eauger1/centos-qemu-kvm) - -When the legacy and iommufd backends were introduced, a set of common -vfio-pci routines were 
exported in pci.c for both backends to use : - - vfio_pci_pre_reset - vfio_pci_get_pci_hot_reset_info - vfio_pci_host_match - vfio_pci_post_reset - -This introduced a build failure on PPC when --without-default-devices -is use because VFIO is always selected in ppc/Kconfig but VFIO_PCI is -not. - -Use an 'imply VFIO_PCI' in ppc/Kconfig and bypass compilation of the -VFIO EEH hooks routines defined in hw/ppc/spapr_pci_vfio.c with -CONFIG_VFIO_PCI. - -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Zhenzhong Duan -Signed-off-by: Cédric Le Goater -(cherry picked from commit 4278df9d1d2383b738338c857406357660f11e42) -Signed-off-by: Eric Auger ---- - hw/ppc/Kconfig | 2 +- - hw/ppc/spapr_pci_vfio.c | 36 ++++++++++++++++++++++++++++++++++++ - 2 files changed, 37 insertions(+), 1 deletion(-) - -diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig -index 56f0475a8e..44263a58c4 100644 ---- a/hw/ppc/Kconfig -+++ b/hw/ppc/Kconfig -@@ -3,11 +3,11 @@ config PSERIES - imply PCI_DEVICES - imply TEST_DEVICES - imply VIRTIO_VGA -+ imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c - select NVDIMM - select DIMM - select PCI - select SPAPR_VSCSI -- select VFIO if LINUX # needed by spapr_pci_vfio.c - select XICS - select XIVE - select MSI_NONBROKEN -diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c -index d1d07bec46..76b2a3487b 100644 ---- a/hw/ppc/spapr_pci_vfio.c -+++ b/hw/ppc/spapr_pci_vfio.c -@@ -26,10 +26,12 @@ - #include "hw/pci/pci_device.h" - #include "hw/vfio/vfio-common.h" - #include "qemu/error-report.h" -+#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */ - - /* - * Interfaces for IBM EEH (Enhanced Error Handling) - */ -+#ifdef CONFIG_VFIO_PCI - static bool vfio_eeh_container_ok(VFIOContainer *container) - { - /* -@@ -314,3 +316,37 @@ int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) - - return RTAS_OUT_SUCCESS; - } -+ -+#else -+ -+bool spapr_phb_eeh_available(SpaprPhbState *sphb) -+{ -+ return false; -+} -+ -+void spapr_phb_vfio_reset(DeviceState *qdev) -+{ -+} 
-+ -+int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, -+ unsigned int addr, int option) -+{ -+ return RTAS_OUT_NOT_SUPPORTED; -+} -+ -+int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state) -+{ -+ return RTAS_OUT_NOT_SUPPORTED; -+} -+ -+int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option) -+{ -+ return RTAS_OUT_NOT_SUPPORTED; -+} -+ -+int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) -+{ -+ return RTAS_OUT_NOT_SUPPORTED; -+} -+ -+#endif /* CONFIG_VFIO_PCI */ --- -2.39.3 - diff --git a/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch b/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch deleted file mode 100644 index 2c7f6ff..0000000 --- a/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 8f27893a37e55a31180bb66cd9eae7199911881b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Volker=20R=C3=BCmelin?= -Date: Fri, 29 Dec 2023 21:38:54 +0100 -Subject: [PATCH 060/101] hw/vfio: fix iteration over global VFIODevice list -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [59/67] f926e1233c8c5ad418e8794b1a103371c9dc5eb0 (eauger1/centos-qemu-kvm) - -Commit 3d779abafe ("vfio/common: Introduce a global VFIODevice list") -introduced a global VFIODevice list, but forgot to update the list -element field name when iterating over the new list. Change the code -to use the correct list element field. 
- -Fixes: 3d779abafe ("vfio/common: Introduce a global VFIODevice list") -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2061 -Signed-off-by: Volker Rümelin -Reviewed-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -(cherry picked from commit 9353b6da430f90e47f352dbf6dc31120c8914da6) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 0d4d8b8416..0b3352f2a9 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -73,7 +73,7 @@ bool vfio_mig_active(void) - return false; - } - -- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { -+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { - if (vbasedev->migration_blocker) { - return false; - } -@@ -94,7 +94,7 @@ static bool vfio_multiple_devices_migration_is_supported(void) - unsigned int device_num = 0; - bool all_support_p2p = true; - -- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { -+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { - if (vbasedev->migration) { - device_num++; - -@@ -1366,13 +1366,13 @@ void vfio_reset_handler(void *opaque) - { - VFIODevice *vbasedev; - -- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { -+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { - if (vbasedev->dev->realized) { - vbasedev->ops->vfio_compute_needs_reset(vbasedev); - } - } - -- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { -+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { - if (vbasedev->dev->realized && vbasedev->needs_reset) { - vbasedev->ops->vfio_hot_reset_multi(vbasedev); - } --- -2.39.3 - diff --git a/SOURCES/kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch b/SOURCES/kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch new file mode 100644 index 0000000..7b2e1b6 --- /dev/null +++ b/SOURCES/kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch @@ -0,0 +1,108 @@ +From 
c554f8768a18ceba173aedbd582c1cae43a41e2c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 18 Jun 2024 14:19:58 +0200 +Subject: [PATCH 1/2] hw/virtio: Fix the de-initialization of vhost-user + devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 255: hw/virtio: Fix the de-initialization of vhost-user devices +RH-Jira: RHEL-40708 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] c7815a249ec135993f45934cab1c1f2c038b80ea (thuth/qemu-kvm-cs9) + +JIRA: https://issues.redhat.com/browse/RHEL-40708 + +The unrealize functions of the various vhost-user devices are +calling the corresponding vhost_*_set_status() functions with a +status of 0 to shut down the device correctly. + +Now these vhost_*_set_status() functions all follow this scheme: + + bool should_start = virtio_device_should_start(vdev, status); + + if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { + return; + } + + if (should_start) { + /* ... do the initialization stuff ... */ + } else { + /* ... do the cleanup stuff ... */ + } + +The problem here is virtio_device_should_start(vdev, 0) currently +always returns "true" since it internally only looks at vdev->started +instead of looking at the "status" parameter. Thus once the device +got started once, virtio_device_should_start() always returns true +and thus the vhost_*_set_status() functions return early, without +ever doing any clean-up when being called with status == 0. This +causes e.g. problems when trying to hot-plug and hot-unplug a vhost +user devices multiple times since the de-initialization step is +completely skipped during the unplug operation. 
+ +This bug has been introduced in commit 9f6bcfd99f ("hw/virtio: move +vm_running check to virtio_device_started") which replaced + + should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + +with + + should_start = virtio_device_started(vdev, status); + +which later got replaced by virtio_device_should_start(). This blocked +the possibility to set should_start to false in case the status flag +VIRTIO_CONFIG_S_DRIVER_OK was not set. + +Fix it by adjusting the virtio_device_should_start() function to +only consider the status flag instead of vdev->started. Since this +function is only used in the various vhost_*_set_status() functions +for exactly the same purpose, it should be fine to fix it in this +central place there without any risk to change the behavior of other +code. + +Fixes: 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started") +Buglink: https://issues.redhat.com/browse/RHEL-40708 +Signed-off-by: Thomas Huth +Message-Id: <20240618121958.88673-1-thuth@redhat.com> +Reviewed-by: Manos Pitsidianakis +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d72479b11797c28893e1e3fc565497a9cae5ca16) +Signed-off-by: Thomas Huth +--- + include/hw/virtio/virtio.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 7d5ffdc145..2eafad17b8 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -470,9 +470,9 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status) + * @vdev - the VirtIO device + * @status - the devices status bits + * +- * This is similar to virtio_device_started() but also encapsulates a +- * check on the VM status which would prevent a device starting +- * anyway. ++ * This is similar to virtio_device_started() but ignores vdev->started ++ * and also encapsulates a check on the VM status which would prevent a ++ * device from starting anyway. 
+ */ + static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status) + { +@@ -480,7 +480,7 @@ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status + return false; + } + +- return virtio_device_started(vdev, status); ++ return status & VIRTIO_CONFIG_S_DRIVER_OK; + } + + static inline void virtio_set_started(VirtIODevice *vdev, bool started) +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch b/SOURCES/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch new file mode 100644 index 0000000..8f69f9e --- /dev/null +++ b/SOURCES/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch @@ -0,0 +1,68 @@ +From f572a40924c7138072e387111d0f092185972477 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:39 +0200 +Subject: [PATCH 044/100] i386: correctly select code in hw/i386 that depends + on other components + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [44/91] 1327a5eb2b91edacf56cc4e93255cad456abbbeb (bonzini/rhel-qemu-kvm) + +fw_cfg.c and vapic.c are currently included unconditionally but +depend on other components. vapic.c depends on the local APIC, +while fw_cfg.c includes a piece of AML builder code that depends +on CONFIG_ACPI. 
+ +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-9-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7974e51342775c87f6e759a8c525db1045ddfa24) +Signed-off-by: Paolo Bonzini +--- + hw/i386/fw_cfg.c | 2 ++ + hw/i386/meson.build | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 283c3f4c16..7f97d40616 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -204,6 +204,7 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg) + fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); + } + ++#ifdef CONFIG_ACPI + void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) + { + /* +@@ -230,3 +231,4 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } ++#endif +diff --git a/hw/i386/meson.build b/hw/i386/meson.build +index d8b70ef3e9..d9da676038 100644 +--- a/hw/i386/meson.build ++++ b/hw/i386/meson.build +@@ -1,12 +1,12 @@ + i386_ss = ss.source_set() + i386_ss.add(files( + 'fw_cfg.c', +- 'vapic.c', + 'e820_memory_layout.c', + 'multiboot.c', + 'x86.c', + )) + ++i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c')) + i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), + if_false: files('x86-iommu-stub.c')) + i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch b/SOURCES/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch new file mode 100644 index 0000000..31a7e92 --- /dev/null +++ b/SOURCES/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch @@ -0,0 +1,40 @@ +From 127f3c60668e1bd08ec00856a317cb841adf0440 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:23 -0500 +Subject: [PATCH 063/100] i386/cpu: Set SEV-SNP CPUID bit when SNP enabled + +RH-Author: Paolo 
Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [63/91] 0f834a6897c5cdc0e29a5b1862e621f8ce309657 (bonzini/rhel-qemu-kvm) + +SNP guests will rely on this bit to determine certain feature support. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-12-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7831221941cccbde922412c1550ed8b4bce7c361) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 489c853b42..13737cd703 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6822,6 +6822,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 0x8 : 0; ++ *eax |= sev_snp_enabled() ? 0x10 : 0; + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch b/SOURCES/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch new file mode 100644 index 0000000..fd604d2 --- /dev/null +++ b/SOURCES/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch @@ -0,0 +1,145 @@ +From 14aa42bbacde75b2ce9a59d1267f73d613026461 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:42 -0500 +Subject: [PATCH 076/100] i386/kvm: Add KVM_EXIT_HYPERCALL handling for + KVM_HC_MAP_GPA_RANGE + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [76/91] 3e1201c330dc826af1ec4650974d47053270eb16 (bonzini/rhel-qemu-kvm) + +KVM_HC_MAP_GPA_RANGE will be used to send requests to userspace for +private/shared 
memory attribute updates requested by the guest. +Implement handling for that use-case along with some basic +infrastructure for enabling specific hypercall events. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-31-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 47e76d03b155e43beca550251a6eb7ea926c059f) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 55 ++++++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 1 + + target/i386/kvm/trace-events | 1 + + 3 files changed, 57 insertions(+) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 75e75d9772..2935e3931a 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -21,6 +21,7 @@ + #include + + #include ++#include + #include "standard-headers/asm-x86/kvm_para.h" + #include "hw/xen/interface/arch-x86/cpuid.h" + +@@ -208,6 +209,13 @@ int kvm_get_vm_type(MachineState *ms) + return kvm_type; + } + ++bool kvm_enable_hypercall(uint64_t enable_mask) ++{ ++ KVMState *s = KVM_STATE(current_accel()); ++ ++ return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask); ++} ++ + bool kvm_has_smm(void) + { + return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); +@@ -5325,6 +5333,50 @@ static bool host_supports_vmx(void) + return ecx & CPUID_EXT_VMX; + } + ++/* ++ * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE ++ * to service guest-initiated memory attribute update requests so that ++ * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be ++ * backed by the private memory pool provided by guest_memfd, and as such ++ * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX). ++ * ++ * Other other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live ++ * migration, are not implemented here currently. 
++ * ++ * For the guest_memfd use-case, these exits will generally be synthesized ++ * by KVM based on platform-specific hypercalls, like GHCB requests in the ++ * case of SEV-SNP, and not issued directly within the guest though the ++ * KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is ++ * not actually advertised to guests via the KVM CPUID feature bit, as ++ * opposed to SEV live migration where it would be. Since it is unlikely the ++ * SEV live migration use-case would be useful for guest-memfd backed guests, ++ * because private/shared page tracking is already provided through other ++ * means, these 2 use-cases should be treated as being mutually-exclusive. ++ */ ++static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) ++{ ++ uint64_t gpa, size, attributes; ++ ++ if (!machine_require_guest_memfd(current_machine)) ++ return -EINVAL; ++ ++ gpa = run->hypercall.args[0]; ++ size = run->hypercall.args[1] * TARGET_PAGE_SIZE; ++ attributes = run->hypercall.args[2]; ++ ++ trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); ++ ++ return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); ++} ++ ++static int kvm_handle_hypercall(struct kvm_run *run) ++{ ++ if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) ++ return kvm_handle_hc_map_gpa_range(run); ++ ++ return -EINVAL; ++} ++ + #define VMX_INVALID_GUEST_STATE 0x80000021 + + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) +@@ -5420,6 +5472,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + ret = kvm_xen_handle_exit(cpu, &run->xen); + break; + #endif ++ case KVM_EXIT_HYPERCALL: ++ ret = kvm_handle_hypercall(run); ++ break; + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; +diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index 6b44844d95..34fc60774b 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -33,6 +33,7 @@ + bool 
kvm_has_smm(void); + bool kvm_enable_x2apic(void); + bool kvm_hv_vpindex_settable(void); ++bool kvm_enable_hypercall(uint64_t enable_mask); + + bool kvm_enable_sgx_provisioning(KVMState *s); + bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); +diff --git a/target/i386/kvm/trace-events b/target/i386/kvm/trace-events +index b365a8e8e2..74a6234ff7 100644 +--- a/target/i386/kvm/trace-events ++++ b/target/i386/kvm/trace-events +@@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" + kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" + kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" + kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" ++kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64 + + # xen-emu.c + kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch b/SOURCES/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch new file mode 100644 index 0000000..4b91e93 --- /dev/null +++ b/SOURCES/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch @@ -0,0 +1,536 @@ +From 5ead79f45e8e90b7a04586c89e70cb9d0b66b730 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 29 Feb 2024 01:36:43 -0500 +Subject: [PATCH 004/100] i386/kvm: Move architectural CPUID leaf generation to + separate helper + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [4/91] 06ecdbcf05ad3d658273980b114f02477d0b0475 (bonzini/rhel-qemu-kvm) + +Move the architectural (for lack of a 
better term) CPUID leaf generation +to a separate helper so that the generation code can be reused by TDX, +which needs to generate a canonical VM-scoped configuration. + +For now this is just a cleanup, so keep the function static. + +Signed-off-by: Sean Christopherson +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-23-xiaoyao.li@intel.com> +Reviewed-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit a5acf4f26c208a05d05ef1bde65553ce2ab5e5d0) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 417 +++++++++++++++++++++--------------------- + 1 file changed, 211 insertions(+), 206 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 739f33db47..5f30b649a0 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1706,195 +1706,22 @@ static void kvm_init_nested_state(CPUX86State *env) + } + } + +-int kvm_arch_init_vcpu(CPUState *cs) ++static uint32_t kvm_x86_build_cpuid(CPUX86State *env, ++ struct kvm_cpuid_entry2 *entries, ++ uint32_t cpuid_i) + { +- struct { +- struct kvm_cpuid2 cpuid; +- struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +- } cpuid_data; +- /* +- * The kernel defines these structs with padding fields so there +- * should be no extra padding in our cpuid_data struct. 
+- */ +- QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != +- sizeof(struct kvm_cpuid2) + +- sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); +- +- X86CPU *cpu = X86_CPU(cs); +- CPUX86State *env = &cpu->env; +- uint32_t limit, i, j, cpuid_i; ++ uint32_t limit, i, j; + uint32_t unused; + struct kvm_cpuid_entry2 *c; +- uint32_t signature[3]; +- int kvm_base = KVM_CPUID_SIGNATURE; +- int max_nested_state_len; +- int r; +- Error *local_err = NULL; +- +- memset(&cpuid_data, 0, sizeof(cpuid_data)); +- +- cpuid_i = 0; +- +- has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); +- +- r = kvm_arch_set_tsc_khz(cs); +- if (r < 0) { +- return r; +- } +- +- /* vcpu's TSC frequency is either specified by user, or following +- * the value used by KVM if the former is not present. In the +- * latter case, we query it from KVM and record in env->tsc_khz, +- * so that vcpu's TSC frequency can be migrated later via this field. +- */ +- if (!env->tsc_khz) { +- r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? +- kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : +- -ENOTSUP; +- if (r > 0) { +- env->tsc_khz = r; +- } +- } +- +- env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; +- +- /* +- * kvm_hyperv_expand_features() is called here for the second time in case +- * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle +- * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to +- * check which Hyper-V enlightenments are supported and which are not, we +- * can still proceed and check/expand Hyper-V enlightenments here so legacy +- * behavior is preserved. 
+- */ +- if (!kvm_hyperv_expand_features(cpu, &local_err)) { +- error_report_err(local_err); +- return -ENOSYS; +- } +- +- if (hyperv_enabled(cpu)) { +- r = hyperv_init_vcpu(cpu); +- if (r) { +- return r; +- } +- +- cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); +- kvm_base = KVM_CPUID_SIGNATURE_NEXT; +- has_msr_hv_hypercall = true; +- } +- +- if (cs->kvm_state->xen_version) { +-#ifdef CONFIG_XEN_EMU +- struct kvm_cpuid_entry2 *xen_max_leaf; +- +- memcpy(signature, "XenVMMXenVMM", 12); +- +- xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_SIGNATURE; +- c->eax = kvm_base + XEN_CPUID_TIME; +- c->ebx = signature[0]; +- c->ecx = signature[1]; +- c->edx = signature[2]; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_VENDOR; +- c->eax = cs->kvm_state->xen_version; +- c->ebx = 0; +- c->ecx = 0; +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_HVM_MSR; +- /* Number of hypercall-transfer pages */ +- c->eax = 1; +- /* Hypercall MSR base address */ +- if (hyperv_enabled(cpu)) { +- c->ebx = XEN_HYPERCALL_MSR_HYPERV; +- kvm_xen_init(cs->kvm_state, c->ebx); +- } else { +- c->ebx = XEN_HYPERCALL_MSR; +- } +- c->ecx = 0; +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_TIME; +- c->eax = ((!!tsc_is_stable_and_known(env) << 1) | +- (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); +- /* default=0 (emulate if necessary) */ +- c->ebx = 0; +- /* guest tsc frequency */ +- c->ecx = env->user_tsc_khz; +- /* guest tsc incarnation (migration count) */ +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_HVM; +- xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { +- c->function = kvm_base + XEN_CPUID_HVM; +- +- if (cpu->xen_vapic) { +- c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; +- c->eax |= 
XEN_HVM_CPUID_X2APIC_VIRT; +- } +- +- c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; +- +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { +- c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; +- c->ebx = cs->cpu_index; +- } +- +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { +- c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; +- } +- } +- +- r = kvm_xen_init_vcpu(cs); +- if (r) { +- return r; +- } +- +- kvm_base += 0x100; +-#else /* CONFIG_XEN_EMU */ +- /* This should never happen as kvm_arch_init() would have died first. */ +- fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); +- abort(); +-#endif +- } else if (cpu->expose_kvm) { +- memcpy(signature, "KVMKVMKVM\0\0\0", 12); +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = KVM_CPUID_SIGNATURE | kvm_base; +- c->eax = KVM_CPUID_FEATURES | kvm_base; +- c->ebx = signature[0]; +- c->ecx = signature[1]; +- c->edx = signature[2]; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = KVM_CPUID_FEATURES | kvm_base; +- c->eax = env->features[FEAT_KVM]; +- c->edx = env->features[FEAT_KVM_HINTS]; +- } + + cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); + +- if (cpu->kvm_pv_enforce_cpuid) { +- r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); +- if (r < 0) { +- fprintf(stderr, +- "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", +- strerror(-r)); +- abort(); +- } +- } +- + for (i = 0; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported level value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; +- ++ c = &entries[cpuid_i++]; + switch (i) { + case 2: { + /* Keep reading function 2 till all the input is received */ +@@ -1908,11 +1735,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + + for (j = 1; j < times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:2):eax & 0xf = 0x%x\n", times); +- abort(); ++ goto full; + } +- c = 
&cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + c->function = i; + c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); +@@ -1951,11 +1776,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + continue; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + break; + case 0x12: +@@ -1970,11 +1793,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x12,ecx:0x%x)\n", j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + break; + case 0x7: +@@ -1991,11 +1812,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + + for (j = 1; j <= times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + c->function = i; + c->index = j; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; +@@ -2048,11 +1867,11 @@ int kvm_arch_init_vcpu(CPUState *cs) + cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0x80000000; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + + switch (i) { + case 0x8000001d: +@@ -2067,11 +1886,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + break; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + 
break; + default: +@@ -2094,11 +1911,11 @@ int kvm_arch_init_vcpu(CPUState *cs) + cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0xC0000000; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + + c->function = i; + c->flags = 0; +@@ -2106,6 +1923,194 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + } + ++ return cpuid_i; ++ ++full: ++ fprintf(stderr, "cpuid_data is full, no space for " ++ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); ++ abort(); ++} ++ ++int kvm_arch_init_vcpu(CPUState *cs) ++{ ++ struct { ++ struct kvm_cpuid2 cpuid; ++ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; ++ } cpuid_data; ++ /* ++ * The kernel defines these structs with padding fields so there ++ * should be no extra padding in our cpuid_data struct. ++ */ ++ QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != ++ sizeof(struct kvm_cpuid2) + ++ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); ++ ++ X86CPU *cpu = X86_CPU(cs); ++ CPUX86State *env = &cpu->env; ++ uint32_t cpuid_i; ++ struct kvm_cpuid_entry2 *c; ++ uint32_t signature[3]; ++ int kvm_base = KVM_CPUID_SIGNATURE; ++ int max_nested_state_len; ++ int r; ++ Error *local_err = NULL; ++ ++ memset(&cpuid_data, 0, sizeof(cpuid_data)); ++ ++ cpuid_i = 0; ++ ++ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); ++ ++ r = kvm_arch_set_tsc_khz(cs); ++ if (r < 0) { ++ return r; ++ } ++ ++ /* vcpu's TSC frequency is either specified by user, or following ++ * the value used by KVM if the former is not present. In the ++ * latter case, we query it from KVM and record in env->tsc_khz, ++ * so that vcpu's TSC frequency can be migrated later via this field. ++ */ ++ if (!env->tsc_khz) { ++ r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? 
++ kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : ++ -ENOTSUP; ++ if (r > 0) { ++ env->tsc_khz = r; ++ } ++ } ++ ++ env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; ++ ++ /* ++ * kvm_hyperv_expand_features() is called here for the second time in case ++ * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle ++ * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to ++ * check which Hyper-V enlightenments are supported and which are not, we ++ * can still proceed and check/expand Hyper-V enlightenments here so legacy ++ * behavior is preserved. ++ */ ++ if (!kvm_hyperv_expand_features(cpu, &local_err)) { ++ error_report_err(local_err); ++ return -ENOSYS; ++ } ++ ++ if (hyperv_enabled(cpu)) { ++ r = hyperv_init_vcpu(cpu); ++ if (r) { ++ return r; ++ } ++ ++ cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); ++ kvm_base = KVM_CPUID_SIGNATURE_NEXT; ++ has_msr_hv_hypercall = true; ++ } ++ ++ if (cs->kvm_state->xen_version) { ++#ifdef CONFIG_XEN_EMU ++ struct kvm_cpuid_entry2 *xen_max_leaf; ++ ++ memcpy(signature, "XenVMMXenVMM", 12); ++ ++ xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_SIGNATURE; ++ c->eax = kvm_base + XEN_CPUID_TIME; ++ c->ebx = signature[0]; ++ c->ecx = signature[1]; ++ c->edx = signature[2]; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_VENDOR; ++ c->eax = cs->kvm_state->xen_version; ++ c->ebx = 0; ++ c->ecx = 0; ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_HVM_MSR; ++ /* Number of hypercall-transfer pages */ ++ c->eax = 1; ++ /* Hypercall MSR base address */ ++ if (hyperv_enabled(cpu)) { ++ c->ebx = XEN_HYPERCALL_MSR_HYPERV; ++ kvm_xen_init(cs->kvm_state, c->ebx); ++ } else { ++ c->ebx = XEN_HYPERCALL_MSR; ++ } ++ c->ecx = 0; ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_TIME; ++ c->eax = ((!!tsc_is_stable_and_known(env) << 1) | ++ 
(!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); ++ /* default=0 (emulate if necessary) */ ++ c->ebx = 0; ++ /* guest tsc frequency */ ++ c->ecx = env->user_tsc_khz; ++ /* guest tsc incarnation (migration count) */ ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_HVM; ++ xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { ++ c->function = kvm_base + XEN_CPUID_HVM; ++ ++ if (cpu->xen_vapic) { ++ c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; ++ c->eax |= XEN_HVM_CPUID_X2APIC_VIRT; ++ } ++ ++ c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; ++ ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { ++ c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; ++ c->ebx = cs->cpu_index; ++ } ++ ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { ++ c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; ++ } ++ } ++ ++ r = kvm_xen_init_vcpu(cs); ++ if (r) { ++ return r; ++ } ++ ++ kvm_base += 0x100; ++#else /* CONFIG_XEN_EMU */ ++ /* This should never happen as kvm_arch_init() would have died first. 
*/ ++ fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); ++ abort(); ++#endif ++ } else if (cpu->expose_kvm) { ++ memcpy(signature, "KVMKVMKVM\0\0\0", 12); ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = KVM_CPUID_SIGNATURE | kvm_base; ++ c->eax = KVM_CPUID_FEATURES | kvm_base; ++ c->ebx = signature[0]; ++ c->ecx = signature[1]; ++ c->edx = signature[2]; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = KVM_CPUID_FEATURES | kvm_base; ++ c->eax = env->features[FEAT_KVM]; ++ c->edx = env->features[FEAT_KVM_HINTS]; ++ } ++ ++ if (cpu->kvm_pv_enforce_cpuid) { ++ r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); ++ if (r < 0) { ++ fprintf(stderr, ++ "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", ++ strerror(-r)); ++ abort(); ++ } ++ } ++ ++ cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); + cpuid_data.cpuid.nent = cpuid_i; + + if (((env->cpuid_version >> 8)&0xF) >= 6 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch b/SOURCES/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch new file mode 100644 index 0000000..65d09ab --- /dev/null +++ b/SOURCES/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch @@ -0,0 +1,91 @@ +From 03e275023b482ac79b4f92ca4ceef6de3caa634f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:40 +0200 +Subject: [PATCH 045/100] i386: pc: remove unnecessary MachineClass overrides + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [45/91] c03d5b57014d0d02f6ce0cdfb19a34996d100dea (bonzini/rhel-qemu-kvm) + +There is no need to override these fields of MachineClass because they are +already set to the right value in the superclass. 
+ +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-10-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b348fdcdac9f9fc70be9ae56c54e41765e9aae24) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 3 --- + hw/i386/x86.c | 6 +++--- + include/hw/i386/x86.h | 4 ---- + 3 files changed, 3 insertions(+), 10 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 660a59c63b..0aca0cc79e 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1979,9 +1979,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; +- mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +- mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; +- mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; + mc->has_hotpluggable_cpus = true; +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index c61f4ebfa6..fcef652c1e 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -443,7 +443,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + numa_cpu_pre_plug(cpu_slot, dev, errp); + } + +-CpuInstanceProperties ++static CpuInstanceProperties + x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +@@ -453,7 +453,7 @@ x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + return possible_cpus->cpus[cpu_index].props; + } + +-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) ++static int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) + { + X86CPUTopoIDs topo_ids; + X86MachineState *x86ms = X86_MACHINE(ms); +@@ -467,7 +467,7 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) + return topo_ids.pkg_id % ms->numa_state->num_nodes; + } + +-const CPUArchIdList 
*x86_possible_cpu_arch_ids(MachineState *ms) ++static const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) + { + X86MachineState *x86ms = X86_MACHINE(ms); + unsigned int max_cpus = ms->smp.max_cpus; +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index d7b7d3f3ce..c2062db13f 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -114,10 +114,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, + + void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); + void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); +-CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, +- unsigned cpu_index); +-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); +-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms); + CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); + void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); + void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch b/SOURCES/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch new file mode 100644 index 0000000..fce51aa --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch @@ -0,0 +1,116 @@ +From 652793962000d6906e219ceae36348a476b78c28 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 12:44:44 +0200 +Subject: [PATCH 065/100] i386/sev: Add a class method to determine KVM VM type + for SNP guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [65/91] c6cbeac0a6f691138df212b80efaa9b1143fdaa8 (bonzini/rhel-qemu-kvm) + +SEV guests can use either KVM_X86_DEFAULT_VM, KVM_X86_SEV_VM, +or KVM_X86_SEV_ES_VM depending on the configuration and what +the host kernel supports. 
SNP guests on the other hand can only +ever use KVM_X86_SNP_VM, so split determination of VM type out +into a separate class method that can be set accordingly for +sev-guest vs. sev-snp-guest objects and add handling for SNP. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-14-pankaj.gupta@amd.com> +[Remove unnecessary function pointer declaration. - Paolo] +Signed-off-by: Paolo Bonzini +(cherry picked from commit a808132f6d8e855bd83a400570ec91d2e00bebe3) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 1 + + target/i386/sev.c | 15 ++++++++++++--- + 2 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 408568d053..75e75d9772 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -166,6 +166,7 @@ static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", + [KVM_X86_SEV_VM] = "SEV", + [KVM_X86_SEV_ES_VM] = "SEV-ES", ++ [KVM_X86_SNP_VM] = "SEV-SNP", + }; + + bool kvm_is_vm_type_supported(int type) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c3daaf1ad5..072cc4f853 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -885,6 +885,11 @@ out: + return sev_common->kvm_type; + } + ++static int sev_snp_kvm_type(X86ConfidentialGuest *cg) ++{ ++ return KVM_X86_SNP_VM; ++} ++ + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + char *devname; +@@ -894,6 +899,8 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + struct sev_user_data_status status = {}; + SevCommonState *sev_common = SEV_COMMON(cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); ++ X86ConfidentialGuestClass *x86_klass = ++ X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); + + sev_common->state = SEV_STATE_UNINIT; + +@@ -964,7 +971,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == 
KVM_X86_DEFAULT_VM) { ++ if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); +@@ -1441,10 +1448,8 @@ static void + sev_common_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); +- X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_common_kvm_init; +- x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", + sev_common_get_sev_device, +@@ -1529,10 +1534,12 @@ static void + sev_guest_class_init(ObjectClass *oc, void *data) + { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; + klass->kvm_init = sev_kvm_init; ++ x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +@@ -1770,8 +1777,10 @@ static void + sev_snp_guest_class_init(ObjectClass *oc, void *data) + { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_snp_kvm_init; ++ x86_klass->kvm_type = sev_snp_kvm_type; + + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch b/SOURCES/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch new file mode 100644 index 0000000..d194994 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch @@ -0,0 +1,84 @@ +From 82a714b79851b5c2d1389d2fa7a01548c486a854 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:20 -0500 +Subject: [PATCH 060/100] i386/sev: Add a sev_snp_enabled() helper + +RH-Author: Paolo 
Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [60/91] c35ead095028ccfb1e1be0fe010ca4f7688530a0 (bonzini/rhel-qemu-kvm) + +Add a simple helper to check if the current guest type is SNP. Also have +SNP-enabled imply that SEV-ES is enabled as well, and fix up any places +where the sev_es_enabled() check is expecting a pure/non-SNP guest. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-9-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 99190f805dca9475fe244fbd8041961842657dc2) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 13 ++++++++++++- + target/i386/sev.h | 2 ++ + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index a81b3228d4..4edfedc139 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -325,12 +325,21 @@ sev_enabled(void) + return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); + } + ++bool ++sev_snp_enabled(void) ++{ ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST); ++} ++ + bool + sev_es_enabled(void) + { + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + +- return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); ++ return sev_snp_enabled() || ++ (sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES); + } + + uint32_t +@@ -946,7 +955,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + "support", __func__); + goto err; + } ++ } + ++ if (sev_es_enabled() && !sev_snp_enabled()) { + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { + error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", +diff --git a/target/i386/sev.h b/target/i386/sev.h +index bedc667eeb..94295ee74f 100644 +--- a/target/i386/sev.h ++++ 
b/target/i386/sev.h +@@ -45,9 +45,11 @@ typedef struct SevKernelLoaderContext { + #ifdef CONFIG_SEV + bool sev_enabled(void); + bool sev_es_enabled(void); ++bool sev_snp_enabled(void); + #else + #define sev_enabled() 0 + #define sev_es_enabled() 0 ++#define sev_snp_enabled() 0 + #endif + + uint32_t sev_get_cbit_position(void); +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch b/SOURCES/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch new file mode 100644 index 0000000..2bab2ac --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch @@ -0,0 +1,187 @@ +From 0e435819540b0d39da2c828aacc0f35ecaadbdf6 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:28 -0500 +Subject: [PATCH 068/100] i386/sev: Add handling to encrypt/finalize guest + launch data + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [68/91] fe77931d279aa8df061823da88a320fb5f72ffea (bonzini/rhel-qemu-kvm) + +Process any queued up launch data and encrypt/measure it into the SNP +guest instance prior to initial guest launch. + +This also updates the KVM_SEV_SNP_LAUNCH_UPDATE call to handle partial +update responses. 
+ +Signed-off-by: Brijesh Singh +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-17-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9f3a6999f9730a694d7db448a99f9c9cb6515992) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 112 ++++++++++++++++++++++++++++++++++++++- + target/i386/trace-events | 2 + + 2 files changed, 113 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index e89b87d2f5..ef2e592ca7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -756,6 +756,76 @@ out: + return ret; + } + ++static const char * ++snp_page_type_to_str(int type) ++{ ++ switch (type) { ++ case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal"; ++ case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero"; ++ case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured"; ++ case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets"; ++ case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid"; ++ default: return "unknown"; ++ } ++} ++ ++static int ++sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, ++ SevLaunchUpdateData *data) ++{ ++ int ret, fw_error; ++ struct kvm_sev_snp_launch_update update = {0}; ++ ++ if (!data->hva || !data->len) { ++ error_report("SNP_LAUNCH_UPDATE called with invalid address" ++ "/ length: %p / %lx", ++ data->hva, data->len); ++ return 1; ++ } ++ ++ update.uaddr = (__u64)(unsigned long)data->hva; ++ update.gfn_start = data->gpa >> TARGET_PAGE_BITS; ++ update.len = data->len; ++ update.type = data->type; ++ ++ /* ++ * KVM_SEV_SNP_LAUNCH_UPDATE requires that GPA ranges have the private ++ * memory attribute set in advance. 
++ */ ++ ret = kvm_set_memory_attributes_private(data->gpa, data->len); ++ if (ret) { ++ error_report("SEV-SNP: failed to configure initial" ++ "private guest memory"); ++ goto out; ++ } ++ ++ while (update.len || ret == -EAGAIN) { ++ trace_kvm_sev_snp_launch_update(update.uaddr, update.gfn_start << ++ TARGET_PAGE_BITS, update.len, ++ snp_page_type_to_str(update.type)); ++ ++ ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd, ++ KVM_SEV_SNP_LAUNCH_UPDATE, ++ &update, &fw_error); ++ if (ret && ret != -EAGAIN) { ++ error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", ++ ret, fw_error, fw_error_to_str(fw_error)); ++ break; ++ } ++ } ++ ++out: ++ if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) { ++ error_report("SEV-SNP: expected update of GPA range %lx-%lx," ++ "got GPA range %lx-%llx", ++ data->gpa, data->gpa + data->len, data->gpa, ++ update.gfn_start << TARGET_PAGE_BITS); ++ ret = -EIO; ++ } ++ ++ return ret; ++} ++ + static int + sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + { +@@ -901,6 +971,46 @@ sev_launch_finish(SevCommonState *sev_common) + migrate_add_blocker(&sev_mig_blocker, &error_fatal); + } + ++static void ++sev_snp_launch_finish(SevCommonState *sev_common) ++{ ++ int ret, error; ++ Error *local_err = NULL; ++ SevLaunchUpdateData *data; ++ SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; ++ ++ QTAILQ_FOREACH(data, &launch_update, next) { ++ ret = sev_snp_launch_update(sev_snp, data); ++ if (ret) { ++ exit(1); ++ } ++ } ++ ++ trace_kvm_sev_snp_launch_finish(sev_snp->id_block, sev_snp->id_auth, ++ sev_snp->host_data); ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH, ++ finish, &error); ++ if (ret) { ++ error_report("SNP_LAUNCH_FINISH ret=%d fw_error=%d '%s'", ++ ret, error, fw_error_to_str(error)); ++ exit(1); ++ } ++ ++ sev_set_guest_state(sev_common, SEV_STATE_RUNNING); ++ ++ /* add migration 
blocker */ ++ error_setg(&sev_mig_blocker, ++ "SEV-SNP: Migration is not implemented"); ++ ret = migrate_add_blocker(&sev_mig_blocker, &local_err); ++ if (local_err) { ++ error_report_err(local_err); ++ error_free(sev_mig_blocker); ++ exit(1); ++ } ++} ++ ++ + static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { +@@ -1832,10 +1942,10 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->launch_start = sev_snp_launch_start; ++ klass->launch_finish = sev_snp_launch_finish; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + +- + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +diff --git a/target/i386/trace-events b/target/i386/trace-events +index cb26d8a925..06b44ead2e 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -12,3 +12,5 @@ kvm_sev_launch_finish(void) "" + kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" + kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" + kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" ++kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)" ++kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s" +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch b/SOURCES/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch new file mode 100644 index 0000000..572dddc --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch @@ -0,0 +1,127 @@ +From 2872c423fa44dcbf50b581a5c3feac064a0473a0 Mon Sep 17 00:00:00 2001 
+From: Michael Roth +Date: Tue, 9 Apr 2024 18:07:41 -0500 +Subject: [PATCH 024/100] i386/sev: Add 'legacy-vm-type' parameter for SEV + guest objects + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [24/91] ce35d1b09fe8aa8772ff149543f7760455c1e6b5 (bonzini/rhel-qemu-kvm) + +QEMU will currently automatically make use of the KVM_SEV_INIT2 API for +initializing SEV and SEV-ES guests verses the older +KVM_SEV_INIT/KVM_SEV_ES_INIT interfaces. + +However, the older interfaces will silently avoid sync'ing FPU/XSAVE +state to the VMSA prior to encryption, thus relying on behavior and +measurements that assume the related fields to be allow zero. + +With KVM_SEV_INIT2, this state is now synced into the VMSA, resulting in +measurements changes and, theoretically, behaviorial changes, though the +latter are unlikely to be seen in practice. + +To allow a smooth transition to the newer interface, while still +providing a mechanism to maintain backward compatibility with VMs +created using the older interfaces, provide a new command-line +parameter: + + -object sev-guest,legacy-vm-type=true,... + +and have it default to false. + +Signed-off-by: Michael Roth +Message-ID: <20240409230743.962513-2-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 023267334da375226720e62963df9545aa8fc2fd) +Signed-off-by: Paolo Bonzini +--- + qapi/qom.json | 11 ++++++++++- + target/i386/sev.c | 18 +++++++++++++++++- + 2 files changed, 27 insertions(+), 2 deletions(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 85e6b4f84a..38dde6d785 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -898,6 +898,14 @@ + # designated guest firmware page for measured boot with -kernel + # (default: false) (since 6.2) + # ++# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. 
++# The newer KVM_SEV_INIT2 interface syncs additional vCPU ++# state when initializing the VMSA structures, which will ++# result in a different guest measurement. Set this to ++# maintain compatibility with older QEMU or kernel versions ++# that rely on legacy KVM_SEV_INIT behavior. ++# (default: false) (since 9.1) ++# + # Since: 2.12 + ## + { 'struct': 'SevGuestProperties', +@@ -908,7 +916,8 @@ + '*handle': 'uint32', + '*cbitpos': 'uint32', + 'reduced-phys-bits': 'uint32', +- '*kernel-hashes': 'bool' } } ++ '*kernel-hashes': 'bool', ++ '*legacy-vm-type': 'bool' } } + + ## + # @ThreadContextProperties: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 9dab4060b8..f4ee317cb0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -67,6 +67,7 @@ struct SevGuestState { + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; ++ bool legacy_vm_type; + + /* runtime state */ + uint32_t handle; +@@ -356,6 +357,16 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) + sev->kernel_hashes = value; + } + ++static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++{ ++ return SEV_GUEST(obj)->legacy_vm_type; ++} ++ ++static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++{ ++ SEV_GUEST(obj)->legacy_vm_type = value; ++} ++ + bool + sev_enabled(void) + { +@@ -863,7 +874,7 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) + } + + kvm_type = (sev->policy & SEV_POLICY_ES) ? 
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type)) { ++ if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { + sev->kvm_type = kvm_type; + } else { + sev->kvm_type = KVM_X86_DEFAULT_VM; +@@ -1381,6 +1392,11 @@ sev_guest_class_init(ObjectClass *oc, void *data) + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); ++ object_class_property_add_bool(oc, "legacy-vm-type", ++ sev_guest_get_legacy_vm_type, ++ sev_guest_set_legacy_vm_type); ++ object_class_property_set_description(oc, "legacy-vm-type", ++ "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); + } + + static void +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch b/SOURCES/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch new file mode 100644 index 0000000..ca1338c --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch @@ -0,0 +1,203 @@ +From a236548a903aa8350fff9601d481b2f529c8d4a7 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:21 -0500 +Subject: [PATCH 061/100] i386/sev: Add sev_kvm_init() override for SEV class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [61/91] b24fcbc8712e7394e029312229da023c63803969 (bonzini/rhel-qemu-kvm) + +Some aspects of the init routine SEV are specific to SEV and not +applicable for SNP guests, so move the SEV-specific bits into +separate class method and retain only the common functionality. 
+ +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-10-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 990da8d243a8c59dafcbed78b56a0e4ffb1605d9) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 72 +++++++++++++++++++++++++++++++++-------------- + 1 file changed, 51 insertions(+), 21 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 4edfedc139..5519de1c6b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -73,6 +73,7 @@ struct SevCommonStateClass { + /* public */ + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); ++ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + }; + + /** +@@ -882,7 +883,7 @@ out: + return sev_common->kvm_type; + } + +-static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; +@@ -892,12 +893,6 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + struct sev_user_data_status status = {}; + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + +- ret = ram_block_discard_disable(true); +- if (ret) { +- error_report("%s: cannot disable RAM discard", __func__); +- return -1; +- } +- + sev_common->state = SEV_STATE_UNINIT; + + host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); +@@ -911,7 +906,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (host_cbitpos != sev_common->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", + __func__, host_cbitpos, sev_common->cbitpos); +- goto err; ++ return -1; + } + + /* +@@ -924,7 +919,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: reduced_phys_bits check failed," + " it should be in the range of 1 to 63, 
requested '%d'", + __func__, sev_common->reduced_phys_bits); +- goto err; ++ return -1; + } + + devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); +@@ -933,7 +928,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: Failed to open %s '%s'", __func__, + devname, strerror(errno)); + g_free(devname); +- goto err; ++ return -1; + } + g_free(devname); + +@@ -943,7 +938,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: failed to get platform status ret=%d " + "fw_error='%d: %s'", __func__, ret, fw_error, + fw_error_to_str(fw_error)); +- goto err; ++ return -1; + } + sev_common->build_id = status.build; + sev_common->api_major = status.api_major; +@@ -953,7 +948,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (!kvm_kernel_irqchip_allowed()) { + error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" + "support", __func__); +- goto err; ++ return -1; + } + } + +@@ -962,7 +957,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", + __func__); +- goto err; ++ return -1; + } + } + +@@ -980,25 +975,59 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (ret) { + error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +- goto err; ++ return -1; + } + + ret = klass->launch_start(sev_common); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); +- goto err; ++ return -1; ++ } ++ ++ if (klass->kvm_init && klass->kvm_init(cgs, errp)) { ++ return -1; + } + +- ram_block_notifier_add(&sev_ram_notifier); +- qemu_add_machine_init_done_notifier(&sev_machine_done_notify); + qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); + + cgs->ready = true; + + return 0; +-err: +- 
ram_block_discard_disable(false); +- return -1; ++} ++ ++static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ int ret; ++ ++ /* ++ * SEV/SEV-ES rely on pinned memory to back guest RAM so discarding ++ * isn't actually possible. With SNP, only guest_memfd pages are used ++ * for private guest memory, so discarding of shared memory is still ++ * possible.. ++ */ ++ ret = ram_block_discard_disable(true); ++ if (ret) { ++ error_setg(errp, "%s: cannot disable RAM discard", __func__); ++ return -1; ++ } ++ ++ /* ++ * SEV uses these notifiers to register/pin pages prior to guest use, ++ * but SNP relies on guest_memfd for private pages, which has its ++ * own internal mechanisms for registering/pinning private memory. ++ */ ++ ram_block_notifier_add(&sev_ram_notifier); ++ ++ /* ++ * The machine done notify event is used for SEV guests to get the ++ * measurement of the encrypted images. When SEV-SNP is enabled, the ++ * measurement is part of the guest attestation process where it can ++ * be collected without any reliance on the VMM. So skip registering ++ * the notifier for SNP in favor of using guest attestation instead. 
++ */ ++ qemu_add_machine_init_done_notifier(&sev_machine_done_notify); ++ ++ return 0; + } + + int +@@ -1397,7 +1426,7 @@ sev_common_class_init(ObjectClass *oc, void *data) + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + +- klass->kvm_init = sev_kvm_init; ++ klass->kvm_init = sev_common_kvm_init; + x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", +@@ -1486,6 +1515,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; ++ klass->kvm_init = sev_kvm_init; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch b/SOURCES/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch new file mode 100644 index 0000000..0db345c --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch @@ -0,0 +1,94 @@ +From 35ceebdeccbf5dceb374c6f89a12e9981def570b Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:22 -0500 +Subject: [PATCH 062/100] i386/sev: Add snp_kvm_init() override for SNP class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [62/91] 8fa537961c9262b99a4ffb99e1c25f080d76d1de (bonzini/rhel-qemu-kvm) + +SNP does not support SMM and requires guest_memfd for +private guest memory, so add SNP specific kvm_init() +functionality in snp_kvm_init() class method. 
+ +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-11-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 125b95a6d465a03ff30816eff0b1889aec01f0c3) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 5519de1c6b..6525b3c1a0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -885,12 +885,12 @@ out: + + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; ++ SevCommonState *sev_common = SEV_COMMON(cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + + sev_common->state = SEV_STATE_UNINIT; +@@ -1030,6 +1030,21 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return 0; + } + ++static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ X86MachineState *x86ms = X86_MACHINE(ms); ++ ++ if (x86ms->smm == ON_OFF_AUTO_AUTO) { ++ x86ms->smm = ON_OFF_AUTO_OFF; ++ } else if (x86ms->smm == ON_OFF_AUTO_ON) { ++ error_setg(errp, "SEV-SNP does not support SMM."); ++ return -1; ++ } ++ ++ return 0; ++} ++ + int + sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) + { +@@ -1752,6 +1767,10 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) + static void + sev_snp_guest_class_init(ObjectClass *oc, void *data) + { ++ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ ++ klass->kvm_init = sev_snp_kvm_init; ++ + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +@@ -1778,8 +1797,11 @@ sev_snp_guest_class_init(ObjectClass 
*oc, void *data) + static void + sev_snp_guest_instance_init(Object *obj) + { ++ ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + ++ cgs->require_guest_memfd = true; ++ + /* default init/start/finish params for kvm */ + sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch b/SOURCES/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch new file mode 100644 index 0000000..c10f75f --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch @@ -0,0 +1,262 @@ +From 4013364679757161d6b9754bfc33ae38be0a1b7f Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:32 -0500 +Subject: [PATCH 072/100] i386/sev: Add support for SNP CPUID validation + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [72/91] 080e2942552dc7de8966e69d0d0d3b8951392030 (bonzini/rhel-qemu-kvm) + +SEV-SNP firmware allows a special guest page to be populated with a +table of guest CPUID values so that they can be validated through +firmware before being loaded into encrypted guest memory where they can +be used in place of hypervisor-provided values[1]. + +As part of SEV-SNP guest initialization, use this interface to validate +the CPUID entries reported by KVM_GET_CPUID2 prior to initial guest +start and populate the CPUID page reserved by OVMF with the resulting +encrypted data. + +[1] SEV SNP Firmware ABI Specification, Rev. 
0.8, 8.13.2.6 + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-21-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 70943ad8e4dfbe5f77006b880290219be9d03553) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 164 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 162 insertions(+), 2 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c57534fca2..06401f0526 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -200,6 +200,36 @@ static const char *const sev_fw_errlist[] = { + + #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) + ++/* doesn't expose this, so re-use the max from kvm.c */ ++#define KVM_MAX_CPUID_ENTRIES 100 ++ ++typedef struct KvmCpuidInfo { ++ struct kvm_cpuid2 cpuid; ++ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; ++} KvmCpuidInfo; ++ ++#define SNP_CPUID_FUNCTION_MAXCOUNT 64 ++#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF ++ ++typedef struct { ++ uint32_t eax_in; ++ uint32_t ecx_in; ++ uint64_t xcr0_in; ++ uint64_t xss_in; ++ uint32_t eax; ++ uint32_t ebx; ++ uint32_t ecx; ++ uint32_t edx; ++ uint64_t reserved; ++} __attribute__((packed)) SnpCpuidFunc; ++ ++typedef struct { ++ uint32_t count; ++ uint32_t reserved1; ++ uint64_t reserved2; ++ SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT]; ++} __attribute__((packed)) SnpCpuidInfo; ++ + static int + sev_ioctl(int fd, int cmd, void *data, int *error) + { +@@ -788,6 +818,35 @@ out: + return ret; + } + ++static void ++sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, ++ SnpCpuidInfo *new) ++{ ++ size_t i; ++ ++ if (old->count != new->count) { ++ error_report("SEV-SNP: CPUID validation failed due to count mismatch," ++ "provided: %d, expected: %d", old->count, new->count); ++ return; ++ } ++ ++ for (i = 0; i < old->count; i++) { ++ SnpCpuidFunc *old_func, *new_func; ++ ++ old_func = &old->entries[i]; ++ new_func = &new->entries[i]; ++ ++ if (memcmp(old_func, 
new_func, sizeof(SnpCpuidFunc))) { ++ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" ++ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" ++ "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", ++ old_func->eax_in, old_func->ecx_in, ++ old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, ++ new_func->eax, new_func->ebx, new_func->ecx, new_func->edx); ++ } ++ } ++} ++ + static const char * + snp_page_type_to_str(int type) + { +@@ -806,6 +865,7 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + SevLaunchUpdateData *data) + { + int ret, fw_error; ++ SnpCpuidInfo snp_cpuid_info; + struct kvm_sev_snp_launch_update update = {0}; + + if (!data->hva || !data->len) { +@@ -815,6 +875,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + return 1; + } + ++ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ /* Save a copy for comparison in case the LAUNCH_UPDATE fails */ ++ memcpy(&snp_cpuid_info, data->hva, sizeof(snp_cpuid_info)); ++ } ++ + update.uaddr = (__u64)(unsigned long)data->hva; + update.gfn_start = data->gpa >> TARGET_PAGE_BITS; + update.len = data->len; +@@ -842,6 +907,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + if (ret && ret != -EAGAIN) { + error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", + ret, fw_error, fw_error_to_str(fw_error)); ++ ++ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ sev_snp_cpuid_report_mismatches(&snp_cpuid_info, data->hva); ++ error_report("SEV-SNP: failed update CPUID page"); ++ } + break; + } + } +@@ -1004,7 +1074,8 @@ sev_launch_finish(SevCommonState *sev_common) + } + + static int +-snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) ++snp_launch_update_data(uint64_t gpa, void *hva, ++ uint32_t len, int type) + { + SevLaunchUpdateData *data; + +@@ -1019,6 +1090,90 @@ snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) + return 0; + } + ++static int ++sev_snp_cpuid_info_fill(SnpCpuidInfo 
*snp_cpuid_info, ++ const KvmCpuidInfo *kvm_cpuid_info) ++{ ++ size_t i; ++ ++ if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { ++ error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", ++ kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); ++ return -1; ++ } ++ ++ memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info)); ++ ++ for (i = 0; i < kvm_cpuid_info->cpuid.nent; i++) { ++ const struct kvm_cpuid_entry2 *kvm_cpuid_entry; ++ SnpCpuidFunc *snp_cpuid_entry; ++ ++ kvm_cpuid_entry = &kvm_cpuid_info->entries[i]; ++ snp_cpuid_entry = &snp_cpuid_info->entries[i]; ++ ++ snp_cpuid_entry->eax_in = kvm_cpuid_entry->function; ++ if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) { ++ snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index; ++ } ++ snp_cpuid_entry->eax = kvm_cpuid_entry->eax; ++ snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx; ++ snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx; ++ snp_cpuid_entry->edx = kvm_cpuid_entry->edx; ++ ++ /* ++ * Guest kernels will calculate EBX themselves using the 0xD ++ * subfunctions corresponding to the individual XSAVE areas, so only ++ * encode the base XSAVE size in the initial leaves, corresponding ++ * to the initial XCR0=1 state. 
++ */ ++ if (snp_cpuid_entry->eax_in == 0xD && ++ (snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 0x1)) { ++ snp_cpuid_entry->ebx = 0x240; ++ snp_cpuid_entry->xcr0_in = 1; ++ snp_cpuid_entry->xss_in = 0; ++ } ++ } ++ ++ snp_cpuid_info->count = i; ++ ++ return 0; ++} ++ ++static int ++snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) ++{ ++ KvmCpuidInfo kvm_cpuid_info = {0}; ++ SnpCpuidInfo snp_cpuid_info; ++ CPUState *cs = first_cpu; ++ int ret; ++ uint32_t i = 0; ++ ++ assert(sizeof(snp_cpuid_info) <= cpuid_len); ++ ++ /* get the cpuid list from KVM */ ++ do { ++ kvm_cpuid_info.cpuid.nent = ++i; ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, &kvm_cpuid_info); ++ } while (ret == -E2BIG); ++ ++ if (ret) { ++ error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", ++ strerror(-ret)); ++ return 1; ++ } ++ ++ ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); ++ if (ret) { ++ error_report("SEV-SNP: failed to generate CPUID table information"); ++ return 1; ++ } ++ ++ memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); ++ ++ return snp_launch_update_data(cpuid_addr, hva, cpuid_len, ++ KVM_SEV_SNP_PAGE_TYPE_CPUID); ++} ++ + static int + snp_metadata_desc_to_page_type(int desc_type) + { +@@ -1053,7 +1208,12 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + exit(1); + } + +- ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ ret = snp_launch_update_cpuid(desc->base, hva, desc->len); ++ } else { ++ ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ } ++ + if (ret) { + error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", + __func__, desc->base, desc->len, desc->type); +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch b/SOURCES/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch new file mode 100644 index 0000000..4691679 --- /dev/null +++ 
b/SOURCES/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch @@ -0,0 +1,127 @@ +From b2cfd4d89026e76ba86ea7adea323f2c3a588790 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:31 -0500 +Subject: [PATCH 071/100] i386/sev: Add support for populating OVMF metadata + pages + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [71/91] b563442c0e2f6ea01937425d300b56d9e641fd57 (bonzini/rhel-qemu-kvm) + +OVMF reserves various pages so they can be pre-initialized/validated +prior to launching the guest. Add support for populating these pages +with the expected content. + +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-20-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3d8c2a7f4806ff39423312e503737fd76c34dcae) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 74 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 17281bb2c7..c57534fca2 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1003,15 +1003,89 @@ sev_launch_finish(SevCommonState *sev_common) + migrate_add_blocker(&sev_mig_blocker, &error_fatal); + } + ++static int ++snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) ++{ ++ SevLaunchUpdateData *data; ++ ++ data = g_new0(SevLaunchUpdateData, 1); ++ data->gpa = gpa; ++ data->hva = hva; ++ data->len = len; ++ data->type = type; ++ ++ QTAILQ_INSERT_TAIL(&launch_update, data, next); ++ ++ return 0; ++} ++ ++static int ++snp_metadata_desc_to_page_type(int desc_type) ++{ ++ switch (desc_type) { ++ /* Add the umeasured prevalidated pages as a zero page */ ++ case SEV_DESC_TYPE_SNP_SEC_MEM: return KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ case 
SEV_DESC_TYPE_SNP_SECRETS: return KVM_SEV_SNP_PAGE_TYPE_SECRETS; ++ case SEV_DESC_TYPE_CPUID: return KVM_SEV_SNP_PAGE_TYPE_CPUID; ++ default: ++ return KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ } ++} ++ ++static void ++snp_populate_metadata_pages(SevSnpGuestState *sev_snp, ++ OvmfSevMetadata *metadata) ++{ ++ OvmfSevMetadataDesc *desc; ++ int type, ret, i; ++ void *hva; ++ MemoryRegion *mr = NULL; ++ ++ for (i = 0; i < metadata->num_desc; i++) { ++ desc = &metadata->descs[i]; ++ ++ type = snp_metadata_desc_to_page_type(desc->type); ++ ++ hva = gpa2hva(&mr, desc->base, desc->len, NULL); ++ if (!hva) { ++ error_report("%s: Failed to get HVA for GPA 0x%x sz 0x%x", ++ __func__, desc->base, desc->len); ++ exit(1); ++ } ++ ++ ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ if (ret) { ++ error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", ++ __func__, desc->base, desc->len, desc->type); ++ exit(1); ++ } ++ } ++} ++ + static void + sev_snp_launch_finish(SevCommonState *sev_common) + { + int ret, error; + Error *local_err = NULL; ++ OvmfSevMetadata *metadata; + SevLaunchUpdateData *data; + SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); + struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; + ++ /* ++ * To boot the SNP guest, the hypervisor is required to populate the CPUID ++ * and Secrets page before finalizing the launch flow. The location of ++ * the secrets and CPUID page is available through the OVMF metadata GUID. 
++ */ ++ metadata = pc_system_get_ovmf_sev_metadata_ptr(); ++ if (metadata == NULL) { ++ error_report("%s: Failed to locate SEV metadata header", __func__); ++ exit(1); ++ } ++ ++ /* Populate all the metadata pages */ ++ snp_populate_metadata_pages(sev_snp, metadata); ++ + QTAILQ_FOREACH(data, &launch_update, next) { + ret = sev_snp_launch_update(sev_snp, data); + if (ret) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Add-the-SNP-launch-start-context.patch b/SOURCES/kvm-i386-sev-Add-the-SNP-launch-start-context.patch new file mode 100644 index 0000000..5da793f --- /dev/null +++ b/SOURCES/kvm-i386-sev-Add-the-SNP-launch-start-context.patch @@ -0,0 +1,122 @@ +From 0f7432f2b968298b64fd243df793b176f67a538f Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:27 -0500 +Subject: [PATCH 067/100] i386/sev: Add the SNP launch start context + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [67/91] 63759a25a413a7a9a7274fb4c3b8bc2528634855 (bonzini/rhel-qemu-kvm) + +The SNP_LAUNCH_START is called first to create a cryptographic launch +context within the firmware. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-16-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit d3107f882ec22cfb211eab7efa0c4e95f5ce11bb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 39 +++++++++++++++++++++++++++++++++++++++ + target/i386/trace-events | 1 + + 2 files changed, 40 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 43d1c48bd9..e89b87d2f5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -39,6 +39,7 @@ + #include "confidential-guest.h" + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" ++#include "qemu/queue.h" + + OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) +@@ -115,6 +116,16 @@ struct SevSnpGuestState { + #define DEFAULT_SEV_DEVICE "/dev/sev" + #define DEFAULT_SEV_SNP_POLICY 0x30000 + ++typedef struct SevLaunchUpdateData { ++ QTAILQ_ENTRY(SevLaunchUpdateData) next; ++ hwaddr gpa; ++ void *hva; ++ uint64_t len; ++ int type; ++} SevLaunchUpdateData; ++ ++static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; ++ + #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" + typedef struct __attribute__((__packed__)) SevInfoBlock { + /* SEV-ES Reset Vector Address */ +@@ -674,6 +685,31 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + return 0; + } + ++static int ++sev_snp_launch_start(SevCommonState *sev_common) ++{ ++ int fw_error, rc; ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); ++ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; ++ ++ trace_kvm_sev_snp_launch_start(start->policy, ++ sev_snp_guest->guest_visible_workarounds); ++ ++ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, ++ start, &fw_error); ++ if (rc < 0) { ++ error_report("%s: SNP_LAUNCH_START ret=%d 
fw_error=%d '%s'", ++ __func__, rc, fw_error, fw_error_to_str(fw_error)); ++ return 1; ++ } ++ ++ QTAILQ_INIT(&launch_update); ++ ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); ++ ++ return 0; ++} ++ + static int + sev_launch_start(SevCommonState *sev_common) + { +@@ -1003,6 +1039,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + ret = klass->launch_start(sev_common); ++ + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + return -1; +@@ -1794,9 +1831,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->launch_start = sev_snp_launch_start; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + ++ + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +diff --git a/target/i386/trace-events b/target/i386/trace-events +index 2cd8726eeb..cb26d8a925 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -11,3 +11,4 @@ kvm_sev_launch_measurement(const char *value) "data %s" + kvm_sev_launch_finish(void) "" + kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" + kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" ++kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch b/SOURCES/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch new file mode 100644 index 0000000..f809242 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch @@ -0,0 +1,237 @@ +From ec786a1ec0a76775e980862d77500f5196a937e3 Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 
May 2024 06:16:35 -0500 +Subject: [PATCH 080/100] i386/sev: Allow measured direct kernel boot on SNP + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [80/91] 11c629862519c1a279566febf5a537c63c5fcf61 (bonzini/rhel-qemu-kvm) + +In SNP, the hashes page designated with a specific metadata entry +published in AmdSev OVMF. + +Therefore, if the user enabled kernel hashes (for measured direct boot), +QEMU should prepare the content of hashes table, and during the +processing of the metadata entry it copy the content into the designated +page and encrypt it. + +Note that in SNP (unlike SEV and SEV-ES) the measurements is done in +whole 4KB pages. Therefore QEMU zeros the whole page that includes the +hashes table, and fills in the kernel hashes area in that page, and then +encrypts the whole page. The rest of the page is reserved for SEV +launch secrets which are not usable anyway on SNP. + +If the user disabled kernel hashes, QEMU pre-validates the kernel hashes +page as a zero page. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-24-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c1996992cc882b00139f78067d6a64e2ec9cb0d8) +Signed-off-by: Paolo Bonzini +--- + include/hw/i386/pc.h | 2 + + target/i386/sev.c | 111 ++++++++++++++++++++++++++++++++----------- + 2 files changed, 85 insertions(+), 28 deletions(-) + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 94b49310f5..ee3bfb7be9 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -175,6 +175,8 @@ typedef enum { + SEV_DESC_TYPE_SNP_SECRETS, + /* The section contains address that can be used as a CPUID page */ + SEV_DESC_TYPE_CPUID, ++ /* The section contains the region for kernel hashes for measured direct boot */ ++ SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10, + + } ovmf_sev_metadata_desc_type; + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 3fce4c08eb..004c667ac1 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -115,6 +115,10 @@ struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + + /* public */ ++ bool (*build_kernel_loader_hashes)(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp); + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); +@@ -154,6 +158,9 @@ struct SevSnpGuestState { + + struct kvm_sev_snp_launch_start kvm_start_conf; + struct kvm_sev_snp_launch_finish kvm_finish_conf; ++ ++ uint32_t kernel_hashes_offset; ++ PaddedSevHashTable *kernel_hashes_data; + }; + + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ +@@ -1189,6 +1196,23 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) + KVM_SEV_SNP_PAGE_TYPE_CPUID); + } + ++static int 
++snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, ++ void *hva, uint32_t len) ++{ ++ int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ if (sev_snp->parent_obj.kernel_hashes) { ++ assert(sev_snp->kernel_hashes_data); ++ assert((sev_snp->kernel_hashes_offset + ++ sizeof(*sev_snp->kernel_hashes_data)) <= len); ++ memset(hva, 0, len); ++ memcpy(hva + sev_snp->kernel_hashes_offset, sev_snp->kernel_hashes_data, ++ sizeof(*sev_snp->kernel_hashes_data)); ++ type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; ++ } ++ return snp_launch_update_data(addr, hva, len, type); ++} ++ + static int + snp_metadata_desc_to_page_type(int desc_type) + { +@@ -1225,6 +1249,9 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + + if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + ret = snp_launch_update_cpuid(desc->base, hva, desc->len); ++ } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { ++ ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, ++ desc->len); + } else { + ret = snp_launch_update_data(desc->base, hva, desc->len, type); + } +@@ -1823,6 +1850,58 @@ static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, + return true; + } + ++static bool sev_snp_build_kernel_loader_hashes(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp) ++{ ++ /* ++ * SNP: Populate the hashes table in an area that later in ++ * snp_launch_update_kernel_hashes() will be copied to the guest memory ++ * and encrypted. 
++ */ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); ++ sev_snp_guest->kernel_hashes_offset = area->base & ~TARGET_PAGE_MASK; ++ sev_snp_guest->kernel_hashes_data = g_new0(PaddedSevHashTable, 1); ++ return build_kernel_loader_hashes(sev_snp_guest->kernel_hashes_data, ctx, errp); ++} ++ ++static bool sev_build_kernel_loader_hashes(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp) ++{ ++ PaddedSevHashTable *padded_ht; ++ hwaddr mapped_len = sizeof(*padded_ht); ++ MemTxAttrs attrs = { 0 }; ++ bool ret = true; ++ ++ /* ++ * Populate the hashes table in the guest's memory at the OVMF-designated ++ * area for the SEV hashes table ++ */ ++ padded_ht = address_space_map(&address_space_memory, area->base, ++ &mapped_len, true, attrs); ++ if (!padded_ht || mapped_len != sizeof(*padded_ht)) { ++ error_setg(errp, "SEV: cannot map hashes table guest memory area"); ++ return false; ++ } ++ ++ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { ++ ret = false; ++ } ++ } else { ++ ret = false; ++ } ++ ++ address_space_unmap(&address_space_memory, padded_ht, ++ mapped_len, true, mapped_len); ++ ++ return ret; ++} ++ + /* + * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page + * which is included in SEV's initial memory measurement. 
+@@ -1831,11 +1910,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + { + uint8_t *data; + SevHashTableDescriptor *area; +- PaddedSevHashTable *padded_ht; +- hwaddr mapped_len = sizeof(*padded_ht); +- MemTxAttrs attrs = { 0 }; +- bool ret = true; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly +@@ -1858,30 +1934,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return false; + } + +- /* +- * Populate the hashes table in the guest's memory at the OVMF-designated +- * area for the SEV hashes table +- */ +- padded_ht = address_space_map(&address_space_memory, area->base, +- &mapped_len, true, attrs); +- if (!padded_ht || mapped_len != sizeof(*padded_ht)) { +- error_setg(errp, "SEV: cannot map hashes table guest memory area"); +- return false; +- } +- +- if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { +- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, +- sizeof(*padded_ht), errp) < 0) { +- ret = false; +- } +- } else { +- ret = false; +- } +- +- address_space_unmap(&address_space_memory, padded_ht, +- mapped_len, true, mapped_len); +- +- return ret; ++ return klass->build_kernel_loader_hashes(sev_common, area, ctx, errp); + } + + static char * +@@ -1998,6 +2051,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->build_kernel_loader_hashes = sev_build_kernel_loader_hashes; + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; + klass->launch_update_data = sev_launch_update_data; +@@ -2242,6 +2296,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass 
= X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->build_kernel_loader_hashes = sev_snp_build_kernel_loader_hashes; + klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; + klass->launch_update_data = sev_snp_launch_update_data; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch b/SOURCES/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch new file mode 100644 index 0000000..aacb0da --- /dev/null +++ b/SOURCES/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch @@ -0,0 +1,268 @@ +From ab6197309551bd6ddd9f8239191f68dfac23684b Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Tue, 9 Jul 2024 23:10:05 -0500 +Subject: [PATCH 090/100] i386/sev: Don't allow automatic fallback to legacy + KVM_SEV*_INIT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [90/91] 2b1345faa56f993bb6e13d63e11656c784e20412 (bonzini/rhel-qemu-kvm) + +Currently if the 'legacy-vm-type' property of the sev-guest object is +'on', QEMU will attempt to use the newer KVM_SEV_INIT2 kernel +interface in conjunction with the newer KVM_X86_SEV_VM and +KVM_X86_SEV_ES_VM KVM VM types. + +This can lead to measurement changes if, for instance, an SEV guest was +created on a host that originally had an older kernel that didn't +support KVM_SEV_INIT2, but is booted on the same host later on after the +host kernel was upgraded. + +Instead, if legacy-vm-type is 'off', QEMU should fail if the +KVM_SEV_INIT2 interface is not provided by the current host kernel. +Modify the fallback handling accordingly. + +In the future, VMSA features and other flags might be added to QEMU +which will require legacy-vm-type to be 'off' because they will rely +on the newer KVM_SEV_INIT2 interface. 
It may be difficult to convey to +users what values of legacy-vm-type are compatible with which +features/options, so as part of this rework, switch legacy-vm-type to a +tri-state OnOffAuto option. 'auto' in this case will automatically +switch to using the newer KVM_SEV_INIT2, but only if it is required to +make use of new VMSA features or other options only available via +KVM_SEV_INIT2. + +Defining 'auto' in this way would avoid inadvertantly breaking +compatibility with older kernels since it would only be used in cases +where users opt into newer features that are only available via +KVM_SEV_INIT2 and newer kernels, and provide better default behavior +than the legacy-vm-type=off behavior that was previously in place, so +make it the default for 9.1+ machine types. + +Cc: Daniel P. Berrangé +Cc: Paolo Bonzini +cc: kvm@vger.kernel.org +Signed-off-by: Michael Roth +Reviewed-by: Daniel P. Berrangé +Link: https://lore.kernel.org/r/20240710041005.83720-1-michael.roth@amd.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9d38d9dca2a81aaf5752d45d221021ef96d496cd) + +RHEL: adjust compatiility setting, applying it to 9.4 machine type +--- + hw/i386/pc.c | 2 +- + qapi/qom.json | 18 ++++++---- + target/i386/sev.c | 85 +++++++++++++++++++++++++++++++++++++++-------- + 3 files changed, 83 insertions(+), 22 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b25d075b59..e9c5ea5d8f 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -352,7 +352,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + GlobalProperty pc_rhel_9_5_compat[] = { + /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ + { TYPE_X86_CPU, "guest-phys-bits", "0" }, +- { "sev-guest", "legacy-vm-type", "true" }, ++ { "sev-guest", "legacy-vm-type", "on" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/qapi/qom.json b/qapi/qom.json +index 8bd299265e..17bd5a0cf7 100644 +--- a/qapi/qom.json ++++ 
b/qapi/qom.json +@@ -912,12 +912,16 @@ + # @handle: SEV firmware handle (default: 0) + # + # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. +-# The newer KVM_SEV_INIT2 interface syncs additional vCPU +-# state when initializing the VMSA structures, which will +-# result in a different guest measurement. Set this to +-# maintain compatibility with older QEMU or kernel versions +-# that rely on legacy KVM_SEV_INIT behavior. +-# (default: false) (since 9.1) ++# The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, syncs ++# additional vCPU state when initializing the VMSA structures, ++# which will result in a different guest measurement. Set ++# this to 'on' to force compatibility with older QEMU or kernel ++# versions that rely on legacy KVM_SEV_INIT behavior. 'auto' ++# will behave identically to 'on', but will automatically ++# switch to using KVM_SEV_INIT2 if the user specifies any ++# additional options that require it. If set to 'off', QEMU ++# will require KVM_SEV_INIT2 unconditionally. ++# (default: off) (since 9.1) + # + # Since: 2.12 + ## +@@ -927,7 +931,7 @@ + '*session-file': 'str', + '*policy': 'uint32', + '*handle': 'uint32', +- '*legacy-vm-type': 'bool' } } ++ '*legacy-vm-type': 'OnOffAuto' } } + + ## + # @SevSnpGuestProperties: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 491fab74fd..b921defb63 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -144,7 +144,7 @@ struct SevGuestState { + uint32_t policy; + char *dh_cert_file; + char *session_file; +- bool legacy_vm_type; ++ OnOffAuto legacy_vm_type; + }; + + struct SevSnpGuestState { +@@ -1334,6 +1334,17 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + ++/* ++ * This helper is to examine sev-guest properties and determine if any options ++ * have been set which rely on the newer KVM_SEV_INIT2 interface and associated ++ * KVM VM types. 
++ */ ++static bool sev_init2_required(SevGuestState *sev_guest) ++{ ++ /* Currently no KVM_SEV_INIT2-specific options are exposed via QEMU */ ++ return false; ++} ++ + static int sev_kvm_type(X86ConfidentialGuest *cg) + { + SevCommonState *sev_common = SEV_COMMON(cg); +@@ -1344,14 +1355,39 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) + goto out; + } + ++ /* These are the only cases where legacy VM types can be used. */ ++ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_ON || ++ (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO && ++ !sev_init2_required(sev_guest))) { ++ sev_common->kvm_type = KVM_X86_DEFAULT_VM; ++ goto out; ++ } ++ ++ /* ++ * Newer VM types are required, either explicitly via legacy-vm-type=on, or ++ * implicitly via legacy-vm-type=auto along with additional sev-guest ++ * properties that require the newer VM types. ++ */ + kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? + KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { +- sev_common->kvm_type = kvm_type; +- } else { +- sev_common->kvm_type = KVM_X86_DEFAULT_VM; ++ if (!kvm_is_vm_type_supported(kvm_type)) { ++ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO) { ++ error_report("SEV: host kernel does not support requested %s VM type, which is required " ++ "for the set of options specified. To allow use of the legacy " ++ "KVM_X86_DEFAULT_VM VM type, please disable any options that are not " ++ "compatible with the legacy VM type, or upgrade your kernel.", ++ kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); ++ } else { ++ error_report("SEV: host kernel does not support requested %s VM type. To allow use of " ++ "the legacy KVM_X86_DEFAULT_VM VM type, the 'legacy-vm-type' argument " ++ "must be set to 'on' or 'auto' for the sev-guest object.", ++ kvm_type == KVM_X86_SEV_VM ? 
"KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); ++ } ++ ++ return -1; + } + ++ sev_common->kvm_type = kvm_type; + out: + return sev_common->kvm_type; + } +@@ -1442,14 +1478,24 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { ++ switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) { ++ case KVM_X86_DEFAULT_VM: + cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); +- } else { ++ break; ++ case KVM_X86_SEV_VM: ++ case KVM_X86_SEV_ES_VM: ++ case KVM_X86_SNP_VM: { + struct kvm_sev_init args = { 0 }; + + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ break; ++ } ++ default: ++ error_setg(errp, "%s: host kernel does not support the requested SEV configuration.", ++ __func__); ++ return -1; + } + + if (ret) { +@@ -2037,14 +2083,23 @@ sev_guest_set_session_file(Object *obj, const char *value, Error **errp) + SEV_GUEST(obj)->session_file = g_strdup(value); + } + +-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++static void sev_guest_get_legacy_vm_type(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { +- return SEV_GUEST(obj)->legacy_vm_type; ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ OnOffAuto legacy_vm_type = sev_guest->legacy_vm_type; ++ ++ visit_type_OnOffAuto(v, name, &legacy_vm_type, errp); + } + +-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { +- SEV_GUEST(obj)->legacy_vm_type = value; ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ ++ visit_type_OnOffAuto(v, name, &sev_guest->legacy_vm_type, errp); + } + + static void +@@ -2070,9 +2125,9 @@ sev_guest_class_init(ObjectClass *oc, void *data) + 
sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "legacy-vm-type", +- sev_guest_get_legacy_vm_type, +- sev_guest_set_legacy_vm_type); ++ object_class_property_add(oc, "legacy-vm-type", "OnOffAuto", ++ sev_guest_get_legacy_vm_type, ++ sev_guest_set_legacy_vm_type, NULL, NULL); + object_class_property_set_description(oc, "legacy-vm-type", + "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); + } +@@ -2088,6 +2143,8 @@ sev_guest_instance_init(Object *obj) + object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, + OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); ++ ++ sev_guest->legacy_vm_type = ON_OFF_AUTO_AUTO; + } + + /* guest info specific sev/sev-es */ +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch b/SOURCES/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch new file mode 100644 index 0000000..739a145 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch @@ -0,0 +1,46 @@ +From ebb3c3536366c383fa09b0987a4efb68d018b7b8 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:24 -0500 +Subject: [PATCH 064/100] i386/sev: Don't return launch measurements for + SEV-SNP guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [64/91] 5a29bb2d8b5a07aec6fd271ec37345e665e9cce4 (bonzini/rhel-qemu-kvm) + +For SEV-SNP guests, launch measurement is queried from within the guest +during attestation, so don't attempt to return it as part of +query-sev-launch-measure. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-13-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 73ae63b162fc1fed520f53ad200712964d7d0264) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 6525b3c1a0..c3daaf1ad5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -795,7 +795,9 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + + static char *sev_get_launch_measurement(void) + { +- SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ SevGuestState *sev_guest = ++ (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); + + if (sev_guest && + SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch b/SOURCES/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch new file mode 100644 index 0000000..e438cd3 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch @@ -0,0 +1,54 @@ +From 0612c7ed587422ec7e07c27c8ca11b89c7aa8b02 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:43 -0500 +Subject: [PATCH 077/100] i386/sev: Enable KVM_HC_MAP_GPA_RANGE hcall for SNP + guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [77/91] 3c494eb54499c24121cc2c47045626478b8bb41e (bonzini/rhel-qemu-kvm) + +KVM will forward GHCB page-state change requests to userspace in the +form of KVM_HC_MAP_GPA_RANGE, so make sure the hypercall handling is +enabled for SNP guests. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-32-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit e3cddff93c1f88fea3b26841e792dc0be6b6fae8) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index eaf5fc6c6b..abb63062ac 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + + #include ++#include + #include + + #include +@@ -758,6 +759,10 @@ sev_snp_launch_start(SevCommonState *sev_common) + trace_kvm_sev_snp_launch_start(start->policy, + sev_snp_guest->guest_visible_workarounds); + ++ if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { ++ return 1; ++ } ++ + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, + start, &fw_error); + if (rc < 0) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch b/SOURCES/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch new file mode 100644 index 0000000..a06301d --- /dev/null +++ b/SOURCES/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch @@ -0,0 +1,167 @@ +From eed17520567c202f53ab767bfd42cfe303838772 Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 May 2024 06:16:33 -0500 +Subject: [PATCH 078/100] i386/sev: Extract build_kernel_loader_hashes + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [78/91] 291ea10e774178826d1afd38fc8292d67c5fd42d (bonzini/rhel-qemu-kvm) + +Extract the building of the kernel hashes table out from +sev_add_kernel_loader_hashes() to allow building it in +other memory areas (for SNP support). + +No functional change intended. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-22-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 06cbd66cecaa3230cccb330facac241a677b29d5) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 102 ++++++++++++++++++++++++++-------------------- + 1 file changed, 58 insertions(+), 44 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index abb63062ac..73f9406715 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1754,45 +1754,16 @@ static const QemuUUID sev_cmdline_entry_guid = { + 0x4d, 0x36, 0xab, 0x2a) + }; + +-/* +- * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page +- * which is included in SEV's initial memory measurement. +- */ +-bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) ++static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, ++ SevKernelLoaderContext *ctx, ++ Error **errp) + { +- uint8_t *data; +- SevHashTableDescriptor *area; + SevHashTable *ht; +- PaddedSevHashTable *padded_ht; + uint8_t cmdline_hash[HASH_SIZE]; + uint8_t initrd_hash[HASH_SIZE]; + uint8_t kernel_hash[HASH_SIZE]; + uint8_t *hashp; + size_t hash_len = HASH_SIZE; +- hwaddr mapped_len = sizeof(*padded_ht); +- MemTxAttrs attrs = { 0 }; +- bool ret = true; +- SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- +- /* +- * Only add the kernel hashes if the sev-guest configuration explicitly +- * stated kernel-hashes=on. 
+- */ +- if (!sev_common->kernel_hashes) { +- return false; +- } +- +- if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { +- error_setg(errp, "SEV: kernel specified but guest firmware " +- "has no hashes table GUID"); +- return false; +- } +- area = (SevHashTableDescriptor *)data; +- if (!area->base || area->size < sizeof(PaddedSevHashTable)) { +- error_setg(errp, "SEV: guest firmware hashes table area is invalid " +- "(base=0x%x size=0x%x)", area->base, area->size); +- return false; +- } + + /* + * Calculate hash of kernel command-line with the terminating null byte. If +@@ -1829,16 +1800,6 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + } + assert(hash_len == HASH_SIZE); + +- /* +- * Populate the hashes table in the guest's memory at the OVMF-designated +- * area for the SEV hashes table +- */ +- padded_ht = address_space_map(&address_space_memory, area->base, +- &mapped_len, true, attrs); +- if (!padded_ht || mapped_len != sizeof(*padded_ht)) { +- error_setg(errp, "SEV: cannot map hashes table guest memory area"); +- return false; +- } + ht = &padded_ht->ht; + + ht->guid = sev_hash_table_header_guid; +@@ -1859,8 +1820,61 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + +- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, +- sizeof(*padded_ht), errp) < 0) { ++ return true; ++} ++ ++/* ++ * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page ++ * which is included in SEV's initial memory measurement. 
++ */ ++bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) ++{ ++ uint8_t *data; ++ SevHashTableDescriptor *area; ++ PaddedSevHashTable *padded_ht; ++ hwaddr mapped_len = sizeof(*padded_ht); ++ MemTxAttrs attrs = { 0 }; ++ bool ret = true; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ /* ++ * Only add the kernel hashes if the sev-guest configuration explicitly ++ * stated kernel-hashes=on. ++ */ ++ if (!sev_common->kernel_hashes) { ++ return false; ++ } ++ ++ if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { ++ error_setg(errp, "SEV: kernel specified but guest firmware " ++ "has no hashes table GUID"); ++ return false; ++ } ++ ++ area = (SevHashTableDescriptor *)data; ++ if (!area->base || area->size < sizeof(PaddedSevHashTable)) { ++ error_setg(errp, "SEV: guest firmware hashes table area is invalid " ++ "(base=0x%x size=0x%x)", area->base, area->size); ++ return false; ++ } ++ ++ /* ++ * Populate the hashes table in the guest's memory at the OVMF-designated ++ * area for the SEV hashes table ++ */ ++ padded_ht = address_space_map(&address_space_memory, area->base, ++ &mapped_len, true, attrs); ++ if (!padded_ht || mapped_len != sizeof(*padded_ht)) { ++ error_setg(errp, "SEV: cannot map hashes table guest memory area"); ++ return false; ++ } ++ ++ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { ++ ret = false; ++ } ++ } else { + ret = false; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch b/SOURCES/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch new file mode 100644 index 0000000..1d30674 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch @@ -0,0 +1,65 @@ +From a9530c89225fce9e381929c4cd8e372068827acf Mon Sep 17 00:00:00 2001 +From: Michal Privoznik +Date: Mon, 24 Jun 
2024 10:52:49 +0200 +Subject: [PATCH 089/100] i386/sev: Fallback to the default SEV device if none + provided in sev_get_capabilities() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [89/91] 22318c20d7102815f754cec0efaf383e05ef79c1 (bonzini/rhel-qemu-kvm) + +When management tools (e.g. libvirt) query QEMU capabilities, +they start QEMU with a minimalistic configuration and issue +various commands on the monitor. One of the commands issued is/might +be "query-sev-capabilities" to learn values like cbitpos or +reduced-phys-bits. But as of v9.0.0-1145-g16dcf200dc the monitor +command returns an error instead. + +This creates a chicken-egg problem because in order to query +those aforementioned values QEMU needs to be started with a +'sev-guest' object. But to start QEMU with such an object, the +values must already be known. + +I think it's safe to assume that the default path ("/dev/sev") +provides the same data as a user-provided one. So fall back to it.
+ +Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0 +Signed-off-by: Michal Privoznik +Link: https://lore.kernel.org/r/157f93712c23818be193ce785f648f0060b33dee.1719218926.git.mprivozn@redhat.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3fb24530b2bb1346a44e17becefc9865b40a2257) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 53b7f7315b..491fab74fd 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -585,13 +585,13 @@ static SevCapability *sev_get_capabilities(Error **errp) + } + + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- if (!sev_common) { +- error_setg(errp, "SEV is not configured"); +- return NULL; ++ if (sev_common) { ++ sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", ++ &error_abort); ++ } else { ++ sev_device = g_strdup(DEFAULT_SEV_DEVICE); + } + +- sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", +- &error_abort); + fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch b/SOURCES/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch new file mode 100644 index 0000000..b23e008 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch @@ -0,0 +1,48 @@ +From b672cdf8c10a530b5bcf6dd4489632891eb2c731 Mon Sep 17 00:00:00 2001 +From: Michal Privoznik +Date: Mon, 24 Jun 2024 10:52:48 +0200 +Subject: [PATCH 088/100] i386/sev: Fix error message in sev_get_capabilities() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [88/91] 
ff8a8b27af02e565172ffe39d0571c234317713d (bonzini/rhel-qemu-kvm) + +When a custom path is provided to sev-guest object and opening +the path fails an error message is reported. But the error +message still mentions DEFAULT_SEV_DEVICE ("/dev/sev") instead of +the custom path. + +Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0 +Signed-off-by: Michal Privoznik +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/b4648905d399780063dc70851d3d6a3cd28719a5.1719218926.git.mprivozn@redhat.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit e306ae87e0ef04bc7a5dec6db693f6ea09d64d45) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 37de80adc7..53b7f7315b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -595,7 +595,7 @@ static SevCapability *sev_get_capabilities(Error **errp) + fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", +- DEFAULT_SEV_DEVICE); ++ sev_device); + g_free(sev_device); + return NULL; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch b/SOURCES/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch new file mode 100644 index 0000000..2d167af --- /dev/null +++ b/SOURCES/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch @@ -0,0 +1,1118 @@ +From e6cf2115eb9db545821180b8a978cdccc6a2c2db Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:16 -0500 +Subject: [PATCH 056/100] i386/sev: Introduce "sev-common" type to encapsulate + common SEV state + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [56/91] b52d5c9c5e4997d2fd791fa36dd5d4c836dfc32f (bonzini/rhel-qemu-kvm) + +Currently all SEV/SEV-ES functionality is managed through a 
single +'sev-guest' QOM type. With upcoming support for SEV-SNP, taking this +same approach won't work well since some of the properties/state +managed by 'sev-guest' are not applicable to SEV-SNP, which will instead +rely on a new QOM type with its own set of properties/state. + +To prepare for this, this patch moves common state into an abstract +'sev-common' parent type to encapsulate properties/state that are +common to both SEV/SEV-ES and SEV-SNP, leaving only SEV/SEV-ES-specific +properties/state in the current 'sev-guest' type. This should not +affect current behavior or command-line options. + +As part of this patch, some related changes are also made: + + - a static 'sev_guest' variable is currently used to keep track of + the 'sev-guest' instance. SEV-SNP would similarly introduce an + 'sev_snp_guest' static variable. But these instances are now + available via qdev_get_machine()->cgs, so switch to using that + instead and drop the static variable. + + - 'sev_guest' is currently used as the name for the static variable + holding a pointer to the 'sev-guest' instance. Re-purpose the name + as a local variable referring to the 'sev-guest' instance, and use + that consistently throughout the code so it can be easily + distinguished from sev-common/sev-snp-guest instances. + + - 'sev' is generally used as the name for local variables holding a + pointer to the 'sev-guest' instance. In cases where that now points + to common state, use the name 'sev_common'; in cases where that now + points to state specific to the 'sev-guest' instance, use the name + 'sev_guest' + +In order to enable kernel-hashes for SNP, pull it from +SevGuestProperties to its parent SevCommonProperties so +it will be available for both SEV and SNP.
+ +Signed-off-by: Michael Roth +Co-developed-by: Dov Murik +Signed-off-by: Dov Murik +Acked-by: Markus Armbruster (QAPI schema) +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-5-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 16dcf200dc951c1cde3e5b442457db5f690b8cf0) +Signed-off-by: Paolo Bonzini +--- + qapi/qom.json | 40 ++-- + target/i386/sev.c | 489 ++++++++++++++++++++++++++-------------------- + target/i386/sev.h | 3 + + 3 files changed, 301 insertions(+), 231 deletions(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 38dde6d785..056b38f491 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -875,20 +875,12 @@ + 'data': { '*filename': 'str' } } + + ## +-# @SevGuestProperties: ++# @SevCommonProperties: + # +-# Properties for sev-guest objects. ++# Properties common to objects that are derivatives of sev-common. + # + # @sev-device: SEV device to use (default: "/dev/sev") + # +-# @dh-cert-file: guest owners DH certificate (encoded with base64) +-# +-# @session-file: guest owners session parameters (encoded with base64) +-# +-# @policy: SEV policy value (default: 0x1) +-# +-# @handle: SEV firmware handle (default: 0) +-# + # @cbitpos: C-bit location in page table entry (default: 0) + # + # @reduced-phys-bits: number of bits in physical addresses that become +@@ -898,6 +890,27 @@ + # designated guest firmware page for measured boot with -kernel + # (default: false) (since 6.2) + # ++# Since: 9.1 ++## ++{ 'struct': 'SevCommonProperties', ++ 'data': { '*sev-device': 'str', ++ '*cbitpos': 'uint32', ++ 'reduced-phys-bits': 'uint32', ++ '*kernel-hashes': 'bool' } } ++ ++## ++# @SevGuestProperties: ++# ++# Properties for sev-guest objects. 
++# ++# @dh-cert-file: guest owners DH certificate (encoded with base64) ++# ++# @session-file: guest owners session parameters (encoded with base64) ++# ++# @policy: SEV policy value (default: 0x1) ++# ++# @handle: SEV firmware handle (default: 0) ++# + # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. + # The newer KVM_SEV_INIT2 interface syncs additional vCPU + # state when initializing the VMSA structures, which will +@@ -909,14 +922,11 @@ + # Since: 2.12 + ## + { 'struct': 'SevGuestProperties', +- 'data': { '*sev-device': 'str', +- '*dh-cert-file': 'str', ++ 'base': 'SevCommonProperties', ++ 'data': { '*dh-cert-file': 'str', + '*session-file': 'str', + '*policy': 'uint32', + '*handle': 'uint32', +- '*cbitpos': 'uint32', +- 'reduced-phys-bits': 'uint32', +- '*kernel-hashes': 'bool', + '*legacy-vm-type': 'bool' } } + + ## +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 67ed32e5ea..33e606eea0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -40,49 +40,59 @@ + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" + +-#define TYPE_SEV_GUEST "sev-guest" +-OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) ++OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) ++OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) + +- +-/** +- * SevGuestState: +- * +- * The SevGuestState object is used for creating and managing a SEV +- * guest. 
+- * +- * # $QEMU \ +- * -object sev-guest,id=sev0 \ +- * -machine ...,memory-encryption=sev0 +- */ +-struct SevGuestState { ++struct SevCommonState { + X86ConfidentialGuest parent_obj; + + int kvm_type; + + /* configuration parameters */ + char *sev_device; +- uint32_t policy; +- char *dh_cert_file; +- char *session_file; + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; +- bool legacy_vm_type; + + /* runtime state */ +- uint32_t handle; + uint8_t api_major; + uint8_t api_minor; + uint8_t build_id; + int sev_fd; + SevState state; +- gchar *measurement; + + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; + }; + ++struct SevCommonStateClass { ++ X86ConfidentialGuestClass parent_class; ++ ++}; ++ ++/** ++ * SevGuestState: ++ * ++ * The SevGuestState object is used for creating and managing a SEV ++ * guest. ++ * ++ * # $QEMU \ ++ * -object sev-guest,id=sev0 \ ++ * -machine ...,memory-encryption=sev0 ++ */ ++struct SevGuestState { ++ SevCommonState parent_obj; ++ gchar *measurement; ++ ++ /* configuration parameters */ ++ uint32_t handle; ++ uint32_t policy; ++ char *dh_cert_file; ++ char *session_file; ++ bool legacy_vm_type; ++}; ++ + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ + #define DEFAULT_SEV_DEVICE "/dev/sev" + +@@ -128,7 +138,6 @@ typedef struct QEMU_PACKED PaddedSevHashTable { + + QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + +-static SevGuestState *sev_guest; + static Error *sev_mig_blocker; + + static const char *const sev_fw_errlist[] = { +@@ -209,21 +218,21 @@ fw_error_to_str(int code) + } + + static bool +-sev_check_state(const SevGuestState *sev, SevState state) ++sev_check_state(const SevCommonState *sev_common, SevState state) + { +- assert(sev); +- return sev->state == state ? true : false; ++ assert(sev_common); ++ return sev_common->state == state ? 
true : false; + } + + static void +-sev_set_guest_state(SevGuestState *sev, SevState new_state) ++sev_set_guest_state(SevCommonState *sev_common, SevState new_state) + { + assert(new_state < SEV_STATE__MAX); +- assert(sev); ++ assert(sev_common); + +- trace_kvm_sev_change_state(SevState_str(sev->state), ++ trace_kvm_sev_change_state(SevState_str(sev_common->state), + SevState_str(new_state)); +- sev->state = new_state; ++ sev_common->state = new_state; + } + + static void +@@ -290,121 +299,61 @@ static struct RAMBlockNotifier sev_ram_notifier = { + .ram_block_removed = sev_ram_block_removed, + }; + +-static void +-sev_guest_finalize(Object *obj) +-{ +-} +- +-static char * +-sev_guest_get_session_file(Object *obj, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- return s->session_file ? g_strdup(s->session_file) : NULL; +-} +- +-static void +-sev_guest_set_session_file(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- s->session_file = g_strdup(value); +-} +- +-static char * +-sev_guest_get_dh_cert_file(Object *obj, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- return g_strdup(s->dh_cert_file); +-} +- +-static void +-sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- s->dh_cert_file = g_strdup(value); +-} +- +-static char * +-sev_guest_get_sev_device(Object *obj, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- return g_strdup(sev->sev_device); +-} +- +-static void +-sev_guest_set_sev_device(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->sev_device = g_strdup(value); +-} +- +-static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- return sev->kernel_hashes; +-} +- +-static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- 
sev->kernel_hashes = value; +-} +- +-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) +-{ +- return SEV_GUEST(obj)->legacy_vm_type; +-} +- +-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) +-{ +- SEV_GUEST(obj)->legacy_vm_type = value; +-} +- + bool + sev_enabled(void) + { +- return !!sev_guest; ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); + } + + bool + sev_es_enabled(void) + { +- return sev_enabled() && (sev_guest->policy & SEV_POLICY_ES); ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); + } + + uint32_t + sev_get_cbit_position(void) + { +- return sev_guest ? sev_guest->cbitpos : 0; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ return sev_common ? sev_common->cbitpos : 0; + } + + uint32_t + sev_get_reduced_phys_bits(void) + { +- return sev_guest ? sev_guest->reduced_phys_bits : 0; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ return sev_common ? 
sev_common->reduced_phys_bits : 0; + } + + static SevInfo *sev_get_info(void) + { + SevInfo *info; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevGuestState *sev_guest = ++ (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), ++ TYPE_SEV_GUEST); + + info = g_new0(SevInfo, 1); + info->enabled = sev_enabled(); + + if (info->enabled) { +- info->api_major = sev_guest->api_major; +- info->api_minor = sev_guest->api_minor; +- info->build_id = sev_guest->build_id; +- info->policy = sev_guest->policy; +- info->state = sev_guest->state; +- info->handle = sev_guest->handle; ++ if (sev_guest) { ++ info->handle = sev_guest->handle; ++ } ++ info->api_major = sev_common->api_major; ++ info->api_minor = sev_common->api_minor; ++ info->build_id = sev_common->build_id; ++ info->state = sev_common->state; ++ /* we only report the lower 32-bits of policy for SNP, ok for now... */ ++ info->policy = ++ (uint32_t)object_property_get_uint(OBJECT(sev_common), ++ "policy", NULL); + } + + return info; +@@ -530,6 +479,8 @@ static SevCapability *sev_get_capabilities(Error **errp) + size_t pdh_len = 0, cert_chain_len = 0, cpu0_id_len = 0; + uint32_t ebx; + int fd; ++ SevCommonState *sev_common; ++ char *sev_device; + + if (!kvm_enabled()) { + error_setg(errp, "KVM not enabled"); +@@ -540,12 +491,21 @@ static SevCapability *sev_get_capabilities(Error **errp) + return NULL; + } + +- fd = open(DEFAULT_SEV_DEVICE, O_RDWR); ++ sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ if (!sev_common) { ++ error_setg(errp, "SEV is not configured"); ++ } ++ ++ sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", ++ &error_abort); ++ fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", + DEFAULT_SEV_DEVICE); ++ g_free(sev_device); + return NULL; + } ++ g_free(sev_device); + + if (sev_get_pdh_info(fd, &pdh_data, &pdh_len, + &cert_chain_data, &cert_chain_len, errp)) { +@@ -588,7 +548,7 @@ 
static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + { + struct kvm_sev_attestation_report input = {}; + SevAttestationReport *report = NULL; +- SevGuestState *sev = sev_guest; ++ SevCommonState *sev_common; + g_autofree guchar *data = NULL; + g_autofree guchar *buf = NULL; + gsize len; +@@ -613,8 +573,10 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + return NULL; + } + ++ sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ + /* Query the report length */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + &input, &err); + if (ret < 0) { + if (err != SEV_RET_INVALID_LEN) { +@@ -630,7 +592,7 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + memcpy(input.mnonce, buf, sizeof(input.mnonce)); + + /* Query the report */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + &input, &err); + if (ret) { + error_setg_errno(errp, errno, "SEV: Failed to get attestation report" +@@ -670,26 +632,27 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + } + + static int +-sev_launch_start(SevGuestState *sev) ++sev_launch_start(SevGuestState *sev_guest) + { + gsize sz; + int ret = 1; + int fw_error, rc; + struct kvm_sev_launch_start start = { +- .handle = sev->handle, .policy = sev->policy ++ .handle = sev_guest->handle, .policy = sev_guest->policy + }; + guchar *session = NULL, *dh_cert = NULL; ++ SevCommonState *sev_common = SEV_COMMON(sev_guest); + +- if (sev->session_file) { +- if (sev_read_file_base64(sev->session_file, &session, &sz) < 0) { ++ if (sev_guest->session_file) { ++ if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { + goto out; + } + start.session_uaddr = (unsigned long)session; + start.session_len = sz; + } + +- if (sev->dh_cert_file) { +- if 
(sev_read_file_base64(sev->dh_cert_file, &dh_cert, &sz) < 0) { ++ if (sev_guest->dh_cert_file) { ++ if (sev_read_file_base64(sev_guest->dh_cert_file, &dh_cert, &sz) < 0) { + goto out; + } + start.dh_uaddr = (unsigned long)dh_cert; +@@ -697,15 +660,15 @@ sev_launch_start(SevGuestState *sev) + } + + trace_kvm_sev_launch_start(start.policy, session, dh_cert); +- rc = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); ++ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); + if (rc < 0) { + error_report("%s: LAUNCH_START ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); + goto out; + } + +- sev_set_guest_state(sev, SEV_STATE_LAUNCH_UPDATE); +- sev->handle = start.handle; ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); ++ sev_guest->handle = start.handle; + ret = 0; + + out: +@@ -715,7 +678,7 @@ out: + } + + static int +-sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) ++sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + { + int ret, fw_error; + struct kvm_sev_launch_update_data update; +@@ -727,7 +690,7 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) + update.uaddr = (uintptr_t)addr; + update.len = len; + trace_kvm_sev_launch_update_data(addr, len); +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + &update, &fw_error); + if (ret) { + error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", +@@ -738,11 +701,12 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) + } + + static int +-sev_launch_update_vmsa(SevGuestState *sev) ++sev_launch_update_vmsa(SevGuestState *sev_guest) + { + int ret, fw_error; + +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, ++ NULL, &fw_error); + if (ret) { + 
error_report("%s: LAUNCH_UPDATE_VMSA ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +@@ -754,18 +718,19 @@ sev_launch_update_vmsa(SevGuestState *sev) + static void + sev_launch_get_measure(Notifier *notifier, void *unused) + { +- SevGuestState *sev = sev_guest; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + int ret, error; + g_autofree guchar *data = NULL; + struct kvm_sev_launch_measure measurement = {}; + +- if (!sev_check_state(sev, SEV_STATE_LAUNCH_UPDATE)) { ++ if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { + return; + } + + if (sev_es_enabled()) { + /* measure all the VM save areas before getting launch_measure */ +- ret = sev_launch_update_vmsa(sev); ++ ret = sev_launch_update_vmsa(sev_guest); + if (ret) { + exit(1); + } +@@ -773,7 +738,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + } + + /* query the measurement blob length */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, + &measurement, &error); + if (!measurement.len) { + error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", +@@ -785,7 +750,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + measurement.uaddr = (unsigned long)data; + + /* get the measurement blob */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, + &measurement, &error); + if (ret) { + error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", +@@ -793,17 +758,19 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + return; + } + +- sev_set_guest_state(sev, SEV_STATE_LAUNCH_SECRET); ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_SECRET); + + /* encode the measurement value and emit the event */ +- sev->measurement = g_base64_encode(data, measurement.len); +- trace_kvm_sev_launch_measurement(sev->measurement); ++ 
sev_guest->measurement = g_base64_encode(data, measurement.len); ++ trace_kvm_sev_launch_measurement(sev_guest->measurement); + } + + static char *sev_get_launch_measurement(void) + { ++ SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); ++ + if (sev_guest && +- sev_guest->state >= SEV_STATE_LAUNCH_SECRET) { ++ SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { + return g_strdup(sev_guest->measurement); + } + +@@ -832,19 +799,20 @@ static Notifier sev_machine_done_notify = { + }; + + static void +-sev_launch_finish(SevGuestState *sev) ++sev_launch_finish(SevGuestState *sev_guest) + { + int ret, error; + + trace_kvm_sev_launch_finish(); +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, ++ &error); + if (ret) { + error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", + __func__, ret, error, fw_error_to_str(error)); + exit(1); + } + +- sev_set_guest_state(sev, SEV_STATE_RUNNING); ++ sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, +@@ -855,38 +823,40 @@ sev_launch_finish(SevGuestState *sev) + static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { +- SevGuestState *sev = opaque; ++ SevCommonState *sev_common = opaque; + + if (running) { +- if (!sev_check_state(sev, SEV_STATE_RUNNING)) { +- sev_launch_finish(sev); ++ if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { ++ sev_launch_finish(SEV_GUEST(sev_common)); + } + } + } + + static int sev_kvm_type(X86ConfidentialGuest *cg) + { +- SevGuestState *sev = SEV_GUEST(cg); ++ SevCommonState *sev_common = SEV_COMMON(cg); ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + int kvm_type; + +- if (sev->kvm_type != -1) { ++ if (sev_common->kvm_type != -1) { + goto out; + } + +- kvm_type = (sev->policy & SEV_POLICY_ES) ? 
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { +- sev->kvm_type = kvm_type; ++ kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? ++ KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; ++ if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { ++ sev_common->kvm_type = kvm_type; + } else { +- sev->kvm_type = KVM_X86_DEFAULT_VM; ++ sev_common->kvm_type = KVM_X86_DEFAULT_VM; + } + + out: +- return sev->kvm_type; ++ return sev_common->kvm_type; + } + + static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevGuestState *sev = SEV_GUEST(cgs); ++ SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; +@@ -899,8 +869,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return -1; + } + +- sev_guest = sev; +- sev->state = SEV_STATE_UNINIT; ++ sev_common->state = SEV_STATE_UNINIT; + + host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); + host_cbitpos = ebx & 0x3f; +@@ -910,9 +879,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + * register of CPUID 0x8000001F. No need to verify the range as the + * comparison against the host value accomplishes that. + */ +- if (host_cbitpos != sev->cbitpos) { ++ if (host_cbitpos != sev_common->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", +- __func__, host_cbitpos, sev->cbitpos); ++ __func__, host_cbitpos, sev_common->cbitpos); + goto err; + } + +@@ -921,16 +890,17 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + * the EBX register of CPUID 0x8000001F, so verify the supplied value + * is in the range of 1 to 63. 
+ */ +- if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { ++ if (sev_common->reduced_phys_bits < 1 || ++ sev_common->reduced_phys_bits > 63) { + error_setg(errp, "%s: reduced_phys_bits check failed," + " it should be in the range of 1 to 63, requested '%d'", +- __func__, sev->reduced_phys_bits); ++ __func__, sev_common->reduced_phys_bits); + goto err; + } + +- devname = object_property_get_str(OBJECT(sev), "sev-device", NULL); +- sev->sev_fd = open(devname, O_RDWR); +- if (sev->sev_fd < 0) { ++ devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); ++ sev_common->sev_fd = open(devname, O_RDWR); ++ if (sev_common->sev_fd < 0) { + error_setg(errp, "%s: Failed to open %s '%s'", __func__, + devname, strerror(errno)); + g_free(devname); +@@ -938,7 +908,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + g_free(devname); + +- ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, &status, ++ ret = sev_platform_ioctl(sev_common->sev_fd, SEV_PLATFORM_STATUS, &status, + &fw_error); + if (ret) { + error_setg(errp, "%s: failed to get platform status ret=%d " +@@ -946,9 +916,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + fw_error_to_str(fw_error)); + goto err; + } +- sev->build_id = status.build; +- sev->api_major = status.api_major; +- sev->api_minor = status.api_minor; ++ sev_common->build_id = status.build; ++ sev_common->api_major = status.api_major; ++ sev_common->api_minor = status.api_minor; + + if (sev_es_enabled()) { + if (!kvm_kernel_irqchip_allowed()) { +@@ -966,14 +936,14 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { ++ if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? 
KVM_SEV_ES_INIT : KVM_SEV_INIT; + +- ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); + } else { + struct kvm_sev_init args = { 0 }; + +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + } + + if (ret) { +@@ -982,7 +952,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + goto err; + } + +- ret = sev_launch_start(sev); ++ sev_launch_start(SEV_GUEST(sev_common)); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + goto err; +@@ -990,13 +960,12 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + + ram_block_notifier_add(&sev_ram_notifier); + qemu_add_machine_init_done_notifier(&sev_machine_done_notify); +- qemu_add_vm_change_state_handler(sev_vm_state_change, sev); ++ qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); + + cgs->ready = true; + + return 0; + err: +- sev_guest = NULL; + ram_block_discard_disable(false); + return -1; + } +@@ -1004,13 +973,15 @@ err: + int + sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) + { +- if (!sev_guest) { ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ if (!sev_common) { + return 0; + } + + /* if SEV is in update state then encrypt the data else do nothing */ +- if (sev_check_state(sev_guest, SEV_STATE_LAUNCH_UPDATE)) { +- int ret = sev_launch_update_data(sev_guest, ptr, len); ++ if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { ++ int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); + if (ret < 0) { + error_setg(errp, "SEV: Failed to encrypt pflash rom"); + return ret; +@@ -1030,16 +1001,17 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + void *hva; + gsize hdr_sz = 0, data_sz = 0; + MemoryRegion *mr = NULL; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); 
+ +- if (!sev_guest) { ++ if (!sev_common) { + error_setg(errp, "SEV not enabled for guest"); + return 1; + } + + /* secret can be injected only in this state */ +- if (!sev_check_state(sev_guest, SEV_STATE_LAUNCH_SECRET)) { ++ if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_SECRET)) { + error_setg(errp, "SEV: Not in correct state. (LSECRET) %x", +- sev_guest->state); ++ sev_common->state); + return 1; + } + +@@ -1073,7 +1045,7 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + trace_kvm_sev_launch_secret(gpa, input.guest_uaddr, + input.trans_uaddr, input.trans_len); + +- ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_LAUNCH_SECRET, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_SECRET, + &input, &error); + if (ret) { + error_setg(errp, "SEV: failed to inject secret ret=%d fw_error=%d '%s'", +@@ -1180,9 +1152,10 @@ void sev_es_set_reset_vector(CPUState *cpu) + { + X86CPU *x86; + CPUX86State *env; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + /* Only update if we have valid reset information */ +- if (!sev_guest || !sev_guest->reset_data_valid) { ++ if (!sev_common || !sev_common->reset_data_valid) { + return; + } + +@@ -1194,11 +1167,11 @@ void sev_es_set_reset_vector(CPUState *cpu) + x86 = X86_CPU(cpu); + env = &x86->env; + +- cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff, ++ cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | + DESC_R_MASK | DESC_A_MASK); + +- env->eip = sev_guest->reset_ip; ++ env->eip = sev_common->reset_ip; + } + + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) +@@ -1206,6 +1179,7 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) + CPUState *cpu; + uint32_t addr; + int ret; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + if (!sev_es_enabled()) { + return 0; +@@ -1219,9 +1193,9 @@ int sev_es_save_reset_vector(void 
*flash_ptr, uint64_t flash_size) + } + + if (addr) { +- sev_guest->reset_cs = addr & 0xffff0000; +- sev_guest->reset_ip = addr & 0x0000ffff; +- sev_guest->reset_data_valid = true; ++ sev_common->reset_cs = addr & 0xffff0000; ++ sev_common->reset_ip = addr & 0x0000ffff; ++ sev_common->reset_data_valid = true; + + CPU_FOREACH(cpu) { + sev_es_set_reset_vector(cpu); +@@ -1267,12 +1241,13 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ +- if (!sev_guest->kernel_hashes) { ++ if (!sev_common->kernel_hashes) { + return false; + } + +@@ -1363,8 +1338,30 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return ret; + } + ++static char * ++sev_common_get_sev_device(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_COMMON(obj)->sev_device); ++} ++ + static void +-sev_guest_class_init(ObjectClass *oc, void *data) ++sev_common_set_sev_device(Object *obj, const char *value, Error **errp) ++{ ++ SEV_COMMON(obj)->sev_device = g_strdup(value); ++} ++ ++static bool sev_common_get_kernel_hashes(Object *obj, Error **errp) ++{ ++ return SEV_COMMON(obj)->kernel_hashes; ++} ++ ++static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp) ++{ ++ SEV_COMMON(obj)->kernel_hashes = value; ++} ++ ++static void ++sev_common_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); +@@ -1373,10 +1370,87 @@ sev_guest_class_init(ObjectClass *oc, void *data) + x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", +- sev_guest_get_sev_device, +- 
sev_guest_set_sev_device); ++ sev_common_get_sev_device, ++ sev_common_set_sev_device); + object_class_property_set_description(oc, "sev-device", + "SEV device to use"); ++ object_class_property_add_bool(oc, "kernel-hashes", ++ sev_common_get_kernel_hashes, ++ sev_common_set_kernel_hashes); ++ object_class_property_set_description(oc, "kernel-hashes", ++ "add kernel hashes to guest firmware for measured Linux boot"); ++} ++ ++static void ++sev_common_instance_init(Object *obj) ++{ ++ SevCommonState *sev_common = SEV_COMMON(obj); ++ ++ sev_common->kvm_type = -1; ++ ++ sev_common->sev_device = g_strdup(DEFAULT_SEV_DEVICE); ++ ++ object_property_add_uint32_ptr(obj, "cbitpos", &sev_common->cbitpos, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "reduced-phys-bits", ++ &sev_common->reduced_phys_bits, ++ OBJ_PROP_FLAG_READWRITE); ++} ++ ++/* sev guest info common to sev/sev-es/sev-snp */ ++static const TypeInfo sev_common_info = { ++ .parent = TYPE_X86_CONFIDENTIAL_GUEST, ++ .name = TYPE_SEV_COMMON, ++ .instance_size = sizeof(SevCommonState), ++ .instance_init = sev_common_instance_init, ++ .class_size = sizeof(SevCommonStateClass), ++ .class_init = sev_common_class_init, ++ .abstract = true, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static char * ++sev_guest_get_dh_cert_file(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_GUEST(obj)->dh_cert_file); ++} ++ ++static void ++sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) ++{ ++ SEV_GUEST(obj)->dh_cert_file = g_strdup(value); ++} ++ ++static char * ++sev_guest_get_session_file(Object *obj, Error **errp) ++{ ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ ++ return sev_guest->session_file ? 
g_strdup(sev_guest->session_file) : NULL; ++} ++ ++static void ++sev_guest_set_session_file(Object *obj, const char *value, Error **errp) ++{ ++ SEV_GUEST(obj)->session_file = g_strdup(value); ++} ++ ++static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++{ ++ return SEV_GUEST(obj)->legacy_vm_type; ++} ++ ++static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++{ ++ SEV_GUEST(obj)->legacy_vm_type = value; ++} ++ ++static void ++sev_guest_class_init(ObjectClass *oc, void *data) ++{ + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); +@@ -1387,11 +1461,6 @@ sev_guest_class_init(ObjectClass *oc, void *data) + sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "kernel-hashes", +- sev_guest_get_kernel_hashes, +- sev_guest_set_kernel_hashes); +- object_class_property_set_description(oc, "kernel-hashes", +- "add kernel hashes to guest firmware for measured Linux boot"); + object_class_property_add_bool(oc, "legacy-vm-type", + sev_guest_get_legacy_vm_type, + sev_guest_set_legacy_vm_type); +@@ -1402,41 +1471,29 @@ sev_guest_class_init(ObjectClass *oc, void *data) + static void + sev_guest_instance_init(Object *obj) + { +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->kvm_type = -1; ++ SevGuestState *sev_guest = SEV_GUEST(obj); + +- sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); +- sev->policy = DEFAULT_GUEST_POLICY; +- object_property_add_uint32_ptr(obj, "policy", &sev->policy, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "handle", &sev->handle, ++ sev_guest->policy = DEFAULT_GUEST_POLICY; ++ object_property_add_uint32_ptr(obj, "handle", &sev_guest->handle, + OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, +- OBJ_PROP_FLAG_READWRITE); +- 
object_property_add_uint32_ptr(obj, "reduced-phys-bits", +- &sev->reduced_phys_bits, ++ object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, + OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); + } + +-/* sev guest info */ ++/* guest info specific sev/sev-es */ + static const TypeInfo sev_guest_info = { +- .parent = TYPE_X86_CONFIDENTIAL_GUEST, ++ .parent = TYPE_SEV_COMMON, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), +- .instance_finalize = sev_guest_finalize, +- .class_init = sev_guest_class_init, + .instance_init = sev_guest_instance_init, +- .interfaces = (InterfaceInfo[]) { +- { TYPE_USER_CREATABLE }, +- { } +- } ++ .class_init = sev_guest_class_init, + }; + + static void + sev_register_types(void) + { ++ type_register_static(&sev_common_info); + type_register_static(&sev_guest_info); + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 9e10d09539..668374eef3 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -20,6 +20,9 @@ + + #include "exec/confidential-guest-support.h" + ++#define TYPE_SEV_COMMON "sev-common" ++#define TYPE_SEV_GUEST "sev-guest" ++ + #define SEV_POLICY_NODBG 0x1 + #define SEV_POLICY_NOKS 0x2 + #define SEV_POLICY_ES 0x4 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Introduce-sev-snp-guest-object.patch b/SOURCES/kvm-i386-sev-Introduce-sev-snp-guest-object.patch new file mode 100644 index 0000000..b347bf6 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Introduce-sev-snp-guest-object.patch @@ -0,0 +1,530 @@ +From 900859fd3445b9a71f1a9a8befda17f0c33f3923 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:19 -0500 +Subject: [PATCH 059/100] i386/sev: Introduce 'sev-snp-guest' object + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [59/91] 3e585113d209176c2b97ad5e4fe943f19dfdcaeb (bonzini/rhel-qemu-kvm) + +SEV-SNP support relies on 
a different set of properties/state than the +existing 'sev-guest' object. This patch introduces the 'sev-snp-guest' +object, which can be used to configure an SEV-SNP guest. For example, +a default-configured SEV-SNP guest with no additional information +passed in for use with attestation: + + -object sev-snp-guest,id=sev0 + +or a fully-specified SEV-SNP guest where all spec-defined binary +blobs are passed in as base64-encoded strings: + + -object sev-snp-guest,id=sev0, \ + policy=0x30000, \ + init-flags=0, \ + id-block=YWFhYWFhYWFhYWFhYWFhCg==, \ + id-auth=CxHK/OKLkXGn/KpAC7Wl1FSiisWDbGTEKz..., \ + author-key-enabled=on, \ + host-data=LNkCWBRC5CcdGXirbNUV1OrsR28s..., \ + guest-visible-workarounds=AA==, \ + +See the QAPI schema updates included in this patch for more usage +details. + +In some cases these blobs may be up to 4096 characters, but this is +generally well below the default limit for linux hosts where +command-line sizes are defined by the sysconf-configurable ARG_MAX +value, which defaults to 2097152 characters for Ubuntu hosts, for +example. 
+ +Signed-off-by: Brijesh Singh +Co-developed-by: Michael Roth +Acked-by: Markus Armbruster (for QAPI schema) +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-8-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7b34df44260b391e33bc3acf1ced30019d9aadf1) +Signed-off-by: Paolo Bonzini +--- + docs/system/i386/amd-memory-encryption.rst | 70 +++++- + qapi/qom.json | 58 +++++ + target/i386/sev.c | 253 +++++++++++++++++++++ + target/i386/sev.h | 1 + + 4 files changed, 380 insertions(+), 2 deletions(-) + +diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst +index e9bc142bc1..748f5094ba 100644 +--- a/docs/system/i386/amd-memory-encryption.rst ++++ b/docs/system/i386/amd-memory-encryption.rst +@@ -25,8 +25,8 @@ support for notifying a guest's operating system when certain types of VMEXITs + are about to occur. This allows the guest to selectively share information with + the hypervisor to satisfy the requested function. + +-Launching +---------- ++Launching (SEV and SEV-ES) ++-------------------------- + + Boot images (such as bios) must be encrypted before a guest can be booted. The + ``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ``LAUNCH_START``, +@@ -161,6 +161,72 @@ The value of GCTX.LD is + If kernel hashes are not used, or SEV-ES is disabled, use empty blobs for + ``kernel_hashes_blob`` and ``vmsas_blob`` as needed. + ++Launching (SEV-SNP) ++------------------- ++Boot images (such as bios) must be encrypted before a guest can be booted. The ++``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ++``SNP_LAUNCH_START``, ``SNP_LAUNCH_UPDATE``, and ``SNP_LAUNCH_FINISH``. These ++three commands communicate with SEV-SNP firmware to generate a fresh memory ++encryption key for the VM, encrypt the boot images for a successful launch. 
For ++more details on the SEV-SNP firmware interfaces used by these commands please ++see the SEV-SNP Firmware ABI. ++ ++``SNP_LAUNCH_START`` is called first to create a cryptographic launch context ++within the firmware. To create this context, the guest owner must provide a ++guest policy and other parameters as described in the SEV-SNP firmware ++specification. The launch parameters should be specified as described in the ++QAPI schema for the sev-snp-guest object. ++ ++The ``SNP_LAUNCH_START`` uses the following parameters, which can be configured ++by the corresponding parameters documented in the QAPI schema for the ++'sev-snp-guest' object. ++ +++--------+-------+----------+-------------------------------------------------+ ++| key | type | default | meaning | +++---------------------------+-------------------------------------------------+ ++| policy | hex | 0x30000 | a 64-bit guest policy | +++---------------------------+-------------------------------------------------+ ++| guest-visible-workarounds | string| 0 | 16-byte base64 encoded string| ++| | | | for guest OS visible | ++| | | | workarounds. | +++---------------------------+-------------------------------------------------+ ++ ++``SNP_LAUNCH_UPDATE`` encrypts the memory region using the cryptographic context ++created via the ``SNP_LAUNCH_START`` command. If required, this command can be ++called multiple times to encrypt different memory regions. The command also ++calculates the measurement of the memory contents as it encrypts. ++ ++``SNP_LAUNCH_FINISH`` finalizes the guest launch flow. Optionally, while ++finalizing the launch the firmware can perform checks on the launch digest ++computed through the ``SNP_LAUNCH_UPDATE``. To perform the check the user must ++supply the id block, authentication blob and host data that should be included ++in the attestation report. See the SEV-SNP spec for further details.
++ ++The ``SNP_LAUNCH_FINISH`` uses the following parameters, which can be configured ++by the corresponding parameters documented in the QAPI schema for the ++'sev-snp-guest' object. ++ +++--------------------+-------+----------+-------------------------------------+ ++| key | type | default | meaning | +++--------------------+-------+----------+-------------------------------------+ ++| id-block | string| none | base64 encoded ID block | +++--------------------+-------+----------+-------------------------------------+ ++| id-auth | string| none | base64 encoded authentication | ++| | | | information | +++--------------------+-------+----------+-------------------------------------+ ++| author-key-enabled | bool | 0 | auth block contains author key | +++--------------------+-------+----------+-------------------------------------+ ++| host-data | string| none | host provided data | +++--------------------+-------+----------+-------------------------------------+ ++ ++To launch a SEV-SNP guest (additional parameters are documented in the QAPI ++schema for the 'sev-snp-guest' object):: ++ ++ # ${QEMU} \ ++ -machine ...,confidential-guest-support=sev0 \ ++ -object sev-snp-guest,id=sev0,cbitpos=51,reduced-phys-bits=1 ++ ++ + Debugging + --------- + +diff --git a/qapi/qom.json b/qapi/qom.json +index 056b38f491..8bd299265e 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -929,6 +929,62 @@ + '*handle': 'uint32', + '*legacy-vm-type': 'bool' } } + ++## ++# @SevSnpGuestProperties: ++# ++# Properties for sev-snp-guest objects. Most of these are direct ++# arguments for the KVM_SNP_* interfaces documented in the Linux ++# kernel source under ++# Documentation/arch/x86/amd-memory-encryption.rst, which are in turn ++# closely coupled with the SNP_INIT/SNP_LAUNCH_* firmware commands ++# documented in the SEV-SNP Firmware ABI Specification (Rev 0.9). ++# ++# More usage information is also available in the QEMU source tree ++# under docs/system/i386/amd-memory-encryption.rst.
++# ++# @policy: the 'POLICY' parameter to the SNP_LAUNCH_START command, as ++# defined in the SEV-SNP firmware ABI (default: 0x30000) ++# ++# @guest-visible-workarounds: 16-byte, base64-encoded blob to report ++# hypervisor-defined workarounds, corresponding to the 'GOSVW' ++# parameter of the SNP_LAUNCH_START command defined in the SEV-SNP ++# firmware ABI (default: all-zero) ++# ++# @id-block: 96-byte, base64-encoded blob to provide the 'ID Block' ++# structure for the SNP_LAUNCH_FINISH command defined in the ++# SEV-SNP firmware ABI (default: all-zero) ++# ++# @id-auth: 4096-byte, base64-encoded blob to provide the 'ID ++# Authentication Information Structure' for the SNP_LAUNCH_FINISH ++# command defined in the SEV-SNP firmware ABI (default: all-zero) ++# ++# @author-key-enabled: true if 'id-auth' blob contains the 'AUTHOR_KEY' ++# field defined in the SEV-SNP firmware ABI (default: false) ++# ++# @host-data: 32-byte, base64-encoded, user-defined blob to provide to ++# the guest, as documented for the 'HOST_DATA' parameter of the ++# SNP_LAUNCH_FINISH command in the SEV-SNP firmware ABI (default: ++# all-zero) ++# ++# @vcek-disabled: Guests are by default allowed to choose between VLEK ++# (Versioned Loaded Endorsement Key) or VCEK (Versioned Chip ++# Endorsement Key) when requesting attestation reports from ++# firmware. Set this to true to disable the use of VCEK.
++# (default: false) (since: 9.1) ++# ++# Since: 9.1 ++## ++{ 'struct': 'SevSnpGuestProperties', ++ 'base': 'SevCommonProperties', ++ 'data': { ++ '*policy': 'uint64', ++ '*guest-visible-workarounds': 'str', ++ '*id-block': 'str', ++ '*id-auth': 'str', ++ '*author-key-enabled': 'bool', ++ '*host-data': 'str', ++ '*vcek-disabled': 'bool' } } ++ + ## + # @ThreadContextProperties: + # +@@ -1007,6 +1063,7 @@ + { 'name': 'secret_keyring', + 'if': 'CONFIG_SECRET_KEYRING' }, + 'sev-guest', ++ 'sev-snp-guest', + 'thread-context', + 's390-pv-guest', + 'throttle-group', +@@ -1077,6 +1134,7 @@ + 'secret_keyring': { 'type': 'SecretKeyringProperties', + 'if': 'CONFIG_SECRET_KEYRING' }, + 'sev-guest': 'SevGuestProperties', ++ 'sev-snp-guest': 'SevSnpGuestProperties', + 'thread-context': 'ThreadContextProperties', + 'throttle-group': 'ThrottleGroupProperties', + 'tls-creds-anon': 'TlsCredsAnonProperties', +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 28a018ed83..a81b3228d4 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -42,6 +42,7 @@ + + OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) ++OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) + + struct SevCommonState { + X86ConfidentialGuest parent_obj; +@@ -96,8 +97,22 @@ struct SevGuestState { + bool legacy_vm_type; + }; + ++struct SevSnpGuestState { ++ SevCommonState parent_obj; ++ ++ /* configuration parameters */ ++ char *guest_visible_workarounds; ++ char *id_block; ++ char *id_auth; ++ char *host_data; ++ ++ struct kvm_sev_snp_launch_start kvm_start_conf; ++ struct kvm_sev_snp_launch_finish kvm_finish_conf; ++}; ++ + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ + #define DEFAULT_SEV_DEVICE "/dev/sev" ++#define DEFAULT_SEV_SNP_POLICY 0x30000 + + #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" + typedef struct __attribute__((__packed__)) SevInfoBlock { +@@ -1500,11 
+1515,249 @@ static const TypeInfo sev_guest_info = { + .class_init = sev_guest_class_init, + }; + ++static void ++sev_snp_guest_get_policy(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ visit_type_uint64(v, name, ++ (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, ++ errp); ++} ++ ++static void ++sev_snp_guest_set_policy(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ visit_type_uint64(v, name, ++ (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, ++ errp); ++} ++ ++static char * ++sev_snp_guest_get_guest_visible_workarounds(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_SNP_GUEST(obj)->guest_visible_workarounds); ++} ++ ++static void ++sev_snp_guest_set_guest_visible_workarounds(Object *obj, const char *value, ++ Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; ++ g_autofree guchar *blob; ++ gsize len; ++ ++ g_free(sev_snp_guest->guest_visible_workarounds); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->guest_visible_workarounds = g_strdup(value); ++ ++ blob = qbase64_decode(sev_snp_guest->guest_visible_workarounds, ++ -1, &len, errp); ++ if (!blob) { ++ return; ++ } ++ ++ if (len != sizeof(start->gosvw)) { ++ error_setg(errp, "parameter length of %lu exceeds max of %lu", ++ len, sizeof(start->gosvw)); ++ return; ++ } ++ ++ memcpy(start->gosvw, blob, len); ++} ++ ++static char * ++sev_snp_guest_get_id_block(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->id_block); ++} ++ ++static void ++sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ gsize len; ++ ++ g_free(sev_snp_guest->id_block); ++ 
g_free((guchar *)finish->id_block_uaddr); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->id_block = g_strdup(value); ++ ++ finish->id_block_uaddr = ++ (uint64_t)qbase64_decode(sev_snp_guest->id_block, -1, &len, errp); ++ ++ if (!finish->id_block_uaddr) { ++ return; ++ } ++ ++ if (len != KVM_SEV_SNP_ID_BLOCK_SIZE) { ++ error_setg(errp, "parameter length of %lu not equal to %u", ++ len, KVM_SEV_SNP_ID_BLOCK_SIZE); ++ return; ++ } ++ ++ finish->id_block_en = (len) ? 1 : 0; ++} ++ ++static char * ++sev_snp_guest_get_id_auth(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->id_auth); ++} ++ ++static void ++sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ gsize len; ++ ++ g_free(sev_snp_guest->id_auth); ++ g_free((guchar *)finish->id_auth_uaddr); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->id_auth = g_strdup(value); ++ ++ finish->id_auth_uaddr = ++ (uint64_t)qbase64_decode(sev_snp_guest->id_auth, -1, &len, errp); ++ ++ if (!finish->id_auth_uaddr) { ++ return; ++ } ++ ++ if (len > KVM_SEV_SNP_ID_AUTH_SIZE) { ++ error_setg(errp, "parameter length:ID_AUTH %lu exceeds max of %u", ++ len, KVM_SEV_SNP_ID_AUTH_SIZE); ++ return; ++ } ++} ++ ++static bool ++sev_snp_guest_get_author_key_enabled(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return !!sev_snp_guest->kvm_finish_conf.auth_key_en; ++} ++ ++static void ++sev_snp_guest_set_author_key_enabled(Object *obj, bool value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ sev_snp_guest->kvm_finish_conf.auth_key_en = value; ++} ++ ++static bool ++sev_snp_guest_get_vcek_disabled(Object *obj, Error **errp) ++{ ++ 
SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return !!sev_snp_guest->kvm_finish_conf.vcek_disabled; ++} ++ ++static void ++sev_snp_guest_set_vcek_disabled(Object *obj, bool value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ sev_snp_guest->kvm_finish_conf.vcek_disabled = value; ++} ++ ++static char * ++sev_snp_guest_get_host_data(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->host_data); ++} ++ ++static void ++sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ g_autofree guchar *blob; ++ gsize len; ++ ++ g_free(sev_snp_guest->host_data); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->host_data = g_strdup(value); ++ ++ blob = qbase64_decode(sev_snp_guest->host_data, -1, &len, errp); ++ ++ if (!blob) { ++ return; ++ } ++ ++ if (len != sizeof(finish->host_data)) { ++ error_setg(errp, "parameter length of %lu not equal to %lu", ++ len, sizeof(finish->host_data)); ++ return; ++ } ++ ++ memcpy(finish->host_data, blob, len); ++} ++ ++static void ++sev_snp_guest_class_init(ObjectClass *oc, void *data) ++{ ++ object_class_property_add(oc, "policy", "uint64", ++ sev_snp_guest_get_policy, ++ sev_snp_guest_set_policy, NULL, NULL); ++ object_class_property_add_str(oc, "guest-visible-workarounds", ++ sev_snp_guest_get_guest_visible_workarounds, ++ sev_snp_guest_set_guest_visible_workarounds); ++ object_class_property_add_str(oc, "id-block", ++ sev_snp_guest_get_id_block, ++ sev_snp_guest_set_id_block); ++ object_class_property_add_str(oc, "id-auth", ++ sev_snp_guest_get_id_auth, ++ sev_snp_guest_set_id_auth); ++ object_class_property_add_bool(oc, "author-key-enabled", ++ sev_snp_guest_get_author_key_enabled, ++ 
sev_snp_guest_set_author_key_enabled); ++ object_class_property_add_bool(oc, "vcek-required", ++ sev_snp_guest_get_vcek_disabled, ++ sev_snp_guest_set_vcek_disabled); ++ object_class_property_add_str(oc, "host-data", ++ sev_snp_guest_get_host_data, ++ sev_snp_guest_set_host_data); ++} ++ ++static void ++sev_snp_guest_instance_init(Object *obj) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ /* default init/start/finish params for kvm */ ++ sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; ++} ++ ++/* guest info specific to sev-snp */ ++static const TypeInfo sev_snp_guest_info = { ++ .parent = TYPE_SEV_COMMON, ++ .name = TYPE_SEV_SNP_GUEST, ++ .instance_size = sizeof(SevSnpGuestState), ++ .class_init = sev_snp_guest_class_init, ++ .instance_init = sev_snp_guest_instance_init, ++}; ++ + static void + sev_register_types(void) + { + type_register_static(&sev_common_info); + type_register_static(&sev_guest_info); ++ type_register_static(&sev_snp_guest_info); + } + + type_init(sev_register_types); +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 668374eef3..bedc667eeb 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -22,6 +22,7 @@ + + #define TYPE_SEV_COMMON "sev-common" + #define TYPE_SEV_GUEST "sev-guest" ++#define TYPE_SEV_SNP_GUEST "sev-snp-guest" + + #define SEV_POLICY_NODBG 0x1 + #define SEV_POLICY_NOKS 0x2 +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch new file mode 100644 index 0000000..265da66 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch @@ -0,0 +1,85 @@ +From be37914ae54c8aebc218cf41b37bc0ea1563daae Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 12:51:44 +0200 +Subject: [PATCH 074/100] i386/sev: Invoke launch_updata_data() for SEV class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 
+RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [74/91] f1b588a9ffecd6944a78186d88a6be3849698710 (bonzini/rhel-qemu-kvm) + +Add launch_update_data() in SevCommonStateClass and +invoke as sev_launch_update_data() for SEV object. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-26-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9861405a8f845133b7984322c2df0c43a45553c3) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 7b5c4b4874..8834cf9441 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -74,6 +74,7 @@ struct SevCommonStateClass { + /* public */ + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); ++ int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + }; + +@@ -929,7 +930,7 @@ out: + } + + static int +-sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) ++sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, uint8_t *addr, uint64_t len) + { + int ret, fw_error; + struct kvm_sev_launch_update_data update; +@@ -941,7 +942,7 @@ sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + update.uaddr = (uintptr_t)addr; + update.len = len; + trace_kvm_sev_launch_update_data(addr, len); +- ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + &update, &fw_error); + if (ret) { + error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", +@@ -1487,6 +1488,7 @@ int + sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevCommonStateClass 
*klass = SEV_COMMON_GET_CLASS(sev_common); + + if (!sev_common) { + return 0; +@@ -1494,7 +1496,9 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + + /* if SEV is in update state then encrypt the data else do nothing */ + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { +- int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); ++ int ret; ++ ++ ret = klass->launch_update_data(sev_common, gpa, ptr, len); + if (ret < 0) { + error_setg(errp, "SEV: Failed to encrypt pflash rom"); + return ret; +@@ -1968,6 +1972,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; ++ klass->launch_update_data = sev_launch_update_data; + klass->kvm_init = sev_kvm_init; + x86_klass->kvm_type = sev_kvm_type; + +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch new file mode 100644 index 0000000..f28004d --- /dev/null +++ b/SOURCES/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch @@ -0,0 +1,55 @@ +From 32899eb4fa5143b795b107de4857adce2cf1d434 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:38 -0500 +Subject: [PATCH 075/100] i386/sev: Invoke launch_updata_data() for SNP class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [75/91] 3520af5847f8dddb6d7fe7ad5feb308230f387b9 (bonzini/rhel-qemu-kvm) + +Invoke as sev_snp_launch_update_data() for SNP object. 
+ +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-27-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0765d136eba400ad1cb7cae18438bb10eace64dc) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 8834cf9441..eaf5fc6c6b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1091,6 +1091,15 @@ snp_launch_update_data(uint64_t gpa, void *hva, + return 0; + } + ++static int ++sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, ++ uint8_t *ptr, uint64_t len) ++{ ++ int ret = snp_launch_update_data(gpa, ptr, len, ++ KVM_SEV_SNP_PAGE_TYPE_NORMAL); ++ return ret; ++} ++ + static int + sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, + const KvmCpuidInfo *kvm_cpuid_info) +@@ -2216,6 +2225,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; ++ klass->launch_update_data = sev_snp_launch_update_data; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch b/SOURCES/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch new file mode 100644 index 0000000..e38615b --- /dev/null +++ b/SOURCES/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch @@ -0,0 +1,47 @@ +From fa6076291eb45255bc2fe523399d7d0647fc5570 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:10 -0500 +Subject: [PATCH 085/100] i386/sev: Move SEV_COMMON null check before + dereferencing + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [85/91] e8d2bfd077766a5e7777b9337d0e77146f883224 (bonzini/rhel-qemu-kvm) + +Fixes Coverity CID 1546886. 
+ +Fixes: 9861405a8f ("i386/sev: Invoke launch_updata_data() for SEV class") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-3-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 48779faef3c8e2fe70bd8285bffa731bd76dc844) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 7c9df621de..f18432f58e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1529,11 +1529,12 @@ int + sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); ++ SevCommonStateClass *klass; + + if (!sev_common) { + return 0; + } ++ klass = SEV_COMMON_GET_CLASS(sev_common); + + /* if SEV is in update state then encrypt the data else do nothing */ + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch b/SOURCES/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch new file mode 100644 index 0000000..250a723 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch @@ -0,0 +1,88 @@ +From 4d96ca893126d4c17c9fe03c76973b1d4a414f21 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:18 -0500 +Subject: [PATCH 058/100] i386/sev: Move sev_launch_finish to separate class + method + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [58/91] 7865710d320a6df7038ef7016d350aa9cdcea326 (bonzini/rhel-qemu-kvm) + +When sev-snp-guest objects are introduced there will be a number of +differences in how the launch finish is handled compared to the existing +sev-guest object. 
Move sev_launch_finish() to a class method to make it +easier to implement SNP-specific launch update functionality later. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-7-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit bce615a14aec07cab0488e5a242f6a91e641efcb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b2aa0d6f99..28a018ed83 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -71,6 +71,7 @@ struct SevCommonStateClass { + + /* public */ + int (*launch_start)(SevCommonState *sev_common); ++ void (*launch_finish)(SevCommonState *sev_common); + }; + + /** +@@ -801,12 +802,12 @@ static Notifier sev_machine_done_notify = { + }; + + static void +-sev_launch_finish(SevGuestState *sev_guest) ++sev_launch_finish(SevCommonState *sev_common) + { + int ret, error; + + trace_kvm_sev_launch_finish(); +- ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, + &error); + if (ret) { + error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", +@@ -814,7 +815,7 @@ sev_launch_finish(SevGuestState *sev_guest) + exit(1); + } + +- sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); ++ sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, +@@ -826,10 +827,11 @@ static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { + SevCommonState *sev_common = opaque; ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(opaque); + + if (running) { + if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { +- sev_launch_finish(SEV_GUEST(sev_common)); ++ klass->launch_finish(sev_common); + } + } + } +@@ -1457,6 +1459,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + + 
klass->launch_start = sev_launch_start; ++ klass->launch_finish = sev_launch_finish; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch b/SOURCES/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch new file mode 100644 index 0000000..12824ec --- /dev/null +++ b/SOURCES/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch @@ -0,0 +1,91 @@ +From a170ba2c7dbf2775eb9047779d3643a2a81bb372 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:17 -0500 +Subject: [PATCH 057/100] i386/sev: Move sev_launch_update to separate class + method + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [57/91] 4f31e7afaec6f2c2a7c06cda4d7d27d4037e53e0 (bonzini/rhel-qemu-kvm) + +When sev-snp-guest objects are introduced there will be a number of +differences in how the launch data is handled compared to the existing +sev-guest object. Move sev_launch_start() to a class method to make it +easier to implement SNP-specific launch update functionality later. 
+ +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-6-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6600f1ac0c81cbe67faf048ea07f78542dea925f) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 33e606eea0..b2aa0d6f99 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -69,6 +69,8 @@ struct SevCommonState { + struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + ++ /* public */ ++ int (*launch_start)(SevCommonState *sev_common); + }; + + /** +@@ -632,16 +634,16 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + } + + static int +-sev_launch_start(SevGuestState *sev_guest) ++sev_launch_start(SevCommonState *sev_common) + { + gsize sz; + int ret = 1; + int fw_error, rc; ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + struct kvm_sev_launch_start start = { + .handle = sev_guest->handle, .policy = sev_guest->policy + }; + guchar *session = NULL, *dh_cert = NULL; +- SevCommonState *sev_common = SEV_COMMON(sev_guest); + + if (sev_guest->session_file) { + if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { +@@ -862,6 +864,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + + ret = ram_block_discard_disable(true); + if (ret) { +@@ -952,7 +955,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + goto err; + } + +- sev_launch_start(SEV_GUEST(sev_common)); ++ ret = klass->launch_start(sev_common); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + goto err; +@@ -1451,6 +1454,10 @@ static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) + static void + 
sev_guest_class_init(ObjectClass *oc, void *data) + { ++ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ ++ klass->launch_start = sev_launch_start; ++ + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Reorder-struct-declarations.patch b/SOURCES/kvm-i386-sev-Reorder-struct-declarations.patch new file mode 100644 index 0000000..746317d --- /dev/null +++ b/SOURCES/kvm-i386-sev-Reorder-struct-declarations.patch @@ -0,0 +1,134 @@ +From d009fa2cebebd1da80f4f2f5d0c4fffb87e02afc Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 May 2024 06:16:34 -0500 +Subject: [PATCH 079/100] i386/sev: Reorder struct declarations + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [79/91] 1274d4620e88dda99ec10173ca5e3cd4184c8fb6 (bonzini/rhel-qemu-kvm) + +Move the declaration of PaddedSevHashTable before SevSnpGuest so +we can add a new such field to the latter. + +No functional change intended. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-23-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cc483bf911931f405dea682c74a3d8b9b6c54369) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 84 +++++++++++++++++++++++------------------------ + 1 file changed, 42 insertions(+), 42 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 73f9406715..3fce4c08eb 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -46,6 +46,48 @@ OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) + OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) + ++/* hard code sha256 digest size */ ++#define HASH_SIZE 32 ++ ++typedef struct QEMU_PACKED SevHashTableEntry { ++ QemuUUID guid; ++ uint16_t len; ++ uint8_t hash[HASH_SIZE]; ++} SevHashTableEntry; ++ ++typedef struct QEMU_PACKED SevHashTable { ++ QemuUUID guid; ++ uint16_t len; ++ SevHashTableEntry cmdline; ++ SevHashTableEntry initrd; ++ SevHashTableEntry kernel; ++} SevHashTable; ++ ++/* ++ * Data encrypted by sev_encrypt_flash() must be padded to a multiple of ++ * 16 bytes. 
++ */ ++typedef struct QEMU_PACKED PaddedSevHashTable { ++ SevHashTable ht; ++ uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; ++} PaddedSevHashTable; ++ ++QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); ++ ++#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" ++typedef struct __attribute__((__packed__)) SevInfoBlock { ++ /* SEV-ES Reset Vector Address */ ++ uint32_t reset_addr; ++} SevInfoBlock; ++ ++#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" ++typedef struct QEMU_PACKED SevHashTableDescriptor { ++ /* SEV hash table area guest address */ ++ uint32_t base; ++ /* SEV hash table area size (in bytes) */ ++ uint32_t size; ++} SevHashTableDescriptor; ++ + struct SevCommonState { + X86ConfidentialGuest parent_obj; + +@@ -128,48 +170,6 @@ typedef struct SevLaunchUpdateData { + + static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; + +-#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" +-typedef struct __attribute__((__packed__)) SevInfoBlock { +- /* SEV-ES Reset Vector Address */ +- uint32_t reset_addr; +-} SevInfoBlock; +- +-#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" +-typedef struct QEMU_PACKED SevHashTableDescriptor { +- /* SEV hash table area guest address */ +- uint32_t base; +- /* SEV hash table area size (in bytes) */ +- uint32_t size; +-} SevHashTableDescriptor; +- +-/* hard code sha256 digest size */ +-#define HASH_SIZE 32 +- +-typedef struct QEMU_PACKED SevHashTableEntry { +- QemuUUID guid; +- uint16_t len; +- uint8_t hash[HASH_SIZE]; +-} SevHashTableEntry; +- +-typedef struct QEMU_PACKED SevHashTable { +- QemuUUID guid; +- uint16_t len; +- SevHashTableEntry cmdline; +- SevHashTableEntry initrd; +- SevHashTableEntry kernel; +-} SevHashTable; +- +-/* +- * Data encrypted by sev_encrypt_flash() must be padded to a multiple of +- * 16 bytes. 
+- */ +-typedef struct QEMU_PACKED PaddedSevHashTable { +- SevHashTable ht; +- uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; +-} PaddedSevHashTable; +- +-QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); +- + static Error *sev_mig_blocker; + + static const char *const sev_fw_errlist[] = { +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Replace-error_report-with-error_setg.patch b/SOURCES/kvm-i386-sev-Replace-error_report-with-error_setg.patch new file mode 100644 index 0000000..ba66cde --- /dev/null +++ b/SOURCES/kvm-i386-sev-Replace-error_report-with-error_setg.patch @@ -0,0 +1,46 @@ +From 80c1d78e31b2567d1c610c8939b75d159ff6ea27 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:13 -0500 +Subject: [PATCH 055/100] i386/sev: Replace error_report with error_setg + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [55/91] 1e15fc2458687e564af9fa5022c29e79ddc8edfd (bonzini/rhel-qemu-kvm) + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-2-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 18c453409a3a84cf7b2c764c5a03fb429a73bbeb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index d30b68c11e..67ed32e5ea 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -952,13 +952,13 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + + if (sev_es_enabled()) { + if (!kvm_kernel_irqchip_allowed()) { +- error_report("%s: SEV-ES guests require in-kernel irqchip support", +- __func__); ++ error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" ++ "support", __func__); + goto err; + } + + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { +- error_report("%s: guest policy requires SEV-ES, but 
" ++ error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", + __func__); + goto err; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Return-when-sev_common-is-null.patch b/SOURCES/kvm-i386-sev-Return-when-sev_common-is-null.patch new file mode 100644 index 0000000..6fc68aa --- /dev/null +++ b/SOURCES/kvm-i386-sev-Return-when-sev_common-is-null.patch @@ -0,0 +1,40 @@ +From 88da6d01b1de2b92adb5c47c6d482876a054705f Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:11 -0500 +Subject: [PATCH 086/100] i386/sev: Return when sev_common is null + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [86/91] 02ce4a6a51ce9fd961f417c13db0a760673591ba (bonzini/rhel-qemu-kvm) + +Fixes Coverity CID 1546885. + +Fixes: 16dcf200dc ("i386/sev: Introduce "sev-common" type to encapsulate common SEV state") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-4-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cd7093a7a168a823d07671348996f049d45e8f67) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index f18432f58e..c40562dce3 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -587,6 +587,7 @@ static SevCapability *sev_get_capabilities(Error **errp) + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (!sev_common) { + error_setg(errp, "SEV is not configured"); ++ return NULL; + } + + sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch b/SOURCES/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch new file mode 100644 index 0000000..8548e22 --- /dev/null +++ 
b/SOURCES/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch @@ -0,0 +1,47 @@ +From c7649ac1b958dc48de50f32b1ad80d84b17945a8 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:29 -0500 +Subject: [PATCH 069/100] i386/sev: Set CPU state to protected once SNP guest + payload is finalized + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [69/91] 09280f987a186511ec7d62c3f340b2148e8556d7 (bonzini/rhel-qemu-kvm) + +Once KVM_SNP_LAUNCH_FINISH is called the vCPU state is copied into the +vCPU's VMSA page and measured/encrypted. Any attempt to read/write CPU +state afterward will only be acting on the initial data and so are +effectively no-ops. + +Set the vCPU state to protected at this point so that QEMU don't +continue trying to re-sync vCPU data during guest runtime. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-18-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3d44fdff60ea66fbd7a33f5d32b50843cd80f48a) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index ef2e592ca7..e84e4395a5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -997,6 +997,7 @@ sev_snp_launch_finish(SevCommonState *sev_common) + exit(1); + } + ++ kvm_mark_guest_state_protected(); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch b/SOURCES/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch new file mode 100644 index 0000000..05ccb0a --- /dev/null +++ b/SOURCES/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch @@ -0,0 +1,268 @@ +From 5540bb5ca052531563df1ade68995e268ae65224 Mon Sep 17 00:00:00 2001 
+From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:36 -0500 +Subject: [PATCH 012/100] i386/sev: Switch to use confidential_guest_kvm_init() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [12/91] 6f5f8d1b818826f7ee4b6ae527963ef23c97f531 (bonzini/rhel-qemu-kvm) + +Use confidential_guest_kvm_init() instead of calling SEV +specific sev_kvm_init(). This allows the introduction of multiple +confidential-guest-support subclasses for different x86 vendors. + +As a bonus, stubs are not needed anymore since there is no +direct call from target/i386/kvm/kvm.c to SEV code. + +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 637c95b37b106c2eeba313e0abb38ec12e918a59) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 10 +-- + target/i386/kvm/meson.build | 2 - + target/i386/kvm/sev-stub.c | 21 ------ + target/i386/sev.c | 127 ++++++++++++++++++------------------ + target/i386/sev.h | 2 - + 5 files changed, 69 insertions(+), 93 deletions(-) + delete mode 100644 target/i386/kvm/sev-stub.c + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5f30b649a0..e271652620 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -2543,10 +2543,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + * mechanisms are supported in future (e.g. TDX), they'll need + * their own initialization either here or elsewhere. 
+ */ +- ret = sev_kvm_init(ms->cgs, &local_err); +- if (ret < 0) { +- error_report_err(local_err); +- return ret; ++ if (ms->cgs) { ++ ret = confidential_guest_kvm_init(ms->cgs, &local_err); ++ if (ret < 0) { ++ error_report_err(local_err); ++ return ret; ++ } + } + + has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); +diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build +index 84d9143e60..e7850981e6 100644 +--- a/target/i386/kvm/meson.build ++++ b/target/i386/kvm/meson.build +@@ -7,8 +7,6 @@ i386_kvm_ss.add(files( + + i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) + +-i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) +- + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) + + i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) +diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c +deleted file mode 100644 +index 1be5341e8a..0000000000 +--- a/target/i386/kvm/sev-stub.c ++++ /dev/null +@@ -1,21 +0,0 @@ +-/* +- * QEMU SEV stub +- * +- * Copyright Advanced Micro Devices 2018 +- * +- * Authors: +- * Brijesh Singh +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- * +- */ +- +-#include "qemu/osdep.h" +-#include "sev.h" +- +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +-{ +- /* If we get here, cgs must be some non-SEV thing */ +- return 0; +-} +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 72930ff0dc..b8f79d34d1 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -353,63 +353,6 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) + sev->kernel_hashes = value; + } + +-static void +-sev_guest_class_init(ObjectClass *oc, void *data) +-{ +- object_class_property_add_str(oc, "sev-device", +- sev_guest_get_sev_device, +- sev_guest_set_sev_device); +- object_class_property_set_description(oc, "sev-device", +- "SEV device to use"); +- object_class_property_add_str(oc, "dh-cert-file", +- sev_guest_get_dh_cert_file, +- sev_guest_set_dh_cert_file); +- object_class_property_set_description(oc, "dh-cert-file", +- "guest owners DH certificate (encoded with base64)"); +- object_class_property_add_str(oc, "session-file", +- sev_guest_get_session_file, +- sev_guest_set_session_file); +- object_class_property_set_description(oc, "session-file", +- "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "kernel-hashes", +- sev_guest_get_kernel_hashes, +- sev_guest_set_kernel_hashes); +- object_class_property_set_description(oc, "kernel-hashes", +- "add kernel hashes to guest firmware for measured Linux boot"); +-} +- +-static void +-sev_guest_instance_init(Object *obj) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); +- sev->policy = DEFAULT_GUEST_POLICY; +- object_property_add_uint32_ptr(obj, "policy", &sev->policy, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "handle", &sev->handle, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "reduced-phys-bits", 
+- &sev->reduced_phys_bits, +- OBJ_PROP_FLAG_READWRITE); +-} +- +-/* sev guest info */ +-static const TypeInfo sev_guest_info = { +- .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, +- .name = TYPE_SEV_GUEST, +- .instance_size = sizeof(SevGuestState), +- .instance_finalize = sev_guest_finalize, +- .class_init = sev_guest_class_init, +- .instance_init = sev_guest_instance_init, +- .interfaces = (InterfaceInfo[]) { +- { TYPE_USER_CREATABLE }, +- { } +- } +-}; +- + bool + sev_enabled(void) + { +@@ -906,20 +849,15 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevGuestState *sev +- = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); ++ SevGuestState *sev = SEV_GUEST(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; + +- if (!sev) { +- return 0; +- } +- + ret = ram_block_discard_disable(true); + if (ret) { + error_report("%s: cannot disable RAM discard", __func__); +@@ -1384,6 +1322,67 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return ret; + } + ++static void ++sev_guest_class_init(ObjectClass *oc, void *data) ++{ ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = sev_kvm_init; ++ ++ object_class_property_add_str(oc, "sev-device", ++ sev_guest_get_sev_device, ++ sev_guest_set_sev_device); ++ object_class_property_set_description(oc, "sev-device", ++ "SEV device to use"); ++ object_class_property_add_str(oc, "dh-cert-file", ++ sev_guest_get_dh_cert_file, ++ sev_guest_set_dh_cert_file); ++ object_class_property_set_description(oc, "dh-cert-file", ++ "guest owners DH certificate (encoded with base64)"); ++ object_class_property_add_str(oc, "session-file", ++ sev_guest_get_session_file, ++ sev_guest_set_session_file); ++ 
object_class_property_set_description(oc, "session-file", ++ "guest owners session parameters (encoded with base64)"); ++ object_class_property_add_bool(oc, "kernel-hashes", ++ sev_guest_get_kernel_hashes, ++ sev_guest_set_kernel_hashes); ++ object_class_property_set_description(oc, "kernel-hashes", ++ "add kernel hashes to guest firmware for measured Linux boot"); ++} ++ ++static void ++sev_guest_instance_init(Object *obj) ++{ ++ SevGuestState *sev = SEV_GUEST(obj); ++ ++ sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); ++ sev->policy = DEFAULT_GUEST_POLICY; ++ object_property_add_uint32_ptr(obj, "policy", &sev->policy, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "handle", &sev->handle, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "reduced-phys-bits", ++ &sev->reduced_phys_bits, ++ OBJ_PROP_FLAG_READWRITE); ++} ++ ++/* sev guest info */ ++static const TypeInfo sev_guest_info = { ++ .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, ++ .name = TYPE_SEV_GUEST, ++ .instance_size = sizeof(SevGuestState), ++ .instance_finalize = sev_guest_finalize, ++ .class_init = sev_guest_class_init, ++ .instance_init = sev_guest_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ + static void + sev_register_types(void) + { +diff --git a/target/i386/sev.h b/target/i386/sev.h +index e7499c95b1..9e10d09539 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -57,6 +57,4 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); + void sev_es_set_reset_vector(CPUState *cpu); + +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +- + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch b/SOURCES/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch 
new file mode 100644 index 0000000..27852d5 --- /dev/null +++ b/SOURCES/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch @@ -0,0 +1,240 @@ +From a870e7c31d9605baea4741d82521612b6164c99b Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:26 -0500 +Subject: [PATCH 066/100] i386/sev: Update query-sev QAPI format to handle + SEV-SNP + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [66/91] a19b3e226e857f3995176e7d2ef1ce2e4329a885 (bonzini/rhel-qemu-kvm) + +Most of the current 'query-sev' command is relevant to both legacy +SEV/SEV-ES guests and SEV-SNP guests, with 2 exceptions: + + - 'policy' is a 64-bit field for SEV-SNP, not 32-bit, and + the meaning of the bit positions has changed + - 'handle' is not relevant to SEV-SNP + +To address this, this patch adds a new 'sev-type' field that can be +used as a discriminator to select between SEV and SEV-SNP-specific +fields/formats without breaking compatibility for existing management +tools (so long as management tools that add support for launching +SEV-SNP guest update their handling of query-sev appropriately). + +The corresponding HMP command has also been fixed up similarly. 
+ +Signed-off-by: Michael Roth +Co-developed-by:Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-15-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 59d3740cb4ac0f010ce35877572904f6297284b4) +Signed-off-by: Paolo Bonzini +--- + qapi/misc-target.json | 72 ++++++++++++++++++++++++++++++++++--------- + target/i386/sev.c | 55 +++++++++++++++++++++------------ + target/i386/sev.h | 3 ++ + 3 files changed, 96 insertions(+), 34 deletions(-) + +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 4e0a6492a9..2d7d4d89bd 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -47,6 +47,50 @@ + 'send-update', 'receive-update' ], + 'if': 'TARGET_I386' } + ++## ++# @SevGuestType: ++# ++# An enumeration indicating the type of SEV guest being run. ++# ++# @sev: The guest is a legacy SEV or SEV-ES guest. ++# ++# @sev-snp: The guest is an SEV-SNP guest. ++# ++# Since: 6.2 ++## ++{ 'enum': 'SevGuestType', ++ 'data': [ 'sev', 'sev-snp' ], ++ 'if': 'TARGET_I386' } ++ ++## ++# @SevGuestInfo: ++# ++# Information specific to legacy SEV/SEV-ES guests. ++# ++# @policy: SEV policy value ++# ++# @handle: SEV firmware handle ++# ++# Since: 2.12 ++## ++{ 'struct': 'SevGuestInfo', ++ 'data': { 'policy': 'uint32', ++ 'handle': 'uint32' }, ++ 'if': 'TARGET_I386' } ++ ++## ++# @SevSnpGuestInfo: ++# ++# Information specific to SEV-SNP guests. 
++# ++# @snp-policy: SEV-SNP policy value ++# ++# Since: 9.1 ++## ++{ 'struct': 'SevSnpGuestInfo', ++ 'data': { 'snp-policy': 'uint64' }, ++ 'if': 'TARGET_I386' } ++ + ## + # @SevInfo: + # +@@ -60,25 +104,25 @@ + # + # @build-id: SEV FW build id + # +-# @policy: SEV policy value +-# + # @state: SEV guest state + # +-# @handle: SEV firmware handle ++# @sev-type: Type of SEV guest being run + # + # Since: 2.12 + ## +-{ 'struct': 'SevInfo', +- 'data': { 'enabled': 'bool', +- 'api-major': 'uint8', +- 'api-minor' : 'uint8', +- 'build-id' : 'uint8', +- 'policy' : 'uint32', +- 'state' : 'SevState', +- 'handle' : 'uint32' +- }, +- 'if': 'TARGET_I386' +-} ++{ 'union': 'SevInfo', ++ 'base': { 'enabled': 'bool', ++ 'api-major': 'uint8', ++ 'api-minor' : 'uint8', ++ 'build-id' : 'uint8', ++ 'state' : 'SevState', ++ 'sev-type' : 'SevGuestType' }, ++ 'discriminator': 'sev-type', ++ 'data': { ++ 'sev': 'SevGuestInfo', ++ 'sev-snp': 'SevSnpGuestInfo' }, ++ 'if': 'TARGET_I386' } ++ + + ## + # @query-sev: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 072cc4f853..43d1c48bd9 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -363,25 +363,27 @@ static SevInfo *sev_get_info(void) + { + SevInfo *info; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- SevGuestState *sev_guest = +- (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), +- TYPE_SEV_GUEST); + + info = g_new0(SevInfo, 1); + info->enabled = sev_enabled(); + + if (info->enabled) { +- if (sev_guest) { +- info->handle = sev_guest->handle; +- } + info->api_major = sev_common->api_major; + info->api_minor = sev_common->api_minor; + info->build_id = sev_common->build_id; + info->state = sev_common->state; +- /* we only report the lower 32-bits of policy for SNP, ok for now... 
*/ +- info->policy = +- (uint32_t)object_property_get_uint(OBJECT(sev_common), +- "policy", NULL); ++ ++ if (sev_snp_enabled()) { ++ info->sev_type = SEV_GUEST_TYPE_SEV_SNP; ++ info->u.sev_snp.snp_policy = ++ object_property_get_uint(OBJECT(sev_common), "policy", NULL); ++ } else { ++ info->sev_type = SEV_GUEST_TYPE_SEV; ++ info->u.sev.handle = SEV_GUEST(sev_common)->handle; ++ info->u.sev.policy = ++ (uint32_t)object_property_get_uint(OBJECT(sev_common), ++ "policy", NULL); ++ } + } + + return info; +@@ -404,20 +406,33 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) + { + SevInfo *info = sev_get_info(); + +- if (info && info->enabled) { +- monitor_printf(mon, "handle: %d\n", info->handle); +- monitor_printf(mon, "state: %s\n", SevState_str(info->state)); +- monitor_printf(mon, "build: %d\n", info->build_id); +- monitor_printf(mon, "api version: %d.%d\n", +- info->api_major, info->api_minor); ++ if (!info || !info->enabled) { ++ monitor_printf(mon, "SEV is not enabled\n"); ++ goto out; ++ } ++ ++ monitor_printf(mon, "SEV type: %s\n", SevGuestType_str(info->sev_type)); ++ monitor_printf(mon, "state: %s\n", SevState_str(info->state)); ++ monitor_printf(mon, "build: %d\n", info->build_id); ++ monitor_printf(mon, "api version: %d.%d\n", info->api_major, ++ info->api_minor); ++ ++ if (sev_snp_enabled()) { + monitor_printf(mon, "debug: %s\n", +- info->policy & SEV_POLICY_NODBG ? "off" : "on"); +- monitor_printf(mon, "key-sharing: %s\n", +- info->policy & SEV_POLICY_NOKS ? "off" : "on"); ++ info->u.sev_snp.snp_policy & SEV_SNP_POLICY_DBG ? "on" ++ : "off"); ++ monitor_printf(mon, "SMT allowed: %s\n", ++ info->u.sev_snp.snp_policy & SEV_SNP_POLICY_SMT ? "on" ++ : "off"); + } else { +- monitor_printf(mon, "SEV is not enabled\n"); ++ monitor_printf(mon, "handle: %d\n", info->u.sev.handle); ++ monitor_printf(mon, "debug: %s\n", ++ info->u.sev.policy & SEV_POLICY_NODBG ? "off" : "on"); ++ monitor_printf(mon, "key-sharing: %s\n", ++ info->u.sev.policy & SEV_POLICY_NOKS ? 
"off" : "on"); + } + ++out: + qapi_free_SevInfo(info); + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 94295ee74f..5dc4767b1e 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -31,6 +31,9 @@ + #define SEV_POLICY_DOMAIN 0x10 + #define SEV_POLICY_SEV 0x20 + ++#define SEV_SNP_POLICY_SMT 0x10000 ++#define SEV_SNP_POLICY_DBG 0x80000 ++ + typedef struct SevKernelLoaderContext { + char *setup_data; + size_t setup_size; +-- +2.39.3 + diff --git a/SOURCES/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch b/SOURCES/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch new file mode 100644 index 0000000..56f9f6f --- /dev/null +++ b/SOURCES/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch @@ -0,0 +1,51 @@ +From 98057e3adafa052b21a4fe5ef22835d30df3e644 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:09 -0500 +Subject: [PATCH 084/100] i386/sev: fix unreachable code coverity issue + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [84/91] dc7bf28f491bf675b22a98ea593fba72d8bc415a (bonzini/rhel-qemu-kvm) + +Set 'finish->id_block_en' early, so that it is properly reset. + +Fixes coverity CID 1546887. 
+ +Fixes: 7b34df4426 ("i386/sev: Introduce 'sev-snp-guest' object") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-2-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c94eb5db8e409c932da9eb187e68d4cdc14acc5b) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 004c667ac1..7c9df621de 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -2165,6 +2165,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + gsize len; + ++ finish->id_block_en = 0; + g_free(sev_snp_guest->id_block); + g_free((guchar *)finish->id_block_uaddr); + +@@ -2184,7 +2185,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) + return; + } + +- finish->id_block_en = (len) ? 1 : 0; ++ finish->id_block_en = 1; + } + + static char * +-- +2.39.3 + diff --git a/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch b/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch deleted file mode 100644 index 0cf782e..0000000 --- a/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 51b8f29cddb73eb02f91af5f52a205fdd3af6583 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 17 Jan 2024 21:08:59 +0100 -Subject: [PATCH 099/101] include/ui/rect.h: fix qemu_rect_init() - mis-assignment -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 216: Fix regression in QEMU's virtio-gpu VNC sessions -RH-Jira: RHEL-21570 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Cédric Le Goater -RH-Commit: [1/1] a9d487be04e2c1847b80c479b5cc790af81e3428 (thuth/qemu-kvm-cs9) - -JIRA: https://issues.redhat.com/browse/RHEL-21570 - -commit 
9d5b42beb6978dc6219d5dc029c9d453c6b8d503 -Author: Elen Avan -Date: Fri Dec 22 22:17:21 2023 +0300 - - include/ui/rect.h: fix qemu_rect_init() mis-assignment - - Signed-off-by: Elen Avan - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2051 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2050 - Fixes: a200d53b1fde "virtio-gpu: replace PIXMAN for region/rect test" - Cc: qemu-stable@nongnu.org - Reviewed-by: Michael Tokarev - Reviewed-by: Marc-André Lureau - Signed-off-by: Michael Tokarev - -Signed-off-by: Thomas Huth ---- - include/ui/rect.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/ui/rect.h b/include/ui/rect.h -index 94898f92d0..68f05d78a8 100644 ---- a/include/ui/rect.h -+++ b/include/ui/rect.h -@@ -19,7 +19,7 @@ static inline void qemu_rect_init(QemuRect *rect, - uint16_t width, uint16_t height) - { - rect->x = x; -- rect->y = x; -+ rect->y = y; - rect->width = width; - rect->height = height; - } --- -2.39.3 - diff --git a/SOURCES/kvm-introduce-pc_rhel_9_5_compat.patch b/SOURCES/kvm-introduce-pc_rhel_9_5_compat.patch new file mode 100644 index 0000000..9a17dda --- /dev/null +++ b/SOURCES/kvm-introduce-pc_rhel_9_5_compat.patch @@ -0,0 +1,81 @@ +From deae6c3b57c3919946a5ce1613e667a3240cf158 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 15 Apr 2024 12:45:09 +0200 +Subject: [PATCH 001/100] introduce pc_rhel_9_5_compat + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [1/91] cfd402fa5080eddba7c954e81ed79f9a1dd654cf (bonzini/rhel-qemu-kvm) + +Allow undoing backported changes that impact guest ABI. 
+ +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 4 ++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + include/hw/i386/pc.h | 3 +++ + 4 files changed, 11 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 4a154c1a9a..648762d908 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -348,6 +348,10 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_5_compat[] = { ++}; ++const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); ++ + GlobalProperty pc_rhel_9_3_compat[] = { + /* pc_rhel_9_3_compat from pc_compat_8_0 */ + { "virtio-mem", "unplugged-inaccessible", "auto" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 6b260682eb..bef3e8b73e 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1015,6 +1015,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + object_class_property_set_description(oc, "x-south-bridge", + "Use a different south bridge than PIIX3"); + ++ compat_props_add(m->compat_props, pc_rhel_9_5_compat, ++ pc_rhel_9_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_4, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 2b54944c0f..9adcdadce8 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -734,6 +734,8 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; + ++ compat_props_add(m->compat_props, pc_rhel_9_5_compat, ++ pc_rhel_9_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + } +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index a984c951ad..87420783ab 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -294,6 +294,9 @@ extern const size_t pc_compat_2_0_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t 
pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_5_compat[]; ++extern const size_t pc_rhel_9_5_compat_len; ++ + extern GlobalProperty pc_rhel_9_3_compat[]; + extern const size_t pc_rhel_9_3_compat_len; + +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch b/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch new file mode 100644 index 0000000..137cb53 --- /dev/null +++ b/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch @@ -0,0 +1,61 @@ +From 16c2e9e339a4c83055fd39e032fa16a0e732ed17 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:49:40 +0200 +Subject: [PATCH 2/4] iotests/244: Don't store data-file with protocol in image + +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) +RH-Jira: RHEL-35611 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake +RH-Commit: [2/4] 92e00dab8be1570b13172353d77d2af44cb4e22b + +We want to disable filename parsing for data files because it's too easy +to abuse in malicious image files. Make the test ready for the change by +passing the data file explicitly in command line options. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/244 | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244 +index 3e61fa25bb..bb9cc6512f 100755 +--- a/tests/qemu-iotests/244 ++++ b/tests/qemu-iotests/244 +@@ -215,9 +215,22 @@ $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" + $QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" + + # blkdebug doesn't support copy offloading, so this tests the error path +-$QEMU_IMG amend -f $IMGFMT -o "data_file=blkdebug::$TEST_IMG.data" "$TEST_IMG" +-$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" +-$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" ++test_img_with_blkdebug="json:{ ++ 'driver': 'qcow2', ++ 'file': { ++ 'driver': 'file', ++ 'filename': '$TEST_IMG' ++ }, ++ 'data-file': { ++ 'driver': 'blkdebug', ++ 'image': { ++ 'driver': 'file', ++ 'filename': '$TEST_IMG.data' ++ } ++ } ++}" ++$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$test_img_with_blkdebug" ++$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$test_img_with_blkdebug" + + echo + echo "=== Flushing should flush the data file ===" +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch b/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch new file mode 100644 index 0000000..2c1d6ae --- /dev/null +++ b/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch @@ -0,0 +1,64 @@ +From d70daa2eb5b670513ccd36c0baa5b36ec8cef666 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Apr 2024 14:49:40 +0200 +Subject: [PATCH 3/4] iotests/270: Don't store data-file with json: prefix in + image + +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) 
+RH-Jira: RHEL-35611 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake +RH-Commit: [3/4] 705bcc2819ce8e0f8b9d660a93bc48de26413aec + +We want to disable filename parsing for data files because it's too easy +to abuse in malicious image files. Make the test ready for the change by +passing the data file explicitly in command line options. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/270 | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/tests/qemu-iotests/270 b/tests/qemu-iotests/270 +index 74352342db..c37b674aa2 100755 +--- a/tests/qemu-iotests/270 ++++ b/tests/qemu-iotests/270 +@@ -60,8 +60,16 @@ _make_test_img -o cluster_size=2M,data_file="$TEST_IMG.orig" \ + # "write" 2G of data without using any space. + # (qemu-img create does not like it, though, because null-co does not + # support image creation.) +-$QEMU_IMG amend -o data_file="json:{'driver':'null-co',,'size':'4294967296'}" \ +- "$TEST_IMG" ++test_img_with_null_data="json:{ ++ 'driver': '$IMGFMT', ++ 'file': { ++ 'filename': '$TEST_IMG' ++ }, ++ 'data-file': { ++ 'driver': 'null-co', ++ 'size':'4294967296' ++ } ++}" + + # This gives us a range of: + # 2^31 - 512 + 768 - 1 = 2^31 + 255 > 2^31 +@@ -74,7 +82,7 @@ $QEMU_IMG amend -o data_file="json:{'driver':'null-co',,'size':'4294967296'}" \ + # on L2 boundaries, we need large L2 tables; hence the cluster size of + # 2 MB. (Anything from 256 kB should work, though, because then one L2 + # table covers 8 GB.) 
+-$QEMU_IO -c "write 768 $((2 ** 31 - 512))" "$TEST_IMG" | _filter_qemu_io ++$QEMU_IO -c "write 768 $((2 ** 31 - 512))" "$test_img_with_null_data" | _filter_qemu_io + + _check_test_img + +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch b/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch deleted file mode 100644 index d9072f5..0000000 --- a/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch +++ /dev/null @@ -1,49 +0,0 @@ -From a9be663beaace1c31d75ca353e5d3bb0657a4f6c Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 18 Jan 2024 09:48:21 -0500 -Subject: [PATCH 11/22] iotests: add filter_qmp_generated_node_ids() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [7/17] 8dd20acc5b1e992294ed422e80897a9c221940dd (stefanha/centos-stream-qemu-kvm) - -Add a filter function for QMP responses that contain QEMU's -automatically generated node ids. The ids change between runs and must -be masked in the reference output. - -The next commit will use this new function. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240118144823.1497953-2-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit da62b507a20510d819bcfbe8f5e573409b954006) -Signed-off-by: Stefan Hajnoczi ---- - tests/qemu-iotests/iotests.py | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index e5c5798c71..ea48af4a7b 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -651,6 +651,13 @@ def _filter(_key, value): - def filter_generated_node_ids(msg): - return re.sub("#block[0-9]+", "NODE_NAME", msg) - -+def filter_qmp_generated_node_ids(qmsg): -+ def _filter(_key, value): -+ if is_str(value): -+ return filter_generated_node_ids(value) -+ return value -+ return filter_qmp(qmsg, _filter) -+ - def filter_img_info(output: str, filename: str, - drop_child_info: bool = True) -> str: - lines = [] --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch b/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch deleted file mode 100644 index ab63004..0000000 --- a/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 453da839a7d81896d03b827a95c1991a60740dc5 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 25 Jan 2024 16:21:50 +0100 -Subject: [PATCH 21/22] iotests/iothreads-stream: Use the right TimeoutError - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [17/17] ca5a512ccccb668089b726d7499562d1e294c828 (stefanha/centos-stream-qemu-kvm) - -Since Python 3.11 asyncio.TimeoutError is an alias for TimeoutError, but -in older versions it's not. 
We really have to catch asyncio.TimeoutError -here, otherwise a slow test run will fail (as has happened multiple -times on CI recently). - -Signed-off-by: Kevin Wolf -Message-ID: <20240125152150.42389-1-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c9c0b37ff4c11b712b21efabe8e5381d223d0295) -Signed-off-by: Stefan Hajnoczi ---- - tests/qemu-iotests/tests/iothreads-stream | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tests/qemu-iotests/tests/iothreads-stream b/tests/qemu-iotests/tests/iothreads-stream -index 503f221f16..231195b5e8 100755 ---- a/tests/qemu-iotests/tests/iothreads-stream -+++ b/tests/qemu-iotests/tests/iothreads-stream -@@ -18,6 +18,7 @@ - # - # Creator/Owner: Kevin Wolf - -+import asyncio - import iotests - - iotests.script_initialize(supported_fmts=['qcow2'], -@@ -69,6 +70,6 @@ with iotests.FilePath('disk1.img') as base1_path, \ - # The test is done once both jobs are gone - if finished == 2: - break -- except TimeoutError: -+ except asyncio.TimeoutError: - pass - vm.cmd('query-jobs') --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch b/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch deleted file mode 100644 index 209bd1e..0000000 --- a/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch +++ /dev/null @@ -1,592 +0,0 @@ -From 70efc3bbf1f7d7b1b0c2475d9ce3bb70cc9d1cc7 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 18 Jan 2024 09:48:22 -0500 -Subject: [PATCH 12/22] iotests: port 141 to Python for reliable QMP testing - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [8/17] 0783f536508916feac4b4c39e41c22c24a2e52e7 (stefanha/centos-stream-qemu-kvm) - -The common.qemu bash functions allow tests to interact with the QMP -monitor of a QEMU process. 
I spent two days trying to update 141 when -the order of the test output changed, but found it would still fail -occassionally because printf() and QMP events race with synchronous QMP -communication. - -I gave up and ported 141 to the existing Python API for QMP tests. The -Python API is less affected by the order in which QEMU prints output -because it does not print all QMP traffic by default. - -The next commit changes the order in which QMP messages are received. -Make 141 reliable first. - -Cc: Hanna Czenczek -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240118144823.1497953-3-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 9ee2dd4c22a3639c5462b3fc20df60c005c3de64) -Signed-off-by: Stefan Hajnoczi ---- - tests/qemu-iotests/141 | 307 ++++++++++++++++--------------------- - tests/qemu-iotests/141.out | 200 ++++++------------------ - 2 files changed, 176 insertions(+), 331 deletions(-) - -diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 -index a37030ee17..a7d3985a02 100755 ---- a/tests/qemu-iotests/141 -+++ b/tests/qemu-iotests/141 -@@ -1,9 +1,12 @@ --#!/usr/bin/env bash -+#!/usr/bin/env python3 - # group: rw auto quick - # - # Test case for ejecting BDSs with block jobs still running on them - # --# Copyright (C) 2016 Red Hat, Inc. -+# Originally written in bash by Hanna Czenczek, ported to Python by Stefan -+# Hajnoczi. -+# -+# Copyright Red Hat - # - # This program is free software; you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by -@@ -19,177 +22,129 @@ - # along with this program. If not, see . - # - --# creator --owner=hreitz@redhat.com -- --seq="$(basename $0)" --echo "QA output created by $seq" -- --status=1 # failure is the default! 
-- --_cleanup() --{ -- _cleanup_qemu -- _cleanup_test_img -- for img in "$TEST_DIR"/{b,m,o}.$IMGFMT; do -- _rm_test_img "$img" -- done --} --trap "_cleanup; exit \$status" 0 1 2 3 15 -- --# get standard environment, filters and checks --. ./common.rc --. ./common.filter --. ./common.qemu -- --# Needs backing file and backing format support --_supported_fmt qcow2 qed --_supported_proto file --_supported_os Linux -- -- --test_blockjob() --{ -- _send_qemu_cmd $QEMU_HANDLE \ -- "{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': '$IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': '$TEST_IMG' -- }}}" \ -- 'return' -- -- # If "$2" is an event, we may or may not see it before the -- # {"return": {}}. Therefore, filter the {"return": {}} out both -- # here and in the next command. (Naturally, if we do not see it -- # here, we will see it before the next command can be executed, -- # so it will appear in the next _send_qemu_cmd's output.) -- _send_qemu_cmd $QEMU_HANDLE \ -- "$1" \ -- "$2" \ -- | _filter_img_create | _filter_qmp_empty_return -- -- # We want this to return an error because the block job is still running -- _send_qemu_cmd $QEMU_HANDLE \ -- "{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}}" \ -- 'error' | _filter_generated_node_ids | _filter_qmp_empty_return -- -- _send_qemu_cmd $QEMU_HANDLE \ -- "{'execute': 'block-job-cancel', -- 'arguments': {'device': 'job0'}}" \ -- "$3" -- -- _send_qemu_cmd $QEMU_HANDLE \ -- "{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}}" \ -- 'return' --} -- -- --TEST_IMG="$TEST_DIR/b.$IMGFMT" _make_test_img 1M --TEST_IMG="$TEST_DIR/m.$IMGFMT" _make_test_img -b "$TEST_DIR/b.$IMGFMT" -F $IMGFMT 1M --_make_test_img -b "$TEST_DIR/m.$IMGFMT" 1M -F $IMGFMT -- --_launch_qemu -nodefaults -- --_send_qemu_cmd $QEMU_HANDLE \ -- "{'execute': 'qmp_capabilities'}" \ -- 'return' -- --echo --echo '=== Testing drive-backup ===' --echo -- --# drive-backup will not send 
BLOCK_JOB_READY by itself, and cancelling the job --# will consequently result in BLOCK_JOB_CANCELLED being emitted. -- --test_blockjob \ -- "{'execute': 'drive-backup', -- 'arguments': {'job-id': 'job0', -- 'device': 'drv0', -- 'target': '$TEST_DIR/o.$IMGFMT', -- 'format': '$IMGFMT', -- 'sync': 'none'}}" \ -- 'return' \ -- '"status": "null"' -- --echo --echo '=== Testing drive-mirror ===' --echo -- --# drive-mirror will send BLOCK_JOB_READY basically immediately, and cancelling --# the job will consequently result in BLOCK_JOB_COMPLETED being emitted. -- --test_blockjob \ -- "{'execute': 'drive-mirror', -- 'arguments': {'job-id': 'job0', -- 'device': 'drv0', -- 'target': '$TEST_DIR/o.$IMGFMT', -- 'format': '$IMGFMT', -- 'sync': 'none'}}" \ -- 'BLOCK_JOB_READY' \ -- '"status": "null"' -- --echo --echo '=== Testing active block-commit ===' --echo -- --# An active block-commit will send BLOCK_JOB_READY basically immediately, and --# cancelling the job will consequently result in BLOCK_JOB_COMPLETED being --# emitted. -- --test_blockjob \ -- "{'execute': 'block-commit', -- 'arguments': {'job-id': 'job0', 'device': 'drv0'}}" \ -- 'BLOCK_JOB_READY' \ -- '"status": "null"' -- --echo --echo '=== Testing non-active block-commit ===' --echo -- --# Give block-commit something to work on, otherwise it would be done --# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just --# fine without the block job still running. -- --$QEMU_IO -c 'write 0 1M' "$TEST_DIR/m.$IMGFMT" | _filter_qemu_io -- --test_blockjob \ -- "{'execute': 'block-commit', -- 'arguments': {'job-id': 'job0', -- 'device': 'drv0', -- 'top': '$TEST_DIR/m.$IMGFMT', -- 'speed': 1}}" \ -- 'return' \ -- '"status": "null"' -- --echo --echo '=== Testing block-stream ===' --echo -- --# Give block-stream something to work on, otherwise it would be done --# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just --# fine without the block job still running. 
-- --$QEMU_IO -c 'write 0 1M' "$TEST_DIR/b.$IMGFMT" | _filter_qemu_io -- --# With some data to stream (and @speed set to 1), block-stream will not complete --# until we send the block-job-cancel command. -- --test_blockjob \ -- "{'execute': 'block-stream', -- 'arguments': {'job-id': 'job0', -- 'device': 'drv0', -- 'speed': 1}}" \ -- 'return' \ -- '"status": "null"' -- --_cleanup_qemu -- --# success, all done --echo "*** done" --rm -f $seq.full --status=0 -+import iotests -+ -+# Common filters to mask values that vary in the test output -+QMP_FILTERS = [iotests.filter_qmp_testfiles, \ -+ iotests.filter_qmp_imgfmt] -+ -+ -+class TestCase: -+ def __init__(self, name, vm, image_path, cancel_event): -+ self.name = name -+ self.vm = vm -+ self.image_path = image_path -+ self.cancel_event = cancel_event -+ -+ def __enter__(self): -+ iotests.log(f'=== Testing {self.name} ===') -+ self.vm.qmp_log('blockdev-add', \ -+ node_name='drv0', \ -+ driver=iotests.imgfmt, \ -+ file={'driver': 'file', 'filename': self.image_path}, \ -+ filters=QMP_FILTERS) -+ -+ def __exit__(self, *exc_details): -+ # This is expected to fail because the job still exists -+ self.vm.qmp_log('blockdev-del', node_name='drv0', \ -+ filters=[iotests.filter_qmp_generated_node_ids]) -+ -+ self.vm.qmp_log('block-job-cancel', device='job0') -+ event = self.vm.event_wait(self.cancel_event) -+ iotests.log(event, filters=[iotests.filter_qmp_event]) -+ -+ # This time it succeeds -+ self.vm.qmp_log('blockdev-del', node_name='drv0') -+ -+ # Separate test cases in output -+ iotests.log('') -+ -+ -+def main() -> None: -+ with iotests.FilePath('bottom', 'middle', 'top', 'target') as \ -+ (bottom_path, middle_path, top_path, target_path), \ -+ iotests.VM() as vm: -+ -+ iotests.log('Creating bottom <- middle <- top backing file chain...') -+ IMAGE_SIZE='1M' -+ iotests.qemu_img_create('-f', iotests.imgfmt, bottom_path, IMAGE_SIZE) -+ iotests.qemu_img_create('-f', iotests.imgfmt, \ -+ '-F', iotests.imgfmt, \ -+ '-b', 
bottom_path, \ -+ middle_path, \ -+ IMAGE_SIZE) -+ iotests.qemu_img_create('-f', iotests.imgfmt, \ -+ '-F', iotests.imgfmt, \ -+ '-b', middle_path, \ -+ top_path, \ -+ IMAGE_SIZE) -+ -+ iotests.log('Starting VM...') -+ vm.add_args('-nodefaults') -+ vm.launch() -+ -+ # drive-backup will not send BLOCK_JOB_READY by itself, and cancelling -+ # the job will consequently result in BLOCK_JOB_CANCELLED being -+ # emitted. -+ with TestCase('drive-backup', vm, top_path, 'BLOCK_JOB_CANCELLED'): -+ vm.qmp_log('drive-backup', \ -+ job_id='job0', \ -+ device='drv0', \ -+ target=target_path, \ -+ format=iotests.imgfmt, \ -+ sync='none', \ -+ filters=QMP_FILTERS) -+ -+ # drive-mirror will send BLOCK_JOB_READY basically immediately, and -+ # cancelling the job will consequently result in BLOCK_JOB_COMPLETED -+ # being emitted. -+ with TestCase('drive-mirror', vm, top_path, 'BLOCK_JOB_COMPLETED'): -+ vm.qmp_log('drive-mirror', \ -+ job_id='job0', \ -+ device='drv0', \ -+ target=target_path, \ -+ format=iotests.imgfmt, \ -+ sync='none', \ -+ filters=QMP_FILTERS) -+ event = vm.event_wait('BLOCK_JOB_READY') -+ assert event is not None # silence mypy -+ iotests.log(event, filters=[iotests.filter_qmp_event]) -+ -+ # An active block-commit will send BLOCK_JOB_READY basically -+ # immediately, and cancelling the job will consequently result in -+ # BLOCK_JOB_COMPLETED being emitted. -+ with TestCase('active block-commit', vm, top_path, \ -+ 'BLOCK_JOB_COMPLETED'): -+ vm.qmp_log('block-commit', \ -+ job_id='job0', \ -+ device='drv0') -+ event = vm.event_wait('BLOCK_JOB_READY') -+ assert event is not None # silence mypy -+ iotests.log(event, filters=[iotests.filter_qmp_event]) -+ -+ # Give block-commit something to work on, otherwise it would be done -+ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would -+ # work just fine without the block job still running. 
-+ iotests.qemu_io(middle_path, '-c', f'write 0 {IMAGE_SIZE}') -+ with TestCase('non-active block-commit', vm, top_path, \ -+ 'BLOCK_JOB_CANCELLED'): -+ vm.qmp_log('block-commit', \ -+ job_id='job0', \ -+ device='drv0', \ -+ top=middle_path, \ -+ speed=1, \ -+ filters=[iotests.filter_qmp_testfiles]) -+ -+ # Give block-stream something to work on, otherwise it would be done -+ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would -+ # work just fine without the block job still running. -+ iotests.qemu_io(bottom_path, '-c', f'write 0 {IMAGE_SIZE}') -+ with TestCase('block-stream', vm, top_path, 'BLOCK_JOB_CANCELLED'): -+ vm.qmp_log('block-stream', \ -+ job_id='job0', \ -+ device='drv0', \ -+ speed=1) -+ -+if __name__ == '__main__': -+ iotests.script_main(main, supported_fmts=['qcow2', 'qed'], -+ supported_protocols=['file']) -diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out -index 63203d9944..91b7ba50af 100644 ---- a/tests/qemu-iotests/141.out -+++ b/tests/qemu-iotests/141.out -@@ -1,179 +1,69 @@ --QA output created by 141 --Formatting 'TEST_DIR/b.IMGFMT', fmt=IMGFMT size=1048576 --Formatting 'TEST_DIR/m.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/b.IMGFMT backing_fmt=IMGFMT --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m.IMGFMT backing_fmt=IMGFMT --{'execute': 'qmp_capabilities'} --{"return": {}} -- -+Creating bottom <- middle <- top backing file chain... -+Starting VM... 
- === Testing drive-backup === -- --{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': 'IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': 'TEST_DIR/t.IMGFMT' -- }}} --{"return": {}} --{'execute': 'drive-backup', --'arguments': {'job-id': 'job0', --'device': 'drv0', --'target': 'TEST_DIR/o.IMGFMT', --'format': 'IMGFMT', --'sync': 'none'}} --Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} -+{"return": {}} -+{"execute": "drive-backup", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} --{'execute': 'block-job-cancel', -- 'arguments': {'device': 'job0'}} -+{"execute": "block-job-cancel", "arguments": {"device": "job0"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, 
"event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"return": {}} - - === Testing drive-mirror === -- --{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': 'IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': 'TEST_DIR/t.IMGFMT' -- }}} --{"return": {}} --{'execute': 'drive-mirror', --'arguments': {'job-id': 'job0', --'device': 'drv0', --'target': 'TEST_DIR/o.IMGFMT', --'format': 'IMGFMT', --'sync': 'none'}} --Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": 
"TEST_DIR/PID-top"}, "node-name": "drv0"}} -+{"return": {}} -+{"execute": "drive-mirror", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}} -+{"return": {}} -+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: mirror"}} --{'execute': 'block-job-cancel', -- 'arguments': {'device': 'job0'}} -+{"execute": "block-job-cancel", "arguments": {"device": "job0"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"return": {}} - - === Testing active block-commit === -- --{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': 'IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': 
'TEST_DIR/t.IMGFMT' -- }}} --{"return": {}} --{'execute': 'block-commit', --'arguments': {'job-id': 'job0', 'device': 'drv0'}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} -+{"return": {}} -+{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0"}} -+{"return": {}} -+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}} --{'execute': 'block-job-cancel', -- 'arguments': {'device': 'job0'}} -+{"execute": "block-job-cancel", "arguments": {"device": "job0"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, 
"speed": 0, "type": "commit"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"return": {}} - - === Testing non-active block-commit === -- --wrote 1048576/1048576 bytes at offset 0 --1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) --{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': 'IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': 'TEST_DIR/t.IMGFMT' -- }}} --{"return": {}} --{'execute': 'block-commit', --'arguments': {'job-id': 'job0', --'device': 'drv0', --'top': 'TEST_DIR/m.IMGFMT', --'speed': 1}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} -+{"return": {}} -+{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1, "top": "TEST_DIR/PID-middle"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}} --{'execute': 'block-job-cancel', -- 'arguments': 
{'device': 'job0'}} -+{"execute": "block-job-cancel", "arguments": {"device": "job0"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"return": {}} - - === Testing block-stream === -- --wrote 1048576/1048576 bytes at offset 0 --1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) --{'execute': 'blockdev-add', -- 'arguments': { -- 'node-name': 'drv0', -- 'driver': 'IMGFMT', -- 'file': { -- 'driver': 'file', -- 'filename': 'TEST_DIR/t.IMGFMT' -- }}} --{"return": {}} --{'execute': 'block-stream', --'arguments': {'job-id': 'job0', --'device': 'drv0', --'speed': 1}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} -+{"return": {}} 
-+{"execute": "block-stream", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: stream"}} --{'execute': 'block-job-cancel', -- 'arguments': {'device': 'job0'}} -+{"execute": "block-job-cancel", "arguments": {"device": "job0"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{'execute': 'blockdev-del', -- 'arguments': {'node-name': 'drv0'}} -+{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} - {"return": {}} --*** done -+ --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch b/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch new file mode 100644 index 0000000..5640a7e --- /dev/null +++ b/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch @@ -0,0 +1,275 @@ +From 88adaeaecb1d7753aa8ac3da40f617d93eaf8bdc Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 17 May 2024 21:50:15 -0500 +Subject: [PATCH 2/4] iotests: test NBD+TLS+iothread +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 244: qio: 
Inherit follow_coroutine_ctx across TLS +RH-Jira: RHEL-33440 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/2] 29c3128a158b7c49fa79c141a9adcc12693f7de4 (ebblake/centos-qemu-kvm) + +Prevent regressions when using NBD with TLS in the presence of +iothreads, adding coverage the fix to qio channels made in the +previous patch. + +The shell function pick_unused_port() was copied from +nbdkit.git/tests/functions.sh.in, where it had all authors from Red +Hat, agreeing to the resulting relicensing from 2-clause BSD to GPLv2. + +CC: qemu-stable@nongnu.org +CC: "Richard W.M. Jones" +Signed-off-by: Eric Blake +Message-ID: <20240531180639.1392905-6-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé +(cherry picked from commit a73c99378022ebb785481e84cfe1e81097546268) +Jira: https://issues.redhat.com/browse/RHEL-33440 +Signed-off-by: Eric Blake +--- + tests/qemu-iotests/tests/nbd-tls-iothread | 168 ++++++++++++++++++ + tests/qemu-iotests/tests/nbd-tls-iothread.out | 54 ++++++ + 2 files changed, 222 insertions(+) + create mode 100755 tests/qemu-iotests/tests/nbd-tls-iothread + create mode 100644 tests/qemu-iotests/tests/nbd-tls-iothread.out + +diff --git a/tests/qemu-iotests/tests/nbd-tls-iothread b/tests/qemu-iotests/tests/nbd-tls-iothread +new file mode 100755 +index 0000000000..a2fb07206e +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-tls-iothread +@@ -0,0 +1,168 @@ ++#!/usr/bin/env bash ++# group: rw quick ++# ++# Test of NBD+TLS+iothread ++# ++# Copyright (C) 2024 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=eblake@redhat.com ++ ++seq=`basename $0` ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_qemu ++ _cleanup_test_img ++ rm -f "$dst_image" ++ tls_x509_cleanup ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. ./common.filter ++. ./common.qemu ++. ./common.tls ++. ./common.nbd ++ ++_supported_fmt qcow2 # Hardcoded to qcow2 command line and QMP below ++_supported_proto file ++ ++# pick_unused_port ++# ++# Picks and returns an "unused" port, setting the global variable ++# $port. ++# ++# This is inherently racy, but we need it because qemu does not currently ++# permit NBD+TLS over a Unix domain socket ++pick_unused_port () ++{ ++ if ! (ss --version) >/dev/null 2>&1; then ++ _notrun "ss utility required, skipped this test" ++ fi ++ ++ # Start at a random port to make it less likely that two parallel ++ # tests will conflict. 
++ port=$(( 50000 + (RANDOM%15000) )) ++ while ss -ltn | grep -sqE ":$port\b"; do ++ ((port++)) ++ if [ $port -eq 65000 ]; then port=50000; fi ++ done ++ echo picked unused port ++} ++ ++tls_x509_init ++ ++size=1G ++DST_IMG="$TEST_DIR/dst.qcow2" ++ ++echo ++echo "== preparing TLS creds and spare port ==" ++ ++pick_unused_port ++tls_x509_create_root_ca "ca1" ++tls_x509_create_server "ca1" "server1" ++tls_x509_create_client "ca1" "client1" ++tls_obj_base=tls-creds-x509,id=tls0,verify-peer=true,dir="${tls_dir}" ++ ++echo ++echo "== preparing image ==" ++ ++_make_test_img $size ++$QEMU_IMG create -f qcow2 "$DST_IMG" $size | _filter_img_create ++ ++echo ++echo === Starting Src QEMU === ++echo ++ ++_launch_qemu -machine q35 \ ++ -object iothread,id=iothread0 \ ++ -object "${tls_obj_base}"/client1,endpoint=client \ ++ -device '{"driver":"pcie-root-port", "id":"root0", "multifunction":true, ++ "bus":"pcie.0"}' \ ++ -device '{"driver":"virtio-scsi-pci", "id":"virtio_scsi_pci0", ++ "bus":"root0", "iothread":"iothread0"}' \ ++ -device '{"driver":"scsi-hd", "id":"image1", "drive":"drive_image1", ++ "bus":"virtio_scsi_pci0.0"}' \ ++ -blockdev '{"driver":"file", "cache":{"direct":true, "no-flush":false}, ++ "filename":"'"$TEST_IMG"'", "node-name":"drive_sys1"}' \ ++ -blockdev '{"driver":"qcow2", "node-name":"drive_image1", ++ "file":"drive_sys1"}' ++h1=$QEMU_HANDLE ++_send_qemu_cmd $h1 '{"execute": "qmp_capabilities"}' 'return' ++ ++echo ++echo === Starting Dst VM2 === ++echo ++ ++_launch_qemu -machine q35 \ ++ -object iothread,id=iothread0 \ ++ -object "${tls_obj_base}"/server1,endpoint=server \ ++ -device '{"driver":"pcie-root-port", "id":"root0", "multifunction":true, ++ "bus":"pcie.0"}' \ ++ -device '{"driver":"virtio-scsi-pci", "id":"virtio_scsi_pci0", ++ "bus":"root0", "iothread":"iothread0"}' \ ++ -device '{"driver":"scsi-hd", "id":"image1", "drive":"drive_image1", ++ "bus":"virtio_scsi_pci0.0"}' \ ++ -blockdev '{"driver":"file", "cache":{"direct":true, "no-flush":false}, 
++ "filename":"'"$DST_IMG"'", "node-name":"drive_sys1"}' \ ++ -blockdev '{"driver":"qcow2", "node-name":"drive_image1", ++ "file":"drive_sys1"}' \ ++ -incoming defer ++h2=$QEMU_HANDLE ++_send_qemu_cmd $h2 '{"execute": "qmp_capabilities"}' 'return' ++ ++echo ++echo === Dst VM: Enable NBD server for incoming storage migration === ++echo ++ ++_send_qemu_cmd $h2 '{"execute": "nbd-server-start", "arguments": ++ {"addr": {"type": "inet", "data": {"host": "127.0.0.1", "port": "'$port'"}}, ++ "tls-creds": "tls0"}}' '{"return": {}}' | sed "s/\"$port\"/PORT/g" ++_send_qemu_cmd $h2 '{"execute": "block-export-add", "arguments": ++ {"node-name": "drive_image1", "type": "nbd", "writable": true, ++ "id": "drive_image1"}}' '{"return": {}}' ++ ++echo ++echo === Src VM: Mirror to dst NBD for outgoing storage migration === ++echo ++ ++_send_qemu_cmd $h1 '{"execute": "blockdev-add", "arguments": ++ {"node-name": "mirror", "driver": "nbd", ++ "server": {"type": "inet", "host": "127.0.0.1", "port": "'$port'"}, ++ "export": "drive_image1", "tls-creds": "tls0", ++ "tls-hostname": "127.0.0.1"}}' '{"return": {}}' | sed "s/\"$port\"/PORT/g" ++_send_qemu_cmd $h1 '{"execute": "blockdev-mirror", "arguments": ++ {"sync": "full", "device": "drive_image1", "target": "mirror", ++ "job-id": "drive_image1_53"}}' '{"return": {}}' ++_timed_wait_for $h1 '"ready"' ++ ++echo ++echo === Cleaning up === ++echo ++ ++_send_qemu_cmd $h1 '{"execute":"quit"}' '' ++_send_qemu_cmd $h2 '{"execute":"quit"}' '' ++ ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/tests/nbd-tls-iothread.out b/tests/qemu-iotests/tests/nbd-tls-iothread.out +new file mode 100644 +index 0000000000..1d83d4f903 +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-tls-iothread.out +@@ -0,0 +1,54 @@ ++QA output created by nbd-tls-iothread ++ ++== preparing TLS creds and spare port == ++picked unused port ++Generating a self signed certificate... ++Generating a signed certificate... 
++Generating a signed certificate... ++ ++== preparing image == ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 ++Formatting 'TEST_DIR/dst.IMGFMT', fmt=IMGFMT size=1073741824 ++ ++=== Starting Src QEMU === ++ ++{"execute": "qmp_capabilities"} ++{"return": {}} ++ ++=== Starting Dst VM2 === ++ ++{"execute": "qmp_capabilities"} ++{"return": {}} ++ ++=== Dst VM: Enable NBD server for incoming storage migration === ++ ++{"execute": "nbd-server-start", "arguments": ++ {"addr": {"type": "inet", "data": {"host": "127.0.0.1", "port": PORT}}, ++ "tls-creds": "tls0"}} ++{"return": {}} ++{"execute": "block-export-add", "arguments": ++ {"node-name": "drive_image1", "type": "nbd", "writable": true, ++ "id": "drive_image1"}} ++{"return": {}} ++ ++=== Src VM: Mirror to dst NBD for outgoing storage migration === ++ ++{"execute": "blockdev-add", "arguments": ++ {"node-name": "mirror", "driver": "nbd", ++ "server": {"type": "inet", "host": "127.0.0.1", "port": PORT}, ++ "export": "drive_image1", "tls-creds": "tls0", ++ "tls-hostname": "127.0.0.1"}} ++{"return": {}} ++{"execute": "blockdev-mirror", "arguments": ++ {"sync": "full", "device": "drive_image1", "target": "mirror", ++ "job-id": "drive_image1_53"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "drive_image1_53"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "drive_image1_53"}} ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "drive_image1_53"}} ++ ++=== Cleaning up === ++ ++{"execute":"quit"} ++{"execute":"quit"} ++*** done +-- +2.39.3 + diff --git a/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch b/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch deleted file mode 100644 index fc1c62f..0000000 --- 
a/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 4ab25b33831fa207500179bd30f29388d81e4cce Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:10 -0500 -Subject: [PATCH 093/101] job: remove outdated AioContext locking comments - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [24/26] 15ff2928be82d6905c22619458487fbb72d6044a (kmwolf/centos-qemu-kvm) - -The AioContext lock no longer exists. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-14-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - include/qemu/job.h | 20 -------------------- - 1 file changed, 20 deletions(-) - -diff --git a/include/qemu/job.h b/include/qemu/job.h -index e502787dd8..9ea98b5927 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -67,8 +67,6 @@ typedef struct Job { - - /** - * The completion function that will be called when the job completes. -- * Called with AioContext lock held, since many callback implementations -- * use bdrv_* functions that require to hold the lock. - */ - BlockCompletionFunc *cb; - -@@ -264,9 +262,6 @@ struct JobDriver { - * - * This callback will not be invoked if the job has already failed. - * If it fails, abort and then clean will be called. -- * -- * Called with AioContext lock held, since many callbacs implementations -- * use bdrv_* functions that require to hold the lock. - */ - int (*prepare)(Job *job); - -@@ -277,9 +272,6 @@ struct JobDriver { - * - * All jobs will complete with a call to either .commit() or .abort() but - * never both. -- * -- * Called with AioContext lock held, since many callback implementations -- * use bdrv_* functions that require to hold the lock. 
- */ - void (*commit)(Job *job); - -@@ -290,9 +282,6 @@ struct JobDriver { - * - * All jobs will complete with a call to either .commit() or .abort() but - * never both. -- * -- * Called with AioContext lock held, since many callback implementations -- * use bdrv_* functions that require to hold the lock. - */ - void (*abort)(Job *job); - -@@ -301,9 +290,6 @@ struct JobDriver { - * .commit() or .abort(). Regardless of which callback is invoked after - * completion, .clean() will always be called, even if the job does not - * belong to a transaction group. -- * -- * Called with AioContext lock held, since many callbacs implementations -- * use bdrv_* functions that require to hold the lock. - */ - void (*clean)(Job *job); - -@@ -318,17 +304,12 @@ struct JobDriver { - * READY). - * (If the callback is NULL, the job is assumed to terminate - * without I/O.) -- * -- * Called with AioContext lock held, since many callback implementations -- * use bdrv_* functions that require to hold the lock. - */ - bool (*cancel)(Job *job, bool force); - - - /** - * Called when the job is freed. -- * Called with AioContext lock held, since many callback implementations -- * use bdrv_* functions that require to hold the lock. - */ - void (*free)(Job *job); - }; -@@ -424,7 +405,6 @@ void job_ref_locked(Job *job); - * Release a reference that was previously acquired with job_ref_locked() or - * job_create(). If it's the last reference to the object, it will be freed. - * -- * Takes AioContext lock internally to invoke a job->driver callback. - * Called with job lock held. 
- */ - void job_unref_locked(Job *job); --- -2.39.3 - diff --git a/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch b/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch deleted file mode 100644 index 3e562b8..0000000 --- a/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 9c2eb4ab03903bc084c53ac29b60b8d2121c9fed Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 21 Nov 2023 16:44:19 +0800 -Subject: [PATCH 040/101] kconfig: Activate IOMMUFD for s390x machines -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [39/67] cf0ebe770b8db5916dd35247618c0a325dc1eaab (eauger1/centos-qemu-kvm) - -Signed-off-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Matthew Rosato -Reviewed-by: Eric Farman -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 73e2df669335047b542b67d37ade060a6ae40dd8) -Signed-off-by: Eric Auger ---- - hw/s390x/Kconfig | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig -index 4c068d7960..26ad104485 100644 ---- a/hw/s390x/Kconfig -+++ b/hw/s390x/Kconfig -@@ -6,6 +6,7 @@ config S390_CCW_VIRTIO - imply VFIO_CCW - imply WDT_DIAG288 - imply PCIE_DEVICES -+ imply IOMMUFD - select PCI_EXPRESS - select S390_FLIC - select S390_FLIC_KVM if KVM --- -2.39.3 - diff --git a/SOURCES/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch b/SOURCES/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch new file mode 100644 index 0000000..10e98a7 --- /dev/null +++ b/SOURCES/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch @@ -0,0 +1,153 @@ +From 120157257ac239050779fdddc9abb56bd39958b3 Mon Sep 17 00:00:00 2001 +From: Chao Peng +Date: Wed, 20 Mar 
2024 03:39:05 -0500 +Subject: [PATCH 029/100] kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [29/91] 9a08c8699f632cd046a6307e33bd053a7cc7db46 (bonzini/rhel-qemu-kvm) + +Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM. + +With KVM_SET_USER_MEMORY_REGION2, QEMU can set up memory region that +backend'ed both by hva-based shared memory and guest memfd based private +memory. + +Signed-off-by: Chao Peng +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li +Message-ID: <20240320083945.991426-10-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit ce5a983233b4ca94ced88c9581014346509b5c71) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 46 +++++++++++++++++++++++++++++++++------- + accel/kvm/trace-events | 2 +- + include/sysemu/kvm_int.h | 2 ++ + 3 files changed, 41 insertions(+), 9 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index a7b9a127dd..5ef55e4dd7 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -284,35 +284,58 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, + static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new) + { + KVMState *s = kvm_state; +- struct kvm_userspace_memory_region mem; ++ struct kvm_userspace_memory_region2 mem; + int ret; + + mem.slot = slot->slot | (kml->as_id << 16); + mem.guest_phys_addr = slot->start_addr; + mem.userspace_addr = (unsigned long)slot->ram; + mem.flags = slot->flags; ++ mem.guest_memfd = slot->guest_memfd; ++ mem.guest_memfd_offset = slot->guest_memfd_offset; + + if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) { + /* Set the slot size to 0 before setting the slot to the desired + * value. This is needed based on KVM commit 75d61fbc. 
*/ + mem.memory_size = 0; +- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ ++ if (kvm_guest_memfd_supported) { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); ++ } else { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ } + if (ret < 0) { + goto err; + } + } + mem.memory_size = slot->memory_size; +- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ if (kvm_guest_memfd_supported) { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); ++ } else { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ } + slot->old_flags = mem.flags; + err: + trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, + mem.guest_phys_addr, mem.memory_size, +- mem.userspace_addr, ret); ++ mem.userspace_addr, mem.guest_memfd, ++ mem.guest_memfd_offset, ret); + if (ret < 0) { +- error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," +- " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", +- __func__, mem.slot, slot->start_addr, +- (uint64_t)mem.memory_size, strerror(errno)); ++ if (kvm_guest_memfd_supported) { ++ error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d," ++ " start=0x%" PRIx64 ", size=0x%" PRIx64 "," ++ " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 "," ++ " guest_memfd_offset=0x%" PRIx64 ": %s", ++ __func__, mem.slot, slot->start_addr, ++ (uint64_t)mem.memory_size, mem.flags, ++ mem.guest_memfd, (uint64_t)mem.guest_memfd_offset, ++ strerror(errno)); ++ } else { ++ error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," ++ " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", ++ __func__, mem.slot, slot->start_addr, ++ (uint64_t)mem.memory_size, strerror(errno)); ++ } + } + return ret; + } +@@ -467,6 +490,10 @@ static int kvm_mem_flags(MemoryRegion *mr) + if (readonly && kvm_readonly_mem_allowed) { + flags |= KVM_MEM_READONLY; + } ++ if (memory_region_has_guest_memfd(mr)) { ++ assert(kvm_guest_memfd_supported); ++ flags |= KVM_MEM_GUEST_MEMFD; ++ } + return flags; + } + +@@ -1394,6 
+1421,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + mem->ram_start_offset = ram_start_offset; + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); ++ mem->guest_memfd = mr->ram_block->guest_memfd; ++ mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host; ++ + kvm_slot_init_dirty_bitmap(mem); + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index 9f599abc17..e8c52cb9e7 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" + kvm_irqchip_release_virq(int virq) "virq %d" + kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" + kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" +-kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" ++kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d" + kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 + kvm_resample_fd_notify(int gsi) "gsi %d" + kvm_dirty_ring_full(int id) "vcpu %d" +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 3496be7997..a5a3fee411 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -30,6 +30,8 @@ typedef struct KVMSlot + int as_id; + /* Cache 
of the offset in ram address space */ + ram_addr_t ram_start_offset; ++ int guest_memfd; ++ hwaddr guest_memfd_offset; + } KVMSlot; + + typedef struct KVMMemoryUpdate { +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-Introduce-support-for-memory_attributes.patch b/SOURCES/kvm-kvm-Introduce-support-for-memory_attributes.patch new file mode 100644 index 0000000..1f043a9 --- /dev/null +++ b/SOURCES/kvm-kvm-Introduce-support-for-memory_attributes.patch @@ -0,0 +1,103 @@ +From 37e6c98987bb2d4be7ce1fdda4475cd0266271c3 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:06 -0500 +Subject: [PATCH 027/100] kvm: Introduce support for memory_attributes + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [27/91] 1b4428289949478f7390196ae4b098c5e6f36bb0 (bonzini/rhel-qemu-kvm) + +Introduce the helper functions to set the attributes of a range of +memory to private or shared. + +This is necessary to notify KVM the private/shared attribute of each gpa +range. KVM needs the information to decide the GPA needs to be mapped at +hva-based shared memory or guest_memfd based private memory. 
+ +Signed-off-by: Xiaoyao Li +Message-ID: <20240320083945.991426-11-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0811baed49010a9b651b8029ab6b9828b09a884f) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 32 ++++++++++++++++++++++++++++++++ + include/sysemu/kvm.h | 4 ++++ + 2 files changed, 36 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 9bd235c969..272e945f52 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -91,6 +91,7 @@ bool kvm_msi_use_devid; + static bool kvm_has_guest_debug; + static int kvm_sstep_flags; + static bool kvm_immediate_exit; ++static uint64_t kvm_supported_memory_attributes; + static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { +@@ -1266,6 +1267,36 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) + kvm_max_slot_size = max_slot_size; + } + ++static int kvm_set_memory_attributes(hwaddr start, uint64_t size, uint64_t attr) ++{ ++ struct kvm_memory_attributes attrs; ++ int r; ++ ++ assert((attr & kvm_supported_memory_attributes) == attr); ++ attrs.attributes = attr; ++ attrs.address = start; ++ attrs.size = size; ++ attrs.flags = 0; ++ ++ r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs); ++ if (r) { ++ error_report("failed to set memory (0x%" HWADDR_PRIx "+0x%" PRIx64 ") " ++ "with attr 0x%" PRIx64 " error '%s'", ++ start, size, attr, strerror(errno)); ++ } ++ return r; ++} ++ ++int kvm_set_memory_attributes_private(hwaddr start, uint64_t size) ++{ ++ return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); ++} ++ ++int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size) ++{ ++ return kvm_set_memory_attributes(start, size, 0); ++} ++ + /* Called with KVMMemoryListener.slots_lock held */ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) +@@ -2387,6 +2418,7 @@ static int kvm_init(MachineState *ms) + goto err; + } 
+ ++ kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); + s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); + +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 54f4d83a37..f114ff6986 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -536,4 +536,8 @@ void kvm_mark_guest_state_protected(void); + * reported for the VM. + */ + bool kvm_hwpoisoned_mem(void); ++ ++int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); ++int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); ++ + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-add-support-for-guest-physical-bits.patch b/SOURCES/kvm-kvm-add-support-for-guest-physical-bits.patch new file mode 100644 index 0000000..97b94eb --- /dev/null +++ b/SOURCES/kvm-kvm-add-support-for-guest-physical-bits.patch @@ -0,0 +1,116 @@ +From 31cc494d69449811f4d995326479372da7c1241e Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 18 Mar 2024 16:53:35 +0100 +Subject: [PATCH 003/100] kvm: add support for guest physical bits + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [3/91] abb1ba3a584152d8efabd8255b86afe609f8ffbd (bonzini/rhel-qemu-kvm) + +Query kvm for supported guest physical address bits, in cpuid +function 80000008, eax[23:16]. Usually this is identical to host +physical address bits. With NPT or EPT being used this might be +restricted to 48 (max 4-level paging address space size) even if +the host cpu supports more physical address bits. + +When set pass this to the guest, using cpuid too. Guest firmware +can use this to figure how big the usable guest physical address +space is, so PCI bar mapping are actually reachable. 
+ +Signed-off-by: Gerd Hoffmann +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <20240318155336.156197-2-kraxel@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0d08c423688edcca857f88dab20f1fc56de2b281) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm-cpu.c | 50 ++++++++++++++++++++++++++++++++------- + 1 file changed, 42 insertions(+), 8 deletions(-) + +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index b91af5051f..7ef94c681f 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -18,10 +18,32 @@ + #include "kvm_i386.h" + #include "hw/core/accel-cpu.h" + ++static void kvm_set_guest_phys_bits(CPUState *cs) ++{ ++ X86CPU *cpu = X86_CPU(cs); ++ uint32_t eax, guest_phys_bits; ++ ++ eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x80000008, 0, R_EAX); ++ guest_phys_bits = (eax >> 16) & 0xff; ++ if (!guest_phys_bits) { ++ return; ++ } ++ cpu->guest_phys_bits = guest_phys_bits; ++ if (cpu->guest_phys_bits > cpu->phys_bits) { ++ cpu->guest_phys_bits = cpu->phys_bits; ++ } ++ ++ if (cpu->host_phys_bits && cpu->host_phys_bits_limit && ++ cpu->guest_phys_bits > cpu->host_phys_bits_limit) { ++ cpu->guest_phys_bits = cpu->host_phys_bits_limit; ++ } ++} ++ + static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + { + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; ++ bool ret; + + /* + * The realize order is important, since x86_cpu_realize() checks if +@@ -32,13 +54,15 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + * + * realize order: + * +- * x86_cpu_realize(): +- * -> x86_cpu_expand_features() +- * -> cpu_exec_realizefn(): +- * -> accel_cpu_common_realize() +- * kvm_cpu_realizefn() -> host_cpu_realizefn() +- * -> cpu_common_realizefn() +- * -> check/update ucode_rev, phys_bits, mwait ++ * x86_cpu_realizefn(): ++ * x86_cpu_expand_features() ++ * cpu_exec_realizefn(): ++ * accel_cpu_common_realize() ++ * kvm_cpu_realizefn() ++ * host_cpu_realizefn() ++ * 
kvm_set_guest_phys_bits() ++ * check/update ucode_rev, phys_bits, guest_phys_bits, mwait ++ * cpu_common_realizefn() (via xcc->parent_realize) + */ + if (cpu->max_features) { + if (enable_cpu_pm && kvm_has_waitpkg()) { +@@ -50,7 +74,17 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + MSR_IA32_UCODE_REV); + } + } +- return host_cpu_realizefn(cs, errp); ++ ret = host_cpu_realizefn(cs, errp); ++ if (!ret) { ++ return ret; ++ } ++ ++ if ((env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) && ++ cpu->guest_phys_bits == -1) { ++ kvm_set_guest_phys_bits(cs); ++ } ++ ++ return true; + } + + static bool lmce_supported(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch b/SOURCES/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch new file mode 100644 index 0000000..9baa06f --- /dev/null +++ b/SOURCES/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch @@ -0,0 +1,194 @@ +From 33cc1b469689ee2bb7c4f745189472c74a0a98ab Mon Sep 17 00:00:00 2001 +From: Chao Peng +Date: Wed, 20 Mar 2024 03:39:08 -0500 +Subject: [PATCH 034/100] kvm: handle KVM_EXIT_MEMORY_FAULT + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [34/91] 59c672f6b19a3afcb61878775eb6425c6fdea6d5 (bonzini/rhel-qemu-kvm) + +Upon an KVM_EXIT_MEMORY_FAULT exit, userspace needs to do the memory +conversion on the RAMBlock to turn the memory into desired attribute, +switching between private and shared. + +Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when +KVM_EXIT_MEMORY_FAULT happens. + +Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has +guest_memfd memory backend. + +Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is +added. + +When page is converted from shared to private, the original shared +memory can be discarded via ram_block_discard_range(). 
Note, shared +memory can be discarded only when it's not back'ed by hugetlb because +hugetlb is supposed to be pre-allocated and no need for discarding. + +Signed-off-by: Chao Peng +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li + +Message-ID: <20240320083945.991426-13-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c15e5684071d93174e446be318f49d8d59b15d6d) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 98 +++++++++++++++++++++++++++++++++++++----- + accel/kvm/trace-events | 2 + + include/sysemu/kvm.h | 2 + + 3 files changed, 92 insertions(+), 10 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 3f99efc8cc..09164e346c 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2900,6 +2900,69 @@ static void kvm_eat_signals(CPUState *cpu) + } while (sigismember(&chkset, SIG_IPI)); + } + ++int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) ++{ ++ MemoryRegionSection section; ++ ram_addr_t offset; ++ MemoryRegion *mr; ++ RAMBlock *rb; ++ void *addr; ++ int ret = -1; ++ ++ trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared"); ++ ++ if (!QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) || ++ !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) { ++ return -1; ++ } ++ ++ if (!size) { ++ return -1; ++ } ++ ++ section = memory_region_find(get_system_memory(), start, size); ++ mr = section.mr; ++ if (!mr) { ++ return -1; ++ } ++ ++ if (!memory_region_has_guest_memfd(mr)) { ++ error_report("Converting non guest_memfd backed memory region " ++ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", ++ start, size, to_private ? 
"private" : "shared"); ++ goto out_unref; ++ } ++ ++ if (to_private) { ++ ret = kvm_set_memory_attributes_private(start, size); ++ } else { ++ ret = kvm_set_memory_attributes_shared(start, size); ++ } ++ if (ret) { ++ goto out_unref; ++ } ++ ++ addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; ++ rb = qemu_ram_block_from_host(addr, false, &offset); ++ ++ if (to_private) { ++ if (rb->page_size != qemu_real_host_page_size()) { ++ /* ++ * shared memory is backed by hugetlb, which is supposed to be ++ * pre-allocated and doesn't need to be discarded ++ */ ++ goto out_unref; ++ } ++ ret = ram_block_discard_range(rb, offset, size); ++ } else { ++ ret = ram_block_discard_guest_memfd_range(rb, offset, size); ++ } ++ ++out_unref: ++ memory_region_unref(mr); ++ return ret; ++} ++ + int kvm_cpu_exec(CPUState *cpu) + { + struct kvm_run *run = cpu->kvm_run; +@@ -2967,18 +3030,20 @@ int kvm_cpu_exec(CPUState *cpu) + ret = EXCP_INTERRUPT; + break; + } +- fprintf(stderr, "error: kvm run failed %s\n", +- strerror(-run_ret)); ++ if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) { ++ fprintf(stderr, "error: kvm run failed %s\n", ++ strerror(-run_ret)); + #ifdef TARGET_PPC +- if (run_ret == -EBUSY) { +- fprintf(stderr, +- "This is probably because your SMT is enabled.\n" +- "VCPU can only run on primary threads with all " +- "secondary threads offline.\n"); +- } ++ if (run_ret == -EBUSY) { ++ fprintf(stderr, ++ "This is probably because your SMT is enabled.\n" ++ "VCPU can only run on primary threads with all " ++ "secondary threads offline.\n"); ++ } + #endif +- ret = -1; +- break; ++ ret = -1; ++ break; ++ } + } + + trace_kvm_run_exit(cpu->cpu_index, run->exit_reason); +@@ -3061,6 +3126,19 @@ int kvm_cpu_exec(CPUState *cpu) + break; + } + break; ++ case KVM_EXIT_MEMORY_FAULT: ++ trace_kvm_memory_fault(run->memory_fault.gpa, ++ run->memory_fault.size, ++ run->memory_fault.flags); ++ if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) 
{ ++ error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64, ++ (uint64_t)run->memory_fault.flags); ++ ret = -1; ++ break; ++ } ++ ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size, ++ run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE); ++ break; + default: + ret = kvm_arch_handle_exit(cpu, run); + break; +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index e8c52cb9e7..681ccb667d 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -31,3 +31,5 @@ kvm_cpu_exec(void) "" + kvm_interrupt_exit_request(void) "" + kvm_io_window_exit(void) "" + kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32 ++kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s" ++kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64 +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 9e4ab7ae89..74f23dff9c 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -542,4 +542,6 @@ int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); + int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + ++int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private); ++ + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch b/SOURCES/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch new file mode 100644 index 0000000..8f9756b --- /dev/null +++ b/SOURCES/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch @@ -0,0 +1,56 @@ +From f9dc55dd179bb534d589af371c5c2a7886bd461e Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:11 -0500 +Subject: [PATCH 030/100] kvm/memory: Make memory type private by default if it + has guest memfd backend + +RH-Author: Paolo 
Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [30/91] 5e21edf844b5629ee32c4075843b028561b97ae2 (bonzini/rhel-qemu-kvm) + +KVM side leaves the memory to shared by default, which may incur the +overhead of paging conversion on the first visit of each page. Because +the expectation is that page is likely to private for the VMs that +require private memory (has guest memfd). + +Explicitly set the memory to private when memory region has valid +guest memfd backend. + +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-16-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit bd3bcf6962b664ca3bf9c60fdcc4534e8e3d0641) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5ef55e4dd7..3f99efc8cc 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1431,6 +1431,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + strerror(-err)); + abort(); + } ++ ++ if (memory_region_has_guest_memfd(mr)) { ++ err = kvm_set_memory_attributes_private(start_addr, slot_size); ++ if (err) { ++ error_report("%s: failed to set memory attribute private: %s", ++ __func__, strerror(-err)); ++ exit(1); ++ } ++ } ++ + start_addr += slot_size; + ram_start_offset += slot_size; + ram += slot_size; +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch b/SOURCES/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch new file mode 100644 index 0000000..7b578b5 --- /dev/null +++ b/SOURCES/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch @@ -0,0 +1,61 @@ +From aeaa7061139202448d466b7e18682081f9cd2097 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Thu, 29 Feb 2024 01:36:54 -0500 +Subject: [PATCH 035/100] kvm/tdx: Don't 
complain when converting vMMIO region + to shared + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [35/91] c42870771d7af5badc2e10d42be9b5620d72f95d (bonzini/rhel-qemu-kvm) + +Because vMMIO region needs to be shared region, guest TD may explicitly +convert such region from private to shared. Don't complain such +conversion. + +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-34-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c5d9425ef4da9f43fc0903905ad415456d1ab843) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 09164e346c..6efaff90a7 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2927,9 +2927,22 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) + } + + if (!memory_region_has_guest_memfd(mr)) { +- error_report("Converting non guest_memfd backed memory region " +- "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", +- start, size, to_private ? "private" : "shared"); ++ /* ++ * Because vMMIO region must be shared, guest TD may convert vMMIO ++ * region to shared explicitly. Don't complain such case. See ++ * memory_region_type() for checking if the region is MMIO region. ++ */ ++ if (!to_private && ++ !memory_region_is_ram(mr) && ++ !memory_region_is_ram_device(mr) && ++ !memory_region_is_rom(mr) && ++ !memory_region_is_romd(mr)) { ++ ret = 0; ++ } else { ++ error_report("Convert non guest_memfd backed memory region " ++ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", ++ start, size, to_private ? 
"private" : "shared"); ++ } + goto out_unref; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch b/SOURCES/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch new file mode 100644 index 0000000..c0f2bc6 --- /dev/null +++ b/SOURCES/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch @@ -0,0 +1,62 @@ +From 2b2dfff3e383c99d0f759a8c12659d1a0ce50e8e Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Thu, 29 Feb 2024 01:36:55 -0500 +Subject: [PATCH 036/100] kvm/tdx: Ignore memory conversion to shared of + unassigned region + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [36/91] 84515b9dcfc2e07b272bb2477acf6430e9d33f28 (bonzini/rhel-qemu-kvm) + +TDX requires vMMIO region to be shared. For KVM, MMIO region is the region +which kvm memslot isn't assigned to (except in-kernel emulation). +qemu has the memory region for vMMIO at each device level. + +While OVMF issues MapGPA(to-shared) conservatively on 32bit PCI MMIO +region, qemu doesn't find corresponding vMMIO region because it's before +PCI device allocation and memory_region_find() finds the device region, not +PCI bus region. It's safe to ignore MapGPA(to-shared) because when guest +accesses those region they use GPA with shared bit set for vMMIO. Ignore +memory conversion request of non-assigned region to shared and return +success. Otherwise OVMF is confused and panics there. 
+ +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-35-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 565f4768bb9cf840b2f8cca41483bb91aa3196a3) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6efaff90a7..f6268855b4 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2923,6 +2923,18 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) + section = memory_region_find(get_system_memory(), start, size); + mr = section.mr; + if (!mr) { ++ /* ++ * Ignore converting non-assigned region to shared. ++ * ++ * TDX requires vMMIO region to be shared to inject #VE to guest. ++ * OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region, ++ * and vIO-APIC 0xFEC00000 4K page. ++ * OVMF assigns 32bit PCI MMIO region to ++ * [top of low memory: typically 2GB=0xC000000, 0xFC00000) ++ */ ++ if (!to_private) { ++ return 0; ++ } + return -1; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch b/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch new file mode 100644 index 0000000..1aba040 --- /dev/null +++ b/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch @@ -0,0 +1,127 @@ +From 287ebf9f0b8a62dc49fd7802472c1ae57f653e44 Mon Sep 17 00:00:00 2001 +From: Prasad Pandit +Date: Thu, 25 Apr 2024 12:34:12 +0530 +Subject: [PATCH 1/5] linux-aio: add IO_CMD_FDSYNC command support + +RH-Author: Prasad Pandit +RH-MergeRequest: 249: linux-aio: add IO_CMD_FDSYNC command support +RH-Jira: RHEL-42411 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Commit: [1/1] 9beff6506d2eca7741b1c11b5acdc19b635c7c75 (pjp/cs-qemu-kvm) + +Libaio defines IO_CMD_FDSYNC command to sync all outstanding +asynchronous I/O operations, by flushing out file data to the +disk storage. 
Enable linux-aio to submit such aio request. + +When using aio=native without fdsync() support, QEMU creates +pthreads, and destroying these pthreads results in TLB flushes. +In a real-time guest environment, TLB flushes cause a latency +spike. This patch helps to avoid such spikes. + +Jira: https://issues.redhat.com/browse/RHEL-42411 +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Prasad Pandit +Message-ID: <20240425070412.37248-1-ppandit@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 24687abf237e3c15816d689a8e4b08d7c3190dcb) +Signed-off-by: Prasad Pandit +--- + block/file-posix.c | 9 +++++++++ + block/linux-aio.c | 21 ++++++++++++++++++++- + include/block/raw-aio.h | 1 + + 3 files changed, 30 insertions(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 35684f7e21..9831b08fb6 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -159,6 +159,7 @@ typedef struct BDRVRawState { + bool has_discard:1; + bool has_write_zeroes:1; + bool use_linux_aio:1; ++ bool has_laio_fdsync:1; + bool use_linux_io_uring:1; + int page_cache_inconsistent; /* errno from fdatasync failure */ + bool has_fallocate; +@@ -718,6 +719,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + ret = -EINVAL; + goto fail; + } ++ if (s->use_linux_aio) { ++ s->has_laio_fdsync = laio_has_fdsync(s->fd); ++ } + #else + if (s->use_linux_aio) { + error_setg(errp, "aio=native was specified, but is not supported " +@@ -2599,6 +2603,11 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) + if (raw_check_linux_io_uring(s)) { + return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH); + } ++#endif ++#ifdef CONFIG_LINUX_AIO ++ if (s->has_laio_fdsync && raw_check_linux_aio(s)) { ++ return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0); ++ } + #endif + return raw_thread_pool_submit(handle_aiocb_flush, &acb); + } +diff --git a/block/linux-aio.c b/block/linux-aio.c +index ec05d946f3..e3b5ec9aba 
100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -384,6 +384,9 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, + case QEMU_AIO_READ: + io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset); + break; ++ case QEMU_AIO_FLUSH: ++ io_prep_fdsync(iocbs, fd); ++ break; + /* Currently Linux kernel does not support other operations */ + default: + fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", +@@ -412,7 +415,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, + AioContext *ctx = qemu_get_current_aio_context(); + struct qemu_laiocb laiocb = { + .co = qemu_coroutine_self(), +- .nbytes = qiov->size, ++ .nbytes = qiov ? qiov->size : 0, + .ctx = aio_get_linux_aio(ctx), + .ret = -EINPROGRESS, + .is_read = (type == QEMU_AIO_READ), +@@ -486,3 +489,19 @@ void laio_cleanup(LinuxAioState *s) + } + g_free(s); + } ++ ++bool laio_has_fdsync(int fd) ++{ ++ struct iocb cb; ++ struct iocb *cbs[] = {&cb, NULL}; ++ ++ io_context_t ctx = 0; ++ io_setup(1, &ctx); ++ ++ /* check if host kernel supports IO_CMD_FDSYNC */ ++ io_prep_fdsync(&cb, fd); ++ int ret = io_submit(ctx, 1, cbs); ++ ++ io_destroy(ctx); ++ return (ret == -EINVAL) ? 
false : true; ++} +diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h +index 20e000b8ef..626706827f 100644 +--- a/include/block/raw-aio.h ++++ b/include/block/raw-aio.h +@@ -60,6 +60,7 @@ void laio_cleanup(LinuxAioState *s); + int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, + int type, uint64_t dev_max_batch); + ++bool laio_has_fdsync(int); + void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); + void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-linux-headers-Update-to-current-kvm-next.patch b/SOURCES/kvm-linux-headers-Update-to-current-kvm-next.patch new file mode 100644 index 0000000..2fd35fd --- /dev/null +++ b/SOURCES/kvm-linux-headers-Update-to-current-kvm-next.patch @@ -0,0 +1,189 @@ +From c3e2bc3319882c16fa36eafc7a613073746cfc8b Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:14 -0500 +Subject: [PATCH 052/100] linux-headers: Update to current kvm/next + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [52/91] df77e867072f60110b8387a54ba2db6226b35007 (bonzini/rhel-qemu-kvm) + +This updates kernel headers to commit 6f627b425378 ("KVM: SVM: Add module +parameter to enable SEV-SNP", 2024-05-12). The SNP host patches will +be included in Linux 6.11, to be released next July. + +Also brings in an linux-headers/linux/vhost.h fix from v6.9-rc4. 
+ +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-3-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5f69e42da5b40a2213f4db70ca461f554abca686) +Signed-off-by: Paolo Bonzini +--- + linux-headers/asm-loongarch/kvm.h | 4 +++ + linux-headers/asm-riscv/kvm.h | 1 + + linux-headers/asm-x86/kvm.h | 52 ++++++++++++++++++++++++++++++- + linux-headers/linux/vhost.h | 15 ++++----- + 4 files changed, 64 insertions(+), 8 deletions(-) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 109785922c..f9abef3823 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -17,6 +17,8 @@ + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 + ++#define KVM_GUESTDBG_USE_SW_BP 0x00010000 ++ + /* + * for KVM_GET_REGS and KVM_SET_REGS + */ +@@ -72,6 +74,8 @@ struct kvm_fpu { + + #define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) + #define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) ++/* Debugging: Special instruction for software breakpoint */ ++#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) + + #define LOONGARCH_REG_SHIFT 3 + #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index b1c503c295..e878e7cc39 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_ZFA, + KVM_RISCV_ISA_EXT_ZTSO, + KVM_RISCV_ISA_EXT_ZACAS, ++ KVM_RISCV_ISA_EXT_SSCOFPMF, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 31c95c2dfe..1c8f918234 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -695,6 +695,11 @@ 
enum sev_cmd_id { + /* Second time is the charm; improved versions of the above ioctls. */ + KVM_SEV_INIT2, + ++ /* SNP-specific commands */ ++ KVM_SEV_SNP_LAUNCH_START = 100, ++ KVM_SEV_SNP_LAUNCH_UPDATE, ++ KVM_SEV_SNP_LAUNCH_FINISH, ++ + KVM_SEV_NR_MAX, + }; + +@@ -709,7 +714,9 @@ struct kvm_sev_cmd { + struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; +- __u32 pad[9]; ++ __u16 ghcb_version; ++ __u16 pad1; ++ __u32 pad2[8]; + }; + + struct kvm_sev_launch_start { +@@ -820,6 +827,48 @@ struct kvm_sev_receive_update_data { + __u32 pad2; + }; + ++struct kvm_sev_snp_launch_start { ++ __u64 policy; ++ __u8 gosvw[16]; ++ __u16 flags; ++ __u8 pad0[6]; ++ __u64 pad1[4]; ++}; ++ ++/* Kept in sync with firmware values for simplicity. */ ++#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 ++#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 ++#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 ++#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 ++#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 ++ ++struct kvm_sev_snp_launch_update { ++ __u64 gfn_start; ++ __u64 uaddr; ++ __u64 len; ++ __u8 type; ++ __u8 pad0; ++ __u16 flags; ++ __u32 pad1; ++ __u64 pad2[4]; ++}; ++ ++#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 ++#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 ++#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 ++ ++struct kvm_sev_snp_launch_finish { ++ __u64 id_block_uaddr; ++ __u64 id_auth_uaddr; ++ __u8 id_block_en; ++ __u8 auth_key_en; ++ __u8 vcek_disabled; ++ __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; ++ __u8 pad0[3]; ++ __u16 flags; ++ __u64 pad1[4]; ++}; ++ + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) + #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +@@ -870,5 +919,6 @@ struct kvm_hyperv_eventfd { + #define KVM_X86_SW_PROTECTED_VM 1 + #define KVM_X86_SEV_VM 2 + #define KVM_X86_SEV_ES_VM 3 ++#define KVM_X86_SNP_VM 4 + + #endif /* _ASM_X86_KVM_H */ +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index bea6973906..b95dd84eef 100644 +--- a/linux-headers/linux/vhost.h ++++ 
b/linux-headers/linux/vhost.h +@@ -179,12 +179,6 @@ + /* Get the config size */ + #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +-/* Get the count of all virtqueues */ +-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) +- +-/* Get the number of virtqueue groups. */ +-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) +- + /* Get the number of address spaces. */ + #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) + +@@ -228,10 +222,17 @@ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) + ++ ++/* Get the count of all virtqueues */ ++#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) ++ ++/* Get the number of virtqueue groups. */ ++#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) ++ + /* Get the queue size of a specific virtqueue. + * userspace set the vring index in vhost_vring_state.index + * kernel set the queue size in vhost_vring_state.num + */ +-#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ ++#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ + struct vhost_vring_state) + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-linux-headers-update-to-current-kvm-next.patch b/SOURCES/kvm-linux-headers-update-to-current-kvm-next.patch new file mode 100644 index 0000000..4c3dd73 --- /dev/null +++ b/SOURCES/kvm-linux-headers-update-to-current-kvm-next.patch @@ -0,0 +1,2471 @@ +From 530296e1669c9730f261a269d5b911ea56dfcce7 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 23 Apr 2024 11:46:47 +0200 +Subject: [PATCH 017/100] linux-headers: update to current kvm/next + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [17/91] 5660d4967f10a84802de16c24540e95095eaffd5 (bonzini/rhel-qemu-kvm) + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 
ab0c7fb22b56523f24d6e127cd4d10ecff67bf85) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 8 - + include/standard-headers/asm-x86/bootparam.h | 17 +- + include/standard-headers/asm-x86/kvm_para.h | 3 +- + include/standard-headers/asm-x86/setup_data.h | 83 +++ + include/standard-headers/linux/ethtool.h | 48 ++ + include/standard-headers/linux/fuse.h | 39 +- + .../linux/input-event-codes.h | 1 + + include/standard-headers/linux/virtio_gpu.h | 2 + + include/standard-headers/linux/virtio_pci.h | 10 +- + include/standard-headers/linux/virtio_snd.h | 154 ++++ + linux-headers/asm-arm64/kvm.h | 15 +- + linux-headers/asm-arm64/sve_context.h | 11 + + linux-headers/asm-generic/bitsperlong.h | 4 + + linux-headers/asm-loongarch/kvm.h | 2 - + linux-headers/asm-mips/kvm.h | 2 - + linux-headers/asm-powerpc/kvm.h | 45 +- + linux-headers/asm-riscv/kvm.h | 3 +- + linux-headers/asm-s390/kvm.h | 315 +++++++- + linux-headers/asm-x86/kvm.h | 328 ++++++++- + linux-headers/linux/bits.h | 15 + + linux-headers/linux/kvm.h | 689 +----------------- + linux-headers/linux/psp-sev.h | 59 ++ + linux-headers/linux/vhost.h | 7 + + 23 files changed, 1120 insertions(+), 740 deletions(-) + create mode 100644 include/standard-headers/asm-x86/setup_data.h + create mode 100644 linux-headers/linux/bits.h + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index ffbda48917..84a4801977 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -679,14 +679,6 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state) + return dev; + } + +-struct setup_data { +- uint64_t next; +- uint32_t type; +- uint32_t len; +- uint8_t data[]; +-} __attribute__((packed)); +- +- + /* + * The entry point into the kernel for PVH boot is different from + * the native entry point. 
The PVH entry is defined by the x86/HVM +diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h +index 0b06d2bff1..b582a105c0 100644 +--- a/include/standard-headers/asm-x86/bootparam.h ++++ b/include/standard-headers/asm-x86/bootparam.h +@@ -2,21 +2,7 @@ + #ifndef _ASM_X86_BOOTPARAM_H + #define _ASM_X86_BOOTPARAM_H + +-/* setup_data/setup_indirect types */ +-#define SETUP_NONE 0 +-#define SETUP_E820_EXT 1 +-#define SETUP_DTB 2 +-#define SETUP_PCI 3 +-#define SETUP_EFI 4 +-#define SETUP_APPLE_PROPERTIES 5 +-#define SETUP_JAILHOUSE 6 +-#define SETUP_CC_BLOB 7 +-#define SETUP_IMA 8 +-#define SETUP_RNG_SEED 9 +-#define SETUP_ENUM_MAX SETUP_RNG_SEED +- +-#define SETUP_INDIRECT (1<<31) +-#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) ++#include "standard-headers/asm-x86/setup_data.h" + + /* ram_size flags */ + #define RAMDISK_IMAGE_START_MASK 0x07FF +@@ -38,6 +24,7 @@ + #define XLF_EFI_KEXEC (1<<4) + #define XLF_5LEVEL (1<<5) + #define XLF_5LEVEL_ENABLED (1<<6) ++#define XLF_MEM_ENCRYPTION (1<<7) + + + #endif /* _ASM_X86_BOOTPARAM_H */ +diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h +index f0235e58a1..9a011d20f0 100644 +--- a/include/standard-headers/asm-x86/kvm_para.h ++++ b/include/standard-headers/asm-x86/kvm_para.h +@@ -92,7 +92,7 @@ struct kvm_clock_pairing { + #define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) + + /* MSR_KVM_ASYNC_PF_INT */ +-#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) ++#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0) + + /* MSR_KVM_MIGRATION_CONTROL */ + #define KVM_MIGRATION_READY (1 << 0) +@@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { + uint32_t token; + + uint8_t pad[56]; +- uint32_t enabled; + }; + + #define KVM_PV_EOI_BIT 0 +diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h +new file mode 100644 +index 0000000000..09355f54c5 +--- /dev/null ++++ 
b/include/standard-headers/asm-x86/setup_data.h +@@ -0,0 +1,83 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _ASM_X86_SETUP_DATA_H ++#define _ASM_X86_SETUP_DATA_H ++ ++/* setup_data/setup_indirect types */ ++#define SETUP_NONE 0 ++#define SETUP_E820_EXT 1 ++#define SETUP_DTB 2 ++#define SETUP_PCI 3 ++#define SETUP_EFI 4 ++#define SETUP_APPLE_PROPERTIES 5 ++#define SETUP_JAILHOUSE 6 ++#define SETUP_CC_BLOB 7 ++#define SETUP_IMA 8 ++#define SETUP_RNG_SEED 9 ++#define SETUP_ENUM_MAX SETUP_RNG_SEED ++ ++#define SETUP_INDIRECT (1<<31) ++#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) ++ ++#ifndef __ASSEMBLY__ ++ ++#include "standard-headers/linux/types.h" ++ ++/* extensible setup data list node */ ++struct setup_data { ++ uint64_t next; ++ uint32_t type; ++ uint32_t len; ++ uint8_t data[]; ++}; ++ ++/* extensible setup indirect data node */ ++struct setup_indirect { ++ uint32_t type; ++ uint32_t reserved; /* Reserved, must be set to zero. */ ++ uint64_t len; ++ uint64_t addr; ++}; ++ ++/* ++ * The E820 memory region entry of the boot protocol ABI: ++ */ ++struct boot_e820_entry { ++ uint64_t addr; ++ uint64_t size; ++ uint32_t type; ++} QEMU_PACKED; ++ ++/* ++ * The boot loader is passing platform information via this Jailhouse-specific ++ * setup data structure. 
++ */ ++struct jailhouse_setup_data { ++ struct { ++ uint16_t version; ++ uint16_t compatible_version; ++ } QEMU_PACKED hdr; ++ struct { ++ uint16_t pm_timer_address; ++ uint16_t num_cpus; ++ uint64_t pci_mmconfig_base; ++ uint32_t tsc_khz; ++ uint32_t apic_khz; ++ uint8_t standard_ioapic; ++ uint8_t cpu_ids[255]; ++ } QEMU_PACKED v1; ++ struct { ++ uint32_t flags; ++ } QEMU_PACKED v2; ++} QEMU_PACKED; ++ ++/* ++ * IMA buffer setup data information from the previous kernel during kexec ++ */ ++struct ima_setup_data { ++ uint64_t addr; ++ uint64_t size; ++} QEMU_PACKED; ++ ++#endif /* __ASSEMBLY__ */ ++ ++#endif /* _ASM_X86_SETUP_DATA_H */ +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index dfb54eff6f..01503784d2 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -2023,6 +2023,53 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define IPV4_FLOW 0x10 /* hash only */ + #define IPV6_FLOW 0x11 /* hash only */ + #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ ++ ++/* Used for GTP-U IPv4 and IPv6. ++ * The format of GTP packets only includes ++ * elements such as TEID and GTP version. ++ * It is primarily intended for data communication of the UE. ++ */ ++#define GTPU_V4_FLOW 0x13 /* hash only */ ++#define GTPU_V6_FLOW 0x14 /* hash only */ ++ ++/* Use for GTP-C IPv4 and v6. ++ * The format of these GTP packets does not include TEID. ++ * Primarily expected to be used for communication ++ * to create sessions for UE data communication, ++ * commonly referred to as CSR (Create Session Request). ++ */ ++#define GTPC_V4_FLOW 0x15 /* hash only */ ++#define GTPC_V6_FLOW 0x16 /* hash only */ ++ ++/* Use for GTP-C IPv4 and v6. ++ * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. ++ * After session creation, it becomes this packet. ++ * This is mainly used for requests to realize UE handover. 
++ */ ++#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ ++#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ ++ ++/* Use for GTP-U and extended headers for the PSC (PDU Session Container). ++ * The format of these GTP packets includes TEID and QFI. ++ * In 5G communication using UPF (User Plane Function), ++ * data communication with this extended header is performed. ++ */ ++#define GTPU_EH_V4_FLOW 0x19 /* hash only */ ++#define GTPU_EH_V6_FLOW 0x1a /* hash only */ ++ ++/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. ++ * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by ++ * UL/DL included in the PSC. ++ * There are differences in the data included based on Downlink/Uplink, ++ * and can be used to distinguish packets. ++ * The functions described so far are useful when you want to ++ * handle communication from the mobile network in UPF, PGW, etc. ++ */ ++#define GTPU_UL_V4_FLOW 0x1b /* hash only */ ++#define GTPU_UL_V6_FLOW 0x1c /* hash only */ ++#define GTPU_DL_V4_FLOW 0x1d /* hash only */ ++#define GTPU_DL_V6_FLOW 0x1e /* hash only */ ++ + /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ + #define FLOW_EXT 0x80000000 + #define FLOW_MAC_EXT 0x40000000 +@@ -2037,6 +2084,7 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define RXH_IP_DST (1 << 5) + #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ + #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ ++#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ + #define RXH_DISCARD (1 << 31) + + #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index fc0dcd10ae..bac9dbc49f 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -211,6 +211,12 @@ + * 7.39 + * - add FUSE_DIRECT_IO_ALLOW_MMAP + * - add FUSE_STATX and related structures ++ * ++ * 7.40 ++ * - add 
max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag ++ * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag ++ * - add FUSE_NO_EXPORT_SUPPORT init flag ++ * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag + */ + + #ifndef _LINUX_FUSE_H +@@ -242,7 +248,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 39 ++#define FUSE_KERNEL_MINOR_VERSION 40 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -349,6 +355,7 @@ struct fuse_file_lock { + * FOPEN_STREAM: the file is stream-like (no file position at all) + * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) + * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode ++ * FOPEN_PASSTHROUGH: passthrough read/write io for this open file + */ + #define FOPEN_DIRECT_IO (1 << 0) + #define FOPEN_KEEP_CACHE (1 << 1) +@@ -357,6 +364,7 @@ struct fuse_file_lock { + #define FOPEN_STREAM (1 << 4) + #define FOPEN_NOFLUSH (1 << 5) + #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) ++#define FOPEN_PASSTHROUGH (1 << 7) + + /** + * INIT request/reply flags +@@ -406,6 +414,9 @@ struct fuse_file_lock { + * symlink and mknod (single group that matches parent) + * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation + * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. 
++ * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support ++ * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit ++ * of the request ID indicates resend requests + */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -445,6 +456,9 @@ struct fuse_file_lock { + #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) + #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) + #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) ++#define FUSE_PASSTHROUGH (1ULL << 37) ++#define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) ++#define FUSE_HAS_RESEND (1ULL << 39) + + /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ + #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP +@@ -631,6 +645,7 @@ enum fuse_notify_code { + FUSE_NOTIFY_STORE = 4, + FUSE_NOTIFY_RETRIEVE = 5, + FUSE_NOTIFY_DELETE = 6, ++ FUSE_NOTIFY_RESEND = 7, + FUSE_NOTIFY_CODE_MAX, + }; + +@@ -757,7 +772,7 @@ struct fuse_create_in { + struct fuse_open_out { + uint64_t fh; + uint32_t open_flags; +- uint32_t padding; ++ int32_t backing_id; + }; + + struct fuse_release_in { +@@ -873,7 +888,8 @@ struct fuse_init_out { + uint16_t max_pages; + uint16_t map_alignment; + uint32_t flags2; +- uint32_t unused[7]; ++ uint32_t max_stack_depth; ++ uint32_t unused[6]; + }; + + #define CUSE_INIT_INFO_MAX 4096 +@@ -956,6 +972,14 @@ struct fuse_fallocate_in { + uint32_t padding; + }; + ++/** ++ * FUSE request unique ID flag ++ * ++ * Indicates whether this is a resend request. The receiver should handle this ++ * request accordingly. 
++ */ ++#define FUSE_UNIQUE_RESEND (1ULL << 63) ++ + struct fuse_in_header { + uint32_t len; + uint32_t opcode; +@@ -1045,9 +1069,18 @@ struct fuse_notify_retrieve_in { + uint64_t dummy4; + }; + ++struct fuse_backing_map { ++ int32_t fd; ++ uint32_t flags; ++ uint64_t padding; ++}; ++ + /* Device ioctls: */ + #define FUSE_DEV_IOC_MAGIC 229 + #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) ++#define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \ ++ struct fuse_backing_map) ++#define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) + + struct fuse_lseek_in { + uint64_t fh; +diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h +index f6bab08540..2221b0c383 100644 +--- a/include/standard-headers/linux/input-event-codes.h ++++ b/include/standard-headers/linux/input-event-codes.h +@@ -602,6 +602,7 @@ + + #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ + #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ ++#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ + + #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ + #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ +diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h +index 2da48d3d4c..2db643ed8f 100644 +--- a/include/standard-headers/linux/virtio_gpu.h ++++ b/include/standard-headers/linux/virtio_gpu.h +@@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit { + + #define VIRTIO_GPU_CAPSET_VIRGL 1 + #define VIRTIO_GPU_CAPSET_VIRGL2 2 ++/* 3 is reserved for gfxstream */ ++#define VIRTIO_GPU_CAPSET_VENUS 4 + + /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ + struct virtio_gpu_get_capset_info { +diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h +index 3e2bc2c97e..4010216103 100644 +--- a/include/standard-headers/linux/virtio_pci.h ++++ 
b/include/standard-headers/linux/virtio_pci.h +@@ -240,7 +240,7 @@ struct virtio_pci_cfg_cap { + #define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 + #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 + +-struct QEMU_PACKED virtio_admin_cmd_hdr { ++struct virtio_admin_cmd_hdr { + uint16_t opcode; + /* + * 1 - SR-IOV +@@ -252,20 +252,20 @@ struct QEMU_PACKED virtio_admin_cmd_hdr { + uint64_t group_member_id; + }; + +-struct QEMU_PACKED virtio_admin_cmd_status { ++struct virtio_admin_cmd_status { + uint16_t status; + uint16_t status_qualifier; + /* Unused, reserved for future extensions. */ + uint8_t reserved2[4]; + }; + +-struct QEMU_PACKED virtio_admin_cmd_legacy_wr_data { ++struct virtio_admin_cmd_legacy_wr_data { + uint8_t offset; /* Starting offset of the register(s) to write. */ + uint8_t reserved[7]; + uint8_t registers[]; + }; + +-struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { ++struct virtio_admin_cmd_legacy_rd_data { + uint8_t offset; /* Starting offset of the register(s) to read. 
*/ + }; + +@@ -275,7 +275,7 @@ struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { + + #define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 + +-struct QEMU_PACKED virtio_admin_cmd_notify_info_data { ++struct virtio_admin_cmd_notify_info_data { + uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */ + uint8_t bar; /* BAR of the member or the owner device */ + uint8_t padding[6]; +diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h +index 1af96b9fc6..860f12e0a4 100644 +--- a/include/standard-headers/linux/virtio_snd.h ++++ b/include/standard-headers/linux/virtio_snd.h +@@ -7,6 +7,14 @@ + + #include "standard-headers/linux/virtio_types.h" + ++/******************************************************************************* ++ * FEATURE BITS ++ */ ++enum { ++ /* device supports control elements */ ++ VIRTIO_SND_F_CTLS = 0 ++}; ++ + /******************************************************************************* + * CONFIGURATION SPACE + */ +@@ -17,6 +25,8 @@ struct virtio_snd_config { + uint32_t streams; + /* # of available channel maps */ + uint32_t chmaps; ++ /* # of available control elements */ ++ uint32_t controls; + }; + + enum { +@@ -55,6 +65,15 @@ enum { + /* channel map control request types */ + VIRTIO_SND_R_CHMAP_INFO = 0x0200, + ++ /* control element request types */ ++ VIRTIO_SND_R_CTL_INFO = 0x0300, ++ VIRTIO_SND_R_CTL_ENUM_ITEMS, ++ VIRTIO_SND_R_CTL_READ, ++ VIRTIO_SND_R_CTL_WRITE, ++ VIRTIO_SND_R_CTL_TLV_READ, ++ VIRTIO_SND_R_CTL_TLV_WRITE, ++ VIRTIO_SND_R_CTL_TLV_COMMAND, ++ + /* jack event types */ + VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, + VIRTIO_SND_EVT_JACK_DISCONNECTED, +@@ -63,6 +82,9 @@ enum { + VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, + VIRTIO_SND_EVT_PCM_XRUN, + ++ /* control element event types */ ++ VIRTIO_SND_EVT_CTL_NOTIFY = 0x1200, ++ + /* common status codes */ + VIRTIO_SND_S_OK = 0x8000, + VIRTIO_SND_S_BAD_MSG, +@@ -331,4 +353,136 @@ struct virtio_snd_chmap_info { + 
uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; + }; + ++/******************************************************************************* ++ * CONTROL ELEMENTS MESSAGES ++ */ ++struct virtio_snd_ctl_hdr { ++ /* VIRTIO_SND_R_CTL_XXX */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... virtio_snd_config::controls - 1 */ ++ uint32_t control_id; ++}; ++ ++/* supported roles for control elements */ ++enum { ++ VIRTIO_SND_CTL_ROLE_UNDEFINED = 0, ++ VIRTIO_SND_CTL_ROLE_VOLUME, ++ VIRTIO_SND_CTL_ROLE_MUTE, ++ VIRTIO_SND_CTL_ROLE_GAIN ++}; ++ ++/* supported value types for control elements */ ++enum { ++ VIRTIO_SND_CTL_TYPE_BOOLEAN = 0, ++ VIRTIO_SND_CTL_TYPE_INTEGER, ++ VIRTIO_SND_CTL_TYPE_INTEGER64, ++ VIRTIO_SND_CTL_TYPE_ENUMERATED, ++ VIRTIO_SND_CTL_TYPE_BYTES, ++ VIRTIO_SND_CTL_TYPE_IEC958 ++}; ++ ++/* supported access rights for control elements */ ++enum { ++ VIRTIO_SND_CTL_ACCESS_READ = 0, ++ VIRTIO_SND_CTL_ACCESS_WRITE, ++ VIRTIO_SND_CTL_ACCESS_VOLATILE, ++ VIRTIO_SND_CTL_ACCESS_INACTIVE, ++ VIRTIO_SND_CTL_ACCESS_TLV_READ, ++ VIRTIO_SND_CTL_ACCESS_TLV_WRITE, ++ VIRTIO_SND_CTL_ACCESS_TLV_COMMAND ++}; ++ ++struct virtio_snd_ctl_info { ++ /* common header */ ++ struct virtio_snd_info hdr; ++ /* element role (VIRTIO_SND_CTL_ROLE_XXX) */ ++ uint32_t role; ++ /* element value type (VIRTIO_SND_CTL_TYPE_XXX) */ ++ uint32_t type; ++ /* element access right bit map (1 << VIRTIO_SND_CTL_ACCESS_XXX) */ ++ uint32_t access; ++ /* # of members in the element value */ ++ uint32_t count; ++ /* index for an element with a non-unique name */ ++ uint32_t index; ++ /* name identifier string for the element */ ++ uint8_t name[44]; ++ /* additional information about the element's value */ ++ union { ++ /* VIRTIO_SND_CTL_TYPE_INTEGER */ ++ struct { ++ /* minimum supported value */ ++ uint32_t min; ++ /* maximum supported value */ ++ uint32_t max; ++ /* fixed step size for value (0 = variable size) */ ++ uint32_t step; ++ } integer; ++ /* VIRTIO_SND_CTL_TYPE_INTEGER64 */ ++ struct { ++ /* minimum 
supported value */ ++ uint64_t min; ++ /* maximum supported value */ ++ uint64_t max; ++ /* fixed step size for value (0 = variable size) */ ++ uint64_t step; ++ } integer64; ++ /* VIRTIO_SND_CTL_TYPE_ENUMERATED */ ++ struct { ++ /* # of options supported for value */ ++ uint32_t items; ++ } enumerated; ++ } value; ++}; ++ ++struct virtio_snd_ctl_enum_item { ++ /* option name */ ++ uint8_t item[64]; ++}; ++ ++struct virtio_snd_ctl_iec958 { ++ /* AES/IEC958 channel status bits */ ++ uint8_t status[24]; ++ /* AES/IEC958 subcode bits */ ++ uint8_t subcode[147]; ++ /* nothing */ ++ uint8_t pad; ++ /* AES/IEC958 subframe bits */ ++ uint8_t dig_subframe[4]; ++}; ++ ++struct virtio_snd_ctl_value { ++ union { ++ /* VIRTIO_SND_CTL_TYPE_BOOLEAN|INTEGER value */ ++ uint32_t integer[128]; ++ /* VIRTIO_SND_CTL_TYPE_INTEGER64 value */ ++ uint64_t integer64[64]; ++ /* VIRTIO_SND_CTL_TYPE_ENUMERATED value (option indexes) */ ++ uint32_t enumerated[128]; ++ /* VIRTIO_SND_CTL_TYPE_BYTES value */ ++ uint8_t bytes[512]; ++ /* VIRTIO_SND_CTL_TYPE_IEC958 value */ ++ struct virtio_snd_ctl_iec958 iec958; ++ } value; ++}; ++ ++/* supported event reason types */ ++enum { ++ /* element's value has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_VALUE = 0, ++ /* element's information has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_INFO, ++ /* element's metadata has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_TLV ++}; ++ ++struct virtio_snd_ctl_event { ++ /* VIRTIO_SND_EVT_CTL_NOTIFY */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... 
virtio_snd_config::controls - 1 */ ++ uint16_t control_id; ++ /* event reason bit map (1 << VIRTIO_SND_CTL_EVT_MASK_XXX) */ ++ uint16_t mask; ++}; ++ + #endif /* VIRTIO_SND_IF_H */ +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index c59ea55cd8..2af9931ae9 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -37,9 +37,7 @@ + #include + #include + +-#define __KVM_HAVE_GUEST_DEBUG + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_READONLY_MEM + #define __KVM_HAVE_VCPU_EVENTS + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +@@ -76,11 +74,11 @@ struct kvm_regs { + + /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ + #define KVM_ARM_DEVICE_TYPE_SHIFT 0 +-#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ +- KVM_ARM_DEVICE_TYPE_SHIFT) ++#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ ++ KVM_ARM_DEVICE_TYPE_SHIFT) + #define KVM_ARM_DEVICE_ID_SHIFT 16 +-#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ +- KVM_ARM_DEVICE_ID_SHIFT) ++#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ ++ KVM_ARM_DEVICE_ID_SHIFT) + + /* Supported device IDs */ + #define KVM_ARM_DEVICE_VGIC_V2 0 +@@ -162,6 +160,11 @@ struct kvm_sync_regs { + __u64 device_irq_level; + }; + ++/* Bits for run->s.regs.device_irq_level */ ++#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) ++#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) ++#define KVM_ARM_DEV_PMU (1 << 2) ++ + /* + * PMU filter structure. Describe a range of events with a particular + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. +diff --git a/linux-headers/asm-arm64/sve_context.h b/linux-headers/asm-arm64/sve_context.h +index 1d0e3e1d09..d1b1ec8cb1 100644 +--- a/linux-headers/asm-arm64/sve_context.h ++++ b/linux-headers/asm-arm64/sve_context.h +@@ -13,6 +13,17 @@ + + #define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ + ++/* ++ * Yes, __SVE_VQ_MAX is 512 QUADWORDS. 
++ * ++ * To help ensure forward portability, this is much larger than the ++ * current maximum value defined by the SVE architecture. While arrays ++ * or static allocations can be sized based on this value, watch out! ++ * It will waste a surprisingly large amount of memory. ++ * ++ * Dynamic sizing based on the actual runtime vector length is likely to ++ * be preferable for most purposes. ++ */ + #define __SVE_VQ_MIN 1 + #define __SVE_VQ_MAX 512 + +diff --git a/linux-headers/asm-generic/bitsperlong.h b/linux-headers/asm-generic/bitsperlong.h +index 75f320fa91..1fb4f0c9f2 100644 +--- a/linux-headers/asm-generic/bitsperlong.h ++++ b/linux-headers/asm-generic/bitsperlong.h +@@ -24,4 +24,8 @@ + #endif + #endif + ++#ifndef __BITS_PER_LONG_LONG ++#define __BITS_PER_LONG_LONG 64 ++#endif ++ + #endif /* __ASM_GENERIC_BITS_PER_LONG */ +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 923d0bd382..109785922c 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -14,8 +14,6 @@ + * Some parts derived from the x86 version of this file. + */ + +-#define __KVM_HAVE_READONLY_MEM +- + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 + +diff --git a/linux-headers/asm-mips/kvm.h b/linux-headers/asm-mips/kvm.h +index edcf717c43..9673dc9cb3 100644 +--- a/linux-headers/asm-mips/kvm.h ++++ b/linux-headers/asm-mips/kvm.h +@@ -20,8 +20,6 @@ + * Some parts derived from the x86 version of this file. 
+ */ + +-#define __KVM_HAVE_READONLY_MEM +- + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + + /* +diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h +index 9f18fa090f..1691297a76 100644 +--- a/linux-headers/asm-powerpc/kvm.h ++++ b/linux-headers/asm-powerpc/kvm.h +@@ -28,7 +28,6 @@ + #define __KVM_HAVE_PPC_SMT + #define __KVM_HAVE_IRQCHIP + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_GUEST_DEBUG + + /* Not always available, but if it is, this is the correct offset. */ + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +@@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq { + #define KVM_XIVE_TIMA_PAGE_OFFSET 0 + #define KVM_XIVE_ESB_PAGE_OFFSET 4 + ++/* for KVM_PPC_GET_PVINFO */ ++ ++#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) ++ ++struct kvm_ppc_pvinfo { ++ /* out */ ++ __u32 flags; ++ __u32 hcall[4]; ++ __u8 pad[108]; ++}; ++ ++/* for KVM_PPC_GET_SMMU_INFO */ ++#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 ++ ++struct kvm_ppc_one_page_size { ++ __u32 page_shift; /* Page shift (or 0) */ ++ __u32 pte_enc; /* Encoding in the HPTE (>>12) */ ++}; ++ ++struct kvm_ppc_one_seg_page_size { ++ __u32 page_shift; /* Base page shift of segment (or 0) */ ++ __u32 slb_enc; /* SLB encoding for BookS */ ++ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; ++}; ++ ++#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 ++#define KVM_PPC_1T_SEGMENTS 0x00000002 ++#define KVM_PPC_NO_HASH 0x00000004 ++ ++struct kvm_ppc_smmu_info { ++ __u64 flags; ++ __u32 slb_size; ++ __u16 data_keys; /* # storage keys supported for data */ ++ __u16 instr_keys; /* # storage keys supported for instructions */ ++ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; ++}; ++ ++/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ ++struct kvm_ppc_resize_hpt { ++ __u64 flags; ++ __u32 shift; ++ __u32 pad; ++}; ++ + #endif /* __LINUX_KVM_POWERPC_H */ +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 7499e88a94..b1c503c295 100644 +--- a/linux-headers/asm-riscv/kvm.h 
++++ b/linux-headers/asm-riscv/kvm.h +@@ -16,7 +16,6 @@ + #include + + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_READONLY_MEM + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +@@ -166,6 +165,8 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_ZVFH, + KVM_RISCV_ISA_EXT_ZVFHMIN, + KVM_RISCV_ISA_EXT_ZFA, ++ KVM_RISCV_ISA_EXT_ZTSO, ++ KVM_RISCV_ISA_EXT_ZACAS, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h +index 023a2763a9..684c4e1205 100644 +--- a/linux-headers/asm-s390/kvm.h ++++ b/linux-headers/asm-s390/kvm.h +@@ -12,7 +12,320 @@ + #include + + #define __KVM_S390 +-#define __KVM_HAVE_GUEST_DEBUG ++ ++struct kvm_s390_skeys { ++ __u64 start_gfn; ++ __u64 count; ++ __u64 skeydata_addr; ++ __u32 flags; ++ __u32 reserved[9]; ++}; ++ ++#define KVM_S390_CMMA_PEEK (1 << 0) ++ ++/** ++ * kvm_s390_cmma_log - Used for CMMA migration. ++ * ++ * Used both for input and output. ++ * ++ * @start_gfn: Guest page number to start from. ++ * @count: Size of the result buffer. ++ * @flags: Control operation mode via KVM_S390_CMMA_* flags ++ * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty ++ * pages are still remaining. ++ * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set ++ * in the PGSTE. ++ * @values: Pointer to the values buffer. ++ * ++ * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. 
++ */ ++struct kvm_s390_cmma_log { ++ __u64 start_gfn; ++ __u32 count; ++ __u32 flags; ++ union { ++ __u64 remaining; ++ __u64 mask; ++ }; ++ __u64 values; ++}; ++ ++#define KVM_S390_RESET_POR 1 ++#define KVM_S390_RESET_CLEAR 2 ++#define KVM_S390_RESET_SUBSYSTEM 4 ++#define KVM_S390_RESET_CPU_INIT 8 ++#define KVM_S390_RESET_IPL 16 ++ ++/* for KVM_S390_MEM_OP */ ++struct kvm_s390_mem_op { ++ /* in */ ++ __u64 gaddr; /* the guest address */ ++ __u64 flags; /* flags */ ++ __u32 size; /* amount of bytes */ ++ __u32 op; /* type of operation */ ++ __u64 buf; /* buffer in userspace */ ++ union { ++ struct { ++ __u8 ar; /* the access register number */ ++ __u8 key; /* access key, ignored if flag unset */ ++ __u8 pad1[6]; /* ignored */ ++ __u64 old_addr; /* ignored if cmpxchg flag unset */ ++ }; ++ __u32 sida_offset; /* offset into the sida */ ++ __u8 reserved[32]; /* ignored */ ++ }; ++}; ++/* types for kvm_s390_mem_op->op */ ++#define KVM_S390_MEMOP_LOGICAL_READ 0 ++#define KVM_S390_MEMOP_LOGICAL_WRITE 1 ++#define KVM_S390_MEMOP_SIDA_READ 2 ++#define KVM_S390_MEMOP_SIDA_WRITE 3 ++#define KVM_S390_MEMOP_ABSOLUTE_READ 4 ++#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 ++#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 ++ ++/* flags for kvm_s390_mem_op->flags */ ++#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) ++#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) ++#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) ++ ++/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ ++#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) ++#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) ++ ++struct kvm_s390_psw { ++ __u64 mask; ++ __u64 addr; ++}; ++ ++/* valid values for type in kvm_s390_interrupt */ ++#define KVM_S390_SIGP_STOP 0xfffe0000u ++#define KVM_S390_PROGRAM_INT 0xfffe0001u ++#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u ++#define KVM_S390_RESTART 0xfffe0003u ++#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u ++#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u ++#define 
KVM_S390_MCHK 0xfffe1000u ++#define KVM_S390_INT_CLOCK_COMP 0xffff1004u ++#define KVM_S390_INT_CPU_TIMER 0xffff1005u ++#define KVM_S390_INT_VIRTIO 0xffff2603u ++#define KVM_S390_INT_SERVICE 0xffff2401u ++#define KVM_S390_INT_EMERGENCY 0xffff1201u ++#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u ++/* Anything below 0xfffe0000u is taken by INT_IO */ ++#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ ++ (((schid)) | \ ++ ((ssid) << 16) | \ ++ ((cssid) << 18) | \ ++ ((ai) << 26)) ++#define KVM_S390_INT_IO_MIN 0x00000000u ++#define KVM_S390_INT_IO_MAX 0xfffdffffu ++#define KVM_S390_INT_IO_AI_MASK 0x04000000u ++ ++ ++struct kvm_s390_interrupt { ++ __u32 type; ++ __u32 parm; ++ __u64 parm64; ++}; ++ ++struct kvm_s390_io_info { ++ __u16 subchannel_id; ++ __u16 subchannel_nr; ++ __u32 io_int_parm; ++ __u32 io_int_word; ++}; ++ ++struct kvm_s390_ext_info { ++ __u32 ext_params; ++ __u32 pad; ++ __u64 ext_params2; ++}; ++ ++struct kvm_s390_pgm_info { ++ __u64 trans_exc_code; ++ __u64 mon_code; ++ __u64 per_address; ++ __u32 data_exc_code; ++ __u16 code; ++ __u16 mon_class_nr; ++ __u8 per_code; ++ __u8 per_atmid; ++ __u8 exc_access_id; ++ __u8 per_access_id; ++ __u8 op_access_id; ++#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 ++#define KVM_S390_PGM_FLAGS_ILC_0 0x02 ++#define KVM_S390_PGM_FLAGS_ILC_1 0x04 ++#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 ++#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 ++ __u8 flags; ++ __u8 pad[2]; ++}; ++ ++struct kvm_s390_prefix_info { ++ __u32 address; ++}; ++ ++struct kvm_s390_extcall_info { ++ __u16 code; ++}; ++ ++struct kvm_s390_emerg_info { ++ __u16 code; ++}; ++ ++#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 ++struct kvm_s390_stop_info { ++ __u32 flags; ++}; ++ ++struct kvm_s390_mchk_info { ++ __u64 cr14; ++ __u64 mcic; ++ __u64 failing_storage_address; ++ __u32 ext_damage_code; ++ __u32 pad; ++ __u8 fixed_logout[16]; ++}; ++ ++struct kvm_s390_irq { ++ __u64 type; ++ union { ++ struct kvm_s390_io_info io; ++ struct kvm_s390_ext_info ext; ++ struct 
kvm_s390_pgm_info pgm; ++ struct kvm_s390_emerg_info emerg; ++ struct kvm_s390_extcall_info extcall; ++ struct kvm_s390_prefix_info prefix; ++ struct kvm_s390_stop_info stop; ++ struct kvm_s390_mchk_info mchk; ++ char reserved[64]; ++ } u; ++}; ++ ++struct kvm_s390_irq_state { ++ __u64 buf; ++ __u32 flags; /* will stay unused for compatibility reasons */ ++ __u32 len; ++ __u32 reserved[4]; /* will stay unused for compatibility reasons */ ++}; ++ ++struct kvm_s390_ucas_mapping { ++ __u64 user_addr; ++ __u64 vcpu_addr; ++ __u64 length; ++}; ++ ++struct kvm_s390_pv_sec_parm { ++ __u64 origin; ++ __u64 length; ++}; ++ ++struct kvm_s390_pv_unp { ++ __u64 addr; ++ __u64 size; ++ __u64 tweak; ++}; ++ ++enum pv_cmd_dmp_id { ++ KVM_PV_DUMP_INIT, ++ KVM_PV_DUMP_CONFIG_STOR_STATE, ++ KVM_PV_DUMP_COMPLETE, ++ KVM_PV_DUMP_CPU, ++}; ++ ++struct kvm_s390_pv_dmp { ++ __u64 subcmd; ++ __u64 buff_addr; ++ __u64 buff_len; ++ __u64 gaddr; /* For dump storage state */ ++ __u64 reserved[4]; ++}; ++ ++enum pv_cmd_info_id { ++ KVM_PV_INFO_VM, ++ KVM_PV_INFO_DUMP, ++}; ++ ++struct kvm_s390_pv_info_dump { ++ __u64 dump_cpu_buffer_len; ++ __u64 dump_config_mem_buffer_per_1m; ++ __u64 dump_config_finalize_len; ++}; ++ ++struct kvm_s390_pv_info_vm { ++ __u64 inst_calls_list[4]; ++ __u64 max_cpus; ++ __u64 max_guests; ++ __u64 max_guest_addr; ++ __u64 feature_indication; ++}; ++ ++struct kvm_s390_pv_info_header { ++ __u32 id; ++ __u32 len_max; ++ __u32 len_written; ++ __u32 reserved; ++}; ++ ++struct kvm_s390_pv_info { ++ struct kvm_s390_pv_info_header header; ++ union { ++ struct kvm_s390_pv_info_dump dump; ++ struct kvm_s390_pv_info_vm vm; ++ }; ++}; ++ ++enum pv_cmd_id { ++ KVM_PV_ENABLE, ++ KVM_PV_DISABLE, ++ KVM_PV_SET_SEC_PARMS, ++ KVM_PV_UNPACK, ++ KVM_PV_VERIFY, ++ KVM_PV_PREP_RESET, ++ KVM_PV_UNSHARE_ALL, ++ KVM_PV_INFO, ++ KVM_PV_DUMP, ++ KVM_PV_ASYNC_CLEANUP_PREPARE, ++ KVM_PV_ASYNC_CLEANUP_PERFORM, ++}; ++ ++struct kvm_pv_cmd { ++ __u32 cmd; /* Command to be executed */ ++ __u16 rc; 
/* Ultravisor return code */ ++ __u16 rrc; /* Ultravisor return reason code */ ++ __u64 data; /* Data or address */ ++ __u32 flags; /* flags for future extensions. Must be 0 for now */ ++ __u32 reserved[3]; ++}; ++ ++struct kvm_s390_zpci_op { ++ /* in */ ++ __u32 fh; /* target device */ ++ __u8 op; /* operation to perform */ ++ __u8 pad[3]; ++ union { ++ /* for KVM_S390_ZPCIOP_REG_AEN */ ++ struct { ++ __u64 ibv; /* Guest addr of interrupt bit vector */ ++ __u64 sb; /* Guest addr of summary bit */ ++ __u32 flags; ++ __u32 noi; /* Number of interrupts */ ++ __u8 isc; /* Guest interrupt subclass */ ++ __u8 sbo; /* Offset of guest summary bit vector */ ++ __u16 pad; ++ } reg_aen; ++ __u64 reserved[8]; ++ } u; ++}; ++ ++/* types for kvm_s390_zpci_op->op */ ++#define KVM_S390_ZPCIOP_REG_AEN 0 ++#define KVM_S390_ZPCIOP_DEREG_AEN 1 ++ ++/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ ++#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) + + /* Device control API: s390-specific devices */ + #define KVM_DEV_FLIC_GET_ALL_IRQS 1 +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 003fb74534..31c95c2dfe 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -7,6 +7,8 @@ + * + */ + ++#include ++#include + #include + #include + #include +@@ -40,7 +42,6 @@ + #define __KVM_HAVE_IRQ_LINE + #define __KVM_HAVE_MSI + #define __KVM_HAVE_USER_NMI +-#define __KVM_HAVE_GUEST_DEBUG + #define __KVM_HAVE_MSIX + #define __KVM_HAVE_MCE + #define __KVM_HAVE_PIT_STATE2 +@@ -49,7 +50,6 @@ + #define __KVM_HAVE_DEBUGREGS + #define __KVM_HAVE_XSAVE + #define __KVM_HAVE_XCRS +-#define __KVM_HAVE_READONLY_MEM + + /* Architectural interrupt line count. 
*/ + #define KVM_NR_INTERRUPTS 256 +@@ -455,8 +455,13 @@ struct kvm_sync_regs { + + #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + +-/* attributes for system fd (group 0) */ +-#define KVM_X86_XCOMP_GUEST_SUPP 0 ++/* vendor-independent attributes for system fd (group 0) */ ++#define KVM_X86_GRP_SYSTEM 0 ++# define KVM_X86_XCOMP_GUEST_SUPP 0 ++ ++/* vendor-specific groups and attributes for system fd */ ++#define KVM_X86_GRP_SEV 1 ++# define KVM_X86_SEV_VMSA_FEATURES 0 + + struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +@@ -524,9 +529,310 @@ struct kvm_pmu_event_filter { + #define KVM_PMU_EVENT_ALLOW 0 + #define KVM_PMU_EVENT_DENY 1 + +-#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) ++#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0) + #define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) + ++/* for KVM_CAP_MCE */ ++struct kvm_x86_mce { ++ __u64 status; ++ __u64 addr; ++ __u64 misc; ++ __u64 mcg_status; ++ __u8 bank; ++ __u8 pad1[7]; ++ __u64 pad2[3]; ++}; ++ ++/* for KVM_CAP_XEN_HVM */ ++#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) ++#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) ++#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) ++#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) ++#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) ++#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) ++#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) ++#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) ++#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) ++ ++struct kvm_xen_hvm_config { ++ __u32 flags; ++ __u32 msr; ++ __u64 blob_addr_32; ++ __u64 blob_addr_64; ++ __u8 blob_size_32; ++ __u8 blob_size_64; ++ __u8 pad2[30]; ++}; ++ ++struct kvm_xen_hvm_attr { ++ __u16 type; ++ __u16 pad[3]; ++ union { ++ __u8 long_mode; ++ __u8 vector; ++ __u8 runstate_update_flag; ++ union { ++ __u64 gfn; ++#define KVM_XEN_INVALID_GFN ((__u64)-1) ++ __u64 hva; ++ } shared_info; ++ struct { ++ __u32 send_port; ++ 
__u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ ++ __u32 flags; ++#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) ++#define KVM_XEN_EVTCHN_UPDATE (1 << 1) ++#define KVM_XEN_EVTCHN_RESET (1 << 2) ++ /* ++ * Events sent by the guest are either looped back to ++ * the guest itself (potentially on a different port#) ++ * or signalled via an eventfd. ++ */ ++ union { ++ struct { ++ __u32 port; ++ __u32 vcpu; ++ __u32 priority; ++ } port; ++ struct { ++ __u32 port; /* Zero for eventfd */ ++ __s32 fd; ++ } eventfd; ++ __u32 padding[4]; ++ } deliver; ++ } evtchn; ++ __u32 xen_version; ++ __u64 pad[8]; ++ } u; ++}; ++ ++ ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ ++#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 ++#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 ++#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ ++#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 ++#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ ++#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ ++#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6 ++ ++struct kvm_xen_vcpu_attr { ++ __u16 type; ++ __u16 pad[3]; ++ union { ++ __u64 gpa; ++#define KVM_XEN_INVALID_GPA ((__u64)-1) ++ __u64 hva; ++ __u64 pad[8]; ++ struct { ++ __u64 state; ++ __u64 state_entry_time; ++ __u64 time_running; ++ __u64 time_runnable; ++ __u64 time_blocked; ++ __u64 time_offline; ++ } runstate; ++ __u32 vcpu_id; ++ struct { ++ __u32 port; ++ __u32 priority; ++ __u64 expires_ns; ++ } timer; ++ __u8 vector; ++ } u; ++}; ++ ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 ++#define 
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 ++#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 ++#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9 ++ ++/* Secure Encrypted Virtualization command */ ++enum sev_cmd_id { ++ /* Guest initialization commands */ ++ KVM_SEV_INIT = 0, ++ KVM_SEV_ES_INIT, ++ /* Guest launch commands */ ++ KVM_SEV_LAUNCH_START, ++ KVM_SEV_LAUNCH_UPDATE_DATA, ++ KVM_SEV_LAUNCH_UPDATE_VMSA, ++ KVM_SEV_LAUNCH_SECRET, ++ KVM_SEV_LAUNCH_MEASURE, ++ KVM_SEV_LAUNCH_FINISH, ++ /* Guest migration commands (outgoing) */ ++ KVM_SEV_SEND_START, ++ KVM_SEV_SEND_UPDATE_DATA, ++ KVM_SEV_SEND_UPDATE_VMSA, ++ KVM_SEV_SEND_FINISH, ++ /* Guest migration commands (incoming) */ ++ KVM_SEV_RECEIVE_START, ++ KVM_SEV_RECEIVE_UPDATE_DATA, ++ KVM_SEV_RECEIVE_UPDATE_VMSA, ++ KVM_SEV_RECEIVE_FINISH, ++ /* Guest status and debug commands */ ++ KVM_SEV_GUEST_STATUS, ++ KVM_SEV_DBG_DECRYPT, ++ KVM_SEV_DBG_ENCRYPT, ++ /* Guest certificates commands */ ++ KVM_SEV_CERT_EXPORT, ++ /* Attestation report */ ++ KVM_SEV_GET_ATTESTATION_REPORT, ++ /* Guest Migration Extension */ ++ KVM_SEV_SEND_CANCEL, ++ ++ /* Second time is the charm; improved versions of the above ioctls. 
*/ ++ KVM_SEV_INIT2, ++ ++ KVM_SEV_NR_MAX, ++}; ++ ++struct kvm_sev_cmd { ++ __u32 id; ++ __u32 pad0; ++ __u64 data; ++ __u32 error; ++ __u32 sev_fd; ++}; ++ ++struct kvm_sev_init { ++ __u64 vmsa_features; ++ __u32 flags; ++ __u32 pad[9]; ++}; ++ ++struct kvm_sev_launch_start { ++ __u32 handle; ++ __u32 policy; ++ __u64 dh_uaddr; ++ __u32 dh_len; ++ __u32 pad0; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad1; ++}; ++ ++struct kvm_sev_launch_update_data { ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++ ++struct kvm_sev_launch_secret { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++struct kvm_sev_launch_measure { ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_guest_status { ++ __u32 handle; ++ __u32 policy; ++ __u32 state; ++}; ++ ++struct kvm_sev_dbg { ++ __u64 src_uaddr; ++ __u64 dst_uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_attestation_report { ++ __u8 mnonce[16]; ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_send_start { ++ __u32 policy; ++ __u32 pad0; ++ __u64 pdh_cert_uaddr; ++ __u32 pdh_cert_len; ++ __u32 pad1; ++ __u64 plat_certs_uaddr; ++ __u32 plat_certs_len; ++ __u32 pad2; ++ __u64 amd_certs_uaddr; ++ __u32 amd_certs_len; ++ __u32 pad3; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad4; ++}; ++ ++struct kvm_sev_send_update_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++struct kvm_sev_receive_start { ++ __u32 handle; ++ __u32 policy; ++ __u64 pdh_uaddr; ++ __u32 pdh_len; ++ __u32 pad0; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad1; ++}; ++ ++struct kvm_sev_receive_update_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; 
++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) ++#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) ++ ++struct kvm_hyperv_eventfd { ++ __u32 conn_id; ++ __s32 fd; ++ __u32 flags; ++ __u32 padding[3]; ++}; ++ ++#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff ++#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) ++ + /* + * Masked event layout. + * Bits Description +@@ -547,10 +853,10 @@ struct kvm_pmu_event_filter { + ((__u64)(!!(exclude)) << 55)) + + #define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ +- (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) +-#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) +-#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) +-#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) ++ (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32)) ++#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56)) ++#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8)) ++#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55)) + #define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) + + /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ +@@ -558,9 +864,11 @@ struct kvm_pmu_event_filter { + #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ + + /* x86-specific KVM_EXIT_HYPERCALL flags. */ +-#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) ++#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0) + + #define KVM_X86_DEFAULT_VM 0 + #define KVM_X86_SW_PROTECTED_VM 1 ++#define KVM_X86_SEV_VM 2 ++#define KVM_X86_SEV_ES_VM 3 + + #endif /* _ASM_X86_KVM_H */ +diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h +new file mode 100644 +index 0000000000..d9897771be +--- /dev/null ++++ b/linux-headers/linux/bits.h +@@ -0,0 +1,15 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* bits.h: Macros for dealing with bitmasks. 
*/ ++ ++#ifndef _LINUX_BITS_H ++#define _LINUX_BITS_H ++ ++#define __GENMASK(h, l) \ ++ (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ ++ (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) ++ ++#define __GENMASK_ULL(h, l) \ ++ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ ++ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) ++ ++#endif /* _LINUX_BITS_H */ +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 17839229b2..038731cdef 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -16,6 +16,11 @@ + + #define KVM_API_VERSION 12 + ++/* ++ * Backwards-compatible definitions. ++ */ ++#define __KVM_HAVE_GUEST_DEBUG ++ + /* for KVM_SET_USER_MEMORY_REGION */ + struct kvm_userspace_memory_region { + __u32 slot; +@@ -85,43 +90,6 @@ struct kvm_pit_config { + + #define KVM_PIT_SPEAKER_DUMMY 1 + +-struct kvm_s390_skeys { +- __u64 start_gfn; +- __u64 count; +- __u64 skeydata_addr; +- __u32 flags; +- __u32 reserved[9]; +-}; +- +-#define KVM_S390_CMMA_PEEK (1 << 0) +- +-/** +- * kvm_s390_cmma_log - Used for CMMA migration. +- * +- * Used both for input and output. +- * +- * @start_gfn: Guest page number to start from. +- * @count: Size of the result buffer. +- * @flags: Control operation mode via KVM_S390_CMMA_* flags +- * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty +- * pages are still remaining. +- * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set +- * in the PGSTE. +- * @values: Pointer to the values buffer. +- * +- * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. 
+- */ +-struct kvm_s390_cmma_log { +- __u64 start_gfn; +- __u32 count; +- __u32 flags; +- union { +- __u64 remaining; +- __u64 mask; +- }; +- __u64 values; +-}; +- + struct kvm_hyperv_exit { + #define KVM_EXIT_HYPERV_SYNIC 1 + #define KVM_EXIT_HYPERV_HCALL 2 +@@ -313,11 +281,6 @@ struct kvm_run { + __u32 ipb; + } s390_sieic; + /* KVM_EXIT_S390_RESET */ +-#define KVM_S390_RESET_POR 1 +-#define KVM_S390_RESET_CLEAR 2 +-#define KVM_S390_RESET_SUBSYSTEM 4 +-#define KVM_S390_RESET_CPU_INIT 8 +-#define KVM_S390_RESET_IPL 16 + __u64 s390_reset_flags; + /* KVM_EXIT_S390_UCONTROL */ + struct { +@@ -532,43 +495,6 @@ struct kvm_translation { + __u8 pad[5]; + }; + +-/* for KVM_S390_MEM_OP */ +-struct kvm_s390_mem_op { +- /* in */ +- __u64 gaddr; /* the guest address */ +- __u64 flags; /* flags */ +- __u32 size; /* amount of bytes */ +- __u32 op; /* type of operation */ +- __u64 buf; /* buffer in userspace */ +- union { +- struct { +- __u8 ar; /* the access register number */ +- __u8 key; /* access key, ignored if flag unset */ +- __u8 pad1[6]; /* ignored */ +- __u64 old_addr; /* ignored if cmpxchg flag unset */ +- }; +- __u32 sida_offset; /* offset into the sida */ +- __u8 reserved[32]; /* ignored */ +- }; +-}; +-/* types for kvm_s390_mem_op->op */ +-#define KVM_S390_MEMOP_LOGICAL_READ 0 +-#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +-#define KVM_S390_MEMOP_SIDA_READ 2 +-#define KVM_S390_MEMOP_SIDA_WRITE 3 +-#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +-#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +-#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 +- +-/* flags for kvm_s390_mem_op->flags */ +-#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +-#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +-#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +- +-/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +-#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +-#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) +- + /* for KVM_INTERRUPT */ + struct kvm_interrupt { + /* in */ 
+@@ -633,124 +559,6 @@ struct kvm_mp_state { + __u32 mp_state; + }; + +-struct kvm_s390_psw { +- __u64 mask; +- __u64 addr; +-}; +- +-/* valid values for type in kvm_s390_interrupt */ +-#define KVM_S390_SIGP_STOP 0xfffe0000u +-#define KVM_S390_PROGRAM_INT 0xfffe0001u +-#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +-#define KVM_S390_RESTART 0xfffe0003u +-#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +-#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u +-#define KVM_S390_MCHK 0xfffe1000u +-#define KVM_S390_INT_CLOCK_COMP 0xffff1004u +-#define KVM_S390_INT_CPU_TIMER 0xffff1005u +-#define KVM_S390_INT_VIRTIO 0xffff2603u +-#define KVM_S390_INT_SERVICE 0xffff2401u +-#define KVM_S390_INT_EMERGENCY 0xffff1201u +-#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +-/* Anything below 0xfffe0000u is taken by INT_IO */ +-#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ +- (((schid)) | \ +- ((ssid) << 16) | \ +- ((cssid) << 18) | \ +- ((ai) << 26)) +-#define KVM_S390_INT_IO_MIN 0x00000000u +-#define KVM_S390_INT_IO_MAX 0xfffdffffu +-#define KVM_S390_INT_IO_AI_MASK 0x04000000u +- +- +-struct kvm_s390_interrupt { +- __u32 type; +- __u32 parm; +- __u64 parm64; +-}; +- +-struct kvm_s390_io_info { +- __u16 subchannel_id; +- __u16 subchannel_nr; +- __u32 io_int_parm; +- __u32 io_int_word; +-}; +- +-struct kvm_s390_ext_info { +- __u32 ext_params; +- __u32 pad; +- __u64 ext_params2; +-}; +- +-struct kvm_s390_pgm_info { +- __u64 trans_exc_code; +- __u64 mon_code; +- __u64 per_address; +- __u32 data_exc_code; +- __u16 code; +- __u16 mon_class_nr; +- __u8 per_code; +- __u8 per_atmid; +- __u8 exc_access_id; +- __u8 per_access_id; +- __u8 op_access_id; +-#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +-#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +-#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +-#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +-#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 +- __u8 flags; +- __u8 pad[2]; +-}; +- +-struct kvm_s390_prefix_info { +- __u32 address; +-}; +- +-struct kvm_s390_extcall_info { +- __u16 code; 
+-}; +- +-struct kvm_s390_emerg_info { +- __u16 code; +-}; +- +-#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 +-struct kvm_s390_stop_info { +- __u32 flags; +-}; +- +-struct kvm_s390_mchk_info { +- __u64 cr14; +- __u64 mcic; +- __u64 failing_storage_address; +- __u32 ext_damage_code; +- __u32 pad; +- __u8 fixed_logout[16]; +-}; +- +-struct kvm_s390_irq { +- __u64 type; +- union { +- struct kvm_s390_io_info io; +- struct kvm_s390_ext_info ext; +- struct kvm_s390_pgm_info pgm; +- struct kvm_s390_emerg_info emerg; +- struct kvm_s390_extcall_info extcall; +- struct kvm_s390_prefix_info prefix; +- struct kvm_s390_stop_info stop; +- struct kvm_s390_mchk_info mchk; +- char reserved[64]; +- } u; +-}; +- +-struct kvm_s390_irq_state { +- __u64 buf; +- __u32 flags; /* will stay unused for compatibility reasons */ +- __u32 len; +- __u32 reserved[4]; /* will stay unused for compatibility reasons */ +-}; +- + /* for KVM_SET_GUEST_DEBUG */ + + #define KVM_GUESTDBG_ENABLE 0x00000001 +@@ -806,50 +614,6 @@ struct kvm_enable_cap { + __u8 pad[64]; + }; + +-/* for KVM_PPC_GET_PVINFO */ +- +-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) +- +-struct kvm_ppc_pvinfo { +- /* out */ +- __u32 flags; +- __u32 hcall[4]; +- __u8 pad[108]; +-}; +- +-/* for KVM_PPC_GET_SMMU_INFO */ +-#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 +- +-struct kvm_ppc_one_page_size { +- __u32 page_shift; /* Page shift (or 0) */ +- __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +-}; +- +-struct kvm_ppc_one_seg_page_size { +- __u32 page_shift; /* Base page shift of segment (or 0) */ +- __u32 slb_enc; /* SLB encoding for BookS */ +- struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +-}; +- +-#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +-#define KVM_PPC_1T_SEGMENTS 0x00000002 +-#define KVM_PPC_NO_HASH 0x00000004 +- +-struct kvm_ppc_smmu_info { +- __u64 flags; +- __u32 slb_size; +- __u16 data_keys; /* # storage keys supported for data */ +- __u16 instr_keys; /* # storage keys supported for instructions */ +- struct 
kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +-}; +- +-/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +-struct kvm_ppc_resize_hpt { +- __u64 flags; +- __u32 shift; +- __u32 pad; +-}; +- + #define KVMIO 0xAE + + /* machine type bits, to be used as argument to KVM_CREATE_VM */ +@@ -919,9 +683,7 @@ struct kvm_ppc_resize_hpt { + /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ + #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 + #define KVM_CAP_USER_NMI 22 +-#ifdef __KVM_HAVE_GUEST_DEBUG + #define KVM_CAP_SET_GUEST_DEBUG 23 +-#endif + #ifdef __KVM_HAVE_PIT + #define KVM_CAP_REINJECT_CONTROL 24 + #endif +@@ -1152,8 +914,6 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_GUEST_MEMFD 234 + #define KVM_CAP_VM_TYPES 235 + +-#ifdef KVM_CAP_IRQ_ROUTING +- + struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +@@ -1218,42 +978,6 @@ struct kvm_irq_routing { + struct kvm_irq_routing_entry entries[]; + }; + +-#endif +- +-#ifdef KVM_CAP_MCE +-/* x86 MCE */ +-struct kvm_x86_mce { +- __u64 status; +- __u64 addr; +- __u64 misc; +- __u64 mcg_status; +- __u8 bank; +- __u8 pad1[7]; +- __u64 pad2[3]; +-}; +-#endif +- +-#ifdef KVM_CAP_XEN_HVM +-#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +-#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +-#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +-#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +-#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +-#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +-#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +-#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) +- +-struct kvm_xen_hvm_config { +- __u32 flags; +- __u32 msr; +- __u64 blob_addr_32; +- __u64 blob_addr_64; +- __u8 blob_size_32; +- __u8 blob_size_64; +- __u8 pad2[30]; +-}; +-#endif +- + #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) + /* + * Available with KVM_CAP_IRQFD_RESAMPLE +@@ -1438,11 +1162,6 @@ struct kvm_vfio_spapr_tce { + struct kvm_userspace_memory_region2) + + /* enable ucontrol for s390 */ +-struct 
kvm_s390_ucas_mapping { +- __u64 user_addr; +- __u64 vcpu_addr; +- __u64 length; +-}; + #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) + #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) + #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) +@@ -1637,89 +1356,6 @@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + +-struct kvm_s390_pv_sec_parm { +- __u64 origin; +- __u64 length; +-}; +- +-struct kvm_s390_pv_unp { +- __u64 addr; +- __u64 size; +- __u64 tweak; +-}; +- +-enum pv_cmd_dmp_id { +- KVM_PV_DUMP_INIT, +- KVM_PV_DUMP_CONFIG_STOR_STATE, +- KVM_PV_DUMP_COMPLETE, +- KVM_PV_DUMP_CPU, +-}; +- +-struct kvm_s390_pv_dmp { +- __u64 subcmd; +- __u64 buff_addr; +- __u64 buff_len; +- __u64 gaddr; /* For dump storage state */ +- __u64 reserved[4]; +-}; +- +-enum pv_cmd_info_id { +- KVM_PV_INFO_VM, +- KVM_PV_INFO_DUMP, +-}; +- +-struct kvm_s390_pv_info_dump { +- __u64 dump_cpu_buffer_len; +- __u64 dump_config_mem_buffer_per_1m; +- __u64 dump_config_finalize_len; +-}; +- +-struct kvm_s390_pv_info_vm { +- __u64 inst_calls_list[4]; +- __u64 max_cpus; +- __u64 max_guests; +- __u64 max_guest_addr; +- __u64 feature_indication; +-}; +- +-struct kvm_s390_pv_info_header { +- __u32 id; +- __u32 len_max; +- __u32 len_written; +- __u32 reserved; +-}; +- +-struct kvm_s390_pv_info { +- struct kvm_s390_pv_info_header header; +- union { +- struct kvm_s390_pv_info_dump dump; +- struct kvm_s390_pv_info_vm vm; +- }; +-}; +- +-enum pv_cmd_id { +- KVM_PV_ENABLE, +- KVM_PV_DISABLE, +- KVM_PV_SET_SEC_PARMS, +- KVM_PV_UNPACK, +- KVM_PV_VERIFY, +- KVM_PV_PREP_RESET, +- KVM_PV_UNSHARE_ALL, +- KVM_PV_INFO, +- KVM_PV_DUMP, +- KVM_PV_ASYNC_CLEANUP_PREPARE, +- KVM_PV_ASYNC_CLEANUP_PERFORM, +-}; +- +-struct kvm_pv_cmd { +- __u32 cmd; /* Command to be executed */ +- __u16 rc; /* Ultravisor return code */ +- __u16 rrc; /* Ultravisor return reason code */ +- __u64 data; /* 
Data or address */ +- __u32 flags; /* flags for future extensions. Must be 0 for now */ +- __u32 reserved[3]; +-}; +- + /* Available with KVM_CAP_S390_PROTECTED */ + #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) + +@@ -1733,58 +1369,6 @@ struct kvm_pv_cmd { + #define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) + #define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +-struct kvm_xen_hvm_attr { +- __u16 type; +- __u16 pad[3]; +- union { +- __u8 long_mode; +- __u8 vector; +- __u8 runstate_update_flag; +- struct { +- __u64 gfn; +-#define KVM_XEN_INVALID_GFN ((__u64)-1) +- } shared_info; +- struct { +- __u32 send_port; +- __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ +- __u32 flags; +-#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +-#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +-#define KVM_XEN_EVTCHN_RESET (1 << 2) +- /* +- * Events sent by the guest are either looped back to +- * the guest itself (potentially on a different port#) +- * or signalled via an eventfd. 
+- */ +- union { +- struct { +- __u32 port; +- __u32 vcpu; +- __u32 priority; +- } port; +- struct { +- __u32 port; /* Zero for eventfd */ +- __s32 fd; +- } eventfd; +- __u32 padding[4]; +- } deliver; +- } evtchn; +- __u32 xen_version; +- __u64 pad[8]; +- } u; +-}; +- +- +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +-#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +-#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +-#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +-#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +-#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +-#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 +- + /* Per-vCPU Xen attributes */ + #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) + #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) +@@ -1795,242 +1379,6 @@ struct kvm_xen_hvm_attr { + #define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) + #define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + +-struct kvm_xen_vcpu_attr { +- __u16 type; +- __u16 pad[3]; +- union { +- __u64 gpa; +-#define KVM_XEN_INVALID_GPA ((__u64)-1) +- __u64 pad[8]; +- struct { +- __u64 state; +- __u64 state_entry_time; +- __u64 time_running; +- __u64 time_runnable; +- __u64 time_blocked; +- __u64 time_offline; +- } runstate; +- __u32 vcpu_id; +- struct { +- __u32 port; +- __u32 priority; +- __u64 expires_ns; +- } timer; +- __u8 vector; +- } u; +-}; +- +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +-/* Available with KVM_CAP_XEN_HVM / 
KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +-#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +-#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 +- +-/* Secure Encrypted Virtualization command */ +-enum sev_cmd_id { +- /* Guest initialization commands */ +- KVM_SEV_INIT = 0, +- KVM_SEV_ES_INIT, +- /* Guest launch commands */ +- KVM_SEV_LAUNCH_START, +- KVM_SEV_LAUNCH_UPDATE_DATA, +- KVM_SEV_LAUNCH_UPDATE_VMSA, +- KVM_SEV_LAUNCH_SECRET, +- KVM_SEV_LAUNCH_MEASURE, +- KVM_SEV_LAUNCH_FINISH, +- /* Guest migration commands (outgoing) */ +- KVM_SEV_SEND_START, +- KVM_SEV_SEND_UPDATE_DATA, +- KVM_SEV_SEND_UPDATE_VMSA, +- KVM_SEV_SEND_FINISH, +- /* Guest migration commands (incoming) */ +- KVM_SEV_RECEIVE_START, +- KVM_SEV_RECEIVE_UPDATE_DATA, +- KVM_SEV_RECEIVE_UPDATE_VMSA, +- KVM_SEV_RECEIVE_FINISH, +- /* Guest status and debug commands */ +- KVM_SEV_GUEST_STATUS, +- KVM_SEV_DBG_DECRYPT, +- KVM_SEV_DBG_ENCRYPT, +- /* Guest certificates commands */ +- KVM_SEV_CERT_EXPORT, +- /* Attestation report */ +- KVM_SEV_GET_ATTESTATION_REPORT, +- /* Guest Migration Extension */ +- KVM_SEV_SEND_CANCEL, +- +- KVM_SEV_NR_MAX, +-}; +- +-struct kvm_sev_cmd { +- __u32 id; +- __u64 data; +- __u32 error; +- __u32 sev_fd; +-}; +- +-struct kvm_sev_launch_start { +- __u32 handle; +- __u32 policy; +- __u64 dh_uaddr; +- __u32 dh_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_launch_update_data { +- __u64 uaddr; +- __u32 len; +-}; +- +- +-struct kvm_sev_launch_secret { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-struct kvm_sev_launch_measure { +- __u64 uaddr; +- __u32 len; +-}; +- +-struct kvm_sev_guest_status { +- __u32 handle; +- __u32 policy; +- __u32 state; +-}; +- +-struct kvm_sev_dbg { +- __u64 src_uaddr; +- __u64 dst_uaddr; +- __u32 len; +-}; +- +-struct kvm_sev_attestation_report { +- __u8 mnonce[16]; +- __u64 uaddr; +- __u32 len; +-}; +- 
+-struct kvm_sev_send_start { +- __u32 policy; +- __u64 pdh_cert_uaddr; +- __u32 pdh_cert_len; +- __u64 plat_certs_uaddr; +- __u32 plat_certs_len; +- __u64 amd_certs_uaddr; +- __u32 amd_certs_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_send_update_data { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-struct kvm_sev_receive_start { +- __u32 handle; +- __u32 policy; +- __u64 pdh_uaddr; +- __u32 pdh_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_receive_update_data { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +-#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) +-#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +- +-struct kvm_assigned_pci_dev { +- __u32 assigned_dev_id; +- __u32 busnr; +- __u32 devfn; +- __u32 flags; +- __u32 segnr; +- union { +- __u32 reserved[11]; +- }; +-}; +- +-#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +-#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +-#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) +- +-#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +-#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +-#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) +- +-#define KVM_DEV_IRQ_HOST_MASK 0x00ff +-#define KVM_DEV_IRQ_GUEST_MASK 0xff00 +- +-struct kvm_assigned_irq { +- __u32 assigned_dev_id; +- __u32 host_irq; /* ignored (legacy field) */ +- __u32 guest_irq; +- __u32 flags; +- union { +- __u32 reserved[12]; +- }; +-}; +- +-struct kvm_assigned_msix_nr { +- __u32 assigned_dev_id; +- __u16 entry_nr; +- __u16 padding; +-}; +- +-#define KVM_MAX_MSIX_PER_DEV 256 +-struct kvm_assigned_msix_entry { +- __u32 assigned_dev_id; +- __u32 gsi; +- __u16 entry; /* The index of entry in the MSI-X table */ +- __u16 padding[3]; +-}; +- +-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +- +-/* 
Available with KVM_CAP_ARM_USER_IRQ */ +- +-/* Bits for run->s.regs.device_irq_level */ +-#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +-#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +-#define KVM_ARM_DEV_PMU (1 << 2) +- +-struct kvm_hyperv_eventfd { +- __u32 conn_id; +- __s32 fd; +- __u32 flags; +- __u32 padding[3]; +-}; +- +-#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +-#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) +- + #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) + #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) + +@@ -2176,33 +1524,6 @@ struct kvm_stats_desc { + /* Available with KVM_CAP_S390_ZPCI_OP */ + #define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) + +-struct kvm_s390_zpci_op { +- /* in */ +- __u32 fh; /* target device */ +- __u8 op; /* operation to perform */ +- __u8 pad[3]; +- union { +- /* for KVM_S390_ZPCIOP_REG_AEN */ +- struct { +- __u64 ibv; /* Guest addr of interrupt bit vector */ +- __u64 sb; /* Guest addr of summary bit */ +- __u32 flags; +- __u32 noi; /* Number of interrupts */ +- __u8 isc; /* Guest interrupt subclass */ +- __u8 sbo; /* Offset of guest summary bit vector */ +- __u16 pad; +- } reg_aen; +- __u64 reserved[8]; +- } u; +-}; +- +-/* types for kvm_s390_zpci_op->op */ +-#define KVM_S390_ZPCIOP_REG_AEN 0 +-#define KVM_S390_ZPCIOP_DEREG_AEN 1 +- +-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ +-#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) +- + /* Available with KVM_CAP_MEMORY_ATTRIBUTES */ + #define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) + +diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h +index bcb21339ee..c3046c6bff 100644 +--- a/linux-headers/linux/psp-sev.h ++++ b/linux-headers/linux/psp-sev.h +@@ -28,6 +28,9 @@ enum { + SEV_PEK_CERT_IMPORT, + SEV_GET_ID, /* This command is deprecated, use SEV_GET_ID2 */ + SEV_GET_ID2, ++ SNP_PLATFORM_STATUS, ++ SNP_COMMIT, ++ SNP_SET_CONFIG, + + SEV_MAX, + }; +@@ -69,6 +72,12 @@ typedef enum { + SEV_RET_RESOURCE_LIMIT, + 
SEV_RET_SECURE_DATA_INVALID, + SEV_RET_INVALID_KEY = 0x27, ++ SEV_RET_INVALID_PAGE_SIZE, ++ SEV_RET_INVALID_PAGE_STATE, ++ SEV_RET_INVALID_MDATA_ENTRY, ++ SEV_RET_INVALID_PAGE_OWNER, ++ SEV_RET_INVALID_PAGE_AEAD_OFLOW, ++ SEV_RET_RMP_INIT_REQUIRED, + SEV_RET_MAX, + } sev_ret_code; + +@@ -155,6 +164,56 @@ struct sev_user_data_get_id2 { + __u32 length; /* In/Out */ + } __attribute__((packed)); + ++/** ++ * struct sev_user_data_snp_status - SNP status ++ * ++ * @api_major: API major version ++ * @api_minor: API minor version ++ * @state: current platform state ++ * @is_rmp_initialized: whether RMP is initialized or not ++ * @rsvd: reserved ++ * @build_id: firmware build id for the API version ++ * @mask_chip_id: whether chip id is present in attestation reports or not ++ * @mask_chip_key: whether attestation reports are signed or not ++ * @vlek_en: VLEK (Version Loaded Endorsement Key) hashstick is loaded ++ * @rsvd1: reserved ++ * @guest_count: the number of guest currently managed by the firmware ++ * @current_tcb_version: current TCB version ++ * @reported_tcb_version: reported TCB version ++ */ ++struct sev_user_data_snp_status { ++ __u8 api_major; /* Out */ ++ __u8 api_minor; /* Out */ ++ __u8 state; /* Out */ ++ __u8 is_rmp_initialized:1; /* Out */ ++ __u8 rsvd:7; ++ __u32 build_id; /* Out */ ++ __u32 mask_chip_id:1; /* Out */ ++ __u32 mask_chip_key:1; /* Out */ ++ __u32 vlek_en:1; /* Out */ ++ __u32 rsvd1:29; ++ __u32 guest_count; /* Out */ ++ __u64 current_tcb_version; /* Out */ ++ __u64 reported_tcb_version; /* Out */ ++} __attribute__((packed)); ++ ++/** ++ * struct sev_user_data_snp_config - system wide configuration value for SNP. ++ * ++ * @reported_tcb: the TCB version to report in the guest attestation report. 
++ * @mask_chip_id: whether chip id is present in attestation reports or not ++ * @mask_chip_key: whether attestation reports are signed or not ++ * @rsvd: reserved ++ * @rsvd1: reserved ++ */ ++struct sev_user_data_snp_config { ++ __u64 reported_tcb ; /* In */ ++ __u32 mask_chip_id:1; /* In */ ++ __u32 mask_chip_key:1; /* In */ ++ __u32 rsvd:30; /* In */ ++ __u8 rsvd1[52]; ++} __attribute__((packed)); ++ + /** + * struct sev_issue_cmd - SEV ioctl parameters + * +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index 649560c685..bea6973906 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -227,4 +227,11 @@ + */ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) ++ ++/* Get the queue size of a specific virtqueue. ++ * userspace set the vring index in vhost_vring_state.index ++ * kernel set the queue size in vhost_vring_state.num ++ */ ++#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ ++ struct vhost_vring_state) + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch b/SOURCES/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch new file mode 100644 index 0000000..6524d6a --- /dev/null +++ b/SOURCES/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch @@ -0,0 +1,71 @@ +From 9f485c8df885bcd1ff6c5692463c6168bfec07fb Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 13:29:53 +0200 +Subject: [PATCH 054/100] machine: allow early use of + machine_require_guest_memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [54/91] fd8c1a6624d5f27268215c8aa70dfc9d37bdb981 (bonzini/rhel-qemu-kvm) + +Ask the ConfidentialGuestSupport object whether to use guest_memfd +for KVM-backend private memory. 
This bool can be set in instance_init +(or user_complete) so that it is available when the machine is created. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit dc0d28ca46c0e7ee3c055ad4da24022995bd3765) +Signed-off-by: Paolo Bonzini +--- + hw/core/machine.c | 2 +- + include/exec/confidential-guest-support.h | 5 +++++ + include/hw/boards.h | 1 - + 3 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 07b994e136..2055e0d312 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1482,7 +1482,7 @@ bool machine_mem_merge(MachineState *machine) + + bool machine_require_guest_memfd(MachineState *machine) + { +- return machine->require_guest_memfd; ++ return machine->cgs && machine->cgs->require_guest_memfd; + } + + static char *cpu_slot_to_string(const CPUArchId *cpu) +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index e5b188cffb..02dc4e518f 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -31,6 +31,11 @@ OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, + struct ConfidentialGuestSupport { + Object parent; + ++ /* ++ * True if the machine should use guest_memfd for RAM. 
++ */ ++ bool require_guest_memfd; ++ + /* + * ready: flag set by CGS initialization code once it's ready to + * start executing instructions in a potentially-secure +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 815a1c4b26..0d1f9533ef 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -373,7 +373,6 @@ struct MachineState { + char *dt_compatible; + bool dump_guest_core; + bool mem_merge; +- bool require_guest_memfd; + bool usb; + bool usb_disabled; + char *firmware; +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch b/SOURCES/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch new file mode 100644 index 0000000..538e82d --- /dev/null +++ b/SOURCES/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch @@ -0,0 +1,85 @@ +From 331c58d87dde8b4757e1d1e09d9b16bac2952d22 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 30 May 2024 06:16:15 -0500 +Subject: [PATCH 081/100] memory: Introduce + memory_region_init_ram_guest_memfd() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [81/91] d5b0898d791f3f90d1acda0230f96ca9bf5be5e4 (bonzini/rhel-qemu-kvm) + +Introduce memory_region_init_ram_guest_memfd() to allocate private +guset memfd on the MemoryRegion initialization. It's for the use case of +TDVF, which must be private on TDX case. 
+ +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-4-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a0aa6db7ce72a08703774107185e639e73e7754c) +Signed-off-by: Paolo Bonzini +--- + include/exec/memory.h | 6 ++++++ + system/memory.c | 24 ++++++++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 679a847685..1e351f6fc8 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1603,6 +1603,12 @@ bool memory_region_init_ram(MemoryRegion *mr, + uint64_t size, + Error **errp); + ++bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, ++ Object *owner, ++ const char *name, ++ uint64_t size, ++ Error **errp); ++ + /** + * memory_region_init_rom: Initialize a ROM memory region. + * +diff --git a/system/memory.c b/system/memory.c +index c756950c0c..b09065eef3 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -3606,6 +3606,30 @@ bool memory_region_init_ram(MemoryRegion *mr, + return true; + } + ++bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, ++ Object *owner, ++ const char *name, ++ uint64_t size, ++ Error **errp) ++{ ++ DeviceState *owner_dev; ++ ++ if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, size, ++ RAM_GUEST_MEMFD, errp)) { ++ return false; ++ } ++ /* This will assert if owner is neither NULL nor a DeviceState. ++ * We only want the owner here for the purposes of defining a ++ * unique name for migration. TODO: Ideally we should implement ++ * a naming scheme for Objects which are not DeviceStates, in ++ * which case we can relax this restriction. 
++ */ ++ owner_dev = DEVICE(owner); ++ vmstate_register_ram(mr, owner_dev); ++ ++ return true; ++} ++ + bool memory_region_init_rom(MemoryRegion *mr, + Object *owner, + const char *name, +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch b/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch deleted file mode 100644 index 5b531f5..0000000 --- a/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 633c6a52ac88526534466ae311522fe5447bcf91 Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Wed, 17 Jan 2024 14:55:54 +0100 -Subject: [PATCH 02/22] memory-device: reintroduce memory region size check - -RH-Author: David Hildenbrand -RH-MergeRequest: 221: memory-device: reintroduce memory region size check -RH-Jira: RHEL-20341 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Igor Mammedov -RH-Commit: [2/2] e9ff2339b0c07c3f48f5834c9c80cd6d4cbc8f71 - -JIRA: https://issues.redhat.com/browse/RHEL-20341 - -We used to check that the memory region size is multiples of the overall -requested address alignment for the device memory address. - -We removed that check, because there are cases (i.e., hv-balloon) where -devices unconditionally request an address alignment that has a very large -alignment (i.e., 32 GiB), but the actual memory device size might not be -multiples of that alignment. - -However, this change: - -(a) allows for some practically impossible DIMM sizes, like "1GB+1 byte". -(b) allows for DIMMs that partially cover hugetlb pages, previously - reported in [1]. - -Both scenarios don't make any sense: we might even waste memory. - -So let's reintroduce that check, but only check that the -memory region size is multiples of the memory region alignment (i.e., -page size, huge page size), but not any additional memory device -requirements communicated using md->get_min_alignment(). 
- -The following examples now fail again as expected: - -(a) 1M with 2M THP - qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ - -object memory-backend-ram,id=mem1,size=1M \ - -device pc-dimm,id=dimm1,memdev=mem1 - -> backend memory size must be multiple of 0x200000 - -(b) 1G+1byte - - qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ - -object memory-backend-ram,id=mem1,size=1073741825B \ - -device pc-dimm,id=dimm1,memdev=mem1 - -> backend memory size must be multiple of 0x200000 - -(c) Unliagned hugetlb size (2M) - - qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ - -object memory-backend-file,id=mem1,mem-path=/dev/hugepages/tmp,size=511M \ - -device pc-dimm,id=dimm1,memdev=mem1 - backend memory size must be multiple of 0x200000 - -(d) Unliagned hugetlb size (1G) - - qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ - -object memory-backend-file,id=mem1,mem-path=/dev/hugepages1G/tmp,size=2047M \ - -device pc-dimm,id=dimm1,memdev=mem1 - -> backend memory size must be multiple of 0x40000000 - -Note that this fix depends on a hv-balloon change to communicate its -additional alignment requirements using get_min_alignment() instead of -through the memory region. - -[1] https://lkml.kernel.org/r/f77d641d500324525ac036fe1827b3070de75fc1.1701088320.git.mprivozn@redhat.com - -Message-ID: <20240117135554.787344-3-david@redhat.com> -Reported-by: Zhenyu Zhang -Reported-by: Michal Privoznik -Fixes: eb1b7c4bd413 ("memory-device: Drop size alignment check") -Tested-by: Zhenyu Zhang -Tested-by: Mario Casquero -Reviewed-by: Maciej S. 
Szmigiero -Signed-off-by: David Hildenbrand -(cherry picked from commit 540a1abbf0b243e4cfb4333c5d30a041f7080ba4) -Signed-off-by: David Hildenbrand ---- - hw/mem/memory-device.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c -index a1b1af26bc..e098585cda 100644 ---- a/hw/mem/memory-device.c -+++ b/hw/mem/memory-device.c -@@ -374,6 +374,20 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, - goto out; - } - -+ /* -+ * We always want the memory region size to be multiples of the memory -+ * region alignment: for example, DIMMs with 1G+1byte size don't make -+ * any sense. Note that we don't check that the size is multiples -+ * of any additional alignment requirements the memory device might -+ * have when it comes to the address in physical address space. -+ */ -+ if (!QEMU_IS_ALIGNED(memory_region_size(mr), -+ memory_region_get_alignment(mr))) { -+ error_setg(errp, "backend memory size must be multiple of 0x%" -+ PRIx64, memory_region_get_alignment(mr)); -+ return; -+ } -+ - if (legacy_align) { - align = *legacy_align; - } else { --- -2.39.3 - diff --git a/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch b/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch deleted file mode 100644 index 345a2b4..0000000 --- a/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch +++ /dev/null @@ -1,1630 +0,0 @@ -From 972e553e605e8916fc47c2d51cdbde940fd7d855 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 18 Jan 2024 09:48:23 -0500 -Subject: [PATCH 13/22] monitor: only run coroutine commands in - qemu_aio_context - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [9/17] ec5690fcade04a88bd1815bf2ae0377e80fe3d51 (stefanha/centos-stream-qemu-kvm) - 
-monitor_qmp_dispatcher_co() runs in the iohandler AioContext that is not -polled during nested event loops. The coroutine currently reschedules -itself in the main loop's qemu_aio_context AioContext, which is polled -during nested event loops. One known problem is that QMP device-add -calls drain_call_rcu(), which temporarily drops the BQL, leading to all -sorts of havoc like other vCPU threads re-entering device emulation code -while another vCPU thread is waiting in device emulation code with -aio_poll(). - -Paolo Bonzini suggested running non-coroutine QMP handlers in the -iohandler AioContext. This avoids trouble with nested event loops. His -original idea was to move coroutine rescheduling to -monitor_qmp_dispatch(), but I resorted to moving it to qmp_dispatch() -because we don't know if the QMP handler needs to run in coroutine -context in monitor_qmp_dispatch(). monitor_qmp_dispatch() would have -been nicer since it's associated with the monitor implementation and not -as general as qmp_dispatch(), which is also used by qemu-ga. - -A number of qemu-iotests need updated .out files because the order of -QMP events vs QMP responses has changed. - -Solves Issue #1933. 
- -Cc: qemu-stable@nongnu.org -Fixes: 7bed89958bfbf40df9ca681cefbdca63abdde39d ("device_core: use drain_call_rcu in in qmp_device_add") -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215192 -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2214985 -Buglink: https://issues.redhat.com/browse/RHEL-17369 -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240118144823.1497953-4-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Tested-by: Fiona Ebner -Signed-off-by: Kevin Wolf -(cherry picked from commit effd60c878176bcaf97fa7ce2b12d04bb8ead6f7) -Signed-off-by: Stefan Hajnoczi ---- - monitor/qmp.c | 17 ---- - qapi/qmp-dispatch.c | 24 +++++- - tests/qemu-iotests/060.out | 4 +- - tests/qemu-iotests/071.out | 4 +- - tests/qemu-iotests/081.out | 16 ++-- - tests/qemu-iotests/087.out | 12 +-- - tests/qemu-iotests/108.out | 2 +- - tests/qemu-iotests/109 | 4 +- - tests/qemu-iotests/109.out | 78 ++++++++----------- - tests/qemu-iotests/117.out | 2 +- - tests/qemu-iotests/120.out | 2 +- - tests/qemu-iotests/127.out | 2 +- - tests/qemu-iotests/140.out | 2 +- - tests/qemu-iotests/143.out | 2 +- - tests/qemu-iotests/156.out | 2 +- - tests/qemu-iotests/176.out | 16 ++-- - tests/qemu-iotests/182.out | 2 +- - tests/qemu-iotests/183.out | 4 +- - tests/qemu-iotests/184.out | 32 ++++---- - tests/qemu-iotests/185 | 6 +- - tests/qemu-iotests/185.out | 45 +++++++++-- - tests/qemu-iotests/191.out | 16 ++-- - tests/qemu-iotests/195.out | 16 ++-- - tests/qemu-iotests/223.out | 12 +-- - tests/qemu-iotests/227.out | 32 ++++---- - tests/qemu-iotests/247.out | 2 +- - tests/qemu-iotests/273.out | 8 +- - tests/qemu-iotests/308 | 4 +- - tests/qemu-iotests/308.out | 4 +- - tests/qemu-iotests/tests/file-io-error | 5 +- - tests/qemu-iotests/tests/iothreads-resize.out | 2 +- - tests/qemu-iotests/tests/qsd-jobs.out | 4 +- - 32 files changed, 205 insertions(+), 178 deletions(-) - -diff --git a/monitor/qmp.c b/monitor/qmp.c -index 6eee450fe4..a239945e8d 100644 ---- a/monitor/qmp.c -+++ b/monitor/qmp.c 
-@@ -321,14 +321,6 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) - qemu_coroutine_yield(); - } - -- /* -- * Move the coroutine from iohandler_ctx to qemu_aio_context for -- * executing the command handler so that it can make progress if it -- * involves an AIO_WAIT_WHILE(). -- */ -- aio_co_schedule(qemu_get_aio_context(), qmp_dispatcher_co); -- qemu_coroutine_yield(); -- - /* Process request */ - if (req_obj->req) { - if (trace_event_get_state(TRACE_MONITOR_QMP_CMD_IN_BAND)) { -@@ -355,15 +347,6 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) - } - - qmp_request_free(req_obj); -- -- /* -- * Yield and reschedule so the main loop stays responsive. -- * -- * Move back to iohandler_ctx so that nested event loops for -- * qemu_aio_context don't start new monitor commands. -- */ -- aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co); -- qemu_coroutine_yield(); - } - qatomic_set(&qmp_dispatcher_co, NULL); - } -diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c -index 555528b6bb..176b549473 100644 ---- a/qapi/qmp-dispatch.c -+++ b/qapi/qmp-dispatch.c -@@ -206,9 +206,31 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ - assert(!(oob && qemu_in_coroutine())); - assert(monitor_cur() == NULL); - if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) { -+ if (qemu_in_coroutine()) { -+ /* -+ * Move the coroutine from iohandler_ctx to qemu_aio_context for -+ * executing the command handler so that it can make progress if it -+ * involves an AIO_WAIT_WHILE(). -+ */ -+ aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); -+ qemu_coroutine_yield(); -+ } -+ - monitor_set_cur(qemu_coroutine_self(), cur_mon); - cmd->fn(args, &ret, &err); - monitor_set_cur(qemu_coroutine_self(), NULL); -+ -+ if (qemu_in_coroutine()) { -+ /* -+ * Yield and reschedule so the main loop stays responsive. 
-+ * -+ * Move back to iohandler_ctx so that nested event loops for -+ * qemu_aio_context don't start new monitor commands. -+ */ -+ aio_co_schedule(iohandler_get_aio_context(), -+ qemu_coroutine_self()); -+ qemu_coroutine_yield(); -+ } - } else { - /* - * Actual context doesn't match the one the command needs. -@@ -232,7 +254,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ - .errp = &err, - .co = qemu_coroutine_self(), - }; -- aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh, -+ aio_bh_schedule_oneshot(iohandler_get_aio_context(), do_qmp_dispatch_bh, - &data); - qemu_coroutine_yield(); - } -diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out -index 329977d9b9..a37bf446e9 100644 ---- a/tests/qemu-iotests/060.out -+++ b/tests/qemu-iotests/060.out -@@ -421,8 +421,8 @@ QMP_VERSION - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "none0", "msg": "Preventing invalid write on metadata (overlaps with refcount table)", "offset": 65536, "node-name": "drive", "fatal": true, "size": 65536}} - write failed: Input/output error - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - === Testing incoming inactive corrupted image === - -@@ -432,8 +432,8 @@ QMP_VERSION - qcow2: Image is corrupt: L2 table offset 0x2a2a2a00 unaligned (L1 index: 0); further non-fatal corruption events will be suppressed - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "", "msg": "L2 table offset 0x2a2a2a00 unaligned (L1 index: 0)", "node-name": "drive", "fatal": false}} - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} 
-+{"return": {}} - - corrupt: false - *** done -diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out -index bca0c02f5c..a2923b05c2 100644 ---- a/tests/qemu-iotests/071.out -+++ b/tests/qemu-iotests/071.out -@@ -45,8 +45,8 @@ QMP_VERSION - {"return": {}} - read failed: Input/output error - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Testing blkverify on existing block device === -@@ -84,9 +84,9 @@ wrote 512/512 bytes at offset 0 - {"return": ""} - read failed: Input/output error - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - QEMU_PROG: Failed to flush the L2 table cache: Input/output error - QEMU_PROG: Failed to flush the refcount block cache: Input/output error -+{"return": {}} - - *** done -diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out -index 615c083549..aba85ea564 100644 ---- a/tests/qemu-iotests/081.out -+++ b/tests/qemu-iotests/081.out -@@ -35,8 +35,8 @@ QMP_VERSION - read 10485760/10485760 bytes at offset 0 - 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - == using quorum rewrite corrupted mode == -@@ -67,8 +67,8 @@ QMP_VERSION - read 10485760/10485760 bytes at offset 0 - 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - -- checking that the image has been corrected -- - read 10485760/10485760 bytes at offset 0 -@@ -106,8 +106,8 @@ 
QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - Testing: - QMP_VERSION -@@ -115,8 +115,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"error": {"class": "GenericError", "desc": "Cannot add a child to a quorum in blkverify mode"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - == dynamically removing a child from a quorum == -@@ -125,31 +125,31 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - Testing: - QMP_VERSION - {"return": {}} - {"return": {}} - {"error": {"class": "GenericError", "desc": "The number of children cannot be lower than the vote threshold 2"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - Testing: - QMP_VERSION - {"return": {}} - {"error": {"class": "GenericError", "desc": "blkverify=on can only be set if there are exactly two files and vote-threshold is 2"}} - {"error": {"class": "GenericError", "desc": "Cannot find device='drive0-quorum' nor node-name='drive0-quorum'"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - Testing: - QMP_VERSION - {"return": {}} - {"return": {}} - {"error": {"class": "GenericError", "desc": "The number of children cannot be lower than the vote threshold 2"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - *** done -diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out -index e1c23a6983..97b6d8036d 100644 ---- a/tests/qemu-iotests/087.out -+++ b/tests/qemu-iotests/087.out -@@ -7,8 +7,8 @@ Testing: - QMP_VERSION - {"return": {}} - {"error": {"class": "GenericError", "desc": "'node-name' must be specified for the root node"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Duplicate ID === -@@ -18,8 +18,8 @@ QMP_VERSION - {"return": {}} - {"error": {"class": "GenericError", "desc": "node-name=disk is conflicting with a device id"}} - {"error": {"class": "GenericError", "desc": "Duplicate nodes with node-name='test-node'"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === aio=native without O_DIRECT === -@@ -28,8 +28,8 @@ Testing: - QMP_VERSION - {"return": {}} - {"error": {"class": "GenericError", "desc": "aio=native was specified, but it requires cache.direct=on, which was not specified."}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Encrypted image QCow === -@@ -40,8 +40,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported in system emulators"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Encrypted image LUKS === -@@ -52,8 +52,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} 
--{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Missing driver === -@@ -63,7 +63,7 @@ Testing: -S - QMP_VERSION - {"return": {}} - {"error": {"class": "GenericError", "desc": "Parameter 'driver' is missing"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - *** done -diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out -index b5401d788d..b9c876b394 100644 ---- a/tests/qemu-iotests/108.out -+++ b/tests/qemu-iotests/108.out -@@ -173,8 +173,8 @@ OK: Reftable is where we expect it - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} - {"return": {}} - { "execute": "quit" } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - wrote 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109 -index e207a555f3..0fb580f9a5 100755 ---- a/tests/qemu-iotests/109 -+++ b/tests/qemu-iotests/109 -@@ -57,13 +57,13 @@ run_qemu() - _launch_qemu -drive file="${source_img}",format=raw,cache=${CACHEMODE},aio=${AIOMODE},id=src - _send_qemu_cmd $QEMU_HANDLE "{ 'execute': 'qmp_capabilities' }" "return" - -- _send_qemu_cmd $QEMU_HANDLE \ -+ capture_events="$qmp_event" _send_qemu_cmd $QEMU_HANDLE \ - "{'execute':'drive-mirror', 'arguments':{ - 'device': 'src', 'target': '$raw_img', $qmp_format - 'mode': 'existing', 'sync': 'full'}}" \ - "return" - -- _send_qemu_cmd $QEMU_HANDLE '' "$qmp_event" -+ capture_events="$qmp_event JOB_STATUS_CHANGE" _wait_event $QEMU_HANDLE "$qmp_event" - if test "$qmp_event" = 
BLOCK_JOB_ERROR; then - _send_qemu_cmd $QEMU_HANDLE '' '"status": "null"' - fi -diff --git a/tests/qemu-iotests/109.out b/tests/qemu-iotests/109.out -index 965c9a6a0a..3ae8552ff7 100644 ---- a/tests/qemu-iotests/109.out -+++ b/tests/qemu-iotests/109.out -@@ -7,7 +7,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. -@@ -23,8 +23,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -35,12 +35,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", 
"device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -50,6 +48,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Writing a qcow2 header into raw === -@@ -59,7 +58,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -75,8 +74,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -87,12 +86,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 197120, "offset": 197120, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -102,6 +99,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Writing a qed header into raw === -@@ -111,7 +109,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -127,8 +125,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -139,12 +137,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -154,6 +150,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Writing a vdi header into raw === -@@ -163,7 +160,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -179,8 +176,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -191,12 +188,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -206,6 +201,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Writing a vmdk header into raw === -@@ -215,7 +211,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -231,8 +227,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -243,12 +239,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 65536, "offset": 65536, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -258,6 +252,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Writing a vpc header into raw === -@@ -267,7 +262,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -283,8 +278,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -295,12 +290,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -310,6 +303,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Copying sample image empty.bochs into raw === -@@ -318,7 +312,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -334,8 +328,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -346,12 +340,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -361,6 +353,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Copying sample image iotest-dirtylog-10G-4M.vhdx into raw === -@@ -369,7 +362,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -385,8 +378,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -397,12 +390,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 31457280, "offset": 31457280, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -412,6 +403,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Copying sample image parallels-v1 into raw === -@@ -420,7 +412,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -436,8 +428,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -448,12 +440,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -463,6 +453,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Copying sample image simple-pattern.cloop into raw === -@@ -471,7 +462,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -487,8 +478,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"query-block-jobs"} - {"return": []} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 512/512 bytes at offset 0 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } -@@ -499,12 +490,10 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2048, "offset": 2048, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -514,6 +503,7 @@ read 512/512 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - - === Write legitimate MBR into raw === -@@ -522,7 +512,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ -- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', -+ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', - 'mode': 'existing', 'sync': 'full'}} - WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. - Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
-@@ -530,12 +520,10 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -545,6 +533,7 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": 
{"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - { 'execute': 'qmp_capabilities' } - {"return": {}} -@@ -554,12 +543,10 @@ Images are identical. - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} - {"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} - {"execute":"query-block-jobs"} - {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} -@@ -569,5 +556,6 @@ Images are identical. 
- {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} -+{"return": {}} - Images are identical. - *** done -diff --git a/tests/qemu-iotests/117.out b/tests/qemu-iotests/117.out -index 735ffd25c6..1cea9e0217 100644 ---- a/tests/qemu-iotests/117.out -+++ b/tests/qemu-iotests/117.out -@@ -18,8 +18,8 @@ wrote 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - {"return": ""} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - No errors were found on the image. 
- read 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -diff --git a/tests/qemu-iotests/120.out b/tests/qemu-iotests/120.out -index 0744c1f136..35d84a5bc5 100644 ---- a/tests/qemu-iotests/120.out -+++ b/tests/qemu-iotests/120.out -@@ -5,8 +5,8 @@ QMP_VERSION - wrote 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - {"return": ""} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - read 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - read 65536/65536 bytes at offset 0 -diff --git a/tests/qemu-iotests/127.out b/tests/qemu-iotests/127.out -index 1685c4850a..dd8c4a8aa9 100644 ---- a/tests/qemu-iotests/127.out -+++ b/tests/qemu-iotests/127.out -@@ -28,6 +28,6 @@ wrote 42/42 bytes at offset 0 - { 'execute': 'quit' } - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "mirror"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/140.out b/tests/qemu-iotests/140.out -index 312f76d5da..32866440ae 100644 ---- a/tests/qemu-iotests/140.out -+++ b/tests/qemu-iotests/140.out -@@ -19,6 +19,6 @@ read 65536/65536 bytes at offset 0 - qemu-io: can't open device nbd+unix:///drv?socket=SOCK_DIR/nbd: Requested export not available - server reported: export 'drv' not present - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} 
-+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out -index 9ec5888e0e..d6afa32abc 100644 ---- a/tests/qemu-iotests/143.out -+++ b/tests/qemu-iotests/143.out -@@ -10,6 +10,6 @@ server reported: export 'no_such_export' not present - qemu-io: can't open device nbd+unix:///aa--aa1?socket=SOCK_DIR/nbd: Requested export not available - server reported: export 'aa--aa...' not present - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/156.out b/tests/qemu-iotests/156.out -index 4a22f0c41a..07e5e83f5d 100644 ---- a/tests/qemu-iotests/156.out -+++ b/tests/qemu-iotests/156.out -@@ -72,8 +72,8 @@ read 65536/65536 bytes at offset 196608 - {"return": ""} - - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - read 65536/65536 bytes at offset 0 - 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out -index 9d09b60452..45e9153ef3 100644 ---- a/tests/qemu-iotests/176.out -+++ b/tests/qemu-iotests/176.out -@@ -169,8 +169,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - wrote 196608/196608 bytes at offset 2147287040 - 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - wrote 131072/131072 bytes at offset 2147352576 -@@ -206,8 +206,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {"sha256": HASH}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": 
false, "reason": "host-qmp-quit"}} -+{"return": {}} - - === Test pass bitmap.1 === - -@@ -218,8 +218,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - wrote 196608/196608 bytes at offset 2147287040 - 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - wrote 131072/131072 bytes at offset 2147352576 -@@ -256,8 +256,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {"sha256": HASH}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - === Test pass bitmap.2 === - -@@ -268,8 +268,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - wrote 196608/196608 bytes at offset 2147287040 - 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - wrote 131072/131072 bytes at offset 2147352576 -@@ -306,8 +306,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {"sha256": HASH}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - === Test pass bitmap.3 === - -@@ -318,8 +318,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - wrote 196608/196608 bytes at offset 2147287040 - 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - wrote 131072/131072 bytes at offset 2147352576 -@@ -353,6 +353,6 @@ QMP_VERSION - {"return": {}} - 
{"return": {}} - {"return": {"sha256": HASH}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/182.out b/tests/qemu-iotests/182.out -index 57f7265458..83fc1a4797 100644 ---- a/tests/qemu-iotests/182.out -+++ b/tests/qemu-iotests/182.out -@@ -53,6 +53,6 @@ Formatting 'TEST_DIR/t.qcow2.overlay', fmt=qcow2 cluster_size=65536 extended_l2= - {'execute': 'qmp_capabilities'} - {"return": {}} - {'execute': 'quit'} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/183.out b/tests/qemu-iotests/183.out -index fd9c2e52a5..51aa41c888 100644 ---- a/tests/qemu-iotests/183.out -+++ b/tests/qemu-iotests/183.out -@@ -53,11 +53,11 @@ wrote 65536/65536 bytes at offset 1048576 - === Shut down and check image === - - {"execute":"quit"} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"return": {}} - {"execute":"quit"} --{"return": {}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - No errors were found on the image. - No errors were found on the image. 
- wrote 65536/65536 bytes at offset 1048576 -diff --git a/tests/qemu-iotests/184.out b/tests/qemu-iotests/184.out -index 77e5489d65..e8f631f853 100644 ---- a/tests/qemu-iotests/184.out -+++ b/tests/qemu-iotests/184.out -@@ -89,10 +89,6 @@ Testing: - "return": [ - ] - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -104,6 +100,10 @@ Testing: - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - == property changes in ThrottleGroup == -@@ -169,10 +169,6 @@ Testing: - "iops-total-max": 0 - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -184,6 +180,10 @@ Testing: - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - == object creation/set errors == -@@ -211,10 +211,6 @@ Testing: - "desc": "bps/iops/max total values and read/write values cannot be used at the same time" - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -226,6 +222,10 @@ Testing: - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - == don't specify group == -@@ -247,10 +247,6 @@ Testing: - "desc": "Parameter 'throttle-group' is missing" - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -262,6 +258,10 @@ Testing: - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - *** done -diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185 -index 2ae0a85bbf..17489fb91c 100755 ---- a/tests/qemu-iotests/185 -+++ b/tests/qemu-iotests/185 -@@ -344,14 +344,14 @@ wait_for_job_and_quit() { - - sleep 1 - -+ # List of expected events -+ capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN' -+ - _send_qemu_cmd $h \ - '{"execute": "quit"}' \ - 'return' - -- # List of expected events -- capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN' - _wait_event $h 'SHUTDOWN' -- QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before SHUTDOWN - _wait_event $h 'JOB_STATUS_CHANGE' # standby - 
_wait_event $h 'JOB_STATUS_CHANGE' # ready - _wait_event $h 'JOB_STATUS_CHANGE' # standby -diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out -index 7292c26bae..6af0953c4d 100644 ---- a/tests/qemu-iotests/185.out -+++ b/tests/qemu-iotests/185.out -@@ -40,9 +40,16 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off comp - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "commit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} -+{"return": {}} - - === Start active commit job and exit qemu === - -@@ -56,9 +63,16 @@ Formatting 
'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off comp - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "commit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} -+{"return": {}} - - === Start mirror job and exit qemu === - -@@ -75,9 +89,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": 
TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} -+{"return": {}} - - === Start backup job and exit qemu === - -@@ -97,9 +118,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 65536, "speed": 65536, "type": "backup"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} -+{"return": {}} - - === Start streaming job and exit qemu === - -@@ -112,9 +140,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} -+{"return": {}} - No errors were found on the image. - - === Start mirror to throttled QSD and exit qemu === -diff --git a/tests/qemu-iotests/191.out b/tests/qemu-iotests/191.out -index ea88777374..c3309e4bc6 100644 ---- a/tests/qemu-iotests/191.out -+++ b/tests/qemu-iotests/191.out -@@ -378,10 +378,6 @@ wrote 65536/65536 bytes at offset 1048576 - ] - } - { 'execute': 'quit' } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -393,6 +389,10 @@ wrote 65536/65536 bytes at offset 1048576 - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - image: TEST_DIR/t.IMGFMT - file format: IMGFMT - virtual size: 64 MiB (67108864 bytes) -@@ -796,10 +796,6 @@ wrote 65536/65536 bytes at offset 1048576 - ] - } - { 'execute': 'quit' } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -811,6 +807,10 @@ wrote 65536/65536 bytes at offset 1048576 - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - image: TEST_DIR/t.IMGFMT - file format: IMGFMT - virtual size: 64 MiB (67108864 bytes) -diff --git a/tests/qemu-iotests/195.out b/tests/qemu-iotests/195.out -index ec84df5012..91717d302e 100644 ---- a/tests/qemu-iotests/195.out -+++ b/tests/qemu-iotests/195.out -@@ -17,10 +17,6 @@ Testing: -drive 
if=none,file=TEST_DIR/t.IMGFMT,backing.node-name=mid - "return": { - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -32,6 +28,10 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,backing.node-name=mid - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - image: TEST_DIR/t.IMGFMT.mid - file format: IMGFMT -@@ -55,10 +55,6 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,node-name=top - "return": { - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -70,6 +66,10 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,node-name=top - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - image: TEST_DIR/t.IMGFMT - file format: IMGFMT -diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out -index e5e7f42caa..5f5b42e2dc 100644 ---- a/tests/qemu-iotests/223.out -+++ b/tests/qemu-iotests/223.out -@@ -11,8 +11,8 @@ QMP_VERSION - {"return": {}} - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - - === Write part of the file under active bitmap === -@@ -145,14 +145,14 @@ read 2097152/2097152 bytes at offset 2097152 - - {"execute":"nbd-server-remove", - "arguments":{"name":"n"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} - {"return": {}} - {"execute":"nbd-server-remove", - "arguments":{"name":"n2"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} - {"return": {}} - {"execute":"nbd-server-remove", - "arguments":{"name":"n2"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} - 
{"error": {"class": "GenericError", "desc": "Export 'n2' is not found"}} - {"execute":"nbd-server-stop"} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n3"}} -@@ -267,14 +267,14 @@ read 2097152/2097152 bytes at offset 2097152 - - {"execute":"nbd-server-remove", - "arguments":{"name":"n"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} - {"return": {}} - {"execute":"nbd-server-remove", - "arguments":{"name":"n2"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} - {"return": {}} - {"execute":"nbd-server-remove", - "arguments":{"name":"n2"}} --{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} - {"error": {"class": "GenericError", "desc": "Export 'n2' is not found"}} - {"execute":"nbd-server-stop"} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n3"}} -@@ -282,8 +282,8 @@ read 2097152/2097152 bytes at offset 2097152 - {"execute":"nbd-server-stop"} - {"error": {"class": "GenericError", "desc": "NBD server not running"}} - {"execute":"quit"} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - - === Use qemu-nbd as server === - -diff --git a/tests/qemu-iotests/227.out b/tests/qemu-iotests/227.out -index a947b1a87d..d6a1d4ecb6 100644 ---- a/tests/qemu-iotests/227.out -+++ b/tests/qemu-iotests/227.out -@@ -54,10 +54,6 @@ Testing: -drive driver=null-co,read-zeroes=on,if=virtio - } - ] - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -69,6 +65,10 @@ 
Testing: -drive driver=null-co,read-zeroes=on,if=virtio - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - === blockstats with -drive if=none === -@@ -124,10 +124,6 @@ Testing: -drive driver=null-co,if=none - } - ] - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -139,6 +135,10 @@ Testing: -drive driver=null-co,if=none - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - === blockstats with -blockdev === -@@ -155,10 +155,6 @@ Testing: -blockdev driver=null-co,node-name=null - "return": [ - ] - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -170,6 +166,10 @@ Testing: -blockdev driver=null-co,node-name=null - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - - === blockstats with -blockdev and -device === -@@ -226,10 +226,6 @@ Testing: -blockdev driver=null-co,read-zeroes=on,node-name=null -device virtio-b - } - ] - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -241,5 +237,9 @@ Testing: -blockdev driver=null-co,read-zeroes=on,node-name=null -device virtio-b - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - *** done -diff --git a/tests/qemu-iotests/247.out b/tests/qemu-iotests/247.out -index e909e83994..7d252e7fe4 100644 ---- a/tests/qemu-iotests/247.out -+++ b/tests/qemu-iotests/247.out -@@ -17,6 +17,6 @@ QMP_VERSION - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 134217728, "offset": 134217728, "speed": 0, "type": "commit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": 
{"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - *** done -diff --git a/tests/qemu-iotests/273.out b/tests/qemu-iotests/273.out -index 6a74a8138b..71843f02de 100644 ---- a/tests/qemu-iotests/273.out -+++ b/tests/qemu-iotests/273.out -@@ -282,10 +282,6 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev - ] - } - } --{ -- "return": { -- } --} - { - "timestamp": { - "seconds": TIMESTAMP, -@@ -297,5 +293,9 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev - "reason": "host-qmp-quit" - } - } -+{ -+ "return": { -+ } -+} - - *** done -diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 -index de12b2b1b9..ea81dc496a 100755 ---- a/tests/qemu-iotests/308 -+++ b/tests/qemu-iotests/308 -@@ -77,6 +77,7 @@ fuse_export_add() - # $1: Export ID - fuse_export_del() - { -+ capture_events="BLOCK_EXPORT_DELETED" \ - _send_qemu_cmd $QEMU_HANDLE \ - "{'execute': 'block-export-del', - 'arguments': { -@@ -84,8 +85,7 @@ fuse_export_del() - } }" \ - 'return' - -- _send_qemu_cmd $QEMU_HANDLE \ -- '' \ -+ _wait_event $QEMU_HANDLE \ - 'BLOCK_EXPORT_DELETED' - } - -diff --git a/tests/qemu-iotests/308.out b/tests/qemu-iotests/308.out -index d5767133b1..e5e233691d 100644 ---- a/tests/qemu-iotests/308.out -+++ b/tests/qemu-iotests/308.out -@@ -165,9 +165,9 @@ OK: Post-truncate image size is as expected - - === Tear down === - {'execute': 'quit'} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export-mp"}} -+{"return": {}} - - === Compare copy with original === - Images are identical. 
-@@ -201,9 +201,9 @@ wrote 67108864/67108864 bytes at offset 0 - read 67108864/67108864 bytes at offset 0 - 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - {'execute': 'quit'} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export"}} -+{"return": {}} - read 67108864/67108864 bytes at offset 0 - 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - *** done -diff --git a/tests/qemu-iotests/tests/file-io-error b/tests/qemu-iotests/tests/file-io-error -index 88ee5f670c..fb8db73b31 100755 ---- a/tests/qemu-iotests/tests/file-io-error -+++ b/tests/qemu-iotests/tests/file-io-error -@@ -99,13 +99,12 @@ echo - $QEMU_IO -f file -c 'write 0 64M' "$TEST_DIR/fuse-export" | _filter_qemu_io - echo - --_send_qemu_cmd $QEMU_HANDLE \ -+capture_events=BLOCK_EXPORT_DELETED _send_qemu_cmd $QEMU_HANDLE \ - "{'execute': 'block-export-del', - 'arguments': {'id': 'exp0'}}" \ - 'return' - --_send_qemu_cmd $QEMU_HANDLE \ -- '' \ -+_wait_event $QEMU_HANDLE \ - 'BLOCK_EXPORT_DELETED' - - _send_qemu_cmd $QEMU_HANDLE \ -diff --git a/tests/qemu-iotests/tests/iothreads-resize.out b/tests/qemu-iotests/tests/iothreads-resize.out -index 2ca5a9d964..2967ac8f0d 100644 ---- a/tests/qemu-iotests/tests/iothreads-resize.out -+++ b/tests/qemu-iotests/tests/iothreads-resize.out -@@ -3,8 +3,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 - QMP_VERSION - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+{"return": {}} - image: TEST_DIR/t.IMGFMT - file format: IMGFMT - virtual size: 128 MiB (134217728 bytes) -diff --git a/tests/qemu-iotests/tests/qsd-jobs.out b/tests/qemu-iotests/tests/qsd-jobs.out -index 
c1bc9b8356..aa6b6d1aef 100644 ---- a/tests/qemu-iotests/tests/qsd-jobs.out -+++ b/tests/qemu-iotests/tests/qsd-jobs.out -@@ -7,8 +7,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ - QMP_VERSION - {"return": {}} - {"return": {}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} -+{"return": {}} - - === Streaming can't get permission on base node === - -@@ -17,6 +17,6 @@ QMP_VERSION - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} - {"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt_base': permissions 'write' are both required by an unnamed block device (uses node 'fmt_base' as 'root' child) and unshared by stream job 'job0' (uses node 'fmt_base' as 'intermediate node' child)."}} --{"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}} -+{"return": {}} - *** done --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch new file mode 100644 index 0000000..f65d293 --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch @@ -0,0 +1,101 @@ +From 6f60c86c5dd747ba68cb4a11084e7b021769e70b Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 22 Aug 2024 09:35:29 -0500 +Subject: [PATCH] nbd/server: CVE-2024-7409: Avoid use-after-free when closing + server + +RH-Author: Eric Blake +RH-MergeRequest: 266: nbd/server: CVE-2024-7409: Avoid use-after-free when closing server +RH-Jira: RHEL-52617 +RH-Acked-by: 
Stefan Hajnoczi +RH-Acked-by: Hanna Czenczek +RH-Commit: [1/1] e6e12c985cd13dd14336d98ab0719c789b5e914d (ebblake/centos-qemu-kvm) + +Commit 3e7ef738 plugged the use-after-free of the global nbd_server +object, but overlooked a use-after-free of nbd_server->listener. +Although this race is harder to hit, notice that our shutdown path +first drops the reference count of nbd_server->listener, then triggers +actions that can result in a pending client reaching the +nbd_blockdev_client_closed() callback, which in turn calls +qio_net_listener_set_client_func on a potentially stale object. + +If we know we don't want any more clients to connect, and have already +told the listener socket to shut down, then we should not be trying to +update the listener socket's associated function. + +Reproducer: + +> #!/usr/bin/python3 +> +> import os +> from threading import Thread +> +> def start_stop(): +> while 1: +> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-start", ++"arguments":{"addr":{"type":"unix","data":{"path":"/tmp/nbd-sock"}}}}\'') +> os.system('virsh qemu-monitor-command VM \'{"execute": "nbd-server-stop"}\'') +> +> def nbd_list(): +> while 1: +> os.system('/path/to/build/qemu-nbd -L -k /tmp/nbd-sock') +> +> def test(): +> sst = Thread(target=start_stop) +> sst.start() +> nlt = Thread(target=nbd_list) +> nlt.start() +> +> sst.join() +> nlt.join() +> +> test() + +Fixes: CVE-2024-7409 +Fixes: 3e7ef738c8 ("nbd/server: CVE-2024-7409: Close stray clients at server-stop") +CC: qemu-stable@nongnu.org +Reported-by: Andrey Drobyshev +Signed-off-by: Eric Blake +Message-ID: <20240822143617.800419-2-eblake@redhat.com> +Reviewed-by: Stefan Hajnoczi + +(cherry picked from commit 3874f5f73c441c52f1c699c848d463b0eda01e4c) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index f73409ae49..b36f41b7c5 
100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -92,10 +92,13 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + + static void nbd_update_server_watch(NBDServerData *s) + { +- if (!s->max_connections || s->connections < s->max_connections) { +- qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, NULL); +- } else { +- qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); ++ if (s->listener) { ++ if (!s->max_connections || s->connections < s->max_connections) { ++ qio_net_listener_set_client_func(s->listener, nbd_accept, NULL, ++ NULL); ++ } else { ++ qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); ++ } + } + } + +@@ -113,6 +116,7 @@ static void nbd_server_free(NBDServerData *server) + */ + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); ++ server->listener = NULL; + QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) { + qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH, + NULL); +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch new file mode 100644 index 0000000..5459a51 --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch @@ -0,0 +1,184 @@ +From f76d73f62555ad73081558c1f56bcb832fbb8c35 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 6 Aug 2024 13:53:00 -0500 +Subject: [PATCH 098/100] nbd/server: CVE-2024-7409: Cap default + max-connections to 100 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] 1fb3b8cd9781a66bba2f4a6bee2b320e96de86aa (redhat/centos-stream/src/qemu-kvm) + +Allowing an unlimited number of clients to any web service is a recipe 
+for a rudimentary denial of service attack: the client merely needs to +open lots of sockets without closing them, until qemu no longer has +any more fds available to allocate. + +For qemu-nbd, we default to allowing only 1 connection unless more are +explicitly asked for (-e or --shared); this was historically picked as +a nice default (without an explicit -t, a non-persistent qemu-nbd goes +away after a client disconnects, without needing any additional +follow-up commands), and we are not going to change that interface now +(besides, someday we want to point people towards qemu-storage-daemon +instead of qemu-nbd). + +But for qemu proper, and the newer qemu-storage-daemon, the QMP +nbd-server-start command has historically had a default of unlimited +number of connections, in part because unlike qemu-nbd it is +inherently persistent until nbd-server-stop. Allowing multiple client +sockets is particularly useful for clients that can take advantage of +MULTI_CONN (creating parallel sockets to increase throughput), +although known clients that do so (such as libnbd's nbdcopy) typically +use only 8 or 16 connections (the benefits of scaling diminish once +more sockets are competing for kernel attention). Picking a number +large enough for typical use cases, but not unlimited, makes it +slightly harder for a malicious client to perform a denial of service +merely by opening lots of connections without progressing through the +handshake. + +This change does not eliminate CVE-2024-7409 on its own, but reduces +the chance for fd exhaustion or unlimited memory usage as an attack +surface. On the other hand, by itself, it makes it more obvious that +with a finite limit, we have the problem of an unauthenticated client +holding 100 fds opened as a way to block out a legitimate client from +being able to connect; thus, later patches will further add timeouts +to reject clients that are not making progress. 
+ +This is an INTENTIONAL change in behavior, and will break any client +of nbd-server-start that was not passing an explicit max-connections +parameter, yet expects more than 100 simultaneous connections. We are +not aware of any such client (as stated above, most clients aware of +MULTI_CONN get by just fine on 8 or 16 connections, and probably cope +with later connections failing by relying on the earlier connections; +libvirt has not yet been passing max-connections, but generally +creates NBD servers with the intent for a single client for the sake +of live storage migration; meanwhile, the KubeSAN project anticipates +a large cluster sharing multiple clients [up to 8 per node, and up to +100 nodes in a cluster], but it currently uses qemu-nbd with an +explicit --shared=0 rather than qemu-storage-daemon with +nbd-server-start). + +We considered using a deprecation period (declare that omitting +max-connections is deprecated, and make it mandatory in 3 releases - +then we don't need to pick an arbitrary default); that has zero risk +of breaking any apps that accidentally depended on more than 100 +connections, and where such breakage might not be noticed under unit +testing but only under the larger loads of production usage. But it +does not close the denial-of-service hole until far into the future, +and requires all apps to change to add the parameter even if 100 was +good enough. It also has a drawback that any app (like libvirt) that +is accidentally relying on an unlimited default should seriously +consider their own CVE now, at which point they are going to change to +pass explicit max-connections sooner than waiting for 3 qemu releases. +Finally, if our changed default breaks an app, that app can always +pass in an explicit max-connections with a larger value. + +It is also intentional that the HMP interface to nbd-server-start is +not changed to expose max-connections (any client needing to fine-tune +things should be using QMP). 
+ +Suggested-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-12-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé +[ericb: Expand commit message to summarize Dan's argument for why we +break corner-case back-compat behavior without a deprecation period] +Signed-off-by: Eric Blake + +(cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + block/monitor/block-hmp-cmds.c | 3 ++- + blockdev-nbd.c | 8 ++++++++ + include/block/nbd.h | 7 +++++++ + qapi/block-export.json | 4 ++-- + 4 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index d954bec6f1..bdf2eb50b6 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -402,7 +402,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict) + goto exit; + } + +- nbd_server_start(addr, NULL, NULL, 0, &local_err); ++ nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS, ++ &local_err); + qapi_free_SocketAddress(addr); + if (local_err != NULL) { + goto exit; +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 267a1de903..24ba5382db 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -170,6 +170,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, + + void nbd_server_start_options(NbdServerOptions *arg, Error **errp) + { ++ if (!arg->has_max_connections) { ++ arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS; ++ } ++ + nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz, + arg->max_connections, errp); + } +@@ -182,6 +186,10 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr, + { + SocketAddress *addr_flat = socket_address_flatten(addr); + ++ if (!has_max_connections) { ++ max_connections = NBD_DEFAULT_MAX_CONNECTIONS; ++ } ++ + nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp); + qapi_free_SocketAddress(addr_flat); 
+ } +diff --git a/include/block/nbd.h b/include/block/nbd.h +index 1d4d65922d..d4f8b21aec 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -39,6 +39,13 @@ extern const BlockExportDriver blk_exp_nbd; + */ + #define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10 + ++/* ++ * NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at ++ * once; must be large enough to allow a MULTI_CONN-aware client like ++ * nbdcopy to create its typical number of 8-16 sockets. ++ */ ++#define NBD_DEFAULT_MAX_CONNECTIONS 100 ++ + /* Handshake phase structs - this struct is passed on the wire */ + + typedef struct NBDOption { +diff --git a/qapi/block-export.json b/qapi/block-export.json +index 3919a2d5b9..f45e4fd481 100644 +--- a/qapi/block-export.json ++++ b/qapi/block-export.json +@@ -28,7 +28,7 @@ + # @max-connections: The maximum number of connections to allow at the + # same time, 0 for unlimited. Setting this to 1 also stops the + # server from advertising multiple client support (since 5.2; +-# default: 0) ++# default: 100) + # + # Since: 4.2 + ## +@@ -63,7 +63,7 @@ + # @max-connections: The maximum number of connections to allow at the + # same time, 0 for unlimited. Setting this to 1 also stops the + # server from advertising multiple client support (since 5.2; +-# default: 0). ++# default: 100). 
+ # + # Errors: + # - if the server is already running +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch new file mode 100644 index 0000000..2ba16e5 --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch @@ -0,0 +1,173 @@ +From 6522c68268f00c9c5665f8f98cf6ed1984124cf3 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 7 Aug 2024 12:23:13 -0500 +Subject: [PATCH 100/100] nbd/server: CVE-2024-7409: Close stray clients at + server-stop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] c00bb5a7e73446e9f071ef83e4f1576f73a17059 (redhat/centos-stream/src/qemu-kvm) + +A malicious client can attempt to connect to an NBD server, and then +intentionally delay progress in the handshake, including if it does +not know the TLS secrets. Although the previous two patches reduce +this behavior by capping the default max-connections parameter and +killing slow clients, they did not eliminate the possibility of a +client waiting to close the socket until after the QMP nbd-server-stop +command is executed, at which point qemu would SEGV when trying to +dereference the NULL nbd_server global which is no longer present. +This amounts to a denial of service attack. Worse, if another NBD +server is started before the malicious client disconnects, I cannot +rule out additional adverse effects when the old client interferes +with the connection count of the new server (although the most likely +is a crash due to an assertion failure when checking +nbd_server->connections > 0). 
+ +For environments without this patch, the CVE can be mitigated by +ensuring (such as via a firewall) that only trusted clients can +connect to an NBD server. Note that using frameworks like libvirt +that ensure that TLS is used and that nbd-server-stop is not executed +while any trusted clients are still connected will only help if there +is also no possibility for an untrusted client to open a connection +but then stall on the NBD handshake. + +Given the previous patches, it would be possible to guarantee that no +clients remain connected by having nbd-server-stop sleep for longer +than the default handshake deadline before finally freeing the global +nbd_server object, but that could make QMP non-responsive for a long +time. So instead, this patch fixes the problem by tracking all client +sockets opened while the server is running, and forcefully closing any +such sockets remaining without a completed handshake at the time of +nbd-server-stop, then waiting until the coroutines servicing those +sockets notice the state change. nbd-server-stop now has a second +AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the +blk_exp_close_all_type() that disconnects all clients that completed +handshakes), but forced socket shutdown is enough to progress the +coroutines and quickly tear down all clients before the server is +freed, thus finally fixing the CVE. + +This patch relies heavily on the fact that nbd/server.c guarantees +that it only calls nbd_blockdev_client_closed() from the main loop +(see the assertion in nbd_client_put() and the hoops used in +nbd_client_put_nonzero() to achieve that); if we did not have that +guarantee, we would also need a mutex protecting our accesses of the +list of connections to survive re-entrancy from independent iothreads. 
+ +Although I did not actually try to test old builds, it looks like this +problem has existed since at least commit 862172f45c (v2.12.0, 2017) - +even back when that patch started using a QIONetListener to handle +listening on multiple sockets, nbd_server_free() was already unaware +that the nbd_blockdev_client_closed callback can be reached later by a +client thread that has not completed handshakes (and therefore the +client's socket never got added to the list closed in +nbd_export_close_all), despite that patch intentionally tearing down +the QIONetListener to prevent new clients. + +Reported-by: Alexander Ivanov +Fixes: CVE-2024-7409 +CC: qemu-stable@nongnu.org +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-14-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé + +(cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++- + 1 file changed, 34 insertions(+), 1 deletion(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 24ba5382db..f73409ae49 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -21,12 +21,18 @@ + #include "io/channel-socket.h" + #include "io/net-listener.h" + ++typedef struct NBDConn { ++ QIOChannelSocket *cioc; ++ QLIST_ENTRY(NBDConn) next; ++} NBDConn; ++ + typedef struct NBDServerData { + QIONetListener *listener; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; + uint32_t max_connections; + uint32_t connections; ++ QLIST_HEAD(, NBDConn) conns; + } NBDServerData; + + static NBDServerData *nbd_server; +@@ -51,6 +57,14 @@ int nbd_server_max_connections(void) + + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + { ++ NBDConn *conn = nbd_client_owner(client); ++ ++ assert(qemu_in_main_thread() && nbd_server); ++ ++ object_unref(OBJECT(conn->cioc)); ++ QLIST_REMOVE(conn, next); ++ g_free(conn); ++ + nbd_client_put(client); + 
assert(nbd_server->connections > 0); + nbd_server->connections--; +@@ -60,14 +74,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + gpointer opaque) + { ++ NBDConn *conn = g_new0(NBDConn, 1); ++ ++ assert(qemu_in_main_thread() && nbd_server); + nbd_server->connections++; ++ object_ref(OBJECT(cioc)); ++ conn->cioc = cioc; ++ QLIST_INSERT_HEAD(&nbd_server->conns, conn, next); + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); + /* TODO - expose handshake timeout as QMP option */ + nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, + nbd_server->tlscreds, nbd_server->tlsauthz, +- nbd_blockdev_client_closed, NULL); ++ nbd_blockdev_client_closed, conn); + } + + static void nbd_update_server_watch(NBDServerData *s) +@@ -81,12 +101,25 @@ static void nbd_update_server_watch(NBDServerData *s) + + static void nbd_server_free(NBDServerData *server) + { ++ NBDConn *conn, *tmp; ++ + if (!server) { + return; + } + ++ /* ++ * Forcefully close the listener socket, and any clients that have ++ * not yet disconnected on their own. 
++ */ + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); ++ QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) { ++ qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH, ++ NULL); ++ } ++ ++ AIO_WAIT_WHILE_UNLOCKED(NULL, server->connections > 0); ++ + if (server->tlscreds) { + object_unref(OBJECT(server->tlscreds)); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch new file mode 100644 index 0000000..e1755c2 --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch @@ -0,0 +1,135 @@ +From ca30846351f1136d15f55717a5534ad927f7cf52 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 8 Aug 2024 16:05:08 -0500 +Subject: [PATCH 099/100] nbd/server: CVE-2024-7409: Drop non-negotiating + clients +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] 8008a1067766951d9752bcc41c2127a07fce934d (redhat/centos-stream/src/qemu-kvm) + +A client that opens a socket but does not negotiate is merely hogging +qemu's resources (an open fd and a small amount of memory); and a +malicious client that can access the port where NBD is listening can +attempt a denial of service attack by intentionally opening and +abandoning lots of unfinished connections. The previous patch put a +default bound on the number of such ongoing connections, but once that +limit is hit, no more clients can connect (including legitimate ones). +The solution is to insist that clients complete handshake within a +reasonable time limit, defaulting to 10 seconds. 
A client that has +not successfully completed NBD_OPT_GO by then (including the case of +where the client didn't know TLS credentials to even reach the point +of NBD_OPT_GO) is wasting our time and does not deserve to stay +connected. Later patches will allow fine-tuning the limit away from +the default value (including disabling it for doing integration +testing of the handshake process itself). + +Note that this patch in isolation actually makes it more likely to see +qemu SEGV after nbd-server-stop, as any client socket still connected +when the server shuts down will now be closed after 10 seconds rather +than at the client's whims. That will be addressed in the next patch. + +For a demo of this patch in action: +$ qemu-nbd -f raw -r -t -e 10 file & +$ nbdsh --opt-mode -c ' +H = list() +for i in range(20): + print(i) + H.insert(i, nbd.NBD()) + H[i].set_opt_mode(True) + H[i].connect_uri("nbd://localhost") +' +$ kill $! + +where later connections get to start progressing once earlier ones are +forcefully dropped for taking too long, rather than hanging. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-13-eblake@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +[eblake: rebase to changes earlier in series, reduce scope of timer] +Signed-off-by: Eric Blake + +(cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + nbd/server.c | 28 +++++++++++++++++++++++++++- + nbd/trace-events | 1 + + 2 files changed, 28 insertions(+), 1 deletion(-) + +diff --git a/nbd/server.c b/nbd/server.c +index e50012499f..39285cc971 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -3186,22 +3186,48 @@ static void nbd_client_receive_next_request(NBDClient *client) + } + } + ++static void nbd_handshake_timer_cb(void *opaque) ++{ ++ QIOChannel *ioc = opaque; ++ ++ trace_nbd_handshake_timer_cb(); ++ qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++} ++ + static coroutine_fn void nbd_co_client_start(void *opaque) + { + NBDClient *client = opaque; + Error *local_err = NULL; ++ QEMUTimer *handshake_timer = NULL; + + qemu_co_mutex_init(&client->send_lock); + +- /* TODO - utilize client->handshake_max_secs */ ++ /* ++ * Create a timer to bound the time spent in negotiation. If the ++ * timer expires, it is likely nbd_negotiate will fail because the ++ * socket was shutdown. 
++ */ ++ if (client->handshake_max_secs > 0) { ++ handshake_timer = aio_timer_new(qemu_get_aio_context(), ++ QEMU_CLOCK_REALTIME, ++ SCALE_NS, ++ nbd_handshake_timer_cb, ++ client->sioc); ++ timer_mod(handshake_timer, ++ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ++ client->handshake_max_secs * NANOSECONDS_PER_SECOND); ++ } ++ + if (nbd_negotiate(client, &local_err)) { + if (local_err) { + error_report_err(local_err); + } ++ timer_free(handshake_timer); + client_close(client, false); + return; + } + ++ timer_free(handshake_timer); + WITH_QEMU_LOCK_GUARD(&client->lock) { + nbd_client_receive_next_request(client); + } +diff --git a/nbd/trace-events b/nbd/trace-events +index 00ae3216a1..cbd0a4ab7e 100644 +--- a/nbd/trace-events ++++ b/nbd/trace-events +@@ -76,6 +76,7 @@ nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload + nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64 + nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32 + nbd_trip(void) "Reading request" ++nbd_handshake_timer_cb(void) "client took too long to negotiate" + + # client-connection.c + nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64 +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch b/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch new file mode 100644 index 0000000..7614df8 --- /dev/null +++ b/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch @@ -0,0 +1,330 @@ +From af6f51ad3482513e3ac047eb203f9dc623d47088 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Mon, 8 Apr 2024 11:00:44 -0500 +Subject: [PATCH 2/2] nbd/server: Mark negotiation functions as coroutine_fn + +RH-Author: Eric Blake +RH-MergeRequest: 239: avoid 
destination hang on NBD+TLS storage migration +RH-Jira: RHEL-33440 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] acf61b854f80365993d24bcd1d110ed3518b22fa (ebblake/centos-qemu-kvm) + +nbd_negotiate() is already marked coroutine_fn. And given the fix in +the previous patch to have nbd_negotiate_handle_starttls not create +and wait on a g_main_loop (as that would violate coroutine +constraints), it is worth marking the rest of the related static +functions reachable only during option negotiation as also being +coroutine_fn. + +Suggested-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Eric Blake +Message-ID: <20240408160214.1200629-6-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +[eblake: drop one spurious coroutine_fn marking] +Signed-off-by: Eric Blake + +Jira: https://issues.redhat.com/browse/RHEL-33440 +(cherry picked from commit 4fa333e08dd96395a99ea8dd9e4c73a29dd23344) +Signed-off-by: Eric Blake +--- + nbd/server.c | 102 +++++++++++++++++++++++++++++---------------------- + 1 file changed, 59 insertions(+), 43 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 98ae0e1632..892797bb11 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -195,8 +195,9 @@ static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option, + + /* Send a reply header, including length, but no payload. + * Return -errno on error, 0 on success. */ +-static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, +- uint32_t len, Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, ++ uint32_t len, Error **errp) + { + NBDOptionReply rep; + +@@ -211,15 +212,15 @@ static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, + + /* Send a reply header with default 0 length. + * Return -errno on error, 0 on success. 
*/ +-static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_rep(NBDClient *client, uint32_t type, Error **errp) + { + return nbd_negotiate_send_rep_len(client, type, 0, errp); + } + + /* Send an error reply. + * Return -errno on error, 0 on success. */ +-static int G_GNUC_PRINTF(4, 0) ++static coroutine_fn int G_GNUC_PRINTF(4, 0) + nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type, + Error **errp, const char *fmt, va_list va) + { +@@ -259,7 +260,7 @@ nbd_sanitize_name(const char *name) + + /* Send an error reply. + * Return -errno on error, 0 on success. */ +-static int G_GNUC_PRINTF(4, 5) ++static coroutine_fn int G_GNUC_PRINTF(4, 5) + nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type, + Error **errp, const char *fmt, ...) + { +@@ -275,7 +276,7 @@ nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type, + /* Drop remainder of the current option, and send a reply with the + * given error type and message. Return -errno on read or write + * failure; or 0 if connection is still live. */ +-static int G_GNUC_PRINTF(4, 0) ++static coroutine_fn int G_GNUC_PRINTF(4, 0) + nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp, + const char *fmt, va_list va) + { +@@ -288,7 +289,7 @@ nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp, + return ret; + } + +-static int G_GNUC_PRINTF(4, 5) ++static coroutine_fn int G_GNUC_PRINTF(4, 5) + nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, + const char *fmt, ...) + { +@@ -302,7 +303,7 @@ nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, + return ret; + } + +-static int G_GNUC_PRINTF(3, 4) ++static coroutine_fn int G_GNUC_PRINTF(3, 4) + nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...) + { + int ret; +@@ -319,8 +320,9 @@ nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...) + * If @check_nul, require that no NUL bytes appear in buffer. 
+ * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +-static int nbd_opt_read(NBDClient *client, void *buffer, size_t size, +- bool check_nul, Error **errp) ++static coroutine_fn int ++nbd_opt_read(NBDClient *client, void *buffer, size_t size, ++ bool check_nul, Error **errp) + { + if (size > client->optlen) { + return nbd_opt_invalid(client, errp, +@@ -343,7 +345,8 @@ static int nbd_opt_read(NBDClient *client, void *buffer, size_t size, + /* Drop size bytes from the unparsed payload of the current option. + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +-static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp) ++static coroutine_fn int ++nbd_opt_skip(NBDClient *client, size_t size, Error **errp) + { + if (size > client->optlen) { + return nbd_opt_invalid(client, errp, +@@ -366,8 +369,9 @@ static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp) + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. + */ +-static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length, +- Error **errp) ++static coroutine_fn int ++nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length, ++ Error **errp) + { + int ret; + uint32_t len; +@@ -402,8 +406,8 @@ static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length, + + /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. + * Return -errno on error, 0 on success. 
*/ +-static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, Error **errp) + { + ERRP_GUARD(); + size_t name_len, desc_len; +@@ -444,7 +448,8 @@ static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, + + /* Process the NBD_OPT_LIST command, with a potential series of replies. + * Return -errno on error, 0 on success. */ +-static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_handle_list(NBDClient *client, Error **errp) + { + NBDExport *exp; + assert(client->opt == NBD_OPT_LIST); +@@ -459,7 +464,8 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) + return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); + } + +-static void nbd_check_meta_export(NBDClient *client, NBDExport *exp) ++static coroutine_fn void ++nbd_check_meta_export(NBDClient *client, NBDExport *exp) + { + if (exp != client->contexts.exp) { + client->contexts.count = 0; +@@ -468,8 +474,9 @@ static void nbd_check_meta_export(NBDClient *client, NBDExport *exp) + + /* Send a reply to NBD_OPT_EXPORT_NAME. + * Return -errno on error, 0 on success. */ +-static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, ++ Error **errp) + { + ERRP_GUARD(); + g_autofree char *name = NULL; +@@ -536,9 +543,9 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, + /* Send a single NBD_REP_INFO, with a buffer @buf of @length bytes. + * The buffer does NOT include the info type prefix. + * Return -errno on error, 0 if ready to send more. 
*/ +-static int nbd_negotiate_send_info(NBDClient *client, +- uint16_t info, uint32_t length, void *buf, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_info(NBDClient *client, uint16_t info, uint32_t length, ++ void *buf, Error **errp) + { + int rc; + +@@ -565,7 +572,8 @@ static int nbd_negotiate_send_info(NBDClient *client, + * -errno transmission error occurred or @fatal was requested, errp is set + * 0 error message successfully sent to client, errp is not set + */ +-static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp) ++static coroutine_fn int ++nbd_reject_length(NBDClient *client, bool fatal, Error **errp) + { + int ret; + +@@ -583,7 +591,8 @@ static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp) + /* Handle NBD_OPT_INFO and NBD_OPT_GO. + * Return -errno on error, 0 if ready for next option, and 1 to move + * into transmission phase. */ +-static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_handle_info(NBDClient *client, Error **errp) + { + int rc; + g_autofree char *name = NULL; +@@ -755,7 +764,8 @@ struct NBDTLSServerHandshakeData { + Coroutine *co; + }; + +-static void nbd_server_tls_handshake(QIOTask *task, void *opaque) ++static void ++nbd_server_tls_handshake(QIOTask *task, void *opaque) + { + struct NBDTLSServerHandshakeData *data = opaque; + +@@ -768,8 +778,8 @@ static void nbd_server_tls_handshake(QIOTask *task, void *opaque) + + /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the + * new channel for all further (now-encrypted) communication. 
*/ +-static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, +- Error **errp) ++static coroutine_fn QIOChannel * ++nbd_negotiate_handle_starttls(NBDClient *client, Error **errp) + { + QIOChannel *ioc; + QIOChannelTLS *tioc; +@@ -821,10 +831,9 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, + * + * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead. + */ +-static int nbd_negotiate_send_meta_context(NBDClient *client, +- const char *context, +- uint32_t context_id, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_meta_context(NBDClient *client, const char *context, ++ uint32_t context_id, Error **errp) + { + NBDOptionReplyMetaContext opt; + struct iovec iov[] = { +@@ -849,8 +858,9 @@ static int nbd_negotiate_send_meta_context(NBDClient *client, + * Return true if @query matches @pattern, or if @query is empty when + * the @client is performing _LIST_. + */ +-static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, +- const char *query) ++static coroutine_fn bool ++nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, ++ const char *query) + { + if (!*query) { + trace_nbd_negotiate_meta_query_parse("empty"); +@@ -867,7 +877,8 @@ static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, + /* + * Return true and adjust @str in place if it begins with @prefix. + */ +-static bool nbd_strshift(const char **str, const char *prefix) ++static coroutine_fn bool ++nbd_strshift(const char **str, const char *prefix) + { + size_t len = strlen(prefix); + +@@ -883,8 +894,9 @@ static bool nbd_strshift(const char **str, const char *prefix) + * Handle queries to 'base' namespace. For now, only the base:allocation + * context is available. Return true if @query has been handled. 
+ */ +-static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta, +- const char *query) ++static coroutine_fn bool ++nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta, ++ const char *query) + { + if (!nbd_strshift(&query, "base:")) { + return false; +@@ -903,8 +915,9 @@ static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta, + * and qemu:allocation-depth contexts are available. Return true if @query + * has been handled. + */ +-static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta, +- const char *query) ++static coroutine_fn bool ++nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta, ++ const char *query) + { + size_t i; + +@@ -968,8 +981,9 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta, + * + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +-static int nbd_negotiate_meta_query(NBDClient *client, +- NBDMetaContexts *meta, Error **errp) ++static coroutine_fn int ++nbd_negotiate_meta_query(NBDClient *client, ++ NBDMetaContexts *meta, Error **errp) + { + int ret; + g_autofree char *query = NULL; +@@ -1008,7 +1022,8 @@ static int nbd_negotiate_meta_query(NBDClient *client, + * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT + * + * Return -errno on I/O error, or 0 if option was completely handled. */ +-static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_meta_queries(NBDClient *client, Error **errp) + { + int ret; + g_autofree char *export_name = NULL; +@@ -1136,7 +1151,8 @@ static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp) + * 1 if client sent NBD_OPT_ABORT, i.e. 
on valid disconnect, + * errp is not set + */ +-static int nbd_negotiate_options(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_options(NBDClient *client, Error **errp) + { + uint32_t flags; + bool fixedNewstyle = false; +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch b/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch new file mode 100644 index 0000000..6b4c670 --- /dev/null +++ b/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch @@ -0,0 +1,175 @@ +From 70acef52a99e5114699f5fa58de5f0b5c031b880 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 7 Aug 2024 08:50:01 -0500 +Subject: [PATCH 097/100] nbd/server: Plumb in new args to nbd_client_add() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 7614e294e1f5b7861386950ae994bea166d19950 (redhat/centos-stream/src/qemu-kvm) + +Upcoming patches to fix a CVE need to track an opaque pointer passed +in by the owner of a client object, as well as request for a time +limit on how fast negotiation must complete. Prepare for that by +changing the signature of nbd_client_new() and adding an accessor to +get at the opaque pointer, although for now the two servers +(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though +they pass in a new default timeout value. + +Suggested-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-11-eblake@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan] +Signed-off-by: Eric Blake + +(cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 6 ++++-- + include/block/nbd.h | 11 ++++++++++- + nbd/server.c | 20 +++++++++++++++++--- + qemu-nbd.c | 4 +++- + 4 files changed, 34 insertions(+), 7 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 213012435f..267a1de903 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -64,8 +64,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); +- nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz, +- nbd_blockdev_client_closed); ++ /* TODO - expose handshake timeout as QMP option */ ++ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, ++ nbd_server->tlscreds, nbd_server->tlsauthz, ++ nbd_blockdev_client_closed, NULL); + } + + static void nbd_update_server_watch(NBDServerData *s) +diff --git a/include/block/nbd.h b/include/block/nbd.h +index 4e7bd6342f..1d4d65922d 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -33,6 +33,12 @@ typedef struct NBDMetaContexts NBDMetaContexts; + + extern const BlockExportDriver blk_exp_nbd; + ++/* ++ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must ++ * succeed at NBD_OPT_GO before being forcefully dropped as too slow. 
++ */ ++#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10 ++ + /* Handshake phase structs - this struct is passed on the wire */ + + typedef struct NBDOption { +@@ -403,9 +409,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp); + NBDExport *nbd_export_find(const char *name); + + void nbd_client_new(QIOChannelSocket *sioc, ++ uint32_t handshake_max_secs, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, +- void (*close_fn)(NBDClient *, bool)); ++ void (*close_fn)(NBDClient *, bool), ++ void *owner); ++void *nbd_client_owner(NBDClient *client); + void nbd_client_get(NBDClient *client); + void nbd_client_put(NBDClient *client); + +diff --git a/nbd/server.c b/nbd/server.c +index 892797bb11..e50012499f 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -124,12 +124,14 @@ struct NBDMetaContexts { + struct NBDClient { + int refcount; /* atomic */ + void (*close_fn)(NBDClient *client, bool negotiated); ++ void *owner; + + QemuMutex lock; + + NBDExport *exp; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; ++ uint32_t handshake_max_secs; + QIOChannelSocket *sioc; /* The underlying data channel */ + QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ + +@@ -3191,6 +3193,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + + qemu_co_mutex_init(&client->send_lock); + ++ /* TODO - utilize client->handshake_max_secs */ + if (nbd_negotiate(client, &local_err)) { + if (local_err) { + error_report_err(local_err); +@@ -3205,14 +3208,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + } + + /* +- * Create a new client listener using the given channel @sioc. ++ * Create a new client listener using the given channel @sioc and @owner. + * Begin servicing it in a coroutine. When the connection closes, call +- * @close_fn with an indication of whether the client completed negotiation. ++ * @close_fn with an indication of whether the client completed negotiation ++ * within @handshake_max_secs seconds (0 for unbounded). 
+ */ + void nbd_client_new(QIOChannelSocket *sioc, ++ uint32_t handshake_max_secs, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, +- void (*close_fn)(NBDClient *, bool)) ++ void (*close_fn)(NBDClient *, bool), ++ void *owner) + { + NBDClient *client; + Coroutine *co; +@@ -3225,13 +3231,21 @@ void nbd_client_new(QIOChannelSocket *sioc, + object_ref(OBJECT(client->tlscreds)); + } + client->tlsauthz = g_strdup(tlsauthz); ++ client->handshake_max_secs = handshake_max_secs; + client->sioc = sioc; + qio_channel_set_delay(QIO_CHANNEL(sioc), false); + object_ref(OBJECT(client->sioc)); + client->ioc = QIO_CHANNEL(sioc); + object_ref(OBJECT(client->ioc)); + client->close_fn = close_fn; ++ client->owner = owner; + + co = qemu_coroutine_create(nbd_co_client_start, client); + qemu_coroutine_enter(co); + } ++ ++void * ++nbd_client_owner(NBDClient *client) ++{ ++ return client->owner; ++} +diff --git a/qemu-nbd.c b/qemu-nbd.c +index d7b3ccab21..48e2fa5858 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -390,7 +390,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + + nb_fds++; + nbd_update_server_watch(); +- nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed); ++ /* TODO - expose handshake timeout as command line option */ ++ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, ++ tlscreds, tlsauthz, nbd_client_closed, NULL); + } + + static void nbd_update_server_watch(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch b/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch deleted file mode 100644 index 339e234..0000000 --- a/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch +++ /dev/null @@ -1,53 +0,0 @@ -From cd7788a857a6099206c4063e3ef69cb9e4aebcbc Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 21 Dec 2023 14:24:50 -0500 -Subject: [PATCH 070/101] nbd/server: avoid per-NBDRequest nbd_client_get/put() - -RH-Author: Kevin Wolf 
-RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/26] 5acb090ac4adf4260cd9e9c5605a27012b2a33aa (kmwolf/centos-qemu-kvm) - -nbd_trip() processes a single NBD request from start to finish and holds -an NBDClient reference throughout. NBDRequest does not outlive the scope -of nbd_trip(). Therefore it is unnecessary to ref/unref NBDClient for -each NBDRequest. - -Removing these nbd_client_get()/nbd_client_put() calls will make -thread-safety easier in the commits that follow. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-ID: <20231221192452.1785567-5-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - nbd/server.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index 895cf0a752..0b09ccc8dc 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -1557,7 +1557,6 @@ static NBDRequestData *nbd_request_get(NBDClient *client) - client->nb_requests++; - - req = g_new0(NBDRequestData, 1); -- nbd_client_get(client); - req->client = client; - return req; - } -@@ -1578,8 +1577,6 @@ static void nbd_request_put(NBDRequestData *req) - } - - nbd_client_receive_next_request(client); -- -- nbd_client_put(client); - } - - static void blk_aio_attached(AioContext *ctx, void *opaque) --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch b/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch new file mode 100644 index 0000000..9f65e99 --- /dev/null +++ b/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch @@ -0,0 +1,208 @@ +From ae9ebfc4ebd6f146951f04cf9e12eeaaf4c2387e Mon Sep 17 00:00:00 2001 +From: Zhu Yangyang +Date: Mon, 8 Apr 2024 11:00:43 -0500 +Subject: [PATCH 1/2] nbd/server: do not poll within a coroutine context + +RH-Author: Eric Blake +RH-MergeRequest: 239: avoid destination hang on NBD+TLS storage migration +RH-Jira: RHEL-33440 
+RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] 27963e92aacca8ed43994113b645f472fba8f8bc (ebblake/centos-qemu-kvm) + +Coroutines are not supposed to block. Instead, they should yield. + +The client performs TLS upgrade outside of an AIOContext, during +synchronous handshake; this still requires g_main_loop. But the +server responds to TLS upgrade inside a coroutine, so a nested +g_main_loop is wrong. Since the two callbacks no longer share more +than the setting of data.complete and data.error, it's just as easy to +use static helpers instead of trying to share a common code path. It +is also possible to add assertions that no other code is interfering +with the eventual path to qio reaching the callback, whether or not it +required a yield or main loop. + +Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation") +Signed-off-by: Zhu Yangyang +[eblake: move callbacks to their use point, add assertions] +Signed-off-by: Eric Blake +Message-ID: <20240408160214.1200629-5-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy + +Jira: https://issues.redhat.com/browse/RHEL-33440 +(cherry picked from commit ae6d91a7e9b77abb029ed3fa9fad461422286942) +Signed-off-by: Eric Blake +--- + nbd/client.c | 28 ++++++++++++++++++++++++---- + nbd/common.c | 11 ----------- + nbd/nbd-internal.h | 10 ---------- + nbd/server.c | 28 +++++++++++++++++++++++----- + 4 files changed, 47 insertions(+), 30 deletions(-) + +diff --git a/nbd/client.c b/nbd/client.c +index 29ffc609a4..c89c750467 100644 +--- a/nbd/client.c ++++ b/nbd/client.c +@@ -596,13 +596,31 @@ static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict, + return 1; + } + ++/* Callback to learn when QIO TLS upgrade is complete */ ++struct NBDTLSClientHandshakeData { ++ bool complete; ++ Error *error; ++ GMainLoop *loop; ++}; ++ ++static void nbd_client_tls_handshake(QIOTask *task, void *opaque) ++{ ++ struct NBDTLSClientHandshakeData *data = opaque; ++ ++ 
qio_task_propagate_error(task, &data->error); ++ data->complete = true; ++ if (data->loop) { ++ g_main_loop_quit(data->loop); ++ } ++} ++ + static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, + QCryptoTLSCreds *tlscreds, + const char *hostname, Error **errp) + { + int ret; + QIOChannelTLS *tioc; +- struct NBDTLSHandshakeData data = { 0 }; ++ struct NBDTLSClientHandshakeData data = { 0 }; + + ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp); + if (ret <= 0) { +@@ -619,18 +637,20 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, + return NULL; + } + qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls"); +- data.loop = g_main_loop_new(g_main_context_default(), FALSE); + trace_nbd_receive_starttls_tls_handshake(); + qio_channel_tls_handshake(tioc, +- nbd_tls_handshake, ++ nbd_client_tls_handshake, + &data, + NULL, + NULL); + + if (!data.complete) { ++ data.loop = g_main_loop_new(g_main_context_default(), FALSE); + g_main_loop_run(data.loop); ++ assert(data.complete); ++ g_main_loop_unref(data.loop); + } +- g_main_loop_unref(data.loop); ++ + if (data.error) { + error_propagate(errp, data.error); + object_unref(OBJECT(tioc)); +diff --git a/nbd/common.c b/nbd/common.c +index 3247c1d618..589a748cfe 100644 +--- a/nbd/common.c ++++ b/nbd/common.c +@@ -47,17 +47,6 @@ int nbd_drop(QIOChannel *ioc, size_t size, Error **errp) + } + + +-void nbd_tls_handshake(QIOTask *task, +- void *opaque) +-{ +- struct NBDTLSHandshakeData *data = opaque; +- +- qio_task_propagate_error(task, &data->error); +- data->complete = true; +- g_main_loop_quit(data->loop); +-} +- +- + const char *nbd_opt_lookup(uint32_t opt) + { + switch (opt) { +diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h +index dfa02f77ee..91895106a9 100644 +--- a/nbd/nbd-internal.h ++++ b/nbd/nbd-internal.h +@@ -72,16 +72,6 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size, + return qio_channel_write_all(ioc, buffer, size, errp) < 0 ? 
-EIO : 0; + } + +-struct NBDTLSHandshakeData { +- GMainLoop *loop; +- bool complete; +- Error *error; +-}; +- +- +-void nbd_tls_handshake(QIOTask *task, +- void *opaque); +- + int nbd_drop(QIOChannel *ioc, size_t size, Error **errp); + + #endif +diff --git a/nbd/server.c b/nbd/server.c +index c3484cc1eb..98ae0e1632 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -748,6 +748,23 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) + return rc; + } + ++/* Callback to learn when QIO TLS upgrade is complete */ ++struct NBDTLSServerHandshakeData { ++ bool complete; ++ Error *error; ++ Coroutine *co; ++}; ++ ++static void nbd_server_tls_handshake(QIOTask *task, void *opaque) ++{ ++ struct NBDTLSServerHandshakeData *data = opaque; ++ ++ qio_task_propagate_error(task, &data->error); ++ data->complete = true; ++ if (!qemu_coroutine_entered(data->co)) { ++ aio_co_wake(data->co); ++ } ++} + + /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the + * new channel for all further (now-encrypted) communication. 
*/ +@@ -756,7 +773,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, + { + QIOChannel *ioc; + QIOChannelTLS *tioc; +- struct NBDTLSHandshakeData data = { 0 }; ++ struct NBDTLSServerHandshakeData data = { 0 }; + + assert(client->opt == NBD_OPT_STARTTLS); + +@@ -777,17 +794,18 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, + + qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls"); + trace_nbd_negotiate_handle_starttls_handshake(); +- data.loop = g_main_loop_new(g_main_context_default(), FALSE); ++ data.co = qemu_coroutine_self(); + qio_channel_tls_handshake(tioc, +- nbd_tls_handshake, ++ nbd_server_tls_handshake, + &data, + NULL, + NULL); + + if (!data.complete) { +- g_main_loop_run(data.loop); ++ qemu_coroutine_yield(); ++ assert(data.complete); + } +- g_main_loop_unref(data.loop); ++ + if (data.error) { + object_unref(OBJECT(tioc)); + error_propagate(errp, data.error); +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch b/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch deleted file mode 100644 index e0d763d..0000000 --- a/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch +++ /dev/null @@ -1,373 +0,0 @@ -From bb0a6afff7f23a3ddb460dc1b2e70c06565f8a3f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 21 Dec 2023 14:24:52 -0500 -Subject: [PATCH 072/101] nbd/server: introduce NBDClient->lock to protect - fields - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/26] 49b64adaaf8b1c30f339d1ecc8ea89fb9db63f1c (kmwolf/centos-qemu-kvm) - -NBDClient has a number of fields that are accessed by both the export -AioContext and the main loop thread. When the AioContext lock is removed -these fields will need another form of protection. - -Add NBDClient->lock and protect fields that are accessed by both -threads. 
Also add assertions where possible and otherwise add doc -comments stating assumptions about which thread and lock holding. - -Note this patch moves the client->recv_coroutine assertion from -nbd_co_receive_request() to nbd_trip() where client->lock is held. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231221192452.1785567-7-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - nbd/server.c | 144 +++++++++++++++++++++++++++++++++++++++------------ - 1 file changed, 111 insertions(+), 33 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index e91e2e0903..941832f178 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -125,23 +125,25 @@ struct NBDClient { - int refcount; /* atomic */ - void (*close_fn)(NBDClient *client, bool negotiated); - -+ QemuMutex lock; -+ - NBDExport *exp; - QCryptoTLSCreds *tlscreds; - char *tlsauthz; - QIOChannelSocket *sioc; /* The underlying data channel */ - QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ - -- Coroutine *recv_coroutine; -+ Coroutine *recv_coroutine; /* protected by lock */ - - CoMutex send_lock; - Coroutine *send_coroutine; - -- bool read_yielding; -- bool quiescing; -+ bool read_yielding; /* protected by lock */ -+ bool quiescing; /* protected by lock */ - - QTAILQ_ENTRY(NBDClient) next; -- int nb_requests; -- bool closing; -+ int nb_requests; /* protected by lock */ -+ bool closing; /* protected by lock */ - - uint32_t check_align; /* If non-zero, check for aligned client requests */ - -@@ -1415,11 +1417,18 @@ nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp) - - len = qio_channel_readv(client->ioc, &iov, 1, errp); - if (len == QIO_CHANNEL_ERR_BLOCK) { -- client->read_yielding = true; -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ client->read_yielding = true; -+ -+ /* Prompt main loop thread to re-run nbd_drained_poll() */ -+ aio_wait_kick(); -+ } - qio_channel_yield(client->ioc, G_IO_IN); -- client->read_yielding = false; -- if 
(client->quiescing) { -- return -EAGAIN; -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ client->read_yielding = false; -+ if (client->quiescing) { -+ return -EAGAIN; -+ } - } - continue; - } else if (len < 0) { -@@ -1528,6 +1537,7 @@ void nbd_client_put(NBDClient *client) - blk_exp_unref(&client->exp->common); - } - g_free(client->contexts.bitmaps); -+ qemu_mutex_destroy(&client->lock); - g_free(client); - } - } -@@ -1561,11 +1571,13 @@ static void client_close(NBDClient *client, bool negotiated) - { - assert(qemu_in_main_thread()); - -- if (client->closing) { -- return; -- } -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ if (client->closing) { -+ return; -+ } - -- client->closing = true; -+ client->closing = true; -+ } - - /* Force requests to finish. They will drop their own references, - * then we'll close the socket and free the NBDClient. -@@ -1579,6 +1591,7 @@ static void client_close(NBDClient *client, bool negotiated) - } - } - -+/* Runs in export AioContext with client->lock held */ - static NBDRequestData *nbd_request_get(NBDClient *client) - { - NBDRequestData *req; -@@ -1591,6 +1604,7 @@ static NBDRequestData *nbd_request_get(NBDClient *client) - return req; - } - -+/* Runs in export AioContext with client->lock held */ - static void nbd_request_put(NBDRequestData *req) - { - NBDClient *client = req->client; -@@ -1614,14 +1628,18 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) - NBDExport *exp = opaque; - NBDClient *client; - -+ assert(qemu_in_main_thread()); -+ - trace_nbd_blk_aio_attached(exp->name, ctx); - - exp->common.ctx = ctx; - - QTAILQ_FOREACH(client, &exp->clients, next) { -- assert(client->nb_requests == 0); -- assert(client->recv_coroutine == NULL); -- assert(client->send_coroutine == NULL); -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ assert(client->nb_requests == 0); -+ assert(client->recv_coroutine == NULL); -+ assert(client->send_coroutine == NULL); -+ } - } - } - -@@ -1629,6 +1647,8 @@ static void blk_aio_detach(void *opaque) - 
{ - NBDExport *exp = opaque; - -+ assert(qemu_in_main_thread()); -+ - trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); - - exp->common.ctx = NULL; -@@ -1639,8 +1659,12 @@ static void nbd_drained_begin(void *opaque) - NBDExport *exp = opaque; - NBDClient *client; - -+ assert(qemu_in_main_thread()); -+ - QTAILQ_FOREACH(client, &exp->clients, next) { -- client->quiescing = true; -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ client->quiescing = true; -+ } - } - } - -@@ -1649,28 +1673,48 @@ static void nbd_drained_end(void *opaque) - NBDExport *exp = opaque; - NBDClient *client; - -+ assert(qemu_in_main_thread()); -+ - QTAILQ_FOREACH(client, &exp->clients, next) { -- client->quiescing = false; -- nbd_client_receive_next_request(client); -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ client->quiescing = false; -+ nbd_client_receive_next_request(client); -+ } - } - } - -+/* Runs in export AioContext */ -+static void nbd_wake_read_bh(void *opaque) -+{ -+ NBDClient *client = opaque; -+ qio_channel_wake_read(client->ioc); -+} -+ - static bool nbd_drained_poll(void *opaque) - { - NBDExport *exp = opaque; - NBDClient *client; - -+ assert(qemu_in_main_thread()); -+ - QTAILQ_FOREACH(client, &exp->clients, next) { -- if (client->nb_requests != 0) { -- /* -- * If there's a coroutine waiting for a request on nbd_read_eof() -- * enter it here so we don't depend on the client to wake it up. -- */ -- if (client->recv_coroutine != NULL && client->read_yielding) { -- qio_channel_wake_read(client->ioc); -- } -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ if (client->nb_requests != 0) { -+ /* -+ * If there's a coroutine waiting for a request on nbd_read_eof() -+ * enter it here so we don't depend on the client to wake it up. -+ * -+ * Schedule a BH in the export AioContext to avoid missing the -+ * wake up due to the race between qio_channel_wake_read() and -+ * qio_channel_yield(). 
-+ */ -+ if (client->recv_coroutine != NULL && client->read_yielding) { -+ aio_bh_schedule_oneshot(nbd_export_aio_context(client->exp), -+ nbd_wake_read_bh, client); -+ } - -- return true; -+ return true; -+ } - } - } - -@@ -1681,6 +1725,8 @@ static void nbd_eject_notifier(Notifier *n, void *data) - { - NBDExport *exp = container_of(n, NBDExport, eject_notifier); - -+ assert(qemu_in_main_thread()); -+ - blk_exp_request_shutdown(&exp->common); - } - -@@ -2566,7 +2612,6 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, - int ret; - - g_assert(qemu_in_coroutine()); -- assert(client->recv_coroutine == qemu_coroutine_self()); - ret = nbd_receive_request(client, request, errp); - if (ret < 0) { - return ret; -@@ -2975,6 +3020,9 @@ static coroutine_fn void nbd_trip(void *opaque) - */ - - trace_nbd_trip(); -+ -+ qemu_mutex_lock(&client->lock); -+ - if (client->closing) { - goto done; - } -@@ -2990,7 +3038,21 @@ static coroutine_fn void nbd_trip(void *opaque) - } - - req = nbd_request_get(client); -- ret = nbd_co_receive_request(req, &request, &local_err); -+ -+ /* -+ * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has -+ * set client->quiescing but by the time we get back nbd_drained_end() may -+ * have already cleared client->quiescing. In that case we try again -+ * because nothing else will spawn an nbd_trip() coroutine until we set -+ * client->recv_coroutine = NULL further down. 
-+ */ -+ do { -+ assert(client->recv_coroutine == qemu_coroutine_self()); -+ qemu_mutex_unlock(&client->lock); -+ ret = nbd_co_receive_request(req, &request, &local_err); -+ qemu_mutex_lock(&client->lock); -+ } while (ret == -EAGAIN && !client->quiescing); -+ - client->recv_coroutine = NULL; - - if (client->closing) { -@@ -3002,15 +3064,16 @@ static coroutine_fn void nbd_trip(void *opaque) - } - - if (ret == -EAGAIN) { -- assert(client->quiescing); - goto done; - } - - nbd_client_receive_next_request(client); -+ - if (ret == -EIO) { - goto disconnect; - } - -+ qemu_mutex_unlock(&client->lock); - qio_channel_set_cork(client->ioc, true); - - if (ret < 0) { -@@ -3030,6 +3093,10 @@ static coroutine_fn void nbd_trip(void *opaque) - g_free(request.contexts->bitmaps); - g_free(request.contexts); - } -+ -+ qio_channel_set_cork(client->ioc, false); -+ qemu_mutex_lock(&client->lock); -+ - if (ret < 0) { - error_prepend(&local_err, "Failed to send reply: "); - goto disconnect; -@@ -3044,11 +3111,13 @@ static coroutine_fn void nbd_trip(void *opaque) - goto disconnect; - } - -- qio_channel_set_cork(client->ioc, false); - done: - if (req) { - nbd_request_put(req); - } -+ -+ qemu_mutex_unlock(&client->lock); -+ - if (!nbd_client_put_nonzero(client)) { - aio_co_reschedule_self(qemu_get_aio_context()); - nbd_client_put(client); -@@ -3059,13 +3128,19 @@ disconnect: - if (local_err) { - error_reportf_err(local_err, "Disconnect client, due to: "); - } -+ - nbd_request_put(req); -+ qemu_mutex_unlock(&client->lock); - - aio_co_reschedule_self(qemu_get_aio_context()); - client_close(client, true); - nbd_client_put(client); - } - -+/* -+ * Runs in export AioContext and main loop thread. Caller must hold -+ * client->lock. 
-+ */ - static void nbd_client_receive_next_request(NBDClient *client) - { - if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && -@@ -3091,7 +3166,9 @@ static coroutine_fn void nbd_co_client_start(void *opaque) - return; - } - -- nbd_client_receive_next_request(client); -+ WITH_QEMU_LOCK_GUARD(&client->lock) { -+ nbd_client_receive_next_request(client); -+ } - } - - /* -@@ -3108,6 +3185,7 @@ void nbd_client_new(QIOChannelSocket *sioc, - Coroutine *co; - - client = g_new0(NBDClient, 1); -+ qemu_mutex_init(&client->lock); - client->refcount = 1; - client->tlscreds = tlscreds; - if (tlscreds) { --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch b/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch deleted file mode 100644 index 3ca11a9..0000000 --- a/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch +++ /dev/null @@ -1,176 +0,0 @@ -From 8b60d72532b6511b41d82d591fb4f509314ef15f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 21 Dec 2023 14:24:51 -0500 -Subject: [PATCH 071/101] nbd/server: only traverse NBDExport->clients from - main loop thread - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/26] e7794a3a5c363c7508ee505c4ba03d9ef8862ca9 (kmwolf/centos-qemu-kvm) - -The NBD clients list is currently accessed from both the export -AioContext and the main loop thread. When the AioContext lock is removed -there will be nothing protecting the clients list. - -Adding a lock around the clients list is tricky because NBDClient -structs are refcounted and may be freed from the export AioContext or -the main loop thread. nbd_export_request_shutdown() -> client_close() -> -nbd_client_put() is also tricky because the list lock would be held -while indirectly dropping references to NDBClients. 
- -A simpler approach is to only allow nbd_client_put() and client_close() -calls from the main loop thread. Then the NBD clients list is only -accessed from the main loop thread and no fancy locking is needed. - -nbd_trip() just needs to reschedule itself in the main loop AioContext -before calling nbd_client_put() and client_close(). This costs more CPU -cycles per NBD request so add nbd_client_put_nonzero() to optimize the -common case where more references to NBDClient remain. - -Note that nbd_client_get() can still be called from either thread, so -make NBDClient->refcount atomic. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231221192452.1785567-6-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - nbd/server.c | 61 +++++++++++++++++++++++++++++++++++++++++++--------- - 1 file changed, 51 insertions(+), 10 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index 0b09ccc8dc..e91e2e0903 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -122,7 +122,7 @@ struct NBDMetaContexts { - }; - - struct NBDClient { -- int refcount; -+ int refcount; /* atomic */ - void (*close_fn)(NBDClient *client, bool negotiated); - - NBDExport *exp; -@@ -1501,14 +1501,17 @@ static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque - - #define MAX_NBD_REQUESTS 16 - -+/* Runs in export AioContext and main loop thread */ - void nbd_client_get(NBDClient *client) - { -- client->refcount++; -+ qatomic_inc(&client->refcount); - } - - void nbd_client_put(NBDClient *client) - { -- if (--client->refcount == 0) { -+ assert(qemu_in_main_thread()); -+ -+ if (qatomic_fetch_dec(&client->refcount) == 1) { - /* The last reference should be dropped by client->close, - * which is called by client_close. - */ -@@ -1529,8 +1532,35 @@ void nbd_client_put(NBDClient *client) - } - } - -+/* -+ * Tries to release the reference to @client, but only if other references -+ * remain. 
This is an optimization for the common case where we want to avoid -+ * the expense of scheduling nbd_client_put() in the main loop thread. -+ * -+ * Returns true upon success or false if the reference was not released because -+ * it is the last reference. -+ */ -+static bool nbd_client_put_nonzero(NBDClient *client) -+{ -+ int old = qatomic_read(&client->refcount); -+ int expected; -+ -+ do { -+ if (old == 1) { -+ return false; -+ } -+ -+ expected = old; -+ old = qatomic_cmpxchg(&client->refcount, expected, expected - 1); -+ } while (old != expected); -+ -+ return true; -+} -+ - static void client_close(NBDClient *client, bool negotiated) - { -+ assert(qemu_in_main_thread()); -+ - if (client->closing) { - return; - } -@@ -2933,15 +2963,20 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, - static coroutine_fn void nbd_trip(void *opaque) - { - NBDClient *client = opaque; -- NBDRequestData *req; -+ NBDRequestData *req = NULL; - NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ - int ret; - Error *local_err = NULL; - -+ /* -+ * Note that nbd_client_put() and client_close() must be called from the -+ * main loop thread. Use aio_co_reschedule_self() to switch AioContext -+ * before calling these functions. -+ */ -+ - trace_nbd_trip(); - if (client->closing) { -- nbd_client_put(client); -- return; -+ goto done; - } - - if (client->quiescing) { -@@ -2949,10 +2984,9 @@ static coroutine_fn void nbd_trip(void *opaque) - * We're switching between AIO contexts. Don't attempt to receive a new - * request and kick the main context which may be waiting for us. 
- */ -- nbd_client_put(client); - client->recv_coroutine = NULL; - aio_wait_kick(); -- return; -+ goto done; - } - - req = nbd_request_get(client); -@@ -3012,8 +3046,13 @@ static coroutine_fn void nbd_trip(void *opaque) - - qio_channel_set_cork(client->ioc, false); - done: -- nbd_request_put(req); -- nbd_client_put(client); -+ if (req) { -+ nbd_request_put(req); -+ } -+ if (!nbd_client_put_nonzero(client)) { -+ aio_co_reschedule_self(qemu_get_aio_context()); -+ nbd_client_put(client); -+ } - return; - - disconnect: -@@ -3021,6 +3060,8 @@ disconnect: - error_reportf_err(local_err, "Disconnect client, due to: "); - } - nbd_request_put(req); -+ -+ aio_co_reschedule_self(qemu_get_aio_context()); - client_close(client, true); - nbd_client_put(client); - } --- -2.39.3 - diff --git a/SOURCES/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch b/SOURCES/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch new file mode 100644 index 0000000..2c21dde --- /dev/null +++ b/SOURCES/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch @@ -0,0 +1,68 @@ +From c0a65c752cd83dea27cbeb34074d65fb2c5a6b59 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Wed, 20 Mar 2024 03:39:13 -0500 +Subject: [PATCH 008/100] pci-host/q35: Move PAM initialization above SMRAM + initialization + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [8/91] 22a9221d4726e872aa0f0dc25ae9d823c0611547 (bonzini/rhel-qemu-kvm) + +In mch_realize(), process PAM initialization before SMRAM initialization so +that later patch can skill all the SMRAM related with a single check. 
+ +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-18-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 42c11ae2416dcbcd694ec3ee574fe2f3e70099ae) +Signed-off-by: Paolo Bonzini +--- + hw/pci-host/q35.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c +index 0d7d4e3f08..98d4a7c253 100644 +--- a/hw/pci-host/q35.c ++++ b/hw/pci-host/q35.c +@@ -568,6 +568,16 @@ static void mch_realize(PCIDevice *d, Error **errp) + /* setup pci memory mapping */ + pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space); + ++ /* PAM */ ++ init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, ++ mch->system_memory, mch->pci_address_space, ++ PAM_BIOS_BASE, PAM_BIOS_SIZE); ++ for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { ++ init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, ++ mch->system_memory, mch->pci_address_space, ++ PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); ++ } ++ + /* if *disabled* show SMRAM to all CPUs */ + memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", + mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, +@@ -634,15 +644,6 @@ static void mch_realize(PCIDevice *d, Error **errp) + + object_property_add_const_link(qdev_get_machine(), "smram", + OBJECT(&mch->smram)); +- +- init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, +- mch->system_memory, mch->pci_address_space, +- PAM_BIOS_BASE, PAM_BIOS_SIZE); +- for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { +- init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, +- mch->system_memory, mch->pci_address_space, +- PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); +- } + } + + uint64_t mch_mcfg_base(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch 
b/SOURCES/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch new file mode 100644 index 0000000..66e0423 --- /dev/null +++ b/SOURCES/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch @@ -0,0 +1,83 @@ +From c70f6e7e3461e6562c0591079cc71068bf0f2ed8 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:07 -0500 +Subject: [PATCH 033/100] physmem: Introduce + ram_block_discard_guest_memfd_range() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [33/91] b6169fa8d752d83977b18897be24f6ab9f3d3472 (bonzini/rhel-qemu-kvm) + +When memory page is converted from private to shared, the original +private memory is back'ed by guest_memfd. Introduce +ram_block_discard_guest_memfd_range() for discarding memory in +guest_memfd. + +Based on a patch by Isaku Yamahata . + +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-12-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b2e9426c04fdd32d93a3a37db6b0c2e67c88c335) +Signed-off-by: Paolo Bonzini +--- + include/exec/cpu-common.h | 2 ++ + system/physmem.c | 23 +++++++++++++++++++++++ + 2 files changed, 25 insertions(+) + +diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h +index 6346df17ce..6d5318895a 100644 +--- a/include/exec/cpu-common.h ++++ b/include/exec/cpu-common.h +@@ -159,6 +159,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); + + int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); + int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); ++int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, ++ size_t length); + + #endif + +diff --git a/system/physmem.c b/system/physmem.c +index 5ebcf5be11..c3d04ca921 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -3721,6 +3721,29 
@@ err: + return ret; + } + ++int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, ++ size_t length) ++{ ++ int ret = -1; ++ ++#ifdef CONFIG_FALLOCATE_PUNCH_HOLE ++ ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, ++ start, length); ++ ++ if (ret) { ++ ret = -errno; ++ error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", ++ __func__, rb->idstr, start, length, ret); ++ } ++#else ++ ret = -ENOSYS; ++ error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", ++ __func__, rb->idstr, start, length, ret); ++#endif ++ ++ return ret; ++} ++ + bool ramblock_is_pmem(RAMBlock *rb) + { + return rb->flags & RAM_PMEM; +-- +2.39.3 + diff --git a/SOURCES/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch b/SOURCES/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch new file mode 100644 index 0000000..037442c --- /dev/null +++ b/SOURCES/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch @@ -0,0 +1,140 @@ +From cfb109b393e019398a52f66a5ff0e9581c841335 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:37 -0500 +Subject: [PATCH 013/100] ppc/pef: switch to use + confidential_guest_kvm_init/reset() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [13/91] e25c498fc79a4c8e22ca41d9cbd06e40b4cf1f11 (bonzini/rhel-qemu-kvm) + +Use the unified interface to call confidential guest related kvm_init() +and kvm_reset(), to avoid exposing pef specific functions. + +As a bonus, pef.h goes away since there is no direct call from sPAPR +board code to PEF code anymore. 
+ +Signed-off-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit 00a238b1a845fd5f0acd771664c5e184a63ed9b6) +Signed-off-by: Paolo Bonzini +--- + hw/ppc/pef.c | 9 ++++++--- + hw/ppc/spapr.c | 10 +++++++--- + include/hw/ppc/pef.h | 17 ----------------- + 3 files changed, 13 insertions(+), 23 deletions(-) + delete mode 100644 include/hw/ppc/pef.h + +diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c +index d28ed3ba73..47553348b1 100644 +--- a/hw/ppc/pef.c ++++ b/hw/ppc/pef.c +@@ -15,7 +15,6 @@ + #include "sysemu/kvm.h" + #include "migration/blocker.h" + #include "exec/confidential-guest-support.h" +-#include "hw/ppc/pef.h" + + #define TYPE_PEF_GUEST "pef-guest" + OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST) +@@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp) + #endif + } + +-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; +@@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return kvmppc_svm_init(cgs, errp); + } + +-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) ++static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; +@@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest, + + static void pef_guest_class_init(ObjectClass *oc, void *data) + { ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = pef_kvm_init; ++ klass->kvm_reset = pef_kvm_reset; + } + + static void pef_guest_init(Object *obj) +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index a258d81846..6f6f0fd790 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -75,6 +75,7 @@ + #include "hw/virtio/vhost-scsi-common.h" + + #include "exec/ram_addr.h" ++#include "exec/confidential-guest-support.h" + #include "hw/usb.h" + #include 
"qemu/config-file.h" + #include "qemu/error-report.h" +@@ -87,7 +88,6 @@ + #include "hw/ppc/spapr_tpm_proxy.h" + #include "hw/ppc/spapr_nvdimm.h" + #include "hw/ppc/spapr_numa.h" +-#include "hw/ppc/pef.h" + + #include "monitor/monitor.h" + +@@ -1715,7 +1715,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) + qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32); + } + +- pef_kvm_reset(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_reset(machine->cgs, &error_fatal); ++ } + spapr_caps_apply(spapr); + spapr_nested_reset(spapr); + if (spapr->svm_allowed) { +@@ -2844,7 +2846,9 @@ static void spapr_machine_init(MachineState *machine) + /* + * if Secure VM (PEF) support is configured, then initialize it + */ +- pef_kvm_init(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_init(machine->cgs, &error_fatal); ++ } + + msi_nonbroken = true; + +diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h +deleted file mode 100644 +index 707dbe524c..0000000000 +--- a/include/hw/ppc/pef.h ++++ /dev/null +@@ -1,17 +0,0 @@ +-/* +- * PEF (Protected Execution Facility) for POWER support +- * +- * Copyright Red Hat. +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- * +- */ +- +-#ifndef HW_PPC_PEF_H +-#define HW_PPC_PEF_H +- +-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp); +- +-#endif /* HW_PPC_PEF_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch b/SOURCES/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch new file mode 100644 index 0000000..c7f121b --- /dev/null +++ b/SOURCES/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch @@ -0,0 +1,164 @@ +From 83bb32c25472b500738a54ac8f2ad0f5c496acf1 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Wed, 20 Mar 2024 03:39:14 -0500 +Subject: [PATCH 009/100] q35: Introduce smm_ranges property for q35-pci-host + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [9/91] 931156772bfc2085e7241eecc56cf6eca3dac1fd (bonzini/rhel-qemu-kvm) + +Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG, +etc... exist for the target platform. TDX doesn't support SMM and doesn't +play nice with QEMU modifying related guest memory ranges. 
+ +Signed-off-by: Isaku Yamahata +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-19-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b07bf7b73fd02d24a7baa64a580f4974b86bbc86) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_q35.c | 2 ++ + hw/pci-host/q35.c | 42 +++++++++++++++++++++++++++------------ + include/hw/i386/pc.h | 1 + + include/hw/pci-host/q35.h | 1 + + 4 files changed, 33 insertions(+), 13 deletions(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 9adcdadce8..dedc86eec9 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -219,6 +219,8 @@ static void pc_q35_init(MachineState *machine) + x86ms->above_4g_mem_size, NULL); + object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU, + pcms->default_bus_bypass_iommu, NULL); ++ object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES, ++ x86_machine_is_smm_enabled(x86ms), NULL); + sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); + + /* pci */ +diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c +index 98d4a7c253..0b6cbaed7e 100644 +--- a/hw/pci-host/q35.c ++++ b/hw/pci-host/q35.c +@@ -179,6 +179,8 @@ static Property q35_host_props[] = { + mch.below_4g_mem_size, 0), + DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, + mch.above_4g_mem_size, 0), ++ DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost, ++ mch.has_smm_ranges, true), + DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), + DEFINE_PROP_END_OF_LIST(), + }; +@@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj) + /* mch's object_initialize resets the default value, set it again */ + qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE, + Q35_PCI_HOST_HOLE64_SIZE_DEFAULT); ++ + object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32", + q35_host_get_pci_hole_start, + NULL, NULL, NULL); +@@ -476,6 +479,10 @@ static void 
mch_write_config(PCIDevice *d, + mch_update_pciexbar(mch); + } + ++ if (!mch->has_smm_ranges) { ++ return; ++ } ++ + if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM, + MCH_HOST_BRIDGE_SMRAM_SIZE)) { + mch_update_smram(mch); +@@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d, + static void mch_update(MCHPCIState *mch) + { + mch_update_pciexbar(mch); ++ + mch_update_pam(mch); +- mch_update_smram(mch); +- mch_update_ext_tseg_mbytes(mch); +- mch_update_smbase_smram(mch); ++ if (mch->has_smm_ranges) { ++ mch_update_smram(mch); ++ mch_update_ext_tseg_mbytes(mch); ++ mch_update_smbase_smram(mch); ++ } + + /* + * pci hole goes from end-of-low-ram to io-apic. +@@ -538,18 +548,20 @@ static void mch_reset(DeviceState *qdev) + pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR, + MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT); + +- d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; +- d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; +- d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; +- d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; ++ if (mch->has_smm_ranges) { ++ d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; ++ d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; ++ d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; ++ d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; + +- if (mch->ext_tseg_mbytes > 0) { +- pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, +- MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); +- } ++ if (mch->ext_tseg_mbytes > 0) { ++ pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, ++ MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); ++ } + +- d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; +- d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; ++ d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; ++ d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; ++ } + + mch_update(mch); + } +@@ -578,6 +590,10 @@ static void mch_realize(PCIDevice *d, Error **errp) + 
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); + } + ++ if (!mch->has_smm_ranges) { ++ return; ++ } ++ + /* if *disabled* show SMRAM to all CPUs */ + memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", + mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 87420783ab..467e7fb52f 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -164,6 +164,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); + #define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size" + #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" + #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" ++#define PCI_HOST_PROP_SMM_RANGES "smm-ranges" + + + void pc_pci_as_mapping_init(MemoryRegion *system_memory, +diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h +index bafcbe6752..22fadfa3ed 100644 +--- a/include/hw/pci-host/q35.h ++++ b/include/hw/pci-host/q35.h +@@ -50,6 +50,7 @@ struct MCHPCIState { + MemoryRegion tseg_blackhole, tseg_window; + MemoryRegion smbase_blackhole, smbase_window; + bool has_smram_at_smbase; ++ bool has_smm_ranges; + Range pci_hole; + uint64_t below_4g_mem_size; + uint64_t above_4g_mem_size; +-- +2.39.3 + diff --git a/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch b/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch new file mode 100644 index 0000000..00cdae0 --- /dev/null +++ b/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch @@ -0,0 +1,117 @@ +From 97739ee2ed20856fe395248d399dfc7f9ab4f33d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 11 Apr 2024 15:06:01 +0200 +Subject: [PATCH 1/4] qcow2: Don't open data_file with BDRV_O_NO_IO + +RH-Author: Hana Czenczek +RH-MergeRequest: 1: CVE 2024-4467 (PRDSC) +RH-Jira: RHEL-35611 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake +RH-Commit: [1/4] f9843ce5c519901654a7d8ba43ee95ce25ca13c2 + +One use 
case for 'qemu-img info' is verifying that untrusted images +don't reference an unwanted external file, be it as a backing file or an +external data file. To make sure that calling 'qemu-img info' can't +already have undesired side effects with a malicious image, just don't +open the data file at all with BDRV_O_NO_IO. If nothing ever tries to do +I/O, we don't need to have it open. + +This changes the output of iotests case 061, which used 'qemu-img info' +to show that opening an image with an invalid data file fails. After +this patch, it succeeds. Replace this part of the test with a qemu-io +call, but keep the final 'qemu-img info' to show that the invalid data +file is correctly displayed in the output. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Upstream: N/A, embargoed +Signed-off-by: Hanna Czenczek +--- + block/qcow2.c | 17 ++++++++++++++++- + tests/qemu-iotests/061 | 6 ++++-- + tests/qemu-iotests/061.out | 8 ++++++-- + 3 files changed, 26 insertions(+), 5 deletions(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 0ebd455dc8..a4cffb628c 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1642,7 +1642,22 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, + goto fail; + } + +- if (open_data_file) { ++ if (open_data_file && (flags & BDRV_O_NO_IO)) { ++ /* ++ * Don't open the data file for 'qemu-img info' so that it can be used ++ * to verify that an untrusted qcow2 image doesn't refer to external ++ * files. ++ * ++ * Note: This still makes has_data_file() return true. 
++ */ ++ if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { ++ s->data_file = NULL; ++ } else { ++ s->data_file = bs->file; ++ } ++ qdict_extract_subqdict(options, NULL, "data-file."); ++ qdict_del(options, "data-file"); ++ } else if (open_data_file) { + /* Open external data file */ + bdrv_graph_co_rdunlock(); + s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs, +diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 +index 53c7d428e3..b71ac097d1 100755 +--- a/tests/qemu-iotests/061 ++++ b/tests/qemu-iotests/061 +@@ -326,12 +326,14 @@ $QEMU_IMG amend -o "data_file=foo" "$TEST_IMG" + echo + _make_test_img -o "compat=1.1,data_file=$TEST_IMG.data" 64M + $QEMU_IMG amend -o "data_file=foo" "$TEST_IMG" +-_img_info --format-specific ++$QEMU_IO -c "read 0 4k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt ++$QEMU_IO -c "open -o data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" -c "read 0 4k" | _filter_qemu_io + TEST_IMG="data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" _img_info --format-specific --image-opts + + echo + $QEMU_IMG amend -o "data_file=" --image-opts "data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" +-_img_info --format-specific ++$QEMU_IO -c "read 0 4k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt ++$QEMU_IO -c "open -o data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" -c "read 0 4k" | _filter_qemu_io + TEST_IMG="data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" _img_info --format-specific --image-opts + + echo +diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out +index 139fc68177..24c33add7c 100644 +--- a/tests/qemu-iotests/061.out ++++ b/tests/qemu-iotests/061.out +@@ -545,7 +545,9 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + qemu-img: data-file can only be set for images that use an external data file + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 data_file=TEST_DIR/t.IMGFMT.data +-qemu-img: Could not open 
'TEST_DIR/t.IMGFMT': Could not open 'foo': No such file or directory ++qemu-io: can't open device TEST_DIR/t.IMGFMT: Could not open 'foo': No such file or directory ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +@@ -560,7 +562,9 @@ Format specific information: + corrupt: false + extended l2: false + +-qemu-img: Could not open 'TEST_DIR/t.IMGFMT': 'data-file' is required for this image ++qemu-io: can't open device TEST_DIR/t.IMGFMT: 'data-file' is required for this image ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +-- +2.39.3 + diff --git a/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch b/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch deleted file mode 100644 index b31142e..0000000 --- a/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch +++ /dev/null @@ -1,167 +0,0 @@ -From f1e82fe5076b4030d385dfa49b8284899386114d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 20 Dec 2023 08:47:54 -0500 -Subject: [PATCH 08/22] qdev: add IOThreadVirtQueueMappingList property type - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [4/17] 817aa1339da8ed3814730473342ba045e66d5b51 (stefanha/centos-stream-qemu-kvm) - -virtio-blk and virtio-scsi devices will need a way to specify the -mapping between IOThreads and virtqueues. At the moment all virtqueues -are assigned to a single IOThread or the main loop. This single thread -can be a CPU bottleneck, so it is necessary to allow finer-grained -assignment to spread the load. 
- -Introduce DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST() so devices can take a -parameter that maps virtqueues to IOThreads. The command-line syntax for -this new property is as follows: - - --device '{"driver":"foo","iothread-vq-mapping":[{"iothread":"iothread0","vqs":[0,1,2]},...]}' - -IOThreads are specified by name and virtqueues are specified by 0-based -index. - -It will be common to simply assign virtqueues round-robin across a set -of IOThreads. A convenient syntax that does not require specifying -individual virtqueue indices is available: - - --device '{"driver":"foo","iothread-vq-mapping":[{"iothread":"iothread0"},{"iothread":"iothread1"},...]}' - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231220134755.814917-4-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit cf03a152c5d749fd0083bfe540df9524f1d2ff1d) -Signed-off-by: Stefan Hajnoczi ---- - hw/core/qdev-properties-system.c | 46 +++++++++++++++++++++++++++++ - include/hw/qdev-properties-system.h | 5 ++++ - qapi/virtio.json | 29 ++++++++++++++++++ - 3 files changed, 80 insertions(+) - -diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c -index 73cced4626..1a396521d5 100644 ---- a/hw/core/qdev-properties-system.c -+++ b/hw/core/qdev-properties-system.c -@@ -18,6 +18,7 @@ - #include "qapi/qapi-types-block.h" - #include "qapi/qapi-types-machine.h" - #include "qapi/qapi-types-migration.h" -+#include "qapi/qapi-visit-virtio.h" - #include "qapi/qmp/qerror.h" - #include "qemu/ctype.h" - #include "qemu/cutils.h" -@@ -1160,3 +1161,48 @@ const PropertyInfo qdev_prop_cpus390entitlement = { - .set = qdev_propinfo_set_enum, - .set_default_value = qdev_propinfo_set_default_value_enum, - }; -+ -+/* --- IOThreadVirtQueueMappingList --- */ -+ -+static void get_iothread_vq_mapping_list(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ IOThreadVirtQueueMappingList **prop_ptr = -+ object_field_prop_ptr(obj, opaque); 
-+ -+ visit_type_IOThreadVirtQueueMappingList(v, name, prop_ptr, errp); -+} -+ -+static void set_iothread_vq_mapping_list(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ IOThreadVirtQueueMappingList **prop_ptr = -+ object_field_prop_ptr(obj, opaque); -+ IOThreadVirtQueueMappingList *list; -+ -+ if (!visit_type_IOThreadVirtQueueMappingList(v, name, &list, errp)) { -+ return; -+ } -+ -+ qapi_free_IOThreadVirtQueueMappingList(*prop_ptr); -+ *prop_ptr = list; -+} -+ -+static void release_iothread_vq_mapping_list(Object *obj, -+ const char *name, void *opaque) -+{ -+ IOThreadVirtQueueMappingList **prop_ptr = -+ object_field_prop_ptr(obj, opaque); -+ -+ qapi_free_IOThreadVirtQueueMappingList(*prop_ptr); -+ *prop_ptr = NULL; -+} -+ -+const PropertyInfo qdev_prop_iothread_vq_mapping_list = { -+ .name = "IOThreadVirtQueueMappingList", -+ .description = "IOThread virtqueue mapping list [{\"iothread\":\"\", " -+ "\"vqs\":[1,2,3,...]},...]", -+ .get = get_iothread_vq_mapping_list, -+ .set = set_iothread_vq_mapping_list, -+ .release = release_iothread_vq_mapping_list, -+}; -diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h -index 91f7a2452d..06c359c190 100644 ---- a/include/hw/qdev-properties-system.h -+++ b/include/hw/qdev-properties-system.h -@@ -24,6 +24,7 @@ extern const PropertyInfo qdev_prop_off_auto_pcibar; - extern const PropertyInfo qdev_prop_pcie_link_speed; - extern const PropertyInfo qdev_prop_pcie_link_width; - extern const PropertyInfo qdev_prop_cpus390entitlement; -+extern const PropertyInfo qdev_prop_iothread_vq_mapping_list; - - #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d) \ - DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t) -@@ -82,4 +83,8 @@ extern const PropertyInfo qdev_prop_cpus390entitlement; - DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_cpus390entitlement, \ - CpuS390Entitlement) - -+#define DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST(_name, _state, _field) \ -+ 
DEFINE_PROP(_name, _state, _field, qdev_prop_iothread_vq_mapping_list, \ -+ IOThreadVirtQueueMappingList *) -+ - #endif -diff --git a/qapi/virtio.json b/qapi/virtio.json -index e6dcee7b83..19c7c36e36 100644 ---- a/qapi/virtio.json -+++ b/qapi/virtio.json -@@ -928,3 +928,32 @@ - 'data': { 'path': 'str', 'queue': 'uint16', '*index': 'uint16' }, - 'returns': 'VirtioQueueElement', - 'features': [ 'unstable' ] } -+ -+## -+# @IOThreadVirtQueueMapping: -+# -+# Describes the subset of virtqueues assigned to an IOThread. -+# -+# @iothread: the id of IOThread object -+# -+# @vqs: an optional array of virtqueue indices that will be handled by this -+# IOThread. When absent, virtqueues are assigned round-robin across all -+# IOThreadVirtQueueMappings provided. Either all IOThreadVirtQueueMappings -+# must have @vqs or none of them must have it. -+# -+# Since: 9.0 -+## -+ -+{ 'struct': 'IOThreadVirtQueueMapping', -+ 'data': { 'iothread': 'str', '*vqs': ['uint16'] } } -+ -+## -+# @DummyVirtioForceArrays: -+# -+# Not used by QMP; hack to let us use IOThreadVirtQueueMappingList internally -+# -+# Since: 9.0 -+## -+ -+{ 'struct': 'DummyVirtioForceArrays', -+ 'data': { 'unused-iothread-vq-mapping': ['IOThreadVirtQueueMapping'] } } --- -2.39.3 - diff --git a/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch b/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch deleted file mode 100644 index 94bb716..0000000 --- a/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 4251aab5b2beb68d1800cd4a329361ff6f57c430 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 20 Dec 2023 08:47:52 -0500 -Subject: [PATCH 07/22] qdev-properties: alias all object class properties - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [3/17] 
bc5d0aafe4645dacf9277904a2b20760d6e676e1 (stefanha/centos-stream-qemu-kvm) - -qdev_alias_all_properties() aliases a DeviceState's qdev properties onto -an Object. This is used for VirtioPCIProxy types so that --device -virtio-blk-pci has properties of its embedded --device virtio-blk-device -object. - -Currently this function is implemented using qdev properties. Change the -function to use QOM object class properties instead. This works because -qdev properties create QOM object class properties, but it also catches -any QOM object class-only properties that have no qdev properties. - -This change ensures that properties of devices are shown with --device -foo,\? even if they are QOM object class properties. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231220134755.814917-2-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 350147a871a545ab56b4a1062c8485635d9ffc24) -Signed-off-by: Stefan Hajnoczi ---- - hw/core/qdev-properties.c | 18 ++++++++++-------- - include/hw/qdev-properties.h | 4 ++-- - 2 files changed, 12 insertions(+), 10 deletions(-) - -diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c -index 840006e953..7d6fa726fd 100644 ---- a/hw/core/qdev-properties.c -+++ b/hw/core/qdev-properties.c -@@ -1076,16 +1076,18 @@ void device_class_set_props(DeviceClass *dc, Property *props) - void qdev_alias_all_properties(DeviceState *target, Object *source) - { - ObjectClass *class; -- Property *prop; -+ ObjectPropertyIterator iter; -+ ObjectProperty *prop; - - class = object_get_class(OBJECT(target)); -- do { -- DeviceClass *dc = DEVICE_CLASS(class); - -- for (prop = dc->props_; prop && prop->name; prop++) { -- object_property_add_alias(source, prop->name, -- OBJECT(target), prop->name); -+ object_class_property_iter_init(&iter, class); -+ while ((prop = object_property_iter_next(&iter))) { -+ if (object_property_find(source, prop->name)) { -+ continue; /* skip duplicate properties */ - } -- class = 
object_class_get_parent(class); -- } while (class != object_class_by_name(TYPE_DEVICE)); -+ -+ object_property_add_alias(source, prop->name, -+ OBJECT(target), prop->name); -+ } - } -diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h -index 25743a29a0..09aa04ca1e 100644 ---- a/include/hw/qdev-properties.h -+++ b/include/hw/qdev-properties.h -@@ -230,8 +230,8 @@ void qdev_property_add_static(DeviceState *dev, Property *prop); - * @target: Device which has properties to be aliased - * @source: Object to add alias properties to - * -- * Add alias properties to the @source object for all qdev properties on -- * the @target DeviceState. -+ * Add alias properties to the @source object for all properties on the @target -+ * DeviceState. - * - * This is useful when @target is an internal implementation object - * owned by @source, and you want to expose all the properties of that --- -2.39.3 - diff --git a/SOURCES/kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch b/SOURCES/kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch new file mode 100644 index 0000000..967adc1 --- /dev/null +++ b/SOURCES/kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch @@ -0,0 +1,40 @@ +From 97e10c2ccc8dc29019d6d22de1d23c55fea0f6c4 Mon Sep 17 00:00:00 2001 +From: Dehan Meng +Date: Wed, 21 Aug 2024 14:55:01 +0800 +Subject: [PATCH] qemu-guest-agent: Update the logfile path of + qga-fsfreeze-hook.log + +RH-Author: 6-dehan +RH-MergeRequest: 265: qemu-guest-agent: Update the logfile path of qga-fsfreeze-hook.log +RH-Jira: RHEL-52250 +RH-Acked-by: Konstantin Kostiuk +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 7c5cfb882dbc277becb7daa2c5d6b8eff3d601b2 (6-dehan/src_centosupstream_qemu-kvm) + +selinux context 'system_u:object_r:virt_qemu_ga_log_t:s0', it +should be changed to '/var/log/qemu-ga/qga-fsfreeze-hook.log'. And +it's worth to mention that this is RHEL-only change for matching +existing SELinux boolean and policy. 
+ +Jira: https://issues.redhat.com/browse/RHEL-52250 +Signed-off-by: Dehan Meng +--- + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index e9b84ec028..70536ba3e3 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -7,7 +7,7 @@ + # "freeze" argument before the filesystem is frozen. And for fsfreeze-thaw + # request, it is issued with "thaw" argument after filesystem is thawed. + +-LOGFILE=/var/log/qga-fsfreeze-hook.log ++LOGFILE=/var/log/qemu-ga/qga-fsfreeze-hook.log + FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored +-- +2.39.3 + diff --git a/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch b/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch new file mode 100644 index 0000000..c60570e --- /dev/null +++ b/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch @@ -0,0 +1,129 @@ +From fef17d3466deba9b4d581604de1c64c210d1a454 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 17 May 2024 21:50:14 -0500 +Subject: [PATCH 1/4] qio: Inherit follow_coroutine_ctx across TLS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 244: qio: Inherit follow_coroutine_ctx across TLS +RH-Jira: RHEL-33440 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/2] 447b144638a1f032fa036838f5f1839628389fe5 (ebblake/centos-qemu-kvm) + +Since qemu 8.2, the combination of NBD + TLS + iothread crashes on an +assertion failure: + +qemu-kvm: ../io/channel.c:534: void qio_channel_restart_read(void *): Assertion `qemu_get_current_aio_context() == qemu_coroutine_get_aio_context(co)' failed. 
+ +It turns out that when we removed AioContext locking, we did so by +having NBD tell its qio channels that it wanted to opt in to +qio_channel_set_follow_coroutine_ctx(); but while we opted in on the +main channel, we did not opt in on the TLS wrapper channel. +qemu-iotests has coverage of NBD+iothread and NBD+TLS, but apparently +no coverage of NBD+TLS+iothread, or we would have noticed this +regression sooner. (I'll add that in the next patch) + +But while we could manually opt in to the TLS channel in nbd/server.c +(a one-line change), it is more generic if all qio channels that wrap +other channels inherit the follow status, in the same way that they +inherit feature bits. + +CC: Stefan Hajnoczi +CC: Daniel P. Berrangé +CC: qemu-stable@nongnu.org +Fixes: https://issues.redhat.com/browse/RHEL-34786 +Fixes: 06e0f098 ("io: follow coroutine AioContext in qio_channel_yield()", v8.2.0) +Signed-off-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Message-ID: <20240518025246.791593-5-eblake@redhat.com> +(cherry picked from commit 199e84de1c903ba5aa1f7256310bbc4a20dd930b) +Jira: https://issues.redhat.com/browse/RHEL-33440 +Signed-off-by: Eric Blake +--- + io/channel-tls.c | 26 +++++++++++++++----------- + io/channel-websock.c | 1 + + 2 files changed, 16 insertions(+), 11 deletions(-) + +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 1d9c9c72bf..67b9700006 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -69,37 +69,40 @@ qio_channel_tls_new_server(QIOChannel *master, + const char *aclname, + Error **errp) + { +- QIOChannelTLS *ioc; ++ QIOChannelTLS *tioc; ++ QIOChannel *ioc; + +- ioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); ++ tioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); ++ ioc = QIO_CHANNEL(tioc); + +- ioc->master = master; ++ tioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { +- 
qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SHUTDOWN); ++ qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } + object_ref(OBJECT(master)); + +- ioc->session = qcrypto_tls_session_new( ++ tioc->session = qcrypto_tls_session_new( + creds, + NULL, + aclname, + QCRYPTO_TLS_CREDS_ENDPOINT_SERVER, + errp); +- if (!ioc->session) { ++ if (!tioc->session) { + goto error; + } + + qcrypto_tls_session_set_callbacks( +- ioc->session, ++ tioc->session, + qio_channel_tls_write_handler, + qio_channel_tls_read_handler, +- ioc); ++ tioc); + +- trace_qio_channel_tls_new_server(ioc, master, creds, aclname); +- return ioc; ++ trace_qio_channel_tls_new_server(tioc, master, creds, aclname); ++ return tioc; + + error: +- object_unref(OBJECT(ioc)); ++ object_unref(OBJECT(tioc)); + return NULL; + } + +@@ -116,6 +119,7 @@ qio_channel_tls_new_client(QIOChannel *master, + ioc = QIO_CHANNEL(tioc); + + tioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } +diff --git a/io/channel-websock.c b/io/channel-websock.c +index a12acc27cf..de39f0d182 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -883,6 +883,7 @@ qio_channel_websock_new_server(QIOChannel *master) + ioc = QIO_CHANNEL(wioc); + + wioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch b/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch new file mode 100644 index 0000000..c6948ae --- /dev/null +++ b/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch @@ -0,0 +1,36 @@ +From 97334815c4a7cf0911b30c1366bbe67e883c57dd Mon Sep 17 
00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 5 Jun 2024 10:28:20 +0400 +Subject: [PATCH 4/4] rhel 9.4.0 machine type compat for virtio-gpu migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 246: virtio-gpu: fix v2 migration +RH-Jira: RHEL-34621 +RH-Acked-by: Peter Xu +RH-Acked-by: Thomas Huth +RH-Commit: [2/2] 2a895d510453e83bb45dfb39b7751bcc2f309cc5 (marcandre.lureau-rh/qemu-kvm-centos) + +Signed-off-by: Marc-André Lureau +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index cf1d7faaaf..92609aae27 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -310,6 +310,8 @@ GlobalProperty hw_compat_rhel_9_5[] = { + { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, + /* hw_compat_rhel_9_5 from hw_compat_8_2 */ + { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" }, ++ /* hw_compat_rhel_9_5 from hw_compat_8_2 */ ++ { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, + }; + const size_t hw_compat_rhel_9_5_len = G_N_ELEMENTS(hw_compat_rhel_9_5); + +-- +2.39.3 + diff --git a/SOURCES/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch b/SOURCES/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch new file mode 100644 index 0000000..4f757bf --- /dev/null +++ b/SOURCES/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch @@ -0,0 +1,67 @@ +From 4c93bec108f7e3918a2ef91b51cec477ade38cc3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 17:45:56 -0400 +Subject: [PATCH 018/100] runstate: skip initial CPU reset if reset is not + actually possible +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [18/91] 
ced267fdaadbf2072c1897223522457a006e6c81 (bonzini/rhel-qemu-kvm) + +Right now, the system reset is concluded by a call to +cpu_synchronize_all_post_reset() in order to sync any changes +that the machine reset callback applied to the CPU state. + +However, for VMs with encrypted state such as SEV-ES guests (currently +the only case of guests with non-resettable CPUs) this cannot be done, +because guest state has already been finalized by machine-init-done notifiers. +cpu_synchronize_all_post_reset() does nothing on these guests, and actually +we would like to make it fail if called once guest has been encrypted. +So, assume that boards that support non-resettable CPUs do not touch +CPU state and that all such setup is done before, at the time of +cpu_synchronize_all_post_init(). + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 08b2d15cdd0d3fbbe37ce23bf192b770db3a7539) +Signed-off-by: Paolo Bonzini +--- + system/runstate.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/system/runstate.c b/system/runstate.c +index d6ab860eca..cb4905a40f 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -501,7 +501,20 @@ void qemu_system_reset(ShutdownCause reason) + default: + qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); + } +- cpu_synchronize_all_post_reset(); ++ ++ /* ++ * Some boards use the machine reset callback to point CPUs to the firmware ++ * entry point. Assume that this is not the case for boards that support ++ * non-resettable CPUs (currently used only for confidential guests), in ++ * which case cpu_synchronize_all_post_init() is enough because ++ * it does _more_ than cpu_synchronize_all_post_reset(). 
++ */ ++ if (cpus_are_resettable()) { ++ cpu_synchronize_all_post_reset(); ++ } else { ++ assert(runstate_check(RUN_STATE_PRELAUNCH)); ++ } ++ + vm_set_suspended(false); + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch b/SOURCES/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch new file mode 100644 index 0000000..bc6e4e3 --- /dev/null +++ b/SOURCES/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch @@ -0,0 +1,109 @@ +From 4ebc58d4a7a3d4a20f20f1cd3f21082b80097fe2 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:38 -0500 +Subject: [PATCH 014/100] s390: Switch to use confidential_guest_kvm_init() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [14/91] 8c9e09ec9976c00b41c02868fff034286a341468 (bonzini/rhel-qemu-kvm) + +Use unified confidential_guest_kvm_init() for consistency with +other architectures. 
+ +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a14a2b0148e657cc526b7a75f2a1937628764e7a) +Signed-off-by: Paolo Bonzini +--- + hw/s390x/s390-virtio-ccw.c | 5 ++++- + target/s390x/kvm/pv.c | 10 +++++++++- + target/s390x/kvm/pv.h | 14 -------------- + 3 files changed, 13 insertions(+), 16 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 9ad54682c6..828ce6e87e 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "exec/ram_addr.h" ++#include "exec/confidential-guest-support.h" + #include "hw/s390x/s390-virtio-hcall.h" + #include "hw/s390x/sclp.h" + #include "hw/s390x/s390_flic.h" +@@ -260,7 +261,9 @@ static void ccw_init(MachineState *machine) + s390_init_cpus(machine); + + /* Need CPU model to be determined before we can set up PV */ +- s390_pv_init(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_init(machine->cgs, &error_fatal); ++ } + + s390_flic_init(); + +diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c +index 7ca7faec73..dde836d21a 100644 +--- a/target/s390x/kvm/pv.c ++++ b/target/s390x/kvm/pv.c +@@ -334,12 +334,17 @@ static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) + return s390_pv_check_cpus(errp); + } + +-int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { + return 0; + } + ++ if (!kvm_enabled()) { ++ error_setg(errp, "Protected Virtualization requires KVM"); ++ return -1; ++ } ++ + if (!s390_has_feat(S390_FEAT_UNPACK)) { + error_setg(errp, + "CPU model does not support Protected Virtualization"); +@@ -364,6 +369,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest, + + static void 
s390_pv_guest_class_init(ObjectClass *oc, void *data) + { ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = s390_pv_kvm_init; + } + + static void s390_pv_guest_init(Object *obj) +diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h +index 5877d28ff1..4b40817439 100644 +--- a/target/s390x/kvm/pv.h ++++ b/target/s390x/kvm/pv.h +@@ -80,18 +80,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, + static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } + #endif /* CONFIG_KVM */ + +-int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp) +-{ +- if (!cgs) { +- return 0; +- } +- if (kvm_enabled()) { +- return s390_pv_kvm_init(cgs, errp); +- } +- +- error_setg(errp, "Protected Virtualization requires KVM"); +- return -1; +-} +- + #endif /* HW_S390_PV_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch b/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch deleted file mode 100644 index 1b4a4ab..0000000 --- a/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 64b0180f5a52668f8ac4c444ba369231dbc4d5b9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 22 Jan 2024 09:25:53 +0100 -Subject: [PATCH 096/101] s390x/pci: avoid double enable/disable of aif -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices -RH-Jira: RHEL-21169 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Commit: [1/3] ebdf8a474ea21486f5ec051683f17bae6d20f675 (clegoate/qemu-kvm-c9s) - -JIRA: https://issues.redhat.com/browse/RHEL-21169 - -commit 07b2c8e034d80ff92e202405c494d2ff80fcf848 -Author: Matthew Rosato -Date: Thu Jan 18 13:51:49 2024 
-0500 - - s390x/pci: avoid double enable/disable of aif - - Use a flag to keep track of whether AIF is currently enabled. This can be - used to avoid enabling/disabling AIF multiple times as well as to determine - whether or not it should be disabled during reset processing. - - Fixes: d0bc7091c2 ("s390x/pci: enable adapter event notification for interpreted devices") - Reported-by: Cédric Le Goater - Reviewed-by: Eric Farman - Signed-off-by: Matthew Rosato - Message-ID: <20240118185151.265329-2-mjrosato@linux.ibm.com> - Reviewed-by: Cédric Le Goater - Signed-off-by: Thomas Huth - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-kvm.c | 25 +++++++++++++++++++++++-- - include/hw/s390x/s390-pci-bus.h | 1 + - 2 files changed, 24 insertions(+), 2 deletions(-) - -diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c -index ff41e4106d..1ee510436c 100644 ---- a/hw/s390x/s390-pci-kvm.c -+++ b/hw/s390x/s390-pci-kvm.c -@@ -27,6 +27,7 @@ bool s390_pci_kvm_interp_allowed(void) - - int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) - { -+ int rc; - struct kvm_s390_zpci_op args = { - .fh = pbdev->fh, - .op = KVM_S390_ZPCIOP_REG_AEN, -@@ -38,15 +39,35 @@ int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) - .u.reg_aen.flags = (assist) ? 
0 : KVM_S390_ZPCIOP_REGAEN_HOST - }; - -- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); -+ if (pbdev->aif) { -+ return -EINVAL; -+ } -+ -+ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); -+ if (rc == 0) { -+ pbdev->aif = true; -+ } -+ -+ return rc; - } - - int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) - { -+ int rc; -+ - struct kvm_s390_zpci_op args = { - .fh = pbdev->fh, - .op = KVM_S390_ZPCIOP_DEREG_AEN - }; - -- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); -+ if (!pbdev->aif) { -+ return -EINVAL; -+ } -+ -+ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); -+ if (rc == 0) { -+ pbdev->aif = false; -+ } -+ -+ return rc; - } -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index b1bdbeaeb5..435e788867 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -361,6 +361,7 @@ struct S390PCIBusDevice { - bool unplug_requested; - bool interp; - bool forwarding_assist; -+ bool aif; - QTAILQ_ENTRY(S390PCIBusDevice) link; - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch b/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch deleted file mode 100644 index f3a4129..0000000 --- a/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch +++ /dev/null @@ -1,137 +0,0 @@ -From c885b17e09ab19a3e8d3b2e1765963811af6f764 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 22 Jan 2024 09:25:53 +0100 -Subject: [PATCH 098/101] s390x/pci: drive ISM reset from subsystem reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices -RH-Jira: RHEL-21169 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Commit: [3/3] 426cf156a2c67e6dcd7483a769fa3741e2700504 (clegoate/qemu-kvm-c9s) - -JIRA: https://issues.redhat.com/browse/RHEL-21169 - 
-commit 68c691ca99a2538d6a53a70ce8a9ce06ee307ff1 -Author: Matthew Rosato -Date: Thu Jan 18 13:51:51 2024 -0500 - - s390x/pci: drive ISM reset from subsystem reset - - ISM devices are sensitive to manipulation of the IOMMU, so the ISM device - needs to be reset before the vfio-pci device is reset (triggering a full - UNMAP). In order to ensure this occurs, trigger ISM device resets from - subsystem_reset before triggering the PCI bus reset (which will also - trigger vfio-pci reset). This only needs to be done for ISM devices - which were enabled for use by the guest. - Further, ensure that AIF is disabled as part of the reset event. - - Fixes: ef1535901a ("s390x: do a subsystem reset before the unprotect on reboot") - Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset") - Reported-by: Cédric Le Goater - Signed-off-by: Matthew Rosato - Message-ID: <20240118185151.265329-4-mjrosato@linux.ibm.com> - Reviewed-by: Eric Farman - Reviewed-by: Cédric Le Goater - Signed-off-by: Thomas Huth - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-bus.c | 26 +++++++++++++++++--------- - hw/s390x/s390-virtio-ccw.c | 8 ++++++++ - include/hw/s390x/s390-pci-bus.h | 1 + - 3 files changed, 26 insertions(+), 9 deletions(-) - -diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c -index 347580ebac..3e57d5faca 100644 ---- a/hw/s390x/s390-pci-bus.c -+++ b/hw/s390x/s390-pci-bus.c -@@ -151,20 +151,12 @@ static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) - pci_device_reset(pbdev->pdev); - } - --static void s390_pci_reset_cb(void *opaque) --{ -- S390PCIBusDevice *pbdev = opaque; -- -- pci_device_reset(pbdev->pdev); --} -- - static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) - { - HotplugHandler *hotplug_ctrl; - - if (pbdev->pft == ZPCI_PFT_ISM) { - notifier_remove(&pbdev->shutdown_notifier); -- qemu_unregister_reset(s390_pci_reset_cb, pbdev); - } - - /* Unplug the PCI device */ -@@ -1132,7 +1124,6 @@ static void 
s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - if (pbdev->pft == ZPCI_PFT_ISM) { - pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; - qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); -- qemu_register_reset(s390_pci_reset_cb, pbdev); - } - } else { - pbdev->fh |= FH_SHM_EMUL; -@@ -1279,6 +1270,23 @@ static void s390_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, - pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1); - } - -+void s390_pci_ism_reset(void) -+{ -+ S390pciState *s = s390_get_phb(); -+ -+ S390PCIBusDevice *pbdev, *next; -+ -+ /* Trigger reset event for each passthrough ISM device currently in-use */ -+ QTAILQ_FOREACH_SAFE(pbdev, &s->zpci_devs, link, next) { -+ if (pbdev->interp && pbdev->pft == ZPCI_PFT_ISM && -+ pbdev->fh & FH_MASK_ENABLE) { -+ s390_pci_kvm_aif_disable(pbdev); -+ -+ pci_device_reset(pbdev->pdev); -+ } -+ } -+} -+ - static void s390_pcihost_reset(DeviceState *dev) - { - S390pciState *s = S390_PCI_HOST_BRIDGE(dev); -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index e26ce26f5a..24f4773179 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -118,6 +118,14 @@ static void subsystem_reset(void) - DeviceState *dev; - int i; - -+ /* -+ * ISM firmware is sensitive to unexpected changes to the IOMMU, which can -+ * occur during reset of the vfio-pci device (unmap of entire aperture). -+ * Ensure any passthrough ISM devices are reset now, while CPUs are paused -+ * but before vfio-pci cleanup occurs. 
-+ */ -+ s390_pci_ism_reset(); -+ - for (i = 0; i < ARRAY_SIZE(reset_dev_types); i++) { - dev = DEVICE(object_resolve_path_type("", reset_dev_types[i], NULL)); - if (dev) { -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index 435e788867..2c43ea123f 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -401,5 +401,6 @@ S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s, - const char *target); - S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s, - S390PCIBusDevice *pbdev); -+void s390_pci_ism_reset(void); - - #endif --- -2.39.3 - diff --git a/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch b/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch deleted file mode 100644 index 845a467..0000000 --- a/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 49078bdfd4c116da3e920632ec6f7041f1b38015 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 22 Jan 2024 09:25:53 +0100 -Subject: [PATCH 097/101] s390x/pci: refresh fh before disabling aif -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices -RH-Jira: RHEL-21169 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Commit: [2/3] 3523067909c41818dfc769abdb93930833416c11 (clegoate/qemu-kvm-c9s) - -JIRA: https://issues.redhat.com/browse/RHEL-21169 - -commit 30e35258e25c75c9d799c34fd89afcafffb37084 -Author: Matthew Rosato -Date: Thu Jan 18 13:51:50 2024 -0500 - - s390x/pci: refresh fh before disabling aif - - Typically we refresh the host fh during CLP enable, however it's possible - that the device goes through multiple reset events before the guest - performs another CLP enable. Let's handle this for now by refreshing the - host handle from vfio before disabling aif. 
- - Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset") - Reported-by: Cédric Le Goater - Reviewed-by: Eric Farman - Signed-off-by: Matthew Rosato - Message-ID: <20240118185151.265329-3-mjrosato@linux.ibm.com> - Reviewed-by: Cédric Le Goater - Signed-off-by: Thomas Huth - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-kvm.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c -index 1ee510436c..9eef4fc3ec 100644 ---- a/hw/s390x/s390-pci-kvm.c -+++ b/hw/s390x/s390-pci-kvm.c -@@ -18,6 +18,7 @@ - #include "hw/s390x/s390-pci-bus.h" - #include "hw/s390x/s390-pci-kvm.h" - #include "hw/s390x/s390-pci-inst.h" -+#include "hw/s390x/s390-pci-vfio.h" - #include "cpu_models.h" - - bool s390_pci_kvm_interp_allowed(void) -@@ -64,6 +65,14 @@ int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) - return -EINVAL; - } - -+ /* -+ * The device may have already been reset but we still want to relinquish -+ * the guest ISC, so always be sure to use an up-to-date host fh. 
-+ */ -+ if (!s390_pci_get_host_fh(pbdev, &args.fh)) { -+ return -EPERM; -+ } -+ - rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); - if (rc == 0) { - pbdev->aif = false; --- -2.39.3 - diff --git a/SOURCES/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch b/SOURCES/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch new file mode 100644 index 0000000..2c0fb91 --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch @@ -0,0 +1,186 @@ +From 3d197f42afea6d0b176c2b26b772965692ffeab3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Alex=20Benn=C3=A9e?= +Date: Tue, 14 May 2024 18:42:44 +0100 +Subject: [PATCH 047/100] scripts/update-linux-header.sh: be more src tree + friendly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [47/91] c4165cc8bf79c3f96912e8210b3bb3565add288f (bonzini/rhel-qemu-kvm) + +Running "install_headers" in the Linux source tree is fairly +unfriendly as out-of-tree builds will start complaining about the +kernel source being non-pristine. As we have a temporary directory for +the install we should also do the build step here. So now we have: + + $tmpdir/ + $blddir/ + $hdrdir/ + +Reviewed-by: Pierrick Bouvier +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Alex Bennée +Message-Id: <20240514174253.694591-3-alex.bennee@linaro.org> +(cherry picked from commit b51ddd937f11f76614d4b36d14d8778df242661c) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 80 +++++++++++++++++---------------- + 1 file changed, 41 insertions(+), 39 deletions(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 5f20434d5c..4431ba4d54 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -27,6 +27,8 @@ + # types like "__u64". This work is done in the cp_portable function. + + tmpdir=$(mktemp -d) ++hdrdir="$tmpdir/headers" ++blddir="$tmpdir/build" + linux="$1" + output="$2" + +@@ -111,56 +113,56 @@ for arch in $ARCHLIST; do + arch_var=ARCH + fi + +- make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install ++ make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install + + rm -rf "$output/linux-headers/asm-$arch" + mkdir -p "$output/linux-headers/asm-$arch" + for header in kvm.h unistd.h bitsperlong.h mman.h; do +- cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" + done + + if [ $arch = mips ]; then +- cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" + fi + if [ $arch = powerpc ]; then +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" +- cp 
"$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" + fi + + rm -rf "$output/include/standard-headers/asm-$arch" + mkdir -p "$output/include/standard-headers/asm-$arch" + if [ $arch = s390 ]; then +- cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" +- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" ++ cp_portable "$hdrdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" + fi + if [ $arch = arm ]; then +- cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" +- cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" +- cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" + fi + if [ $arch = arm64 ]; then +- cp "$tmpdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" ++ cp "$hdrdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" + fi + if [ $arch = x86 ]; then +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" +- cp "$tmpdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" +- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" +- cp_portable "$tmpdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" ++ cp "$hdrdir/include/asm/unistd_x32.h" 
"$output/linux-headers/asm-x86/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" ++ cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" + # Remove everything except the macros from bootparam.h avoiding the + # unnecessary import of several video/ist/etc headers + sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ +- "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" +- cp_portable "$tmpdir/bootparam.h" \ ++ "$hdrdir/include/asm/bootparam.h" > "$hdrdir/bootparam.h" ++ cp_portable "$hdrdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" +- cp_portable "$tmpdir/include/asm/setup_data.h" \ ++ cp_portable "$hdrdir/include/asm/setup_data.h" \ + "$output/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then +- cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" ++ cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + fi + done + arch= +@@ -170,13 +172,13 @@ mkdir -p "$output/linux-headers/linux" + for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ + psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ + vduse.h iommufd.h bits.h; do +- cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" ++ cp "$hdrdir/include/linux/$header" "$output/linux-headers/linux" + done + + rm -rf "$output/linux-headers/asm-generic" + mkdir -p "$output/linux-headers/asm-generic" + for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do +- cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" ++ cp "$hdrdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" + done + + if [ -L "$linux/source" ]; then +@@ -211,23 +213,23 @@ EOF + + rm -rf "$output/include/standard-headers/linux" + mkdir -p "$output/include/standard-headers/linux" +-for i in "$tmpdir"/include/linux/*virtio*.h \ +- "$tmpdir/include/linux/qemu_fw_cfg.h" \ +- "$tmpdir/include/linux/fuse.h" \ +- 
"$tmpdir/include/linux/input.h" \ +- "$tmpdir/include/linux/input-event-codes.h" \ +- "$tmpdir/include/linux/udmabuf.h" \ +- "$tmpdir/include/linux/pci_regs.h" \ +- "$tmpdir/include/linux/ethtool.h" \ +- "$tmpdir/include/linux/const.h" \ +- "$tmpdir/include/linux/kernel.h" \ +- "$tmpdir/include/linux/vhost_types.h" \ +- "$tmpdir/include/linux/sysinfo.h" \ +- "$tmpdir/include/misc/pvpanic.h"; do ++for i in "$hdrdir"/include/linux/*virtio*.h \ ++ "$hdrdir/include/linux/qemu_fw_cfg.h" \ ++ "$hdrdir/include/linux/fuse.h" \ ++ "$hdrdir/include/linux/input.h" \ ++ "$hdrdir/include/linux/input-event-codes.h" \ ++ "$hdrdir/include/linux/udmabuf.h" \ ++ "$hdrdir/include/linux/pci_regs.h" \ ++ "$hdrdir/include/linux/ethtool.h" \ ++ "$hdrdir/include/linux/const.h" \ ++ "$hdrdir/include/linux/kernel.h" \ ++ "$hdrdir/include/linux/vhost_types.h" \ ++ "$hdrdir/include/linux/sysinfo.h" \ ++ "$hdrdir/include/misc/pvpanic.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" + done + mkdir -p "$output/include/standard-headers/drm" +-cp_portable "$tmpdir/include/drm/drm_fourcc.h" \ ++cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ + "$output/include/standard-headers/drm" + + rm -rf "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma" +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch b/SOURCES/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch new file mode 100644 index 0000000..eea324e --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch @@ -0,0 +1,38 @@ +From 5db9faee4d6efc9dbe010d2b745aba59d943d2ac Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Wed, 21 Feb 2024 10:51:38 -0600 +Subject: [PATCH 016/100] scripts/update-linux-headers: Add bits.h to file + imports + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [16/91] 
150ee6376982bd5f471cb561f6760bf80d1211db (bonzini/rhel-qemu-kvm) + +Signed-off-by: Michael Roth +Signed-off-by: Paolo Bonzini +(cherry picked from commit b40b8eb609d3549ac14aab43849b20f5cba951c9) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index d48856f9e2..5f20434d5c 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -169,7 +169,7 @@ rm -rf "$output/linux-headers/linux" + mkdir -p "$output/linux-headers/linux" + for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ + psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ +- vduse.h iommufd.h; do ++ vduse.h iommufd.h bits.h; do + cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" + done + +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch b/SOURCES/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch new file mode 100644 index 0000000..86c1f9c --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch @@ -0,0 +1,83 @@ +From 2d0989fe09703ef46ba9c5d14770dbf8a6fd2f80 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Sun, 18 Feb 2024 23:35:02 -0600 +Subject: [PATCH 015/100] scripts/update-linux-headers: Add setup_data.h to + import list + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [15/91] 9d46c8787259317710a84e7a6aa36731e9f55a17 (bonzini/rhel-qemu-kvm) + +Data structures like struct setup_data have been moved to a separate +setup_data.h header which bootparam.h relies on. Add setup_data.h to +the cp_portable() list and sync it along with the other header files. 
+ +Note that currently struct setup_data is stripped away as part of +generating bootparam.h, but that handling is no currently needed for +setup_data.h since it doesn't pull in many external +headers/dependencies. However, QEMU currently redefines struct +setup_data in hw/i386/x86.c, so that will need to be removed as part of +any header update that pulls in the new setup_data.h to avoid build +bisect breakage. + +Because is the first architecture specific #include +in include/standard-headers/, add a new sed substitution to rewrite +asm/ include to the standard-headers/asm-* subdirectory for the current +architecture. + +And while at it, remove asm-generic/kvm_para.h from the list of +allowed includes: it does not have a matching substitution, and therefore +it would not be possible to use it on non-Linux systems where there is +no /usr/include/asm-generic/ directory. + +Signed-off-by: Michael Roth +Signed-off-by: Paolo Bonzini +(cherry picked from commit 66210a1a30f2384bb59f9dad8d769dba56dd30f1) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index a0006eec6f..d48856f9e2 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -61,7 +61,7 @@ cp_portable() { + -e 'linux/const' \ + -e 'linux/kernel' \ + -e 'linux/sysinfo' \ +- -e 'asm-generic/kvm_para' \ ++ -e 'asm/setup_data.h' \ + > /dev/null + then + echo "Unexpected #include in input file $f". 
+@@ -77,6 +77,7 @@ cp_portable() { + -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ + -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ ++ -e 's/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ + -e 's/__bitwise//' \ + -e 's/__attribute__((packed))/QEMU_PACKED/' \ + -e 's/__inline__/inline/' \ +@@ -155,11 +156,14 @@ for arch in $ARCHLIST; do + "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" + cp_portable "$tmpdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" ++ cp_portable "$tmpdir/include/asm/setup_data.h" \ ++ "$output/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then + cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + fi + done ++arch= + + rm -rf "$output/linux-headers/linux" + mkdir -p "$output/linux-headers/linux" +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch b/SOURCES/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch new file mode 100644 index 0000000..ecd631a --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch @@ -0,0 +1,47 @@ +From 09acdbc49a4dd85d82ad30ec2859edfcdba8431e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 May 2024 08:01:26 +0200 +Subject: [PATCH 049/100] scripts/update-linux-headers.sh: Fix the path of + setup_data.h + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [49/91] f3008bc07796687c9440f5720fbc72a12d0a1278 (bonzini/rhel-qemu-kvm) + +When running the update-linx-headers.sh script, it currently fails with: + +scripts/update-linux-headers.sh: line 73: .../qemu/standard-headers/asm-x86/setup_data.h: No such file or directory + +The "include" folder is obviously missing here - no clue how this could +have worked before? 
+ +Fixes: 66210a1a30 ("scripts/update-linux-headers: Add setup_data.h to import list") +Message-ID: <20240527060126.12578-1-thuth@redhat.com> +Reviewed-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit bde26d90ae9f7551cac90e117fc7216c807a3bfe) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 0f404d5317..f084bee72e 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -160,7 +160,7 @@ for arch in $ARCHLIST; do + cp_portable "$hdrdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" + cp_portable "$hdrdir/include/asm/setup_data.h" \ +- "$output/standard-headers/asm-x86" ++ "$output/include/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then + cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" +-- +2.39.3 + diff --git a/SOURCES/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch b/SOURCES/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch new file mode 100644 index 0000000..c7dbd3a --- /dev/null +++ b/SOURCES/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch @@ -0,0 +1,44 @@ +From 8e63d742015bf69a00fd44e88eb1198f594b2de2 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 May 2024 08:02:43 +0200 +Subject: [PATCH 048/100] scripts/update-linux-headers.sh: Remove temporary + directory inbetween + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [48/91] 879554dc7e722c4e20b302a00ca745ddeefdc0fb (bonzini/rhel-qemu-kvm) + +We are reusing the same temporary directory for installing the headers +of all targets, so there could be stale files here when switching from +one target to another. 
Make sure to delete the folder before installing +a new set of target headers into it. + +Message-ID: <20240527060243.12647-1-thuth@redhat.com> +Reviewed-by: Michael S. Tsirkin +Acked-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit 3efc75ad9d9317e5709861bbebb2c29390f8e7a2) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 4431ba4d54..0f404d5317 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -113,6 +113,7 @@ for arch in $ARCHLIST; do + arch_var=ARCH + fi + ++ rm -rf "$hdrdir" + make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install + + rm -rf "$output/linux-headers/asm-$arch" +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-Await-request-purging.patch b/SOURCES/kvm-scsi-Await-request-purging.patch deleted file mode 100644 index 9bd4399..0000000 --- a/SOURCES/kvm-scsi-Await-request-purging.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 94d6458a58239b52394d58b6880509041186d5a8 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 2 Feb 2024 15:47:55 +0100 -Subject: [PATCH 04/22] scsi: Await request purging - -RH-Author: Hanna Czenczek -RH-MergeRequest: 222: Allow concurrent BlockBackend context changes -RH-Jira: RHEL-24593 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Commit: [2/2] 35a89273cab0af8f999881e67d359fe1328363a0 (hreitz/qemu-kvm-c-9-s) - -scsi_device_for_each_req_async() currently does not provide any way to -be awaited. One of its callers is scsi_device_purge_requests(), which -therefore currently does not guarantee that all requests are fully -settled when it returns. 
- -We want all requests to be settled, because scsi_device_purge_requests() -is called through the unrealize path, including the one invoked by -virtio_scsi_hotunplug() through qdev_simple_device_unplug_cb(), which -most likely assumes that all SCSI requests are done then. - -In fact, scsi_device_purge_requests() already contains a blk_drain(), -but this will not fully await scsi_device_for_each_req_async(), only the -I/O requests it potentially cancels (not the non-I/O requests). -However, we can have scsi_device_for_each_req_async() increment the BB -in-flight counter, and have scsi_device_for_each_req_async_bh() -decrement it when it is done. This way, the blk_drain() will fully -await all SCSI requests to be purged. - -This also removes the need for scsi_device_for_each_req_async_bh() to -double-check the current context and potentially re-schedule itself, -should it now differ from the BB's context: Changing a BB's AioContext -with a root node is done through bdrv_try_change_aio_context(), which -creates a drained section. With this patch, we keep the BB in-flight -counter elevated throughout, so we know the BB's context cannot change. - -Signed-off-by: Hanna Czenczek -Message-ID: <20240202144755.671354-3-hreitz@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 1604c0493193273e4eac547f86fbd2845e7f9af4) ---- - hw/scsi/scsi-bus.c | 30 +++++++++++++++++++++--------- - 1 file changed, 21 insertions(+), 9 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index 5b08cbf60a..b1bf8e6433 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -120,17 +120,13 @@ static void scsi_device_for_each_req_async_bh(void *opaque) - SCSIRequest *next; - - /* -- * If the AioContext changed before this BH was called then reschedule into -- * the new AioContext before accessing ->requests. 
This can happen when -- * scsi_device_for_each_req_async() is called and then the AioContext is -- * changed before BHs are run. -+ * The BB cannot have changed contexts between this BH being scheduled and -+ * now: BBs' AioContexts, when they have a node attached, can only be -+ * changed via bdrv_try_change_aio_context(), in a drained section. While -+ * we have the in-flight counter incremented, that drain must block. - */ - ctx = blk_get_aio_context(s->conf.blk); -- if (ctx != qemu_get_current_aio_context()) { -- aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, -- g_steal_pointer(&data)); -- return; -- } -+ assert(ctx == qemu_get_current_aio_context()); - - QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { - data->fn(req, data->fn_opaque); -@@ -138,11 +134,16 @@ static void scsi_device_for_each_req_async_bh(void *opaque) - - /* Drop the reference taken by scsi_device_for_each_req_async() */ - object_unref(OBJECT(s)); -+ -+ /* Paired with blk_inc_in_flight() in scsi_device_for_each_req_async() */ -+ blk_dec_in_flight(s->conf.blk); - } - - /* - * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() - * runs in the AioContext that is executing the request. -+ * Keeps the BlockBackend's in-flight counter incremented until everything is -+ * done, so draining it will settle all scheduled @fn() calls. - */ - static void scsi_device_for_each_req_async(SCSIDevice *s, - void (*fn)(SCSIRequest *, void *), -@@ -163,6 +164,8 @@ static void scsi_device_for_each_req_async(SCSIDevice *s, - */ - object_ref(OBJECT(s)); - -+ /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */ -+ blk_inc_in_flight(s->conf.blk); - aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), - scsi_device_for_each_req_async_bh, - data); -@@ -1728,11 +1731,20 @@ static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque) - scsi_req_cancel_async(req, NULL); - } - -+/** -+ * Cancel all requests, and block until they are deleted. 
-+ */ - void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) - { - scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); - -+ /* -+ * Await all the scsi_device_purge_one_req() calls scheduled by -+ * scsi_device_for_each_req_async(), and all I/O requests that were -+ * cancelled this way, but may still take a bit of time to settle. -+ */ - blk_drain(sdev->conf.blk); -+ - scsi_device_set_ua(sdev, sense); - } - --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch b/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch deleted file mode 100644 index 6d43810..0000000 --- a/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch +++ /dev/null @@ -1,88 +0,0 @@ -From cd08d22a0da022d99fe6cfddb7de680abf66c8be Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:19:59 -0500 -Subject: [PATCH 082/101] scsi: assert that callbacks run in the correct - AioContext - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [13/26] d2fd5065c3b72d9d2f4e37efee39fe12eba0f0a9 (kmwolf/centos-qemu-kvm) - -Since the removal of AioContext locking, the correctness of the code -relies on running requests from a single AioContext at any given time. - -Add assertions that verify that callbacks are invoked in the correct -AioContext. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231205182011.1976568-3-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-disk.c | 14 ++++++++++++++ - system/dma-helpers.c | 3 +++ - 2 files changed, 17 insertions(+) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 2c1bbb3530..a5048e0aaf 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -273,6 +273,10 @@ static void scsi_aio_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ /* The request must only run in the BlockBackend's AioContext */ -+ assert(blk_get_aio_context(s->qdev.conf.blk) == -+ qemu_get_current_aio_context()); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -370,8 +374,13 @@ static void scsi_dma_complete(void *opaque, int ret) - - static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) - { -+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - uint32_t n; - -+ /* The request must only run in the BlockBackend's AioContext */ -+ assert(blk_get_aio_context(s->qdev.conf.blk) == -+ qemu_get_current_aio_context()); -+ - assert(r->req.aiocb == NULL); - if (scsi_disk_req_check_error(r, ret, false)) { - goto done; -@@ -496,8 +505,13 @@ static void scsi_read_data(SCSIRequest *req) - - static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) - { -+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - uint32_t n; - -+ /* The request must only run in the BlockBackend's AioContext */ -+ assert(blk_get_aio_context(s->qdev.conf.blk) == -+ qemu_get_current_aio_context()); -+ - assert (r->req.aiocb == NULL); - if (scsi_disk_req_check_error(r, ret, false)) { - goto done; -diff --git a/system/dma-helpers.c b/system/dma-helpers.c -index 528117f256..9b221cf94e 100644 ---- a/system/dma-helpers.c -+++ b/system/dma-helpers.c -@@ -119,6 +119,9 @@ static void dma_blk_cb(void *opaque, int ret) - - 
trace_dma_blk_cb(dbs, ret); - -+ /* DMAAIOCB is not thread-safe and must be accessed only from dbs->ctx */ -+ assert(ctx == qemu_get_current_aio_context()); -+ - dbs->acb = NULL; - dbs->offset += dbs->iov.size; - --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch b/SOURCES/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch new file mode 100644 index 0000000..a7eebbc --- /dev/null +++ b/SOURCES/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch @@ -0,0 +1,60 @@ +From d580b83d9eda7802ffa3890ea8641793fe78937c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:05 +0200 +Subject: [PATCH 094/100] scsi-block: Don't skip callback for sgio error + status/driver_status + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 1fee1b21dae314f4f34c88f2d2fabd7af011404a (kmwolf/centos-qemu-kvm) + +Instead of calling into scsi_handle_rw_error() directly from +scsi_block_sgio_complete() and skipping the normal callback, go through +the normal cleanup path by calling the callback with a positive error +value. + +The important difference here is not only that the code path is cleaner, +but that the callbacks set r->req.aiocb = NULL. If we skip setting this +and the error action is BLOCK_ERROR_ACTION_STOP, resuming the VM runs +into an assertion failure in scsi_read_data() or scsi_write_data() +because the dangling aiocb pointer is unexpected. 
+ +Fixes: a108557bbf ("scsi: inline sg_io_sense_from_errno() into the callers.") +Buglink: https://issues.redhat.com/browse/RHEL-50000 +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 622a70161ac258e4a166a7dca4b5be267e0652d9) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index bed2c8746c..e7f57f3230 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2804,16 +2804,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + } else { + ret = io_hdr->status; + } +- +- if (ret > 0) { +- if (scsi_handle_rw_error(r, ret, true)) { +- scsi_req_unref(&r->req); +- return; +- } +- +- /* Ignore error. */ +- ret = 0; +- } + } + + req->cb(req->cb_opaque, ret); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch b/SOURCES/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch new file mode 100644 index 0000000..20aa88a --- /dev/null +++ b/SOURCES/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch @@ -0,0 +1,79 @@ +From eebe5fe8cbc854a6365e7c1adbb701079b137bcb Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:06 +0200 +Subject: [PATCH 095/100] scsi-disk: Add warning comments that host_status + errors take a shortcut + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 6fcd603fc78fda65a425a1acd9a8710d81c6ed7f (kmwolf/centos-qemu-kvm) + +scsi_block_sgio_complete() has surprising behaviour in that there are +error cases in which it directly completes the request and never calls +the passed callback. 
In the current state of the code, this doesn't seem +to result in bugs, but with future code changes, we must be careful to +never rely on the callback doing some cleanup until this code smell is +fixed. For now, just add warnings to make people aware of the trap. + +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 8a0495624f23f8f01dfb1484f367174f3b3572e8) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index e7f57f3230..b4062ac2ff 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -65,6 +65,9 @@ struct SCSIDiskClass { + /* + * Callbacks receive ret == 0 for success. Errors are represented either as + * negative errno values, or as positive SAM status codes. ++ * ++ * Beware: For errors returned in host_status, the function may directly ++ * complete the request and never call the callback. 
+ */ + DMAIOFunc *dma_readv; + DMAIOFunc *dma_writev; +@@ -359,6 +362,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_dma_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -399,6 +403,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_read_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -538,6 +543,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_write_complete(void * opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -2793,6 +2799,7 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + sg_io_hdr_t *io_hdr = &req->io_header; + + if (ret == 0) { ++ /* FIXME This skips calling req->cb() and any cleanup in it */ + if (io_hdr->host_status != SCSI_HOST_OK) { + scsi_req_complete_failed(&r->req, io_hdr->host_status); + scsi_req_unref(&r->req); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch b/SOURCES/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch new file mode 100644 index 0000000..0e2aeaf --- /dev/null +++ b/SOURCES/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch @@ -0,0 +1,106 @@ +From bd5cace452183053e356a27317c759ecfe0391aa Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:07 +0200 +Subject: [PATCH 096/100] scsi-disk: Always report RESERVATION_CONFLICT to + guest + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] eb4142071e5cbe385a949a6c48b0c8f8c6086918 (kmwolf/centos-qemu-kvm) + +In the case of scsi-block, RESERVATION_CONFLICT is not a backend error, +but indicates that the 
guest tried to make a request that it isn't +allowed to execute. Pass the error to the guest so that it can decide +what to do with it. + +Without this, if we stop the VM in response to a RESERVATION_CONFLICT +(as is the default policy in management software such as oVirt or +KubeVirt), it can happen that the VM cannot be resumed any more because +every attempt to resume it immediately runs into the same error and +stops the VM again. + +One case that expects RESERVATION_CONFLICT errors to be visible in the +guest is running the validation tests in Windows 2019's Failover Cluster +Manager, which intentionally tries to execute invalid requests to see if +they are properly rejected. + +Buglink: https://issues.redhat.com/browse/RHEL-50000 +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-5-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 9da6bd39f92434f55573acd017841b195c60188f) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 35 ++++++++++++++++++++++++++++++----- + 1 file changed, 30 insertions(+), 5 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index b4062ac2ff..91ccf37fef 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -202,7 +202,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); + SCSISense sense = SENSE_CODE(NO_SENSE); +- int error = 0; ++ int error; + bool req_has_sense = false; + BlockErrorAction action; + int status; +@@ -213,11 +213,35 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + } else { + /* A passthrough command has completed with nonzero status. 
*/ + status = ret; +- if (status == CHECK_CONDITION) { ++ switch (status) { ++ case CHECK_CONDITION: + req_has_sense = true; + error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); +- } else { ++ break; ++ case RESERVATION_CONFLICT: ++ /* ++ * Don't apply the error policy, always report to the guest. ++ * ++ * This is a passthrough code path, so it's not a backend error, but ++ * a response to an invalid guest request. ++ * ++ * Windows Failover Cluster validation intentionally sends invalid ++ * requests to verify that reservations work as intended. It is ++ * crucial that it sees the resulting errors. ++ * ++ * Treating a reservation conflict as a guest-side error is obvious ++ * when a pr-manager is in use. Without one, the situation is less ++ * clear, but there might be nothing that can be fixed on the host ++ * (like in the above example), and we don't want to be stuck in a ++ * loop where resuming the VM and retrying the request immediately ++ * stops it again. So always reporting is still the safer option in ++ * this case, too. ++ */ ++ error = 0; ++ break; ++ default: + error = EINVAL; ++ break; + } + } + +@@ -227,8 +251,9 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + * are usually retried immediately, so do not post them to QMP and + * do not account them as failed I/O. 
+ */ +- if (req_has_sense && +- scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { ++ if (!error || (req_has_sense && ++ scsi_sense_buf_is_guest_recoverable(r->req.sense, ++ sizeof(r->req.sense)))) { + action = BLOCK_ERROR_ACTION_REPORT; + acct_failed = false; + } else { +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch b/SOURCES/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch new file mode 100644 index 0000000..409028a --- /dev/null +++ b/SOURCES/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch @@ -0,0 +1,125 @@ +From 1a0aa9bbdad63d72628002740410b8a28282a96e Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:04 +0200 +Subject: [PATCH 093/100] scsi-disk: Use positive return value for status in + dma_readv/writev + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] a0b3e7bfd7b7059c0ec3706f2eb1698c1d430b08 (kmwolf/centos-qemu-kvm) + +In some error cases, scsi_block_sgio_complete() never calls the passed +callback, but directly completes the request. This leads to bugs because +its error paths are not exact copies of what the callback would normally +do. + +In preparation to fix this, allow passing positive return values to the +callbacks that represent the status code that should be used to complete +the request. + +scsi_handle_rw_error() already handles positive values for its ret +parameter because scsi_block_sgio_complete() calls directly into it. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit cfe0880835cd364b590ffd27ef8dbd2ad8838bc5) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 4bd7af9d0c..bed2c8746c 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -62,6 +62,10 @@ OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE) + + struct SCSIDiskClass { + SCSIDeviceClass parent_class; ++ /* ++ * Callbacks receive ret == 0 for success. Errors are represented either as ++ * negative errno values, or as positive SAM status codes. ++ */ + DMAIOFunc *dma_readv; + DMAIOFunc *dma_writev; + bool (*need_fua_emulation)(SCSICommand *cmd); +@@ -261,7 +265,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return true; + } + +- if (ret < 0) { ++ if (ret != 0) { + return scsi_handle_rw_error(r, ret, acct_failed); + } + +@@ -338,7 +342,7 @@ static void scsi_write_do_fua(SCSIDiskReq *r) + static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) + { + assert(r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -363,9 +367,10 @@ static void scsi_dma_complete(void *opaque, int ret) + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_dma_complete_noio(r, ret); +@@ -381,7 +386,7 @@ static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) + qemu_get_current_aio_context()); + + assert(r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, 
false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -402,9 +407,10 @@ static void scsi_read_complete(void *opaque, int ret) + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + trace_scsi_disk_read_complete(r->req.tag, r->qiov.size); + } +@@ -512,7 +518,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) + qemu_get_current_aio_context()); + + assert (r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -540,9 +546,10 @@ static void scsi_write_complete(void * opaque, int ret) + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_write_complete_noio(r, ret); +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch b/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch deleted file mode 100644 index 65b08ce..0000000 --- a/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch +++ /dev/null @@ -1,245 +0,0 @@ -From d1d384bd24a7aeb527f4abd8a0958146544ef9bb Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 4 Dec 2023 11:42:58 -0500 -Subject: [PATCH 079/101] scsi: don't lock AioContext in I/O code path - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [10/26] b5814cec94af5c254e300646d8783672b085bac3 (kmwolf/centos-qemu-kvm) - -blk_aio_*() doesn't require 
the AioContext lock and the SCSI subsystem's -internal state also does not anymore. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Acked-by: Kevin Wolf -Message-ID: <20231204164259.1515217-4-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-disk.c | 23 ----------------------- - hw/scsi/scsi-generic.c | 20 +++----------------- - 2 files changed, 3 insertions(+), 40 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 6691f5edb8..2c1bbb3530 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -273,8 +273,6 @@ static void scsi_aio_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -286,7 +284,6 @@ static void scsi_aio_complete(void *opaque, int ret) - scsi_req_complete(&r->req, GOOD); - - done: -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - scsi_req_unref(&r->req); - } - -@@ -394,8 +391,6 @@ static void scsi_read_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -406,7 +401,6 @@ static void scsi_read_complete(void *opaque, int ret) - trace_scsi_disk_read_complete(r->req.tag, r->qiov.size); - } - scsi_read_complete_noio(r, ret); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - /* Actually issue a read to the block device. 
*/ -@@ -448,8 +442,6 @@ static void scsi_do_read_cb(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert (r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -459,7 +451,6 @@ static void scsi_do_read_cb(void *opaque, int ret) - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); - } - scsi_do_read(opaque, ret); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - /* Read more data from scsi device into buffer. */ -@@ -533,8 +524,6 @@ static void scsi_write_complete(void * opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert (r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -544,7 +533,6 @@ static void scsi_write_complete(void * opaque, int ret) - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); - } - scsi_write_complete_noio(r, ret); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - static void scsi_write_data(SCSIRequest *req) -@@ -1742,8 +1730,6 @@ static void scsi_unmap_complete(void *opaque, int ret) - SCSIDiskReq *r = data->r; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -1754,7 +1740,6 @@ static void scsi_unmap_complete(void *opaque, int ret) - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); - scsi_unmap_complete_noio(data, ret); - } -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf) -@@ -1822,8 +1807,6 @@ static void scsi_write_same_complete(void *opaque, int ret) - SCSIDiskReq *r = data->r; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - 
-- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -1847,7 +1830,6 @@ static void scsi_write_same_complete(void *opaque, int ret) - data->sector << BDRV_SECTOR_BITS, - &data->qiov, 0, - scsi_write_same_complete, data); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - return; - } - -@@ -1857,7 +1839,6 @@ done: - scsi_req_unref(&r->req); - qemu_vfree(data->iov.iov_base); - g_free(data); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf) -@@ -2810,7 +2791,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret) - { - SCSIBlockReq *req = (SCSIBlockReq *)opaque; - SCSIDiskReq *r = &req->req; -- SCSIDevice *s = r->req.dev; - sg_io_hdr_t *io_hdr = &req->io_header; - - if (ret == 0) { -@@ -2827,13 +2807,10 @@ static void scsi_block_sgio_complete(void *opaque, int ret) - } - - if (ret > 0) { -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); - if (scsi_handle_rw_error(r, ret, true)) { -- aio_context_release(blk_get_aio_context(s->conf.blk)); - scsi_req_unref(&r->req); - return; - } -- aio_context_release(blk_get_aio_context(s->conf.blk)); - - /* Ignore error. 
*/ - ret = 0; -diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c -index 2417f0ad84..b7b04e1d63 100644 ---- a/hw/scsi/scsi-generic.c -+++ b/hw/scsi/scsi-generic.c -@@ -109,15 +109,11 @@ done: - static void scsi_command_complete(void *opaque, int ret) - { - SCSIGenericReq *r = (SCSIGenericReq *)opaque; -- SCSIDevice *s = r->req.dev; -- -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); - - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - - scsi_command_complete_noio(r, ret); -- aio_context_release(blk_get_aio_context(s->conf.blk)); - } - - static int execute_command(BlockBackend *blk, -@@ -274,14 +270,12 @@ static void scsi_read_complete(void * opaque, int ret) - SCSIDevice *s = r->req.dev; - int len; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - - if (ret || r->req.io_canceled) { - scsi_command_complete_noio(r, ret); -- goto done; -+ return; - } - - len = r->io_header.dxfer_len - r->io_header.resid; -@@ -320,7 +314,7 @@ static void scsi_read_complete(void * opaque, int ret) - r->io_header.status != GOOD || - len == 0) { - scsi_command_complete_noio(r, 0); -- goto done; -+ return; - } - - /* Snoop READ CAPACITY output to set the blocksize. */ -@@ -356,9 +350,6 @@ static void scsi_read_complete(void * opaque, int ret) - req_complete: - scsi_req_data(&r->req, len); - scsi_req_unref(&r->req); -- --done: -- aio_context_release(blk_get_aio_context(s->conf.blk)); - } - - /* Read more data from scsi device into buffer. 
*/ -@@ -391,14 +382,12 @@ static void scsi_write_complete(void * opaque, int ret) - - trace_scsi_generic_write_complete(ret); - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - - if (ret || r->req.io_canceled) { - scsi_command_complete_noio(r, ret); -- goto done; -+ return; - } - - if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 && -@@ -408,9 +397,6 @@ static void scsi_write_complete(void * opaque, int ret) - } - - scsi_command_complete_noio(r, ret); -- --done: -- aio_context_release(blk_get_aio_context(s->conf.blk)); - } - - /* Write data to a scsi device. Returns nonzero on failure. --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch b/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch deleted file mode 100644 index 30f1c00..0000000 --- a/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch +++ /dev/null @@ -1,307 +0,0 @@ -From 42dd1357310bd1a68d6cacaa53cd5b1d1b02880d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 4 Dec 2023 11:42:56 -0500 -Subject: [PATCH 077/101] scsi: only access SCSIDevice->requests from one - thread - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [8/26] 9df662e82a63e93d184b5763bebbe7e43bc9dabe (kmwolf/centos-qemu-kvm) - -Stop depending on the AioContext lock and instead access -SCSIDevice->requests from only one thread at a time: -- When the VM is running only the BlockBackend's AioContext may access - the requests list. -- When the VM is stopped only the main loop may access the requests - list. - -These constraints protect the requests list without the need for locking -in the I/O code path. - -Note that multiple IOThreads are not supported yet because the code -assumes all SCSIRequests are executed from a single AioContext. 
Leave -that as future work. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231204164259.1515217-2-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-bus.c | 181 ++++++++++++++++++++++++++++------------- - include/hw/scsi/scsi.h | 7 +- - 2 files changed, 131 insertions(+), 57 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index fc4b77fdb0..b649cdf555 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -85,6 +85,89 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun) - return d; - } - -+/* -+ * Invoke @fn() for each enqueued request in device @s. Must be called from the -+ * main loop thread while the guest is stopped. This is only suitable for -+ * vmstate ->put(), use scsi_device_for_each_req_async() for other cases. -+ */ -+static void scsi_device_for_each_req_sync(SCSIDevice *s, -+ void (*fn)(SCSIRequest *, void *), -+ void *opaque) -+{ -+ SCSIRequest *req; -+ SCSIRequest *next_req; -+ -+ assert(!runstate_is_running()); -+ assert(qemu_in_main_thread()); -+ -+ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) { -+ fn(req, opaque); -+ } -+} -+ -+typedef struct { -+ SCSIDevice *s; -+ void (*fn)(SCSIRequest *, void *); -+ void *fn_opaque; -+} SCSIDeviceForEachReqAsyncData; -+ -+static void scsi_device_for_each_req_async_bh(void *opaque) -+{ -+ g_autofree SCSIDeviceForEachReqAsyncData *data = opaque; -+ SCSIDevice *s = data->s; -+ AioContext *ctx; -+ SCSIRequest *req; -+ SCSIRequest *next; -+ -+ /* -+ * If the AioContext changed before this BH was called then reschedule into -+ * the new AioContext before accessing ->requests. This can happen when -+ * scsi_device_for_each_req_async() is called and then the AioContext is -+ * changed before BHs are run. 
-+ */ -+ ctx = blk_get_aio_context(s->conf.blk); -+ if (ctx != qemu_get_current_aio_context()) { -+ aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, -+ g_steal_pointer(&data)); -+ return; -+ } -+ -+ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { -+ data->fn(req, data->fn_opaque); -+ } -+ -+ /* Drop the reference taken by scsi_device_for_each_req_async() */ -+ object_unref(OBJECT(s)); -+} -+ -+/* -+ * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() -+ * runs in the AioContext that is executing the request. -+ */ -+static void scsi_device_for_each_req_async(SCSIDevice *s, -+ void (*fn)(SCSIRequest *, void *), -+ void *opaque) -+{ -+ assert(qemu_in_main_thread()); -+ -+ SCSIDeviceForEachReqAsyncData *data = -+ g_new(SCSIDeviceForEachReqAsyncData, 1); -+ -+ data->s = s; -+ data->fn = fn; -+ data->fn_opaque = opaque; -+ -+ /* -+ * Hold a reference to the SCSIDevice until -+ * scsi_device_for_each_req_async_bh() finishes. -+ */ -+ object_ref(OBJECT(s)); -+ -+ aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), -+ scsi_device_for_each_req_async_bh, -+ data); -+} -+ - static void scsi_device_realize(SCSIDevice *s, Error **errp) - { - SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s); -@@ -144,20 +227,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, - qbus_set_bus_hotplug_handler(BUS(bus)); - } - --static void scsi_dma_restart_bh(void *opaque) -+void scsi_req_retry(SCSIRequest *req) - { -- SCSIDevice *s = opaque; -- SCSIRequest *req, *next; -- -- qemu_bh_delete(s->bh); -- s->bh = NULL; -+ req->retry = true; -+} - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { -- scsi_req_ref(req); -- if (req->retry) { -- req->retry = false; -- switch (req->cmd.mode) { -+/* Called in the AioContext that is executing the request */ -+static void scsi_dma_restart_req(SCSIRequest *req, void *opaque) -+{ -+ scsi_req_ref(req); -+ if (req->retry) { 
-+ req->retry = false; -+ switch (req->cmd.mode) { - case SCSI_XFER_FROM_DEV: - case SCSI_XFER_TO_DEV: - scsi_req_continue(req); -@@ -166,37 +247,22 @@ static void scsi_dma_restart_bh(void *opaque) - scsi_req_dequeue(req); - scsi_req_enqueue(req); - break; -- } - } -- scsi_req_unref(req); - } -- aio_context_release(blk_get_aio_context(s->conf.blk)); -- /* Drop the reference that was acquired in scsi_dma_restart_cb */ -- object_unref(OBJECT(s)); --} -- --void scsi_req_retry(SCSIRequest *req) --{ -- /* No need to save a reference, because scsi_dma_restart_bh just -- * looks at the request list. */ -- req->retry = true; -+ scsi_req_unref(req); - } - - static void scsi_dma_restart_cb(void *opaque, bool running, RunState state) - { - SCSIDevice *s = opaque; - -+ assert(qemu_in_main_thread()); -+ - if (!running) { - return; - } -- if (!s->bh) { -- AioContext *ctx = blk_get_aio_context(s->conf.blk); -- /* The reference is dropped in scsi_dma_restart_bh.*/ -- object_ref(OBJECT(s)); -- s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, -- &DEVICE(s)->mem_reentrancy_guard); -- qemu_bh_schedule(s->bh); -- } -+ -+ scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL); - } - - static bool scsi_bus_is_address_free(SCSIBus *bus, -@@ -1657,15 +1723,16 @@ void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense) - } - } - -+static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque) -+{ -+ scsi_req_cancel_async(req, NULL); -+} -+ - void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) - { -- SCSIRequest *req; -+ scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); - - aio_context_acquire(blk_get_aio_context(sdev->conf.blk)); -- while (!QTAILQ_EMPTY(&sdev->requests)) { -- req = QTAILQ_FIRST(&sdev->requests); -- scsi_req_cancel_async(req, NULL); -- } - blk_drain(sdev->conf.blk); - aio_context_release(blk_get_aio_context(sdev->conf.blk)); - scsi_device_set_ua(sdev, sense); -@@ -1737,31 +1804,33 @@ static char 
*scsibus_get_fw_dev_path(DeviceState *dev) - - /* SCSI request list. For simplicity, pv points to the whole device */ - -+static void put_scsi_req(SCSIRequest *req, void *opaque) -+{ -+ QEMUFile *f = opaque; -+ -+ assert(!req->io_canceled); -+ assert(req->status == -1 && req->host_status == -1); -+ assert(req->enqueued); -+ -+ qemu_put_sbyte(f, req->retry ? 1 : 2); -+ qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); -+ qemu_put_be32s(f, &req->tag); -+ qemu_put_be32s(f, &req->lun); -+ if (req->bus->info->save_request) { -+ req->bus->info->save_request(f, req); -+ } -+ if (req->ops->save_request) { -+ req->ops->save_request(f, req); -+ } -+} -+ - static int put_scsi_requests(QEMUFile *f, void *pv, size_t size, - const VMStateField *field, JSONWriter *vmdesc) - { - SCSIDevice *s = pv; -- SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus); -- SCSIRequest *req; - -- QTAILQ_FOREACH(req, &s->requests, next) { -- assert(!req->io_canceled); -- assert(req->status == -1 && req->host_status == -1); -- assert(req->enqueued); -- -- qemu_put_sbyte(f, req->retry ? 
1 : 2); -- qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); -- qemu_put_be32s(f, &req->tag); -- qemu_put_be32s(f, &req->lun); -- if (bus->info->save_request) { -- bus->info->save_request(f, req); -- } -- if (req->ops->save_request) { -- req->ops->save_request(f, req); -- } -- } -+ scsi_device_for_each_req_sync(s, put_scsi_req, f); - qemu_put_sbyte(f, 0); -- - return 0; - } - -diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h -index 3692ca82f3..10c4e8288d 100644 ---- a/include/hw/scsi/scsi.h -+++ b/include/hw/scsi/scsi.h -@@ -69,14 +69,19 @@ struct SCSIDevice - { - DeviceState qdev; - VMChangeStateEntry *vmsentry; -- QEMUBH *bh; - uint32_t id; - BlockConf conf; - SCSISense unit_attention; - bool sense_is_ua; - uint8_t sense[SCSI_SENSE_BUF_SIZE]; - uint32_t sense_len; -+ -+ /* -+ * The requests list is only accessed from the AioContext that executes -+ * requests or from the main loop when IOThread processing is stopped. -+ */ - QTAILQ_HEAD(, SCSIRequest) requests; -+ - uint32_t channel; - uint32_t lun; - int blocksize; --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-remove-AioContext-locking.patch b/SOURCES/kvm-scsi-remove-AioContext-locking.patch deleted file mode 100644 index 34a5e46..0000000 --- a/SOURCES/kvm-scsi-remove-AioContext-locking.patch +++ /dev/null @@ -1,280 +0,0 @@ -From 61d605433a5edfcc7fe836fd399106ed1e1907bb Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:05 -0500 -Subject: [PATCH 088/101] scsi: remove AioContext locking - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [19/26] 12a8e26670074a17dd2b0cfac06e0aea03b3068f (kmwolf/centos-qemu-kvm) - -The AioContext lock no longer has any effect. Remove it. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-9-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-bus.c | 2 -- - hw/scsi/scsi-disk.c | 31 +++++-------------------------- - hw/scsi/virtio-scsi.c | 18 ------------------ - include/hw/virtio/virtio-scsi.h | 14 -------------- - 4 files changed, 5 insertions(+), 60 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index b649cdf555..5b08cbf60a 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -1732,9 +1732,7 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) - { - scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); - -- aio_context_acquire(blk_get_aio_context(sdev->conf.blk)); - blk_drain(sdev->conf.blk); -- aio_context_release(blk_get_aio_context(sdev->conf.blk)); - scsi_device_set_ua(sdev, sense); - } - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index a5048e0aaf..61be3d395a 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -2339,14 +2339,10 @@ static void scsi_disk_reset(DeviceState *dev) - { - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev); - uint64_t nb_sectors; -- AioContext *ctx; - - scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET)); - -- ctx = blk_get_aio_context(s->qdev.conf.blk); -- aio_context_acquire(ctx); - blk_get_geometry(s->qdev.conf.blk, &nb_sectors); -- aio_context_release(ctx); - - nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE; - if (nb_sectors) { -@@ -2545,15 +2541,13 @@ static void scsi_unrealize(SCSIDevice *dev) - static void scsi_hd_realize(SCSIDevice *dev, Error **errp) - { - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); -- AioContext *ctx = NULL; -+ - /* can happen for devices without drive. 
The error message for missing - * backend will be issued in scsi_realize - */ - if (s->qdev.conf.blk) { -- ctx = blk_get_aio_context(s->qdev.conf.blk); -- aio_context_acquire(ctx); - if (!blkconf_blocksizes(&s->qdev.conf, errp)) { -- goto out; -+ return; - } - } - s->qdev.blocksize = s->qdev.conf.logical_block_size; -@@ -2562,16 +2556,11 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) - s->product = g_strdup("QEMU HARDDISK"); - } - scsi_realize(&s->qdev, errp); --out: -- if (ctx) { -- aio_context_release(ctx); -- } - } - - static void scsi_cd_realize(SCSIDevice *dev, Error **errp) - { - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); -- AioContext *ctx; - int ret; - uint32_t blocksize = 2048; - -@@ -2587,8 +2576,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) - blocksize = dev->conf.physical_block_size; - } - -- ctx = blk_get_aio_context(dev->conf.blk); -- aio_context_acquire(ctx); - s->qdev.blocksize = blocksize; - s->qdev.type = TYPE_ROM; - s->features |= 1 << SCSI_DISK_F_REMOVABLE; -@@ -2596,7 +2583,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) - s->product = g_strdup("QEMU CD-ROM"); - } - scsi_realize(&s->qdev, errp); -- aio_context_release(ctx); - } - - -@@ -2727,7 +2713,6 @@ static int get_device_type(SCSIDiskState *s) - static void scsi_block_realize(SCSIDevice *dev, Error **errp) - { - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); -- AioContext *ctx; - int sg_version; - int rc; - -@@ -2742,9 +2727,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) - "be removed in a future version"); - } - -- ctx = blk_get_aio_context(s->qdev.conf.blk); -- aio_context_acquire(ctx); -- - /* check we are using a driver managing SG_IO (version 3 and after) */ - rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version); - if (rc < 0) { -@@ -2752,18 +2734,18 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) - if (rc != -EPERM) { - error_append_hint(errp, "Is this a SCSI 
device?\n"); - } -- goto out; -+ return; - } - if (sg_version < 30000) { - error_setg(errp, "scsi generic interface too old"); -- goto out; -+ return; - } - - /* get device type from INQUIRY data */ - rc = get_device_type(s); - if (rc < 0) { - error_setg(errp, "INQUIRY failed"); -- goto out; -+ return; - } - - /* Make a guess for the block size, we'll fix it when the guest sends. -@@ -2783,9 +2765,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) - - scsi_realize(&s->qdev, errp); - scsi_generic_read_device_inquiry(&s->qdev); -- --out: -- aio_context_release(ctx); - } - - typedef struct SCSIBlockReq { -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 4f8d35facc..ca365a70e9 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -642,9 +642,7 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) - return; - } - -- virtio_scsi_acquire(s); - virtio_scsi_handle_ctrl_vq(s, vq); -- virtio_scsi_release(s); - } - - static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req) -@@ -882,9 +880,7 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) - return; - } - -- virtio_scsi_acquire(s); - virtio_scsi_handle_cmd_vq(s, vq); -- virtio_scsi_release(s); - } - - static void virtio_scsi_get_config(VirtIODevice *vdev, -@@ -1031,9 +1027,7 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) - return; - } - -- virtio_scsi_acquire(s); - virtio_scsi_handle_event_vq(s, vq); -- virtio_scsi_release(s); - } - - static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) -@@ -1052,9 +1046,7 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) - }, - }; - -- virtio_scsi_acquire(s); - virtio_scsi_push_event(s, &info); -- virtio_scsi_release(s); - } - } - -@@ -1071,17 +1063,13 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); - VirtIOSCSI *s = 
VIRTIO_SCSI(vdev); - SCSIDevice *sd = SCSI_DEVICE(dev); -- AioContext *old_context; - int ret; - - if (s->ctx && !s->dataplane_fenced) { - if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; - } -- old_context = blk_get_aio_context(sd->conf.blk); -- aio_context_acquire(old_context); - ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); -- aio_context_release(old_context); - if (ret < 0) { - return; - } -@@ -1097,10 +1085,8 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - }, - }; - -- virtio_scsi_acquire(s); - virtio_scsi_push_event(s, &info); - scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED)); -- virtio_scsi_release(s); - } - } - -@@ -1122,17 +1108,13 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, - qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); - - if (s->ctx) { -- virtio_scsi_acquire(s); - /* If other users keep the BlockBackend in the iothread, that's ok */ - blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL); -- virtio_scsi_release(s); - } - - if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { -- virtio_scsi_acquire(s); - virtio_scsi_push_event(s, &info); - scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED)); -- virtio_scsi_release(s); - } - } - -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index da8cb928d9..7f0573b1bf 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -101,20 +101,6 @@ struct VirtIOSCSI { - uint32_t host_features; - }; - --static inline void virtio_scsi_acquire(VirtIOSCSI *s) --{ -- if (s->ctx) { -- aio_context_acquire(s->ctx); -- } --} -- --static inline void virtio_scsi_release(VirtIOSCSI *s) --{ -- if (s->ctx) { -- aio_context_release(s->ctx); -- } --} -- - void virtio_scsi_common_realize(DeviceState *dev, - VirtIOHandleOutput ctrl, - VirtIOHandleOutput evt, --- -2.39.3 - diff --git 
a/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch b/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch deleted file mode 100644 index c9baf60..0000000 --- a/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 9f5c6dbe907fe6227006ab51179eaa50a63559cb Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:09 -0500 -Subject: [PATCH 092/101] scsi: remove outdated AioContext lock comment - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [23/26] 96e2e7d2e6a160ce4d695060f902d21030b3b1d8 (kmwolf/centos-qemu-kvm) - -The SCSI subsystem no longer uses the AioContext lock. Request -processing runs exclusively in the BlockBackend's AioContext since -"scsi: only access SCSIDevice->requests from one thread" and hence the -lock is unnecessary. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20231205182011.1976568-13-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-disk.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 61be3d395a..2e7e1e9a1c 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -355,7 +355,6 @@ done: - scsi_req_unref(&r->req); - } - --/* Called with AioContext lock held */ - static void scsi_dma_complete(void *opaque, int ret) - { - SCSIDiskReq *r = (SCSIDiskReq *)opaque; --- -2.39.3 - diff --git a/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch b/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch deleted file mode 100644 index 81ae2f1..0000000 --- a/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch +++ /dev/null @@ -1,190 +0,0 @@ -From c5f9e92cd49a2171a5b0223cafd7fab3f45edb82 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 9 Jan 2024 19:17:17 +0100 -Subject: [PATCH 
06/22] string-output-visitor: Fix (pseudo) struct handling - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [2/17] 84e226f161680dd61b6635e213203d062c1aa556 (stefanha/centos-stream-qemu-kvm) - -Commit ff32bb53 tried to get minimal struct support into the string -output visitor by just making it return "". Unfortunately, it -forgot that the caller will still make more visitor calls for the -content of the struct. - -If the struct is contained in a list, such as IOThreadVirtQueueMapping, -in the better case its fields show up as separate list entries. In the -worse case, it contains another list, and the string output visitor -doesn't support nested lists and asserts that this doesn't happen. So as -soon as the optional "vqs" field in IOThreadVirtQueueMapping is -specified, we get a crash. - -This can be reproduced with the following command line: - - echo "info qtree" | ./qemu-system-x86_64 \ - -object iothread,id=t0 \ - -blockdev null-co,node-name=disk \ - -device '{"driver": "virtio-blk-pci", "drive": "disk", - "iothread-vq-mapping": [{"iothread": "t0", "vqs": [0]}]}' \ - -monitor stdio - -Fix the problem by counting the nesting level of structs and ignoring -any visitor calls for values (apart from start/end_struct) while we're -not on the top level. - -Lists nested directly within lists remain unimplemented, as we don't -currently have a use case for them. 
- -Fixes: ff32bb53476539d352653f4ed56372dced73a388 -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2069 -Reported-by: Aihua Liang -Signed-off-by: Kevin Wolf -Message-ID: <20240109181717.42493-1-kwolf@redhat.com> -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Kevin Wolf -(cherry picked from commit 014b99a8e41c8cd1e895137654b44dec5430122c) -Signed-off-by: Stefan Hajnoczi ---- - qapi/string-output-visitor.c | 46 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 46 insertions(+) - -diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c -index f0c1dea89e..5115536b15 100644 ---- a/qapi/string-output-visitor.c -+++ b/qapi/string-output-visitor.c -@@ -65,6 +65,7 @@ struct StringOutputVisitor - } range_start, range_end; - GList *ranges; - void *list; /* Only needed for sanity checking the caller */ -+ unsigned int struct_nesting; - }; - - static StringOutputVisitor *to_sov(Visitor *v) -@@ -144,6 +145,10 @@ static bool print_type_int64(Visitor *v, const char *name, int64_t *obj, - StringOutputVisitor *sov = to_sov(v); - GList *l; - -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - switch (sov->list_mode) { - case LM_NONE: - string_output_append(sov, *obj); -@@ -231,6 +236,10 @@ static bool print_type_size(Visitor *v, const char *name, uint64_t *obj, - uint64_t val; - char *out, *psize; - -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - if (!sov->human) { - out = g_strdup_printf("%"PRIu64, *obj); - string_output_set(sov, out); -@@ -250,6 +259,11 @@ static bool print_type_bool(Visitor *v, const char *name, bool *obj, - Error **errp) - { - StringOutputVisitor *sov = to_sov(v); -+ -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - string_output_set(sov, g_strdup(*obj ? 
"true" : "false")); - return true; - } -@@ -260,6 +274,10 @@ static bool print_type_str(Visitor *v, const char *name, char **obj, - StringOutputVisitor *sov = to_sov(v); - char *out; - -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - if (sov->human) { - out = *obj ? g_strdup_printf("\"%s\"", *obj) : g_strdup(""); - } else { -@@ -273,6 +291,11 @@ static bool print_type_number(Visitor *v, const char *name, double *obj, - Error **errp) - { - StringOutputVisitor *sov = to_sov(v); -+ -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - string_output_set(sov, g_strdup_printf("%.17g", *obj)); - return true; - } -@@ -283,6 +306,10 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, - StringOutputVisitor *sov = to_sov(v); - char *out; - -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - if (sov->human) { - out = g_strdup(""); - } else { -@@ -295,6 +322,9 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, - static bool start_struct(Visitor *v, const char *name, void **obj, - size_t size, Error **errp) - { -+ StringOutputVisitor *sov = to_sov(v); -+ -+ sov->struct_nesting++; - return true; - } - -@@ -302,6 +332,10 @@ static void end_struct(Visitor *v, void **obj) - { - StringOutputVisitor *sov = to_sov(v); - -+ if (--sov->struct_nesting) { -+ return; -+ } -+ - /* TODO actually print struct fields */ - string_output_set(sov, g_strdup("")); - } -@@ -312,6 +346,10 @@ start_list(Visitor *v, const char *name, GenericList **list, size_t size, - { - StringOutputVisitor *sov = to_sov(v); - -+ if (sov->struct_nesting) { -+ return true; -+ } -+ - /* we can't traverse a list in a list */ - assert(sov->list_mode == LM_NONE); - /* We don't support visits without a list */ -@@ -329,6 +367,10 @@ static GenericList *next_list(Visitor *v, GenericList *tail, size_t size) - StringOutputVisitor *sov = to_sov(v); - GenericList *ret = tail->next; - -+ if (sov->struct_nesting) { -+ return ret; -+ } -+ - if (ret && !ret->next) { - 
sov->list_mode = LM_END; - } -@@ -339,6 +381,10 @@ static void end_list(Visitor *v, void **obj) - { - StringOutputVisitor *sov = to_sov(v); - -+ if (sov->struct_nesting) { -+ return; -+ } -+ - assert(sov->list == obj); - assert(sov->list_mode == LM_STARTED || - sov->list_mode == LM_END || --- -2.39.3 - diff --git a/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch b/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch deleted file mode 100644 index f83635d..0000000 --- a/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch +++ /dev/null @@ -1,90 +0,0 @@ -From fb2069be402ec1322834c555714f0e993778cc9d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 12 Dec 2023 08:49:34 -0500 -Subject: [PATCH 05/22] string-output-visitor: show structs as "" - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [1/17] 0c08e8237d28fbdbdbc7576d4c17d2eeeb413c2a (stefanha/centos-stream-qemu-kvm) - -StringOutputVisitor crashes when it visits a struct because -->start_struct() is NULL. - -Show "" instead of crashing. This is necessary because the -virtio-blk-pci iothread-vq-mapping parameter that I'd like to introduce -soon is a list of IOThreadMapping structs. - -This patch is a quick fix to solve the crash, but the long-term solution -is replacing StringOutputVisitor with something that can handle the full -gamut of values in QEMU. 
- -Cc: Markus Armbruster -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231212134934.500289-1-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Reviewed-by: Markus Armbruster -Signed-off-by: Kevin Wolf -(cherry picked from commit ff32bb53476539d352653f4ed56372dced73a388) -Signed-off-by: Stefan Hajnoczi ---- - include/qapi/string-output-visitor.h | 6 +++--- - qapi/string-output-visitor.c | 16 ++++++++++++++++ - 2 files changed, 19 insertions(+), 3 deletions(-) - -diff --git a/include/qapi/string-output-visitor.h b/include/qapi/string-output-visitor.h -index 268dfe9986..b1ee473b30 100644 ---- a/include/qapi/string-output-visitor.h -+++ b/include/qapi/string-output-visitor.h -@@ -26,9 +26,9 @@ typedef struct StringOutputVisitor StringOutputVisitor; - * If everything else succeeds, pass @result to visit_complete() to - * collect the result of the visit. - * -- * The string output visitor does not implement support for visiting -- * QAPI structs, alternates, null, or arbitrary QTypes. It also -- * requires a non-null list argument to visit_start_list(). -+ * The string output visitor does not implement support for alternates, null, -+ * or arbitrary QTypes. Struct fields are not shown. It also requires a -+ * non-null list argument to visit_start_list(). 
- */ - Visitor *string_output_visitor_new(bool human, char **result); - -diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c -index c0cb72dbe4..f0c1dea89e 100644 ---- a/qapi/string-output-visitor.c -+++ b/qapi/string-output-visitor.c -@@ -292,6 +292,20 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, - return true; - } - -+static bool start_struct(Visitor *v, const char *name, void **obj, -+ size_t size, Error **errp) -+{ -+ return true; -+} -+ -+static void end_struct(Visitor *v, void **obj) -+{ -+ StringOutputVisitor *sov = to_sov(v); -+ -+ /* TODO actually print struct fields */ -+ string_output_set(sov, g_strdup("")); -+} -+ - static bool - start_list(Visitor *v, const char *name, GenericList **list, size_t size, - Error **errp) -@@ -379,6 +393,8 @@ Visitor *string_output_visitor_new(bool human, char **result) - v->visitor.type_str = print_type_str; - v->visitor.type_number = print_type_number; - v->visitor.type_null = print_type_null; -+ v->visitor.start_struct = start_struct; -+ v->visitor.end_struct = end_struct; - v->visitor.start_list = start_list; - v->visitor.next_list = next_list; - v->visitor.end_list = end_list; --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-new-CPU-model-SierraForest.patch b/SOURCES/kvm-target-i386-Add-new-CPU-model-SierraForest.patch new file mode 100644 index 0000000..c72f290 --- /dev/null +++ b/SOURCES/kvm-target-i386-Add-new-CPU-model-SierraForest.patch @@ -0,0 +1,215 @@ +From d9595fecd03c9a69ac562e3f240d50b2fa8d14a4 Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Wed, 20 Mar 2024 10:10:44 +0800 +Subject: [PATCH 006/100] target/i386: Add new CPU model SierraForest +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [6/91] 
4bc71f82c258db46569a7e08965d1d358b19416c (bonzini/rhel-qemu-kvm) + +According to table 1-2 in Intel Architecture Instruction Set Extensions and +Future Features (rev 051) [1], SierraForest has the following new features +which have already been virtualized: + +- CMPCCXADD CPUID.(EAX=7,ECX=1):EAX[bit 7] +- AVX-IFMA CPUID.(EAX=7,ECX=1):EAX[bit 23] +- AVX-VNNI-INT8 CPUID.(EAX=7,ECX=1):EDX[bit 4] +- AVX-NE-CONVERT CPUID.(EAX=7,ECX=1):EDX[bit 5] + +Add above features to new CPU model SierraForest. Comparing with GraniteRapids +CPU model, SierraForest bare-metal removes the following features: + +- HLE CPUID.(EAX=7,ECX=0):EBX[bit 4] +- RTM CPUID.(EAX=7,ECX=0):EBX[bit 11] +- AVX512F CPUID.(EAX=7,ECX=0):EBX[bit 16] +- AVX512DQ CPUID.(EAX=7,ECX=0):EBX[bit 17] +- AVX512_IFMA CPUID.(EAX=7,ECX=0):EBX[bit 21] +- AVX512CD CPUID.(EAX=7,ECX=0):EBX[bit 28] +- AVX512BW CPUID.(EAX=7,ECX=0):EBX[bit 30] +- AVX512VL CPUID.(EAX=7,ECX=0):EBX[bit 31] +- AVX512_VBMI CPUID.(EAX=7,ECX=0):ECX[bit 1] +- AVX512_VBMI2 CPUID.(EAX=7,ECX=0):ECX[bit 6] +- AVX512_VNNI CPUID.(EAX=7,ECX=0):ECX[bit 11] +- AVX512_BITALG CPUID.(EAX=7,ECX=0):ECX[bit 12] +- AVX512_VPOPCNTDQ CPUID.(EAX=7,ECX=0):ECX[bit 14] +- LA57 CPUID.(EAX=7,ECX=0):ECX[bit 16] +- TSXLDTRK CPUID.(EAX=7,ECX=0):EDX[bit 16] +- AMX-BF16 CPUID.(EAX=7,ECX=0):EDX[bit 22] +- AVX512_FP16 CPUID.(EAX=7,ECX=0):EDX[bit 23] +- AMX-TILE CPUID.(EAX=7,ECX=0):EDX[bit 24] +- AMX-INT8 CPUID.(EAX=7,ECX=0):EDX[bit 25] +- AVX512_BF16 CPUID.(EAX=7,ECX=1):EAX[bit 5] +- fast zero-length MOVSB CPUID.(EAX=7,ECX=1):EAX[bit 10] +- fast short CMPSB, SCASB CPUID.(EAX=7,ECX=1):EAX[bit 12] +- AMX-FP16 CPUID.(EAX=7,ECX=1):EAX[bit 21] +- PREFETCHI CPUID.(EAX=7,ECX=1):EDX[bit 14] +- XFD CPUID.(EAX=0xD,ECX=1):EAX[bit 4] +- EPT_PAGE_WALK_LENGTH_5 VMX_EPT_VPID_CAP(0x48c)[bit 7] + +Add all features of GraniteRapids CPU model except above features to +SierraForest CPU model. + +SierraForest doesn’t support TSX and RTM but supports TAA_NO. 
When RTM is +not enabled in host, KVM will not report TAA_NO. So, just don't include +TAA_NO in SierraForest CPU model. + +[1] https://cdrdv2.intel.com/v1/dl/getContent/671368 + +Reviewed-by: Zhao Liu +Reviewed-by: Xiaoyao Li +Signed-off-by: Tao Su +Message-ID: <20240320021044.508263-1-tao1.su@linux.intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6e82d3b6220777667968a04c87e1667f164ebe88) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 126 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 0aa88d9b48..efbadc3ed7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4127,6 +4127,132 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ }, + }, + }, ++ { ++ .name = "SierraForest", ++ .level = 0x23, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 175, ++ .stepping = 0, ++ /* ++ * please keep the ascending order so that we can have a clear view of ++ * bit position of each feature. 
++ */ ++ .features[FEAT_1_EDX] = ++ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | ++ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | ++ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | ++ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | ++ CPUID_SSE | CPUID_SSE2, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | ++ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | ++ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | ++ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | ++ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_WBNOINVD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SHA_NI, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | ++ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_SBDR_SSDP_NO | ++ MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO | ++ MSR_ARCH_CAP_PBRSB_NO, ++ 
.features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_CMPCCXADD | ++ CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA, ++ .features[FEAT_7_1_EDX] = ++ CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT, ++ .features[FEAT_7_2_EDX] = ++ CPUID_7_2_EDX_MCDT_NO, ++ .features[FEAT_VMX_BASIC] = ++ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = ++ MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | ++ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | ++ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | ++ MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = ++ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | ++ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | ++ VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = ++ VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | 
VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | ++ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | ++ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | ++ VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | ++ VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | ++ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | ++ VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | ++ VMX_SECONDARY_EXEC_XSAVES, ++ .features[FEAT_VMX_VMFUNC] = ++ MSR_VMX_VMFUNC_EPT_SWITCHING, ++ .xlevel = 0x80000008, ++ .model_id = "Intel Xeon Processor (SierraForest)", ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { /* end of list */ }, ++ }, ++ }, + { + .name = "Denverton", + .level = 21, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Export-RFDS-bit-to-guests.patch b/SOURCES/kvm-target-i386-Export-RFDS-bit-to-guests.patch new file mode 100644 index 0000000..74de391 --- /dev/null +++ b/SOURCES/kvm-target-i386-Export-RFDS-bit-to-guests.patch @@ -0,0 +1,50 @@ +From ae6229a3e45318b1101291b99a0e894399dcb1db Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Wed, 13 Mar 2024 07:53:23 -0700 +Subject: [PATCH 007/100] target/i386: 
Export RFDS bit to guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [7/91] 7eb6cae8821a2e953d3ff2033fa2e973011ad771 (bonzini/rhel-qemu-kvm) + +Register File Data Sampling (RFDS) is a CPU side-channel vulnerability +that may expose stale register value. CPUs that set RFDS_NO bit in MSR +IA32_ARCH_CAPABILITIES indicate that they are not vulnerable to RFDS. +Similarly, RFDS_CLEAR indicates that CPU is affected by RFDS, and has +the microcode to help mitigate RFDS. + +Make RFDS_CLEAR and RFDS_NO bits available to guests. + +Signed-off-by: Pawan Gupta +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <9a38877857392b5c2deae7e7db1b170d15510314.1710341348.git.pawan.kumar.gupta@linux.intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 41bdd9812863c150284a9339a048ed88c40f4df7) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index efbadc3ed7..489c853b42 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1158,8 +1158,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", + NULL, "fb-clear", NULL, NULL, + NULL, NULL, NULL, NULL, +- "pbrsb-no", NULL, "gds-no", NULL, +- NULL, NULL, NULL, NULL, ++ "pbrsb-no", NULL, "gds-no", "rfds-no", ++ "rfds-clear", NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_ARCH_CAPABILITIES, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch b/SOURCES/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch new file mode 100644 index 0000000..f37dbc2 --- /dev/null +++ b/SOURCES/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch @@ -0,0 +1,192 @@ +From 4a811f54cdb3c9329f193ea43c76ed4eb1b14c19 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 19 
Mar 2024 15:29:33 +0100 +Subject: [PATCH 022/100] target/i386: Implement mc->kvm_type() to get VM type + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [22/91] d58cf6ead2de37852adc15c7642166904403453f (bonzini/rhel-qemu-kvm) + +KVM is introducing a new API to create confidential guests, which +will be used by TDX and SEV-SNP but is also available for SEV and +SEV-ES. The API uses the VM type argument to KVM_CREATE_VM to +identify which confidential computing technology to use. + +Since there are no other expected uses of VM types, delegate +mc->kvm_type() for x86 boards to the confidential-guest-support +object pointed to by ms->cgs. + +For example, if a sev-guest object is specified to confidential-guest-support, +like, + + qemu -machine ...,confidential-guest-support=sev0 \ + -object sev-guest,id=sev0,... + +it will check if a VM type KVM_X86_SEV_VM or KVM_X86_SEV_ES_VM +is supported, and if so use them together with the KVM_SEV_INIT2 +function of the KVM_MEMORY_ENCRYPT_OP ioctl. If not, it will fall back to +KVM_SEV_INIT and KVM_SEV_ES_INIT. + +This is a preparatory work towards TDX and SEV-SNP support, but it +will also enable support for VMSA features such as DebugSwap, which +are only available via KVM_SEV_INIT2. 
+ +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit ee88612df1e8d6c2bfec75bff3f9482ea44acec1) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 11 ++++++++ + target/i386/confidential-guest.h | 19 ++++++++++++++ + target/i386/kvm/kvm.c | 44 ++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 2 ++ + 4 files changed, 76 insertions(+) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 84a4801977..3d5b51e92d 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1381,6 +1381,16 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name, + qapi_free_SgxEPCList(list); + } + ++static int x86_kvm_type(MachineState *ms, const char *vm_type) ++{ ++ /* ++ * No x86 machine has a kvm-type property. If one is added that has ++ * it, it should call kvm_get_vm_type() directly or not use it at all. ++ */ ++ assert(vm_type == NULL); ++ return kvm_enabled() ? kvm_get_vm_type(ms) : 0; ++} ++ + static void x86_machine_initfn(Object *obj) + { + X86MachineState *x86ms = X86_MACHINE(obj); +@@ -1405,6 +1415,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; + mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; + mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; ++ mc->kvm_type = x86_kvm_type; + x86mc->save_tsc_khz = true; + x86mc->fwcfg_dma_enabled = true; + nc->nmi_monitor_handler = x86_nmi; +diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h +index ca12d5a8fb..532e172a60 100644 +--- a/target/i386/confidential-guest.h ++++ b/target/i386/confidential-guest.h +@@ -36,5 +36,24 @@ struct X86ConfidentialGuest { + struct X86ConfidentialGuestClass { + /* */ + ConfidentialGuestSupportClass parent; ++ ++ /* */ ++ int (*kvm_type)(X86ConfidentialGuest *cg); + }; ++ ++/** ++ * x86_confidential_guest_kvm_type: ++ * ++ * Calls #X86ConfidentialGuestClass.unplug callback of @plug_handler. 
++ */ ++static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) ++{ ++ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); ++ ++ if (klass->kvm_type) { ++ return klass->kvm_type(cg); ++ } else { ++ return 0; ++ } ++} + #endif +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a12207a8ee..1f0ab12c2e 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -31,6 +31,7 @@ + #include "sysemu/kvm_int.h" + #include "sysemu/runstate.h" + #include "kvm_i386.h" ++#include "../confidential-guest.h" + #include "sev.h" + #include "xen-emu.h" + #include "hyperv.h" +@@ -161,6 +162,49 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; + static RateLimit bus_lock_ratelimit_ctrl; + static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); + ++static const char *vm_type_name[] = { ++ [KVM_X86_DEFAULT_VM] = "default", ++}; ++ ++bool kvm_is_vm_type_supported(int type) ++{ ++ uint32_t machine_types; ++ ++ /* ++ * old KVM doesn't support KVM_CAP_VM_TYPES but KVM_X86_DEFAULT_VM ++ * is always supported ++ */ ++ if (type == KVM_X86_DEFAULT_VM) { ++ return true; ++ } ++ ++ machine_types = kvm_check_extension(KVM_STATE(current_machine->accelerator), ++ KVM_CAP_VM_TYPES); ++ return !!(machine_types & BIT(type)); ++} ++ ++int kvm_get_vm_type(MachineState *ms) ++{ ++ int kvm_type = KVM_X86_DEFAULT_VM; ++ ++ if (ms->cgs) { ++ if (!object_dynamic_cast(OBJECT(ms->cgs), TYPE_X86_CONFIDENTIAL_GUEST)) { ++ error_report("configuration type %s not supported for x86 guests", ++ object_get_typename(OBJECT(ms->cgs))); ++ exit(1); ++ } ++ kvm_type = x86_confidential_guest_kvm_type( ++ X86_CONFIDENTIAL_GUEST(ms->cgs)); ++ } ++ ++ if (!kvm_is_vm_type_supported(kvm_type)) { ++ error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]); ++ exit(1); ++ } ++ ++ return kvm_type; ++} ++ + bool kvm_has_smm(void) + { + return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); +diff --git 
a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index 30fedcffea..6b44844d95 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -37,6 +37,7 @@ bool kvm_hv_vpindex_settable(void); + bool kvm_enable_sgx_provisioning(KVMState *s); + bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); + ++int kvm_get_vm_type(MachineState *ms); + void kvm_arch_reset_vcpu(X86CPU *cs); + void kvm_arch_after_reset_vcpu(X86CPU *cpu); + void kvm_arch_do_init_vcpu(X86CPU *cs); +@@ -49,6 +50,7 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); + + #ifdef CONFIG_KVM + ++bool kvm_is_vm_type_supported(int type); + bool kvm_has_adjust_clock_stable(void); + bool kvm_has_exception_payload(void); + void kvm_synchronize_all_tsc(void); +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch b/SOURCES/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch new file mode 100644 index 0000000..9844da7 --- /dev/null +++ b/SOURCES/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch @@ -0,0 +1,68 @@ +From fe60f8d47b6e14f17dd6c06b03bd00e6bcdbeefb Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 20 Mar 2024 17:31:38 +0800 +Subject: [PATCH 005/100] target/i386: Introduce Icelake-Server-v7 to enable + TSX + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [5/91] 66d865899e0d510b6c86763422d6b28b904b208a (bonzini/rhel-qemu-kvm) + +When start L2 guest with both L1/L2 using Icelake-Server-v3 or above, +QEMU reports below warning: + +"warning: host doesn't support requested feature: MSR(10AH).taa-no [bit 8]" + +Reason is QEMU Icelake-Server-v3 has TSX feature disabled but enables taa-no +bit. It's meaningless that TSX isn't supported but still claim TSX is secure. 
+So L1 KVM doesn't expose taa-no to L2 if TSX is unsupported, then starting L2 +triggers the warning. + +Fix it by introducing a new version Icelake-Server-v7 which has both TSX +and taa-no features. Then guest can use TSX securely when it see taa-no. + +This matches the production Icelake which supports TSX and isn't susceptible +to TSX Async Abort (TAA) vulnerabilities, a.k.a, taa-no. + +Ideally, TSX should have being enabled together with taa-no since v3, but for +compatibility, we'd better to add v7 to enable it. + +Fixes: d965dc35592d ("target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model") +Tested-by: Xiangfei Ma +Signed-off-by: Zhenzhong Duan +Message-ID: <20240320093138.80267-2-zhenzhong.duan@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c895fa54e3060c5ac6f3888dce96c9b78626072b) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index a7f71422ea..0aa88d9b48 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3840,6 +3840,16 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 7, ++ .note = "TSX, taa-no", ++ .props = (PropValue[]) { ++ /* Restore TSX features removed by -v2 above */ ++ { "hle", "on" }, ++ { "rtm", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch b/SOURCES/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch new file mode 100644 index 0000000..ace0367 --- /dev/null +++ b/SOURCES/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch @@ -0,0 +1,49 @@ +From 070dda07559a7488c62fc80a8c79e8baaee125eb Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 3 Jul 2024 10:37:23 +0200 +Subject: [PATCH 087/100] target/i386: SEV: fix formatting of CPUID mismatch + message + +RH-Author: 
Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [87/91] 36bc2cc80d5ffc1ceeb1836540660ff45885a818 (bonzini/rhel-qemu-kvm) + +Fixes: 70943ad8e4d ("i386/sev: Add support for SNP CPUID validation", 2024-06-05) +Signed-off-by: Paolo Bonzini +(cherry picked from commit f45ef010e19fe86314bffd5d5c9d5d77f4ce8103) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c40562dce3..37de80adc7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -839,7 +839,7 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + size_t i; + + if (old->count != new->count) { +- error_report("SEV-SNP: CPUID validation failed due to count mismatch," ++ error_report("SEV-SNP: CPUID validation failed due to count mismatch, " + "provided: %d, expected: %d", old->count, new->count); + return; + } +@@ -851,8 +851,8 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + new_func = &new->entries[i]; + + if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) { +- error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" +- "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" ++ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x, " ++ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x, " + "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", + old_func->eax_in, old_func->ecx_in, + old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch b/SOURCES/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch new file mode 100644 index 0000000..3030d59 --- /dev/null +++ b/SOURCES/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch @@ -0,0 +1,42 @@ +From 
37b7e2185f1d23dd5f5a95b545b8d760492915ed Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 2 Aug 2024 01:43:37 +0200 +Subject: [PATCH 091/100] target/i386: SEV: fix mismatch in vcek-disabled + property name + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [91/91] 3a8abc4a0547b985cb79cef29bd3e8350d3d4b48 (bonzini/rhel-qemu-kvm) + +The vcek-disabled property of the sev-snp-guest object is misspelled +vcek-required (which I suppose would use the opposite polarity) in +the call to object_class_property_add_bool(). Fix it. + +Reported-by: Zixi Chen +Reviewed-by: Pankaj Gupta +Signed-off-by: Paolo Bonzini +(cherry picked from commit d4392415c328f83b2e30517a3561be523874f441) +--- + target/i386/sev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b921defb63..aed565dbe8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -2378,7 +2378,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + object_class_property_add_bool(oc, "author-key-enabled", + sev_snp_guest_get_author_key_enabled, + sev_snp_guest_set_author_key_enabled); +- object_class_property_add_bool(oc, "vcek-required", ++ object_class_property_add_bool(oc, "vcek-disabled", + sev_snp_guest_get_vcek_disabled, + sev_snp_guest_set_vcek_disabled); + object_class_property_add_str(oc, "host-data", +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch b/SOURCES/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch new file mode 100644 index 0000000..7c17e53 --- /dev/null +++ b/SOURCES/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch @@ -0,0 +1,146 @@ +From 6bb738fb90a3a1221ae35596b3d03a17e0b1c34d Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 19 Mar 2024 15:30:25 +0100 +Subject: [PATCH 023/100] target/i386: SEV: use KVM_SEV_INIT2 if possible + 
+RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [23/91] 9579d772ae5124a94c6b1e3a4566bf3470d2bc8f (bonzini/rhel-qemu-kvm) + +Implement support for the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM virtual +machine types, and the KVM_SEV_INIT2 function of KVM_MEMORY_ENCRYPT_OP. + +These replace the KVM_SEV_INIT and KVM_SEV_ES_INIT functions, and have +several advantages: + +- sharing the initialization sequence with SEV-SNP and TDX + +- allowing arguments including the set of desired VMSA features + +- protection against invalid use of KVM_GET/SET_* ioctls for guests + with encrypted state + +If the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM types are not supported, +fall back to KVM_SEV_INIT and KVM_SEV_ES_INIT (which use the +default x86 VM type). + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 663e2f443e5722370708ce2f4c27d94a2087d2d3) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 2 ++ + target/i386/sev.c | 41 +++++++++++++++++++++++++++++++++++++---- + 2 files changed, 39 insertions(+), 4 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 1f0ab12c2e..408568d053 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -164,6 +164,8 @@ static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); + + static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", ++ [KVM_X86_SEV_VM] = "SEV", ++ [KVM_X86_SEV_ES_VM] = "SEV-ES", + }; + + bool kvm_is_vm_type_supported(int type) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index ebe36d4c10..9dab4060b8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -26,6 +26,7 @@ + #include "qemu/error-report.h" + #include "crypto/hash.h" + #include "sysemu/kvm.h" ++#include "kvm/kvm_i386.h" + #include "sev.h" + #include "sysemu/sysemu.h" + #include "sysemu/runstate.h" +@@ -56,6 +57,8 @@ 
OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) + struct SevGuestState { + X86ConfidentialGuest parent_obj; + ++ int kvm_type; ++ + /* configuration parameters */ + char *sev_device; + uint32_t policy; +@@ -850,6 +853,26 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + ++static int sev_kvm_type(X86ConfidentialGuest *cg) ++{ ++ SevGuestState *sev = SEV_GUEST(cg); ++ int kvm_type; ++ ++ if (sev->kvm_type != -1) { ++ goto out; ++ } ++ ++ kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; ++ if (kvm_is_vm_type_supported(kvm_type)) { ++ sev->kvm_type = kvm_type; ++ } else { ++ sev->kvm_type = KVM_X86_DEFAULT_VM; ++ } ++ ++out: ++ return sev->kvm_type; ++} ++ + static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + SevGuestState *sev = SEV_GUEST(cgs); +@@ -929,13 +952,19 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + __func__); + goto err; + } +- cmd = KVM_SEV_ES_INIT; +- } else { +- cmd = KVM_SEV_INIT; + } + + trace_kvm_sev_init(); +- ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { ++ cmd = sev_es_enabled() ? 
KVM_SEV_ES_INIT : KVM_SEV_INIT; ++ ++ ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ } else { ++ struct kvm_sev_init args = { 0 }; ++ ++ ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ } ++ + if (ret) { + error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +@@ -1327,8 +1356,10 @@ static void + sev_guest_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_kvm_init; ++ x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", + sev_guest_get_sev_device, +@@ -1357,6 +1388,8 @@ sev_guest_instance_init(Object *obj) + { + SevGuestState *sev = SEV_GUEST(obj); + ++ sev->kvm_type = -1; ++ + sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); + sev->policy = DEFAULT_GUEST_POLICY; + object_property_add_uint32_ptr(obj, "policy", &sev->policy, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-add-guest-phys-bits-cpu-property.patch b/SOURCES/kvm-target-i386-add-guest-phys-bits-cpu-property.patch new file mode 100644 index 0000000..cd41279 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-guest-phys-bits-cpu-property.patch @@ -0,0 +1,124 @@ +From 090c64ea622534ff2ae6c9b66cdf0b1ddb58bf26 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 18 Mar 2024 16:53:36 +0100 +Subject: [PATCH 002/100] target/i386: add guest-phys-bits cpu property + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [2/91] 6603e842012dc484e1f571ea0a77b59095f37003 (bonzini/rhel-qemu-kvm) + +Allows to set guest-phys-bits (cpuid leaf 80000008, eax[23:16]) +via -cpu $model,guest-phys-bits=$nr. 
+ +Signed-off-by: Gerd Hoffmann +Message-ID: <20240318155336.156197-3-kraxel@redhat.com> +Reviewed-by: Zhao Liu +Signed-off-by: Paolo Bonzini +(cherry picked from commit 513ba32dccc659c80722b3a43233b26eaa50309a) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 2 ++ + target/i386/cpu.c | 22 ++++++++++++++++++++++ + target/i386/cpu.h | 8 ++++++++ + 3 files changed, 32 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 648762d908..b9fde3cec1 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -349,6 +349,8 @@ GlobalProperty pc_rhel_compat[] = { + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + + GlobalProperty pc_rhel_9_5_compat[] = { ++ /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ ++ { TYPE_X86_CPU, "guest-phys-bits", "0" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index be7b0663cd..a7f71422ea 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6591,6 +6591,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { + /* 64 bit processor */ + *eax |= (cpu_x86_virtual_addr_width(env) << 8); ++ *eax |= (cpu->guest_phys_bits << 16); + } + *ebx = env->features[FEAT_8000_0008_EBX]; + if (cs->nr_cores * cs->nr_threads > 1) { +@@ -7350,6 +7351,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + goto out; + } + ++ if (cpu->guest_phys_bits == -1) { ++ /* ++ * If it was not set by the user, or by the accelerator via ++ * cpu_exec_realizefn, clear. ++ */ ++ cpu->guest_phys_bits = 0; ++ } ++ + if (cpu->ucode_rev == 0) { + /* + * The default is the same as KVM's. 
Note that this check +@@ -7400,6 +7409,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + if (cpu->phys_bits == 0) { + cpu->phys_bits = TCG_PHYS_ADDR_BITS; + } ++ if (cpu->guest_phys_bits && ++ (cpu->guest_phys_bits > cpu->phys_bits || ++ cpu->guest_phys_bits < 32)) { ++ error_setg(errp, "guest-phys-bits should be between 32 and %u " ++ " (but is %u)", ++ cpu->phys_bits, cpu->guest_phys_bits); ++ return; ++ } + } else { + /* For 32 bit systems don't use the user set value, but keep + * phys_bits consistent with what we tell the guest. +@@ -7408,6 +7425,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + error_setg(errp, "phys-bits is not user-configurable in 32 bit"); + return; + } ++ if (cpu->guest_phys_bits != 0) { ++ error_setg(errp, "guest-phys-bits is not user-configurable in 32 bit"); ++ return; ++ } + + if (env->features[FEAT_1_EDX] & (CPUID_PSE36 | CPUID_PAE)) { + cpu->phys_bits = 36; +@@ -7908,6 +7929,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_BOOL("x-force-features", X86CPU, force_features, false), + DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), + DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), ++ DEFINE_PROP_UINT32("guest-phys-bits", X86CPU, guest_phys_bits, -1), + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), + DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 6b05738079..6112e27bfd 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -2027,6 +2027,14 @@ struct ArchCPU { + /* Number of physical address bits supported */ + uint32_t phys_bits; + ++ /* ++ * Number of guest physical address bits available. Usually this is ++ * identical to host physical address bits. 
With NPT or EPT 4-level ++ * paging, guest physical address space might be restricted to 48 bits ++ * even if the host cpu supports more physical address bits. ++ */ ++ uint32_t guest_phys_bits; ++ + /* in order to simplify APIC support, we leave this pointer to the + user */ + struct DeviceState *apic_state; +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-introduce-x86-confidential-guest.patch b/SOURCES/kvm-target-i386-introduce-x86-confidential-guest.patch new file mode 100644 index 0000000..dec3220 --- /dev/null +++ b/SOURCES/kvm-target-i386-introduce-x86-confidential-guest.patch @@ -0,0 +1,161 @@ +From 0573fcd1775b6613127b1906d59d02e65f7519f3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 11:07:43 -0400 +Subject: [PATCH 021/100] target/i386: introduce x86-confidential-guest + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [21/91] e86d3bcde7e1c2fa1ba8c9bc83e02033644f1ac0 (bonzini/rhel-qemu-kvm) + +Introduce a common superclass for x86 confidential guest implementations. +It will extend ConfidentialGuestSupportClass with a method that provides +the VM type to be passed to KVM_CREATE_VM. 
+ +Signed-off-by: Paolo Bonzini +(cherry picked from commit d82e9c843d662f13821026618aba936eda31a6c0) +Signed-off-by: Paolo Bonzini +--- + target/i386/confidential-guest.c | 33 ++++++++++++++++++++++++++ + target/i386/confidential-guest.h | 40 ++++++++++++++++++++++++++++++++ + target/i386/meson.build | 2 +- + target/i386/sev.c | 6 ++--- + 4 files changed, 77 insertions(+), 4 deletions(-) + create mode 100644 target/i386/confidential-guest.c + create mode 100644 target/i386/confidential-guest.h + +diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c +new file mode 100644 +index 0000000000..b3727845ad +--- /dev/null ++++ b/target/i386/confidential-guest.c +@@ -0,0 +1,33 @@ ++/* ++ * QEMU Confidential Guest support ++ * ++ * Copyright (C) 2024 Red Hat, Inc. ++ * ++ * Authors: ++ * Paolo Bonzini ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or ++ * later. See the COPYING file in the top-level directory. ++ * ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "confidential-guest.h" ++ ++OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest, ++ x86_confidential_guest, ++ X86_CONFIDENTIAL_GUEST, ++ CONFIDENTIAL_GUEST_SUPPORT) ++ ++static void x86_confidential_guest_class_init(ObjectClass *oc, void *data) ++{ ++} ++ ++static void x86_confidential_guest_init(Object *obj) ++{ ++} ++ ++static void x86_confidential_guest_finalize(Object *obj) ++{ ++} +diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h +new file mode 100644 +index 0000000000..ca12d5a8fb +--- /dev/null ++++ b/target/i386/confidential-guest.h +@@ -0,0 +1,40 @@ ++/* ++ * x86-specific confidential guest methods. ++ * ++ * Copyright (c) 2024 Red Hat Inc. ++ * ++ * Authors: ++ * Paolo Bonzini ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ */ ++#ifndef TARGET_I386_CG_H ++#define TARGET_I386_CG_H ++ ++#include "qom/object.h" ++ ++#include "exec/confidential-guest-support.h" ++ ++#define TYPE_X86_CONFIDENTIAL_GUEST "x86-confidential-guest" ++ ++OBJECT_DECLARE_TYPE(X86ConfidentialGuest, ++ X86ConfidentialGuestClass, ++ X86_CONFIDENTIAL_GUEST) ++ ++struct X86ConfidentialGuest { ++ /* */ ++ ConfidentialGuestSupport parent_obj; ++}; ++ ++/** ++ * X86ConfidentialGuestClass: ++ * ++ * Class to be implemented by confidential-guest-support concrete objects ++ * for the x86 target. ++ */ ++struct X86ConfidentialGuestClass { ++ /* */ ++ ConfidentialGuestSupportClass parent; ++}; ++#endif +diff --git a/target/i386/meson.build b/target/i386/meson.build +index 7c74bfa859..8abce725f8 100644 +--- a/target/i386/meson.build ++++ b/target/i386/meson.build +@@ -6,7 +6,7 @@ i386_ss.add(files( + 'xsave_helper.c', + 'cpu-dump.c', + )) +-i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c')) ++i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest.c')) + + # x86 cpu type + i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c49a8fd55e..ebe36d4c10 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -35,7 +35,7 @@ + #include "monitor/monitor.h" + #include "monitor/hmp-target.h" + #include "qapi/qapi-commands-misc-target.h" +-#include "exec/confidential-guest-support.h" ++#include "confidential-guest.h" + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" + +@@ -54,7 +54,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) + * -machine ...,memory-encryption=sev0 + */ + struct SevGuestState { +- ConfidentialGuestSupport parent_obj; ++ X86ConfidentialGuest parent_obj; + + /* configuration parameters */ + char *sev_device; +@@ -1372,7 +1372,7 @@ sev_guest_instance_init(Object *obj) + + /* sev guest info */ + static const TypeInfo sev_guest_info = { +- .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, ++ .parent = 
TYPE_X86_CONFIDENTIAL_GUEST, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), + .instance_finalize = sev_guest_finalize, +-- +2.39.3 + diff --git a/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch b/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch deleted file mode 100644 index a2d712f..0000000 --- a/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch +++ /dev/null @@ -1,205 +0,0 @@ -From cc8d794932e26df7c7f3c8cc0c1f42da8d52f12b Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 15 Jan 2024 10:26:52 +0100 -Subject: [PATCH 069/101] target/s390x/kvm/pv: Provide some more useful - information if decryption fails -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 213: s390x: Provide some more useful information if decryption of a PV image fails -RH-Jira: RHEL-18212 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck -RH-Commit: [1/1] 4ffb61869f7df33e23d3e0ebf8c29e386e3f6cbc (thuth/qemu-kvm-cs9) - -JIRA: https://issues.redhat.com/browse/RHEL-18212 - -commit 7af51621b16ae86646cc2dc9dee30de8176ff761 -Author: Thomas Huth -Date: Wed Jan 10 15:29:16 2024 +0100 - - target/s390x/kvm/pv: Provide some more useful information if decryption fails - - It's a common scenario to copy guest images from one host to another - to run the guest on the other machine. This (of course) does not work - with "secure execution" guests since they are encrypted with one certain - host key. However, if you still (accidentally) do it, you only get a - very user-unfriendly error message that looks like this: - - qemu-system-s390x: KVM PV command 2 (KVM_PV_SET_SEC_PARMS) failed: - header rc 108 rrc 5 IOCTL rc: -22 - - Let's provide at least a somewhat nicer hint to the users so that they - are able to figure out what might have gone wrong. 
- - Buglink: https://issues.redhat.com/browse/RHEL-18212 - Message-ID: <20240110142916.850605-1-thuth@redhat.com> - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Reviewed-by: Claudio Imbrenda - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - hw/s390x/ipl.c | 5 ++--- - hw/s390x/ipl.h | 2 +- - hw/s390x/s390-virtio-ccw.c | 5 ++++- - target/s390x/kvm/pv.c | 25 ++++++++++++++++++++----- - target/s390x/kvm/pv.h | 5 +++-- - 5 files changed, 30 insertions(+), 12 deletions(-) - -diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c -index 515dcf51b5..b23a6a0ef3 100644 ---- a/hw/s390x/ipl.c -+++ b/hw/s390x/ipl.c -@@ -703,7 +703,7 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu) - cpu_physical_memory_unmap(addr, len, 1, len); - } - --int s390_ipl_prepare_pv_header(void) -+int s390_ipl_prepare_pv_header(Error **errp) - { - IplParameterBlock *ipib = s390_ipl_get_iplb_pv(); - IPLBlockPV *ipib_pv = &ipib->pv; -@@ -712,8 +712,7 @@ int s390_ipl_prepare_pv_header(void) - - cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr, - ipib_pv->pv_header_len); -- rc = s390_pv_set_sec_parms((uintptr_t)hdr, -- ipib_pv->pv_header_len); -+ rc = s390_pv_set_sec_parms((uintptr_t)hdr, ipib_pv->pv_header_len, errp); - g_free(hdr); - return rc; - } -diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h -index 7fc86e7905..57cd125769 100644 ---- a/hw/s390x/ipl.h -+++ b/hw/s390x/ipl.h -@@ -107,7 +107,7 @@ typedef union IplParameterBlock IplParameterBlock; - - int s390_ipl_set_loadparm(uint8_t *loadparm); - void s390_ipl_update_diag308(IplParameterBlock *iplb); --int s390_ipl_prepare_pv_header(void); -+int s390_ipl_prepare_pv_header(Error **errp); - int s390_ipl_pv_unpack(void); - void s390_ipl_prepare_cpu(S390CPU *cpu); - IplParameterBlock *s390_ipl_get_iplb(void); -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 984891b82a..e26ce26f5a 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -391,7 +391,7 @@ static int 
s390_machine_protect(S390CcwMachineState *ms) - } - - /* Set SE header and unpack */ -- rc = s390_ipl_prepare_pv_header(); -+ rc = s390_ipl_prepare_pv_header(&local_err); - if (rc) { - goto out_err; - } -@@ -410,6 +410,9 @@ static int s390_machine_protect(S390CcwMachineState *ms) - return rc; - - out_err: -+ if (local_err) { -+ error_report_err(local_err); -+ } - s390_machine_unprotect(ms); - return rc; - } -diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c -index 6a69be7e5c..7ca7faec73 100644 ---- a/target/s390x/kvm/pv.c -+++ b/target/s390x/kvm/pv.c -@@ -29,7 +29,8 @@ static bool info_valid; - static struct kvm_s390_pv_info_vm info_vm; - static struct kvm_s390_pv_info_dump info_dump; - --static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) -+static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data, -+ int *pvrc) - { - struct kvm_pv_cmd pv_cmd = { - .cmd = cmd, -@@ -46,6 +47,9 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) - "IOCTL rc: %d", cmd, cmdname, pv_cmd.rc, pv_cmd.rrc, - rc); - } -+ if (pvrc) { -+ *pvrc = pv_cmd.rc; -+ } - return rc; - } - -@@ -53,12 +57,13 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) - * This macro lets us pass the command as a string to the function so - * we can print it on an error. 
- */ --#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data) -+#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data, NULL) -+#define s390_pv_cmd_pvrc(cmd, data, pvrc) __s390_pv_cmd(cmd, #cmd, data, pvrc) - #define s390_pv_cmd_exit(cmd, data) \ - { \ - int rc; \ - \ -- rc = __s390_pv_cmd(cmd, #cmd, data);\ -+ rc = __s390_pv_cmd(cmd, #cmd, data, NULL); \ - if (rc) { \ - exit(1); \ - } \ -@@ -142,14 +147,24 @@ bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) - return true; - } - --int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) -+int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp) - { -+ int ret, pvrc; - struct kvm_s390_pv_sec_parm args = { - .origin = origin, - .length = length, - }; - -- return s390_pv_cmd(KVM_PV_SET_SEC_PARMS, &args); -+ ret = s390_pv_cmd_pvrc(KVM_PV_SET_SEC_PARMS, &args, &pvrc); -+ if (ret) { -+ error_setg(errp, "Failed to set secure execution parameters"); -+ if (pvrc == 0x108) { -+ error_append_hint(errp, "Please check whether the image is " -+ "correctly encrypted for this host\n"); -+ } -+ } -+ -+ return ret; - } - - /* -diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h -index 7b935e2246..5877d28ff1 100644 ---- a/target/s390x/kvm/pv.h -+++ b/target/s390x/kvm/pv.h -@@ -42,7 +42,7 @@ int s390_pv_query_info(void); - int s390_pv_vm_enable(void); - void s390_pv_vm_disable(void); - bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); --int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); -+int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp); - int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); - void s390_pv_prep_reset(void); - int s390_pv_verify(void); -@@ -62,7 +62,8 @@ static inline int s390_pv_query_info(void) { return 0; } - static inline int s390_pv_vm_enable(void) { return 0; } - static inline void s390_pv_vm_disable(void) {} - static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; } 
--static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } -+static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, -+ Error **errp) { return 0; } - static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } - static inline void s390_pv_prep_reset(void) {} - static inline int s390_pv_verify(void) { return 0; } --- -2.39.3 - diff --git a/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch b/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch deleted file mode 100644 index 9b3eefb..0000000 --- a/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 420bf75353286324822c3bbca3b52a7a56ed668c Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:20:00 -0500 -Subject: [PATCH 083/101] tests: remove aio_context_acquire() tests - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [14/26] f6421037c1523bc957f3be0f4ad05571ae012dba (kmwolf/centos-qemu-kvm) - -The aio_context_acquire() API is being removed. Drop the test case that -calls the API. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Message-ID: <20231205182011.1976568-4-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - tests/unit/test-aio.c | 67 +------------------------------------------ - 1 file changed, 1 insertion(+), 66 deletions(-) - -diff --git a/tests/unit/test-aio.c b/tests/unit/test-aio.c -index 337b6e4ea7..e77d86be87 100644 ---- a/tests/unit/test-aio.c -+++ b/tests/unit/test-aio.c -@@ -100,76 +100,12 @@ static void event_ready_cb(EventNotifier *e) - - /* Tests using aio_*. 
*/ - --typedef struct { -- QemuMutex start_lock; -- EventNotifier notifier; -- bool thread_acquired; --} AcquireTestData; -- --static void *test_acquire_thread(void *opaque) --{ -- AcquireTestData *data = opaque; -- -- /* Wait for other thread to let us start */ -- qemu_mutex_lock(&data->start_lock); -- qemu_mutex_unlock(&data->start_lock); -- -- /* event_notifier_set might be called either before or after -- * the main thread's call to poll(). The test case's outcome -- * should be the same in either case. -- */ -- event_notifier_set(&data->notifier); -- aio_context_acquire(ctx); -- aio_context_release(ctx); -- -- data->thread_acquired = true; /* success, we got here */ -- -- return NULL; --} -- - static void set_event_notifier(AioContext *nctx, EventNotifier *notifier, - EventNotifierHandler *handler) - { - aio_set_event_notifier(nctx, notifier, handler, NULL, NULL); - } - --static void dummy_notifier_read(EventNotifier *n) --{ -- event_notifier_test_and_clear(n); --} -- --static void test_acquire(void) --{ -- QemuThread thread; -- AcquireTestData data; -- -- /* Dummy event notifier ensures aio_poll() will block */ -- event_notifier_init(&data.notifier, false); -- set_event_notifier(ctx, &data.notifier, dummy_notifier_read); -- g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */ -- -- qemu_mutex_init(&data.start_lock); -- qemu_mutex_lock(&data.start_lock); -- data.thread_acquired = false; -- -- qemu_thread_create(&thread, "test_acquire_thread", -- test_acquire_thread, -- &data, QEMU_THREAD_JOINABLE); -- -- /* Block in aio_poll(), let other thread kick us and acquire context */ -- aio_context_acquire(ctx); -- qemu_mutex_unlock(&data.start_lock); /* let the thread run */ -- g_assert(aio_poll(ctx, true)); -- g_assert(!data.thread_acquired); -- aio_context_release(ctx); -- -- qemu_thread_join(&thread); -- set_event_notifier(ctx, &data.notifier, NULL); -- event_notifier_cleanup(&data.notifier); -- -- g_assert(data.thread_acquired); --} -- - static void 
test_bh_schedule(void) - { - BHTestData data = { .n = 0 }; -@@ -879,7 +815,7 @@ static void test_worker_thread_co_enter(void) - qemu_thread_get_self(&this_thread); - co = qemu_coroutine_create(co_check_current_thread, &this_thread); - -- qemu_thread_create(&worker_thread, "test_acquire_thread", -+ qemu_thread_create(&worker_thread, "test_aio_co_enter", - test_aio_co_enter, - co, QEMU_THREAD_JOINABLE); - -@@ -899,7 +835,6 @@ int main(int argc, char **argv) - while (g_main_context_iteration(NULL, false)); - - g_test_init(&argc, &argv, NULL); -- g_test_add_func("/aio/acquire", test_acquire); - g_test_add_func("/aio/bh/schedule", test_bh_schedule); - g_test_add_func("/aio/bh/schedule10", test_bh_schedule10); - g_test_add_func("/aio/bh/cancel", test_bh_cancel); --- -2.39.3 - diff --git a/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch b/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch deleted file mode 100644 index 0afdea2..0000000 --- a/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch +++ /dev/null @@ -1,46 +0,0 @@ -From bbe64d706b3cb8b10ecd22bd71cf76b21eea257f Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 25 Jan 2024 17:58:03 +0100 -Subject: [PATCH 20/22] tests/unit: Bump test-replication timeout to 60 seconds - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [16/17] 200768aedee44d10aa8d199b92a9c17a9002fc3f (stefanha/centos-stream-qemu-kvm) - -We're seeing timeouts for this test on CI runs (specifically for -ubuntu-20.04-s390x-all). It doesn't fail consistently, but even the -successful runs take about 27 or 28 seconds, which is not very far from -the 30 seconds timeout. - -Bump the timeout a bit to make failure less likely even on this CI host. 
- -Signed-off-by: Kevin Wolf -Message-ID: <20240125165803.48373-1-kwolf@redhat.com> -Reviewed-by: Thomas Huth -Signed-off-by: Kevin Wolf -(cherry picked from commit 63b18312d14ac984acaf13c7c55d9baa2d61496e) -Signed-off-by: Stefan Hajnoczi ---- - tests/unit/meson.build | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tests/unit/meson.build b/tests/unit/meson.build -index a05d471090..28db6adea8 100644 ---- a/tests/unit/meson.build -+++ b/tests/unit/meson.build -@@ -173,7 +173,8 @@ test_env.set('G_TEST_BUILDDIR', meson.current_build_dir()) - - slow_tests = { - 'test-crypto-tlscredsx509': 45, -- 'test-crypto-tlssession': 45 -+ 'test-crypto-tlssession': 45, -+ 'test-replication': 60, - } - - foreach test_name, extra: tests --- -2.39.3 - diff --git a/SOURCES/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch b/SOURCES/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch new file mode 100644 index 0000000..d21d298 --- /dev/null +++ b/SOURCES/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch @@ -0,0 +1,59 @@ +From b02dc1e5c0f01228053e784f9ec7ac3a47e91d7c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:36:25 -0500 +Subject: [PATCH 026/100] trace/kvm: Split address space and slot id in + trace_kvm_set_user_memory() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [26/91] 640511c4ab0ba76bb4483f6c3fb73e060d914f0a (bonzini/rhel-qemu-kvm) + +The upper 16 bits of kvm_userspace_memory_region::slot are +address space id. Parse it separately in trace_kvm_set_user_memory(). 
+ +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-5-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 72853afc638b3e28779c86dd05da2f3bb149fe2c) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 5 +++-- + accel/kvm/trace-events | 2 +- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index b51e09a583..9bd235c969 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -303,8 +303,9 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + slot->old_flags = mem.flags; + err: +- trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr, +- mem.memory_size, mem.userspace_addr, ret); ++ trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, ++ mem.guest_phys_addr, mem.memory_size, ++ mem.userspace_addr, ret); + if (ret < 0) { + error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index a25902597b..9f599abc17 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" + kvm_irqchip_release_virq(int virq) "virq %d" + kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" + kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" +-kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" ++kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t 
guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" + kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 + kvm_resample_fd_notify(int gsi) "gsi %d" + kvm_dirty_ring_full(int id) "vcpu %d" +-- +2.39.3 + diff --git a/SOURCES/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch b/SOURCES/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch new file mode 100644 index 0000000..f141bf1 --- /dev/null +++ b/SOURCES/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch @@ -0,0 +1,62 @@ +From e185104a10a37174d13d981fa1febafbb7e651aa Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 13:49:49 +0200 +Subject: [PATCH 050/100] update-linux-headers: fix forwarding to asm-generic + headers + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [50/91] 3c98a7fe790d943bb5ff8dca1da83f5944ec3e2e (bonzini/rhel-qemu-kvm) + +Afer commit 3efc75ad9d9 ("scripts/update-linux-headers.sh: Remove +temporary directory inbetween", 2024-05-29), updating linux-headers/ +results in errors such as + + cp: cannot stat '/tmp/tmp.1A1Eejh1UE/headers/include/asm/bitsperlong.h': No such file or directory + +because Loongarch does not have an asm/bitsperlong.h file and uses the +generic version. Before commit 3efc75ad9d9, the missing file would +incorrectly cause stale files to be included in linux-headers/. The files +were never committed to qemu.git, but were wrong nevertheless. The build +would just use the system version of the files, which is opposite to +the idea of importing Linux header files into QEMU's tree. 
+ +Create forwarding headers, resembling the ones that are generated during a +kernel build by scripts/Makefile.asm-generic, if a file is only installed +under include/asm-generic/. + +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit ef7c70f020ca1fe9e7c98ea2cd9d6ba3c5714716) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index f084bee72e..78c0f2c43e 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -119,7 +119,14 @@ for arch in $ARCHLIST; do + rm -rf "$output/linux-headers/asm-$arch" + mkdir -p "$output/linux-headers/asm-$arch" + for header in kvm.h unistd.h bitsperlong.h mman.h; do +- cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ if test -f "$hdrdir/include/asm/$header"; then ++ cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ elif test -f "$hdrdir/include/asm-generic/$header"; then ++ # not installed as , but used as such in kernel sources ++ cat <$output/linux-headers/asm-$arch/$header ++#include ++EOF ++ fi + done + + if [ $arch = mips ]; then +-- +2.39.3 + diff --git a/SOURCES/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch b/SOURCES/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch new file mode 100644 index 0000000..a75a2aa --- /dev/null +++ b/SOURCES/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch @@ -0,0 +1,175 @@ +From 8d6c37ddc253f63202cc9519670c258e9d81b98e Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 14:25:06 +0200 +Subject: [PATCH 053/100] update-linux-headers: import linux/kvm_para.h header + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [53/91] 
27d4db0ecec7d0b8adeba1ec85fca32eacee1009 (bonzini/rhel-qemu-kvm) + +Right now QEMU is importing arch/x86/include/uapi/asm/kvm_para.h +because it includes definitions for kvmclock and for KVM CPUID +bits. However, other definitions for KVM hypercall values and return +codes are included in include/uapi/linux/kvm_para.h and they will be +used by SEV-SNP. + +To ensure that it is possible to include both and +"standard-headers/asm-x86/kvm_para.h" without conflicts, provide +linux/kvm_para.h as a portable header too, and forward linux-headers/ +files to those in include/standard-headers. Note that +will include architecture-specific definitions as well, but +"standard-headers/linux/kvm_para.h" will not because it can be used in +architecture-independent files. + +This could easily be extended to other architectures, but right now +they do not need any symbol in their specific kvm_para.h files. + +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit aa274c33c39e7de981dc195abe60e1a246c9d248) +Signed-off-by: Paolo Bonzini +--- + include/standard-headers/linux/kvm_para.h | 38 +++++++++++++++++++++++ + linux-headers/asm-x86/kvm_para.h | 1 + + linux-headers/linux/kvm_para.h | 2 ++ + scripts/update-linux-headers.sh | 22 ++++++++++++- + 4 files changed, 62 insertions(+), 1 deletion(-) + create mode 100644 include/standard-headers/linux/kvm_para.h + create mode 100644 linux-headers/asm-x86/kvm_para.h + create mode 100644 linux-headers/linux/kvm_para.h + +diff --git a/include/standard-headers/linux/kvm_para.h b/include/standard-headers/linux/kvm_para.h +new file mode 100644 +index 0000000000..015c166302 +--- /dev/null ++++ b/include/standard-headers/linux/kvm_para.h +@@ -0,0 +1,38 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef __LINUX_KVM_PARA_H ++#define __LINUX_KVM_PARA_H ++ ++/* ++ * This header file provides a method for making a hypercall to the host ++ * Architectures should define: ++ * - kvm_hypercall0, 
kvm_hypercall1... ++ * - kvm_arch_para_features ++ * - kvm_para_available ++ */ ++ ++/* Return values for hypercalls */ ++#define KVM_ENOSYS 1000 ++#define KVM_EFAULT EFAULT ++#define KVM_EINVAL EINVAL ++#define KVM_E2BIG E2BIG ++#define KVM_EPERM EPERM ++#define KVM_EOPNOTSUPP 95 ++ ++#define KVM_HC_VAPIC_POLL_IRQ 1 ++#define KVM_HC_MMU_OP 2 ++#define KVM_HC_FEATURES 3 ++#define KVM_HC_PPC_MAP_MAGIC_PAGE 4 ++#define KVM_HC_KICK_CPU 5 ++#define KVM_HC_MIPS_GET_CLOCK_FREQ 6 ++#define KVM_HC_MIPS_EXIT_VM 7 ++#define KVM_HC_MIPS_CONSOLE_OUTPUT 8 ++#define KVM_HC_CLOCK_PAIRING 9 ++#define KVM_HC_SEND_IPI 10 ++#define KVM_HC_SCHED_YIELD 11 ++#define KVM_HC_MAP_GPA_RANGE 12 ++ ++/* ++ * hypercalls use architecture specific ++ */ ++ ++#endif /* __LINUX_KVM_PARA_H */ +diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h +new file mode 100644 +index 0000000000..1d3e0e0b07 +--- /dev/null ++++ b/linux-headers/asm-x86/kvm_para.h +@@ -0,0 +1 @@ ++#include "standard-headers/asm-x86/kvm_para.h" +diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h +new file mode 100644 +index 0000000000..6a1e672259 +--- /dev/null ++++ b/linux-headers/linux/kvm_para.h +@@ -0,0 +1,2 @@ ++#include "standard-headers/linux/kvm_para.h" ++#include +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 90759dcfe0..64d1989961 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -64,6 +64,7 @@ cp_portable() { + -e 'linux/kernel' \ + -e 'linux/sysinfo' \ + -e 'asm/setup_data.h' \ ++ -e 'asm/kvm_para.h' \ + > /dev/null + then + echo "Unexpected #include in input file $f". 
+@@ -71,6 +72,15 @@ cp_portable() { + fi + + header=$(basename "$f"); ++ ++ if test -z "$arch"; then ++ # Let users of include/standard-headers/linux/ headers pick the ++ # asm-* header that they care about ++ arch_cmd='/]*\)>/d' ++ else ++ arch_cmd='s/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' ++ fi ++ + sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \ + -e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/u\([0-9][0-9]*\)/uint\1_t/g' \ +@@ -79,7 +89,7 @@ cp_portable() { + -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ + -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ +- -e 's/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ ++ -e "$arch_cmd" \ + -e 's/__bitwise//' \ + -e 's/__attribute__((packed))/QEMU_PACKED/' \ + -e 's/__inline__/inline/' \ +@@ -159,7 +169,12 @@ EOF + cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" ++ + cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" ++ cat <$output/linux-headers/asm-$arch/kvm_para.h ++#include "standard-headers/asm-$arch/kvm_para.h" ++EOF ++ + # Remove everything except the macros from bootparam.h avoiding the + # unnecessary import of several video/ist/etc headers + sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ +@@ -209,6 +224,10 @@ if [ -d "$linux/LICENSES" ]; then + done + fi + ++cat <$output/linux-headers/linux/kvm_para.h ++#include "standard-headers/linux/kvm_para.h" ++#include ++EOF + cat <$output/linux-headers/linux/virtio_config.h + #include "standard-headers/linux/virtio_config.h" + EOF +@@ -231,6 +250,7 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ + "$hdrdir/include/linux/ethtool.h" \ + "$hdrdir/include/linux/const.h" \ + "$hdrdir/include/linux/kernel.h" \ ++ "$hdrdir/include/linux/kvm_para.h" \ + "$hdrdir/include/linux/vhost_types.h" \ + 
"$hdrdir/include/linux/sysinfo.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" +-- +2.39.3 + diff --git a/SOURCES/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch b/SOURCES/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch new file mode 100644 index 0000000..cb0a4d4 --- /dev/null +++ b/SOURCES/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch @@ -0,0 +1,95 @@ +From 00e250d9df1949d363758a34e3f46d8c71be054f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 14:16:55 +0200 +Subject: [PATCH 051/100] update-linux-headers: move pvpanic.h to correct + directory + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [51/91] 10efff5bbcb867ba34f3f9ff8045381ea96f94c7 (bonzini/rhel-qemu-kvm) + +Linux has , not . Use the same +directory for QEMU's include/standard-headers/ copy. + +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit b8116f4cbaa0f64bb07564f20b3b5219e23c8bff) +Signed-off-by: Paolo Bonzini +--- + hw/misc/pvpanic-isa.c | 2 +- + hw/misc/pvpanic-pci.c | 2 +- + hw/misc/pvpanic.c | 2 +- + include/standard-headers/{linux => misc}/pvpanic.h | 0 + scripts/update-linux-headers.sh | 6 ++++-- + 5 files changed, 7 insertions(+), 5 deletions(-) + rename include/standard-headers/{linux => misc}/pvpanic.h (100%) + +diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c +index ccec50f61b..b4f84c4110 100644 +--- a/hw/misc/pvpanic-isa.c ++++ b/hw/misc/pvpanic-isa.c +@@ -21,7 +21,7 @@ + #include "hw/misc/pvpanic.h" + #include "qom/object.h" + #include "hw/isa/isa.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + #include "hw/acpi/acpi_aml_interface.h" + + OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE) +diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c +index 
83be95d0d2..4d44a881da 100644 +--- a/hw/misc/pvpanic-pci.c ++++ b/hw/misc/pvpanic-pci.c +@@ -21,7 +21,7 @@ + #include "hw/misc/pvpanic.h" + #include "qom/object.h" + #include "hw/pci/pci_device.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + + OBJECT_DECLARE_SIMPLE_TYPE(PVPanicPCIState, PVPANIC_PCI_DEVICE) + +diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c +index 1540e9091a..80289ecf5f 100644 +--- a/hw/misc/pvpanic.c ++++ b/hw/misc/pvpanic.c +@@ -21,7 +21,7 @@ + #include "hw/qdev-properties.h" + #include "hw/misc/pvpanic.h" + #include "qom/object.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + + static void handle_event(int event) + { +diff --git a/include/standard-headers/linux/pvpanic.h b/include/standard-headers/misc/pvpanic.h +similarity index 100% +rename from include/standard-headers/linux/pvpanic.h +rename to include/standard-headers/misc/pvpanic.h +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 78c0f2c43e..90759dcfe0 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -232,10 +232,12 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ + "$hdrdir/include/linux/const.h" \ + "$hdrdir/include/linux/kernel.h" \ + "$hdrdir/include/linux/vhost_types.h" \ +- "$hdrdir/include/linux/sysinfo.h" \ +- "$hdrdir/include/misc/pvpanic.h"; do ++ "$hdrdir/include/linux/sysinfo.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" + done ++mkdir -p "$output/include/standard-headers/misc" ++cp_portable "$hdrdir/include/misc/pvpanic.h" \ ++ "$output/include/standard-headers/misc" + mkdir -p "$output/include/standard-headers/drm" + cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ + "$output/include/standard-headers/drm" +-- +2.39.3 + diff --git a/SOURCES/kvm-util-char_dev-Add-open_cdev.patch b/SOURCES/kvm-util-char_dev-Add-open_cdev.patch deleted file mode 100644 index 1f1e870..0000000 --- 
a/SOURCES/kvm-util-char_dev-Add-open_cdev.patch +++ /dev/null @@ -1,175 +0,0 @@ -From de167878ec4ca159cc6def5134c91c5fe9b5ab96 Mon Sep 17 00:00:00 2001 -From: Yi Liu -Date: Tue, 21 Nov 2023 16:44:01 +0800 -Subject: [PATCH 022/101] util/char_dev: Add open_cdev() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [21/67] 72bf9ec3ccc9959626235bd270ec84caa4cee435 (eauger1/centos-qemu-kvm) - -/dev/vfio/devices/vfioX may not exist. In that case it is still possible -to open /dev/char/$major:$minor instead. Add helper function to abstract -the cdev open. - -Suggested-by: Jason Gunthorpe -Signed-off-by: Yi Liu -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit d6b5c4c1b516a8176b74ec35a0af8cf89b04b6c1) -Signed-off-by: Eric Auger ---- - MAINTAINERS | 2 + - include/qemu/chardev_open.h | 16 ++++++++ - util/chardev_open.c | 81 +++++++++++++++++++++++++++++++++++++ - util/meson.build | 1 + - 4 files changed, 100 insertions(+) - create mode 100644 include/qemu/chardev_open.h - create mode 100644 util/chardev_open.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index a5a446914a..ca70bb4e64 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -2174,6 +2174,8 @@ M: Zhenzhong Duan - S: Supported - F: backends/iommufd.c - F: include/sysemu/iommufd.h -+F: include/qemu/chardev_open.h -+F: util/chardev_open.c - - vhost - M: Michael S. Tsirkin -diff --git a/include/qemu/chardev_open.h b/include/qemu/chardev_open.h -new file mode 100644 -index 0000000000..64e8fcfdcb ---- /dev/null -+++ b/include/qemu/chardev_open.h -@@ -0,0 +1,16 @@ -+/* -+ * QEMU Chardev Helper -+ * -+ * Copyright (C) 2023 Intel Corporation. 
-+ * -+ * Authors: Yi Liu -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. See -+ * the COPYING file in the top-level directory. -+ */ -+ -+#ifndef QEMU_CHARDEV_OPEN_H -+#define QEMU_CHARDEV_OPEN_H -+ -+int open_cdev(const char *devpath, dev_t cdev); -+#endif -diff --git a/util/chardev_open.c b/util/chardev_open.c -new file mode 100644 -index 0000000000..f776429788 ---- /dev/null -+++ b/util/chardev_open.c -@@ -0,0 +1,81 @@ -+/* -+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved. -+ * Copyright (C) 2023 Intel Corporation. -+ * -+ * This software is available to you under a choice of one of two -+ * licenses. You may choose to be licensed under the terms of the GNU -+ * General Public License (GPL) Version 2, available from the file -+ * COPYING in the main directory of this source tree, or the -+ * OpenIB.org BSD license below: -+ * -+ * Redistribution and use in source and binary forms, with or -+ * without modification, are permitted provided that the following -+ * conditions are met: -+ * -+ * - Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * - Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials -+ * provided with the distribution. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. 
-+ * -+ * Authors: Yi Liu -+ * -+ * Copied from -+ * https://github.com/linux-rdma/rdma-core/blob/master/util/open_cdev.c -+ * -+ */ -+ -+#include "qemu/osdep.h" -+#include "qemu/chardev_open.h" -+ -+static int open_cdev_internal(const char *path, dev_t cdev) -+{ -+ struct stat st; -+ int fd; -+ -+ fd = qemu_open_old(path, O_RDWR); -+ if (fd == -1) { -+ return -1; -+ } -+ if (fstat(fd, &st) || !S_ISCHR(st.st_mode) || -+ (cdev != 0 && st.st_rdev != cdev)) { -+ close(fd); -+ return -1; -+ } -+ return fd; -+} -+ -+static int open_cdev_robust(dev_t cdev) -+{ -+ g_autofree char *devpath = NULL; -+ -+ /* -+ * This assumes that udev is being used and is creating the /dev/char/ -+ * symlinks. -+ */ -+ devpath = g_strdup_printf("/dev/char/%u:%u", major(cdev), minor(cdev)); -+ return open_cdev_internal(devpath, cdev); -+} -+ -+int open_cdev(const char *devpath, dev_t cdev) -+{ -+ int fd; -+ -+ fd = open_cdev_internal(devpath, cdev); -+ if (fd == -1 && cdev != 0) { -+ return open_cdev_robust(cdev); -+ } -+ return fd; -+} -diff --git a/util/meson.build b/util/meson.build -index c2322ef6e7..174c133368 100644 ---- a/util/meson.build -+++ b/util/meson.build -@@ -108,6 +108,7 @@ if have_block - util_ss.add(files('filemonitor-stub.c')) - endif - util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c')) -+ util_ss.add(when: 'CONFIG_LINUX', if_true: files('chardev_open.c')) - endif - - if cpu == 'aarch64' --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch b/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch deleted file mode 100644 index 040288f..0000000 --- a/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch +++ /dev/null @@ -1,154 +0,0 @@ -From f554328f6f4702743af71befcb83c25c36e4fa4d Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:25 +0800 -Subject: [PATCH 046/101] vfio: Introduce a helper function to initialize - VFIODevice -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [45/67] 73225f394540bf5aeb70c0bdb89771f19a6d286d (eauger1/centos-qemu-kvm) - -Introduce a helper function to replace the common code to initialize -VFIODevice in pci, platform, ap and ccw VFIO device. - -No functional change intended. - -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 6106a329141af7d47bdc3346ce9820d4714e0e5d) -Signed-off-by: Eric Auger ---- - hw/vfio/ap.c | 8 ++------ - hw/vfio/ccw.c | 8 ++------ - hw/vfio/helpers.c | 11 +++++++++++ - hw/vfio/pci.c | 6 ++---- - hw/vfio/platform.c | 6 ++---- - include/hw/vfio/vfio-common.h | 2 ++ - 6 files changed, 21 insertions(+), 20 deletions(-) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index 95fe7cd98b..e157aa1ff7 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -226,18 +226,14 @@ static void vfio_ap_instance_init(Object *obj) - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); - VFIODevice *vbasedev = &vapdev->vdev; - -- vbasedev->type = VFIO_DEVICE_TYPE_AP; -- vbasedev->ops = &vfio_ap_ops; -- vbasedev->dev = DEVICE(vapdev); -- vbasedev->fd = -1; -- - /* - * vfio-ap devices operate in a way compatible with discarding of - * memory in RAM blocks, as no pages are pinned in the host. - * This needs to be set before vfio_get_device() for vfio common to - * handle ram_block_discard_disable(). 
- */ -- vbasedev->ram_block_discard_allowed = true; -+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops, -+ DEVICE(vapdev), true); - } - - #ifdef CONFIG_IOMMUFD -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index 6305a4c1b8..90e4a53437 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -683,11 +683,6 @@ static void vfio_ccw_instance_init(Object *obj) - VFIOCCWDevice *vcdev = VFIO_CCW(obj); - VFIODevice *vbasedev = &vcdev->vdev; - -- vbasedev->type = VFIO_DEVICE_TYPE_CCW; -- vbasedev->ops = &vfio_ccw_ops; -- vbasedev->dev = DEVICE(vcdev); -- vbasedev->fd = -1; -- - /* - * All vfio-ccw devices are believed to operate in a way compatible with - * discarding of memory in RAM blocks, ie. pages pinned in the host are -@@ -696,7 +691,8 @@ static void vfio_ccw_instance_init(Object *obj) - * needs to be set before vfio_get_device() for vfio common to handle - * ram_block_discard_disable(). - */ -- vbasedev->ram_block_discard_allowed = true; -+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_CCW, &vfio_ccw_ops, -+ DEVICE(vcdev), true); - } - - #ifdef CONFIG_IOMMUFD -diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c -index 3592c3d54e..6789870802 100644 ---- a/hw/vfio/helpers.c -+++ b/hw/vfio/helpers.c -@@ -652,3 +652,14 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) - } - vbasedev->fd = fd; - } -+ -+void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, -+ DeviceState *dev, bool ram_discard) -+{ -+ vbasedev->type = type; -+ vbasedev->ops = ops; -+ vbasedev->dev = dev; -+ vbasedev->fd = -1; -+ -+ vbasedev->ram_block_discard_allowed = ram_discard; -+} -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 3f5900cc46..83c3238608 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3353,10 +3353,8 @@ static void vfio_instance_init(Object *obj) - vdev->host.slot = ~0U; - vdev->host.function = ~0U; - -- vbasedev->type = VFIO_DEVICE_TYPE_PCI; -- vbasedev->ops = &vfio_pci_ops; -- vbasedev->dev = DEVICE(vdev); -- vbasedev->fd 
= -1; -+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops, -+ DEVICE(vdev), false); - - vdev->nv_gpudirect_clique = 0xFF; - -diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c -index 506eb8193f..a8d9b7da63 100644 ---- a/hw/vfio/platform.c -+++ b/hw/vfio/platform.c -@@ -657,10 +657,8 @@ static void vfio_platform_instance_init(Object *obj) - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); - VFIODevice *vbasedev = &vdev->vbasedev; - -- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; -- vbasedev->ops = &vfio_platform_ops; -- vbasedev->dev = DEVICE(vdev); -- vbasedev->fd = -1; -+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops, -+ DEVICE(vdev), false); - } - - #ifdef CONFIG_IOMMUFD -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index efcba19f66..b8aa8a5495 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -257,4 +257,6 @@ int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, - /* Returns 0 on success, or a negative errno. 
*/ - int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); - void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); -+void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, -+ DeviceState *dev, bool ram_discard); - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch b/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch deleted file mode 100644 index d41e8fb..0000000 --- a/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 7f392385d1b865904eae4b6681e3e7a87eb3af3d Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:27 +0800 -Subject: [PATCH 002/101] vfio: Introduce base object for VFIOContainer and - targeted interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [1/67] e63af50c2cb94f286b2d91f58c2d19dd862e019d (eauger1/centos-qemu-kvm) - -Introduce a dumb VFIOContainerBase object and its targeted interface. -This is willingly not a QOM object because we don't want it to be -visible from the user interface. The VFIOContainerBase will be -smoothly populated in subsequent patches as well as interfaces. - -No functional change intended. 
- -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit f61dddd73232e3d82d560d1e1bca120446021f2f) -Signed-off-by: Eric Auger ---- - include/hw/vfio/vfio-common.h | 8 ++--- - include/hw/vfio/vfio-container-base.h | 50 +++++++++++++++++++++++++++ - 2 files changed, 52 insertions(+), 6 deletions(-) - create mode 100644 include/hw/vfio/vfio-container-base.h - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index a4a22accb9..586d153c12 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -30,6 +30,7 @@ - #include - #endif - #include "sysemu/sysemu.h" -+#include "hw/vfio/vfio-container-base.h" - - #define VFIO_MSG_PREFIX "vfio %s: " - -@@ -81,6 +82,7 @@ typedef struct VFIOAddressSpace { - struct VFIOGroup; - - typedef struct VFIOContainer { -+ VFIOContainerBase bcontainer; - VFIOAddressSpace *space; - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ - MemoryListener listener; -@@ -201,12 +203,6 @@ typedef struct VFIODisplay { - } dmabuf; - } VFIODisplay; - --typedef struct { -- unsigned long *bitmap; -- hwaddr size; -- hwaddr pages; --} VFIOBitmap; -- - VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); - void vfio_put_address_space(VFIOAddressSpace *space); - bool vfio_devices_all_running_and_saving(VFIOContainer *container); -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -new file mode 100644 -index 0000000000..1d6daaea5d ---- /dev/null -+++ b/include/hw/vfio/vfio-container-base.h -@@ -0,0 +1,50 @@ -+/* -+ * VFIO BASE CONTAINER -+ * -+ * Copyright (C) 2023 Intel Corporation. -+ * Copyright Red Hat, Inc. 
2023 -+ * -+ * Authors: Yi Liu -+ * Eric Auger -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#ifndef HW_VFIO_VFIO_CONTAINER_BASE_H -+#define HW_VFIO_VFIO_CONTAINER_BASE_H -+ -+#include "exec/memory.h" -+ -+typedef struct VFIODevice VFIODevice; -+typedef struct VFIOIOMMUOps VFIOIOMMUOps; -+ -+typedef struct { -+ unsigned long *bitmap; -+ hwaddr size; -+ hwaddr pages; -+} VFIOBitmap; -+ -+/* -+ * This is the base object for vfio container backends -+ */ -+typedef struct VFIOContainerBase { -+ const VFIOIOMMUOps *ops; -+} VFIOContainerBase; -+ -+struct VFIOIOMMUOps { -+ /* basic feature */ -+ int (*dma_map)(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ void *vaddr, bool readonly); -+ int (*dma_unmap)(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb); -+ int (*attach_device)(const char *name, VFIODevice *vbasedev, -+ AddressSpace *as, Error **errp); -+ void (*detach_device)(VFIODevice *vbasedev); -+ /* migration feature */ -+ int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); -+ int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, -+ hwaddr iova, hwaddr size); -+}; -+#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch b/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch deleted file mode 100644 index 03fb220..0000000 --- a/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch +++ /dev/null @@ -1,276 +0,0 @@ -From 84b15fad1af781d06d0206d362de0801d7a18d0b Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:17 +0800 -Subject: [PATCH 038/101] vfio: Make VFIOContainerBase poiner parameter const - in VFIOIOMMUOps callbacks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: 
RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [37/67] 95eb9edc7fcfefbd4b075f6f04941ed4a19ff87d (eauger1/centos-qemu-kvm) - -Some of the callbacks in VFIOIOMMUOps pass VFIOContainerBase poiner, -those callbacks only need read access to the sub object of VFIOContainerBase. -So make VFIOContainerBase, VFIOContainer and VFIOIOMMUFDContainer as const -in these callbacks. - -Local functions called by those callbacks also need same changes to avoid -build error. - -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 4517c33c31d392f08fa96a9db911da1e3507be94) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 9 +++---- - hw/vfio/container-base.c | 2 +- - hw/vfio/container.c | 34 ++++++++++++++------------- - hw/vfio/iommufd.c | 8 +++---- - include/hw/vfio/vfio-common.h | 12 ++++++---- - include/hw/vfio/vfio-container-base.h | 12 ++++++---- - 6 files changed, 42 insertions(+), 35 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 6569732b7a..08a3e57672 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -204,7 +204,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) - return true; - } - --bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) -+bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer) - { - VFIODevice *vbasedev; - -@@ -221,7 +221,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) - * Check if all VFIO devices are running and migration is active, which is - * essentially equivalent to the migration being in pre-copy phase. 
- */ --bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer) -+bool -+vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer) - { - VFIODevice *vbasedev; - -@@ -1139,7 +1140,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, - return 0; - } - --int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size) - { -@@ -1162,7 +1163,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, - return 0; - } - --int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, -+int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, - uint64_t size, ram_addr_t ram_addr) - { - bool all_device_dirty_tracking = -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index eee2dcfe76..1ffd25bbfa 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -63,7 +63,7 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); - } - --int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, - hwaddr iova, hwaddr size) - { -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 1dbf9b9a17..b22feb8ded 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -61,11 +61,11 @@ static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state) - } - } - --static int vfio_dma_unmap_bitmap(VFIOContainer *container, -+static int vfio_dma_unmap_bitmap(const VFIOContainer *container, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; -+ const VFIOContainerBase *bcontainer = &container->bcontainer; - struct vfio_iommu_type1_dma_unmap 
*unmap; - struct vfio_bitmap *bitmap; - VFIOBitmap vbmap; -@@ -117,11 +117,12 @@ unmap_exit: - /* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ --static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, -- ram_addr_t size, IOMMUTLBEntry *iotlb) -+static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb) - { -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); -+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dma_unmap unmap = { - .argsz = sizeof(unmap), - .flags = 0, -@@ -174,11 +175,11 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, - return 0; - } - --static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, -+static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) - { -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); -+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dma_map map = { - .argsz = sizeof(map), - .flags = VFIO_DMA_MAP_FLAG_READ, -@@ -207,11 +208,12 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, - return -errno; - } - --static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, -- bool start) -+static int -+vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, -+ bool start) - { -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); -+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - int ret; - struct vfio_iommu_type1_dirty_bitmap dirty = { - .argsz = sizeof(dirty), -@@ -233,12 +235,12 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - 
return ret; - } - --static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, - hwaddr iova, hwaddr size) - { -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); -+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dirty_bitmap *dbitmap; - struct vfio_iommu_type1_dirty_bitmap_get *range; - int ret; -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index 5accd26484..87a561c545 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -26,10 +26,10 @@ - #include "qemu/chardev_open.h" - #include "pci.h" - --static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, -+static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) - { -- VFIOIOMMUFDContainer *container = -+ const VFIOIOMMUFDContainer *container = - container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - - return iommufd_backend_map_dma(container->be, -@@ -37,11 +37,11 @@ static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, - iova, size, vaddr, readonly); - } - --static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, -+static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) - { -- VFIOIOMMUFDContainer *container = -+ const VFIOIOMMUFDContainer *container = - container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - - /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 697bf24a35..efcba19f66 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -244,13 +244,15 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); - void vfio_migration_exit(VFIODevice *vbasedev); - - int 
vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); --bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer); --bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer); --int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+bool -+vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer); -+bool -+vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer); -+int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size); --int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, -- uint64_t size, ram_addr_t ram_addr); -+int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, -+ uint64_t size, ram_addr_t ram_addr); - - /* Returns 0 on success, or a negative errno. */ - int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 45bb19c767..2ae297ccda 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -82,7 +82,7 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer, - MemoryRegionSection *section); - int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - bool start); --int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, - hwaddr iova, hwaddr size); - -@@ -93,18 +93,20 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); - - struct VFIOIOMMUOps { - /* basic feature */ -- int (*dma_map)(VFIOContainerBase *bcontainer, -+ int (*dma_map)(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); -- int (*dma_unmap)(VFIOContainerBase *bcontainer, -+ int (*dma_unmap)(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - 
IOMMUTLBEntry *iotlb); - int (*attach_device)(const char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp); - void (*detach_device)(VFIODevice *vbasedev); - /* migration feature */ -- int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); -- int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, -+ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer, -+ bool start); -+ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, -+ VFIOBitmap *vbmap, - hwaddr iova, hwaddr size); - /* PCI specific */ - int (*pci_hot_reset)(VFIODevice *vbasedev, bool single); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch b/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch deleted file mode 100644 index ffd8b9f..0000000 --- a/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 57bdfc821d6f4b4f9c6b1ff05bf0114e5cabc77e Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:13 +0800 -Subject: [PATCH 034/101] vfio/ap: Allow the selection of a given iommu backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [33/67] a12bb86e5b627ccf246fb9ce60820595589ff8e5 (eauger1/centos-qemu-kvm) - -Now we support two types of iommu backends, let's add the capability -to select one of them. This depends on whether an iommufd object has -been linked with the vfio-ap device: - -if the user wants to use the legacy backend, it shall not -link the vfio-ap device with any iommufd object: - - -device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX - -This is called the legacy mode/backend. 
- -If the user wants to use the iommufd backend (/dev/iommu) it -shall pass an iommufd object id in the vfio-ap device options: - - -object iommufd,id=iommufd0 - -device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0 - -Suggested-by: Alex Williamson -Signed-off-by: Zhenzhong Duan -Reviewed-by: Matthew Rosato -Reviewed-by: Cédric Le Goater -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 336f308958d598f3db351bb7d94cc57b4b2d448d) -Signed-off-by: Eric Auger ---- - hw/vfio/ap.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index bbf69ff55a..80629609ae 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -11,10 +11,12 @@ - */ - - #include "qemu/osdep.h" -+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include - #include - #include "qapi/error.h" - #include "hw/vfio/vfio-common.h" -+#include "sysemu/iommufd.h" - #include "hw/s390x/ap-device.h" - #include "qemu/error-report.h" - #include "qemu/event_notifier.h" -@@ -204,6 +206,10 @@ static void vfio_ap_unrealize(DeviceState *dev) - - static Property vfio_ap_properties[] = { - DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev), -+#ifdef CONFIG_IOMMUFD -+ DEFINE_PROP_LINK("iommufd", VFIOAPDevice, vdev.iommufd, -+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -+#endif - DEFINE_PROP_END_OF_LIST(), - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch b/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch deleted file mode 100644 index 1055329..0000000 --- a/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch +++ /dev/null @@ -1,87 +0,0 @@ -From db09b7c60c01ee75d602261ee959a96fa0d89d68 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:14 +0800 -Subject: [PATCH 035/101] vfio/ap: Make vfio cdev pre-openable by passing a - file handle -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 
8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [34/67] aaafa6088a9b0302d53aa539f67792d02ea0f663 (eauger1/centos-qemu-kvm) - -This gives management tools like libvirt a chance to open the vfio -cdev with privilege and pass FD to qemu. This way qemu never needs -to have privilege to open a VFIO or iommu cdev node. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Matthew Rosato -Reviewed-by: Cédric Le Goater -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 5e7ba401b71d18544a3e44b2a58b9e63fd5148d5) -Signed-off-by: Eric Auger ---- - hw/vfio/ap.c | 23 ++++++++++++++++++++++- - 1 file changed, 22 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index 80629609ae..f180e4a32a 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -160,7 +160,10 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); - VFIODevice *vbasedev = &vapdev->vdev; - -- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); -+ if (vfio_device_get_name(vbasedev, errp) < 0) { -+ return; -+ } -+ - vbasedev->ops = &vfio_ap_ops; - vbasedev->type = VFIO_DEVICE_TYPE_AP; - vbasedev->dev = dev; -@@ -230,11 +233,28 @@ static const VMStateDescription vfio_ap_vmstate = { - .unmigratable = 1, - }; - -+static void vfio_ap_instance_init(Object *obj) -+{ -+ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); -+ -+ vapdev->vdev.fd = -1; -+} -+ -+#ifdef CONFIG_IOMMUFD -+static void vfio_ap_set_fd(Object *obj, const char *str, Error **errp) -+{ -+ vfio_device_set_fd(&VFIO_AP_DEVICE(obj)->vdev, str, errp); -+} -+#endif -+ - static void vfio_ap_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); - - device_class_set_props(dc, vfio_ap_properties); -+#ifdef CONFIG_IOMMUFD -+ object_class_property_add_str(klass, "fd", NULL, vfio_ap_set_fd); -+#endif - 
dc->vmsd = &vfio_ap_vmstate; - dc->desc = "VFIO-based AP device assignment"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); -@@ -249,6 +269,7 @@ static const TypeInfo vfio_ap_info = { - .name = TYPE_VFIO_AP_DEVICE, - .parent = TYPE_AP_DEVICE, - .instance_size = sizeof(VFIOAPDevice), -+ .instance_init = vfio_ap_instance_init, - .class_init = vfio_ap_class_init, - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch b/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch deleted file mode 100644 index ed60920..0000000 --- a/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch +++ /dev/null @@ -1,81 +0,0 @@ -From b8630ecb698e31311089ba4e224d5e2c08c8e665 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:23 +0800 -Subject: [PATCH 044/101] vfio/ap: Move VFIODevice initializations in - vfio_ap_instance_init -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [43/67] 95a527f649b28c5c78903e99735107667e8468b1 (eauger1/centos-qemu-kvm) - -Some of the VFIODevice initializations is in vfio_ap_realize, -move all of them in vfio_ap_instance_init. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Eric Farman -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit cbbcc2f1706aa1a08637142744d2f5f6515ac93f) -Signed-off-by: Eric Auger ---- - hw/vfio/ap.c | 26 +++++++++++++------------- - 1 file changed, 13 insertions(+), 13 deletions(-) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index f180e4a32a..95fe7cd98b 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -164,18 +164,6 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) - return; - } - -- vbasedev->ops = &vfio_ap_ops; -- vbasedev->type = VFIO_DEVICE_TYPE_AP; -- vbasedev->dev = dev; -- -- /* -- * vfio-ap devices operate in a way compatible with discarding of -- * memory in RAM blocks, as no pages are pinned in the host. -- * This needs to be set before vfio_get_device() for vfio common to -- * handle ram_block_discard_disable(). -- */ -- vapdev->vdev.ram_block_discard_allowed = true; -- - ret = vfio_attach_device(vbasedev->name, vbasedev, - &address_space_memory, errp); - if (ret) { -@@ -236,8 +224,20 @@ static const VMStateDescription vfio_ap_vmstate = { - static void vfio_ap_instance_init(Object *obj) - { - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); -+ VFIODevice *vbasedev = &vapdev->vdev; - -- vapdev->vdev.fd = -1; -+ vbasedev->type = VFIO_DEVICE_TYPE_AP; -+ vbasedev->ops = &vfio_ap_ops; -+ vbasedev->dev = DEVICE(vapdev); -+ vbasedev->fd = -1; -+ -+ /* -+ * vfio-ap devices operate in a way compatible with discarding of -+ * memory in RAM blocks, as no pages are pinned in the host. -+ * This needs to be set before vfio_get_device() for vfio common to -+ * handle ram_block_discard_disable(). 
-+ */ -+ vbasedev->ram_block_discard_allowed = true; - } - - #ifdef CONFIG_IOMMUFD --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch b/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch deleted file mode 100644 index ff64a91..0000000 --- a/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 732115c80eb0dd672925a0737e09643d8a889abd Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:15 +0800 -Subject: [PATCH 036/101] vfio/ccw: Allow the selection of a given iommu - backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [35/67] 1701de023a9f3b3f0420689bf851e11aee88800d (eauger1/centos-qemu-kvm) - -Now we support two types of iommu backends, let's add the capability -to select one of them. This depends on whether an iommufd object has -been linked with the vfio-ccw device: - -If the user wants to use the legacy backend, it shall not -link the vfio-ccw device with any iommufd object: - - -device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX - -This is called the legacy mode/backend. 
- -If the user wants to use the iommufd backend (/dev/iommu) it -shall pass an iommufd object id in the vfio-ccw device options: - - -object iommufd,id=iommufd0 - -device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0 - -Suggested-by: Alex Williamson -Signed-off-by: Zhenzhong Duan -Reviewed-by: Matthew Rosato -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Farman -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit e70f971a6c1230138843d7ab82267e4a5aaf6bda) -Signed-off-by: Eric Auger ---- - hw/vfio/ccw.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index d857bb8d0f..d2d58bb677 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -15,12 +15,14 @@ - */ - - #include "qemu/osdep.h" -+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include - #include - #include - - #include "qapi/error.h" - #include "hw/vfio/vfio-common.h" -+#include "sysemu/iommufd.h" - #include "hw/s390x/s390-ccw.h" - #include "hw/s390x/vfio-ccw.h" - #include "hw/qdev-properties.h" -@@ -677,6 +679,10 @@ static void vfio_ccw_unrealize(DeviceState *dev) - static Property vfio_ccw_properties[] = { - DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), - DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false), -+#ifdef CONFIG_IOMMUFD -+ DEFINE_PROP_LINK("iommufd", VFIOCCWDevice, vdev.iommufd, -+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -+#endif - DEFINE_PROP_END_OF_LIST(), - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch b/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch deleted file mode 100644 index 6c91d85..0000000 --- a/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 0ff08afdec19f4decaf750fa7d158e0ea498ff28 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:16 +0800 -Subject: [PATCH 037/101] vfio/ccw: Make vfio cdev 
pre-openable by passing a - file handle -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [36/67] cc0d8f51cffa5d5a7aebc2334b908b9877179ae7 (eauger1/centos-qemu-kvm) - -This gives management tools like libvirt a chance to open the vfio -cdev with privilege and pass FD to qemu. This way qemu never needs -to have privilege to open a VFIO or iommu cdev node. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Matthew Rosato -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Farman -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 909a6254edaa8d0b0e3f1c0a623862e73d1842e9) -Signed-off-by: Eric Auger ---- - hw/vfio/ccw.c | 25 ++++++++++++++++++++++--- - 1 file changed, 22 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index d2d58bb677..2afdf17dbe 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -590,11 +590,12 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) - } - } - -+ if (vfio_device_get_name(vbasedev, errp) < 0) { -+ return; -+ } -+ - vbasedev->ops = &vfio_ccw_ops; - vbasedev->type = VFIO_DEVICE_TYPE_CCW; -- vbasedev->name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, -- vcdev->cdev.hostid.ssid, -- vcdev->cdev.hostid.devid); - vbasedev->dev = dev; - - /* -@@ -691,12 +692,29 @@ static const VMStateDescription vfio_ccw_vmstate = { - .unmigratable = 1, - }; - -+static void vfio_ccw_instance_init(Object *obj) -+{ -+ VFIOCCWDevice *vcdev = VFIO_CCW(obj); -+ -+ vcdev->vdev.fd = -1; -+} -+ -+#ifdef CONFIG_IOMMUFD -+static void vfio_ccw_set_fd(Object *obj, const char *str, Error **errp) -+{ -+ vfio_device_set_fd(&VFIO_CCW(obj)->vdev, str, errp); -+} -+#endif -+ - static void vfio_ccw_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); - 
S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); - - device_class_set_props(dc, vfio_ccw_properties); -+#ifdef CONFIG_IOMMUFD -+ object_class_property_add_str(klass, "fd", NULL, vfio_ccw_set_fd); -+#endif - dc->vmsd = &vfio_ccw_vmstate; - dc->desc = "VFIO-based subchannel assignment"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); -@@ -714,6 +732,7 @@ static const TypeInfo vfio_ccw_info = { - .name = TYPE_VFIO_CCW, - .parent = TYPE_S390_CCW, - .instance_size = sizeof(VFIOCCWDevice), -+ .instance_init = vfio_ccw_instance_init, - .class_init = vfio_ccw_class_init, - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch b/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch deleted file mode 100644 index 95b85f9..0000000 --- a/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 2ef1c050722115247962e3cd4d8fcf73727e597e Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:24 +0800 -Subject: [PATCH 045/101] vfio/ccw: Move VFIODevice initializations in - vfio_ccw_instance_init -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [44/67] 3345ed58f491aba8fd51bcc172af267ae53e6c8c (eauger1/centos-qemu-kvm) - -Some of the VFIODevice initializations is in vfio_ccw_realize, -move all of them in vfio_ccw_instance_init. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Eric Farman -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit c12b55ad6f9d3b4792b590e9211bd7319e4a2d70) -Signed-off-by: Eric Auger ---- - hw/vfio/ccw.c | 30 +++++++++++++++--------------- - 1 file changed, 15 insertions(+), 15 deletions(-) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index 2afdf17dbe..6305a4c1b8 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -594,20 +594,6 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) - return; - } - -- vbasedev->ops = &vfio_ccw_ops; -- vbasedev->type = VFIO_DEVICE_TYPE_CCW; -- vbasedev->dev = dev; -- -- /* -- * All vfio-ccw devices are believed to operate in a way compatible with -- * discarding of memory in RAM blocks, ie. pages pinned in the host are -- * in the current working set of the guest driver and therefore never -- * overlap e.g., with pages available to the guest balloon driver. This -- * needs to be set before vfio_get_device() for vfio common to handle -- * ram_block_discard_disable(). -- */ -- vbasedev->ram_block_discard_allowed = true; -- - ret = vfio_attach_device(cdev->mdevid, vbasedev, - &address_space_memory, errp); - if (ret) { -@@ -695,8 +681,22 @@ static const VMStateDescription vfio_ccw_vmstate = { - static void vfio_ccw_instance_init(Object *obj) - { - VFIOCCWDevice *vcdev = VFIO_CCW(obj); -+ VFIODevice *vbasedev = &vcdev->vdev; -+ -+ vbasedev->type = VFIO_DEVICE_TYPE_CCW; -+ vbasedev->ops = &vfio_ccw_ops; -+ vbasedev->dev = DEVICE(vcdev); -+ vbasedev->fd = -1; - -- vcdev->vdev.fd = -1; -+ /* -+ * All vfio-ccw devices are believed to operate in a way compatible with -+ * discarding of memory in RAM blocks, ie. pages pinned in the host are -+ * in the current working set of the guest driver and therefore never -+ * overlap e.g., with pages available to the guest balloon driver. 
This -+ * needs to be set before vfio_get_device() for vfio common to handle -+ * ram_block_discard_disable(). -+ */ -+ vbasedev->ram_block_discard_allowed = true; - } - - #ifdef CONFIG_IOMMUFD --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch b/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch deleted file mode 100644 index 8615b6d..0000000 --- a/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 7de36998dd6177380e46b8c5f3a91c3fad75483c Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:30 +0800 -Subject: [PATCH 005/101] vfio/common: Introduce vfio_container_init/destroy - helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [4/67] 8287f687ef19cd84afede1e8f3b16ac3caf29a1d (eauger1/centos-qemu-kvm) - -This adds two helper functions vfio_container_init/destroy which will be -used by both legacy and iommufd containers to do base container specific -initialization and release. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit ed2f7f80170251e7cdd2965a13ee97527d1fbec8) -Signed-off-by: Eric Auger ---- - hw/vfio/container-base.c | 9 +++++++++ - hw/vfio/container.c | 4 +++- - include/hw/vfio/vfio-container-base.h | 4 ++++ - 3 files changed, 16 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 55d3a35fa4..e929435751 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -30,3 +30,12 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - g_assert(bcontainer->ops->dma_unmap); - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); - } -+ -+void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) -+{ -+ bcontainer->ops = ops; -+} -+ -+void vfio_container_destroy(VFIOContainerBase *bcontainer) -+{ -+} -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index c04df26323..32a0251dd1 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -559,7 +559,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->vrdl_list); - bcontainer = &container->bcontainer; -- bcontainer->ops = &vfio_legacy_ops; -+ vfio_container_init(bcontainer, &vfio_legacy_ops); - - ret = vfio_init_container(container, group->fd, errp); - if (ret) { -@@ -661,6 +661,7 @@ put_space_exit: - static void vfio_disconnect_container(VFIOGroup *group) - { - VFIOContainer *container = group->container; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - - QLIST_REMOVE(group, container_next); - group->container = NULL; -@@ -695,6 +696,7 @@ static void vfio_disconnect_container(VFIOGroup *group) - QLIST_REMOVE(giommu, giommu_next); - g_free(giommu); - } -+ vfio_container_destroy(bcontainer); - - trace_vfio_disconnect_container(container->fd); - close(container->fd); 
-diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 56b033f59f..577f52ccbc 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -38,6 +38,10 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb); - -+void vfio_container_init(VFIOContainerBase *bcontainer, -+ const VFIOIOMMUOps *ops); -+void vfio_container_destroy(VFIOContainerBase *bcontainer); -+ - struct VFIOIOMMUOps { - /* basic feature */ - int (*dma_map)(VFIOContainerBase *bcontainer, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch b/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch deleted file mode 100644 index eec555b..0000000 --- a/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch +++ /dev/null @@ -1,221 +0,0 @@ -From 36f4005c3dbb4c8b63a975494c75281de51c25f9 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:31 +0800 -Subject: [PATCH 006/101] vfio/common: Move giommu_list in base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [5/67] ba5898e96c16c7f6e8108ae461b454d3c8c35404 (eauger1/centos-qemu-kvm) - -Move the giommu_list field in the base container and store -the base container in the VFIOGuestIOMMU. - -No functional change intended. 
- -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit dddf83ab99eb832c449249397a1c302c6ed746bf) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 17 +++++++++++------ - hw/vfio/container-base.c | 9 +++++++++ - hw/vfio/container.c | 8 -------- - include/hw/vfio/vfio-common.h | 9 --------- - include/hw/vfio/vfio-container-base.h | 9 +++++++++ - 5 files changed, 29 insertions(+), 23 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index e610771888..43580bcc43 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - { - VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); -- VFIOContainerBase *bcontainer = &giommu->container->bcontainer; -+ VFIOContainerBase *bcontainer = giommu->bcontainer; - hwaddr iova = iotlb->iova + giommu->iommu_offset; - void *vaddr; - int ret; -@@ -569,6 +569,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { - VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ VFIOContainerBase *bcontainer = &container->bcontainer; - hwaddr iova, end; - Int128 llend, llsize; - void *vaddr; -@@ -612,7 +613,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - giommu->iommu_mr = iommu_mr; - giommu->iommu_offset = section->offset_within_address_space - - section->offset_within_region; -- giommu->container = container; -+ giommu->bcontainer = bcontainer; - llend = int128_add(int128_make64(section->offset_within_region), - section->size); - llend = int128_sub(llend, int128_one()); -@@ -647,7 +648,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - g_free(giommu); - goto fail; - } -- 
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); -+ QLIST_INSERT_HEAD(&bcontainer->giommu_list, giommu, giommu_next); - memory_region_iommu_replay(giommu->iommu_mr, &giommu->n); - - return; -@@ -732,6 +733,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - MemoryRegionSection *section) - { - VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ VFIOContainerBase *bcontainer = &container->bcontainer; - hwaddr iova, end; - Int128 llend, llsize; - int ret; -@@ -744,7 +746,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - -- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { -+ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { - if (MEMORY_REGION(giommu->iommu_mr) == section->mr && - giommu->n.start == section->offset_within_region) { - memory_region_unregister_iommu_notifier(section->mr, -@@ -1206,7 +1208,9 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - vfio_giommu_dirty_notifier *gdn = container_of(n, - vfio_giommu_dirty_notifier, n); - VFIOGuestIOMMU *giommu = gdn->giommu; -- VFIOContainer *container = giommu->container; -+ VFIOContainerBase *bcontainer = giommu->bcontainer; -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - hwaddr iova = iotlb->iova + giommu->iommu_offset; - ram_addr_t translated_addr; - int ret = -EINVAL; -@@ -1284,12 +1288,13 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, - static int vfio_sync_dirty_bitmap(VFIOContainer *container, - MemoryRegionSection *section) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - ram_addr_t ram_addr; - - if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - -- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { -+ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { - if 
(MEMORY_REGION(giommu->iommu_mr) == section->mr && - giommu->n.start == section->offset_within_region) { - Int128 llend; -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index e929435751..20bcb9669a 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -34,8 +34,17 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) - { - bcontainer->ops = ops; -+ QLIST_INIT(&bcontainer->giommu_list); - } - - void vfio_container_destroy(VFIOContainerBase *bcontainer) - { -+ VFIOGuestIOMMU *giommu, *tmp; -+ -+ QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { -+ memory_region_unregister_iommu_notifier( -+ MEMORY_REGION(giommu->iommu_mr), &giommu->n); -+ QLIST_REMOVE(giommu, giommu_next); -+ g_free(giommu); -+ } - } -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 32a0251dd1..133d3c8f5c 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -556,7 +556,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->dirty_pages_supported = false; - container->dma_max_mappings = 0; - container->iova_ranges = NULL; -- QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->vrdl_list); - bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, &vfio_legacy_ops); -@@ -686,16 +685,9 @@ static void vfio_disconnect_container(VFIOGroup *group) - - if (QLIST_EMPTY(&container->group_list)) { - VFIOAddressSpace *space = container->space; -- VFIOGuestIOMMU *giommu, *tmp; - - QLIST_REMOVE(container, next); - -- QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { -- memory_region_unregister_iommu_notifier( -- MEMORY_REGION(giommu->iommu_mr), &giommu->n); -- QLIST_REMOVE(giommu, giommu_next); -- g_free(giommu); -- } - vfio_container_destroy(bcontainer); - - trace_vfio_disconnect_container(container->fd); -diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h -index 24a26345e5..6be082b8f2 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -95,7 +95,6 @@ typedef struct VFIOContainer { - uint64_t max_dirty_bitmap_size; - unsigned long pgsizes; - unsigned int dma_max_mappings; -- QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; -@@ -104,14 +103,6 @@ typedef struct VFIOContainer { - GList *iova_ranges; - } VFIOContainer; - --typedef struct VFIOGuestIOMMU { -- VFIOContainer *container; -- IOMMUMemoryRegion *iommu_mr; -- hwaddr iommu_offset; -- IOMMUNotifier n; -- QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; --} VFIOGuestIOMMU; -- - typedef struct VFIORamDiscardListener { - VFIOContainer *container; - MemoryRegion *mr; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 577f52ccbc..a11aec5755 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -29,8 +29,17 @@ typedef struct { - */ - typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; -+ QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - } VFIOContainerBase; - -+typedef struct VFIOGuestIOMMU { -+ VFIOContainerBase *bcontainer; -+ IOMMUMemoryRegion *iommu_mr; -+ hwaddr iommu_offset; -+ IOMMUNotifier n; -+ QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; -+} VFIOGuestIOMMU; -+ - int vfio_container_dma_map(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch b/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch deleted file mode 100644 index 261807a..0000000 --- a/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch +++ /dev/null @@ -1,55 +0,0 @@ -From e9476ee64edd81fafd409fb3ceaad80668446bff Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 
21 Nov 2023 16:44:02 +0800 -Subject: [PATCH 023/101] vfio/common: return early if space isn't empty -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [22/67] 239c21ae7cddc8efabc041b9c7774f15b4964631 (eauger1/centos-qemu-kvm) - -This is a trivial optimization. If there is active container in space, -vfio_reset_handler will never be unregistered. So revert the check of -space->containers and return early. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 1eae5b7bd3ddd03b5591e9122b011c6520064a5a) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 572ae7c934..934f4f5446 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1462,10 +1462,13 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as) - - void vfio_put_address_space(VFIOAddressSpace *space) - { -- if (QLIST_EMPTY(&space->containers)) { -- QLIST_REMOVE(space, list); -- g_free(space); -+ if (!QLIST_EMPTY(&space->containers)) { -+ return; - } -+ -+ QLIST_REMOVE(space, list); -+ g_free(space); -+ - if (QLIST_EMPTY(&vfio_address_spaces)) { - qemu_unregister_reset(vfio_reset_handler, NULL); - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch b/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch deleted file mode 100644 index 62caf8a..0000000 --- a/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch +++ /dev/null @@ -1,257 +0,0 @@ -From facad966c42b1ec38b12e45f2b84bd059542b60c Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 
15:12:35 +0800 -Subject: [PATCH 010/101] vfio/container: Convert functions to base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [9/67] a0002d6e9cb0ca76e3e2f25208ecba22dd9f9a88 (eauger1/centos-qemu-kvm) - -In the prospect to get rid of VFIOContainer refs -in common.c lets convert misc functions to use the base -container object instead: - -vfio_devices_all_dirty_tracking -vfio_devices_all_device_dirty_tracking -vfio_devices_all_running_and_mig_active -vfio_devices_query_dirty_bitmap -vfio_get_dirty_bitmap - -Signed-off-by: Eric Auger -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit e1cac6b203f45b5322e831e8d50edfdf18609b09) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 42 +++++++++++++++-------------------- - hw/vfio/container.c | 6 ++--- - hw/vfio/trace-events | 2 +- - include/hw/vfio/vfio-common.h | 9 ++++---- - 4 files changed, 26 insertions(+), 33 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 9415395ed9..cf6618f6ed 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -177,9 +177,8 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) - migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P; - } - --static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) -+static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - -@@ -204,9 +203,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - return true; - } - --bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) -+bool 
vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - - QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { -@@ -222,9 +220,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) - * Check if all VFIO devices are running and migration is active, which is - * essentially equivalent to the migration being in pre-copy phase. - */ --bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) -+bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - - if (!migration_is_active(migrate_get_current())) { -@@ -1082,7 +1079,7 @@ static void vfio_listener_log_global_start(MemoryListener *listener) - VFIOContainer *container = container_of(listener, VFIOContainer, listener); - int ret; - -- if (vfio_devices_all_device_dirty_tracking(container)) { -+ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { - ret = vfio_devices_dma_logging_start(container); - } else { - ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -@@ -1101,7 +1098,7 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) - VFIOContainer *container = container_of(listener, VFIOContainer, listener); - int ret = 0; - -- if (vfio_devices_all_device_dirty_tracking(container)) { -+ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { - vfio_devices_dma_logging_stop(container); - } else { - ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -@@ -1141,11 +1138,10 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, - return 0; - } - --int vfio_devices_query_dirty_bitmap(VFIOContainer *container, -+int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size) - { -- VFIOContainerBase *bcontainer = 
&container->bcontainer; - VFIODevice *vbasedev; - int ret; - -@@ -1165,17 +1161,16 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container, - return 0; - } - --int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, -+int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, - uint64_t size, ram_addr_t ram_addr) - { - bool all_device_dirty_tracking = -- vfio_devices_all_device_dirty_tracking(container); -+ vfio_devices_all_device_dirty_tracking(bcontainer); - uint64_t dirty_pages; - VFIOBitmap vbmap; - int ret; - -- if (!container->bcontainer.dirty_pages_supported && -- !all_device_dirty_tracking) { -+ if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) { - cpu_physical_memory_set_dirty_range(ram_addr, size, - tcg_enabled() ? DIRTY_CLIENTS_ALL : - DIRTY_CLIENTS_NOCODE); -@@ -1188,10 +1183,9 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - } - - if (all_device_dirty_tracking) { -- ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); -+ ret = vfio_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size); - } else { -- ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap, -- iova, size); -+ ret = vfio_container_query_dirty_bitmap(bcontainer, &vbmap, iova, size); - } - - if (ret) { -@@ -1201,8 +1195,7 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr, - vbmap.pages); - -- trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size, -- ram_addr, dirty_pages); -+ trace_vfio_get_dirty_bitmap(iova, size, vbmap.size, ram_addr, dirty_pages); - out: - g_free(vbmap.bitmap); - -@@ -1236,8 +1229,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - - rcu_read_lock(); - if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { -- ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1, -- translated_addr); -+ ret = 
vfio_get_dirty_bitmap(&container->bcontainer, iova, -+ iotlb->addr_mask + 1, translated_addr); - if (ret) { - error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", -@@ -1266,7 +1259,8 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, - * Sync the whole mapped region (spanning multiple individual mappings) - * in one go. - */ -- return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr); -+ return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size, -+ ram_addr); - } - - static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, -@@ -1335,7 +1329,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - ram_addr = memory_region_get_ram_addr(section->mr) + - section->offset_within_region; - -- return vfio_get_dirty_bitmap(container, -+ return vfio_get_dirty_bitmap(&container->bcontainer, - REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), - int128_get64(section->size), ram_addr); - } -@@ -1350,7 +1344,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, - return; - } - -- if (vfio_devices_all_dirty_tracking(container)) { -+ if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { - ret = vfio_sync_dirty_bitmap(container, section); - if (ret) { - error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret, -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 63a906de93..7bd81eab09 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -129,8 +129,8 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, - bool need_dirty_sync = false; - int ret; - -- if (iotlb && vfio_devices_all_running_and_mig_active(container)) { -- if (!vfio_devices_all_device_dirty_tracking(container) && -+ if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { -+ if (!vfio_devices_all_device_dirty_tracking(bcontainer) && - container->bcontainer.dirty_pages_supported) { - return 
vfio_dma_unmap_bitmap(container, iova, size, iotlb); - } -@@ -162,7 +162,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, - } - - if (need_dirty_sync) { -- ret = vfio_get_dirty_bitmap(container, iova, size, -+ ret = vfio_get_dirty_bitmap(bcontainer, iova, size, - iotlb->translated_addr); - if (ret) { - return ret; -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 9f7fedee98..08a1f9dfa4 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -117,7 +117,7 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic - vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" - vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" - vfio_legacy_dma_unmap_overflow_workaround(void) "" --vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 -+vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 - vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 - - # platform.c -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 9740cf9fbc..bc67e1316c 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -186,7 +186,6 @@ typedef struct VFIODisplay { - - VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); - void vfio_put_address_space(VFIOAddressSpace *space); --bool vfio_devices_all_running_and_saving(VFIOContainer *container); - - /* SPAPR specific */ - int vfio_container_add_section_window(VFIOContainer *container, -@@ -260,11 +259,11 @@ 
bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); - void vfio_migration_exit(VFIODevice *vbasedev); - - int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); --bool vfio_devices_all_running_and_mig_active(VFIOContainer *container); --bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container); --int vfio_devices_query_dirty_bitmap(VFIOContainer *container, -+bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer); -+bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer); -+int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size); --int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, -+int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, - uint64_t size, ram_addr_t ram_addr); - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch b/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch deleted file mode 100644 index 92e9a38..0000000 --- a/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch +++ /dev/null @@ -1,97 +0,0 @@ -From a5d19bfbfddb36fa6d68ca6282a5acd9b245d48a Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:41 +0800 -Subject: [PATCH 016/101] vfio/container: Implement attach/detach_device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [15/67] e233c90e4af2061dc0612bc1b1d17be1a47daeae (eauger1/centos-qemu-kvm) - -No functional change intended. 
- -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 1eb31f13b24c49884d8256f96a6664df2dd0824d) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 16 ++++++++++++++++ - hw/vfio/container.c | 12 +++++------- - 2 files changed, 21 insertions(+), 7 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 8ef2e7967d..483ba82089 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1498,3 +1498,19 @@ retry: - - return info; - } -+ -+int vfio_attach_device(char *name, VFIODevice *vbasedev, -+ AddressSpace *as, Error **errp) -+{ -+ const VFIOIOMMUOps *ops = &vfio_legacy_ops; -+ -+ return ops->attach_device(name, vbasedev, as, errp); -+} -+ -+void vfio_detach_device(VFIODevice *vbasedev) -+{ -+ if (!vbasedev->bcontainer) { -+ return; -+ } -+ vbasedev->bcontainer->ops->detach_device(vbasedev); -+} -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 721c0d7375..6bacf38222 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -873,8 +873,8 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) - * @name and @vbasedev->name are likely to be different depending - * on the type of the device, hence the need for passing @name - */ --int vfio_attach_device(char *name, VFIODevice *vbasedev, -- AddressSpace *as, Error **errp) -+static int vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, -+ AddressSpace *as, Error **errp) - { - int groupid = vfio_device_groupid(vbasedev, errp); - VFIODevice *vbasedev_iter; -@@ -914,14 +914,10 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - return ret; - } - --void vfio_detach_device(VFIODevice *vbasedev) -+static void vfio_legacy_detach_device(VFIODevice *vbasedev) - { - VFIOGroup *group = vbasedev->group; - -- if (!vbasedev->bcontainer) { -- return; -- } -- - QLIST_REMOVE(vbasedev, global_next); - 
QLIST_REMOVE(vbasedev, container_next); - vbasedev->bcontainer = NULL; -@@ -933,6 +929,8 @@ void vfio_detach_device(VFIODevice *vbasedev) - const VFIOIOMMUOps vfio_legacy_ops = { - .dma_map = vfio_legacy_dma_map, - .dma_unmap = vfio_legacy_dma_unmap, -+ .attach_device = vfio_legacy_attach_device, -+ .detach_device = vfio_legacy_detach_device, - .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, - .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, - }; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch b/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch deleted file mode 100644 index 42b406b..0000000 --- a/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch +++ /dev/null @@ -1,65 +0,0 @@ -From c3c9f366c356032fa57ff7cc664732ba87ceb3fb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:18 +0100 -Subject: [PATCH 051/101] vfio/container: Initialize VFIOIOMMUOps under - vfio_init_container() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [50/67] f325136391b22babadb1be3394c527deecdcd3ca (eauger1/centos-qemu-kvm) - -vfio_init_container() already defines the IOMMU type of the container. -Do the same for the VFIOIOMMUOps struct. This prepares ground for the -following patches that will deduce the associated VFIOIOMMUOps struct -from the IOMMU type. 
- -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit bffe92af0e7571868d47a1d1cd2205e13054d492) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index afcfe80488..f4a0434a52 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -370,7 +370,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, - } - - static int vfio_init_container(VFIOContainer *container, int group_fd, -- Error **errp) -+ VFIOAddressSpace *space, Error **errp) - { - int iommu_type, ret; - -@@ -401,6 +401,7 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, - } - - container->iommu_type = iommu_type; -+ vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops); - return 0; - } - -@@ -583,9 +584,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container = g_malloc0(sizeof(*container)); - container->fd = fd; - bcontainer = &container->bcontainer; -- vfio_container_init(bcontainer, space, &vfio_legacy_ops); - -- ret = vfio_init_container(container, group->fd, errp); -+ ret = vfio_init_container(container, group->fd, space, errp); - if (ret) { - goto free_container_exit; - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch b/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch deleted file mode 100644 index 3411ecb..0000000 --- a/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 29f13011e62f5370ef7fb3248dc85c90ae5bb042 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:21 +0100 -Subject: [PATCH 054/101] vfio/container: Intoduce a new VFIOIOMMUClass::setup - handler -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [53/67] 8641161afc33d68795bcf51a47e89061b34d50a8 (eauger1/centos-qemu-kvm) - -This will help in converting the sPAPR IOMMU backend to a QOM interface. - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit 61d893f2cdb34a2b0255f9b5fbba6b49b94ff730) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 1 + - include/hw/vfio/vfio-container-base.h | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 220e838a91..c22bdd3216 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -1129,6 +1129,7 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) - { - VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); - -+ vioc->setup = vfio_legacy_setup; - vioc->dma_map = vfio_legacy_dma_map; - vioc->dma_unmap = vfio_legacy_dma_unmap; - vioc->attach_device = vfio_legacy_attach_device; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index c60370fc5e..ce8b1fba88 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -109,6 +109,7 @@ struct VFIOIOMMUClass { - InterfaceClass parent_class; - - /* basic feature */ -+ int (*setup)(VFIOContainerBase *bcontainer, Error **errp); - int (*dma_map)(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch deleted file mode 100644 index 7139e64..0000000 --- a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 5b63e4595e106196ef922b7f762c8f4150d73979 Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:19 +0100 -Subject: [PATCH 052/101] vfio/container: Introduce a VFIOIOMMU QOM interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [51/67] 7c06e2165efe94dcd203d44e422a7aa9fac9816c (eauger1/centos-qemu-kvm) - -VFIOContainerBase was not introduced as an abstract QOM object because -it felt unnecessary to expose all the IOMMU backends to the QEMU -machine and human interface. However, we can still abstract the IOMMU -backend handlers using a QOM interface class. This provides more -flexibility when referencing the various implementations. - -Simply transform the VFIOIOMMUOps struct in an InterfaceClass and do -some initial name replacements. Next changes will start converting -VFIOIOMMUOps. - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit fdaa774e67435a328c0e28006c4d749f2198294a) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 2 +- - hw/vfio/container-base.c | 12 +++++++++++- - hw/vfio/pci.c | 2 +- - include/hw/vfio/vfio-container-base.h | 23 +++++++++++++++++++---- - 4 files changed, 32 insertions(+), 7 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 08a3e57672..49dab41566 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1503,7 +1503,7 @@ retry: - int vfio_attach_device(char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp) - { -- const VFIOIOMMUOps *ops = &vfio_legacy_ops; -+ const VFIOIOMMUClass *ops = &vfio_legacy_ops; - - #ifdef CONFIG_IOMMUFD - if (vbasedev->iommufd) { -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 1ffd25bbfa..913ae49077 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -72,7 +72,7 @@ int 
vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - } - - void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, -- const VFIOIOMMUOps *ops) -+ const VFIOIOMMUClass *ops) - { - bcontainer->ops = ops; - bcontainer->space = space; -@@ -99,3 +99,13 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) - - g_list_free_full(bcontainer->iova_ranges, g_free); - } -+ -+static const TypeInfo types[] = { -+ { -+ .name = TYPE_VFIO_IOMMU, -+ .parent = TYPE_INTERFACE, -+ .class_size = sizeof(VFIOIOMMUClass), -+ }, -+}; -+ -+DEFINE_TYPES(types) -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 83c3238608..adb7c09367 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2491,7 +2491,7 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, - static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) - { - VFIODevice *vbasedev = &vdev->vbasedev; -- const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops; -+ const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops; - - return ops->pci_hot_reset(vbasedev, single); - } -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 5c9594b6c7..d6147b4aee 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -16,7 +16,8 @@ - #include "exec/memory.h" - - typedef struct VFIODevice VFIODevice; --typedef struct VFIOIOMMUOps VFIOIOMMUOps; -+typedef struct VFIOIOMMUClass VFIOIOMMUClass; -+#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */ - - typedef struct { - unsigned long *bitmap; -@@ -34,7 +35,7 @@ typedef struct VFIOAddressSpace { - * This is the base object for vfio container backends - */ - typedef struct VFIOContainerBase { -- const VFIOIOMMUOps *ops; -+ const VFIOIOMMUClass *ops; - VFIOAddressSpace *space; - MemoryListener listener; - Error *error; -@@ -88,10 +89,24 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, - - void vfio_container_init(VFIOContainerBase 
*bcontainer, - VFIOAddressSpace *space, -- const VFIOIOMMUOps *ops); -+ const VFIOIOMMUClass *ops); - void vfio_container_destroy(VFIOContainerBase *bcontainer); - --struct VFIOIOMMUOps { -+ -+#define TYPE_VFIO_IOMMU "vfio-iommu" -+ -+/* -+ * VFIOContainerBase is not an abstract QOM object because it felt -+ * unnecessary to expose all the IOMMU backends to the QEMU machine -+ * and human interface. However, we can still abstract the IOMMU -+ * backend handlers using a QOM interface class. This provides more -+ * flexibility when referencing the various implementations. -+ */ -+DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU) -+ -+struct VFIOIOMMUClass { -+ InterfaceClass parent_class; -+ - /* basic feature */ - int (*dma_map)(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch deleted file mode 100644 index 60439ff..0000000 --- a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 58927bf236541b9423f855eca1970f7a3cf864a9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:20 +0100 -Subject: [PATCH 053/101] vfio/container: Introduce a VFIOIOMMU legacy QOM - interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [52/67] a81f39d13305e84699313e17ae64d10ff4b09067 (eauger1/centos-qemu-kvm) - -Convert the legacy VFIOIOMMUOps struct to the new VFIOIOMMU QOM -interface. The set of of operations for this backend can be referenced -with a literal typename instead of a C struct. This will simplify -support of multiple backends. 
- -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit 9812feefab3a4ff95a6cfd73aecb120b406bc98c) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 6 ++- - hw/vfio/container.c | 58 ++++++++++++++++++++++----- - include/hw/vfio/vfio-common.h | 1 - - include/hw/vfio/vfio-container-base.h | 1 + - 4 files changed, 55 insertions(+), 11 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 49dab41566..2329d0efc8 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1503,13 +1503,17 @@ retry: - int vfio_attach_device(char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp) - { -- const VFIOIOMMUClass *ops = &vfio_legacy_ops; -+ const VFIOIOMMUClass *ops = -+ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); - - #ifdef CONFIG_IOMMUFD - if (vbasedev->iommufd) { - ops = &vfio_iommufd_ops; - } - #endif -+ -+ assert(ops); -+ - return ops->attach_device(name, vbasedev, as, errp); - } - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index f4a0434a52..220e838a91 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -369,10 +369,30 @@ static int vfio_get_iommu_type(VFIOContainer *container, - return -EINVAL; - } - -+/* -+ * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type -+ */ -+static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) -+{ -+ ObjectClass *klass = NULL; -+ -+ switch (iommu_type) { -+ case VFIO_TYPE1v2_IOMMU: -+ case VFIO_TYPE1_IOMMU: -+ klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); -+ break; -+ default: -+ g_assert_not_reached(); -+ }; -+ -+ return VFIO_IOMMU_CLASS(klass); -+} -+ - static int vfio_init_container(VFIOContainer *container, int group_fd, - VFIOAddressSpace *space, Error **errp) - { - int iommu_type, ret; -+ const VFIOIOMMUClass *vioc; - - iommu_type = vfio_get_iommu_type(container, errp); - if (iommu_type < 0) { -@@ -401,7 +421,14 @@ static int 
vfio_init_container(VFIOContainer *container, int group_fd, - } - - container->iommu_type = iommu_type; -- vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops); -+ -+ vioc = vfio_get_iommu_class(iommu_type, errp); -+ if (!vioc) { -+ error_setg(errp, "No available IOMMU models"); -+ return -EINVAL; -+ } -+ -+ vfio_container_init(&container->bcontainer, space, vioc); - return 0; - } - -@@ -1098,12 +1125,25 @@ out_single: - return ret; - } - --const VFIOIOMMUOps vfio_legacy_ops = { -- .dma_map = vfio_legacy_dma_map, -- .dma_unmap = vfio_legacy_dma_unmap, -- .attach_device = vfio_legacy_attach_device, -- .detach_device = vfio_legacy_detach_device, -- .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, -- .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, -- .pci_hot_reset = vfio_legacy_pci_hot_reset, -+static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) -+{ -+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); -+ -+ vioc->dma_map = vfio_legacy_dma_map; -+ vioc->dma_unmap = vfio_legacy_dma_unmap; -+ vioc->attach_device = vfio_legacy_attach_device; -+ vioc->detach_device = vfio_legacy_detach_device; -+ vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking; -+ vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap; -+ vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; - }; -+ -+static const TypeInfo types[] = { -+ { -+ .name = TYPE_VFIO_IOMMU_LEGACY, -+ .parent = TYPE_VFIO_IOMMU, -+ .class_init = vfio_iommu_legacy_class_init, -+ }, -+}; -+ -+DEFINE_TYPES(types) -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index b8aa8a5495..14c497b6b0 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -210,7 +210,6 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; - typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; - extern VFIOGroupList vfio_group_list; - extern VFIODeviceList vfio_device_list; --extern const VFIOIOMMUOps 
vfio_legacy_ops; - extern const VFIOIOMMUOps vfio_iommufd_ops; - extern const MemoryListener vfio_memory_listener; - extern int vfio_kvm_device_fd; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index d6147b4aee..c60370fc5e 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -94,6 +94,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); - - - #define TYPE_VFIO_IOMMU "vfio-iommu" -+#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" - - /* - * VFIOContainerBase is not an abstract QOM object because it felt --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch b/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch deleted file mode 100644 index 2840e2c..0000000 --- a/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch +++ /dev/null @@ -1,71 +0,0 @@ -From e56f961fbe95a53a52c5eca00b4fca17d825e860 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:28 +0800 -Subject: [PATCH 003/101] vfio/container: Introduce a empty VFIOIOMMUOps -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [2/67] 0de0afffce42fa4a17f6d33a10b6162cdfbe8150 (eauger1/centos-qemu-kvm) - -This empty VFIOIOMMUOps named vfio_legacy_ops will hold all general -IOMMU ops of legacy container. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit d24668579184f4098779983724ec74cd3db62e10) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 5 +++++ - include/hw/vfio/vfio-common.h | 2 +- - 2 files changed, 6 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 242010036a..4bc43ddfa4 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -472,6 +472,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - Error **errp) - { - VFIOContainer *container; -+ VFIOContainerBase *bcontainer; - int ret, fd; - VFIOAddressSpace *space; - -@@ -552,6 +553,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->iova_ranges = NULL; - QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->vrdl_list); -+ bcontainer = &container->bcontainer; -+ bcontainer->ops = &vfio_legacy_ops; - - ret = vfio_init_container(container, group->fd, errp); - if (ret) { -@@ -933,3 +936,5 @@ void vfio_detach_device(VFIODevice *vbasedev) - vfio_put_base_device(vbasedev); - vfio_put_group(group); - } -+ -+const VFIOIOMMUOps vfio_legacy_ops; -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 586d153c12..678161f207 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -255,7 +255,7 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; - typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; - extern VFIOGroupList vfio_group_list; - extern VFIODeviceList vfio_device_list; -- -+extern const VFIOIOMMUOps vfio_legacy_ops; - extern const MemoryListener vfio_memory_listener; - extern int vfio_kvm_device_fd; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch b/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch deleted file mode 100644 index ae9ccd8..0000000 --- 
a/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 6c7546756e979e4f5ba29ae51a21c63fa90492cf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:17 +0100 -Subject: [PATCH 050/101] vfio/container: Introduce vfio_legacy_setup() for - further cleanups -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [49/67] 3a621ba2605c98b7fbf7fd9f93a207f728f1202e (eauger1/centos-qemu-kvm) - -This will help subsequent patches to unify the initialization of type1 -and sPAPR IOMMU backends. - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit d3764db87531cd53849ccee9b2f72aede90ccf5b) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 63 +++++++++++++++++++++++++-------------------- - 1 file changed, 35 insertions(+), 28 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 1e77a2929e..afcfe80488 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -474,6 +474,35 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - } - } - -+static int vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp) -+{ -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); -+ g_autofree struct vfio_iommu_type1_info *info = NULL; -+ int ret; -+ -+ ret = vfio_get_iommu_info(container, &info); -+ if (ret) { -+ error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); -+ return ret; -+ } -+ -+ if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { -+ bcontainer->pgsizes = info->iova_pgsizes; -+ } else { -+ bcontainer->pgsizes = qemu_real_host_page_size(); -+ } -+ -+ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { -+ 
bcontainer->dma_max_mappings = 65535; -+ } -+ -+ vfio_get_info_iova_range(info, bcontainer); -+ -+ vfio_get_iommu_info_migration(container, info); -+ return 0; -+} -+ - static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - Error **errp) - { -@@ -570,40 +599,18 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - switch (container->iommu_type) { - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_IOMMU: -- { -- struct vfio_iommu_type1_info *info; -- -- ret = vfio_get_iommu_info(container, &info); -- if (ret) { -- error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); -- goto enable_discards_exit; -- } -- -- if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { -- bcontainer->pgsizes = info->iova_pgsizes; -- } else { -- bcontainer->pgsizes = qemu_real_host_page_size(); -- } -- -- if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { -- bcontainer->dma_max_mappings = 65535; -- } -- -- vfio_get_info_iova_range(info, bcontainer); -- -- vfio_get_iommu_info_migration(container, info); -- g_free(info); -+ ret = vfio_legacy_setup(bcontainer, errp); - break; -- } - case VFIO_SPAPR_TCE_v2_IOMMU: - case VFIO_SPAPR_TCE_IOMMU: -- { - ret = vfio_spapr_container_init(container, errp); -- if (ret) { -- goto enable_discards_exit; -- } - break; -+ default: -+ g_assert_not_reached(); - } -+ -+ if (ret) { -+ goto enable_discards_exit; - } - - vfio_kvm_device_add_group(group); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch b/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch deleted file mode 100644 index 3d46a06..0000000 --- a/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 6a597d7c82a4538fa1f928db7e600ec2e5a44361 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:39 +0800 -Subject: [PATCH 014/101] vfio/container: Move dirty_pgsizes and - max_dirty_bitmap_size to base container 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [13/67] b9fe57174368e36788b017cc2ad13b748592cfc2 (eauger1/centos-qemu-kvm) - -No functional change intended. - -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 4d6b95010c59127ac4f7230d6ee88b5d0e99738c) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 9 +++++---- - include/hw/vfio/vfio-common.h | 2 -- - include/hw/vfio/vfio-container-base.h | 2 ++ - 3 files changed, 7 insertions(+), 6 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 5c1dee8c9f..c8088a8174 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -64,6 +64,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - struct vfio_iommu_type1_dma_unmap *unmap; - struct vfio_bitmap *bitmap; - VFIOBitmap vbmap; -@@ -91,7 +92,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, - bitmap->size = vbmap.size; - bitmap->data = (__u64 *)vbmap.bitmap; - -- if (vbmap.size > container->max_dirty_bitmap_size) { -+ if (vbmap.size > bcontainer->max_dirty_bitmap_size) { - error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size); - ret = -E2BIG; - goto unmap_exit; -@@ -131,7 +132,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, - - if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { - if (!vfio_devices_all_device_dirty_tracking(bcontainer) && -- container->bcontainer.dirty_pages_supported) { -+ bcontainer->dirty_pages_supported) { - return vfio_dma_unmap_bitmap(container, iova, 
size, iotlb); - } - -@@ -469,8 +470,8 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - */ - if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { - bcontainer->dirty_pages_supported = true; -- container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; -- container->dirty_pgsizes = cap_mig->pgsize_bitmap; -+ bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; -+ bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap; - } - } - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 922022cbc6..b1c9fe711b 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -80,8 +80,6 @@ typedef struct VFIOContainer { - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ - MemoryListener prereg_listener; - unsigned iommu_type; -- uint64_t dirty_pgsizes; -- uint64_t max_dirty_bitmap_size; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - GList *iova_ranges; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 95f8d319e0..80e4a993c5 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -39,6 +39,8 @@ typedef struct VFIOContainerBase { - MemoryListener listener; - Error *error; - bool initialized; -+ uint64_t dirty_pgsizes; -+ uint64_t max_dirty_bitmap_size; - unsigned long pgsizes; - unsigned int dma_max_mappings; - bool dirty_pages_supported; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch deleted file mode 100644 index c9c79b6..0000000 --- a/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 882143ef30da4182f049eb8192e0fac317c372b3 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:40 +0800 -Subject: [PATCH 015/101] vfio/container: Move 
iova_ranges to base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [14/67] 49f2e3c484b4c0c63be9aa4eb1bf08804dcb1ec3 (eauger1/centos-qemu-kvm) - -Meanwhile remove the helper function vfio_free_container as it -only calls g_free now. - -No functional change intended. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit f79baf8c9575ac3193ca86ec508791c86d96b13e) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 5 +++-- - hw/vfio/container-base.c | 3 +++ - hw/vfio/container.c | 19 ++++++------------- - include/hw/vfio/vfio-common.h | 1 - - include/hw/vfio/vfio-container-base.h | 1 + - 5 files changed, 13 insertions(+), 16 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index be623e544b..8ef2e7967d 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -637,9 +637,10 @@ static void vfio_listener_region_add(MemoryListener *listener, - goto fail; - } - -- if (container->iova_ranges) { -+ if (bcontainer->iova_ranges) { - ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, -- container->iova_ranges, &err); -+ bcontainer->iova_ranges, -+ &err); - if (ret) { - g_free(giommu); - goto fail; -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 7f508669f5..0177f43741 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - bcontainer->error = NULL; - bcontainer->dirty_pages_supported = false; - bcontainer->dma_max_mappings = 0; -+ bcontainer->iova_ranges = NULL; - QLIST_INIT(&bcontainer->giommu_list); - QLIST_INIT(&bcontainer->vrdl_list); - } -@@ -70,4 +71,6 @@ void vfio_container_destroy(VFIOContainerBase 
*bcontainer) - QLIST_REMOVE(giommu, giommu_next); - g_free(giommu); - } -+ -+ g_list_free_full(bcontainer->iova_ranges, g_free); - } -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index c8088a8174..721c0d7375 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -308,7 +308,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, - } - - static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, -- VFIOContainer *container) -+ VFIOContainerBase *bcontainer) - { - struct vfio_info_cap_header *hdr; - struct vfio_iommu_type1_info_cap_iova_range *cap; -@@ -326,8 +326,8 @@ static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, - - range_set_bounds(range, cap->iova_ranges[i].start, - cap->iova_ranges[i].end); -- container->iova_ranges = -- range_list_insert(container->iova_ranges, range); -+ bcontainer->iova_ranges = -+ range_list_insert(bcontainer->iova_ranges, range); - } - - return true; -@@ -475,12 +475,6 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - } - } - --static void vfio_free_container(VFIOContainer *container) --{ -- g_list_free_full(container->iova_ranges, g_free); -- g_free(container); --} -- - static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - Error **errp) - { -@@ -560,7 +554,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - - container = g_malloc0(sizeof(*container)); - container->fd = fd; -- container->iova_ranges = NULL; - bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, space, &vfio_legacy_ops); - -@@ -597,7 +590,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - bcontainer->dma_max_mappings = 65535; - } - -- vfio_get_info_iova_range(info, container); -+ vfio_get_info_iova_range(info, bcontainer); - - vfio_get_iommu_info_migration(container, info); - g_free(info); -@@ -649,7 +642,7 @@ enable_discards_exit: - vfio_ram_block_discard_disable(container, false); - - 
free_container_exit: -- vfio_free_container(container); -+ g_free(container); - - close_fd_exit: - close(fd); -@@ -693,7 +686,7 @@ static void vfio_disconnect_container(VFIOGroup *group) - - trace_vfio_disconnect_container(container->fd); - close(container->fd); -- vfio_free_container(container); -+ g_free(container); - - vfio_put_address_space(space); - } -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index b1c9fe711b..b9e5a0e64b 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -82,7 +82,6 @@ typedef struct VFIOContainer { - unsigned iommu_type; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; -- GList *iova_ranges; - } VFIOContainer; - - typedef struct VFIOHostDMAWindow { -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 80e4a993c5..9658ffb526 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -48,6 +48,7 @@ typedef struct VFIOContainerBase { - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; - QLIST_ENTRY(VFIOContainerBase) next; - QLIST_HEAD(, VFIODevice) device_list; -+ GList *iova_ranges; - } VFIOContainerBase; - - typedef struct VFIOGuestIOMMU { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch deleted file mode 100644 index 3198bfd..0000000 --- a/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch +++ /dev/null @@ -1,522 +0,0 @@ -From 36bc7782bb02f81368e3e43a3947d16ad362e137 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:38 +0800 -Subject: [PATCH 013/101] vfio/container: Move listener to base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: 
Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [12/67] f469ab126c6366170aa2520f9b4d9969d3ae0a04 (eauger1/centos-qemu-kvm) - -Move listener to base container. Also error and initialized fields -are moved at the same time. - -No functional change intended. - -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit c7b313d300f161c650d011a5c9da469bcd5d34e4) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 110 +++++++++++++------------- - hw/vfio/container-base.c | 1 + - hw/vfio/container.c | 19 +++-- - hw/vfio/spapr.c | 11 +-- - include/hw/vfio/vfio-common.h | 3 - - include/hw/vfio/vfio-container-base.h | 3 + - 6 files changed, 74 insertions(+), 73 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index f15665789f..be623e544b 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -541,7 +541,7 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section, - return true; - } - --static bool vfio_get_section_iova_range(VFIOContainer *container, -+static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer, - MemoryRegionSection *section, - hwaddr *out_iova, hwaddr *out_end, - Int128 *out_llend) -@@ -569,8 +569,10 @@ static bool vfio_get_section_iova_range(VFIOContainer *container, - static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, listener); -- VFIOContainerBase *bcontainer = &container->bcontainer; -+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, -+ listener); -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - hwaddr iova, end; - Int128 llend, llsize; - void *vaddr; -@@ -581,7 +583,8 @@ static void vfio_listener_region_add(MemoryListener *listener, - return; - } - -- if 
(!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) { -+ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end, -+ &llend)) { - if (memory_region_is_ram_device(section->mr)) { - trace_vfio_listener_region_add_no_dma_map( - memory_region_name(section->mr), -@@ -688,13 +691,12 @@ static void vfio_listener_region_add(MemoryListener *listener, - } - } - -- ret = vfio_container_dma_map(&container->bcontainer, -- iova, int128_get64(llsize), vaddr, -- section->readonly); -+ ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize), -+ vaddr, section->readonly); - if (ret) { - error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx", %p) = %d (%s)", -- container, iova, int128_get64(llsize), vaddr, ret, -+ bcontainer, iova, int128_get64(llsize), vaddr, ret, - strerror(-ret)); - if (memory_region_is_ram_device(section->mr)) { - /* Allow unexpected mappings not to be fatal for RAM devices */ -@@ -716,9 +718,9 @@ fail: - * can gracefully fail. Runtime, there's not much we can do other - * than throw a hardware error. 
- */ -- if (!container->initialized) { -- if (!container->error) { -- error_propagate_prepend(&container->error, err, -+ if (!bcontainer->initialized) { -+ if (!bcontainer->error) { -+ error_propagate_prepend(&bcontainer->error, err, - "Region %s: ", - memory_region_name(section->mr)); - } else { -@@ -733,8 +735,10 @@ fail: - static void vfio_listener_region_del(MemoryListener *listener, - MemoryRegionSection *section) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, listener); -- VFIOContainerBase *bcontainer = &container->bcontainer; -+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, -+ listener); -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - hwaddr iova, end; - Int128 llend, llsize; - int ret; -@@ -767,7 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener, - */ - } - -- if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) { -+ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end, -+ &llend)) { - return; - } - -@@ -790,22 +795,22 @@ static void vfio_listener_region_del(MemoryListener *listener, - if (int128_eq(llsize, int128_2_64())) { - /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ - llsize = int128_rshift(llsize, 1); -- ret = vfio_container_dma_unmap(&container->bcontainer, iova, -+ ret = vfio_container_dma_unmap(bcontainer, iova, - int128_get64(llsize), NULL); - if (ret) { - error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", -- container, iova, int128_get64(llsize), ret, -+ bcontainer, iova, int128_get64(llsize), ret, - strerror(-ret)); - } - iova += int128_get64(llsize); - } -- ret = vfio_container_dma_unmap(&container->bcontainer, iova, -+ ret = vfio_container_dma_unmap(bcontainer, iova, - int128_get64(llsize), NULL); - if (ret) { - error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", -- container, iova, int128_get64(llsize), ret, -+ bcontainer, iova, int128_get64(llsize), ret, - strerror(-ret)); - } - } -@@ -825,16 +830,15 @@ typedef struct VFIODirtyRanges { - } VFIODirtyRanges; - - typedef struct VFIODirtyRangesListener { -- VFIOContainer *container; -+ VFIOContainerBase *bcontainer; - VFIODirtyRanges ranges; - MemoryListener listener; - } VFIODirtyRangesListener; - - static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, -- VFIOContainer *container) -+ VFIOContainerBase *bcontainer) - { - VFIOPCIDevice *pcidev; -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - Object *owner; - -@@ -863,7 +867,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, - hwaddr iova, end, *min, *max; - - if (!vfio_listener_valid_section(section, "tracking_update") || -- !vfio_get_section_iova_range(dirty->container, section, -+ !vfio_get_section_iova_range(dirty->bcontainer, section, - &iova, &end, NULL)) { - return; - } -@@ -887,7 +891,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, - * The alternative would be an IOVATree but that has a much bigger runtime - * overhead and unnecessary complexity. 
- */ -- if (vfio_section_is_vfio_pci(section, dirty->container) && -+ if (vfio_section_is_vfio_pci(section, dirty->bcontainer) && - iova >= UINT32_MAX) { - min = &range->minpci64; - max = &range->maxpci64; -@@ -911,7 +915,7 @@ static const MemoryListener vfio_dirty_tracking_listener = { - .region_add = vfio_dirty_tracking_update, - }; - --static void vfio_dirty_tracking_init(VFIOContainer *container, -+static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer, - VFIODirtyRanges *ranges) - { - VFIODirtyRangesListener dirty; -@@ -921,10 +925,10 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, - dirty.ranges.min64 = UINT64_MAX; - dirty.ranges.minpci64 = UINT64_MAX; - dirty.listener = vfio_dirty_tracking_listener; -- dirty.container = container; -+ dirty.bcontainer = bcontainer; - - memory_listener_register(&dirty.listener, -- container->bcontainer.space->as); -+ bcontainer->space->as); - - *ranges = dirty.ranges; - -@@ -936,12 +940,11 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, - memory_listener_unregister(&dirty.listener); - } - --static void vfio_devices_dma_logging_stop(VFIOContainer *container) -+static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer) - { - uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), - sizeof(uint64_t))] = {}; - struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - - feature->argsz = sizeof(buf); -@@ -962,7 +965,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) - } - - static struct vfio_device_feature * --vfio_device_feature_dma_logging_start_create(VFIOContainer *container, -+vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer, - VFIODirtyRanges *tracking) - { - struct vfio_device_feature *feature; -@@ -1035,16 +1038,15 @@ static void vfio_device_feature_dma_logging_start_destroy( - g_free(feature); - } - --static 
int vfio_devices_dma_logging_start(VFIOContainer *container) -+static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer) - { - struct vfio_device_feature *feature; - VFIODirtyRanges ranges; -- VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - int ret = 0; - -- vfio_dirty_tracking_init(container, &ranges); -- feature = vfio_device_feature_dma_logging_start_create(container, -+ vfio_dirty_tracking_init(bcontainer, &ranges); -+ feature = vfio_device_feature_dma_logging_start_create(bcontainer, - &ranges); - if (!feature) { - return -errno; -@@ -1067,7 +1069,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) - - out: - if (ret) { -- vfio_devices_dma_logging_stop(container); -+ vfio_devices_dma_logging_stop(bcontainer); - } - - vfio_device_feature_dma_logging_start_destroy(feature); -@@ -1077,14 +1079,14 @@ out: - - static void vfio_listener_log_global_start(MemoryListener *listener) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, -+ listener); - int ret; - -- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { -- ret = vfio_devices_dma_logging_start(container); -+ if (vfio_devices_all_device_dirty_tracking(bcontainer)) { -+ ret = vfio_devices_dma_logging_start(bcontainer); - } else { -- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -- true); -+ ret = vfio_container_set_dirty_page_tracking(bcontainer, true); - } - - if (ret) { -@@ -1096,14 +1098,14 @@ static void vfio_listener_log_global_start(MemoryListener *listener) - - static void vfio_listener_log_global_stop(MemoryListener *listener) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, -+ listener); - int ret = 0; - -- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { -- 
vfio_devices_dma_logging_stop(container); -+ if (vfio_devices_all_device_dirty_tracking(bcontainer)) { -+ vfio_devices_dma_logging_stop(bcontainer); - } else { -- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -- false); -+ ret = vfio_container_set_dirty_page_tracking(bcontainer, false); - } - - if (ret) { -@@ -1214,8 +1216,6 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - vfio_giommu_dirty_notifier, n); - VFIOGuestIOMMU *giommu = gdn->giommu; - VFIOContainerBase *bcontainer = giommu->bcontainer; -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); - hwaddr iova = iotlb->iova + giommu->iommu_offset; - ram_addr_t translated_addr; - int ret = -EINVAL; -@@ -1230,12 +1230,12 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - - rcu_read_lock(); - if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { -- ret = vfio_get_dirty_bitmap(&container->bcontainer, iova, -- iotlb->addr_mask + 1, translated_addr); -+ ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1, -+ translated_addr); - if (ret) { - error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", -- container, iova, iotlb->addr_mask + 1, ret, -+ bcontainer, iova, iotlb->addr_mask + 1, ret, - strerror(-ret)); - } - } -@@ -1291,10 +1291,9 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, - &vrdl); - } - --static int vfio_sync_dirty_bitmap(VFIOContainer *container, -+static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer, - MemoryRegionSection *section) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; - ram_addr_t ram_addr; - - if (memory_region_is_iommu(section->mr)) { -@@ -1330,7 +1329,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - ram_addr = memory_region_get_ram_addr(section->mr) + - section->offset_within_region; - -- return 
vfio_get_dirty_bitmap(&container->bcontainer, -+ return vfio_get_dirty_bitmap(bcontainer, - REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), - int128_get64(section->size), ram_addr); - } -@@ -1338,15 +1337,16 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - static void vfio_listener_log_sync(MemoryListener *listener, - MemoryRegionSection *section) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, listener); -+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, -+ listener); - int ret; - - if (vfio_listener_skipped_section(section)) { - return; - } - -- if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { -- ret = vfio_sync_dirty_bitmap(container, section); -+ if (vfio_devices_all_dirty_tracking(bcontainer)) { -+ ret = vfio_sync_dirty_bitmap(bcontainer, section); - if (ret) { - error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret, - strerror(-ret)); -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 584eee4ba1..7f508669f5 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -51,6 +51,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - { - bcontainer->ops = ops; - bcontainer->space = space; -+ bcontainer->error = NULL; - bcontainer->dirty_pages_supported = false; - bcontainer->dma_max_mappings = 0; - QLIST_INIT(&bcontainer->giommu_list); -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 6ba2e2f8c4..5c1dee8c9f 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -453,6 +453,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - { - struct vfio_info_cap_header *hdr; - struct vfio_iommu_type1_info_cap_migration *cap_mig; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - - hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); - if (!hdr) { -@@ -467,7 +468,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer 
*container, - * qemu_real_host_page_size to mark those dirty. - */ - if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { -- container->bcontainer.dirty_pages_supported = true; -+ bcontainer->dirty_pages_supported = true; - container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; - container->dirty_pgsizes = cap_mig->pgsize_bitmap; - } -@@ -558,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - - container = g_malloc0(sizeof(*container)); - container->fd = fd; -- container->error = NULL; - container->iova_ranges = NULL; - bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, space, &vfio_legacy_ops); -@@ -621,25 +621,24 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - -- container->listener = vfio_memory_listener; -- -- memory_listener_register(&container->listener, bcontainer->space->as); -+ bcontainer->listener = vfio_memory_listener; -+ memory_listener_register(&bcontainer->listener, bcontainer->space->as); - -- if (container->error) { -+ if (bcontainer->error) { - ret = -1; -- error_propagate_prepend(errp, container->error, -+ error_propagate_prepend(errp, bcontainer->error, - "memory listener initialization failed: "); - goto listener_release_exit; - } - -- container->initialized = true; -+ bcontainer->initialized = true; - - return 0; - listener_release_exit: - QLIST_REMOVE(group, container_next); - QLIST_REMOVE(bcontainer, next); - vfio_kvm_device_del_group(group); -- memory_listener_unregister(&container->listener); -+ memory_listener_unregister(&bcontainer->listener); - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || - container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { - vfio_spapr_container_deinit(container); -@@ -674,7 +673,7 @@ static void vfio_disconnect_container(VFIOGroup *group) - * group. 
- */ - if (QLIST_EMPTY(&container->group_list)) { -- memory_listener_unregister(&container->listener); -+ memory_listener_unregister(&bcontainer->listener); - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || - container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { - vfio_spapr_container_deinit(container); -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 4f76bdd3ca..7a50975f25 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -46,6 +46,7 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, - { - VFIOContainer *container = container_of(listener, VFIOContainer, - prereg_listener); -+ VFIOContainerBase *bcontainer = &container->bcontainer; - const hwaddr gpa = section->offset_within_address_space; - hwaddr end; - int ret; -@@ -88,9 +89,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, - * can gracefully fail. Runtime, there's not much we can do other - * than throw a hardware error. - */ -- if (!container->initialized) { -- if (!container->error) { -- error_setg_errno(&container->error, -ret, -+ if (!bcontainer->initialized) { -+ if (!bcontainer->error) { -+ error_setg_errno(&bcontainer->error, -ret, - "Memory registering failed"); - } - } else { -@@ -445,9 +446,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - - memory_listener_register(&container->prereg_listener, - &address_space_memory); -- if (container->error) { -+ if (bcontainer->error) { - ret = -1; -- error_propagate_prepend(errp, container->error, -+ error_propagate_prepend(errp, bcontainer->error, - "RAM memory listener initialization failed: "); - goto listener_unregister_exit; - } -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 8a607a4c17..922022cbc6 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -78,11 +78,8 @@ struct VFIOGroup; - typedef struct VFIOContainer { - VFIOContainerBase bcontainer; - int fd; /* /dev/vfio/vfio, empowered by the attached 
groups */ -- MemoryListener listener; - MemoryListener prereg_listener; - unsigned iommu_type; -- Error *error; -- bool initialized; - uint64_t dirty_pgsizes; - uint64_t max_dirty_bitmap_size; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 8e05b5ac5a..95f8d319e0 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -36,6 +36,9 @@ typedef struct VFIOAddressSpace { - typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; - VFIOAddressSpace *space; -+ MemoryListener listener; -+ Error *error; -+ bool initialized; - unsigned long pgsizes; - unsigned int dma_max_mappings; - bool dirty_pages_supported; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch b/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch deleted file mode 100644 index df483e3..0000000 --- a/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch +++ /dev/null @@ -1,230 +0,0 @@ -From 0b3fbb6bf5c5bccec184829ff9454fd637c512b9 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:34 +0800 -Subject: [PATCH 009/101] vfio/container: Move per container device list in - base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [8/67] d546cc25f4424b2d42356765c860fdaf4a3ba652 (eauger1/centos-qemu-kvm) - -VFIO Device is also changed to point to base container instead of -legacy container. - -No functional change intended. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 3e6015d1117579324b456aa169dfca06da9922cf) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 23 +++++++++++++++-------- - hw/vfio/container.c | 12 ++++++------ - include/hw/vfio/vfio-common.h | 3 +-- - include/hw/vfio/vfio-container-base.h | 1 + - 4 files changed, 23 insertions(+), 16 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index b1a875ca93..9415395ed9 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void) - - bool vfio_viommu_preset(VFIODevice *vbasedev) - { -- return vbasedev->container->bcontainer.space->as != &address_space_memory; -+ return vbasedev->bcontainer->space->as != &address_space_memory; - } - - static void vfio_set_migration_error(int err) -@@ -179,6 +179,7 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) - - static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - -@@ -187,7 +188,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - return false; - } - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - VFIOMigration *migration = vbasedev->migration; - - if (!migration) { -@@ -205,9 +206,10 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - - bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (!vbasedev->dirty_pages_supported) { - return false; - } -@@ -222,13 +224,14 @@ bool 
vfio_devices_all_device_dirty_tracking(VFIOContainer *container) - */ - bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - - if (!migration_is_active(migrate_get_current())) { - return false; - } - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - VFIOMigration *migration = vbasedev->migration; - - if (!migration) { -@@ -833,12 +836,13 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, - VFIOContainer *container) - { - VFIOPCIDevice *pcidev; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - Object *owner; - - owner = memory_region_owner(section->mr); - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { - continue; - } -@@ -939,13 +943,14 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) - uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), - sizeof(uint64_t))] = {}; - struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - - feature->argsz = sizeof(buf); - feature->flags = VFIO_DEVICE_FEATURE_SET | - VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (!vbasedev->dirty_tracking) { - continue; - } -@@ -1036,6 +1041,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) - { - struct vfio_device_feature *feature; - VFIODirtyRanges ranges; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - int ret = 0; - -@@ -1046,7 +1052,7 @@ static int 
vfio_devices_dma_logging_start(VFIOContainer *container) - return -errno; - } - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (vbasedev->dirty_tracking) { - continue; - } -@@ -1139,10 +1145,11 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - VFIODevice *vbasedev; - int ret; - -- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { -+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - ret = vfio_device_dma_logging_report(vbasedev, iova, size, - vbmap->bitmap); - if (ret) { -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 3ab74e2615..63a906de93 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -888,7 +888,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - int groupid = vfio_device_groupid(vbasedev, errp); - VFIODevice *vbasedev_iter; - VFIOGroup *group; -- VFIOContainer *container; -+ VFIOContainerBase *bcontainer; - int ret; - - if (groupid < 0) { -@@ -915,9 +915,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - return ret; - } - -- container = group->container; -- vbasedev->container = container; -- QLIST_INSERT_HEAD(&container->device_list, vbasedev, container_next); -+ bcontainer = &group->container->bcontainer; -+ vbasedev->bcontainer = bcontainer; -+ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); - QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); - - return ret; -@@ -927,13 +927,13 @@ void vfio_detach_device(VFIODevice *vbasedev) - { - VFIOGroup *group = vbasedev->group; - -- if (!vbasedev->container) { -+ if (!vbasedev->bcontainer) { - return; - } - - QLIST_REMOVE(vbasedev, global_next); - QLIST_REMOVE(vbasedev, container_next); -- vbasedev->container = NULL; -+ vbasedev->bcontainer = NULL; - 
trace_vfio_detach_device(vbasedev->name, group->groupid); - vfio_put_base_device(vbasedev); - vfio_put_group(group); -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 60f2785fe0..9740cf9fbc 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -90,7 +90,6 @@ typedef struct VFIOContainer { - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; -- QLIST_HEAD(, VFIODevice) device_list; - GList *iova_ranges; - } VFIOContainer; - -@@ -118,7 +117,7 @@ typedef struct VFIODevice { - QLIST_ENTRY(VFIODevice) container_next; - QLIST_ENTRY(VFIODevice) global_next; - struct VFIOGroup *group; -- VFIOContainer *container; -+ VFIOContainerBase *bcontainer; - char *sysfsdev; - char *name; - DeviceState *dev; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index f244f003d0..7090962496 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -39,6 +39,7 @@ typedef struct VFIOContainerBase { - bool dirty_pages_supported; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - QLIST_ENTRY(VFIOContainerBase) next; -+ QLIST_HEAD(, VFIODevice) device_list; - } VFIOContainerBase; - - typedef struct VFIOGuestIOMMU { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch b/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch deleted file mode 100644 index 0db20c2..0000000 --- a/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch +++ /dev/null @@ -1,242 +0,0 @@ -From d798939fbbe6c27200c165edd6f3771413821b34 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:36 +0800 -Subject: [PATCH 011/101] vfio/container: Move pgsizes and dma_max_mappings to - base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric 
Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [10/67] e80696175aba159a17ce9a869535db66682deb08 (eauger1/centos-qemu-kvm) - -No functional change intended. - -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 7ab1cb74ffdbf92ef237243b41bde5c7067d5298) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 17 +++++++++-------- - hw/vfio/container-base.c | 1 + - hw/vfio/container.c | 11 +++++------ - hw/vfio/spapr.c | 10 ++++++---- - include/hw/vfio/vfio-common.h | 2 -- - include/hw/vfio/vfio-container-base.h | 2 ++ - 6 files changed, 23 insertions(+), 20 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index cf6618f6ed..1cb53d369e 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -401,6 +401,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, - static void vfio_register_ram_discard_listener(VFIOContainer *container, - MemoryRegionSection *section) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl; - -@@ -419,8 +420,8 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - section->mr); - - g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity)); -- g_assert(container->pgsizes && -- vrdl->granularity >= 1ULL << ctz64(container->pgsizes)); -+ g_assert(bcontainer->pgsizes && -+ vrdl->granularity >= 1ULL << ctz64(bcontainer->pgsizes)); - - ram_discard_listener_init(&vrdl->listener, - vfio_ram_discard_notify_populate, -@@ -441,7 +442,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - * number of sections in the address space we could have over time, - * also consuming DMA mappings. 
- */ -- if (container->dma_max_mappings) { -+ if (bcontainer->dma_max_mappings) { - unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512; - - #ifdef CONFIG_KVM -@@ -462,11 +463,11 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - } - - if (vrdl_mappings + max_memslots - vrdl_count > -- container->dma_max_mappings) { -+ bcontainer->dma_max_mappings) { - warn_report("%s: possibly running out of DMA mappings. E.g., try" - " increasing the 'block-size' of virtio-mem devies." - " Maximum possible DMA mappings: %d, Maximum possible" -- " memslots: %d", __func__, container->dma_max_mappings, -+ " memslots: %d", __func__, bcontainer->dma_max_mappings, - max_memslots); - } - } -@@ -626,7 +627,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - iommu_idx); - - ret = memory_region_iommu_set_page_size_mask(giommu->iommu_mr, -- container->pgsizes, -+ bcontainer->pgsizes, - &err); - if (ret) { - g_free(giommu); -@@ -675,7 +676,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - llsize = int128_sub(llend, int128_make64(iova)); - - if (memory_region_is_ram_device(section->mr)) { -- hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1; -+ hwaddr pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; - - if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { - trace_vfio_listener_region_add_no_dma_map( -@@ -777,7 +778,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - if (memory_region_is_ram_device(section->mr)) { - hwaddr pgmask; - -- pgmask = (1ULL << ctz64(container->pgsizes)) - 1; -+ pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; - try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); - } else if (memory_region_has_ram_discard_manager(section->mr)) { - vfio_unregister_ram_discard_listener(container, section); -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 5d654ae172..dcce111349 100644 ---- a/hw/vfio/container-base.c -+++ 
b/hw/vfio/container-base.c -@@ -52,6 +52,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - bcontainer->ops = ops; - bcontainer->space = space; - bcontainer->dirty_pages_supported = false; -+ bcontainer->dma_max_mappings = 0; - QLIST_INIT(&bcontainer->giommu_list); - } - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 7bd81eab09..c5a6262882 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -154,7 +154,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, - if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && - container->iommu_type == VFIO_TYPE1v2_IOMMU) { - trace_vfio_legacy_dma_unmap_overflow_workaround(); -- unmap.size -= 1ULL << ctz64(container->pgsizes); -+ unmap.size -= 1ULL << ctz64(bcontainer->pgsizes); - continue; - } - error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); -@@ -559,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container = g_malloc0(sizeof(*container)); - container->fd = fd; - container->error = NULL; -- container->dma_max_mappings = 0; - container->iova_ranges = NULL; - QLIST_INIT(&container->vrdl_list); - bcontainer = &container->bcontainer; -@@ -589,13 +588,13 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - } - - if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { -- container->pgsizes = info->iova_pgsizes; -+ bcontainer->pgsizes = info->iova_pgsizes; - } else { -- container->pgsizes = qemu_real_host_page_size(); -+ bcontainer->pgsizes = qemu_real_host_page_size(); - } - -- if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { -- container->dma_max_mappings = 65535; -+ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { -+ bcontainer->dma_max_mappings = 65535; - } - - vfio_get_info_iova_range(info, container); -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 83da2f7ec2..4f76bdd3ca 100644 ---- a/hw/vfio/spapr.c -+++ 
b/hw/vfio/spapr.c -@@ -226,6 +226,7 @@ static int vfio_spapr_create_window(VFIOContainer *container, - hwaddr *pgsize) - { - int ret = 0; -+ VFIOContainerBase *bcontainer = &container->bcontainer; - IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); - uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask; - unsigned entries, bits_total, bits_per_level, max_levels; -@@ -239,13 +240,13 @@ static int vfio_spapr_create_window(VFIOContainer *container, - if (pagesize > rampagesize) { - pagesize = rampagesize; - } -- pgmask = container->pgsizes & (pagesize | (pagesize - 1)); -+ pgmask = bcontainer->pgsizes & (pagesize | (pagesize - 1)); - pagesize = pgmask ? (1ULL << (63 - clz64(pgmask))) : 0; - if (!pagesize) { - error_report("Host doesn't support page size 0x%"PRIx64 - ", the supported mask is 0x%lx", - memory_region_iommu_get_min_page_size(iommu_mr), -- container->pgsizes); -+ bcontainer->pgsizes); - return -EINVAL; - } - -@@ -421,6 +422,7 @@ void vfio_container_del_section_window(VFIOContainer *container, - - int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - { -+ VFIOContainerBase *bcontainer = &container->bcontainer; - struct vfio_iommu_spapr_tce_info info; - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - int ret, fd = container->fd; -@@ -461,7 +463,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - } - - if (v2) { -- container->pgsizes = info.ddw.pgsizes; -+ bcontainer->pgsizes = info.ddw.pgsizes; - /* - * There is a default window in just created container. 
- * To make region_add/del simpler, we better remove this -@@ -476,7 +478,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - } - } else { - /* The default table uses 4K pages */ -- container->pgsizes = 0x1000; -+ bcontainer->pgsizes = 0x1000; - vfio_host_win_add(container, info.dma32_window_start, - info.dma32_window_start + - info.dma32_window_size - 1, -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index bc67e1316c..d3dc2f9dcb 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -85,8 +85,6 @@ typedef struct VFIOContainer { - bool initialized; - uint64_t dirty_pgsizes; - uint64_t max_dirty_bitmap_size; -- unsigned long pgsizes; -- unsigned int dma_max_mappings; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 7090962496..85ec7e1a56 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -36,6 +36,8 @@ typedef struct VFIOAddressSpace { - typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; - VFIOAddressSpace *space; -+ unsigned long pgsizes; -+ unsigned int dma_max_mappings; - bool dirty_pages_supported; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - QLIST_ENTRY(VFIOContainerBase) next; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch deleted file mode 100644 index edd4538..0000000 --- a/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch +++ /dev/null @@ -1,265 +0,0 @@ -From 3ba43cbc5b096feed6272e070cf152d5fc74df01 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:32 +0800 -Subject: [PATCH 007/101] vfio/container: Move space field to base container -MIME-Version: 1.0 -Content-Type: 
text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [6/67] b0aa17d9ec4588bd64373452a30306e826234d0b (eauger1/centos-qemu-kvm) - -Move the space field to the base object. Also the VFIOAddressSpace -now contains a list of base containers. - -No functional change intended. - -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit e5597063386a0c76308ad16da31726d23f489945) -Signed-off-by: Eric Auger ---- - hw/ppc/spapr_pci_vfio.c | 10 +++++----- - hw/vfio/common.c | 4 ++-- - hw/vfio/container-base.c | 6 +++++- - hw/vfio/container.c | 18 ++++++++---------- - include/hw/vfio/vfio-common.h | 8 -------- - include/hw/vfio/vfio-container-base.h | 9 +++++++++ - 6 files changed, 29 insertions(+), 26 deletions(-) - -diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c -index f283f7e38d..d1d07bec46 100644 ---- a/hw/ppc/spapr_pci_vfio.c -+++ b/hw/ppc/spapr_pci_vfio.c -@@ -84,27 +84,27 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) - static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) - { - VFIOAddressSpace *space = vfio_get_address_space(as); -- VFIOContainer *container = NULL; -+ VFIOContainerBase *bcontainer = NULL; - - if (QLIST_EMPTY(&space->containers)) { - /* No containers to act on */ - goto out; - } - -- container = QLIST_FIRST(&space->containers); -+ bcontainer = QLIST_FIRST(&space->containers); - -- if (QLIST_NEXT(container, next)) { -+ if (QLIST_NEXT(bcontainer, next)) { - /* - * We don't yet have logic to synchronize EEH state across - * multiple containers - */ -- container = NULL; -+ bcontainer = NULL; - goto out; - } - - out: - vfio_put_address_space(space); -- return container; -+ return 
container_of(bcontainer, VFIOContainer, bcontainer); - } - - static bool vfio_eeh_as_ok(AddressSpace *as) -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 43580bcc43..1d8202537e 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void) - - bool vfio_viommu_preset(VFIODevice *vbasedev) - { -- return vbasedev->container->space->as != &address_space_memory; -+ return vbasedev->container->bcontainer.space->as != &address_space_memory; - } - - static void vfio_set_migration_error(int err) -@@ -922,7 +922,7 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, - dirty.container = container; - - memory_listener_register(&dirty.listener, -- container->space->as); -+ container->bcontainer.space->as); - - *ranges = dirty.ranges; - -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 20bcb9669a..3933391e0d 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -31,9 +31,11 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); - } - --void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) -+void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, -+ const VFIOIOMMUOps *ops) - { - bcontainer->ops = ops; -+ bcontainer->space = space; - QLIST_INIT(&bcontainer->giommu_list); - } - -@@ -41,6 +43,8 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) - { - VFIOGuestIOMMU *giommu, *tmp; - -+ QLIST_REMOVE(bcontainer, next); -+ - QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { - memory_region_unregister_iommu_notifier( - MEMORY_REGION(giommu->iommu_mr), &giommu->n); -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 133d3c8f5c..f12fcb6fe1 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -514,7 +514,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - 
* details once we know which type of IOMMU we are using. - */ - -- QLIST_FOREACH(container, &space->containers, next) { -+ QLIST_FOREACH(bcontainer, &space->containers, next) { -+ container = container_of(bcontainer, VFIOContainer, bcontainer); - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { -@@ -550,7 +551,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - } - - container = g_malloc0(sizeof(*container)); -- container->space = space; - container->fd = fd; - container->error = NULL; - container->dirty_pages_supported = false; -@@ -558,7 +558,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->iova_ranges = NULL; - QLIST_INIT(&container->vrdl_list); - bcontainer = &container->bcontainer; -- vfio_container_init(bcontainer, &vfio_legacy_ops); -+ vfio_container_init(bcontainer, space, &vfio_legacy_ops); - - ret = vfio_init_container(container, group->fd, errp); - if (ret) { -@@ -613,14 +613,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - vfio_kvm_device_add_group(group); - - QLIST_INIT(&container->group_list); -- QLIST_INSERT_HEAD(&space->containers, container, next); -+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); - - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - - container->listener = vfio_memory_listener; - -- memory_listener_register(&container->listener, container->space->as); -+ memory_listener_register(&container->listener, bcontainer->space->as); - - if (container->error) { - ret = -1; -@@ -634,7 +634,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - return 0; - listener_release_exit: - QLIST_REMOVE(group, container_next); -- QLIST_REMOVE(container, next); -+ QLIST_REMOVE(bcontainer, next); - vfio_kvm_device_del_group(group); - memory_listener_unregister(&container->listener); - if 
(container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || -@@ -684,9 +684,7 @@ static void vfio_disconnect_container(VFIOGroup *group) - } - - if (QLIST_EMPTY(&container->group_list)) { -- VFIOAddressSpace *space = container->space; -- -- QLIST_REMOVE(container, next); -+ VFIOAddressSpace *space = bcontainer->space; - - vfio_container_destroy(bcontainer); - -@@ -707,7 +705,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) - QLIST_FOREACH(group, &vfio_group_list, next) { - if (group->groupid == groupid) { - /* Found it. Now is it already in the right context? */ -- if (group->container->space->as == as) { -+ if (group->container->bcontainer.space->as == as) { - return group; - } else { - error_setg(errp, "group %d used in multiple address spaces", -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 6be082b8f2..bd4de6cb3a 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -73,17 +73,10 @@ typedef struct VFIOMigration { - bool initial_data_sent; - } VFIOMigration; - --typedef struct VFIOAddressSpace { -- AddressSpace *as; -- QLIST_HEAD(, VFIOContainer) containers; -- QLIST_ENTRY(VFIOAddressSpace) list; --} VFIOAddressSpace; -- - struct VFIOGroup; - - typedef struct VFIOContainer { - VFIOContainerBase bcontainer; -- VFIOAddressSpace *space; - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ - MemoryListener listener; - MemoryListener prereg_listener; -@@ -98,7 +91,6 @@ typedef struct VFIOContainer { - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; -- QLIST_ENTRY(VFIOContainer) next; - QLIST_HEAD(, VFIODevice) device_list; - GList *iova_ranges; - } VFIOContainer; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index a11aec5755..c7cc6ec9c5 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h 
-@@ -24,12 +24,20 @@ typedef struct { - hwaddr pages; - } VFIOBitmap; - -+typedef struct VFIOAddressSpace { -+ AddressSpace *as; -+ QLIST_HEAD(, VFIOContainerBase) containers; -+ QLIST_ENTRY(VFIOAddressSpace) list; -+} VFIOAddressSpace; -+ - /* - * This is the base object for vfio container backends - */ - typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; -+ VFIOAddressSpace *space; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; -+ QLIST_ENTRY(VFIOContainerBase) next; - } VFIOContainerBase; - - typedef struct VFIOGuestIOMMU { -@@ -48,6 +56,7 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - IOMMUTLBEntry *iotlb); - - void vfio_container_init(VFIOContainerBase *bcontainer, -+ VFIOAddressSpace *space, - const VFIOIOMMUOps *ops); - void vfio_container_destroy(VFIOContainerBase *bcontainer); - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch deleted file mode 100644 index 5e31d07..0000000 --- a/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch +++ /dev/null @@ -1,255 +0,0 @@ -From aadd055dcc06cb964ebfd2868b7e9b207d62ae0e Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:37 +0800 -Subject: [PATCH 012/101] vfio/container: Move vrdl_list to base container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [11/67] 42da5389e39291839259f0e4c020c7461b7225cc (eauger1/centos-qemu-kvm) - -No functional change intended. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit dc74a4b0056c0c803d46612a2319294921097974) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 38 +++++++++++++-------------- - hw/vfio/container-base.c | 1 + - hw/vfio/container.c | 1 - - include/hw/vfio/vfio-common.h | 11 -------- - include/hw/vfio/vfio-container-base.h | 11 ++++++++ - 5 files changed, 31 insertions(+), 31 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 1cb53d369e..f15665789f 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -351,13 +351,13 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, - { - VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, - listener); -+ VFIOContainerBase *bcontainer = vrdl->bcontainer; - const hwaddr size = int128_get64(section->size); - const hwaddr iova = section->offset_within_address_space; - int ret; - - /* Unmap with a single call. */ -- ret = vfio_container_dma_unmap(&vrdl->container->bcontainer, -- iova, size , NULL); -+ ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL); - if (ret) { - error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, - strerror(-ret)); -@@ -369,6 +369,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, - { - VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, - listener); -+ VFIOContainerBase *bcontainer = vrdl->bcontainer; - const hwaddr end = section->offset_within_region + - int128_get64(section->size); - hwaddr start, next, iova; -@@ -387,8 +388,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, - section->offset_within_address_space; - vaddr = memory_region_get_ram_ptr(section->mr) + start; - -- ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova, -- next - start, vaddr, section->readonly); -+ ret = vfio_container_dma_map(bcontainer, iova, next - start, -+ vaddr, section->readonly); - if 
(ret) { - /* Rollback */ - vfio_ram_discard_notify_discard(rdl, section); -@@ -398,10 +399,9 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, - return 0; - } - --static void vfio_register_ram_discard_listener(VFIOContainer *container, -+static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, - MemoryRegionSection *section) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl; - -@@ -412,7 +412,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE)); - - vrdl = g_new0(VFIORamDiscardListener, 1); -- vrdl->container = container; -+ vrdl->bcontainer = bcontainer; - vrdl->mr = section->mr; - vrdl->offset_within_address_space = section->offset_within_address_space; - vrdl->size = int128_get64(section->size); -@@ -427,7 +427,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - vfio_ram_discard_notify_populate, - vfio_ram_discard_notify_discard, true); - ram_discard_manager_register_listener(rdm, &vrdl->listener, section); -- QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next); -+ QLIST_INSERT_HEAD(&bcontainer->vrdl_list, vrdl, next); - - /* - * Sanity-check if we have a theoretically problematic setup where we could -@@ -451,7 +451,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - } - #endif - -- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { -+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { - hwaddr start, end; - - start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space, -@@ -473,13 +473,13 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, - } - } - --static void vfio_unregister_ram_discard_listener(VFIOContainer *container, -+static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer, - 
MemoryRegionSection *section) - { - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl = NULL; - -- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { -+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { - if (vrdl->mr == section->mr && - vrdl->offset_within_address_space == - section->offset_within_address_space) { -@@ -663,7 +663,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - * about changes. - */ - if (memory_region_has_ram_discard_manager(section->mr)) { -- vfio_register_ram_discard_listener(container, section); -+ vfio_register_ram_discard_listener(bcontainer, section); - return; - } - -@@ -781,7 +781,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; - try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); - } else if (memory_region_has_ram_discard_manager(section->mr)) { -- vfio_unregister_ram_discard_listener(container, section); -+ vfio_unregister_ram_discard_listener(bcontainer, section); - /* Unregistering will trigger an unmap. */ - try_unmap = false; - } -@@ -1260,17 +1260,17 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, - * Sync the whole mapped region (spanning multiple individual mappings) - * in one go. 
- */ -- return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size, -- ram_addr); -+ return vfio_get_dirty_bitmap(vrdl->bcontainer, iova, size, ram_addr); - } - --static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, -- MemoryRegionSection *section) -+static int -+vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section) - { - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl = NULL; - -- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { -+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { - if (vrdl->mr == section->mr && - vrdl->offset_within_address_space == - section->offset_within_address_space) { -@@ -1324,7 +1324,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, - } - return 0; - } else if (memory_region_has_ram_discard_manager(section->mr)) { -- return vfio_sync_ram_discard_listener_dirty_bitmap(container, section); -+ return vfio_sync_ram_discard_listener_dirty_bitmap(bcontainer, section); - } - - ram_addr = memory_region_get_ram_addr(section->mr) + -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index dcce111349..584eee4ba1 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - bcontainer->dirty_pages_supported = false; - bcontainer->dma_max_mappings = 0; - QLIST_INIT(&bcontainer->giommu_list); -+ QLIST_INIT(&bcontainer->vrdl_list); - } - - void vfio_container_destroy(VFIOContainerBase *bcontainer) -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index c5a6262882..6ba2e2f8c4 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -560,7 +560,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->fd = fd; - container->error = NULL; - container->iova_ranges = NULL; -- QLIST_INIT(&container->vrdl_list); - 
bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, space, &vfio_legacy_ops); - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index d3dc2f9dcb..8a607a4c17 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -87,20 +87,9 @@ typedef struct VFIOContainer { - uint64_t max_dirty_bitmap_size; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; -- QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; - GList *iova_ranges; - } VFIOContainer; - --typedef struct VFIORamDiscardListener { -- VFIOContainer *container; -- MemoryRegion *mr; -- hwaddr offset_within_address_space; -- hwaddr size; -- uint64_t granularity; -- RamDiscardListener listener; -- QLIST_ENTRY(VFIORamDiscardListener) next; --} VFIORamDiscardListener; -- - typedef struct VFIOHostDMAWindow { - hwaddr min_iova; - hwaddr max_iova; -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 85ec7e1a56..8e05b5ac5a 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -40,6 +40,7 @@ typedef struct VFIOContainerBase { - unsigned int dma_max_mappings; - bool dirty_pages_supported; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; -+ QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; - QLIST_ENTRY(VFIOContainerBase) next; - QLIST_HEAD(, VFIODevice) device_list; - } VFIOContainerBase; -@@ -52,6 +53,16 @@ typedef struct VFIOGuestIOMMU { - QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; - } VFIOGuestIOMMU; - -+typedef struct VFIORamDiscardListener { -+ VFIOContainerBase *bcontainer; -+ MemoryRegion *mr; -+ hwaddr offset_within_address_space; -+ hwaddr size; -+ uint64_t granularity; -+ RamDiscardListener listener; -+ QLIST_ENTRY(VFIORamDiscardListener) next; -+} VFIORamDiscardListener; -+ - int vfio_container_dma_map(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); --- -2.39.3 - diff --git 
a/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch b/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch deleted file mode 100644 index f68be0b..0000000 --- a/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch +++ /dev/null @@ -1,66 +0,0 @@ -From edfc1ee2a1854d180ffad92e70212535a2ca668c Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 21 Dec 2023 10:45:17 +0800 -Subject: [PATCH 062/101] vfio/container: Rename vfio_init_container to - vfio_set_iommu -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [61/67] 5e7f956379b54fe6fa7e078ec17e71325aa109af (eauger1/centos-qemu-kvm) - -vfio_container_init() and vfio_init_container() names are confusing -especially when we see vfio_init_container() calls vfio_container_init(). - -vfio_container_init() operates on base container which is consistent -with all routines handling 'VFIOContainerBase *' ops. - -vfio_init_container() operates on legacy container and setup IOMMU -context with ioctl(VFIO_SET_IOMMU). - -So choose to rename vfio_init_container to vfio_set_iommu to avoid -the confusion. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -(cherry picked from commit 9f734a117cbf63b03577b46c8cad8ad88ec6dced) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 8d334f52f2..bd25b9fbad 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -392,8 +392,8 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) - return VFIO_IOMMU_CLASS(klass); - } - --static int vfio_init_container(VFIOContainer *container, int group_fd, -- VFIOAddressSpace *space, Error **errp) -+static int vfio_set_iommu(VFIOContainer *container, int group_fd, -+ VFIOAddressSpace *space, Error **errp) - { - int iommu_type, ret; - const VFIOIOMMUClass *vioc; -@@ -616,7 +616,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container->fd = fd; - bcontainer = &container->bcontainer; - -- ret = vfio_init_container(container, group->fd, space, errp); -+ ret = vfio_set_iommu(container, group->fd, space, errp); - if (ret) { - goto free_container_exit; - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch b/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch deleted file mode 100644 index 77df179..0000000 --- a/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 8d3857c7877da58ed0c6b62cf2714c4127350522 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 20 Dec 2023 14:53:02 +0100 -Subject: [PATCH 059/101] vfio/container: Replace basename with - g_path_get_basename -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian 
Ott -RH-Commit: [58/67] 56a90f23dadc89271b1fff014fc64ade87c1a4cb (eauger1/centos-qemu-kvm) - -g_path_get_basename() is a portable utility function that has the -advantage of not modifing the string argument. It also fixes a compile -breakage with the Musl C library reported in [1]. - -[1] https://lore.kernel.org/all/20231212010228.2701544-1-raj.khem@gmail.com/ - -Reported-by: Khem Raj -Reviewed-by: Eric Auger -Reviewed-by: Zhao Liu -Reviewed-by: Zhenzhong Duan -Signed-off-by: Cédric Le Goater -(cherry picked from commit 213ae3ffda463c0503e39e0cf827511b5298c314) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 688cf23bab..8d334f52f2 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -869,7 +869,8 @@ static void vfio_put_base_device(VFIODevice *vbasedev) - - static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) - { -- char *tmp, group_path[PATH_MAX], *group_name; -+ char *tmp, group_path[PATH_MAX]; -+ g_autofree char *group_name = NULL; - int ret, groupid; - ssize_t len; - -@@ -885,7 +886,7 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) - - group_path[len] = 0; - -- group_name = basename(group_path); -+ group_name = g_path_get_basename(group_path); - if (sscanf(group_name, "%d", &groupid) != 1) { - error_setg_errno(errp, errno, "failed to read %s", group_path); - return -errno; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch b/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch deleted file mode 100644 index 5442688..0000000 --- a/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch +++ /dev/null @@ -1,235 +0,0 @@ -From a2c8aa64b1b21a3e1d4cf2a4fe7d84dc32f69284 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:33 +0800 -Subject: [PATCH 008/101] vfio/container: Switch to IOMMU BE - 
set_dirty_page_tracking/query_dirty_bitmap API -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [7/67] 88368809c7990e1d9b01406e48694fe3e3fb1397 (eauger1/centos-qemu-kvm) - -dirty_pages_supported field is also moved to the base container - -No functional change intended. - -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit bb424490edcef73d07f200d53f69415b203d81df) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 12 ++++++++---- - hw/vfio/container-base.c | 16 ++++++++++++++++ - hw/vfio/container.c | 21 ++++++++++++++------- - include/hw/vfio/vfio-common.h | 6 ------ - include/hw/vfio/vfio-container-base.h | 6 ++++++ - 5 files changed, 44 insertions(+), 17 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 1d8202537e..b1a875ca93 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1079,7 +1079,8 @@ static void vfio_listener_log_global_start(MemoryListener *listener) - if (vfio_devices_all_device_dirty_tracking(container)) { - ret = vfio_devices_dma_logging_start(container); - } else { -- ret = vfio_set_dirty_page_tracking(container, true); -+ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -+ true); - } - - if (ret) { -@@ -1097,7 +1098,8 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) - if (vfio_devices_all_device_dirty_tracking(container)) { - vfio_devices_dma_logging_stop(container); - } else { -- ret = vfio_set_dirty_page_tracking(container, false); -+ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, -+ false); - } - - if (ret) { -@@ -1165,7 +1167,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t 
iova, - VFIOBitmap vbmap; - int ret; - -- if (!container->dirty_pages_supported && !all_device_dirty_tracking) { -+ if (!container->bcontainer.dirty_pages_supported && -+ !all_device_dirty_tracking) { - cpu_physical_memory_set_dirty_range(ram_addr, size, - tcg_enabled() ? DIRTY_CLIENTS_ALL : - DIRTY_CLIENTS_NOCODE); -@@ -1180,7 +1183,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - if (all_device_dirty_tracking) { - ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); - } else { -- ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size); -+ ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap, -+ iova, size); - } - - if (ret) { -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 3933391e0d..5d654ae172 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -31,11 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); - } - -+int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, -+ bool start) -+{ -+ g_assert(bcontainer->ops->set_dirty_page_tracking); -+ return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); -+} -+ -+int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+ VFIOBitmap *vbmap, -+ hwaddr iova, hwaddr size) -+{ -+ g_assert(bcontainer->ops->query_dirty_bitmap); -+ return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size); -+} -+ - void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - const VFIOIOMMUOps *ops) - { - bcontainer->ops = ops; - bcontainer->space = space; -+ bcontainer->dirty_pages_supported = false; - QLIST_INIT(&bcontainer->giommu_list); - } - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index f12fcb6fe1..3ab74e2615 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -131,7 +131,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase 
*bcontainer, hwaddr iova, - - if (iotlb && vfio_devices_all_running_and_mig_active(container)) { - if (!vfio_devices_all_device_dirty_tracking(container) && -- container->dirty_pages_supported) { -+ container->bcontainer.dirty_pages_supported) { - return vfio_dma_unmap_bitmap(container, iova, size, iotlb); - } - -@@ -205,14 +205,17 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, - return -errno; - } - --int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) -+static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, -+ bool start) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - int ret; - struct vfio_iommu_type1_dirty_bitmap dirty = { - .argsz = sizeof(dirty), - }; - -- if (!container->dirty_pages_supported) { -+ if (!bcontainer->dirty_pages_supported) { - return 0; - } - -@@ -232,9 +235,12 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) - return ret; - } - --int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, -- hwaddr iova, hwaddr size) -+static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+ VFIOBitmap *vbmap, -+ hwaddr iova, hwaddr size) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dirty_bitmap *dbitmap; - struct vfio_iommu_type1_dirty_bitmap_get *range; - int ret; -@@ -461,7 +467,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, - * qemu_real_host_page_size to mark those dirty. 
- */ - if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { -- container->dirty_pages_supported = true; -+ container->bcontainer.dirty_pages_supported = true; - container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; - container->dirty_pgsizes = cap_mig->pgsize_bitmap; - } -@@ -553,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - container = g_malloc0(sizeof(*container)); - container->fd = fd; - container->error = NULL; -- container->dirty_pages_supported = false; - container->dma_max_mappings = 0; - container->iova_ranges = NULL; - QLIST_INIT(&container->vrdl_list); -@@ -937,4 +942,6 @@ void vfio_detach_device(VFIODevice *vbasedev) - const VFIOIOMMUOps vfio_legacy_ops = { - .dma_map = vfio_legacy_dma_map, - .dma_unmap = vfio_legacy_dma_unmap, -+ .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, -+ .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, - }; -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index bd4de6cb3a..60f2785fe0 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -83,7 +83,6 @@ typedef struct VFIOContainer { - unsigned iommu_type; - Error *error; - bool initialized; -- bool dirty_pages_supported; - uint64_t dirty_pgsizes; - uint64_t max_dirty_bitmap_size; - unsigned long pgsizes; -@@ -190,11 +189,6 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); - void vfio_put_address_space(VFIOAddressSpace *space); - bool vfio_devices_all_running_and_saving(VFIOContainer *container); - --/* container->fd */ --int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); --int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, -- hwaddr iova, hwaddr size); -- - /* SPAPR specific */ - int vfio_container_add_section_window(VFIOContainer *container, - MemoryRegionSection *section, -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 
c7cc6ec9c5..f244f003d0 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -36,6 +36,7 @@ typedef struct VFIOAddressSpace { - typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; - VFIOAddressSpace *space; -+ bool dirty_pages_supported; - QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; - QLIST_ENTRY(VFIOContainerBase) next; - } VFIOContainerBase; -@@ -54,6 +55,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, - int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb); -+int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, -+ bool start); -+int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, -+ VFIOBitmap *vbmap, -+ hwaddr iova, hwaddr size); - - void vfio_container_init(VFIOContainerBase *bcontainer, - VFIOAddressSpace *space, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch b/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch deleted file mode 100644 index cfb5eb1..0000000 --- a/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch +++ /dev/null @@ -1,303 +0,0 @@ -From 00daef8e3f4f64b1401b2e8945c256d27fbfa960 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 2 Nov 2023 15:12:29 +0800 -Subject: [PATCH 004/101] vfio/container: Switch to dma_map|unmap API -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [3/67] 9a20e2f2b277be65463f145df3309271493be6ac (eauger1/centos-qemu-kvm) - -No functional change intended. 
- -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Yi Sun -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit b08501a999e2448f500a46d68da503be55186b04) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 45 +++++++++++++++------------ - hw/vfio/container-base.c | 32 +++++++++++++++++++ - hw/vfio/container.c | 22 ++++++++----- - hw/vfio/meson.build | 1 + - hw/vfio/trace-events | 2 +- - include/hw/vfio/vfio-common.h | 4 --- - include/hw/vfio/vfio-container-base.h | 7 +++++ - 7 files changed, 81 insertions(+), 32 deletions(-) - create mode 100644 hw/vfio/container-base.c - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index e70fdf5e0c..e610771888 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - { - VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); -- VFIOContainer *container = giommu->container; -+ VFIOContainerBase *bcontainer = &giommu->container->bcontainer; - hwaddr iova = iotlb->iova + giommu->iommu_offset; - void *vaddr; - int ret; -@@ -322,21 +322,22 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) - * of vaddr will always be there, even if the memory object is - * destroyed and its backing memory munmap-ed. 
- */ -- ret = vfio_dma_map(container, iova, -- iotlb->addr_mask + 1, vaddr, -- read_only); -+ ret = vfio_container_dma_map(bcontainer, iova, -+ iotlb->addr_mask + 1, vaddr, -+ read_only); - if (ret) { -- error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " -+ error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx", %p) = %d (%s)", -- container, iova, -+ bcontainer, iova, - iotlb->addr_mask + 1, vaddr, ret, strerror(-ret)); - } - } else { -- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb); -+ ret = vfio_container_dma_unmap(bcontainer, iova, -+ iotlb->addr_mask + 1, iotlb); - if (ret) { -- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " -+ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", -- container, iova, -+ bcontainer, iova, - iotlb->addr_mask + 1, ret, strerror(-ret)); - vfio_set_migration_error(ret); - } -@@ -355,9 +356,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, - int ret; - - /* Unmap with a single call. 
*/ -- ret = vfio_dma_unmap(vrdl->container, iova, size , NULL); -+ ret = vfio_container_dma_unmap(&vrdl->container->bcontainer, -+ iova, size , NULL); - if (ret) { -- error_report("%s: vfio_dma_unmap() failed: %s", __func__, -+ error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, - strerror(-ret)); - } - } -@@ -385,8 +387,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, - section->offset_within_address_space; - vaddr = memory_region_get_ram_ptr(section->mr) + start; - -- ret = vfio_dma_map(vrdl->container, iova, next - start, -- vaddr, section->readonly); -+ ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova, -+ next - start, vaddr, section->readonly); - if (ret) { - /* Rollback */ - vfio_ram_discard_notify_discard(rdl, section); -@@ -684,10 +686,11 @@ static void vfio_listener_region_add(MemoryListener *listener, - } - } - -- ret = vfio_dma_map(container, iova, int128_get64(llsize), -- vaddr, section->readonly); -+ ret = vfio_container_dma_map(&container->bcontainer, -+ iova, int128_get64(llsize), vaddr, -+ section->readonly); - if (ret) { -- error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", " -+ error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx", %p) = %d (%s)", - container, iova, int128_get64(llsize), vaddr, ret, - strerror(-ret)); -@@ -784,18 +787,20 @@ static void vfio_listener_region_del(MemoryListener *listener, - if (int128_eq(llsize, int128_2_64())) { - /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ - llsize = int128_rshift(llsize, 1); -- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); -+ ret = vfio_container_dma_unmap(&container->bcontainer, iova, -+ int128_get64(llsize), NULL); - if (ret) { -- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " -+ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", - container, iova, int128_get64(llsize), ret, - strerror(-ret)); - } - iova += int128_get64(llsize); - } -- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); -+ ret = vfio_container_dma_unmap(&container->bcontainer, iova, -+ int128_get64(llsize), NULL); - if (ret) { -- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " -+ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", - container, iova, int128_get64(llsize), ret, - strerror(-ret)); -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -new file mode 100644 -index 0000000000..55d3a35fa4 ---- /dev/null -+++ b/hw/vfio/container-base.c -@@ -0,0 +1,32 @@ -+/* -+ * VFIO BASE CONTAINER -+ * -+ * Copyright (C) 2023 Intel Corporation. -+ * Copyright Red Hat, Inc. 
2023 -+ * -+ * Authors: Yi Liu -+ * Eric Auger -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include "qapi/error.h" -+#include "qemu/error-report.h" -+#include "hw/vfio/vfio-container-base.h" -+ -+int vfio_container_dma_map(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ void *vaddr, bool readonly) -+{ -+ g_assert(bcontainer->ops->dma_map); -+ return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly); -+} -+ -+int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb) -+{ -+ g_assert(bcontainer->ops->dma_unmap); -+ return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); -+} -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 4bc43ddfa4..c04df26323 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -115,9 +115,11 @@ unmap_exit: - /* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ --int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, -- ram_addr_t size, IOMMUTLBEntry *iotlb) -+static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, -+ ram_addr_t size, IOMMUTLBEntry *iotlb) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dma_unmap unmap = { - .argsz = sizeof(unmap), - .flags = 0, -@@ -151,7 +153,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, - */ - if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && - container->iommu_type == VFIO_TYPE1v2_IOMMU) { -- trace_vfio_dma_unmap_overflow_workaround(); -+ trace_vfio_legacy_dma_unmap_overflow_workaround(); - unmap.size -= 1ULL << ctz64(container->pgsizes); - continue; - } -@@ -170,9 +172,11 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, - return 0; - } - --int vfio_dma_map(VFIOContainer *container, hwaddr iova, -- ram_addr_t size, void *vaddr, bool readonly) -+static int 
vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, -+ ram_addr_t size, void *vaddr, bool readonly) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - struct vfio_iommu_type1_dma_map map = { - .argsz = sizeof(map), - .flags = VFIO_DMA_MAP_FLAG_READ, -@@ -191,7 +195,8 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, - * the VGA ROM space. - */ - if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || -- (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && -+ (errno == EBUSY && -+ vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 && - ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { - return 0; - } -@@ -937,4 +942,7 @@ void vfio_detach_device(VFIODevice *vbasedev) - vfio_put_group(group); - } - --const VFIOIOMMUOps vfio_legacy_ops; -+const VFIOIOMMUOps vfio_legacy_ops = { -+ .dma_map = vfio_legacy_dma_map, -+ .dma_unmap = vfio_legacy_dma_unmap, -+}; -diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build -index 2a6912c940..eb6ce6229d 100644 ---- a/hw/vfio/meson.build -+++ b/hw/vfio/meson.build -@@ -2,6 +2,7 @@ vfio_ss = ss.source_set() - vfio_ss.add(files( - 'helpers.c', - 'common.c', -+ 'container-base.c', - 'container.c', - 'spapr.c', - 'migration.c', -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 0eb2387cf2..9f7fedee98 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -116,7 +116,7 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re - vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries" - vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" - vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" --vfio_dma_unmap_overflow_workaround(void) "" -+vfio_legacy_dma_unmap_overflow_workaround(void) "" - vfio_get_dirty_bitmap(int fd, 
uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 - vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 678161f207..24a26345e5 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -208,10 +208,6 @@ void vfio_put_address_space(VFIOAddressSpace *space); - bool vfio_devices_all_running_and_saving(VFIOContainer *container); - - /* container->fd */ --int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, -- ram_addr_t size, IOMMUTLBEntry *iotlb); --int vfio_dma_map(VFIOContainer *container, hwaddr iova, -- ram_addr_t size, void *vaddr, bool readonly); - int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); - int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, - hwaddr iova, hwaddr size); -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 1d6daaea5d..56b033f59f 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -31,6 +31,13 @@ typedef struct VFIOContainerBase { - const VFIOIOMMUOps *ops; - } VFIOContainerBase; - -+int vfio_container_dma_map(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ void *vaddr, bool readonly); -+int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb); -+ - struct VFIOIOMMUOps { - /* basic feature */ - int (*dma_map)(VFIOContainerBase *bcontainer, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch b/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch deleted file mode 100644 index 52e3d87..0000000 --- 
a/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 49435d4d592bc890f56b69c2290f890c87b5a103 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:05 +0800 -Subject: [PATCH 026/101] vfio/iommufd: Add support for iova_ranges and pgsizes -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [25/67] 578af0547d97276ccd4936b574c12118fc70d468 (eauger1/centos-qemu-kvm) - -Some vIOMMU such as virtio-iommu use IOVA ranges from host side to -setup reserved ranges for passthrough device, so that guest will not -use an IOVA range beyond host support. - -Use an uAPI of IOMMUFD to get IOVA ranges of host side and pass to -vIOMMU just like the legacy backend, if this fails, fallback to -64bit IOVA range. - -Also use out_iova_alignment returned from uAPI as pgsizes instead of -qemu_real_host_page_size() as a fallback. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 714e9affa8ae1d84007c8afde7bb10fef9cb883d) -Signed-off-by: Eric Auger ---- - hw/vfio/iommufd.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 55 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index 6d31aeac7b..01b448e840 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -261,6 +261,53 @@ static int iommufd_cdev_ram_block_discard_disable(bool state) - return ram_block_uncoordinated_discard_disable(state); - } - -+static int iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container, -+ uint32_t ioas_id, Error **errp) -+{ -+ VFIOContainerBase *bcontainer = &container->bcontainer; -+ struct iommu_ioas_iova_ranges *info; -+ struct iommu_iova_range *iova_ranges; -+ int ret, sz, fd = container->be->fd; -+ -+ info = g_malloc0(sizeof(*info)); -+ info->size = sizeof(*info); -+ info->ioas_id = ioas_id; -+ -+ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); -+ if (ret && errno != EMSGSIZE) { -+ goto error; -+ } -+ -+ sz = info->num_iovas * sizeof(struct iommu_iova_range); -+ info = g_realloc(info, sizeof(*info) + sz); -+ info->allowed_iovas = (uintptr_t)(info + 1); -+ -+ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); -+ if (ret) { -+ goto error; -+ } -+ -+ iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas; -+ -+ for (int i = 0; i < info->num_iovas; i++) { -+ Range *range = g_new(Range, 1); -+ -+ range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last); -+ bcontainer->iova_ranges = -+ range_list_insert(bcontainer->iova_ranges, range); -+ } -+ bcontainer->pgsizes = info->out_iova_alignment; -+ -+ g_free(info); -+ return 0; -+ -+error: -+ ret = -errno; -+ g_free(info); -+ error_setg_errno(errp, errno, "Cannot get IOVA ranges"); -+ return ret; -+} -+ - static int 
iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp) - { -@@ -335,7 +382,14 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - goto err_discard_disable; - } - -- bcontainer->pgsizes = qemu_real_host_page_size(); -+ ret = iommufd_cdev_get_info_iova_range(container, ioas_id, &err); -+ if (ret) { -+ error_append_hint(&err, -+ "Fallback to default 64bit IOVA range and 4K page size\n"); -+ warn_report_err(err); -+ err = NULL; -+ bcontainer->pgsizes = qemu_real_host_page_size(); -+ } - - bcontainer->listener = vfio_memory_listener; - memory_listener_register(&bcontainer->listener, bcontainer->space->as); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch b/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch deleted file mode 100644 index 48db196..0000000 --- a/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch +++ /dev/null @@ -1,215 +0,0 @@ -From e94700896dd8fcea149d9719eccde6f485440be2 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:08 +0800 -Subject: [PATCH 029/101] vfio/iommufd: Enable pci hot reset through iommufd - cdev interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [28/67] ca1ae970138ee4a6f4b3b49817e775f3159f4c97 (eauger1/centos-qemu-kvm) - -Implement the newly introduced pci_hot_reset callback named -iommufd_cdev_pci_hot_reset to do iommufd specific check and -reset operation. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 96d6f85ff012abd7aaa35b1a2bc48b8640c898d9) -Signed-off-by: Eric Auger ---- - hw/vfio/iommufd.c | 150 +++++++++++++++++++++++++++++++++++++++++++ - hw/vfio/trace-events | 1 + - 2 files changed, 151 insertions(+) - -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index 01b448e840..6e53e013ef 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -24,6 +24,7 @@ - #include "sysemu/reset.h" - #include "qemu/cutils.h" - #include "qemu/chardev_open.h" -+#include "pci.h" - - static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) -@@ -468,9 +469,158 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) - close(vbasedev->fd); - } - -+static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) -+{ -+ VFIODevice *vbasedev_iter; -+ -+ QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { -+ if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { -+ continue; -+ } -+ if (devid == vbasedev_iter->devid) { -+ return vbasedev_iter; -+ } -+ } -+ return NULL; -+} -+ -+static VFIOPCIDevice * -+iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev, -+ VFIODevice *reset_dev) -+{ -+ VFIODevice *vbasedev_tmp; -+ -+ if (dep_dev->devid == reset_dev->devid || -+ dep_dev->devid == VFIO_PCI_DEVID_OWNED) { -+ return NULL; -+ } -+ -+ vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid); -+ if (!vbasedev_tmp || !vbasedev_tmp->dev->realized || -+ vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) { -+ return NULL; -+ } -+ -+ return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev); -+} -+ -+static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single) -+{ -+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); -+ struct vfio_pci_hot_reset_info *info = NULL; -+ struct 
vfio_pci_dependent_device *devices; -+ struct vfio_pci_hot_reset *reset; -+ int ret, i; -+ bool multi = false; -+ -+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); -+ -+ if (!single) { -+ vfio_pci_pre_reset(vdev); -+ } -+ vdev->vbasedev.needs_reset = false; -+ -+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); -+ -+ if (ret) { -+ goto out_single; -+ } -+ -+ assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID); -+ -+ devices = &info->devices[0]; -+ -+ if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) { -+ if (!vdev->has_pm_reset) { -+ for (i = 0; i < info->count; i++) { -+ if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) { -+ error_report("vfio: Cannot reset device %s, " -+ "depends on device %04x:%02x:%02x.%x " -+ "which is not owned.", -+ vdev->vbasedev.name, devices[i].segment, -+ devices[i].bus, PCI_SLOT(devices[i].devfn), -+ PCI_FUNC(devices[i].devfn)); -+ } -+ } -+ } -+ ret = -EPERM; -+ goto out_single; -+ } -+ -+ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); -+ -+ for (i = 0; i < info->count; i++) { -+ VFIOPCIDevice *tmp; -+ -+ trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment, -+ devices[i].bus, -+ PCI_SLOT(devices[i].devfn), -+ PCI_FUNC(devices[i].devfn), -+ devices[i].devid); -+ -+ /* -+ * If a VFIO cdev device is resettable, all the dependent devices -+ * are either bound to same iommufd or within same iommu_groups as -+ * one of the iommufd bound devices. 
-+ */ -+ assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED); -+ -+ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev); -+ if (!tmp) { -+ continue; -+ } -+ -+ if (single) { -+ ret = -EINVAL; -+ goto out_single; -+ } -+ vfio_pci_pre_reset(tmp); -+ tmp->vbasedev.needs_reset = false; -+ multi = true; -+ } -+ -+ if (!single && !multi) { -+ ret = -EINVAL; -+ goto out_single; -+ } -+ -+ /* Use zero length array for hot reset with iommufd backend */ -+ reset = g_malloc0(sizeof(*reset)); -+ reset->argsz = sizeof(*reset); -+ -+ /* Bus reset! */ -+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); -+ g_free(reset); -+ if (ret) { -+ ret = -errno; -+ } -+ -+ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, -+ ret ? strerror(errno) : "Success"); -+ -+ /* Re-enable INTx on affected devices */ -+ for (i = 0; i < info->count; i++) { -+ VFIOPCIDevice *tmp; -+ -+ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev); -+ if (!tmp) { -+ continue; -+ } -+ vfio_pci_post_reset(tmp); -+ } -+out_single: -+ if (!single) { -+ vfio_pci_post_reset(vdev); -+ } -+ g_free(info); -+ -+ return ret; -+} -+ - const VFIOIOMMUOps vfio_iommufd_ops = { - .dma_map = iommufd_cdev_map, - .dma_unmap = iommufd_cdev_unmap, - .attach_device = iommufd_cdev_attach, - .detach_device = iommufd_cdev_detach, -+ .pci_hot_reset = iommufd_cdev_pci_hot_reset, - }; -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 3340c93af0..8fdde54456 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Succ - iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" - iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" - iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" 
-+iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch b/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch deleted file mode 100644 index f00cbcd..0000000 --- a/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch +++ /dev/null @@ -1,561 +0,0 @@ -From f018d0b686406256c2b5e823e4227316ee1394e9 Mon Sep 17 00:00:00 2001 -From: Yi Liu -Date: Tue, 21 Nov 2023 16:44:03 +0800 -Subject: [PATCH 024/101] vfio/iommufd: Implement the iommufd backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [23/67] d11046654117a690542a1e2b48b9d1994f778b2d (eauger1/centos-qemu-kvm) - -The iommufd backend is implemented based on the new /dev/iommu user API. -This backend obviously depends on CONFIG_IOMMUFD. - -So far, the iommufd backend doesn't support dirty page sync yet. 
- -Co-authored-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 5ee3dc7af7859e7b8aa34c10c21778101c15e812) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 6 + - hw/vfio/iommufd.c | 422 ++++++++++++++++++++++++++++++++++ - hw/vfio/meson.build | 3 + - hw/vfio/trace-events | 10 + - include/hw/vfio/vfio-common.h | 11 + - 5 files changed, 452 insertions(+) - create mode 100644 hw/vfio/iommufd.c - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 934f4f5446..6569732b7a 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -19,6 +19,7 @@ - */ - - #include "qemu/osdep.h" -+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include - #ifdef CONFIG_KVM - #include -@@ -1503,6 +1504,11 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - { - const VFIOIOMMUOps *ops = &vfio_legacy_ops; - -+#ifdef CONFIG_IOMMUFD -+ if (vbasedev->iommufd) { -+ ops = &vfio_iommufd_ops; -+ } -+#endif - return ops->attach_device(name, vbasedev, as, errp); - } - -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -new file mode 100644 -index 0000000000..6d31aeac7b ---- /dev/null -+++ b/hw/vfio/iommufd.c -@@ -0,0 +1,422 @@ -+/* -+ * iommufd container backend -+ * -+ * Copyright (C) 2023 Intel Corporation. -+ * Copyright Red Hat, Inc. 
2023 -+ * -+ * Authors: Yi Liu -+ * Eric Auger -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include -+#include -+#include -+ -+#include "hw/vfio/vfio-common.h" -+#include "qemu/error-report.h" -+#include "trace.h" -+#include "qapi/error.h" -+#include "sysemu/iommufd.h" -+#include "hw/qdev-core.h" -+#include "sysemu/reset.h" -+#include "qemu/cutils.h" -+#include "qemu/chardev_open.h" -+ -+static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, -+ ram_addr_t size, void *vaddr, bool readonly) -+{ -+ VFIOIOMMUFDContainer *container = -+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); -+ -+ return iommufd_backend_map_dma(container->be, -+ container->ioas_id, -+ iova, size, vaddr, readonly); -+} -+ -+static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, -+ hwaddr iova, ram_addr_t size, -+ IOMMUTLBEntry *iotlb) -+{ -+ VFIOIOMMUFDContainer *container = -+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); -+ -+ /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ -+ return iommufd_backend_unmap_dma(container->be, -+ container->ioas_id, iova, size); -+} -+ -+static int iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp) -+{ -+ return vfio_kvm_device_add_fd(vbasedev->fd, errp); -+} -+ -+static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev) -+{ -+ Error *err = NULL; -+ -+ if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) { -+ error_report_err(err); -+ } -+} -+ -+static int iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) -+{ -+ IOMMUFDBackend *iommufd = vbasedev->iommufd; -+ struct vfio_device_bind_iommufd bind = { -+ .argsz = sizeof(bind), -+ .flags = 0, -+ }; -+ int ret; -+ -+ ret = iommufd_backend_connect(iommufd, errp); -+ if (ret) { -+ return ret; -+ } -+ -+ /* -+ * Add device to kvm-vfio to be prepared for the tracking -+ * in KVM. Especially for some emulated devices, it requires -+ * to have kvm information in the device open. 
-+ */ -+ ret = iommufd_cdev_kvm_device_add(vbasedev, errp); -+ if (ret) { -+ goto err_kvm_device_add; -+ } -+ -+ /* Bind device to iommufd */ -+ bind.iommufd = iommufd->fd; -+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind); -+ if (ret) { -+ error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d", -+ vbasedev->fd, bind.iommufd); -+ goto err_bind; -+ } -+ -+ vbasedev->devid = bind.out_devid; -+ trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, -+ vbasedev->fd, vbasedev->devid); -+ return ret; -+err_bind: -+ iommufd_cdev_kvm_device_del(vbasedev); -+err_kvm_device_add: -+ iommufd_backend_disconnect(iommufd); -+ return ret; -+} -+ -+static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev) -+{ -+ /* Unbind is automatically conducted when device fd is closed */ -+ iommufd_cdev_kvm_device_del(vbasedev); -+ iommufd_backend_disconnect(vbasedev->iommufd); -+} -+ -+static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) -+{ -+ long int ret = -ENOTTY; -+ char *path, *vfio_dev_path = NULL, *vfio_path = NULL; -+ DIR *dir = NULL; -+ struct dirent *dent; -+ gchar *contents; -+ struct stat st; -+ gsize length; -+ int major, minor; -+ dev_t vfio_devt; -+ -+ path = g_strdup_printf("%s/vfio-dev", sysfs_path); -+ if (stat(path, &st) < 0) { -+ error_setg_errno(errp, errno, "no such host device"); -+ goto out_free_path; -+ } -+ -+ dir = opendir(path); -+ if (!dir) { -+ error_setg_errno(errp, errno, "couldn't open directory %s", path); -+ goto out_free_path; -+ } -+ -+ while ((dent = readdir(dir))) { -+ if (!strncmp(dent->d_name, "vfio", 4)) { -+ vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name); -+ break; -+ } -+ } -+ -+ if (!vfio_dev_path) { -+ error_setg(errp, "failed to find vfio-dev/vfioX/dev"); -+ goto out_close_dir; -+ } -+ -+ if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) { -+ error_setg(errp, "failed to load \"%s\"", vfio_dev_path); -+ goto out_free_dev_path; -+ } -+ -+ if 
(sscanf(contents, "%d:%d", &major, &minor) != 2) { -+ error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path); -+ goto out_free_dev_path; -+ } -+ g_free(contents); -+ vfio_devt = makedev(major, minor); -+ -+ vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name); -+ ret = open_cdev(vfio_path, vfio_devt); -+ if (ret < 0) { -+ error_setg(errp, "Failed to open %s", vfio_path); -+ } -+ -+ trace_iommufd_cdev_getfd(vfio_path, ret); -+ g_free(vfio_path); -+ -+out_free_dev_path: -+ g_free(vfio_dev_path); -+out_close_dir: -+ closedir(dir); -+out_free_path: -+ if (*errp) { -+ error_prepend(errp, VFIO_MSG_PREFIX, path); -+ } -+ g_free(path); -+ -+ return ret; -+} -+ -+static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, -+ Error **errp) -+{ -+ int ret, iommufd = vbasedev->iommufd->fd; -+ struct vfio_device_attach_iommufd_pt attach_data = { -+ .argsz = sizeof(attach_data), -+ .flags = 0, -+ .pt_id = id, -+ }; -+ -+ /* Attach device to an IOAS or hwpt within iommufd */ -+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data); -+ if (ret) { -+ error_setg_errno(errp, errno, -+ "[iommufd=%d] error attach %s (%d) to id=%d", -+ iommufd, vbasedev->name, vbasedev->fd, id); -+ } else { -+ trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name, -+ vbasedev->fd, id); -+ } -+ return ret; -+} -+ -+static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) -+{ -+ int ret, iommufd = vbasedev->iommufd->fd; -+ struct vfio_device_detach_iommufd_pt detach_data = { -+ .argsz = sizeof(detach_data), -+ .flags = 0, -+ }; -+ -+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data); -+ if (ret) { -+ error_setg_errno(errp, errno, "detach %s failed", vbasedev->name); -+ } else { -+ trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name); -+ } -+ return ret; -+} -+ -+static int iommufd_cdev_attach_container(VFIODevice *vbasedev, -+ VFIOIOMMUFDContainer *container, -+ Error **errp) -+{ -+ return 
iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); -+} -+ -+static void iommufd_cdev_detach_container(VFIODevice *vbasedev, -+ VFIOIOMMUFDContainer *container) -+{ -+ Error *err = NULL; -+ -+ if (iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) { -+ error_report_err(err); -+ } -+} -+ -+static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) -+{ -+ VFIOContainerBase *bcontainer = &container->bcontainer; -+ -+ if (!QLIST_EMPTY(&bcontainer->device_list)) { -+ return; -+ } -+ memory_listener_unregister(&bcontainer->listener); -+ vfio_container_destroy(bcontainer); -+ iommufd_backend_free_id(container->be, container->ioas_id); -+ g_free(container); -+} -+ -+static int iommufd_cdev_ram_block_discard_disable(bool state) -+{ -+ /* -+ * We support coordinated discarding of RAM via the RamDiscardManager. -+ */ -+ return ram_block_uncoordinated_discard_disable(state); -+} -+ -+static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, -+ AddressSpace *as, Error **errp) -+{ -+ VFIOContainerBase *bcontainer; -+ VFIOIOMMUFDContainer *container; -+ VFIOAddressSpace *space; -+ struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; -+ int ret, devfd; -+ uint32_t ioas_id; -+ Error *err = NULL; -+ -+ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); -+ if (devfd < 0) { -+ return devfd; -+ } -+ vbasedev->fd = devfd; -+ -+ ret = iommufd_cdev_connect_and_bind(vbasedev, errp); -+ if (ret) { -+ goto err_connect_bind; -+ } -+ -+ space = vfio_get_address_space(as); -+ -+ /* try to attach to an existing container in this space */ -+ QLIST_FOREACH(bcontainer, &space->containers, next) { -+ container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); -+ if (bcontainer->ops != &vfio_iommufd_ops || -+ vbasedev->iommufd != container->be) { -+ continue; -+ } -+ if (iommufd_cdev_attach_container(vbasedev, container, &err)) { -+ const char *msg = error_get_pretty(err); -+ -+ 
trace_iommufd_cdev_fail_attach_existing_container(msg); -+ error_free(err); -+ err = NULL; -+ } else { -+ ret = iommufd_cdev_ram_block_discard_disable(true); -+ if (ret) { -+ error_setg(errp, -+ "Cannot set discarding of RAM broken (%d)", ret); -+ goto err_discard_disable; -+ } -+ goto found_container; -+ } -+ } -+ -+ /* Need to allocate a new dedicated container */ -+ ret = iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp); -+ if (ret < 0) { -+ goto err_alloc_ioas; -+ } -+ -+ trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); -+ -+ container = g_malloc0(sizeof(*container)); -+ container->be = vbasedev->iommufd; -+ container->ioas_id = ioas_id; -+ -+ bcontainer = &container->bcontainer; -+ vfio_container_init(bcontainer, space, &vfio_iommufd_ops); -+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); -+ -+ ret = iommufd_cdev_attach_container(vbasedev, container, errp); -+ if (ret) { -+ goto err_attach_container; -+ } -+ -+ ret = iommufd_cdev_ram_block_discard_disable(true); -+ if (ret) { -+ goto err_discard_disable; -+ } -+ -+ bcontainer->pgsizes = qemu_real_host_page_size(); -+ -+ bcontainer->listener = vfio_memory_listener; -+ memory_listener_register(&bcontainer->listener, bcontainer->space->as); -+ -+ if (bcontainer->error) { -+ ret = -1; -+ error_propagate_prepend(errp, bcontainer->error, -+ "memory listener initialization failed: "); -+ goto err_listener_register; -+ } -+ -+ bcontainer->initialized = true; -+ -+found_container: -+ ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info); -+ if (ret) { -+ error_setg_errno(errp, errno, "error getting device info"); -+ goto err_listener_register; -+ } -+ -+ /* -+ * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level -+ * for discarding incompatibility check as well? 
-+ */ -+ if (vbasedev->ram_block_discard_allowed) { -+ iommufd_cdev_ram_block_discard_disable(false); -+ } -+ -+ vbasedev->group = 0; -+ vbasedev->num_irqs = dev_info.num_irqs; -+ vbasedev->num_regions = dev_info.num_regions; -+ vbasedev->flags = dev_info.flags; -+ vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); -+ vbasedev->bcontainer = bcontainer; -+ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); -+ QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); -+ -+ trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, -+ vbasedev->num_regions, vbasedev->flags); -+ return 0; -+ -+err_listener_register: -+ iommufd_cdev_ram_block_discard_disable(false); -+err_discard_disable: -+ iommufd_cdev_detach_container(vbasedev, container); -+err_attach_container: -+ iommufd_cdev_container_destroy(container); -+err_alloc_ioas: -+ vfio_put_address_space(space); -+ iommufd_cdev_unbind_and_disconnect(vbasedev); -+err_connect_bind: -+ close(vbasedev->fd); -+ return ret; -+} -+ -+static void iommufd_cdev_detach(VFIODevice *vbasedev) -+{ -+ VFIOContainerBase *bcontainer = vbasedev->bcontainer; -+ VFIOAddressSpace *space = bcontainer->space; -+ VFIOIOMMUFDContainer *container = container_of(bcontainer, -+ VFIOIOMMUFDContainer, -+ bcontainer); -+ QLIST_REMOVE(vbasedev, global_next); -+ QLIST_REMOVE(vbasedev, container_next); -+ vbasedev->bcontainer = NULL; -+ -+ if (!vbasedev->ram_block_discard_allowed) { -+ iommufd_cdev_ram_block_discard_disable(false); -+ } -+ -+ iommufd_cdev_detach_container(vbasedev, container); -+ iommufd_cdev_container_destroy(container); -+ vfio_put_address_space(space); -+ -+ iommufd_cdev_unbind_and_disconnect(vbasedev); -+ close(vbasedev->fd); -+} -+ -+const VFIOIOMMUOps vfio_iommufd_ops = { -+ .dma_map = iommufd_cdev_map, -+ .dma_unmap = iommufd_cdev_unmap, -+ .attach_device = iommufd_cdev_attach, -+ .detach_device = iommufd_cdev_detach, -+}; -diff --git a/hw/vfio/meson.build 
b/hw/vfio/meson.build -index eb6ce6229d..e5d98b6adc 100644 ---- a/hw/vfio/meson.build -+++ b/hw/vfio/meson.build -@@ -7,6 +7,9 @@ vfio_ss.add(files( - 'spapr.c', - 'migration.c', - )) -+vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( -+ 'iommufd.c', -+)) - vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( - 'display.c', - 'pci-quirks.c', -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 08a1f9dfa4..3340c93af0 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -164,3 +164,13 @@ vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcop - vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 - vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" - vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" -+ -+#iommufd.c -+ -+iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d" -+iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)" -+iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d" -+iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Successfully detached %s" -+iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" -+iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" -+iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" -diff 
--git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 24ecc0e7ee..3dac5c167e 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -89,6 +89,14 @@ typedef struct VFIOHostDMAWindow { - QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next; - } VFIOHostDMAWindow; - -+typedef struct IOMMUFDBackend IOMMUFDBackend; -+ -+typedef struct VFIOIOMMUFDContainer { -+ VFIOContainerBase bcontainer; -+ IOMMUFDBackend *be; -+ uint32_t ioas_id; -+} VFIOIOMMUFDContainer; -+ - typedef struct VFIODeviceOps VFIODeviceOps; - - typedef struct VFIODevice { -@@ -116,6 +124,8 @@ typedef struct VFIODevice { - OnOffAuto pre_copy_dirty_page_tracking; - bool dirty_pages_supported; - bool dirty_tracking; -+ int devid; -+ IOMMUFDBackend *iommufd; - } VFIODevice; - - struct VFIODeviceOps { -@@ -201,6 +211,7 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; - extern VFIOGroupList vfio_group_list; - extern VFIODeviceList vfio_device_list; - extern const VFIOIOMMUOps vfio_legacy_ops; -+extern const VFIOIOMMUOps vfio_iommufd_ops; - extern const MemoryListener vfio_memory_listener; - extern int vfio_kvm_device_fd; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch b/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch deleted file mode 100644 index 866a437..0000000 --- a/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch +++ /dev/null @@ -1,155 +0,0 @@ -From f98defd6fe081bc44f5bd823d187d7d3b12832ac Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:23 +0100 -Subject: [PATCH 056/101] vfio/iommufd: Introduce a VFIOIOMMU iommufd QOM - interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: 
[55/67] 789ecf74ace326b0df5d494fd558d7d0b6294a85 (eauger1/centos-qemu-kvm) - -As previously done for the sPAPR and legacy IOMMU backends, convert -the VFIOIOMMUOps struct to a QOM interface. The set of of operations -for this backend can be referenced with a literal typename instead of -a C struct. - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit ce5f6d49f5845c3b9955cc377a5223c3f8d7ba1e) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 2 +- - hw/vfio/iommufd.c | 35 ++++++++++++++++++++------- - include/hw/vfio/vfio-common.h | 1 - - include/hw/vfio/vfio-container-base.h | 2 +- - 4 files changed, 28 insertions(+), 12 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 2329d0efc8..89ff1c7aed 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1508,7 +1508,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - - #ifdef CONFIG_IOMMUFD - if (vbasedev->iommufd) { -- ops = &vfio_iommufd_ops; -+ ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - } - #endif - -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index 87a561c545..d4c586e842 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -319,6 +319,8 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - int ret, devfd; - uint32_t ioas_id; - Error *err = NULL; -+ const VFIOIOMMUClass *iommufd_vioc = -+ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - - if (vbasedev->fd < 0) { - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); -@@ -340,7 +342,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - /* try to attach to an existing container in this space */ - QLIST_FOREACH(bcontainer, &space->containers, next) { - container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); -- if (bcontainer->ops != &vfio_iommufd_ops || -+ if (bcontainer->ops != iommufd_vioc || - vbasedev->iommufd != container->be) { - continue; 
- } -@@ -374,7 +376,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - container->ioas_id = ioas_id; - - bcontainer = &container->bcontainer; -- vfio_container_init(bcontainer, space, &vfio_iommufd_ops); -+ vfio_container_init(bcontainer, space, iommufd_vioc); - QLIST_INSERT_HEAD(&space->containers, bcontainer, next); - - ret = iommufd_cdev_attach_container(vbasedev, container, errp); -@@ -476,9 +478,11 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) - static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) - { - VFIODevice *vbasedev_iter; -+ const VFIOIOMMUClass *iommufd_vioc = -+ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - - QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { -- if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { -+ if (vbasedev_iter->bcontainer->ops != iommufd_vioc) { - continue; - } - if (devid == vbasedev_iter->devid) { -@@ -621,10 +625,23 @@ out_single: - return ret; - } - --const VFIOIOMMUOps vfio_iommufd_ops = { -- .dma_map = iommufd_cdev_map, -- .dma_unmap = iommufd_cdev_unmap, -- .attach_device = iommufd_cdev_attach, -- .detach_device = iommufd_cdev_detach, -- .pci_hot_reset = iommufd_cdev_pci_hot_reset, -+static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) -+{ -+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); -+ -+ vioc->dma_map = iommufd_cdev_map; -+ vioc->dma_unmap = iommufd_cdev_unmap; -+ vioc->attach_device = iommufd_cdev_attach; -+ vioc->detach_device = iommufd_cdev_detach; -+ vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; - }; -+ -+static const TypeInfo types[] = { -+ { -+ .name = TYPE_VFIO_IOMMU_IOMMUFD, -+ .parent = TYPE_VFIO_IOMMU, -+ .class_init = vfio_iommu_iommufd_class_init, -+ }, -+}; -+ -+DEFINE_TYPES(types) -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 14c497b6b0..9b7ef7d02b 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -210,7 +210,6 @@ 
typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; - typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; - extern VFIOGroupList vfio_group_list; - extern VFIODeviceList vfio_device_list; --extern const VFIOIOMMUOps vfio_iommufd_ops; - extern const MemoryListener vfio_memory_listener; - extern int vfio_kvm_device_fd; - -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 9e21d7811f..b2813b0c11 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -17,7 +17,6 @@ - - typedef struct VFIODevice VFIODevice; - typedef struct VFIOIOMMUClass VFIOIOMMUClass; --#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */ - - typedef struct { - unsigned long *bitmap; -@@ -96,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); - #define TYPE_VFIO_IOMMU "vfio-iommu" - #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" - #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" -+#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" - - /* - * VFIOContainerBase is not an abstract QOM object because it felt --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch b/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch deleted file mode 100644 index f77032b..0000000 --- a/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 5a49c5bb690d55fc88b6fb12f059ae932de0a716 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:04 +0800 -Subject: [PATCH 025/101] vfio/iommufd: Relax assert check for iommufd backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [24/67] 2c9e41e9ca0b67ebf807d1643a98866a0cb75768 
(eauger1/centos-qemu-kvm) - -Currently iommufd doesn't support dirty page sync yet, -but it will not block us doing live migration if VFIO -migration is force enabled. - -So in this case we allow set_dirty_page_tracking to be NULL. -Note we don't need same change for query_dirty_bitmap because -when dirty page sync isn't supported, query_dirty_bitmap will -never be called. - -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 36e84d0c17102fa1c887d8c650a13ec08fca0ec0) -Signed-off-by: Eric Auger ---- - hw/vfio/container-base.c | 4 ++++ - hw/vfio/container.c | 4 ---- - 2 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 71f7274973..eee2dcfe76 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -55,6 +55,10 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer, - int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - bool start) - { -+ if (!bcontainer->dirty_pages_supported) { -+ return 0; -+ } -+ - g_assert(bcontainer->ops->set_dirty_page_tracking); - return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); - } -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index 6bacf38222..ed2d721b2b 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -216,10 +216,6 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - .argsz = sizeof(dirty), - }; - -- if (!bcontainer->dirty_pages_supported) { -- return 0; -- } -- - if (start) { - dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; - } else { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch b/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch deleted file mode 100644 index 97d30c9..0000000 --- 
a/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 5549bf1b2e07213c23e280a43ab2ab67d5b7304a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:25 +0100 -Subject: [PATCH 058/101] vfio/iommufd: Remove CONFIG_IOMMUFD usage -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [57/67] 3a6a45d379241d9412e0b8bcfeb9be0b4add59a5 (eauger1/centos-qemu-kvm) - -Availability of the IOMMUFD backend can now be fully determined at -runtime and the ifdef check was a build time protection (for PPC not -supporting it mostly). - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit c1139fa4feba8c320e4bd0a4e34af55caa5ffbb9) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 89ff1c7aed..0d4d8b8416 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -19,7 +19,6 @@ - */ - - #include "qemu/osdep.h" --#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include - #ifdef CONFIG_KVM - #include -@@ -1506,11 +1505,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, - const VFIOIOMMUClass *ops = - VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); - --#ifdef CONFIG_IOMMUFD - if (vbasedev->iommufd) { - ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - } --#endif - - assert(ops); - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch b/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch deleted file mode 100644 index 7401d52..0000000 --- a/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch +++ /dev/null @@ 
-1,56 +0,0 @@ -From 6b36dc2a305af856af03aad2e315eea96a349153 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Thu, 21 Dec 2023 09:09:57 +0100 -Subject: [PATCH 061/101] vfio/iommufd: Remove the use of stat() to check file - existence -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [60/67] 485770e45c1a6399780939bfb8b01b615d9213c6 (eauger1/centos-qemu-kvm) - -Using stat() before opening a file or a directory can lead to a -time-of-check to time-of-use (TOCTOU) filesystem race, which is -reported by coverity as a Security best practices violations. The -sequence could be replaced by open and fdopendir but it doesn't add -much in this case. Simply use opendir to avoid the race. - -Fixes: CID 1531551 -Signed-off-by: Cédric Le Goater -Reviewed-by: Zhenzhong Duan -(cherry picked from commit 6ba254801f6bc7f3ef68a6414f1b107237c7eb26) -Signed-off-by: Eric Auger ---- - hw/vfio/iommufd.c | 6 ------ - 1 file changed, 6 deletions(-) - -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index d4c586e842..9bfddc1360 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -121,17 +121,11 @@ static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) - DIR *dir = NULL; - struct dirent *dent; - gchar *contents; -- struct stat st; - gsize length; - int major, minor; - dev_t vfio_devt; - - path = g_strdup_printf("%s/vfio-dev", sysfs_path); -- if (stat(path, &st) < 0) { -- error_setg_errno(errp, errno, "no such host device"); -- goto out_free_path; -- } -- - dir = opendir(path); - if (!dir) { - error_setg_errno(errp, errno, "couldn't open directory %s", path); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch b/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch 
deleted file mode 100644 index 6556a19..0000000 --- a/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 0c0435e7210b99a6bf7b8f8205f7af8277b7525b Mon Sep 17 00:00:00 2001 -From: Avihai Horon -Date: Sun, 31 Dec 2023 12:48:18 +0200 -Subject: [PATCH 063/101] vfio/migration: Add helper function to set state or - reset device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [62/67] 1a63eea289561a05a6a8527c2a9da0289a7836d9 (eauger1/centos-qemu-kvm) - -There are several places where failure in setting the device state leads -to a device reset, which is done by setting ERROR as the recover state. - -Add a helper function that sets the device state and resets the device -in case of failure. This will make the code cleaner and remove duplicate -comments. - -Signed-off-by: Avihai Horon -Reviewed-by: Cédric Le Goater -Reviewed-by: Philippe Mathieu-Daudé -(cherry picked from commit c817e5a377a334241eed149e35760aca58bdeb34) -Signed-off-by: Eric Auger ---- - hw/vfio/migration.c | 41 +++++++++++++++++------------------------ - 1 file changed, 17 insertions(+), 24 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 28d422b39f..70e6b1a709 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -163,6 +163,19 @@ reset_device: - return ret; - } - -+/* -+ * Some device state transitions require resetting the device if they fail. -+ * This function sets the device in new_state and resets the device if that -+ * fails. Reset is done by using ERROR as the recover state. 
-+ */ -+static int -+vfio_migration_set_state_or_reset(VFIODevice *vbasedev, -+ enum vfio_device_mig_state new_state) -+{ -+ return vfio_migration_set_state(vbasedev, new_state, -+ VFIO_DEVICE_STATE_ERROR); -+} -+ - static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, - uint64_t data_size) - { -@@ -422,12 +435,7 @@ static void vfio_save_cleanup(void *opaque) - * after migration has completed, so it won't increase downtime. - */ - if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) { -- /* -- * If setting the device in STOP state fails, the device should be -- * reset. To do so, use ERROR state as a recover state. -- */ -- vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP, -- VFIO_DEVICE_STATE_ERROR); -+ vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP); - } - - g_free(migration->data_buffer); -@@ -699,12 +707,7 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running, - VFIO_DEVICE_STATE_PRE_COPY_P2P : - VFIO_DEVICE_STATE_RUNNING_P2P; - -- /* -- * If setting the device in new_state fails, the device should be reset. -- * To do so, use ERROR state as a recover state. -- */ -- ret = vfio_migration_set_state(vbasedev, new_state, -- VFIO_DEVICE_STATE_ERROR); -+ ret = vfio_migration_set_state_or_reset(vbasedev, new_state); - if (ret) { - /* - * Migration should be aborted in this case, but vm_state_notify() -@@ -736,12 +739,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) - VFIO_DEVICE_STATE_STOP; - } - -- /* -- * If setting the device in new_state fails, the device should be reset. -- * To do so, use ERROR state as a recover state. 
-- */ -- ret = vfio_migration_set_state(vbasedev, new_state, -- VFIO_DEVICE_STATE_ERROR); -+ ret = vfio_migration_set_state_or_reset(vbasedev, new_state); - if (ret) { - /* - * Migration should be aborted in this case, but vm_state_notify() -@@ -770,12 +768,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) - case MIGRATION_STATUS_CANCELLING: - case MIGRATION_STATUS_CANCELLED: - case MIGRATION_STATUS_FAILED: -- /* -- * If setting the device in RUNNING state fails, the device should -- * be reset. To do so, use ERROR state as a recover state. -- */ -- vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING, -- VFIO_DEVICE_STATE_ERROR); -+ vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING); - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch b/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch deleted file mode 100644 index f79de18..0000000 --- a/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 7788fdc2375e01ead0c8a705c3b3d7467dd93d67 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 21 Nov 2023 16:44:09 +0800 -Subject: [PATCH 030/101] vfio/pci: Allow the selection of a given iommu - backend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [29/67] 363c62607a11093ea0062489e11a708117d8ffb9 (eauger1/centos-qemu-kvm) - -Now we support two types of iommu backends, let's add the capability -to select one of them. 
This depends on whether an iommufd object has -been linked with the vfio-pci device: - -If the user wants to use the legacy backend, it shall not -link the vfio-pci device with any iommufd object: - - -device vfio-pci,host=0000:02:00.0 - -This is called the legacy mode/backend. - -If the user wants to use the iommufd backend (/dev/iommu) it -shall pass an iommufd object id in the vfio-pci device options: - - -object iommufd,id=iommufd0 - -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0 - -Suggested-by: Alex Williamson -Signed-off-by: Eric Auger -Signed-off-by: Yi Liu -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit ee42b261b0a2e465ae003ddcaf1caf117c201f74) -Signed-off-by: Eric Auger ---- - hw/vfio/pci.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 83b2561908..39e6a6678e 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -19,6 +19,7 @@ - */ - - #include "qemu/osdep.h" -+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include - #include - -@@ -42,6 +43,7 @@ - #include "qapi/error.h" - #include "migration/blocker.h" - #include "migration/qemu-file.h" -+#include "sysemu/iommufd.h" - - #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" - -@@ -3415,6 +3417,10 @@ static Property vfio_pci_dev_properties[] = { - * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), - */ -+#ifdef CONFIG_IOMMUFD -+ DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, -+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -+#endif - DEFINE_PROP_END_OF_LIST(), - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch b/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch deleted file mode 100644 index 837e490..0000000 --- a/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch +++ /dev/null @@ -1,69 +0,0 @@ 
-From 43236995e8ad336d366b625fb8362046be53fc34 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 29 Jan 2024 09:46:34 +0100 -Subject: [PATCH] vfio/pci: Clear MSI-X IRQ index always -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 218: vfio/pci: Clear MSI-X IRQ index always -RH-Jira: RHEL-21293 -RH-Acked-by: Eric Auger -RH-Acked-by: Alex Williamson -RH-Commit: [1/1] b4b587b13c11e350d3e5fcc11ba66a006b25a763 (clegoate/qemu-kvm-c9s) - -JIRA: https://issues.redhat.com/browse/RHEL-21293 - -commit d2b668fca5652760b435ce812a743bba03d2f316 -Author: Cédric Le Goater -Date: Thu Jan 25 14:27:36 2024 +0100 - - vfio/pci: Clear MSI-X IRQ index always - - When doing device assignment of a physical device, MSI-X can be - enabled with no vectors enabled and this sets the IRQ index to - VFIO_PCI_MSIX_IRQ_INDEX. However, when MSI-X is disabled, the IRQ - index is left untouched if no vectors are in use. Then, when INTx - is enabled, the IRQ index value is considered incompatible (set to - MSI-X) and VFIO_DEVICE_SET_IRQS fails. QEMU complains with : - - qemu-system-x86_64: vfio 0000:08:00.0: Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument - - To avoid that, unconditionaly clear the IRQ index when MSI-X is - disabled. 
- - Buglink: https://issues.redhat.com/browse/RHEL-21293 - Fixes: 5ebffa4e87e7 ("vfio/pci: use an invalid fd to enable MSI-X") - Cc: Jing Liu - Cc: Alex Williamson - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index adb7c09367..29bb8067eb 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -829,9 +829,11 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev) - } - } - -- if (vdev->nr_vectors) { -- vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); -- } -+ /* -+ * Always clear MSI-X IRQ index. A PF device could have enabled -+ * MSI-X with no vectors. See vfio_msix_enable(). -+ */ -+ vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); - - vfio_msi_disable_common(vdev); - vfio_intx_enable(vdev, &err); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch b/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch deleted file mode 100644 index af6593c..0000000 --- a/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch +++ /dev/null @@ -1,139 +0,0 @@ -From fe5ecedd452754eeb238b23eb0544ed3c5086157 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:06 +0800 -Subject: [PATCH 027/101] vfio/pci: Extract out a helper - vfio_pci_get_pci_hot_reset_info -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [26/67] 730b7f1496f4f21310fa13c79cb87f8d5e2ad2a8 (eauger1/centos-qemu-kvm) - -This helper will be used by both legacy and iommufd backends. - -No functional changes intended. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 4d36ec23a75eb387492f4d68ff1b8eeee5d68142) -Signed-off-by: Eric Auger ---- - hw/vfio/pci.c | 54 +++++++++++++++++++++++++++++++++++---------------- - hw/vfio/pci.h | 3 +++ - 2 files changed, 40 insertions(+), 17 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index ec98080f28..b482e5479f 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2448,22 +2448,13 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) - return (strcmp(tmp, name) == 0); - } - --static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) -+int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, -+ struct vfio_pci_hot_reset_info **info_p) - { -- VFIOGroup *group; - struct vfio_pci_hot_reset_info *info; -- struct vfio_pci_dependent_device *devices; -- struct vfio_pci_hot_reset *reset; -- int32_t *fds; -- int ret, i, count; -- bool multi = false; -+ int ret, count; - -- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); -- -- if (!single) { -- vfio_pci_pre_reset(vdev); -- } -- vdev->vbasedev.needs_reset = false; -+ assert(info_p && !*info_p); - - info = g_malloc0(sizeof(*info)); - info->argsz = sizeof(*info); -@@ -2471,24 +2462,53 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); - if (ret && errno != ENOSPC) { - ret = -errno; -+ g_free(info); - if (!vdev->has_pm_reset) { - error_report("vfio: Cannot reset device %s, " - "no available reset mechanism.", vdev->vbasedev.name); - } -- goto out_single; -+ return ret; - } - - count = info->count; -- info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices))); -- info->argsz = sizeof(*info) + (count * sizeof(*devices)); -- devices = &info->devices[0]; -+ info = g_realloc(info, sizeof(*info) + (count * sizeof(info->devices[0]))); -+ info->argsz = sizeof(*info) + (count * sizeof(info->devices[0])); - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); - if (ret) { - ret = -errno; -+ g_free(info); - error_report("vfio: hot reset info failed: %m"); -+ return ret; -+ } -+ -+ *info_p = info; -+ return 0; -+} -+ -+static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) -+{ -+ VFIOGroup *group; -+ struct vfio_pci_hot_reset_info *info = NULL; -+ struct vfio_pci_dependent_device *devices; -+ struct vfio_pci_hot_reset *reset; -+ int32_t *fds; -+ int ret, i, count; -+ bool multi = false; -+ -+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); -+ -+ if (!single) { -+ vfio_pci_pre_reset(vdev); -+ } -+ vdev->vbasedev.needs_reset = false; -+ -+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); -+ -+ if (ret) { - goto out_single; - } -+ devices = &info->devices[0]; - - trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); - -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index eb74d9de2d..3568a6135d 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -219,6 +219,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); - - extern const PropertyInfo qdev_prop_nv_gpudirect_clique; - -+int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, -+ struct vfio_pci_hot_reset_info **info_p); -+ - int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); - - int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch b/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch deleted file mode 100644 index 2a2db5f..0000000 --- a/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch +++ /dev/null @@ -1,466 +0,0 @@ -From acc3e5306e184567006bc45e7f36f2473e75d08a Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:07 +0800 -Subject: [PATCH 028/101] vfio/pci: Introduce a vfio pci hot reset interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [27/67] 192088dbf2cf88663acd2416f69b7eeb175b2525 (eauger1/centos-qemu-kvm) - -Legacy vfio pci and iommufd cdev have different process to hot reset -vfio device, expand current code to abstract out pci_hot_reset callback -for legacy vfio, this same interface will also be used by iommufd -cdev vfio device. - -Rename vfio_pci_hot_reset to vfio_legacy_pci_hot_reset and move it -into container.c. 
- -vfio_pci_[pre/post]_reset and vfio_pci_host_match are exported so -they could be called in legacy and iommufd pci_hot_reset callback. - -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit c328e7e8ad1c969dbcbe90ee76afcd3cfec5e945) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 170 ++++++++++++++++++++++++++ - hw/vfio/pci.c | 168 +------------------------ - hw/vfio/pci.h | 3 + - include/hw/vfio/vfio-container-base.h | 3 + - 4 files changed, 182 insertions(+), 162 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index ed2d721b2b..1dbf9b9a17 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -33,6 +33,7 @@ - #include "trace.h" - #include "qapi/error.h" - #include "migration/migration.h" -+#include "pci.h" - - VFIOGroupList vfio_group_list = - QLIST_HEAD_INITIALIZER(vfio_group_list); -@@ -922,6 +923,174 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev) - vfio_put_group(group); - } - -+static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single) -+{ -+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); -+ VFIOGroup *group; -+ struct vfio_pci_hot_reset_info *info = NULL; -+ struct vfio_pci_dependent_device *devices; -+ struct vfio_pci_hot_reset *reset; -+ int32_t *fds; -+ int ret, i, count; -+ bool multi = false; -+ -+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); -+ -+ if (!single) { -+ vfio_pci_pre_reset(vdev); -+ } -+ vdev->vbasedev.needs_reset = false; -+ -+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); -+ -+ if (ret) { -+ goto out_single; -+ } -+ devices = &info->devices[0]; -+ -+ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); -+ -+ /* Verify that we have all the groups required */ -+ for (i = 0; i < info->count; i++) { -+ PCIHostDeviceAddress host; -+ VFIOPCIDevice *tmp; -+ VFIODevice *vbasedev_iter; -+ -+ host.domain = devices[i].segment; -+ host.bus = devices[i].bus; -+ host.slot = PCI_SLOT(devices[i].devfn); -+ host.function = PCI_FUNC(devices[i].devfn); -+ -+ trace_vfio_pci_hot_reset_dep_devices(host.domain, -+ host.bus, host.slot, host.function, devices[i].group_id); -+ -+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { -+ continue; -+ } -+ -+ QLIST_FOREACH(group, &vfio_group_list, next) { -+ if (group->groupid == devices[i].group_id) { -+ break; -+ } -+ } -+ -+ if (!group) { -+ if (!vdev->has_pm_reset) { -+ error_report("vfio: Cannot reset device %s, " -+ "depends on group %d which is not owned.", -+ vdev->vbasedev.name, devices[i].group_id); -+ } -+ ret = -EPERM; -+ goto out; -+ } -+ -+ /* Prep dependent devices for reset and clear our marker. 
*/ -+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { -+ if (!vbasedev_iter->dev->realized || -+ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { -+ continue; -+ } -+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); -+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { -+ if (single) { -+ ret = -EINVAL; -+ goto out_single; -+ } -+ vfio_pci_pre_reset(tmp); -+ tmp->vbasedev.needs_reset = false; -+ multi = true; -+ break; -+ } -+ } -+ } -+ -+ if (!single && !multi) { -+ ret = -EINVAL; -+ goto out_single; -+ } -+ -+ /* Determine how many group fds need to be passed */ -+ count = 0; -+ QLIST_FOREACH(group, &vfio_group_list, next) { -+ for (i = 0; i < info->count; i++) { -+ if (group->groupid == devices[i].group_id) { -+ count++; -+ break; -+ } -+ } -+ } -+ -+ reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); -+ reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); -+ fds = &reset->group_fds[0]; -+ -+ /* Fill in group fds */ -+ QLIST_FOREACH(group, &vfio_group_list, next) { -+ for (i = 0; i < info->count; i++) { -+ if (group->groupid == devices[i].group_id) { -+ fds[reset->count++] = group->fd; -+ break; -+ } -+ } -+ } -+ -+ /* Bus reset! */ -+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); -+ g_free(reset); -+ if (ret) { -+ ret = -errno; -+ } -+ -+ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, -+ ret ? 
strerror(errno) : "Success"); -+ -+out: -+ /* Re-enable INTx on affected devices */ -+ for (i = 0; i < info->count; i++) { -+ PCIHostDeviceAddress host; -+ VFIOPCIDevice *tmp; -+ VFIODevice *vbasedev_iter; -+ -+ host.domain = devices[i].segment; -+ host.bus = devices[i].bus; -+ host.slot = PCI_SLOT(devices[i].devfn); -+ host.function = PCI_FUNC(devices[i].devfn); -+ -+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { -+ continue; -+ } -+ -+ QLIST_FOREACH(group, &vfio_group_list, next) { -+ if (group->groupid == devices[i].group_id) { -+ break; -+ } -+ } -+ -+ if (!group) { -+ break; -+ } -+ -+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { -+ if (!vbasedev_iter->dev->realized || -+ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { -+ continue; -+ } -+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); -+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { -+ vfio_pci_post_reset(tmp); -+ break; -+ } -+ } -+ } -+out_single: -+ if (!single) { -+ vfio_pci_post_reset(vdev); -+ } -+ g_free(info); -+ -+ return ret; -+} -+ - const VFIOIOMMUOps vfio_legacy_ops = { - .dma_map = vfio_legacy_dma_map, - .dma_unmap = vfio_legacy_dma_unmap, -@@ -929,4 +1098,5 @@ const VFIOIOMMUOps vfio_legacy_ops = { - .detach_device = vfio_legacy_detach_device, - .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, - .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, -+ .pci_hot_reset = vfio_legacy_pci_hot_reset, - }; -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index b482e5479f..83b2561908 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2377,7 +2377,7 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) - return 0; - } - --static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) -+void vfio_pci_pre_reset(VFIOPCIDevice *vdev) - { - PCIDevice *pdev = &vdev->pdev; - uint16_t cmd; -@@ -2414,7 +2414,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) - vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); - } - --static void 
vfio_pci_post_reset(VFIOPCIDevice *vdev) -+void vfio_pci_post_reset(VFIOPCIDevice *vdev) - { - Error *err = NULL; - int nr; -@@ -2438,7 +2438,7 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) - vfio_quirk_reset(vdev); - } - --static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) -+bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) - { - char tmp[13]; - -@@ -2488,166 +2488,10 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, - - static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) - { -- VFIOGroup *group; -- struct vfio_pci_hot_reset_info *info = NULL; -- struct vfio_pci_dependent_device *devices; -- struct vfio_pci_hot_reset *reset; -- int32_t *fds; -- int ret, i, count; -- bool multi = false; -- -- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); -- -- if (!single) { -- vfio_pci_pre_reset(vdev); -- } -- vdev->vbasedev.needs_reset = false; -- -- ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); -- -- if (ret) { -- goto out_single; -- } -- devices = &info->devices[0]; -- -- trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); -- -- /* Verify that we have all the groups required */ -- for (i = 0; i < info->count; i++) { -- PCIHostDeviceAddress host; -- VFIOPCIDevice *tmp; -- VFIODevice *vbasedev_iter; -- -- host.domain = devices[i].segment; -- host.bus = devices[i].bus; -- host.slot = PCI_SLOT(devices[i].devfn); -- host.function = PCI_FUNC(devices[i].devfn); -- -- trace_vfio_pci_hot_reset_dep_devices(host.domain, -- host.bus, host.slot, host.function, devices[i].group_id); -- -- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { -- continue; -- } -- -- QLIST_FOREACH(group, &vfio_group_list, next) { -- if (group->groupid == devices[i].group_id) { -- break; -- } -- } -- -- if (!group) { -- if (!vdev->has_pm_reset) { -- error_report("vfio: Cannot reset device %s, " -- "depends on group %d which is not owned.", -- vdev->vbasedev.name, devices[i].group_id); -- 
} -- ret = -EPERM; -- goto out; -- } -- -- /* Prep dependent devices for reset and clear our marker. */ -- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { -- if (!vbasedev_iter->dev->realized || -- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { -- continue; -- } -- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); -- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { -- if (single) { -- ret = -EINVAL; -- goto out_single; -- } -- vfio_pci_pre_reset(tmp); -- tmp->vbasedev.needs_reset = false; -- multi = true; -- break; -- } -- } -- } -- -- if (!single && !multi) { -- ret = -EINVAL; -- goto out_single; -- } -- -- /* Determine how many group fds need to be passed */ -- count = 0; -- QLIST_FOREACH(group, &vfio_group_list, next) { -- for (i = 0; i < info->count; i++) { -- if (group->groupid == devices[i].group_id) { -- count++; -- break; -- } -- } -- } -- -- reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); -- reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); -- fds = &reset->group_fds[0]; -- -- /* Fill in group fds */ -- QLIST_FOREACH(group, &vfio_group_list, next) { -- for (i = 0; i < info->count; i++) { -- if (group->groupid == devices[i].group_id) { -- fds[reset->count++] = group->fd; -- break; -- } -- } -- } -- -- /* Bus reset! */ -- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); -- g_free(reset); -- -- trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, -- ret ? 
strerror(errno) : "Success"); -- --out: -- /* Re-enable INTx on affected devices */ -- for (i = 0; i < info->count; i++) { -- PCIHostDeviceAddress host; -- VFIOPCIDevice *tmp; -- VFIODevice *vbasedev_iter; -- -- host.domain = devices[i].segment; -- host.bus = devices[i].bus; -- host.slot = PCI_SLOT(devices[i].devfn); -- host.function = PCI_FUNC(devices[i].devfn); -- -- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { -- continue; -- } -- -- QLIST_FOREACH(group, &vfio_group_list, next) { -- if (group->groupid == devices[i].group_id) { -- break; -- } -- } -- -- if (!group) { -- break; -- } -- -- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { -- if (!vbasedev_iter->dev->realized || -- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { -- continue; -- } -- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); -- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { -- vfio_pci_post_reset(tmp); -- break; -- } -- } -- } --out_single: -- if (!single) { -- vfio_pci_post_reset(vdev); -- } -- g_free(info); -+ VFIODevice *vbasedev = &vdev->vbasedev; -+ const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops; - -- return ret; -+ return ops->pci_hot_reset(vbasedev, single); - } - - /* -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 3568a6135d..b7de39c010 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -219,6 +219,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); - - extern const PropertyInfo qdev_prop_nv_gpudirect_clique; - -+void vfio_pci_pre_reset(VFIOPCIDevice *vdev); -+void vfio_pci_post_reset(VFIOPCIDevice *vdev); -+bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name); - int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, - struct vfio_pci_hot_reset_info **info_p); - -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 4b6f017c6f..45bb19c767 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -106,6 +106,9 @@ struct 
VFIOIOMMUOps { - int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); - int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, - hwaddr iova, hwaddr size); -+ /* PCI specific */ -+ int (*pci_hot_reset)(VFIODevice *vbasedev, bool single); -+ - /* SPAPR specific */ - int (*add_window)(VFIOContainerBase *bcontainer, - MemoryRegionSection *section, --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch b/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch deleted file mode 100644 index 4a973b5..0000000 --- a/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch +++ /dev/null @@ -1,237 +0,0 @@ -From 965a44793806fef2094906947bd3b428638bf89a Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:10 +0800 -Subject: [PATCH 031/101] vfio/pci: Make vfio cdev pre-openable by passing a - file handle -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [30/67] a14b824b700e8fb36633cd159bcc422d992a316f (eauger1/centos-qemu-kvm) - -Conflicts: contextual conflict in hw/vfio/pci.c due to -RHEL-only f73562144e492 vfio: cap number of devices that can be assigned - -This gives management tools like libvirt a chance to open the vfio -cdev with privilege and pass FD to qemu. This way qemu never needs -to have privilege to open a VFIO or iommu cdev node. - -Together with the earlier support of pre-opening /dev/iommu device, -now we have full support of passing a vfio device to unprivileged -qemu by management tool. This mode is no more considered for the -legacy backend. So let's remove the "TODO" comment. 
- -Add helper functions vfio_device_set_fd() and vfio_device_get_name() -to set fd and get device name, they will also be used by other vfio -devices. - -There is no easy way to check if a device is mdev with FD passing, -so fail the x-balloon-allowed check unconditionally in this case. - -There is also no easy way to get BDF as name with FD passing, so -we fake a name by VFIO_FD[fd]. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Tested-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit da3e04b26fd8d15b344944504d5ffa9c5f20b54b) -Signed-off-by: Eric Auger ---- - hw/vfio/helpers.c | 43 +++++++++++++++++++++++++++++++++++ - hw/vfio/iommufd.c | 12 ++++++---- - hw/vfio/pci.c | 28 +++++++++++++---------- - include/hw/vfio/vfio-common.h | 4 ++++ - 4 files changed, 71 insertions(+), 16 deletions(-) - -diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c -index 168847e7c5..3592c3d54e 100644 ---- a/hw/vfio/helpers.c -+++ b/hw/vfio/helpers.c -@@ -27,6 +27,7 @@ - #include "trace.h" - #include "qapi/error.h" - #include "qemu/error-report.h" -+#include "monitor/monitor.h" - - /* - * Common VFIO interrupt disable -@@ -609,3 +610,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) - - return ret; - } -+ -+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) -+{ -+ struct stat st; -+ -+ if (vbasedev->fd < 0) { -+ if (stat(vbasedev->sysfsdev, &st) < 0) { -+ error_setg_errno(errp, errno, "no such host device"); -+ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); -+ return -errno; -+ } -+ /* User may specify a name, e.g: VFIO platform device */ -+ if (!vbasedev->name) { -+ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); -+ } -+ } else { -+ if (!vbasedev->iommufd) { -+ error_setg(errp, "Use FD passing only with iommufd backend"); -+ return -EINVAL; -+ } -+ /* -+ * Give a name with fd so any function printing out vbasedev->name -+ * will not break. 
-+ */ -+ if (!vbasedev->name) { -+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); -+ } -+ } -+ -+ return 0; -+} -+ -+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) -+{ -+ int fd = monitor_fd_param(monitor_cur(), str, errp); -+ -+ if (fd < 0) { -+ error_prepend(errp, "Could not parse remote object fd %s:", str); -+ return; -+ } -+ vbasedev->fd = fd; -+} -diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c -index 6e53e013ef..5accd26484 100644 ---- a/hw/vfio/iommufd.c -+++ b/hw/vfio/iommufd.c -@@ -320,11 +320,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, - uint32_t ioas_id; - Error *err = NULL; - -- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); -- if (devfd < 0) { -- return devfd; -+ if (vbasedev->fd < 0) { -+ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); -+ if (devfd < 0) { -+ return devfd; -+ } -+ vbasedev->fd = devfd; -+ } else { -+ devfd = vbasedev->fd; - } -- vbasedev->fd = devfd; - - ret = iommufd_cdev_connect_and_bind(vbasedev, errp); - if (ret) { -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 39e6a6678e..3412a63bb1 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2949,7 +2949,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - VFIOGroup *group; - char *tmp, *subsys; - Error *err = NULL; -- struct stat st; - int ret, i = 0; - bool is_mdev; - char uuid[UUID_STR_LEN]; -@@ -2976,11 +2975,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - return; - } - -- if (!vbasedev->sysfsdev) { -+ if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { - if (!(~vdev->host.domain || ~vdev->host.bus || - ~vdev->host.slot || ~vdev->host.function)) { - error_setg(errp, "No provided host device"); - error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " -+#ifdef CONFIG_IOMMUFD -+ "or -device vfio-pci,fd=DEVICE_FD " -+#endif - "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); - return; - } -@@ -2990,13 +2992,9 @@ static void vfio_realize(PCIDevice *pdev, Error 
**errp) - vdev->host.slot, vdev->host.function); - } - -- if (stat(vbasedev->sysfsdev, &st) < 0) { -- error_setg_errno(errp, errno, "no such host device"); -- error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); -+ if (vfio_device_get_name(vbasedev, errp) < 0) { - return; - } -- -- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); - vbasedev->ops = &vfio_pci_ops; - vbasedev->type = VFIO_DEVICE_TYPE_PCI; - vbasedev->dev = DEVICE(vdev); -@@ -3356,6 +3354,7 @@ static void vfio_instance_init(Object *obj) - vdev->host.bus = ~0U; - vdev->host.slot = ~0U; - vdev->host.function = ~0U; -+ vdev->vbasedev.fd = -1; - - vdev->nv_gpudirect_clique = 0xFF; - -@@ -3412,11 +3411,6 @@ static Property vfio_pci_dev_properties[] = { - qdev_prop_nv_gpudirect_clique, uint8_t), - DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo, - OFF_AUTOPCIBAR_OFF), -- /* -- * TODO - support passed fds... is this necessary? -- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), -- * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), -- */ - #ifdef CONFIG_IOMMUFD - DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, - TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -@@ -3424,6 +3418,13 @@ static Property vfio_pci_dev_properties[] = { - DEFINE_PROP_END_OF_LIST(), - }; - -+#ifdef CONFIG_IOMMUFD -+static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) -+{ -+ vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); -+} -+#endif -+ - static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); -@@ -3431,6 +3432,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) - - dc->reset = vfio_pci_reset; - device_class_set_props(dc, vfio_pci_dev_properties); -+#ifdef CONFIG_IOMMUFD -+ object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); -+#endif - dc->desc = "VFIO-based PCI device assignment"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); - pdc->realize 
= vfio_realize; -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 3dac5c167e..697bf24a35 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -251,4 +251,8 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, - hwaddr size); - int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, - uint64_t size, ram_addr_t ram_addr); -+ -+/* Returns 0 on success, or a negative errno. */ -+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); -+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch b/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch deleted file mode 100644 index d426ede..0000000 --- a/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 942bd7251d166f558e0e6acf7ba853e940e2fb52 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:21 +0800 -Subject: [PATCH 042/101] vfio/pci: Move VFIODevice initializations in - vfio_instance_init -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [41/67] 67392d7a92a6ec2155697a355c88d295338a0785 (eauger1/centos-qemu-kvm) - -Some of the VFIODevice initializations is in vfio_realize, -move all of them in vfio_instance_init. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit dd2fcb1716be9b89c726b3446f38446bb99d6b3a) -Signed-off-by: Eric Auger ---- - hw/vfio/pci.c | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 3412a63bb1..3f5900cc46 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2995,9 +2995,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - if (vfio_device_get_name(vbasedev, errp) < 0) { - return; - } -- vbasedev->ops = &vfio_pci_ops; -- vbasedev->type = VFIO_DEVICE_TYPE_PCI; -- vbasedev->dev = DEVICE(vdev); - - /* - * Mediated devices *might* operate compatibly with discarding of RAM, but -@@ -3346,6 +3343,7 @@ static void vfio_instance_init(Object *obj) - { - PCIDevice *pci_dev = PCI_DEVICE(obj); - VFIOPCIDevice *vdev = VFIO_PCI(obj); -+ VFIODevice *vbasedev = &vdev->vbasedev; - - device_add_bootindex_property(obj, &vdev->bootindex, - "bootindex", NULL, -@@ -3354,7 +3352,11 @@ static void vfio_instance_init(Object *obj) - vdev->host.bus = ~0U; - vdev->host.slot = ~0U; - vdev->host.function = ~0U; -- vdev->vbasedev.fd = -1; -+ -+ vbasedev->type = VFIO_DEVICE_TYPE_PCI; -+ vbasedev->ops = &vfio_pci_ops; -+ vbasedev->dev = DEVICE(vdev); -+ vbasedev->fd = -1; - - vdev->nv_gpudirect_clique = 0xFF; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch b/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch deleted file mode 100644 index 06c2f0f..0000000 --- a/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch +++ /dev/null @@ -1,77 +0,0 @@ -From ede579d6d5fe5be9235d6a218efdb237192aee0e Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:11 +0800 -Subject: [PATCH 032/101] vfio/platform: Allow the selection of a given iommu - backend -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [31/67] aba1dc16cada602edd7be1a28b0f57991131e6f7 (eauger1/centos-qemu-kvm) - -Now we support two types of iommu backends, let's add the capability -to select one of them. This depends on whether an iommufd object has -been linked with the vfio-platform device: - -If the user wants to use the legacy backend, it shall not -link the vfio-platform device with any iommufd object: - - -device vfio-platform,host=XXX - -This is called the legacy mode/backend. - -If the user wants to use the iommufd backend (/dev/iommu) it -shall pass an iommufd object id in the vfio-platform device options: - - -object iommufd,id=iommufd0 - -device vfio-platform,host=XXX,iommufd=iommufd0 - -Suggested-by: Alex Williamson -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Reviewed-by: Eric Auger -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit a6c50e1c3f8d0eb77edaea392e61508bb3c516f8) -Signed-off-by: Eric Auger ---- - hw/vfio/platform.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c -index 8e3d4ac458..98ae4bc655 100644 ---- a/hw/vfio/platform.c -+++ b/hw/vfio/platform.c -@@ -15,11 +15,13 @@ - */ - - #include "qemu/osdep.h" -+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ - #include "qapi/error.h" - #include - #include - - #include "hw/vfio/vfio-platform.h" -+#include "sysemu/iommufd.h" - #include "migration/vmstate.h" - #include "qemu/error-report.h" - #include "qemu/lockable.h" -@@ -649,6 +651,10 @@ static Property vfio_platform_dev_properties[] = { - DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, - mmap_timeout, 1100), - DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), -+#ifdef CONFIG_IOMMUFD -+ 
DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd, -+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -+#endif - DEFINE_PROP_END_OF_LIST(), - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch b/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch deleted file mode 100644 index f931524..0000000 --- a/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 22664f4115d9b297ef4276e48f8ba0bc195ec99e Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:12 +0800 -Subject: [PATCH 033/101] vfio/platform: Make vfio cdev pre-openable by passing - a file handle -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [32/67] 069867dce64b826e92dc2051405a4ded5261981f (eauger1/centos-qemu-kvm) - -This gives management tools like libvirt a chance to open the vfio -cdev with privilege and pass FD to qemu. This way qemu never needs -to have privilege to open a VFIO or iommu cdev node. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit 3016e60f8f715d2058a48e4956be994482c5e218) -Signed-off-by: Eric Auger ---- - hw/vfio/platform.c | 32 ++++++++++++++++++++++++-------- - 1 file changed, 24 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c -index 98ae4bc655..a97d9c6234 100644 ---- a/hw/vfio/platform.c -+++ b/hw/vfio/platform.c -@@ -531,14 +531,13 @@ static VFIODeviceOps vfio_platform_ops = { - */ - static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) - { -- struct stat st; - int ret; - -- /* @sysfsdev takes precedence over @host */ -- if (vbasedev->sysfsdev) { -+ /* @fd takes precedence over @sysfsdev which takes precedence over @host */ -+ if (vbasedev->fd < 0 && vbasedev->sysfsdev) { - g_free(vbasedev->name); - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); -- } else { -+ } else if (vbasedev->fd < 0) { - if (!vbasedev->name || strchr(vbasedev->name, '/')) { - error_setg(errp, "wrong host device name"); - return -EINVAL; -@@ -548,10 +547,9 @@ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) - vbasedev->name); - } - -- if (stat(vbasedev->sysfsdev, &st) < 0) { -- error_setg_errno(errp, errno, -- "failed to get the sysfs host device file status"); -- return -errno; -+ ret = vfio_device_get_name(vbasedev, errp); -+ if (ret) { -+ return ret; - } - - ret = vfio_attach_device(vbasedev->name, vbasedev, -@@ -658,6 +656,20 @@ static Property vfio_platform_dev_properties[] = { - DEFINE_PROP_END_OF_LIST(), - }; - -+static void vfio_platform_instance_init(Object *obj) -+{ -+ VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); -+ -+ vdev->vbasedev.fd = -1; -+} -+ -+#ifdef CONFIG_IOMMUFD -+static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp) -+{ -+ vfio_device_set_fd(&VFIO_PLATFORM_DEVICE(obj)->vbasedev, str, errp); -+} -+#endif -+ - static void 
vfio_platform_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); -@@ -665,6 +677,9 @@ static void vfio_platform_class_init(ObjectClass *klass, void *data) - - dc->realize = vfio_platform_realize; - device_class_set_props(dc, vfio_platform_dev_properties); -+#ifdef CONFIG_IOMMUFD -+ object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd); -+#endif - dc->vmsd = &vfio_platform_vmstate; - dc->desc = "VFIO-based platform device assignment"; - sbc->connect_irq_notifier = vfio_start_irqfd_injection; -@@ -677,6 +692,7 @@ static const TypeInfo vfio_platform_dev_info = { - .name = TYPE_VFIO_PLATFORM, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(VFIOPlatformDevice), -+ .instance_init = vfio_platform_instance_init, - .class_init = vfio_platform_class_init, - .class_size = sizeof(VFIOPlatformDeviceClass), - }; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch b/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch deleted file mode 100644 index 56283a6..0000000 --- a/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 2417020283532030f424fe07dfeb7477e6489640 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Tue, 21 Nov 2023 16:44:22 +0800 -Subject: [PATCH 043/101] vfio/platform: Move VFIODevice initializations in - vfio_platform_instance_init -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [42/67] 53a459b6246d7d7bdc7a62ac92f02f1e775a54a6 (eauger1/centos-qemu-kvm) - -Some of the VFIODevice initializations is in vfio_platform_realize, -move all of them in vfio_platform_instance_init. - -No functional change intended. 
- -Suggested-by: Cédric Le Goater -Signed-off-by: Zhenzhong Duan -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Nicolin Chen -Signed-off-by: Cédric Le Goater -(cherry picked from commit a0cf44c8d618578843a65ea7f6d3db8ce52185bc) -Signed-off-by: Eric Auger ---- - hw/vfio/platform.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c -index a97d9c6234..506eb8193f 100644 ---- a/hw/vfio/platform.c -+++ b/hw/vfio/platform.c -@@ -581,10 +581,6 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) - VFIODevice *vbasedev = &vdev->vbasedev; - int i, ret; - -- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; -- vbasedev->dev = dev; -- vbasedev->ops = &vfio_platform_ops; -- - qemu_mutex_init(&vdev->intp_mutex); - - trace_vfio_platform_realize(vbasedev->sysfsdev ? -@@ -659,8 +655,12 @@ static Property vfio_platform_dev_properties[] = { - static void vfio_platform_instance_init(Object *obj) - { - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); -+ VFIODevice *vbasedev = &vdev->vbasedev; - -- vdev->vbasedev.fd = -1; -+ vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; -+ vbasedev->ops = &vfio_platform_ops; -+ vbasedev->dev = DEVICE(vdev); -+ vbasedev->fd = -1; - } - - #ifdef CONFIG_IOMMUFD --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch b/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch deleted file mode 100644 index fb7e707..0000000 --- a/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch +++ /dev/null @@ -1,129 +0,0 @@ -From e75ec2aca351daabe597ca6322c1589885f30d7a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:16 +0100 -Subject: [PATCH 049/101] vfio/spapr: Extend VFIOIOMMUOps with a release - handler -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend 
backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [48/67] 1c4d22a6f69324805d050767fcf178d8566f2030 (eauger1/centos-qemu-kvm) - -This allows to abstract a bit more the sPAPR IOMMU support in the -legacy IOMMU backend. - -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit 001a013ea3f125d2ec0e709b5765754149d8d968) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 10 +++----- - hw/vfio/spapr.c | 35 +++++++++++++++------------ - include/hw/vfio/vfio-container-base.h | 1 + - 3 files changed, 24 insertions(+), 22 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index b22feb8ded..1e77a2929e 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -632,9 +632,8 @@ listener_release_exit: - QLIST_REMOVE(bcontainer, next); - vfio_kvm_device_del_group(group); - memory_listener_unregister(&bcontainer->listener); -- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || -- container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { -- vfio_spapr_container_deinit(container); -+ if (bcontainer->ops->release) { -+ bcontainer->ops->release(bcontainer); - } - - enable_discards_exit: -@@ -667,9 +666,8 @@ static void vfio_disconnect_container(VFIOGroup *group) - */ - if (QLIST_EMPTY(&container->group_list)) { - memory_listener_unregister(&bcontainer->listener); -- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || -- container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { -- vfio_spapr_container_deinit(container); -+ if (bcontainer->ops->release) { -+ bcontainer->ops->release(bcontainer); - } - } - -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 5c6426e697..44617dfc6b 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -440,6 +440,24 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, - } - } - -+static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) -+{ -+ VFIOContainer *container = 
container_of(bcontainer, VFIOContainer, -+ bcontainer); -+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -+ container); -+ VFIOHostDMAWindow *hostwin, *next; -+ -+ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { -+ memory_listener_unregister(&scontainer->prereg_listener); -+ } -+ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, -+ next) { -+ QLIST_REMOVE(hostwin, hostwin_next); -+ g_free(hostwin); -+ } -+} -+ - static VFIOIOMMUOps vfio_iommu_spapr_ops; - - static void setup_spapr_ops(VFIOContainerBase *bcontainer) -@@ -447,6 +465,7 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer) - vfio_iommu_spapr_ops = *bcontainer->ops; - vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; - vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window; -+ vfio_iommu_spapr_ops.release = vfio_spapr_container_release; - bcontainer->ops = &vfio_iommu_spapr_ops; - } - -@@ -527,19 +546,3 @@ listener_unregister_exit: - } - return ret; - } -- --void vfio_spapr_container_deinit(VFIOContainer *container) --{ -- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -- container); -- VFIOHostDMAWindow *hostwin, *next; -- -- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { -- memory_listener_unregister(&scontainer->prereg_listener); -- } -- QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, -- next) { -- QLIST_REMOVE(hostwin, hostwin_next); -- g_free(hostwin); -- } --} -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 2ae297ccda..5c9594b6c7 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -117,5 +117,6 @@ struct VFIOIOMMUOps { - Error **errp); - void (*del_window)(VFIOContainerBase *bcontainer, - MemoryRegionSection *section); -+ void (*release)(VFIOContainerBase *bcontainer); - }; - #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ --- -2.39.3 - 
diff --git a/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch b/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch deleted file mode 100644 index f835acb..0000000 --- a/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch +++ /dev/null @@ -1,150 +0,0 @@ -From 645ed97633935712edcc2c56f252738b38f15e3a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:22 +0100 -Subject: [PATCH 055/101] vfio/spapr: Introduce a sPAPR VFIOIOMMU QOM interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [54/67] 2ceac3c07d71790dc3852fbbbd4084a7affb9373 (eauger1/centos-qemu-kvm) - -Move vfio_spapr_container_setup() to a VFIOIOMMUClass::setup handler -and convert the sPAPR VFIOIOMMUOps struct to a QOM interface. The -sPAPR QOM interface inherits from the legacy QOM interface because -because both have the same basic needs. The sPAPR interface is then -extended with the handlers specific to the sPAPR IOMMU. - -This allows reuse and provides better abstraction of the backends. It -will be useful to avoid compiling the sPAPR IOMMU backend on targets -not supporting it. 
- -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit f221f641a2fe69c2ca3857759551470664b0bec8) -Signed-off-by: Eric Auger ---- - hw/vfio/container.c | 18 +++++-------- - hw/vfio/spapr.c | 39 ++++++++++++++++----------- - include/hw/vfio/vfio-container-base.h | 1 + - 3 files changed, 31 insertions(+), 27 deletions(-) - -diff --git a/hw/vfio/container.c b/hw/vfio/container.c -index c22bdd3216..688cf23bab 100644 ---- a/hw/vfio/container.c -+++ b/hw/vfio/container.c -@@ -381,6 +381,10 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) - case VFIO_TYPE1_IOMMU: - klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); - break; -+ case VFIO_SPAPR_TCE_v2_IOMMU: -+ case VFIO_SPAPR_TCE_IOMMU: -+ klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR); -+ break; - default: - g_assert_not_reached(); - }; -@@ -623,19 +627,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - goto free_container_exit; - } - -- switch (container->iommu_type) { -- case VFIO_TYPE1v2_IOMMU: -- case VFIO_TYPE1_IOMMU: -- ret = vfio_legacy_setup(bcontainer, errp); -- break; -- case VFIO_SPAPR_TCE_v2_IOMMU: -- case VFIO_SPAPR_TCE_IOMMU: -- ret = vfio_spapr_container_init(container, errp); -- break; -- default: -- g_assert_not_reached(); -- } -+ assert(bcontainer->ops->setup); - -+ ret = bcontainer->ops->setup(bcontainer, errp); - if (ret) { - goto enable_discards_exit; - } -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 44617dfc6b..0d949bb728 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -458,20 +458,11 @@ static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) - } - } - --static VFIOIOMMUOps vfio_iommu_spapr_ops; -- --static void setup_spapr_ops(VFIOContainerBase *bcontainer) --{ -- vfio_iommu_spapr_ops = *bcontainer->ops; -- vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; -- vfio_iommu_spapr_ops.del_window = 
vfio_spapr_container_del_section_window; -- vfio_iommu_spapr_ops.release = vfio_spapr_container_release; -- bcontainer->ops = &vfio_iommu_spapr_ops; --} -- --int vfio_spapr_container_init(VFIOContainer *container, Error **errp) -+static int vfio_spapr_container_setup(VFIOContainerBase *bcontainer, -+ Error **errp) - { -- VFIOContainerBase *bcontainer = &container->bcontainer; -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, - container); - struct vfio_iommu_spapr_tce_info info; -@@ -536,8 +527,6 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - 0x1000); - } - -- setup_spapr_ops(bcontainer); -- - return 0; - - listener_unregister_exit: -@@ -546,3 +535,23 @@ listener_unregister_exit: - } - return ret; - } -+ -+static void vfio_iommu_spapr_class_init(ObjectClass *klass, void *data) -+{ -+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); -+ -+ vioc->add_window = vfio_spapr_container_add_section_window; -+ vioc->del_window = vfio_spapr_container_del_section_window; -+ vioc->release = vfio_spapr_container_release; -+ vioc->setup = vfio_spapr_container_setup; -+}; -+ -+static const TypeInfo types[] = { -+ { -+ .name = TYPE_VFIO_IOMMU_SPAPR, -+ .parent = TYPE_VFIO_IOMMU_LEGACY, -+ .class_init = vfio_iommu_spapr_class_init, -+ }, -+}; -+ -+DEFINE_TYPES(types) -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index ce8b1fba88..9e21d7811f 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -95,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); - - #define TYPE_VFIO_IOMMU "vfio-iommu" - #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" -+#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" - - /* - * VFIOContainerBase is not an abstract QOM object because it felt --- -2.39.3 - diff --git 
a/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch b/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch deleted file mode 100644 index f1ca4a2..0000000 --- a/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch +++ /dev/null @@ -1,91 +0,0 @@ -From ff0c13c22878eed0f3879c0805bef5b9f9d83e04 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:42 +0800 -Subject: [PATCH 017/101] vfio/spapr: Introduce spapr backend and target - interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [16/67] e35cda157a2a1afeded3305622c861abd07edb51 (eauger1/centos-qemu-kvm) - -Introduce an empty spapr backend which will hold spapr specific -content, currently only prereg_listener and hostwin_list. - -Also introduce two spapr specific callbacks add/del_window into -VFIOIOMMUOps. Instantiate a spapr ops with a helper setup_spapr_ops -and assign it to bcontainer->ops. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 9b7d38bf5a2c1054bfe6de08806954cdc45d8d98) -Signed-off-by: Eric Auger ---- - hw/vfio/spapr.c | 14 ++++++++++++++ - include/hw/vfio/vfio-container-base.h | 6 ++++++ - 2 files changed, 20 insertions(+) - -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 7a50975f25..e1a6b35563 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -24,6 +24,10 @@ - #include "qapi/error.h" - #include "trace.h" - -+typedef struct VFIOSpaprContainer { -+ VFIOContainer container; -+} VFIOSpaprContainer; -+ - static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) - { - if (memory_region_is_iommu(section->mr)) { -@@ -421,6 +425,14 @@ void vfio_container_del_section_window(VFIOContainer *container, - } - } - -+static VFIOIOMMUOps vfio_iommu_spapr_ops; -+ -+static void setup_spapr_ops(VFIOContainerBase *bcontainer) -+{ -+ vfio_iommu_spapr_ops = *bcontainer->ops; -+ bcontainer->ops = &vfio_iommu_spapr_ops; -+} -+ - int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - { - VFIOContainerBase *bcontainer = &container->bcontainer; -@@ -486,6 +498,8 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - 0x1000); - } - -+ setup_spapr_ops(bcontainer); -+ - return 0; - - listener_unregister_exit: -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index 9658ffb526..f62a14ac73 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -101,5 +101,11 @@ struct VFIOIOMMUOps { - int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); - int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, - hwaddr iova, hwaddr size); -+ /* SPAPR specific */ -+ int (*add_window)(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section, -+ Error **errp); -+ void (*del_window)(VFIOContainerBase *bcontainer, 
-+ MemoryRegionSection *section); - }; - #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch b/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch deleted file mode 100644 index 93cb6b8..0000000 --- a/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch +++ /dev/null @@ -1,188 +0,0 @@ -From 3e9e7b57b15ac328f5d663b4e04df546d49f5fa6 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:45 +0800 -Subject: [PATCH 020/101] vfio/spapr: Move hostwin_list into spapr container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [19/67] 87cfeaa32ad32a260a89b2bb1866d59e20c0fe30 (eauger1/centos-qemu-kvm) - -No functional changes intended. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit dbb9d0c9691d145338686d3e0920da047f2ab3da) -Signed-off-by: Eric Auger ---- - hw/vfio/spapr.c | 36 +++++++++++++++++++---------------- - include/hw/vfio/vfio-common.h | 1 - - 2 files changed, 20 insertions(+), 17 deletions(-) - -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 68c3dd6c75..5c6426e697 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -27,6 +27,7 @@ - typedef struct VFIOSpaprContainer { - VFIOContainer container; - MemoryListener prereg_listener; -+ QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - } VFIOSpaprContainer; - - static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) -@@ -154,12 +155,12 @@ static const MemoryListener vfio_prereg_listener = { - .region_del = vfio_prereg_listener_region_del, - }; - --static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, -+static void 
vfio_host_win_add(VFIOSpaprContainer *scontainer, hwaddr min_iova, - hwaddr max_iova, uint64_t iova_pgsizes) - { - VFIOHostDMAWindow *hostwin; - -- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { -+ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - min_iova, -@@ -173,15 +174,15 @@ static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, - hostwin->min_iova = min_iova; - hostwin->max_iova = max_iova; - hostwin->iova_pgsizes = iova_pgsizes; -- QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); -+ QLIST_INSERT_HEAD(&scontainer->hostwin_list, hostwin, hostwin_next); - } - --static int vfio_host_win_del(VFIOContainer *container, -+static int vfio_host_win_del(VFIOSpaprContainer *scontainer, - hwaddr min_iova, hwaddr max_iova) - { - VFIOHostDMAWindow *hostwin; - -- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { -+ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { - if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); -@@ -192,7 +193,7 @@ static int vfio_host_win_del(VFIOContainer *container, - return -1; - } - --static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, -+static VFIOHostDMAWindow *vfio_find_hostwin(VFIOSpaprContainer *container, - hwaddr iova, hwaddr end) - { - VFIOHostDMAWindow *hostwin; -@@ -329,6 +330,8 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, - { - VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); -+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -+ container); - VFIOHostDMAWindow *hostwin; - hwaddr pgsize = 0; - int ret; -@@ -344,7 +347,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, - iova = section->offset_within_address_space; - end = iova + 
int128_get64(section->size) - 1; - -- if (!vfio_find_hostwin(container, iova, end)) { -+ if (!vfio_find_hostwin(scontainer, iova, end)) { - error_setg(errp, "Container %p can't map guest IOVA region" - " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, - iova, end); -@@ -358,7 +361,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, - } - - /* For now intersections are not allowed, we may relax this later */ -- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { -+ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - section->offset_within_address_space, -@@ -380,7 +383,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, - return ret; - } - -- vfio_host_win_add(container, section->offset_within_address_space, -+ vfio_host_win_add(scontainer, section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, pgsize); - #ifdef CONFIG_KVM -@@ -419,6 +422,8 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, - { - VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); -+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -+ container); - - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return; -@@ -426,7 +431,7 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, - - vfio_spapr_remove_window(container, - section->offset_within_address_space); -- if (vfio_host_win_del(container, -+ if (vfio_host_win_del(scontainer, - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1) < 0) { -@@ -454,7 +459,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - int ret, fd = container->fd; - -- QLIST_INIT(&container->hostwin_list); -+ 
QLIST_INIT(&scontainer->hostwin_list); - - /* - * The host kernel code implementing VFIO_IOMMU_DISABLE is called -@@ -506,7 +511,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - } else { - /* The default table uses 4K pages */ - bcontainer->pgsizes = 0x1000; -- vfio_host_win_add(container, info.dma32_window_start, -+ vfio_host_win_add(scontainer, info.dma32_window_start, - info.dma32_window_start + - info.dma32_window_size - 1, - 0x1000); -@@ -525,15 +530,14 @@ listener_unregister_exit: - - void vfio_spapr_container_deinit(VFIOContainer *container) - { -+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -+ container); - VFIOHostDMAWindow *hostwin, *next; - - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { -- VFIOSpaprContainer *scontainer = container_of(container, -- VFIOSpaprContainer, -- container); - memory_listener_unregister(&scontainer->prereg_listener); - } -- QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, -+ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, - next) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index ed6148c058..24ecc0e7ee 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -79,7 +79,6 @@ typedef struct VFIOContainer { - VFIOContainerBase bcontainer; - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ - unsigned iommu_type; -- QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; - } VFIOContainer; - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch b/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch deleted file mode 100644 index 1db4b55..0000000 --- a/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 17e6dad3e43e173147c0ca33f6f1f4f317a77d0b Mon Sep 
17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:44 +0800 -Subject: [PATCH 019/101] vfio/spapr: Move prereg_listener into spapr container -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [18/67] dbea1b0b759e91b953271da92bba4ca6853bec82 (eauger1/centos-qemu-kvm) - -No functional changes intended. - -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -Signed-off-by: Cédric Le Goater -(cherry picked from commit 6ad359ec29af7f21dcb206c8edb26905a4925f80) -Signed-off-by: Eric Auger ---- - hw/vfio/spapr.c | 24 ++++++++++++++++-------- - include/hw/vfio/vfio-common.h | 1 - - 2 files changed, 16 insertions(+), 9 deletions(-) - -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index 5be1911aad..68c3dd6c75 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -26,6 +26,7 @@ - - typedef struct VFIOSpaprContainer { - VFIOContainer container; -+ MemoryListener prereg_listener; - } VFIOSpaprContainer; - - static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) -@@ -48,8 +49,9 @@ static void *vfio_prereg_gpa_to_vaddr(MemoryRegionSection *section, hwaddr gpa) - static void vfio_prereg_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, -- prereg_listener); -+ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, -+ prereg_listener); -+ VFIOContainer *container = &scontainer->container; - VFIOContainerBase *bcontainer = &container->bcontainer; - const hwaddr gpa = section->offset_within_address_space; - hwaddr end; -@@ -107,8 +109,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, - static void vfio_prereg_listener_region_del(MemoryListener *listener, - MemoryRegionSection 
*section) - { -- VFIOContainer *container = container_of(listener, VFIOContainer, -- prereg_listener); -+ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, -+ prereg_listener); -+ VFIOContainer *container = &scontainer->container; - const hwaddr gpa = section->offset_within_address_space; - hwaddr end; - int ret; -@@ -445,6 +448,8 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer) - int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - { - VFIOContainerBase *bcontainer = &container->bcontainer; -+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, -+ container); - struct vfio_iommu_spapr_tce_info info; - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - int ret, fd = container->fd; -@@ -463,9 +468,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - return -errno; - } - } else { -- container->prereg_listener = vfio_prereg_listener; -+ scontainer->prereg_listener = vfio_prereg_listener; - -- memory_listener_register(&container->prereg_listener, -+ memory_listener_register(&scontainer->prereg_listener, - &address_space_memory); - if (bcontainer->error) { - ret = -1; -@@ -513,7 +518,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) - - listener_unregister_exit: - if (v2) { -- memory_listener_unregister(&container->prereg_listener); -+ memory_listener_unregister(&scontainer->prereg_listener); - } - return ret; - } -@@ -523,7 +528,10 @@ void vfio_spapr_container_deinit(VFIOContainer *container) - VFIOHostDMAWindow *hostwin, *next; - - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { -- memory_listener_unregister(&container->prereg_listener); -+ VFIOSpaprContainer *scontainer = container_of(container, -+ VFIOSpaprContainer, -+ container); -+ memory_listener_unregister(&scontainer->prereg_listener); - } - QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, - next) { -diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h -index 055f679363..ed6148c058 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -78,7 +78,6 @@ struct VFIOGroup; - typedef struct VFIOContainer { - VFIOContainerBase bcontainer; - int fd; /* /dev/vfio/vfio, empowered by the attached groups */ -- MemoryListener prereg_listener; - unsigned iommu_type; - QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; - QLIST_HEAD(, VFIOGroup) group_list; --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch b/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch deleted file mode 100644 index 7762804..0000000 --- a/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 5d485eb1442a81b51688124ce30024e96490acbf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 19 Dec 2023 07:58:24 +0100 -Subject: [PATCH 057/101] vfio/spapr: Only compile sPAPR IOMMU support when - needed -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [56/67] 4dc0cbde470f877a8aac2bf6fab6923f2f919285 (eauger1/centos-qemu-kvm) - -sPAPR IOMMU support is only needed for pseries machines. Compile out -support when CONFIG_PSERIES is not set. This saves ~7K of text. 
- -Reviewed-by: Zhenzhong Duan -Tested-by: Eric Farman -Signed-off-by: Cédric Le Goater -(cherry picked from commit 10164df6ed3d41cbf67105dcd954a663ef4cc3e9) -Signed-off-by: Eric Auger ---- - hw/vfio/meson.build | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build -index e5d98b6adc..bb98493b53 100644 ---- a/hw/vfio/meson.build -+++ b/hw/vfio/meson.build -@@ -4,9 +4,9 @@ vfio_ss.add(files( - 'common.c', - 'container-base.c', - 'container.c', -- 'spapr.c', - 'migration.c', - )) -+vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) - vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( - 'iommufd.c', - )) --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch b/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch deleted file mode 100644 index 4d8db61..0000000 --- a/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch +++ /dev/null @@ -1,184 +0,0 @@ -From 3b7f044f15b4a9daf4ad7eda58777aba6dbe3fc0 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 2 Nov 2023 15:12:43 +0800 -Subject: [PATCH 018/101] vfio/spapr: switch to spapr IOMMU BE - add/del_section_window -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 211: IOMMUFD backend backport -RH-Jira: RHEL-19302 RHEL-21057 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Sebastian Ott -RH-Commit: [17/67] a0d9f1f2d4d2592f3d9fc2ee5b2c38236a986e38 (eauger1/centos-qemu-kvm) - -No functional change intended. 
- -Signed-off-by: Zhenzhong Duan -Reviewed-by: Cédric Le Goater -(cherry picked from commit 233309e8e4c158af6c6b126d5ad021bae40a918a) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 8 ++------ - hw/vfio/container-base.c | 21 +++++++++++++++++++++ - hw/vfio/spapr.c | 19 ++++++++++++++----- - include/hw/vfio/vfio-common.h | 5 ----- - include/hw/vfio/vfio-container-base.h | 5 +++++ - 5 files changed, 42 insertions(+), 16 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 483ba82089..572ae7c934 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -571,8 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener, - { - VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, - listener); -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); - hwaddr iova, end; - Int128 llend, llsize; - void *vaddr; -@@ -595,7 +593,7 @@ static void vfio_listener_region_add(MemoryListener *listener, - return; - } - -- if (vfio_container_add_section_window(container, section, &err)) { -+ if (vfio_container_add_section_window(bcontainer, section, &err)) { - goto fail; - } - -@@ -738,8 +736,6 @@ static void vfio_listener_region_del(MemoryListener *listener, - { - VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, - listener); -- VFIOContainer *container = container_of(bcontainer, VFIOContainer, -- bcontainer); - hwaddr iova, end; - Int128 llend, llsize; - int ret; -@@ -818,7 +814,7 @@ static void vfio_listener_region_del(MemoryListener *listener, - - memory_region_unref(section->mr); - -- vfio_container_del_section_window(container, section); -+ vfio_container_del_section_window(bcontainer, section); - } - - typedef struct VFIODirtyRanges { -diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c -index 0177f43741..71f7274973 100644 ---- a/hw/vfio/container-base.c -+++ b/hw/vfio/container-base.c -@@ -31,6 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase 
*bcontainer, - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); - } - -+int vfio_container_add_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section, -+ Error **errp) -+{ -+ if (!bcontainer->ops->add_window) { -+ return 0; -+ } -+ -+ return bcontainer->ops->add_window(bcontainer, section, errp); -+} -+ -+void vfio_container_del_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section) -+{ -+ if (!bcontainer->ops->del_window) { -+ return; -+ } -+ -+ return bcontainer->ops->del_window(bcontainer, section); -+} -+ - int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - bool start) - { -diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c -index e1a6b35563..5be1911aad 100644 ---- a/hw/vfio/spapr.c -+++ b/hw/vfio/spapr.c -@@ -319,10 +319,13 @@ static int vfio_spapr_create_window(VFIOContainer *container, - return 0; - } - --int vfio_container_add_section_window(VFIOContainer *container, -- MemoryRegionSection *section, -- Error **errp) -+static int -+vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section, -+ Error **errp) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); - VFIOHostDMAWindow *hostwin; - hwaddr pgsize = 0; - int ret; -@@ -407,9 +410,13 @@ int vfio_container_add_section_window(VFIOContainer *container, - return 0; - } - --void vfio_container_del_section_window(VFIOContainer *container, -- MemoryRegionSection *section) -+static void -+vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section) - { -+ VFIOContainer *container = container_of(bcontainer, VFIOContainer, -+ bcontainer); -+ - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return; - } -@@ -430,6 +437,8 @@ static VFIOIOMMUOps vfio_iommu_spapr_ops; - static void setup_spapr_ops(VFIOContainerBase *bcontainer) - { - vfio_iommu_spapr_ops = *bcontainer->ops; -+ 
vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; -+ vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window; - bcontainer->ops = &vfio_iommu_spapr_ops; - } - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index b9e5a0e64b..055f679363 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -169,11 +169,6 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); - void vfio_put_address_space(VFIOAddressSpace *space); - - /* SPAPR specific */ --int vfio_container_add_section_window(VFIOContainer *container, -- MemoryRegionSection *section, -- Error **errp); --void vfio_container_del_section_window(VFIOContainer *container, -- MemoryRegionSection *section); - int vfio_spapr_container_init(VFIOContainer *container, Error **errp); - void vfio_spapr_container_deinit(VFIOContainer *container); - -diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h -index f62a14ac73..4b6f017c6f 100644 ---- a/include/hw/vfio/vfio-container-base.h -+++ b/include/hw/vfio/vfio-container-base.h -@@ -75,6 +75,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, - int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb); -+int vfio_container_add_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section, -+ Error **errp); -+void vfio_container_del_section_window(VFIOContainerBase *bcontainer, -+ MemoryRegionSection *section); - int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, - bool start); - int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch b/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch deleted file mode 100644 index ef770fd..0000000 --- a/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch +++ /dev/null @@ 
-1,139 +0,0 @@ -From 2a758da4e1433564998def68447008908c96e113 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 2 Feb 2024 16:31:57 +0100 -Subject: [PATCH 2/6] virtio: Re-enable notifications after drain - -RH-Author: Hanna Czenczek -RH-MergeRequest: 223: virtio: Re-enable notifications after drain -RH-Jira: RHEL-3934 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/3] e3be798e6259a378fc03f4364ecaeb875b01f64c (hreitz/qemu-kvm-c-9-s) - -During drain, we do not care about virtqueue notifications, which is why -we remove the handlers on it. When removing those handlers, whether vq -notifications are enabled or not depends on whether we were in polling -mode or not; if not, they are enabled (by default); if so, they have -been disabled by the io_poll_start callback. - -Because we do not care about those notifications after removing the -handlers, this is fine. However, we have to explicitly ensure they are -enabled when re-attaching the handlers, so we will resume receiving -notifications. We do this in virtio_queue_aio_attach_host_notifier*(). -If such a function is called while we are in a polling section, -attaching the notifiers will then invoke the io_poll_start callback, -re-disabling notifications. - -Because we will always miss virtqueue updates in the drained section, we -also need to poll the virtqueue once after attaching the notifiers. 
- -Buglink: https://issues.redhat.com/browse/RHEL-3934 -Signed-off-by: Hanna Czenczek -Message-ID: <20240202153158.788922-3-hreitz@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 5bdbaebcce18fe6a627cafad2043ec08f3de5744) ---- - hw/virtio/virtio.c | 42 ++++++++++++++++++++++++++++++++++++++++++ - include/block/aio.h | 7 ++++++- - 2 files changed, 48 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 3a160f86ed..356d690cc9 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -3556,6 +3556,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) - - void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) - { -+ /* -+ * virtio_queue_aio_detach_host_notifier() can leave notifications disabled. -+ * Re-enable them. (And if detach has not been used before, notifications -+ * being enabled is still the default state while a notifier is attached; -+ * see virtio_queue_host_notifier_aio_poll_end(), which will always leave -+ * notifications enabled once the polling section is left.) -+ */ -+ if (!virtio_queue_get_notification(vq)) { -+ virtio_queue_set_notification(vq, 1); -+ } -+ - aio_set_event_notifier(ctx, &vq->host_notifier, - virtio_queue_host_notifier_read, - virtio_queue_host_notifier_aio_poll, -@@ -3563,6 +3574,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) - aio_set_event_notifier_poll(ctx, &vq->host_notifier, - virtio_queue_host_notifier_aio_poll_begin, - virtio_queue_host_notifier_aio_poll_end); -+ -+ /* -+ * We will have ignored notifications about new requests from the guest -+ * while no notifiers were attached, so "kick" the virt queue to process -+ * those requests now. 
-+ */ -+ event_notifier_set(&vq->host_notifier); - } - - /* -@@ -3573,14 +3591,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) - */ - void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) - { -+ /* See virtio_queue_aio_attach_host_notifier() */ -+ if (!virtio_queue_get_notification(vq)) { -+ virtio_queue_set_notification(vq, 1); -+ } -+ - aio_set_event_notifier(ctx, &vq->host_notifier, - virtio_queue_host_notifier_read, - NULL, NULL); -+ -+ /* -+ * See virtio_queue_aio_attach_host_notifier(). -+ * Note that this may be unnecessary for the type of virtqueues this -+ * function is used for. Still, it will not hurt to have a quick look into -+ * whether we can/should process any of the virtqueue elements. -+ */ -+ event_notifier_set(&vq->host_notifier); - } - - void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) - { - aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL); -+ -+ /* -+ * aio_set_event_notifier_poll() does not guarantee whether io_poll_end() -+ * will run after io_poll_begin(), so by removing the notifier, we do not -+ * know whether virtio_queue_host_notifier_aio_poll_end() has run after a -+ * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether -+ * notifications are enabled or disabled. It does not really matter anyway; -+ * we just removed the notifier, so we do not care about notifications until -+ * we potentially re-attach it. The attach_host_notifier functions will -+ * ensure that notifications are enabled again when they are needed. 
-+ */ - } - - void virtio_queue_host_notifier_read(EventNotifier *n) -diff --git a/include/block/aio.h b/include/block/aio.h -index af05512a7d..261c77fd9a 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -480,9 +480,14 @@ void aio_set_event_notifier(AioContext *ctx, - AioPollFn *io_poll, - EventNotifierHandler *io_poll_ready); - --/* Set polling begin/end callbacks for an event notifier that has already been -+/* -+ * Set polling begin/end callbacks for an event notifier that has already been - * registered with aio_set_event_notifier. Do nothing if the event notifier is - * not registered. -+ * -+ * Note that if the io_poll_end() callback (or the entire notifier) is removed -+ * during polling, it will not be called, so an io_poll_begin() is not -+ * necessarily always followed by an io_poll_end(). - */ - void aio_set_event_notifier_poll(AioContext *ctx, - EventNotifier *notifier, --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch b/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch deleted file mode 100644 index 0565357..0000000 --- a/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 376df80fbba5a9bb0ec43cad083cde9de59128d7 Mon Sep 17 00:00:00 2001 -From: Stefan Weil via -Date: Sun, 24 Dec 2023 12:43:14 +0100 -Subject: [PATCH 10/22] virtio-blk: Fix potential nullpointer read access in - virtio_blk_data_plane_destroy - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [6/17] 460005fc7719b2e1dd577dfe75d18537ab2b8d06 (stefanha/centos-stream-qemu-kvm) - -Fixes: CID 1532828 -Fixes: b6948ab01d ("virtio-blk: add iothread-vq-mapping parameter") -Signed-off-by: Stefan Weil -Signed-off-by: Michael Tokarev -(cherry picked from commit d819fc9516a4ec71e37a6c9edfcd285b7f98c2dc) 
-Signed-off-by: Stefan Hajnoczi ---- - hw/block/dataplane/virtio-blk.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 6debd4401e..97a302cf49 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -152,7 +152,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) - { - VirtIOBlock *vblk; -- VirtIOBlkConf *conf = s->conf; -+ VirtIOBlkConf *conf; - - if (!s) { - return; -@@ -160,6 +160,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) - - vblk = VIRTIO_BLK(s->vdev); - assert(!vblk->dataplane_started); -+ conf = s->conf; - - if (conf->iothread_vq_mapping_list) { - IOThreadVirtQueueMappingList *node; --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch b/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch deleted file mode 100644 index 1a3771e..0000000 --- a/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 094941b2c3e66e078d93718933eb07e800a7dd60 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 2 Feb 2024 16:31:58 +0100 -Subject: [PATCH 3/6] virtio-blk: Use ioeventfd_attach in start_ioeventfd - -RH-Author: Hanna Czenczek -RH-MergeRequest: 223: virtio: Re-enable notifications after drain -RH-Jira: RHEL-3934 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/3] 96d6760d1b7b12df695b6825b15a2a3b8a79a74c (hreitz/qemu-kvm-c-9-s) - -Commit d3f6f294aeadd5f88caf0155e4360808c95b3146 ("virtio-blk: always set -ioeventfd during startup") has made virtio_blk_start_ioeventfd() always -kick the virtqueue (set the ioeventfd), regardless of whether the BB is -drained. 
That is no longer necessary, because attaching the host -notifier will now set the ioeventfd, too; this happens either -immediately right here in virtio_blk_start_ioeventfd(), or later when -the drain ends, in virtio_blk_ioeventfd_attach(). - -With event_notifier_set() removed, the code becomes the same as the one -in virtio_blk_ioeventfd_attach(), so we can reuse that function. - -Signed-off-by: Hanna Czenczek -Message-ID: <20240202153158.788922-4-hreitz@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 52bff01f64eec017ffb0d5903a0ee1d67ca7a548) ---- - hw/block/virtio-blk.c | 21 ++++++++++----------- - 1 file changed, 10 insertions(+), 11 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 0b9100b746..7fdeaf2d12 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -37,6 +37,8 @@ - #include "hw/virtio/virtio-blk-common.h" - #include "qemu/coroutine.h" - -+static void virtio_blk_ioeventfd_attach(VirtIOBlock *s); -+ - static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, - VirtIOBlockReq *req) - { -@@ -1808,17 +1810,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) - s->ioeventfd_started = true; - smp_wmb(); /* paired with aio_notify_accept() on the read side */ - -- /* Get this show started by hooking up our callbacks */ -- for (i = 0; i < nvqs; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- AioContext *ctx = s->vq_aio_context[i]; -- -- /* Kick right away to begin processing requests already in vring */ -- event_notifier_set(virtio_queue_get_host_notifier(vq)); -- -- if (!blk_in_drain(s->conf.conf.blk)) { -- virtio_queue_aio_attach_host_notifier(vq, ctx); -- } -+ /* -+ * Get this show started by hooking up our callbacks. If drained now, -+ * virtio_blk_drained_end() will do this later. -+ * Attaching the notifier also kicks the virtqueues, processing any requests -+ * they may already have. 
-+ */ -+ if (!blk_in_drain(s->conf.conf.blk)) { -+ virtio_blk_ioeventfd_attach(s); - } - return 0; - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch b/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch deleted file mode 100644 index 65a96a0..0000000 --- a/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch +++ /dev/null @@ -1,464 +0,0 @@ -From 733fc13f65286c849ad6618be89df450f8bc5f7e Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 20 Dec 2023 08:47:55 -0500 -Subject: [PATCH 09/22] virtio-blk: add iothread-vq-mapping parameter - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [5/17] c371fe62376c4eb54da88272a5966cec28404224 (stefanha/centos-stream-qemu-kvm) - -Add the iothread-vq-mapping parameter to assign virtqueues to IOThreads. -Store the vq:AioContext mapping in the new struct -VirtIOBlockDataPlane->vq_aio_context[] field and refactor the code to -use the per-vq AioContext instead of the BlockDriverState's AioContext. - -Reimplement --device virtio-blk-pci,iothread= and non-IOThread mode by -assigning all virtqueues to the IOThread and main loop's AioContext in -vq_aio_context[], respectively. - -The comment in struct VirtIOBlockDataPlane about EventNotifiers is -stale. Remove it. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20231220134755.814917-5-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit b6948ab01df068bef591868c22d1f873d2d05cde) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/dataplane/virtio-blk.c | 155 ++++++++++++++++++++++++-------- - hw/block/dataplane/virtio-blk.h | 3 + - hw/block/virtio-blk.c | 92 ++++++++++++++++--- - include/hw/virtio/virtio-blk.h | 2 + - 4 files changed, 202 insertions(+), 50 deletions(-) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 7bbbd981ad..6debd4401e 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -32,13 +32,11 @@ struct VirtIOBlockDataPlane { - VirtIOBlkConf *conf; - VirtIODevice *vdev; - -- /* Note that these EventNotifiers are assigned by value. This is -- * fine as long as you do not call event_notifier_cleanup on them -- * (because you don't own the file descriptor or handle; you just -- * use it). -+ /* -+ * The AioContext for each virtqueue. The BlockDriverState will use the -+ * first element as its AioContext. 
- */ -- IOThread *iothread; -- AioContext *ctx; -+ AioContext **vq_aio_context; - }; - - /* Raise an interrupt to signal guest, if necessary */ -@@ -47,6 +45,45 @@ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) - virtio_notify_irqfd(s->vdev, vq); - } - -+/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ -+static void -+apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, -+ AioContext **vq_aio_context, uint16_t num_queues) -+{ -+ IOThreadVirtQueueMappingList *node; -+ size_t num_iothreads = 0; -+ size_t cur_iothread = 0; -+ -+ for (node = iothread_vq_mapping_list; node; node = node->next) { -+ num_iothreads++; -+ } -+ -+ for (node = iothread_vq_mapping_list; node; node = node->next) { -+ IOThread *iothread = iothread_by_id(node->value->iothread); -+ AioContext *ctx = iothread_get_aio_context(iothread); -+ -+ /* Released in virtio_blk_data_plane_destroy() */ -+ object_ref(OBJECT(iothread)); -+ -+ if (node->value->vqs) { -+ uint16List *vq; -+ -+ /* Explicit vq:IOThread assignment */ -+ for (vq = node->value->vqs; vq; vq = vq->next) { -+ vq_aio_context[vq->value] = ctx; -+ } -+ } else { -+ /* Round-robin vq:IOThread assignment */ -+ for (unsigned i = cur_iothread; i < num_queues; -+ i += num_iothreads) { -+ vq_aio_context[i] = ctx; -+ } -+ } -+ -+ cur_iothread++; -+ } -+} -+ - /* Context: QEMU global mutex held */ - bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - VirtIOBlockDataPlane **dataplane, -@@ -58,7 +95,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - - *dataplane = NULL; - -- if (conf->iothread) { -+ if (conf->iothread || conf->iothread_vq_mapping_list) { - if (!k->set_guest_notifiers || !k->ioeventfd_assign) { - error_setg(errp, - "device is incompatible with iothread " -@@ -86,13 +123,24 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - s = g_new0(VirtIOBlockDataPlane, 1); - s->vdev = vdev; - 
s->conf = conf; -+ s->vq_aio_context = g_new(AioContext *, conf->num_queues); -+ -+ if (conf->iothread_vq_mapping_list) { -+ apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, -+ conf->num_queues); -+ } else if (conf->iothread) { -+ AioContext *ctx = iothread_get_aio_context(conf->iothread); -+ for (unsigned i = 0; i < conf->num_queues; i++) { -+ s->vq_aio_context[i] = ctx; -+ } - -- if (conf->iothread) { -- s->iothread = conf->iothread; -- object_ref(OBJECT(s->iothread)); -- s->ctx = iothread_get_aio_context(s->iothread); -+ /* Released in virtio_blk_data_plane_destroy() */ -+ object_ref(OBJECT(conf->iothread)); - } else { -- s->ctx = qemu_get_aio_context(); -+ AioContext *ctx = qemu_get_aio_context(); -+ for (unsigned i = 0; i < conf->num_queues; i++) { -+ s->vq_aio_context[i] = ctx; -+ } - } - - *dataplane = s; -@@ -104,6 +152,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) - { - VirtIOBlock *vblk; -+ VirtIOBlkConf *conf = s->conf; - - if (!s) { - return; -@@ -111,9 +160,21 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) - - vblk = VIRTIO_BLK(s->vdev); - assert(!vblk->dataplane_started); -- if (s->iothread) { -- object_unref(OBJECT(s->iothread)); -+ -+ if (conf->iothread_vq_mapping_list) { -+ IOThreadVirtQueueMappingList *node; -+ -+ for (node = conf->iothread_vq_mapping_list; node; node = node->next) { -+ IOThread *iothread = iothread_by_id(node->value->iothread); -+ object_unref(OBJECT(iothread)); -+ } -+ } -+ -+ if (conf->iothread) { -+ object_unref(OBJECT(conf->iothread)); - } -+ -+ g_free(s->vq_aio_context); - g_free(s); - } - -@@ -177,19 +238,13 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - - trace_virtio_blk_data_plane_start(s); - -- r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err); -+ r = blk_set_aio_context(s->conf->conf.blk, s->vq_aio_context[0], -+ &local_err); - if (r < 0) { - 
error_report_err(local_err); - goto fail_aio_context; - } - -- /* Kick right away to begin processing requests already in vring */ -- for (i = 0; i < nvqs; i++) { -- VirtQueue *vq = virtio_get_queue(s->vdev, i); -- -- event_notifier_set(virtio_queue_get_host_notifier(vq)); -- } -- - /* - * These fields must be visible to the IOThread when it processes the - * virtqueue, otherwise it will think dataplane has not started yet. -@@ -206,8 +261,12 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - if (!blk_in_drain(s->conf->conf.blk)) { - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); -+ AioContext *ctx = s->vq_aio_context[i]; - -- virtio_queue_aio_attach_host_notifier(vq, s->ctx); -+ /* Kick right away to begin processing requests already in vring */ -+ event_notifier_set(virtio_queue_get_host_notifier(vq)); -+ -+ virtio_queue_aio_attach_host_notifier(vq, ctx); - } - } - return 0; -@@ -236,23 +295,18 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - * - * Context: BH in IOThread - */ --static void virtio_blk_data_plane_stop_bh(void *opaque) -+static void virtio_blk_data_plane_stop_vq_bh(void *opaque) - { -- VirtIOBlockDataPlane *s = opaque; -- unsigned i; -- -- for (i = 0; i < s->conf->num_queues; i++) { -- VirtQueue *vq = virtio_get_queue(s->vdev, i); -- EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); -+ VirtQueue *vq = opaque; -+ EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); - -- virtio_queue_aio_detach_host_notifier(vq, s->ctx); -+ virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); - -- /* -- * Test and clear notifier after disabling event, in case poll callback -- * didn't have time to run. -- */ -- virtio_queue_host_notifier_read(host_notifier); -- } -+ /* -+ * Test and clear notifier after disabling event, in case poll callback -+ * didn't have time to run. 
-+ */ -+ virtio_queue_host_notifier_read(host_notifier); - } - - /* Context: QEMU global mutex held */ -@@ -279,7 +333,12 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - trace_virtio_blk_data_plane_stop(s); - - if (!blk_in_drain(s->conf->conf.blk)) { -- aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); -+ for (i = 0; i < nvqs; i++) { -+ VirtQueue *vq = virtio_get_queue(s->vdev, i); -+ AioContext *ctx = s->vq_aio_context[i]; -+ -+ aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); -+ } - } - - /* -@@ -322,3 +381,23 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - - s->stopping = false; - } -+ -+void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s) -+{ -+ VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); -+ -+ for (uint16_t i = 0; i < s->conf->num_queues; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); -+ } -+} -+ -+void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s) -+{ -+ VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); -+ -+ for (uint16_t i = 0; i < s->conf->num_queues; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); -+ } -+} -diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h -index 5e18bb99ae..1a806fe447 100644 ---- a/hw/block/dataplane/virtio-blk.h -+++ b/hw/block/dataplane/virtio-blk.h -@@ -28,4 +28,7 @@ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); - int virtio_blk_data_plane_start(VirtIODevice *vdev); - void virtio_blk_data_plane_stop(VirtIODevice *vdev); - -+void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s); -+void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s); -+ - #endif /* HW_DATAPLANE_VIRTIO_BLK_H */ -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index ec9ed09a6a..46e73b2c96 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1151,6 +1151,7 @@ static void 
virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - return; - } - } -+ - virtio_blk_handle_vq(s, vq); - } - -@@ -1463,6 +1464,68 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, - return 0; - } - -+static bool -+validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, -+ uint16_t num_queues, Error **errp) -+{ -+ g_autofree unsigned long *vqs = bitmap_new(num_queues); -+ g_autoptr(GHashTable) iothreads = -+ g_hash_table_new(g_str_hash, g_str_equal); -+ -+ for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { -+ const char *name = node->value->iothread; -+ uint16List *vq; -+ -+ if (!iothread_by_id(name)) { -+ error_setg(errp, "IOThread \"%s\" object does not exist", name); -+ return false; -+ } -+ -+ if (!g_hash_table_add(iothreads, (gpointer)name)) { -+ error_setg(errp, -+ "duplicate IOThread name \"%s\" in iothread-vq-mapping", -+ name); -+ return false; -+ } -+ -+ if (node != list) { -+ if (!!node->value->vqs != !!list->value->vqs) { -+ error_setg(errp, "either all items in iothread-vq-mapping " -+ "must have vqs or none of them must have it"); -+ return false; -+ } -+ } -+ -+ for (vq = node->value->vqs; vq; vq = vq->next) { -+ if (vq->value >= num_queues) { -+ error_setg(errp, "vq index %u for IOThread \"%s\" must be " -+ "less than num_queues %u in iothread-vq-mapping", -+ vq->value, name, num_queues); -+ return false; -+ } -+ -+ if (test_and_set_bit(vq->value, vqs)) { -+ error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " -+ "because it is already assigned", vq->value, name); -+ return false; -+ } -+ } -+ } -+ -+ if (list->value->vqs) { -+ for (uint16_t i = 0; i < num_queues; i++) { -+ if (!test_bit(i, vqs)) { -+ error_setg(errp, -+ "missing vq %u IOThread assignment in iothread-vq-mapping", -+ i); -+ return false; -+ } -+ } -+ } -+ -+ return true; -+} -+ - static void virtio_resize_cb(void *opaque) - { - VirtIODevice *vdev = opaque; -@@ -1487,34 +1550,24 @@ static void 
virtio_blk_resize(void *opaque) - static void virtio_blk_drained_begin(void *opaque) - { - VirtIOBlock *s = opaque; -- VirtIODevice *vdev = VIRTIO_DEVICE(opaque); -- AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); - - if (!s->dataplane || !s->dataplane_started) { - return; - } - -- for (uint16_t i = 0; i < s->conf.num_queues; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- virtio_queue_aio_detach_host_notifier(vq, ctx); -- } -+ virtio_blk_data_plane_detach(s->dataplane); - } - - /* Resume virtqueue ioeventfd processing after drain */ - static void virtio_blk_drained_end(void *opaque) - { - VirtIOBlock *s = opaque; -- VirtIODevice *vdev = VIRTIO_DEVICE(opaque); -- AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); - - if (!s->dataplane || !s->dataplane_started) { - return; - } - -- for (uint16_t i = 0; i < s->conf.num_queues; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- virtio_queue_aio_attach_host_notifier(vq, ctx); -- } -+ virtio_blk_data_plane_attach(s->dataplane); - } - - static const BlockDevOps virtio_block_ops = { -@@ -1600,6 +1653,19 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - return; - } - -+ if (conf->iothread_vq_mapping_list) { -+ if (conf->iothread) { -+ error_setg(errp, "iothread and iothread-vq-mapping properties " -+ "cannot be set at the same time"); -+ return; -+ } -+ -+ if (!validate_iothread_vq_mapping_list(conf->iothread_vq_mapping_list, -+ conf->num_queues, errp)) { -+ return; -+ } -+ } -+ - s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, - s->host_features); - virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); -@@ -1702,6 +1768,8 @@ static Property virtio_blk_properties[] = { - DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), - DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, - IOThread *), -+ DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOBlock, -+ conf.iothread_vq_mapping_list), - 
DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, - VIRTIO_BLK_F_DISCARD, true), - DEFINE_PROP_BOOL("report-discard-granularity", VirtIOBlock, -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index 9881009c22..5e4091e4da 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -21,6 +21,7 @@ - #include "sysemu/block-backend.h" - #include "sysemu/block-ram-registrar.h" - #include "qom/object.h" -+#include "qapi/qapi-types-virtio.h" - - #define TYPE_VIRTIO_BLK "virtio-blk-device" - OBJECT_DECLARE_SIMPLE_TYPE(VirtIOBlock, VIRTIO_BLK) -@@ -37,6 +38,7 @@ struct VirtIOBlkConf - { - BlockConf conf; - IOThread *iothread; -+ IOThreadVirtQueueMappingList *iothread_vq_mapping_list; - char *serial; - uint32_t request_merging; - uint16_t num_queues; --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch b/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch deleted file mode 100644 index 31e83a2..0000000 --- a/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch +++ /dev/null @@ -1,177 +0,0 @@ -From d54e88103aa76f3bf755b3f4308d8ab60367c6ef Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 14 Sep 2023 10:00:59 -0400 -Subject: [PATCH 074/101] virtio-blk: add lock to protect s->rq - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [5/26] 17dcd5ba18c03e5633a014d8d62d34d8dd7b43bf (kmwolf/centos-qemu-kvm) - -s->rq is accessed from IO_CODE and GLOBAL_STATE_CODE. Introduce a lock -to protect s->rq and eliminate reliance on the AioContext lock. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20230914140101.1065008-3-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Michael S. 
Tsirkin -Signed-off-by: Kevin Wolf ---- - hw/block/virtio-blk.c | 67 +++++++++++++++++++++++----------- - include/hw/virtio/virtio-blk.h | 3 +- - 2 files changed, 47 insertions(+), 23 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index a1f8e15522..ee38e089bc 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -82,8 +82,11 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, - /* Break the link as the next request is going to be parsed from the - * ring again. Otherwise we may end up doing a double completion! */ - req->mr_next = NULL; -- req->next = s->rq; -- s->rq = req; -+ -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ req->next = s->rq; -+ s->rq = req; -+ } - } else if (action == BLOCK_ERROR_ACTION_REPORT) { - virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); - if (acct_failed) { -@@ -1183,10 +1186,13 @@ static void virtio_blk_dma_restart_bh(void *opaque) - { - VirtIOBlock *s = opaque; - -- VirtIOBlockReq *req = s->rq; -+ VirtIOBlockReq *req; - MultiReqBuffer mrb = {}; - -- s->rq = NULL; -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ req = s->rq; -+ s->rq = NULL; -+ } - - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - while (req) { -@@ -1238,22 +1244,29 @@ static void virtio_blk_reset(VirtIODevice *vdev) - AioContext *ctx; - VirtIOBlockReq *req; - -+ /* Dataplane has stopped... */ -+ assert(!s->dataplane_started); -+ -+ /* ...but requests may still be in flight. */ - ctx = blk_get_aio_context(s->blk); - aio_context_acquire(ctx); - blk_drain(s->blk); -+ aio_context_release(ctx); - - /* We drop queued requests after blk_drain() because blk_drain() itself can - * produce them. 
*/ -- while (s->rq) { -- req = s->rq; -- s->rq = req->next; -- virtqueue_detach_element(req->vq, &req->elem, 0); -- virtio_blk_free_request(req); -- } -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ while (s->rq) { -+ req = s->rq; -+ s->rq = req->next; - -- aio_context_release(ctx); -+ /* No other threads can access req->vq here */ -+ virtqueue_detach_element(req->vq, &req->elem, 0); -+ -+ virtio_blk_free_request(req); -+ } -+ } - -- assert(!s->dataplane_started); - blk_set_enable_write_cache(s->blk, s->original_wce); - } - -@@ -1443,18 +1456,22 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) - static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) - { - VirtIOBlock *s = VIRTIO_BLK(vdev); -- VirtIOBlockReq *req = s->rq; - -- while (req) { -- qemu_put_sbyte(f, 1); -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ VirtIOBlockReq *req = s->rq; - -- if (s->conf.num_queues > 1) { -- qemu_put_be32(f, virtio_get_queue_index(req->vq)); -- } -+ while (req) { -+ qemu_put_sbyte(f, 1); - -- qemu_put_virtqueue_element(vdev, f, &req->elem); -- req = req->next; -+ if (s->conf.num_queues > 1) { -+ qemu_put_be32(f, virtio_get_queue_index(req->vq)); -+ } -+ -+ qemu_put_virtqueue_element(vdev, f, &req->elem); -+ req = req->next; -+ } - } -+ - qemu_put_sbyte(f, 0); - } - -@@ -1480,8 +1497,11 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, - - req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq)); - virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req); -- req->next = s->rq; -- s->rq = req; -+ -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ req->next = s->rq; -+ s->rq = req; -+ } - } - - return 0; -@@ -1628,6 +1648,8 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - s->host_features); - virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); - -+ qemu_mutex_init(&s->rq_lock); -+ - s->blk = conf->conf.blk; - s->rq = NULL; - s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; -@@ 
-1679,6 +1701,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) - virtio_del_queue(vdev, i); - } - qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); -+ qemu_mutex_destroy(&s->rq_lock); - blk_ram_registrar_destroy(&s->blk_ram_registrar); - qemu_del_vm_change_state_handler(s->change); - blockdev_mark_auto_del(s->blk); -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index dafec432ce..9881009c22 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -54,7 +54,8 @@ struct VirtIOBlockReq; - struct VirtIOBlock { - VirtIODevice parent_obj; - BlockBackend *blk; -- void *rq; -+ QemuMutex rq_lock; -+ void *rq; /* protected by rq_lock */ - VirtIOBlkConf conf; - unsigned short sector_mask; - bool original_wce; --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch b/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch deleted file mode 100644 index a7b518d..0000000 --- a/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 22730552442003e81c8c508c3e7ebacf647e4e75 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 19 Jan 2024 08:57:48 -0500 -Subject: [PATCH 19/22] virtio-blk: always set ioeventfd during startup - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [15/17] 5f7142aeaa54fda41bd5c4fd3222fd8e3e18f370 (stefanha/centos-stream-qemu-kvm) - -When starting ioeventfd it is common practice to set the event notifier -so that the ioeventfd handler is triggered to run immediately. There may -be no requests waiting to be processed, but the idea is that if a -request snuck in then we guarantee that it will be detected. 
- -One scenario where self-triggering the ioeventfd is necessary is when -virtio_blk_handle_output() is called from a vCPU thread before the -VIRTIO Device Status transitions to DRIVER_OK. In that case we need to -self-trigger the ioeventfd so that the kick handled by the vCPU thread -causes the vq AioContext thread to take over handling the request(s). - -Fixes: b6948ab01df0 ("virtio-blk: add iothread-vq-mapping parameter") -Reported-by: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-7-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit d3f6f294aeadd5f88caf0155e4360808c95b3146) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 81de06c9f6..0b9100b746 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1809,14 +1809,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) - smp_wmb(); /* paired with aio_notify_accept() on the read side */ - - /* Get this show started by hooking up our callbacks */ -- if (!blk_in_drain(s->conf.conf.blk)) { -- for (i = 0; i < nvqs; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- AioContext *ctx = s->vq_aio_context[i]; -+ for (i = 0; i < nvqs; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ AioContext *ctx = s->vq_aio_context[i]; - -- /* Kick right away to begin processing requests already in vring */ -- event_notifier_set(virtio_queue_get_host_notifier(vq)); -+ /* Kick right away to begin processing requests already in vring */ -+ event_notifier_set(virtio_queue_get_host_notifier(vq)); - -+ if (!blk_in_drain(s->conf.conf.blk)) { - virtio_queue_aio_attach_host_notifier(vq, ctx); - } - } --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch b/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch deleted file 
mode 100644 index 8d93bf6..0000000 --- a/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch +++ /dev/null @@ -1,72 +0,0 @@ -From f62b56c68d50a149a07e15797bf3605e63b2c501 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 22 Jan 2024 12:26:25 -0500 -Subject: [PATCH 4/6] virtio-blk: avoid using ioeventfd state in irqfd - conditional - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 224: virtio-blk: avoid using ioeventfd state in irqfd conditional -RH-Jira: RHEL-15394 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [1/1] 8f24084669db52457e55e2523b9f56f5560dd6ce (stefanha/centos-stream-qemu-kvm) - -Requests that complete in an IOThread use irqfd to notify the guest -while requests that complete in the main loop thread use the traditional -qdev irq code path. The reason for this conditional is that the irq code -path requires the BQL: - - if (s->ioeventfd_started && !s->ioeventfd_disabled) { - virtio_notify_irqfd(vdev, req->vq); - } else { - virtio_notify(vdev, req->vq); - } - -There is a corner case where the conditional invokes the irq code path -instead of the irqfd code path: - - static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) - { - ... - /* - * Set ->ioeventfd_started to false before draining so that host notifiers - * are not detached/attached anymore. - */ - s->ioeventfd_started = false; - - /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ - blk_drain(s->conf.conf.blk); - -During blk_drain() the conditional produces the wrong result because -ioeventfd_started is false. - -Use qemu_in_iothread() instead of checking the ioeventfd state. 
- -Cc: qemu-stable@nongnu.org -Buglink: https://issues.redhat.com/browse/RHEL-15394 -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240122172625.415386-1-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit bfa36802d1704fc413c590ebdcc4e5ae0eacf439) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 7fdeaf2d12..2ae2f6a823 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -66,7 +66,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) - iov_discard_undo(&req->inhdr_undo); - iov_discard_undo(&req->outhdr_undo); - virtqueue_push(req->vq, &req->elem, req->in_len); -- if (s->ioeventfd_started && !s->ioeventfd_disabled) { -+ if (qemu_in_iothread()) { - virtio_notify_irqfd(vdev, req->vq); - } else { - virtio_notify(vdev, req->vq); --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch deleted file mode 100644 index be3c7db..0000000 --- a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch +++ /dev/null @@ -1,167 +0,0 @@ -From a2069ff76637365cacf5b96f9427b98a6ca2c9ba Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 14 Sep 2023 10:01:00 -0400 -Subject: [PATCH 075/101] virtio-blk: don't lock AioContext in the completion - code path - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [6/26] 3426f62c2156f6967bb4ffbce75a4ff46d3312a3 (kmwolf/centos-qemu-kvm) - -Nothing in the completion code path relies on the AioContext lock -anymore. Virtqueues are only accessed from one thread at any moment and -the s->rq global state is protected by its own lock now. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20230914140101.1065008-4-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Kevin Wolf ---- - hw/block/virtio-blk.c | 34 ++++------------------------------ - 1 file changed, 4 insertions(+), 30 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index ee38e089bc..f5315df042 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -105,7 +105,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) - VirtIOBlock *s = next->dev; - VirtIODevice *vdev = VIRTIO_DEVICE(s); - -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - while (next) { - VirtIOBlockReq *req = next; - next = req->mr_next; -@@ -138,7 +137,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) - block_acct_done(blk_get_stats(s->blk), &req->acct); - virtio_blk_free_request(req); - } -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - - static void virtio_blk_flush_complete(void *opaque, int ret) -@@ -146,19 +144,13 @@ static void virtio_blk_flush_complete(void *opaque, int ret) - VirtIOBlockReq *req = opaque; - VirtIOBlock *s = req->dev; - -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); -- if (ret) { -- if (virtio_blk_handle_rw_error(req, -ret, 0, true)) { -- goto out; -- } -+ if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) { -+ return; - } - - virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); - block_acct_done(blk_get_stats(s->blk), &req->acct); - virtio_blk_free_request(req); -- --out: -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - - static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) -@@ -168,11 +160,8 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) - bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) & - ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES; - -- 
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); -- if (ret) { -- if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { -- goto out; -- } -+ if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { -+ return; - } - - virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); -@@ -180,9 +169,6 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) - block_acct_done(blk_get_stats(s->blk), &req->acct); - } - virtio_blk_free_request(req); -- --out: -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - - #ifdef __linux__ -@@ -229,10 +215,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int status) - virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len); - - out: -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - virtio_blk_req_complete(req, status); - virtio_blk_free_request(req); -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - g_free(ioctl_req); - } - -@@ -672,7 +656,6 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) - { - ZoneCmdData *data = opaque; - VirtIOBlockReq *req = data->req; -- VirtIOBlock *s = req->dev; - VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); - struct iovec *in_iov = data->in_iov; - unsigned in_num = data->in_num; -@@ -763,10 +746,8 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) - } - - out: -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - virtio_blk_req_complete(req, err_status); - virtio_blk_free_request(req); -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - g_free(data->zone_report_data.zones); - g_free(data); - } -@@ -829,10 +810,8 @@ static void virtio_blk_zone_mgmt_complete(void *opaque, int ret) - err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; - } - -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - virtio_blk_req_complete(req, err_status); - virtio_blk_free_request(req); -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - - static 
int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op) -@@ -882,7 +861,6 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret) - { - ZoneCmdData *data = opaque; - VirtIOBlockReq *req = data->req; -- VirtIOBlock *s = req->dev; - VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); - int64_t append_sector, n; - uint8_t err_status = VIRTIO_BLK_S_OK; -@@ -905,10 +883,8 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret) - trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret); - - out: -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - virtio_blk_req_complete(req, err_status); - virtio_blk_free_request(req); -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - g_free(data); - } - -@@ -944,10 +920,8 @@ static int virtio_blk_handle_zone_append(VirtIOBlockReq *req, - return 0; - - out: -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - virtio_blk_req_complete(req, err_status); - virtio_blk_free_request(req); -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - return err_status; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch deleted file mode 100644 index c31fcca..0000000 --- a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 2816f6ce20c496e21947f215112be34a5cb93606 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 14 Sep 2023 10:01:01 -0400 -Subject: [PATCH 076/101] virtio-blk: don't lock AioContext in the submission - code path - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [7/26] e0de2744cb319569ea008334e45ee5fc2ba9b6d7 (kmwolf/centos-qemu-kvm) - -There is no need to acquire the AioContext lock around blk_aio_*() or -blk_get_geometry() anymore. 
I/O plugging (defer_call()) also does not -require the AioContext lock anymore. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20230914140101.1065008-5-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Kevin Wolf ---- - hw/block/virtio-blk.c | 5 ----- - 1 file changed, 5 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index f5315df042..e110f9718b 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1111,7 +1111,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - MultiReqBuffer mrb = {}; - bool suppress_notifications = virtio_queue_get_notification(vq); - -- aio_context_acquire(blk_get_aio_context(s->blk)); - defer_call_begin(); - - do { -@@ -1137,7 +1136,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - } - - defer_call_end(); -- aio_context_release(blk_get_aio_context(s->blk)); - } - - static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) -@@ -1168,7 +1166,6 @@ static void virtio_blk_dma_restart_bh(void *opaque) - s->rq = NULL; - } - -- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); - while (req) { - VirtIOBlockReq *next = req->next; - if (virtio_blk_handle_request(req, &mrb)) { -@@ -1192,8 +1189,6 @@ static void virtio_blk_dma_restart_bh(void *opaque) - - /* Paired with inc in virtio_blk_dma_restart_cb() */ - blk_dec_in_flight(s->conf.conf.blk); -- -- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - - static void virtio_blk_dma_restart_cb(void *opaque, bool running, --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch b/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch deleted file mode 100644 index 3fb8211..0000000 --- a/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch +++ /dev/null @@ -1,1009 +0,0 @@ -From d9be1e1f199ee3171455636f32f3ba59b57e9351 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: 
Fri, 19 Jan 2024 08:57:43 -0500 -Subject: [PATCH 14/22] virtio-blk: move dataplane code into virtio-blk.c - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [10/17] ad854c6c7e808da272bd07229e8c915c1ee6f296 (stefanha/centos-stream-qemu-kvm) - -The dataplane code used to be significantly different from the -non-dataplane code and therefore had a separate source file. - -Over time the difference has gotten smaller because the I/O code paths -were unified. Nowadays the distinction between the VirtIOBlock and -VirtIOBlockDataPlane structs is more of an inconvenience that hinders -code simplification. - -Move hw/block/dataplane/virtio-blk.c into hw/block/virtio-blk.c, merging -VirtIOBlockDataPlane's fields into VirtIOBlock. - -hw/block/virtio-blk.c used VirtIOBlock->dataplane to check if -virtio_blk_data_plane_create() was successful. This is not necessary -because ->dataplane_started and ->dataplane_disabled can be used -instead. This patch makes those changes in order to drop -VirtIOBlock->dataplane. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-2-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 3bcc17f06526754fd675dcf601414442044fa0b6) -Signed-off-by: Stefan Hajnoczi - -Conflicts: - hw/block/dataplane/virtio-blk.c - Downstream is missing commit 0b2675c473f6 ("Rename "QEMU global mutex" - to "BQL" in comments and docs") so the source file still contains old - "QEMU global mutex held" comments instead of the new "BQL held" - phrasing. The code moved into hw/block/virtio-blk.c by this patch uses - the new "BQL held" phrasing so to minimize conflicts in future - backports. Either way, this is not a code change and therefore no risk - in introducing bugs. 
---- - hw/block/dataplane/meson.build | 1 - - hw/block/dataplane/trace-events | 5 - - hw/block/dataplane/trace.h | 1 - - hw/block/dataplane/virtio-blk.c | 404 -------------------------------- - hw/block/dataplane/virtio-blk.h | 34 --- - hw/block/virtio-blk.c | 362 ++++++++++++++++++++++++++-- - include/hw/virtio/virtio-blk.h | 12 +- - meson.build | 1 - - 8 files changed, 357 insertions(+), 463 deletions(-) - delete mode 100644 hw/block/dataplane/trace-events - delete mode 100644 hw/block/dataplane/trace.h - delete mode 100644 hw/block/dataplane/virtio-blk.c - delete mode 100644 hw/block/dataplane/virtio-blk.h - -diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build -index 025b3b061b..11a5eba2f4 100644 ---- a/hw/block/dataplane/meson.build -+++ b/hw/block/dataplane/meson.build -@@ -1,2 +1 @@ --system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) - specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) -diff --git a/hw/block/dataplane/trace-events b/hw/block/dataplane/trace-events -deleted file mode 100644 -index 38fc3e7507..0000000000 ---- a/hw/block/dataplane/trace-events -+++ /dev/null -@@ -1,5 +0,0 @@ --# See docs/devel/tracing.rst for syntax documentation. -- --# virtio-blk.c --virtio_blk_data_plane_start(void *s) "dataplane %p" --virtio_blk_data_plane_stop(void *s) "dataplane %p" -diff --git a/hw/block/dataplane/trace.h b/hw/block/dataplane/trace.h -deleted file mode 100644 -index 240cc59834..0000000000 ---- a/hw/block/dataplane/trace.h -+++ /dev/null -@@ -1 +0,0 @@ --#include "trace/trace-hw_block_dataplane.h" -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -deleted file mode 100644 -index 97a302cf49..0000000000 ---- a/hw/block/dataplane/virtio-blk.c -+++ /dev/null -@@ -1,404 +0,0 @@ --/* -- * Dedicated thread for virtio-blk I/O processing -- * -- * Copyright 2012 IBM, Corp. -- * Copyright 2012 Red Hat, Inc. 
and/or its affiliates -- * -- * Authors: -- * Stefan Hajnoczi -- * -- * This work is licensed under the terms of the GNU GPL, version 2 or later. -- * See the COPYING file in the top-level directory. -- * -- */ -- --#include "qemu/osdep.h" --#include "qapi/error.h" --#include "trace.h" --#include "qemu/iov.h" --#include "qemu/main-loop.h" --#include "qemu/thread.h" --#include "qemu/error-report.h" --#include "hw/virtio/virtio-blk.h" --#include "virtio-blk.h" --#include "block/aio.h" --#include "hw/virtio/virtio-bus.h" --#include "qom/object_interfaces.h" -- --struct VirtIOBlockDataPlane { -- bool starting; -- bool stopping; -- -- VirtIOBlkConf *conf; -- VirtIODevice *vdev; -- -- /* -- * The AioContext for each virtqueue. The BlockDriverState will use the -- * first element as its AioContext. -- */ -- AioContext **vq_aio_context; --}; -- --/* Raise an interrupt to signal guest, if necessary */ --void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) --{ -- virtio_notify_irqfd(s->vdev, vq); --} -- --/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ --static void --apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, -- AioContext **vq_aio_context, uint16_t num_queues) --{ -- IOThreadVirtQueueMappingList *node; -- size_t num_iothreads = 0; -- size_t cur_iothread = 0; -- -- for (node = iothread_vq_mapping_list; node; node = node->next) { -- num_iothreads++; -- } -- -- for (node = iothread_vq_mapping_list; node; node = node->next) { -- IOThread *iothread = iothread_by_id(node->value->iothread); -- AioContext *ctx = iothread_get_aio_context(iothread); -- -- /* Released in virtio_blk_data_plane_destroy() */ -- object_ref(OBJECT(iothread)); -- -- if (node->value->vqs) { -- uint16List *vq; -- -- /* Explicit vq:IOThread assignment */ -- for (vq = node->value->vqs; vq; vq = vq->next) { -- vq_aio_context[vq->value] = ctx; -- } -- } else { -- /* Round-robin vq:IOThread assignment */ -- for (unsigned i = 
cur_iothread; i < num_queues; -- i += num_iothreads) { -- vq_aio_context[i] = ctx; -- } -- } -- -- cur_iothread++; -- } --} -- --/* Context: QEMU global mutex held */ --bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, -- VirtIOBlockDataPlane **dataplane, -- Error **errp) --{ -- VirtIOBlockDataPlane *s; -- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -- -- *dataplane = NULL; -- -- if (conf->iothread || conf->iothread_vq_mapping_list) { -- if (!k->set_guest_notifiers || !k->ioeventfd_assign) { -- error_setg(errp, -- "device is incompatible with iothread " -- "(transport does not support notifiers)"); -- return false; -- } -- if (!virtio_device_ioeventfd_enabled(vdev)) { -- error_setg(errp, "ioeventfd is required for iothread"); -- return false; -- } -- -- /* If dataplane is (re-)enabled while the guest is running there could -- * be block jobs that can conflict. -- */ -- if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { -- error_prepend(errp, "cannot start virtio-blk dataplane: "); -- return false; -- } -- } -- /* Don't try if transport does not support notifiers. 
*/ -- if (!virtio_device_ioeventfd_enabled(vdev)) { -- return false; -- } -- -- s = g_new0(VirtIOBlockDataPlane, 1); -- s->vdev = vdev; -- s->conf = conf; -- s->vq_aio_context = g_new(AioContext *, conf->num_queues); -- -- if (conf->iothread_vq_mapping_list) { -- apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, -- conf->num_queues); -- } else if (conf->iothread) { -- AioContext *ctx = iothread_get_aio_context(conf->iothread); -- for (unsigned i = 0; i < conf->num_queues; i++) { -- s->vq_aio_context[i] = ctx; -- } -- -- /* Released in virtio_blk_data_plane_destroy() */ -- object_ref(OBJECT(conf->iothread)); -- } else { -- AioContext *ctx = qemu_get_aio_context(); -- for (unsigned i = 0; i < conf->num_queues; i++) { -- s->vq_aio_context[i] = ctx; -- } -- } -- -- *dataplane = s; -- -- return true; --} -- --/* Context: QEMU global mutex held */ --void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) --{ -- VirtIOBlock *vblk; -- VirtIOBlkConf *conf; -- -- if (!s) { -- return; -- } -- -- vblk = VIRTIO_BLK(s->vdev); -- assert(!vblk->dataplane_started); -- conf = s->conf; -- -- if (conf->iothread_vq_mapping_list) { -- IOThreadVirtQueueMappingList *node; -- -- for (node = conf->iothread_vq_mapping_list; node; node = node->next) { -- IOThread *iothread = iothread_by_id(node->value->iothread); -- object_unref(OBJECT(iothread)); -- } -- } -- -- if (conf->iothread) { -- object_unref(OBJECT(conf->iothread)); -- } -- -- g_free(s->vq_aio_context); -- g_free(s); --} -- --/* Context: QEMU global mutex held */ --int virtio_blk_data_plane_start(VirtIODevice *vdev) --{ -- VirtIOBlock *vblk = VIRTIO_BLK(vdev); -- VirtIOBlockDataPlane *s = vblk->dataplane; -- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); -- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -- unsigned i; -- unsigned nvqs = s->conf->num_queues; -- Error *local_err = NULL; -- int r; -- -- if (vblk->dataplane_started || s->starting) { -- return 0; -- } -- -- s->starting = true; -- -- /* Set 
up guest notifier (irq) */ -- r = k->set_guest_notifiers(qbus->parent, nvqs, true); -- if (r != 0) { -- error_report("virtio-blk failed to set guest notifier (%d), " -- "ensure -accel kvm is set.", r); -- goto fail_guest_notifiers; -- } -- -- /* -- * Batch all the host notifiers in a single transaction to avoid -- * quadratic time complexity in address_space_update_ioeventfds(). -- */ -- memory_region_transaction_begin(); -- -- /* Set up virtqueue notify */ -- for (i = 0; i < nvqs; i++) { -- r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); -- if (r != 0) { -- int j = i; -- -- fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); -- while (i--) { -- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -- } -- -- /* -- * The transaction expects the ioeventfds to be open when it -- * commits. Do it now, before the cleanup loop. -- */ -- memory_region_transaction_commit(); -- -- while (j--) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); -- } -- goto fail_host_notifiers; -- } -- } -- -- memory_region_transaction_commit(); -- -- trace_virtio_blk_data_plane_start(s); -- -- r = blk_set_aio_context(s->conf->conf.blk, s->vq_aio_context[0], -- &local_err); -- if (r < 0) { -- error_report_err(local_err); -- goto fail_aio_context; -- } -- -- /* -- * These fields must be visible to the IOThread when it processes the -- * virtqueue, otherwise it will think dataplane has not started yet. -- * -- * Make sure ->dataplane_started is false when blk_set_aio_context() is -- * called above so that draining does not cause the host notifier to be -- * detached/attached prematurely. 
-- */ -- s->starting = false; -- vblk->dataplane_started = true; -- smp_wmb(); /* paired with aio_notify_accept() on the read side */ -- -- /* Get this show started by hooking up our callbacks */ -- if (!blk_in_drain(s->conf->conf.blk)) { -- for (i = 0; i < nvqs; i++) { -- VirtQueue *vq = virtio_get_queue(s->vdev, i); -- AioContext *ctx = s->vq_aio_context[i]; -- -- /* Kick right away to begin processing requests already in vring */ -- event_notifier_set(virtio_queue_get_host_notifier(vq)); -- -- virtio_queue_aio_attach_host_notifier(vq, ctx); -- } -- } -- return 0; -- -- fail_aio_context: -- memory_region_transaction_begin(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -- } -- -- memory_region_transaction_commit(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -- } -- fail_host_notifiers: -- k->set_guest_notifiers(qbus->parent, nvqs, false); -- fail_guest_notifiers: -- vblk->dataplane_disabled = true; -- s->starting = false; -- return -ENOSYS; --} -- --/* Stop notifications for new requests from guest. -- * -- * Context: BH in IOThread -- */ --static void virtio_blk_data_plane_stop_vq_bh(void *opaque) --{ -- VirtQueue *vq = opaque; -- EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); -- -- virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); -- -- /* -- * Test and clear notifier after disabling event, in case poll callback -- * didn't have time to run. 
-- */ -- virtio_queue_host_notifier_read(host_notifier); --} -- --/* Context: QEMU global mutex held */ --void virtio_blk_data_plane_stop(VirtIODevice *vdev) --{ -- VirtIOBlock *vblk = VIRTIO_BLK(vdev); -- VirtIOBlockDataPlane *s = vblk->dataplane; -- BusState *qbus = qdev_get_parent_bus(DEVICE(vblk)); -- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -- unsigned i; -- unsigned nvqs = s->conf->num_queues; -- -- if (!vblk->dataplane_started || s->stopping) { -- return; -- } -- -- /* Better luck next time. */ -- if (vblk->dataplane_disabled) { -- vblk->dataplane_disabled = false; -- vblk->dataplane_started = false; -- return; -- } -- s->stopping = true; -- trace_virtio_blk_data_plane_stop(s); -- -- if (!blk_in_drain(s->conf->conf.blk)) { -- for (i = 0; i < nvqs; i++) { -- VirtQueue *vq = virtio_get_queue(s->vdev, i); -- AioContext *ctx = s->vq_aio_context[i]; -- -- aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); -- } -- } -- -- /* -- * Batch all the host notifiers in a single transaction to avoid -- * quadratic time complexity in address_space_update_ioeventfds(). -- */ -- memory_region_transaction_begin(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -- } -- -- /* -- * The transaction expects the ioeventfds to be open when it -- * commits. Do it now, before the cleanup loop. -- */ -- memory_region_transaction_commit(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -- } -- -- /* -- * Set ->dataplane_started to false before draining so that host notifiers -- * are not detached/attached anymore. -- */ -- vblk->dataplane_started = false; -- -- /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ -- blk_drain(s->conf->conf.blk); -- -- /* -- * Try to switch bs back to the QEMU main loop. 
If other users keep the -- * BlockBackend in the iothread, that's ok -- */ -- blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); -- -- /* Clean up guest notifier (irq) */ -- k->set_guest_notifiers(qbus->parent, nvqs, false); -- -- s->stopping = false; --} -- --void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s) --{ -- VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); -- -- for (uint16_t i = 0; i < s->conf->num_queues; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); -- } --} -- --void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s) --{ -- VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); -- -- for (uint16_t i = 0; i < s->conf->num_queues; i++) { -- VirtQueue *vq = virtio_get_queue(vdev, i); -- virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); -- } --} -diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h -deleted file mode 100644 -index 1a806fe447..0000000000 ---- a/hw/block/dataplane/virtio-blk.h -+++ /dev/null -@@ -1,34 +0,0 @@ --/* -- * Dedicated thread for virtio-blk I/O processing -- * -- * Copyright 2012 IBM, Corp. -- * Copyright 2012 Red Hat, Inc. and/or its affiliates -- * -- * Authors: -- * Stefan Hajnoczi -- * -- * This work is licensed under the terms of the GNU GPL, version 2 or later. -- * See the COPYING file in the top-level directory. 
-- * -- */ -- --#ifndef HW_DATAPLANE_VIRTIO_BLK_H --#define HW_DATAPLANE_VIRTIO_BLK_H -- --#include "hw/virtio/virtio.h" -- --typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane; -- --bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, -- VirtIOBlockDataPlane **dataplane, -- Error **errp); --void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s); --void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); -- --int virtio_blk_data_plane_start(VirtIODevice *vdev); --void virtio_blk_data_plane_stop(VirtIODevice *vdev); -- --void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s); --void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s); -- --#endif /* HW_DATAPLANE_VIRTIO_BLK_H */ -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 46e73b2c96..cb623069f8 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -27,7 +27,6 @@ - #include "sysemu/sysemu.h" - #include "sysemu/runstate.h" - #include "hw/virtio/virtio-blk.h" --#include "dataplane/virtio-blk.h" - #include "scsi/constants.h" - #ifdef __linux__ - # include -@@ -66,7 +65,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) - iov_discard_undo(&req->outhdr_undo); - virtqueue_push(req->vq, &req->elem, req->in_len); - if (s->dataplane_started && !s->dataplane_disabled) { -- virtio_blk_data_plane_notify(s->dataplane, req->vq); -+ virtio_notify_irqfd(vdev, req->vq); - } else { - virtio_notify(vdev, req->vq); - } -@@ -1142,7 +1141,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - { - VirtIOBlock *s = (VirtIOBlock *)vdev; - -- if (s->dataplane && !s->dataplane_started) { -+ if (!s->dataplane_disabled && !s->dataplane_started) { - /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start - * dataplane here instead of waiting for .set_status(). 
- */ -@@ -1546,16 +1545,34 @@ static void virtio_blk_resize(void *opaque) - aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); - } - -+static void virtio_blk_data_plane_detach(VirtIOBlock *s) -+{ -+ VirtIODevice *vdev = VIRTIO_DEVICE(s); -+ -+ for (uint16_t i = 0; i < s->conf.num_queues; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); -+ } -+} -+ -+static void virtio_blk_data_plane_attach(VirtIOBlock *s) -+{ -+ VirtIODevice *vdev = VIRTIO_DEVICE(s); -+ -+ for (uint16_t i = 0; i < s->conf.num_queues; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); -+ } -+} -+ - /* Suspend virtqueue ioeventfd processing during drain */ - static void virtio_blk_drained_begin(void *opaque) - { - VirtIOBlock *s = opaque; - -- if (!s->dataplane || !s->dataplane_started) { -- return; -+ if (s->dataplane_started) { -+ virtio_blk_data_plane_detach(s); - } -- -- virtio_blk_data_plane_detach(s->dataplane); - } - - /* Resume virtqueue ioeventfd processing after drain */ -@@ -1563,11 +1580,9 @@ static void virtio_blk_drained_end(void *opaque) - { - VirtIOBlock *s = opaque; - -- if (!s->dataplane || !s->dataplane_started) { -- return; -+ if (s->dataplane_started) { -+ virtio_blk_data_plane_attach(s); - } -- -- virtio_blk_data_plane_attach(s->dataplane); - } - - static const BlockDevOps virtio_block_ops = { -@@ -1576,6 +1591,326 @@ static const BlockDevOps virtio_block_ops = { - .drained_end = virtio_blk_drained_end, - }; - -+/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ -+static void -+apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, -+ AioContext **vq_aio_context, uint16_t num_queues) -+{ -+ IOThreadVirtQueueMappingList *node; -+ size_t num_iothreads = 0; -+ size_t cur_iothread = 0; -+ -+ for (node = iothread_vq_mapping_list; node; node = node->next) { -+ num_iothreads++; 
-+ } -+ -+ for (node = iothread_vq_mapping_list; node; node = node->next) { -+ IOThread *iothread = iothread_by_id(node->value->iothread); -+ AioContext *ctx = iothread_get_aio_context(iothread); -+ -+ /* Released in virtio_blk_data_plane_destroy() */ -+ object_ref(OBJECT(iothread)); -+ -+ if (node->value->vqs) { -+ uint16List *vq; -+ -+ /* Explicit vq:IOThread assignment */ -+ for (vq = node->value->vqs; vq; vq = vq->next) { -+ vq_aio_context[vq->value] = ctx; -+ } -+ } else { -+ /* Round-robin vq:IOThread assignment */ -+ for (unsigned i = cur_iothread; i < num_queues; -+ i += num_iothreads) { -+ vq_aio_context[i] = ctx; -+ } -+ } -+ -+ cur_iothread++; -+ } -+} -+ -+/* Context: BQL held */ -+static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) -+{ -+ VirtIODevice *vdev = VIRTIO_DEVICE(s); -+ VirtIOBlkConf *conf = &s->conf; -+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -+ -+ if (conf->iothread || conf->iothread_vq_mapping_list) { -+ if (!k->set_guest_notifiers || !k->ioeventfd_assign) { -+ error_setg(errp, -+ "device is incompatible with iothread " -+ "(transport does not support notifiers)"); -+ return false; -+ } -+ if (!virtio_device_ioeventfd_enabled(vdev)) { -+ error_setg(errp, "ioeventfd is required for iothread"); -+ return false; -+ } -+ -+ /* -+ * If dataplane is (re-)enabled while the guest is running there could -+ * be block jobs that can conflict. -+ */ -+ if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { -+ error_prepend(errp, "cannot start virtio-blk dataplane: "); -+ return false; -+ } -+ } -+ /* Don't try if transport does not support notifiers. 
*/ -+ if (!virtio_device_ioeventfd_enabled(vdev)) { -+ s->dataplane_disabled = true; -+ return false; -+ } -+ -+ s->vq_aio_context = g_new(AioContext *, conf->num_queues); -+ -+ if (conf->iothread_vq_mapping_list) { -+ apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, -+ conf->num_queues); -+ } else if (conf->iothread) { -+ AioContext *ctx = iothread_get_aio_context(conf->iothread); -+ for (unsigned i = 0; i < conf->num_queues; i++) { -+ s->vq_aio_context[i] = ctx; -+ } -+ -+ /* Released in virtio_blk_data_plane_destroy() */ -+ object_ref(OBJECT(conf->iothread)); -+ } else { -+ AioContext *ctx = qemu_get_aio_context(); -+ for (unsigned i = 0; i < conf->num_queues; i++) { -+ s->vq_aio_context[i] = ctx; -+ } -+ } -+ -+ return true; -+} -+ -+/* Context: BQL held */ -+static void virtio_blk_data_plane_destroy(VirtIOBlock *s) -+{ -+ VirtIOBlkConf *conf = &s->conf; -+ -+ assert(!s->dataplane_started); -+ -+ if (conf->iothread_vq_mapping_list) { -+ IOThreadVirtQueueMappingList *node; -+ -+ for (node = conf->iothread_vq_mapping_list; node; node = node->next) { -+ IOThread *iothread = iothread_by_id(node->value->iothread); -+ object_unref(OBJECT(iothread)); -+ } -+ } -+ -+ if (conf->iothread) { -+ object_unref(OBJECT(conf->iothread)); -+ } -+ -+ g_free(s->vq_aio_context); -+ s->vq_aio_context = NULL; -+} -+ -+/* Context: BQL held */ -+static int virtio_blk_data_plane_start(VirtIODevice *vdev) -+{ -+ VirtIOBlock *s = VIRTIO_BLK(vdev); -+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); -+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -+ unsigned i; -+ unsigned nvqs = s->conf.num_queues; -+ Error *local_err = NULL; -+ int r; -+ -+ if (s->dataplane_started || s->dataplane_starting) { -+ return 0; -+ } -+ -+ s->dataplane_starting = true; -+ -+ /* Set up guest notifier (irq) */ -+ r = k->set_guest_notifiers(qbus->parent, nvqs, true); -+ if (r != 0) { -+ error_report("virtio-blk failed to set guest notifier (%d), " -+ "ensure -accel kvm is set.", r); -+ 
goto fail_guest_notifiers; -+ } -+ -+ /* -+ * Batch all the host notifiers in a single transaction to avoid -+ * quadratic time complexity in address_space_update_ioeventfds(). -+ */ -+ memory_region_transaction_begin(); -+ -+ /* Set up virtqueue notify */ -+ for (i = 0; i < nvqs; i++) { -+ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); -+ if (r != 0) { -+ int j = i; -+ -+ fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); -+ while (i--) { -+ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ /* -+ * The transaction expects the ioeventfds to be open when it -+ * commits. Do it now, before the cleanup loop. -+ */ -+ memory_region_transaction_commit(); -+ -+ while (j--) { -+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); -+ } -+ goto fail_host_notifiers; -+ } -+ } -+ -+ memory_region_transaction_commit(); -+ -+ r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], -+ &local_err); -+ if (r < 0) { -+ error_report_err(local_err); -+ goto fail_aio_context; -+ } -+ -+ /* -+ * These fields must be visible to the IOThread when it processes the -+ * virtqueue, otherwise it will think dataplane has not started yet. -+ * -+ * Make sure ->dataplane_started is false when blk_set_aio_context() is -+ * called above so that draining does not cause the host notifier to be -+ * detached/attached prematurely. 
-+ */ -+ s->dataplane_starting = false; -+ s->dataplane_started = true; -+ smp_wmb(); /* paired with aio_notify_accept() on the read side */ -+ -+ /* Get this show started by hooking up our callbacks */ -+ if (!blk_in_drain(s->conf.conf.blk)) { -+ for (i = 0; i < nvqs; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ AioContext *ctx = s->vq_aio_context[i]; -+ -+ /* Kick right away to begin processing requests already in vring */ -+ event_notifier_set(virtio_queue_get_host_notifier(vq)); -+ -+ virtio_queue_aio_attach_host_notifier(vq, ctx); -+ } -+ } -+ return 0; -+ -+ fail_aio_context: -+ memory_region_transaction_begin(); -+ -+ for (i = 0; i < nvqs; i++) { -+ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < nvqs; i++) { -+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -+ } -+ fail_host_notifiers: -+ k->set_guest_notifiers(qbus->parent, nvqs, false); -+ fail_guest_notifiers: -+ s->dataplane_disabled = true; -+ s->dataplane_starting = false; -+ return -ENOSYS; -+} -+ -+/* Stop notifications for new requests from guest. -+ * -+ * Context: BH in IOThread -+ */ -+static void virtio_blk_data_plane_stop_vq_bh(void *opaque) -+{ -+ VirtQueue *vq = opaque; -+ EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); -+ -+ virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); -+ -+ /* -+ * Test and clear notifier after disabling event, in case poll callback -+ * didn't have time to run. -+ */ -+ virtio_queue_host_notifier_read(host_notifier); -+} -+ -+/* Context: BQL held */ -+static void virtio_blk_data_plane_stop(VirtIODevice *vdev) -+{ -+ VirtIOBlock *s = VIRTIO_BLK(vdev); -+ BusState *qbus = qdev_get_parent_bus(DEVICE(s)); -+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -+ unsigned i; -+ unsigned nvqs = s->conf.num_queues; -+ -+ if (!s->dataplane_started || s->dataplane_stopping) { -+ return; -+ } -+ -+ /* Better luck next time. 
*/ -+ if (s->dataplane_disabled) { -+ s->dataplane_disabled = false; -+ s->dataplane_started = false; -+ return; -+ } -+ s->dataplane_stopping = true; -+ -+ if (!blk_in_drain(s->conf.conf.blk)) { -+ for (i = 0; i < nvqs; i++) { -+ VirtQueue *vq = virtio_get_queue(vdev, i); -+ AioContext *ctx = s->vq_aio_context[i]; -+ -+ aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); -+ } -+ } -+ -+ /* -+ * Batch all the host notifiers in a single transaction to avoid -+ * quadratic time complexity in address_space_update_ioeventfds(). -+ */ -+ memory_region_transaction_begin(); -+ -+ for (i = 0; i < nvqs; i++) { -+ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ /* -+ * The transaction expects the ioeventfds to be open when it -+ * commits. Do it now, before the cleanup loop. -+ */ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < nvqs; i++) { -+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -+ } -+ -+ /* -+ * Set ->dataplane_started to false before draining so that host notifiers -+ * are not detached/attached anymore. -+ */ -+ s->dataplane_started = false; -+ -+ /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ -+ blk_drain(s->conf.conf.blk); -+ -+ /* -+ * Try to switch bs back to the QEMU main loop. 
If other users keep the -+ * BlockBackend in the iothread, that's ok -+ */ -+ blk_set_aio_context(s->conf.conf.blk, qemu_get_aio_context(), NULL); -+ -+ /* Clean up guest notifier (irq) */ -+ k->set_guest_notifiers(qbus->parent, nvqs, false); -+ -+ s->dataplane_stopping = false; -+} -+ - static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - { - VirtIODevice *vdev = VIRTIO_DEVICE(dev); -@@ -1680,7 +2015,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); - } - qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); -- virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); -+ virtio_blk_data_plane_create(s, &err); - if (err != NULL) { - error_propagate(errp, err); - for (i = 0; i < conf->num_queues; i++) { -@@ -1717,8 +2052,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) - - blk_drain(s->blk); - del_boot_device_lchs(dev, "/disk@0,0"); -- virtio_blk_data_plane_destroy(s->dataplane); -- s->dataplane = NULL; -+ virtio_blk_data_plane_destroy(s); - for (i = 0; i < conf->num_queues; i++) { - virtio_del_queue(vdev, i); - } -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index 5e4091e4da..fecffdc303 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -50,8 +50,6 @@ struct VirtIOBlkConf - bool x_enable_wce_if_config_wce; - }; - --struct VirtIOBlockDataPlane; -- - struct VirtIOBlockReq; - struct VirtIOBlock { - VirtIODevice parent_obj; -@@ -64,7 +62,15 @@ struct VirtIOBlock { - VMChangeStateEntry *change; - bool dataplane_disabled; - bool dataplane_started; -- struct VirtIOBlockDataPlane *dataplane; -+ bool dataplane_starting; -+ bool dataplane_stopping; -+ -+ /* -+ * The AioContext for each virtqueue. The BlockDriverState will use the -+ * first element as its AioContext. 
-+ */ -+ AioContext **vq_aio_context; -+ - uint64_t host_features; - size_t config_size; - BlockRAMRegistrar blk_ram_registrar; -diff --git a/meson.build b/meson.build -index 6c77d9687d..47c65d0f53 100644 ---- a/meson.build -+++ b/meson.build -@@ -3298,7 +3298,6 @@ if have_system - 'hw/arm', - 'hw/audio', - 'hw/block', -- 'hw/block/dataplane', - 'hw/char', - 'hw/display', - 'hw/dma', --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch b/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch deleted file mode 100644 index 5f45b9d..0000000 --- a/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 71257c2f320f1511de1e275779cf4b90effc1f02 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 19 Jan 2024 08:57:44 -0500 -Subject: [PATCH 15/22] virtio-blk: rename dataplane create/destroy functions - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [11/17] 60e7016d5f3e4e9e89945578279b12f812f85ddf (stefanha/centos-stream-qemu-kvm) - -virtio_blk_data_plane_create() and virtio_blk_data_plane_destroy() are -actually about s->vq_aio_context[] rather than managing -dataplane-specific state. - -As a prerequisite to using s->vq_aio_context[] in all code paths (even -when dataplane is not used), rename these functions to reflect that they -just manage s->vq_aio_context and call them regardless of whether or not -dataplane is in use. - -Note that virtio-blk supports running with -device -virtio-blk-pci,ioevent=off where the vCPU thread enters the device -emulation code. In this mode ioeventfd is not used for virtqueue -processing. However, we still want to initialize s->vq_aio_context[] to -qemu_aio_context in that case since I/O completion callbacks will be -invoked in the main loop thread. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-3-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 57bc2658935778d1ae0edbcd4402763da8c7bae2) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 23 ++++++++++++----------- - 1 file changed, 12 insertions(+), 11 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index cb623069f8..4d6f9377c6 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1608,7 +1608,7 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, - IOThread *iothread = iothread_by_id(node->value->iothread); - AioContext *ctx = iothread_get_aio_context(iothread); - -- /* Released in virtio_blk_data_plane_destroy() */ -+ /* Released in virtio_blk_vq_aio_context_cleanup() */ - object_ref(OBJECT(iothread)); - - if (node->value->vqs) { -@@ -1631,7 +1631,7 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, - } - - /* Context: BQL held */ --static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) -+static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) - { - VirtIODevice *vdev = VIRTIO_DEVICE(s); - VirtIOBlkConf *conf = &s->conf; -@@ -1659,11 +1659,6 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) - return false; - } - } -- /* Don't try if transport does not support notifiers. 
*/ -- if (!virtio_device_ioeventfd_enabled(vdev)) { -- s->dataplane_disabled = true; -- return false; -- } - - s->vq_aio_context = g_new(AioContext *, conf->num_queues); - -@@ -1676,7 +1671,7 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) - s->vq_aio_context[i] = ctx; - } - -- /* Released in virtio_blk_data_plane_destroy() */ -+ /* Released in virtio_blk_vq_aio_context_cleanup() */ - object_ref(OBJECT(conf->iothread)); - } else { - AioContext *ctx = qemu_get_aio_context(); -@@ -1689,7 +1684,7 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) - } - - /* Context: BQL held */ --static void virtio_blk_data_plane_destroy(VirtIOBlock *s) -+static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) - { - VirtIOBlkConf *conf = &s->conf; - -@@ -2015,7 +2010,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); - } - qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); -- virtio_blk_data_plane_create(s, &err); -+ -+ /* Don't start dataplane if transport does not support notifiers. 
*/ -+ if (!virtio_device_ioeventfd_enabled(vdev)) { -+ s->dataplane_disabled = true; -+ } -+ -+ virtio_blk_vq_aio_context_init(s, &err); - if (err != NULL) { - error_propagate(errp, err); - for (i = 0; i < conf->num_queues; i++) { -@@ -2052,7 +2053,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) - - blk_drain(s->blk); - del_boot_device_lchs(dev, "/disk@0,0"); -- virtio_blk_data_plane_destroy(s); -+ virtio_blk_vq_aio_context_cleanup(s); - for (i = 0; i < conf->num_queues; i++) { - virtio_del_queue(vdev, i); - } --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch b/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch deleted file mode 100644 index a0c0b67..0000000 --- a/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch +++ /dev/null @@ -1,307 +0,0 @@ -From ba80cdcd5604b9b9efc4682ade9828ab74ebf5e6 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 19 Jan 2024 08:57:45 -0500 -Subject: [PATCH 16/22] virtio-blk: rename dataplane to ioeventfd - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [12/17] 4230005e0d1b4629fe4540f1f63cd705e58618da (stefanha/centos-stream-qemu-kvm) - -The dataplane code is really about using ioeventfd. It's used both for -IOThreads (what we think of as dataplane) and for the core virtio-pci -code's ioeventfd feature (which is enabled by default and used when no -IOThread has been specified). Rename the code to reflect this. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-4-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 3cdaf3dd4a4ca94ebabe7eab23b432f1a6c547cc) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 78 +++++++++++++++++----------------- - include/hw/virtio/virtio-blk.h | 8 ++-- - 2 files changed, 43 insertions(+), 43 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 4d6f9377c6..08c566946a 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -64,7 +64,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) - iov_discard_undo(&req->inhdr_undo); - iov_discard_undo(&req->outhdr_undo); - virtqueue_push(req->vq, &req->elem, req->in_len); -- if (s->dataplane_started && !s->dataplane_disabled) { -+ if (s->ioeventfd_started && !s->ioeventfd_disabled) { - virtio_notify_irqfd(vdev, req->vq); - } else { - virtio_notify(vdev, req->vq); -@@ -1141,12 +1141,12 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - { - VirtIOBlock *s = (VirtIOBlock *)vdev; - -- if (!s->dataplane_disabled && !s->dataplane_started) { -+ if (!s->ioeventfd_disabled && !s->ioeventfd_started) { - /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start -- * dataplane here instead of waiting for .set_status(). -+ * ioeventfd here instead of waiting for .set_status(). - */ - virtio_device_start_ioeventfd(vdev); -- if (!s->dataplane_disabled) { -+ if (!s->ioeventfd_disabled) { - return; - } - } -@@ -1213,7 +1213,7 @@ static void virtio_blk_reset(VirtIODevice *vdev) - VirtIOBlockReq *req; - - /* Dataplane has stopped... */ -- assert(!s->dataplane_started); -+ assert(!s->ioeventfd_started); - - /* ...but requests may still be in flight. 
*/ - blk_drain(s->blk); -@@ -1380,7 +1380,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) - VirtIOBlock *s = VIRTIO_BLK(vdev); - - if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { -- assert(!s->dataplane_started); -+ assert(!s->ioeventfd_started); - } - - if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { -@@ -1545,7 +1545,7 @@ static void virtio_blk_resize(void *opaque) - aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); - } - --static void virtio_blk_data_plane_detach(VirtIOBlock *s) -+static void virtio_blk_ioeventfd_detach(VirtIOBlock *s) - { - VirtIODevice *vdev = VIRTIO_DEVICE(s); - -@@ -1555,7 +1555,7 @@ static void virtio_blk_data_plane_detach(VirtIOBlock *s) - } - } - --static void virtio_blk_data_plane_attach(VirtIOBlock *s) -+static void virtio_blk_ioeventfd_attach(VirtIOBlock *s) - { - VirtIODevice *vdev = VIRTIO_DEVICE(s); - -@@ -1570,8 +1570,8 @@ static void virtio_blk_drained_begin(void *opaque) - { - VirtIOBlock *s = opaque; - -- if (s->dataplane_started) { -- virtio_blk_data_plane_detach(s); -+ if (s->ioeventfd_started) { -+ virtio_blk_ioeventfd_detach(s); - } - } - -@@ -1580,8 +1580,8 @@ static void virtio_blk_drained_end(void *opaque) - { - VirtIOBlock *s = opaque; - -- if (s->dataplane_started) { -- virtio_blk_data_plane_attach(s); -+ if (s->ioeventfd_started) { -+ virtio_blk_ioeventfd_attach(s); - } - } - -@@ -1651,11 +1651,11 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) - } - - /* -- * If dataplane is (re-)enabled while the guest is running there could -+ * If ioeventfd is (re-)enabled while the guest is running there could - * be block jobs that can conflict. 
- */ - if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { -- error_prepend(errp, "cannot start virtio-blk dataplane: "); -+ error_prepend(errp, "cannot start virtio-blk ioeventfd: "); - return false; - } - } -@@ -1688,7 +1688,7 @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) - { - VirtIOBlkConf *conf = &s->conf; - -- assert(!s->dataplane_started); -+ assert(!s->ioeventfd_started); - - if (conf->iothread_vq_mapping_list) { - IOThreadVirtQueueMappingList *node; -@@ -1708,7 +1708,7 @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) - } - - /* Context: BQL held */ --static int virtio_blk_data_plane_start(VirtIODevice *vdev) -+static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) - { - VirtIOBlock *s = VIRTIO_BLK(vdev); - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); -@@ -1718,11 +1718,11 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) - Error *local_err = NULL; - int r; - -- if (s->dataplane_started || s->dataplane_starting) { -+ if (s->ioeventfd_started || s->ioeventfd_starting) { - return 0; - } - -- s->dataplane_starting = true; -+ s->ioeventfd_starting = true; - - /* Set up guest notifier (irq) */ - r = k->set_guest_notifiers(qbus->parent, nvqs, true); -@@ -1773,14 +1773,14 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) - - /* - * These fields must be visible to the IOThread when it processes the -- * virtqueue, otherwise it will think dataplane has not started yet. -+ * virtqueue, otherwise it will think ioeventfd has not started yet. - * -- * Make sure ->dataplane_started is false when blk_set_aio_context() is -+ * Make sure ->ioeventfd_started is false when blk_set_aio_context() is - * called above so that draining does not cause the host notifier to be - * detached/attached prematurely. 
- */ -- s->dataplane_starting = false; -- s->dataplane_started = true; -+ s->ioeventfd_starting = false; -+ s->ioeventfd_started = true; - smp_wmb(); /* paired with aio_notify_accept() on the read side */ - - /* Get this show started by hooking up our callbacks */ -@@ -1812,8 +1812,8 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) - fail_host_notifiers: - k->set_guest_notifiers(qbus->parent, nvqs, false); - fail_guest_notifiers: -- s->dataplane_disabled = true; -- s->dataplane_starting = false; -+ s->ioeventfd_disabled = true; -+ s->ioeventfd_starting = false; - return -ENOSYS; - } - -@@ -1821,7 +1821,7 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) - * - * Context: BH in IOThread - */ --static void virtio_blk_data_plane_stop_vq_bh(void *opaque) -+static void virtio_blk_ioeventfd_stop_vq_bh(void *opaque) - { - VirtQueue *vq = opaque; - EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); -@@ -1836,7 +1836,7 @@ static void virtio_blk_data_plane_stop_vq_bh(void *opaque) - } - - /* Context: BQL held */ --static void virtio_blk_data_plane_stop(VirtIODevice *vdev) -+static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) - { - VirtIOBlock *s = VIRTIO_BLK(vdev); - BusState *qbus = qdev_get_parent_bus(DEVICE(s)); -@@ -1844,24 +1844,24 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) - unsigned i; - unsigned nvqs = s->conf.num_queues; - -- if (!s->dataplane_started || s->dataplane_stopping) { -+ if (!s->ioeventfd_started || s->ioeventfd_stopping) { - return; - } - - /* Better luck next time. 
*/ -- if (s->dataplane_disabled) { -- s->dataplane_disabled = false; -- s->dataplane_started = false; -+ if (s->ioeventfd_disabled) { -+ s->ioeventfd_disabled = false; -+ s->ioeventfd_started = false; - return; - } -- s->dataplane_stopping = true; -+ s->ioeventfd_stopping = true; - - if (!blk_in_drain(s->conf.conf.blk)) { - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(vdev, i); - AioContext *ctx = s->vq_aio_context[i]; - -- aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); -+ aio_wait_bh_oneshot(ctx, virtio_blk_ioeventfd_stop_vq_bh, vq); - } - } - -@@ -1886,10 +1886,10 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) - } - - /* -- * Set ->dataplane_started to false before draining so that host notifiers -+ * Set ->ioeventfd_started to false before draining so that host notifiers - * are not detached/attached anymore. - */ -- s->dataplane_started = false; -+ s->ioeventfd_started = false; - - /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ - blk_drain(s->conf.conf.blk); -@@ -1903,7 +1903,7 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) - /* Clean up guest notifier (irq) */ - k->set_guest_notifiers(qbus->parent, nvqs, false); - -- s->dataplane_stopping = false; -+ s->ioeventfd_stopping = false; - } - - static void virtio_blk_device_realize(DeviceState *dev, Error **errp) -@@ -2011,9 +2011,9 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - } - qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); - -- /* Don't start dataplane if transport does not support notifiers. */ -+ /* Don't start ioeventfd if transport does not support notifiers. 
*/ - if (!virtio_device_ioeventfd_enabled(vdev)) { -- s->dataplane_disabled = true; -+ s->ioeventfd_disabled = true; - } - - virtio_blk_vq_aio_context_init(s, &err); -@@ -2137,8 +2137,8 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data) - vdc->reset = virtio_blk_reset; - vdc->save = virtio_blk_save_device; - vdc->load = virtio_blk_load_device; -- vdc->start_ioeventfd = virtio_blk_data_plane_start; -- vdc->stop_ioeventfd = virtio_blk_data_plane_stop; -+ vdc->start_ioeventfd = virtio_blk_start_ioeventfd; -+ vdc->stop_ioeventfd = virtio_blk_stop_ioeventfd; - } - - static const TypeInfo virtio_blk_info = { -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index fecffdc303..833a9a344f 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -60,10 +60,10 @@ struct VirtIOBlock { - unsigned short sector_mask; - bool original_wce; - VMChangeStateEntry *change; -- bool dataplane_disabled; -- bool dataplane_started; -- bool dataplane_starting; -- bool dataplane_stopping; -+ bool ioeventfd_disabled; -+ bool ioeventfd_started; -+ bool ioeventfd_starting; -+ bool ioeventfd_stopping; - - /* - * The AioContext for each virtqueue. 
The BlockDriverState will use the --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch b/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch deleted file mode 100644 index 611b881..0000000 --- a/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 9311035821b3fea3f78c7f06ddb8a3861584f907 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 19 Jan 2024 08:57:46 -0500 -Subject: [PATCH 17/22] virtio-blk: restart s->rq reqs in vq AioContexts - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [13/17] cf5ad0352a78458ffc7588f967963f62b267fd64 (stefanha/centos-stream-qemu-kvm) - -A virtio-blk device with the iothread-vq-mapping parameter has -per-virtqueue AioContexts. It is not thread-safe to process s->rq -requests in the BlockBackend AioContext since that may be different from -the virtqueue's AioContext to which this request belongs. The code -currently races and could crash. - -Adapt virtio_blk_dma_restart_cb() to first split s->rq into per-vq lists -and then schedule a BH each vq's AioContext as necessary. This way -requests are safely processed in their vq's AioContext. 
- -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-5-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 71ee0cdd14cc01a8b51aa4e9577dd0a1bb2f8e19) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 44 ++++++++++++++++++++++++++++++++----------- - 1 file changed, 33 insertions(+), 11 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 08c566946a..f48ce5cbb8 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1156,16 +1156,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - - static void virtio_blk_dma_restart_bh(void *opaque) - { -- VirtIOBlock *s = opaque; -+ VirtIOBlockReq *req = opaque; -+ VirtIOBlock *s = req->dev; /* we're called with at least one request */ - -- VirtIOBlockReq *req; - MultiReqBuffer mrb = {}; - -- WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -- req = s->rq; -- s->rq = NULL; -- } -- - while (req) { - VirtIOBlockReq *next = req->next; - if (virtio_blk_handle_request(req, &mrb)) { -@@ -1195,16 +1190,43 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, - RunState state) - { - VirtIOBlock *s = opaque; -+ uint16_t num_queues = s->conf.num_queues; - - if (!running) { - return; - } - -- /* Paired with dec in virtio_blk_dma_restart_bh() */ -- blk_inc_in_flight(s->conf.conf.blk); -+ /* Split the device-wide s->rq request list into per-vq request lists */ -+ g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues); -+ VirtIOBlockReq *rq; -+ -+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { -+ rq = s->rq; -+ s->rq = NULL; -+ } -+ -+ while (rq) { -+ VirtIOBlockReq *next = rq->next; -+ uint16_t idx = virtio_get_queue_index(rq->vq); -+ -+ rq->next = vq_rq[idx]; -+ vq_rq[idx] = rq; -+ rq = next; -+ } - -- aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.conf.blk), -- virtio_blk_dma_restart_bh, s); -+ /* Schedule a BH to submit the requests in each vq's AioContext */ -+ for (uint16_t i = 
0; i < num_queues; i++) { -+ if (!vq_rq[i]) { -+ continue; -+ } -+ -+ /* Paired with dec in virtio_blk_dma_restart_bh() */ -+ blk_inc_in_flight(s->conf.conf.blk); -+ -+ aio_bh_schedule_oneshot(s->vq_aio_context[i], -+ virtio_blk_dma_restart_bh, -+ vq_rq[i]); -+ } - } - - static void virtio_blk_reset(VirtIODevice *vdev) --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch b/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch deleted file mode 100644 index 303c007..0000000 --- a/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 282cebc22987958d11efc76e4f6ddb9601e709d9 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 19 Jan 2024 08:57:47 -0500 -Subject: [PATCH 18/22] virtio-blk: tolerate failure to set BlockBackend - AioContext - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter -RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Czenczek -RH-Commit: [14/17] edb113ce9fea0c1a88ae7b5d61c35c1981e6993f (stefanha/centos-stream-qemu-kvm) - -We no longer rely on setting the AioContext since the block layer -IO_CODE APIs can be called from any thread. Now it's just a hint to help -block jobs and other operations co-locate themselves in a thread with -the guest I/O requests. Keep going if setting the AioContext fails. 
- -Suggested-by: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Message-ID: <20240119135748.270944-6-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit ea0736d7f84ead109a6b701427991828f97724c3) -Signed-off-by: Stefan Hajnoczi ---- - hw/block/virtio-blk.c | 19 +++++-------------- - 1 file changed, 5 insertions(+), 14 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index f48ce5cbb8..81de06c9f6 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1786,11 +1786,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) - - memory_region_transaction_commit(); - -+ /* -+ * Try to change the AioContext so that block jobs and other operations can -+ * co-locate their activity in the same AioContext. If it fails, nevermind. -+ */ - r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], - &local_err); - if (r < 0) { -- error_report_err(local_err); -- goto fail_aio_context; -+ warn_report_err(local_err); - } - - /* -@@ -1819,18 +1822,6 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) - } - return 0; - -- fail_aio_context: -- memory_region_transaction_begin(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -- } -- -- memory_region_transaction_commit(); -- -- for (i = 0; i < nvqs; i++) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -- } - fail_host_notifiers: - k->set_guest_notifiers(qbus->parent, nvqs, false); - fail_guest_notifiers: --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch b/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch deleted file mode 100644 index 1f70049..0000000 --- a/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 5db0b4131c56d96760b3300298f4bedab99d35cb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 6 Sep 2023 17:00:22 +0400 
-Subject: [PATCH 100/101] virtio-gpu: block migration of VMs with blob=true -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -RH-MergeRequest: 217: virtio-gpu: block migration of VMs with blob=true -RH-Jira: RHEL-7565 -RH-Commit: [1/1] f978ca697d574b1419eb027a1007c060dfb83298 - -JIRA: https://issues.redhat.com/browse/RHEL-7565 - -commit 9c549ab6895a43ad0cb33e684e11cdb0b5400897 -Author: Marc-André Lureau -Date: Wed Sep 6 17:00:22 2023 +0400 - -virtio-gpu: block migration of VMs with blob=true - -"blob" resources don't have an associated pixman image: - -#0 pixman_image_get_stride (image=0x0) at ../pixman/pixman-image.c:921 -#1 0x0000562327c25236 in virtio_gpu_save (f=0x56232bb13b00, opaque=0x56232b555a60, size=0, field=0x5623289ab6c8 <__compound_literal.3+104>, vmdesc=0x56232ab59fe0) at ../hw/display/virtio-gpu.c:1225 - -Related to: -https://bugzilla.redhat.com/show_bug.cgi?id=2236353 - -Signed-off-by: Marc-André Lureau -Acked-by: Peter Xu - -[ rhel backport - fix Error* vs Error** argument ] -Signed-off-by: Marc-André Lureau ---- - hw/display/virtio-gpu.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c -index b016d3bac8..1702190ead 100644 ---- a/hw/display/virtio-gpu.c -+++ b/hw/display/virtio-gpu.c -@@ -27,6 +27,7 @@ - #include "hw/virtio/virtio-gpu-pixman.h" - #include "hw/virtio/virtio-bus.h" - #include "hw/qdev-properties.h" -+#include "migration/blocker.h" - #include "qemu/log.h" - #include "qemu/module.h" - #include "qapi/error.h" -@@ -41,6 +42,8 @@ virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id, - - static void virtio_gpu_reset_bh(void *opaque); - -+static Error *blob_mig_blocker; -+ - void virtio_gpu_update_cursor_data(VirtIOGPU *g, - struct virtio_gpu_scanout *s, - uint32_t resource_id) -@@ -1452,6 +1455,14 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) - error_setg(errp, 
"blobs and virgl are not compatible (yet)"); - return; - } -+ -+ if (!blob_mig_blocker) { -+ error_setg(&blob_mig_blocker, -+ "virtio-gpu blob VMs are currently not migratable."); -+ } -+ if (migrate_add_blocker(&blob_mig_blocker, errp)) { -+ return; -+ } - } - - if (!virtio_gpu_base_device_realize(qdev, -@@ -1478,6 +1489,9 @@ static void virtio_gpu_device_unrealize(DeviceState *qdev) - { - VirtIOGPU *g = VIRTIO_GPU(qdev); - -+ if (virtio_gpu_blob_enabled(g->parent_obj.conf)) { -+ migrate_del_blocker(&blob_mig_blocker); -+ } - g_clear_pointer(&g->ctrl_bh, qemu_bh_delete); - g_clear_pointer(&g->cursor_bh, qemu_bh_delete); - g_clear_pointer(&g->reset_bh, qemu_bh_delete); --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch b/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch new file mode 100644 index 0000000..7183a65 --- /dev/null +++ b/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch @@ -0,0 +1,122 @@ +From 97d039841728570b54d11ee7e5322743f519d861 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 16 May 2024 12:40:22 +0400 +Subject: [PATCH 3/4] virtio-gpu: fix v2 migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 246: virtio-gpu: fix v2 migration +RH-Jira: RHEL-34621 +RH-Acked-by: Peter Xu +RH-Acked-by: Thomas Huth +RH-Commit: [1/2] 187370bc6198a4ed0e4763314f8113ffcd21eb36 (marcandre.lureau-rh/qemu-kvm-centos) + +Commit dfcf74fa ("virtio-gpu: fix scanout migration post-load") broke +forward/backward version migration. Versioning of nested VMSD structures +is not straightforward, as the wire format doesn't have nested +structures versions. Introduce x-scanout-vmstate-version and a field +test to save/load appropriately according to the machine version. 
+ +Fixes: dfcf74fa ("virtio-gpu: fix scanout migration post-load") +Signed-off-by: Marc-André Lureau +Signed-off-by: Peter Xu +Reviewed-by: Fiona Ebner +Tested-by: Fiona Ebner +[fixed long lines] +Signed-off-by: Fabiano Rosas + +Jira: https://issues.redhat.com/browse/RHEL-34621 +Signed-off-by: Marc-André Lureau +(cherry picked from commit 40a23ef643664b5c1021a9789f9d680b6294fb50) +--- + hw/core/machine.c | 1 + + hw/display/virtio-gpu.c | 30 ++++++++++++++++++++++-------- + include/hw/virtio/virtio-gpu.h | 1 + + 3 files changed, 24 insertions(+), 8 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 0f256d9633..cf1d7faaaf 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,7 @@ GlobalProperty hw_compat_8_2[] = { + { "migration", "zero-page-detection", "legacy"}, + { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, + { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" }, ++ { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, + }; + const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2); + +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index ae831b6b3e..d60b1b2973 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -1166,10 +1166,17 @@ static void virtio_gpu_cursor_bh(void *opaque) + virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq); + } + ++static bool scanout_vmstate_after_v2(void *opaque, int version) ++{ ++ struct VirtIOGPUBase *base = container_of(opaque, VirtIOGPUBase, scanout); ++ struct VirtIOGPU *gpu = container_of(base, VirtIOGPU, parent_obj); ++ ++ return gpu->scanout_vmstate_version >= 2; ++} ++ + static const VMStateDescription vmstate_virtio_gpu_scanout = { + .name = "virtio-gpu-one-scanout", +- .version_id = 2, +- .minimum_version_id = 1, ++ .version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(resource_id, struct virtio_gpu_scanout), + VMSTATE_UINT32(width, struct virtio_gpu_scanout), +@@ -1181,12 +1188,18 @@ static const VMStateDescription 
vmstate_virtio_gpu_scanout = { + VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout), + VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout), + VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout), +- VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2), ++ VMSTATE_UINT32_TEST(fb.format, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.bytes_pp, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.width, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.height, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.stride, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.offset, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), + VMSTATE_END_OF_LIST() + }, + }; +@@ -1659,6 +1672,7 @@ static Property virtio_gpu_properties[] = { + DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags, + VIRTIO_GPU_FLAG_BLOB_ENABLED, false), + DEFINE_PROP_SIZE("hostmem", VirtIOGPU, parent_obj.conf.hostmem, 0), ++ DEFINE_PROP_UINT8("x-scanout-vmstate-version", VirtIOGPU, scanout_vmstate_version, 2), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h +index ed44cdad6b..842315d51d 100644 +--- a/include/hw/virtio/virtio-gpu.h ++++ b/include/hw/virtio/virtio-gpu.h +@@ -177,6 +177,7 @@ typedef struct VGPUDMABuf { + struct VirtIOGPU { + VirtIOGPUBase parent_obj; + ++ uint8_t scanout_vmstate_version; + uint64_t conf_max_hostmem; + + VirtQueue *ctrl_vq; +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch 
b/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch deleted file mode 100644 index 6ad1c98..0000000 --- a/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 94bccae527f1ab8328cc7692532046d700e2ca71 Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Mon, 5 Feb 2024 19:27:07 +0100 -Subject: [PATCH 22/22] virtio-mem: default-enable "dynamic-memslots" - -RH-Author: David Hildenbrand -RH-MergeRequest: 220: virtio-mem: default-enable "dynamic-memslots" -RH-Jira: RHEL-24045 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] d9a60acd7de1d8703ea3ca938e388e19f31f5347 - -JIRA: https://issues.redhat.com/browse/RHEL-24045 -Upstream: RHEL only - -We only support selected vhost-user devices in combination with -virtio-mem in RHEL. One devices that works well is virtiofsd, devices that -are currently incompatible include DPDK and SPDK. - -The vhost devices we support must be compatible with the dynamic-memslot -feature (i.e., support at least 509 memslots, support dynamically adding/ -removing memslots), such that setting "dynamic-memslots=on" will work a -expected and not make certain QEMU commandlines or hotplug of vhost-user -devices bail out. - -Let's set "dynamic-memslots=on" starting with RHEL 9.4, so we -get the benefits (i.e., reduced metadata consumption in KVM, majority of -unplugged memory being inaccessible) as default. - -When wanting to run virtio-mem with incompatible vhost-user devices, it -might just work (if the vhost-user device is created before the -virtio-mem device), or the feature can be manually disabled by -specifying "dynamic-memslots=off". 
- -Signed-off-by: David Hildenbrand ---- - hw/core/machine.c | 2 ++ - hw/virtio/virtio-mem.c | 3 ++- - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 446601ee30..309f6ba685 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -78,6 +78,8 @@ GlobalProperty hw_compat_rhel_9_4[] = { - { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, - /* hw_compat_rhel_9_4 from hw_compat_8_1 */ - { "igb", "x-pcie-flr-init", "off" }, -+ /* hw_compat_rhel_9_4 jira RHEL-24045 */ -+ { "virtio-mem", "dynamic-memslots", "off" }, - }; - const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); - -diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c -index 75ee38aa46..00ca91e8fe 100644 ---- a/hw/virtio/virtio-mem.c -+++ b/hw/virtio/virtio-mem.c -@@ -1696,8 +1696,9 @@ static Property virtio_mem_properties[] = { - #endif - DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, - early_migration, true), -+ /* RHEL: default-enable "dynamic-memslots" (jira RHEL-24045) */ - DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM, -- dynamic_memslots, false), -+ dynamic_memslots, true), - DEFINE_PROP_END_OF_LIST(), - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-rng-block-max-bytes-0.patch b/SOURCES/kvm-virtio-rng-block-max-bytes-0.patch new file mode 100644 index 0000000..2fba53d --- /dev/null +++ b/SOURCES/kvm-virtio-rng-block-max-bytes-0.patch @@ -0,0 +1,49 @@ +From 3dd1412176a8ee6c06b5d41aa00ca49b535d99b7 Mon Sep 17 00:00:00 2001 +From: "Michael S. 
Tsirkin" +Date: Wed, 24 Jul 2024 06:48:59 -0400 +Subject: [PATCH 092/100] virtio-rng: block max-bytes=0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 259: virtio-rng: block max-bytes=0 +RH-Jira: RHEL-50336 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Thomas Huth +RH-Acked-by: Eric Auger +RH-Commit: [1/1] 6d9852cc7cf7fdf49521b6301ceda26e11b1291f (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-50336 + +with max-bytes set to 0, quota is 0 and so device does not work. +block this to avoid user confusion + +Message-Id: <73a89a42d82ec8b47358f25119b87063e4a6ea57.1721818306.git.mst@redhat.com> +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit 024d046bf41b5256adec671085bcee767a6da125) +Signed-off-by: Laurent Vivier +--- + hw/virtio/virtio-rng.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c +index f74efffef7..7cf31da071 100644 +--- a/hw/virtio/virtio-rng.c ++++ b/hw/virtio/virtio-rng.c +@@ -184,8 +184,9 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) + + /* Workaround: Property parsing does not enforce unsigned integers, + * So this is a hack to reject such numbers. 
*/ +- if (vrng->conf.max_bytes > INT64_MAX) { +- error_setg(errp, "'max-bytes' parameter must be non-negative, " ++ if (vrng->conf.max_bytes == 0 || ++ vrng->conf.max_bytes > INT64_MAX) { ++ error_setg(errp, "'max-bytes' parameter must be positive, " + "and less than 2^63"); + return; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch b/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch deleted file mode 100644 index b8066b2..0000000 --- a/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch +++ /dev/null @@ -1,78 +0,0 @@ -From da3a5afa41790ae913d41cfcdc3c6a8731ae3fe8 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 2 Feb 2024 16:31:56 +0100 -Subject: [PATCH 1/6] virtio-scsi: Attach event vq notifier with no_poll - -RH-Author: Hanna Czenczek -RH-MergeRequest: 223: virtio: Re-enable notifications after drain -RH-Jira: RHEL-3934 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/3] d29b461a0a4b584af0ee80fb3f9e45c92ea88eb0 (hreitz/qemu-kvm-c-9-s) - -As of commit 38738f7dbbda90fbc161757b7f4be35b52205552 ("virtio-scsi: -don't waste CPU polling the event virtqueue"), we only attach an io_read -notifier for the virtio-scsi event virtqueue instead, and no polling -notifiers. During operation, the event virtqueue is typically -non-empty, but none of the buffers are intended to be used immediately. -Instead, they only get used when certain events occur. Therefore, it -makes no sense to continuously poll it when non-empty, because it is -supposed to be and stay non-empty. - -We do this by using virtio_queue_aio_attach_host_notifier_no_poll() -instead of virtio_queue_aio_attach_host_notifier() for the event -virtqueue. - -Commit 766aa2de0f29b657148e04599320d771c36fd126 ("virtio-scsi: implement -BlockDevOps->drained_begin()") however has virtio_scsi_drained_end() use -virtio_queue_aio_attach_host_notifier() for all virtqueues, including -the event virtqueue. 
This can lead to it being polled again, undoing -the benefit of commit 38738f7dbbda90fbc161757b7f4be35b52205552. - -Fix it by using virtio_queue_aio_attach_host_notifier_no_poll() for the -event virtqueue. - -Reported-by: Fiona Ebner -Fixes: 766aa2de0f29b657148e04599320d771c36fd126 - ("virtio-scsi: implement BlockDevOps->drained_begin()") -Reviewed-by: Stefan Hajnoczi -Tested-by: Fiona Ebner -Reviewed-by: Fiona Ebner -Signed-off-by: Hanna Czenczek -Message-ID: <20240202153158.788922-2-hreitz@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit c42c3833e0cfdf2b80fb3ca410acfd392b6874ab) ---- - hw/scsi/virtio-scsi.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index ca365a70e9..9943186917 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -1149,6 +1149,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus) - static void virtio_scsi_drained_end(SCSIBus *bus) - { - VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus); -+ VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); - VirtIODevice *vdev = VIRTIO_DEVICE(s); - uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + - s->parent_obj.conf.num_queues; -@@ -1166,7 +1167,11 @@ static void virtio_scsi_drained_end(SCSIBus *bus) - - for (uint32_t i = 0; i < total_queues; i++) { - VirtQueue *vq = virtio_get_queue(vdev, i); -- virtio_queue_aio_attach_host_notifier(vq, s->ctx); -+ if (vq == vs->event_vq) { -+ virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx); -+ } else { -+ virtio_queue_aio_attach_host_notifier(vq, s->ctx); -+ } - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch b/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch deleted file mode 100644 index 9ad8fdf..0000000 --- a/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 
1ee3f919a51135a0798a14c734ca80d74d30025d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 4 Dec 2023 11:42:57 -0500 -Subject: [PATCH 078/101] virtio-scsi: don't lock AioContext around - virtio_queue_aio_attach_host_notifier() - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [9/26] 5e1179e617d05bf765b285ba42393ec1ddbeba28 (kmwolf/centos-qemu-kvm) - -virtio_queue_aio_attach_host_notifier() does not require the AioContext -lock. Stop taking the lock and add an explicit smp_wmb() because we were -relying on the implicit barrier in the AioContext lock before. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Message-ID: <20231204164259.1515217-3-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - hw/scsi/virtio-scsi-dataplane.c | 8 +------- - 1 file changed, 1 insertion(+), 7 deletions(-) - -diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c -index 1e684beebe..135e23fe54 100644 ---- a/hw/scsi/virtio-scsi-dataplane.c -+++ b/hw/scsi/virtio-scsi-dataplane.c -@@ -149,23 +149,17 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - - memory_region_transaction_commit(); - -- /* -- * These fields are visible to the IOThread so we rely on implicit barriers -- * in aio_context_acquire() on the write side and aio_notify_accept() on -- * the read side. 
-- */ - s->dataplane_starting = false; - s->dataplane_started = true; -+ smp_wmb(); /* paired with aio_notify_accept() */ - - if (s->bus.drain_count == 0) { -- aio_context_acquire(s->ctx); - virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); - virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); - - for (i = 0; i < vs->conf.num_queues; i++) { - virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); - } -- aio_context_release(s->ctx); - } - return 0; - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch b/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch deleted file mode 100644 index 2654cb7..0000000 --- a/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch +++ /dev/null @@ -1,173 +0,0 @@ -From c2d7633ead6e19d4b6af5552ca907ae071b8734b Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 5 Dec 2023 13:19:58 -0500 -Subject: [PATCH 081/101] virtio-scsi: replace AioContext lock with tmf_bh_lock - -RH-Author: Kevin Wolf -RH-MergeRequest: 214: Remove AioContext lock -RH-Jira: RHEL-15965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [12/26] 8fb375bfd72a491d47321c78078577071a4e90fb (kmwolf/centos-qemu-kvm) - -Protect the Task Management Function BH state with a lock. The TMF BH -runs in the main loop thread. An IOThread might process a TMF at the -same time as the TMF BH is running. Therefore tmf_bh_list and tmf_bh -must be protected by a lock. - -Run TMF request completion in the IOThread using aio_wait_bh_oneshot(). -This avoids more locking to protect the virtqueue and SCSI layer state. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Message-ID: <20231205182011.1976568-2-stefanha@redhat.com> -Signed-off-by: Kevin Wolf ---- - hw/scsi/virtio-scsi.c | 62 ++++++++++++++++++++++----------- - include/hw/virtio/virtio-scsi.h | 3 +- - 2 files changed, 43 insertions(+), 22 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 9c751bf296..4f8d35facc 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -123,6 +123,30 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req) - virtio_scsi_free_req(req); - } - -+static void virtio_scsi_complete_req_bh(void *opaque) -+{ -+ VirtIOSCSIReq *req = opaque; -+ -+ virtio_scsi_complete_req(req); -+} -+ -+/* -+ * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop -+ * thread cannot touch the virtqueue since that could race with an IOThread. -+ */ -+static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req) -+{ -+ VirtIOSCSI *s = req->dev; -+ -+ if (!s->ctx || s->ctx == qemu_get_aio_context()) { -+ /* No need to schedule a BH when there is no IOThread */ -+ virtio_scsi_complete_req(req); -+ } else { -+ /* Run request completion in the IOThread */ -+ aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req); -+ } -+} -+ - static void virtio_scsi_bad_req(VirtIOSCSIReq *req) - { - virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers"); -@@ -338,10 +362,7 @@ static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) - - out: - object_unref(OBJECT(d)); -- -- virtio_scsi_acquire(s); -- virtio_scsi_complete_req(req); -- virtio_scsi_release(s); -+ virtio_scsi_complete_req_from_main_loop(req); - } - - /* Some TMFs must be processed from the main loop thread */ -@@ -354,18 +375,16 @@ static void virtio_scsi_do_tmf_bh(void *opaque) - - GLOBAL_STATE_CODE(); - -- virtio_scsi_acquire(s); -+ WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) { -+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { -+ 
QTAILQ_REMOVE(&s->tmf_bh_list, req, next); -+ QTAILQ_INSERT_TAIL(&reqs, req, next); -+ } - -- QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { -- QTAILQ_REMOVE(&s->tmf_bh_list, req, next); -- QTAILQ_INSERT_TAIL(&reqs, req, next); -+ qemu_bh_delete(s->tmf_bh); -+ s->tmf_bh = NULL; - } - -- qemu_bh_delete(s->tmf_bh); -- s->tmf_bh = NULL; -- -- virtio_scsi_release(s); -- - QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { - QTAILQ_REMOVE(&reqs, req, next); - virtio_scsi_do_one_tmf_bh(req); -@@ -379,8 +398,7 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) - - GLOBAL_STATE_CODE(); - -- virtio_scsi_acquire(s); -- -+ /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */ - if (s->tmf_bh) { - qemu_bh_delete(s->tmf_bh); - s->tmf_bh = NULL; -@@ -393,19 +411,19 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) - req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; - virtio_scsi_complete_req(req); - } -- -- virtio_scsi_release(s); - } - - static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) - { - VirtIOSCSI *s = req->dev; - -- QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); -+ WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) { -+ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); - -- if (!s->tmf_bh) { -- s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); -- qemu_bh_schedule(s->tmf_bh); -+ if (!s->tmf_bh) { -+ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); -+ qemu_bh_schedule(s->tmf_bh); -+ } - } - } - -@@ -1235,6 +1253,7 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) - Error *err = NULL; - - QTAILQ_INIT(&s->tmf_bh_list); -+ qemu_mutex_init(&s->tmf_bh_lock); - - virtio_scsi_common_realize(dev, - virtio_scsi_handle_ctrl, -@@ -1277,6 +1296,7 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) - - qbus_set_hotplug_handler(BUS(&s->bus), NULL); - virtio_scsi_common_unrealize(dev); -+ qemu_mutex_destroy(&s->tmf_bh_lock); - } - - static Property virtio_scsi_properties[] = { -diff --git 
a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index 779568ab5d..da8cb928d9 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -85,8 +85,9 @@ struct VirtIOSCSI { - - /* - * TMFs deferred to main loop BH. These fields are protected by -- * virtio_scsi_acquire(). -+ * tmf_bh_lock. - */ -+ QemuMutex tmf_bh_lock; - QEMUBH *tmf_bh; - QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; - --- -2.39.3 - diff --git a/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch b/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch deleted file mode 100644 index a8bf6ac..0000000 --- a/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 2932c8de175fadeed4bb7c1024724cbabc53f6d5 Mon Sep 17 00:00:00 2001 -From: Sebastian Ott -Date: Mon, 19 Feb 2024 02:37:27 -0500 -Subject: [PATCH 6/6] x86: rhel 9.2.0 machine type compat fix - -RH-Author: Sebastian Ott -RH-MergeRequest: 342: Draft: x86: rhel 9.2.0 machine type compat fix (RHEL) -RH-Jira: RHEL-17068 -RH-Acked-by: Thomas Huth -RH-Commit: [23/23] 658dda965f34119de300eef26155f47b1b3fa7f1 - -Fix up the compatibility for 9.2.0 and older. 
- -Signed-off-by: Sebastian Ott's avatarSebastian Ott ---- - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 44038391fb..09d02cc91f 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1023,6 +1023,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->enforce_amd_1tb_hole = false; - /* From pc_i440fx_8_0_machine_options() */ - pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; -+ /* From pc_i440fx_8_1_machine_options() */ -+ pcmc->broken_32bit_mem_addr_check = true; - /* Introduced in QEMU 8.2 */ - pcmc->default_south_bridge = TYPE_PIIX3_DEVICE; - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 6387df97c8..c6967e1846 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -759,6 +759,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) - - /* From pc_q35_8_0_machine_options() */ - pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; -+ /* From pc_q35_8_1_machine_options() */ -+ pcmc->broken_32bit_mem_addr_check = true; - - compat_props_add(m->compat_props, hw_compat_rhel_9_4, - hw_compat_rhel_9_4_len); --- -2.39.3 - diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index d58cd84..cc7b200 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -148,8 +148,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 8.2.0 -Release: 6%{?rcrel}%{?dist}%{?cc_suffix} +Version: 9.0.0 +Release: 10%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -186,368 +186,252 @@ Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0013: 0013-Add-support-statement-to-help-output.patch Patch0014: 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0015: 0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0016: 0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch -# For RHEL-17168 - Introduce virt-rhel9.4.0 arm-virt machine type [aarch64] -Patch17: kvm-hw-arm-virt-Fix-compats.patch -# For RHEL-19738 - Enable properties allowing to disable high memory regions -Patch18: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch19: kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch20: kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch21: kvm-vfio-container-Switch-to-dma_map-unmap-API.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch22: kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch23: kvm-vfio-common-Move-giommu_list-in-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch24: kvm-vfio-container-Move-space-field-to-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch25: kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch26: kvm-vfio-container-Move-per-container-device-list-in-bas.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch27: kvm-vfio-container-Convert-functions-to-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch28: kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch29: kvm-vfio-container-Move-vrdl_list-to-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch30: kvm-vfio-container-Move-listener-to-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch31: kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch32: kvm-vfio-container-Move-iova_ranges-to-base-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch33: kvm-vfio-container-Implement-attach-detach_device.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch34: kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch35: kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch36: kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch37: kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch38: kvm-backends-iommufd-Introduce-the-iommufd-object.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch39: kvm-util-char_dev-Add-open_cdev.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch40: kvm-vfio-common-return-early-if-space-isn-t-empty.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch41: kvm-vfio-iommufd-Implement-the-iommufd-backend.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 
-Patch42: kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch43: kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch44: kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch45: kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch46: kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch47: kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch48: kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch49: kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch50: kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch51: kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch52: kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch53: kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch54: kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch55: kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch56: kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch57: kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch58: kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch59: kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch60: kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch61: kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch62: kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch63: kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch64: kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch65: kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch66: kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch67: kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch68: kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch69: kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch70: kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch71: kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch72: kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch73: kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch74: kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch75: kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch76: kvm-vfio-container-Replace-basename-with-g_path_get_base.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch77: kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch78: kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch79: kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch80: kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch81: kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch82: kvm-backends-iommufd-Remove-mutex.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch83: kvm-Compile-IOMMUFD-object-on-aarch64.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch84: kvm-Compile-IOMMUFD-on-s390x.patch -# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend -# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 -Patch85: kvm-Compile-IOMMUFD-on-x86_64.patch -# For RHEL-18212 - [RHEL9][Secure-execution][s390x] The error message is not clear when boot up a SE guest with wrong encryption -Patch86: kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch87: kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch -# For RHEL-15965 - [qemu-kvm] Remove 
AioContext lock (no response with QMP command block_resize) -Patch88: kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch89: kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch90: kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch91: kvm-virtio-blk-add-lock-to-protect-s-rq.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch92: kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch93: kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch94: kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch95: kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch96: kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch97: kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch98: kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch99: kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch -# For 
RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch100: kvm-tests-remove-aio_context_acquire-tests.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch101: kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch102: kvm-graph-lock-remove-AioContext-locking.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch103: kvm-block-remove-AioContext-locking.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch104: kvm-block-remove-bdrv_co_lock.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch105: kvm-scsi-remove-AioContext-locking.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch106: kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch107: kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch108: kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch109: kvm-scsi-remove-outdated-AioContext-lock-comment.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch110: kvm-job-remove-outdated-AioContext-locking-comments.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) -Patch111: kvm-block-remove-outdated-AioContext-locking-comments.patch -# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no 
response with QMP command block_resize) -Patch112: kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch -# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 -Patch113: kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch -# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 -Patch114: kvm-s390x-pci-refresh-fh-before-disabling-aif.patch -# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 -Patch115: kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch -# For RHEL-21570 - Critical performance degradation for input devices in virtio vnc session -Patch116: kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch -# For RHEL-7565 - qemu crashed when migrate guest with blob resources enabled -Patch117: kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch -# For RHEL-21293 - [emulated igb] Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument -Patch118: kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch -# For RHEL-20341 - memory-device size alignment check invalid in QEMU 8.2 -Patch119: kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch -# For RHEL-20341 - memory-device size alignment check invalid in QEMU 8.2 -Patch120: kvm-memory-device-reintroduce-memory-region-size-check.patch -# For RHEL-24593 - qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk -Patch121: kvm-block-backend-Allow-concurrent-context-changes.patch -# For RHEL-24593 - qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk -Patch122: kvm-scsi-Await-request-purging.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
-# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch123: kvm-string-output-visitor-show-structs-as-omitted.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch124: kvm-string-output-visitor-Fix-pseudo-struct-handling.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch125: kvm-qdev-properties-alias-all-object-class-properties.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch126: kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
-# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch127: kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch128: kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch129: kvm-iotests-add-filter_qmp_generated_node_ids.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch130: kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
-# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch131: kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch132: kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch133: kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch134: kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
-# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch135: kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch136: kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch137: kvm-virtio-blk-always-set-ioeventfd-during-startup.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. -# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch138: kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch -# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
-# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support -# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] -Patch139: kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch -# For RHEL-24045 - QEMU: default-enable dynamically using multiple memslots for virtio-mem -Patch140: kvm-virtio-mem-default-enable-dynamic-memslots.patch -# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled -Patch141: kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch -# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled -Patch142: kvm-virtio-Re-enable-notifications-after-drain.patch -# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled -Patch143: kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch -# For RHEL-15394 - virtio-blk: qemu hang on "no response on QMP query-status" when write data to disk without enough space -Patch144: kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch -# For RHEL-24988 - Mark virt-rhel9.{0,2}.0 machine types as deprecated -Patch145: kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch -# For RHEL-17068 - Check/fix machine type compatibility for qemu-kvm 8.2.0 [x86_64] -Patch146: kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch +Patch0016: 0016-Add-upstream-compatibility-bits.patch +Patch0017: 0017-x86-rhel-9.4.0-machine-type-compat-fix.patch +# For RHEL-34945 - [aarch64, kvm-unit-tests] all tests tagged as FAIL [qemu-kvm: GLib: g_ptr_array_add: assertion 'rarray' failed] +Patch18: kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch +# For RHEL-30362 - Check/fix machine type compatibility for QEMU 9.0.0 [x86_64][rhel-9.5.0] +Patch19: kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch20: 
kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch21: kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch22: kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch +# For RHEL-33440 - Qemu hang when quit dst vm after storage migration(nbd+tls) +Patch23: kvm-iotests-test-NBD-TLS-iothread.patch +# For RHEL-34621 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument +Patch24: kvm-virtio-gpu-fix-v2-migration.patch +# For RHEL-34621 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument +Patch25: kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch +# For RHEL-42411 - qemu-kvm: linux-aio: add support for IO_CMD_FDSYNC command +Patch26: kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch +# For RHEL-34618 - aio=io_uring: Assertion failure `luringcb->co->ctx == s->aio_context' with block_resize +# For RHEL-38697 - aio=native: Assertion failure `laiocb->co->ctx == laiocb->ctx->aio_context' with block_resize +Patch27: kvm-Revert-monitor-use-aio_co_reschedule_self.patch +# For RHEL-34618 - aio=io_uring: Assertion failure `luringcb->co->ctx == s->aio_context' with block_resize +# For RHEL-38697 - aio=native: Assertion failure `laiocb->co->ctx == laiocb->ctx->aio_context' with block_resize +Patch28: kvm-aio-warn-about-iohandler_ctx-special-casing.patch +# For RHEL-36159 - qemu crash on Assertion `block->n_free_ciphers > 0' failed in guest installation with luks and iothread-vq-mapping +Patch29: kvm-block-crypto-create-ciphers-on-demand.patch +# For RHEL-36159 - qemu crash on Assertion `block->n_free_ciphers > 0' failed in guest 
installation with luks and iothread-vq-mapping +Patch30: kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch31: kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch32: kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch33: kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch +# For RHEL-35611 - CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5] +Patch34: kvm-block-Parse-filenames-only-when-explicitly-requested.patch +# For RHEL-40708 - [RHEL9.5.0][virtio_fs][s390x] after hot-unplug the vhost-user-fs-ccw device, the device is failed to hot-plug again +Patch35: kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch +# For RHEL-39936 - ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256) on FUJITSU +Patch36: kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch37: kvm-introduce-pc_rhel_9_5_compat.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch38: kvm-target-i386-add-guest-phys-bits-cpu-property.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch39: kvm-kvm-add-support-for-guest-physical-bits.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch40: kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch41: kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch42: kvm-target-i386-Add-new-CPU-model-SierraForest.patch +# For RHEL-39544 - [QEMU] Add support 
for AMD SEV-SNP to Qemu +Patch43: kvm-target-i386-Export-RFDS-bit-to-guests.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch44: kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch45: kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch46: kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch47: kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch48: kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch49: kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch50: kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch51: kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch52: kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch53: kvm-linux-headers-update-to-current-kvm-next.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch54: kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch55: kvm-KVM-track-whether-guest-state-is-encrypted.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch56: kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch57: kvm-target-i386-introduce-x86-confidential-guest.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to 
Qemu +Patch58: kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch59: kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch60: kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch61: kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch62: kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch63: kvm-kvm-Introduce-support-for-memory_attributes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch64: kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch65: kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch66: kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch67: kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch68: kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch69: kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch70: kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch71: kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch72: kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu 
+Patch73: kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch74: kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch75: kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch76: kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch77: kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch78: kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch79: kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch80: kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch81: kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch82: kvm-hw-i386-split-x86.c-in-multiple-parts.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch83: kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch84: kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch85: kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch86: kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch87: kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP 
to Qemu +Patch88: kvm-linux-headers-Update-to-current-kvm-next.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch89: kvm-update-linux-headers-import-linux-kvm_para.h-header.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch90: kvm-machine-allow-early-use-of-machine_require_guest_mem.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch91: kvm-i386-sev-Replace-error_report-with-error_setg.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch92: kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch93: kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch94: kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch95: kvm-i386-sev-Introduce-sev-snp-guest-object.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch96: kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch97: kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch98: kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch99: kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch100: kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch101: kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch102: kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu 
+Patch103: kvm-i386-sev-Add-the-SNP-launch-start-context.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch104: kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch105: kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch106: kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch107: kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch108: kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch109: kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch110: kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch111: kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch112: kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch113: kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch114: kvm-i386-sev-Extract-build_kernel_loader_hashes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch115: kvm-i386-sev-Reorder-struct-declarations.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch116: kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch117: kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu 
+Patch118: kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch119: kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch120: kvm-i386-sev-fix-unreachable-code-coverity-issue.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch121: kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch122: kvm-i386-sev-Return-when-sev_common-is-null.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch123: kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch124: kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch125: kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch126: kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch127: kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch +# For RHEL-50336 - Fail to boot up the guest including vtpm and virtio-rng (max-bytes=0) devices +Patch128: kvm-virtio-rng-block-max-bytes-0.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch129: kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch130: kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch131: kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, 
qemu crashes on assert +Patch132: kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch133: kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch134: kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch135: kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch136: kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch +# For RHEL-52250 - fsfreeze hooks break on the systems first restorecon +Patch137: kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch138: kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch %if %{have_clang} BuildRequires: clang @@ -1172,7 +1056,7 @@ run_configure \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ trace/trace-events-all qemu-kvm-simpletrace.stp -cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm +cp -a qemu-system-%{kvm_target} qemu-kvm %ifarch s390x # Copy the built new images into place for "make check": @@ -1258,7 +1142,7 @@ popd mkdir -p %{buildroot}%{_datadir}/systemtap/tapset -install -m 0755 %{qemu_kvm_build}/%{kvm_target}-softmmu/qemu-system-%{kvm_target} %{buildroot}%{_libexecdir}/qemu-kvm +install -m 0755 %{qemu_kvm_build}/qemu-system-%{kvm_target} %{buildroot}%{_libexecdir}/qemu-kvm install -m 
0644 %{qemu_kvm_build}/qemu-kvm.stp %{buildroot}%{_datadir}/systemtap/tapset/ install -m 0644 %{qemu_kvm_build}/qemu-kvm-log.stp %{buildroot}%{_datadir}/systemtap/tapset/ install -m 0644 %{qemu_kvm_build}/qemu-kvm-simpletrace.stp %{buildroot}%{_datadir}/systemtap/tapset/ @@ -1317,6 +1201,7 @@ rm -rf %{buildroot}%{_datadir}/%{name}/qboot.rom rm -rf %{buildroot}%{_datadir}/%{name}/s390-ccw.img rm -rf %{buildroot}%{_datadir}/%{name}/s390-netboot.img rm -rf %{buildroot}%{_datadir}/%{name}/hppa-firmware.img +rm -rf %{buildroot}%{_datadir}/%{name}/hppa-firmware64.img rm -rf %{buildroot}%{_datadir}/%{name}/canyonlands.dtb rm -rf %{buildroot}%{_datadir}/%{name}/u-boot-sam460-20100605.bin @@ -1547,6 +1432,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp %{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp %{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf +%{_datadir}/systemtap/tapset/qemu-img*.stp +%{_datadir}/systemtap/tapset/qemu-io*.stp +%{_datadir}/systemtap/tapset/qemu-nbd*.stp +%{_datadir}/systemtap/tapset/qemu-storage-daemon*.stp %ifarch x86_64 %{_libdir}/%{name}/accel-tcg-%{kvm_target}.so @@ -1609,6 +1498,243 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Sep 02 2024 Miroslav Rezanina - 9.0.0-10 +- kvm-nbd-server-CVE-2024-7409-Avoid-use-after-free-when-c.patch [RHEL-52617] +- Resolves: RHEL-52617 + (CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5]) + +* Mon Aug 26 2024 Miroslav Rezanina - 9.0.0-9 +- kvm-qemu-guest-agent-Update-the-logfile-path-of-qga-fsfr.patch [RHEL-52250] +- Resolves: RHEL-52250 + (fsfreeze hooks break on the systems first restorecon) + +* Wed Aug 14 2024 Miroslav Rezanina - 9.0.0-8 +- kvm-introduce-pc_rhel_9_5_compat.patch [RHEL-39544] +- kvm-target-i386-add-guest-phys-bits-cpu-property.patch [RHEL-39544] +- kvm-kvm-add-support-for-guest-physical-bits.patch [RHEL-39544] 
+- kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch [RHEL-39544] +- kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch [RHEL-39544] +- kvm-target-i386-Add-new-CPU-model-SierraForest.patch [RHEL-39544] +- kvm-target-i386-Export-RFDS-bit-to-guests.patch [RHEL-39544] +- kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch [RHEL-39544] +- kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch [RHEL-39544] +- kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch [RHEL-39544] +- kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch [RHEL-39544] +- kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch [RHEL-39544] +- kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch [RHEL-39544] +- kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch [RHEL-39544] +- kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch [RHEL-39544] +- kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch [RHEL-39544] +- kvm-linux-headers-update-to-current-kvm-next.patch [RHEL-39544] +- kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch [RHEL-39544] +- kvm-KVM-track-whether-guest-state-is-encrypted.patch [RHEL-39544] +- kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch [RHEL-39544] +- kvm-target-i386-introduce-x86-confidential-guest.patch [RHEL-39544] +- kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch [RHEL-39544] +- kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch [RHEL-39544] +- kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch [RHEL-39544] +- kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch [RHEL-39544] +- kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch [RHEL-39544] +- kvm-kvm-Introduce-support-for-memory_attributes.patch [RHEL-39544] +- kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch [RHEL-39544] +- kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch [RHEL-39544] +- 
kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch [RHEL-39544] +- kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch [RHEL-39544] +- kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch [RHEL-39544] +- kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch [RHEL-39544] +- kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch [RHEL-39544] +- kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch [RHEL-39544] +- kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch [RHEL-39544] +- kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch [RHEL-39544] +- kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch [RHEL-39544] +- kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch [RHEL-39544] +- kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch [RHEL-39544] +- kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch [RHEL-39544] +- kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch [RHEL-39544] +- kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch [RHEL-39544] +- kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch [RHEL-39544] +- kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch [RHEL-39544] +- kvm-hw-i386-split-x86.c-in-multiple-parts.patch [RHEL-39544] +- kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch [RHEL-39544] +- kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch [RHEL-39544] +- kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch [RHEL-39544] +- kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch [RHEL-39544] +- kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch [RHEL-39544] +- kvm-linux-headers-Update-to-current-kvm-next.patch [RHEL-39544] +- kvm-update-linux-headers-import-linux-kvm_para.h-header.patch [RHEL-39544] +- kvm-machine-allow-early-use-of-machine_require_guest_mem.patch [RHEL-39544] +- kvm-i386-sev-Replace-error_report-with-error_setg.patch [RHEL-39544] +- 
kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch [RHEL-39544] +- kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch [RHEL-39544] +- kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch [RHEL-39544] +- kvm-i386-sev-Introduce-sev-snp-guest-object.patch [RHEL-39544] +- kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch [RHEL-39544] +- kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch [RHEL-39544] +- kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch [RHEL-39544] +- kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch [RHEL-39544] +- kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch [RHEL-39544] +- kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch [RHEL-39544] +- kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch [RHEL-39544] +- kvm-i386-sev-Add-the-SNP-launch-start-context.patch [RHEL-39544] +- kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch [RHEL-39544] +- kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch [RHEL-39544] +- kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch [RHEL-39544] +- kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch [RHEL-39544] +- kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch [RHEL-39544] +- kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch [RHEL-39544] +- kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch [RHEL-39544] +- kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch [RHEL-39544] +- kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch [RHEL-39544] +- kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch [RHEL-39544] +- kvm-i386-sev-Extract-build_kernel_loader_hashes.patch [RHEL-39544] +- kvm-i386-sev-Reorder-struct-declarations.patch [RHEL-39544] +- kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch [RHEL-39544] +- kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch [RHEL-39544] +- 
kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch [RHEL-39544] +- kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch [RHEL-39544] +- kvm-i386-sev-fix-unreachable-code-coverity-issue.patch [RHEL-39544] +- kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch [RHEL-39544] +- kvm-i386-sev-Return-when-sev_common-is-null.patch [RHEL-39544] +- kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch [RHEL-39544] +- kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch [RHEL-39544] +- kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch [RHEL-39544] +- kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch [RHEL-39544] +- kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch [RHEL-39544] +- kvm-virtio-rng-block-max-bytes-0.patch [RHEL-50336] +- kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch [RHEL-50000] +- kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch [RHEL-50000] +- kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch [RHEL-50000] +- kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch [RHEL-50000] +- kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch [RHEL-52617] +- Resolves: RHEL-39544 + ([QEMU] Add support for AMD SEV-SNP to Qemu) +- Resolves: RHEL-50336 + (Fail to boot up the guest including vtpm and virtio-rng (max-bytes=0) devices) +- Resolves: RHEL-50000 + (scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert) +- Resolves: RHEL-52617 + (CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5]) + +* Mon Jul 15 2024 Miroslav Rezanina - 9.0.0-7 +- kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch [RHEL-40708] +- 
kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch [RHEL-39936] +- Resolves: RHEL-40708 + ([RHEL9.5.0][virtio_fs][s390x] after hot-unplug the vhost-user-fs-ccw device, the device is failed to hot-plug again ) +- Resolves: RHEL-39936 + (ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256) on FUJITSU) + +* Thu Jul 04 2024 Miroslav Rezanina - 9.0.0-6 +- kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch [RHEL-35611] +- kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch [RHEL-35611] +- kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch [RHEL-35611] +- kvm-block-Parse-filenames-only-when-explicitly-requested.patch [RHEL-35611] +- Resolves: RHEL-35611 + (CVE-2024-4467 qemu-kvm: QEMU: 'qemu-img info' leads to host file read/write [rhel-9.5]) + +* Tue Jun 25 2024 Miroslav Rezanina - 9.0.0-5 +- kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch [RHEL-42411] +- kvm-Revert-monitor-use-aio_co_reschedule_self.patch [RHEL-34618 RHEL-38697] +- kvm-aio-warn-about-iohandler_ctx-special-casing.patch [RHEL-34618 RHEL-38697] +- kvm-block-crypto-create-ciphers-on-demand.patch [RHEL-36159] +- kvm-crypto-block-drop-qcrypto_block_open-n_threads-argum.patch [RHEL-36159] +- Resolves: RHEL-42411 + (qemu-kvm: linux-aio: add support for IO_CMD_FDSYNC command) +- Resolves: RHEL-34618 + (aio=io_uring: Assertion failure `luringcb->co->ctx == s->aio_context' with block_resize) +- Resolves: RHEL-38697 + (aio=native: Assertion failure `laiocb->co->ctx == laiocb->ctx->aio_context' with block_resize) +- Resolves: RHEL-36159 + (qemu crash on Assertion `block->n_free_ciphers > 0' failed in guest installation with luks and iothread-vq-mapping) + +* Mon Jun 17 2024 Miroslav Rezanina - 9.0.0-4 +- kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch [RHEL-33440] +- kvm-iotests-test-NBD-TLS-iothread.patch [RHEL-33440] +- kvm-virtio-gpu-fix-v2-migration.patch [RHEL-34621] +- kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch [RHEL-34621] +- Resolves: 
RHEL-33440 + (Qemu hang when quit dst vm after storage migration(nbd+tls)) +- Resolves: RHEL-34621 + ([RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument) + +* Tue May 21 2024 Miroslav Rezanina - 9.0.0-3 +- kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch [RHEL-33440] +- kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch [RHEL-33440] +- Resolves: RHEL-33440 + (Qemu hang when quit dst vm after storage migration(nbd+tls)) + +* Tue May 07 2024 Miroslav Rezanina - 9.0.0-2 +- kvm-hw-arm-virt-Fix-spurious-call-to-arm_virt_compat_set.patch [RHEL-34945] +- kvm-Revert-x86-rhel-9.4.0-machine-type-compat-fix.patch [RHEL-30362] +- Resolves: RHEL-34945 + ([aarch64, kvm-unit-tests] all tests tagged as FAIL [qemu-kvm: GLib: g_ptr_array_add: assertion 'rarray' failed] ) +- Resolves: RHEL-30362 + (Check/fix machine type compatibility for QEMU 9.0.0 [x86_64][rhel-9.5.0]) + +* Wed Apr 24 2024 Miroslav Rezanina - 9.0.0-1 +- Rebase to QEMU 9.0.0 [RHEL-28073] +- Resolves: RHEL-28073 + (Rebase qemu-kvm to QEMU 9.0.0 for RHEL 9.5) + +* Tue Mar 26 2024 Miroslav Rezanina - 8.2.0-11 +- kvm-coroutine-cap-per-thread-local-pool-size.patch [RHEL-28947] +- kvm-coroutine-reserve-5-000-mappings.patch [RHEL-28947] +- Resolves: RHEL-28947 + (Qemu crashing with "failed to set up stack guard page: Cannot allocate memory") + +* Thu Mar 21 2024 Miroslav Rezanina - 8.2.0-10 +- kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch [RHEL-24614] +- kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch [RHEL-24614] +- kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch [RHEL-24614] +- Resolves: RHEL-24614 + ([RHEL9][chardev] qemu hit core dump while using TLS server from host to guest) + +* Wed Mar 20 2024 Miroslav Rezanina - 8.2.0-9 +- kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch [RHEL-28125] +- 
kvm-nbd-server-Fix-race-in-draining-the-export.patch [RHEL-28125] +- kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch [RHEL-28125] +- kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch [RHEL-21705] +- Resolves: RHEL-28125 + (RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete.) +- Resolves: RHEL-21705 + (pc-q35-rhel9.4.0 does not provide proper computer information) + +* Mon Mar 18 2024 Miroslav Rezanina - 8.2.0-8 +- kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch [RHEL-19629] +- kvm-ui-clipboard-add-asserts-for-update-and-request.patch [RHEL-19629] +- kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch [RHEL-21705] +- kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch [RHEL-21705] +- kvm-Implement-SMBIOS-type-9-v2.6.patch [RHEL-21705] +- kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch [RHEL-21705] +- kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch [RHEL-21705] +- kvm-smbios-get-rid-of-smbios_legacy-global.patch [RHEL-21705] +- kvm-smbios-avoid-mangling-user-provided-tables.patch [RHEL-21705] +- kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch [RHEL-21705] +- kvm-smbios-add-smbios_add_usr_blob_size-helper.patch [RHEL-21705] +- kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch [RHEL-21705] +- kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch [RHEL-21705] +- kvm-smbios-handle-errors-consistently.patch [RHEL-21705] +- kvm-smbios-get-rid-of-global-smbios_ep_type.patch [RHEL-21705] +- kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch [RHEL-21705] +- kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch [RHEL-21705] +- kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch [RHEL-21705] +- kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch [RHEL-21705] +- kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch [RHEL-21705] +- Resolves: RHEL-19629 + (CVE-2023-6683 qemu-kvm: QEMU: 
VNC: NULL pointer dereference in qemu_clipboard_request() [rhel-9]) +- Resolves: RHEL-21705 + (pc-q35-rhel9.4.0 does not provide proper computer information) + +* Fri Mar 08 2024 Miroslav Rezanina - 8.2.0-7 +- kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch [RHEL-26049] +- kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch [RHEL-24614] +- Resolves: RHEL-26049 + (When max vcpu is greater than or equal to 246, qemu unable to init event notifier) +- Resolves: RHEL-24614 + ([RHEL9][chardev][s390x] qemu hit core dump while using TLS server from host to guest) + * Mon Feb 19 2024 Miroslav Rezanina - 8.2.0-6 - kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch [RHEL-3934] - kvm-virtio-Re-enable-notifications-after-drain.patch [RHEL-3934]