diff --git a/.gitignore b/.gitignore index b926b6e..855cccf 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,4 @@ /qemu-9.0.0.tar.xz /qemu-9.1.0.tar.xz /qemu-10.0.0.tar.xz +/qemu-10.1.0.tar.xz diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch index 853276c..b2d2860 100644 --- a/0004-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From ff5cdaa4c69d89d7c6429b30fbdc5b9e1f0a6968 Mon Sep 17 00:00:00 2001 +From 8a5eef9fcb74b2fa82ac6122caf3c3d38a26b195 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. -We are rebasing from qemu-kvm-9.1.0-17.el10. +We are rebasing from qemu-kvm-10.0.0-12.el10. Signed-off-by: Miroslav Rezanina @@ -23,36 +23,42 @@ Rebase notes (9.1.0): - Removed --disable-avx512f configure option - Removed qemu-vsmr-helper (changed upstream) -Rebase notes (10.0.0 rc0): +Rebase notes (10.0.0): - Split --disable-sanitazers configure option (upstream change) - Removed s390x-netboot.img (upstream) - accel-tcg module no longer built (upstream) - Removed new upstream npcm8xx board rom - Not package hw-uefi-vars.so - Not package pnv-pnor.bin on build - -Rebase notes (10.0.0): - Include riscv support +Rebase notes (10.1.0 rc0): +- Remove avocado tests installation (removed upstream) +- dtb files installed in special directory (upstream change) +- Remove ast27x0_bootrom.bin +- Removed --disable-avx* configure options +- Removed 32bit archs conditionals +- Conditional qemu-kvm-block-rbd requirement +- Flip ipxe roms and seavgabios arch condition + Merged patches (9.1.0): - b206b8f7cb redhat: Remove the s390-netboot.img from the spec file - 95605107f1 Require new dtrace package -Merged patches (10.0.0 rc0): +Merged patches (10.0.0): - 07c8c9b9ff qemu-guest-agent: Update the logfile path of qga-fsfreeze-hook.log - -Merged 
patches (10.0.0 rc1): - 1f54babd2a Recommend systemtap-client from qemu-tools - -Merged patches (10.0.0 rc3): - 3e4d2a0fb8 Also recommend systemtap-devel from qemu-tools + +Merged patches (10.1.0 rc0): +- 72119e03ea distro: add an explicit valgrind-devel build dep --- .distro/Makefile | 101 ++ .distro/Makefile.common | 42 + .distro/README.tests | 39 + .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 1606 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 1795 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + .distro/scripts/frh.py | 4 +- @@ -63,7 +69,7 @@ Merged patches (10.0.0 rc3): scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + ui/vnc-auth-sasl.c | 2 +- - 16 files changed, 1868 insertions(+), 8 deletions(-) + 16 files changed, 2057 insertions(+), 8 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch index e470db7..222f261 100644 --- a/0005-Enable-disable-devices-for-RHEL.patch +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From e56f9e1921ab836e454ee003812487cd520580b1 Mon Sep 17 00:00:00 2001 +From 03cf16ca98c4ed835c4da8c4424bfac5a9ae3aa6 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 7 Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL @@ -19,20 +19,20 @@ Rebase notes (9.1.0): - Remove unnecessary chunks - Removed CONFIG_VHOST_USER_SCMI and CONFIG_VHOST_USER_SND from some archs -Rebase notes (10.0.0 rc0): +Rebase notes (10.0.0): - Added CONFIG_PCI_BRIDGE for aarch64 and x86_64 (new upstream) - Do not add deprecation_note member as it was added upstream (target/arm/cpu.h) - Rename CONFIG_ARM_GICV3_TCG to CONFIG_ARM_GICV3 - -Rebase notes (10.0.0 rc1): - Remove deprecated line change for 
code commented out - Do not change minimal revision for piix4 - Remove YongFeng vcpu - -Rebase notes (10.0.0): - Add rebase devices changes - Enable virtio-mem on s390x +Rebase notes (10.1.0 rc0): +- Improved riscv cpu chunk re-added +- Comment out unused code + Merged commits (9.1.0): - f24c7a1fee Disable FDC devices - fe8c6cb1ce Disable vga-cirrus device @@ -43,31 +43,42 @@ Merged commits (9.1.0): - cd57d17e3c target/s390x: Revert the old s390x CPU model disablement code - 42af7b3ad5 Enable vhost-user-scmi devices - aa374ce5ea x86/cpu: update deprecation string to match lowest undeprecated model + +Merged commits (10.1.0 rc0): +- 312cdc116e Enable vhost-user-gpu-pci for RHIVOS +- be460986c1 Enable amd-iommu device + +Merged commits (10.1.0 rc2): +- 6306605028 Declare rtl8139 as deprecated + +Merged commits (10.1.0 rc3): +- f06f55a179 Enable uefi variable service for edk2 --- .distro/qemu-kvm.spec.template | 20 +-- - .../aarch64-softmmu/aarch64-rh-devices.mak | 47 +++++++ + .../aarch64-softmmu/aarch64-rh-devices.mak | 49 ++++++++ configs/devices/rh-virtio.mak | 10 ++ .../riscv64-softmmu/riscv64-rh-devices.mak | 39 ++++++ .../s390x-softmmu/s390x-rh-devices.mak | 20 +++ - .../x86_64-softmmu/x86_64-rh-devices.mak | 115 ++++++++++++++++++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 118 ++++++++++++++++++ hw/arm/virt.c | 4 + hw/cxl/meson.build | 3 +- hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + + hw/net/rtl8139.c | 4 + hw/usb/meson.build | 2 +- hw/virtio/meson.build | 6 +- target/arm/arm-qmp-cmds.c | 2 + target/arm/cpu.h | 2 + - target/arm/cpu64.c | 12 +- + target/arm/cpu64.c | 7 +- target/arm/tcg/cpu32.c | 2 + target/arm/tcg/cpu64.c | 8 ++ target/arm/tcg/meson.build | 2 +- - target/i386/cpu.c | 18 +++ + target/i386/cpu.c | 20 +++ target/riscv/cpu.c | 6 + target/s390x/cpu_models.c | 2 +- tests/qtest/arm-cpu-features.c | 4 + - 23 files changed, 317 insertions(+), 16 deletions(-) + 24 files changed, 323 insertions(+), 16 deletions(-) create mode 
100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak create mode 100644 configs/devices/riscv64-softmmu/riscv64-rh-devices.mak @@ -76,10 +87,10 @@ Merged commits (9.1.0): diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..dce5fca821 +index 0000000000..855278f70e --- /dev/null +++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -0,0 +1,47 @@ +@@ -0,0 +1,49 @@ +include ../rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y @@ -127,6 +138,8 @@ index 0000000000..dce5fca821 +CONFIG_IOMMUFD=y +CONFIG_VHOST_USER_SND=y +CONFIG_VHOST_USER_SCMI=y ++CONFIG_VHOST_USER_GPU=y ++CONFIG_UEFI_VARS=y diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak new file mode 100644 index 0000000000..94ede1b5f6 @@ -216,10 +229,10 @@ index 0000000000..834281d872 +CONFIG_IOMMUFD=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..8da1a8f82f +index 0000000000..828cb8aa6f --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -0,0 +1,115 @@ +@@ -0,0 +1,118 @@ +include ../rh-virtio.mak + +CONFIG_ACPI=y @@ -319,6 +332,7 @@ index 0000000000..8da1a8f82f +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VGA=y +CONFIG_VIRTIO_IOMMU=y ++CONFIG_AMD_IOMMU=y +CONFIG_VMMOUSE=y +CONFIG_VMPORT=y +CONFIG_VTD=y @@ -335,11 +349,13 @@ index 0000000000..8da1a8f82f +CONFIG_VHOST_USER_FS=y +CONFIG_IOMMUFD=y +CONFIG_VHOST_USER_SND=y ++CONFIG_VHOST_USER_GPU=y ++CONFIG_UEFI_VARS=y diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index a96452f17a..68bb983ecf 100644 +index ef6be3660f..b525e00365 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -3129,6 +3129,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3183,6 +3183,7 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) 
MachineClass *mc = MACHINE_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); static const char * const valid_cpu_types[] = { @@ -347,7 +363,7 @@ index a96452f17a..68bb983ecf 100644 #ifdef CONFIG_TCG ARM_CPU_TYPE_NAME("cortex-a7"), ARM_CPU_TYPE_NAME("cortex-a15"), -@@ -3144,8 +3145,11 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3198,8 +3199,11 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) ARM_CPU_TYPE_NAME("neoverse-n2"), #endif /* TARGET_AARCH64 */ #endif /* CONFIG_TCG */ @@ -374,10 +390,10 @@ index 3e375f61a9..613adb3ebb 100644 if_false: files( 'cxl-host-stubs.c', diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 818ff60d6f..2a1caca7f7 100644 +index a0f2709c69..8c3962443b 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -191,7 +191,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -191,7 +191,8 @@ static void piix3_ide_class_init(ObjectClass *klass, const void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -387,7 +403,7 @@ index 818ff60d6f..2a1caca7f7 100644 } static const TypeInfo piix3_ide_info = { -@@ -215,6 +216,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -215,6 +216,8 @@ static void piix4_ide_class_init(ObjectClass *klass, const void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -397,10 +413,10 @@ index 818ff60d6f..2a1caca7f7 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index fa0c549eb9..9aa92c1f76 100644 +index 71f5f976e9..794078ed84 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -950,6 +950,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -950,6 +950,8 @@ static void i8042_class_initfn(ObjectClass *klass, const void *data) dc->vmsd = &vmstate_kbd_isa; adevc->build_dev_aml = 
i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -410,10 +426,10 @@ index fa0c549eb9..9aa92c1f76 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 3d0b227703..6b96be40ef 100644 +index a80a7b0cdb..7eb5a3b19d 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1745,6 +1745,7 @@ static const E1000Info e1000_devices[] = { +@@ -1732,6 +1732,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -421,7 +437,7 @@ index 3d0b227703..6b96be40ef 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1757,6 +1758,7 @@ static const E1000Info e1000_devices[] = { +@@ -1744,6 +1745,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -429,6 +445,28 @@ index 3d0b227703..6b96be40ef 100644 }; static void e1000_register_types(void) +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 324fb932aa..f4dd693abb 100644 +--- a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -57,6 +57,7 @@ + #include "system/dma.h" + #include "qemu/module.h" + #include "qemu/timer.h" ++#include "qemu/error-report.h" + #include "qemu/bswap.h" + #include "net/net.h" + #include "net/eth.h" +@@ -3363,6 +3364,9 @@ static void pci_rtl8139_realize(PCIDevice *dev, Error **errp) + DeviceState *d = DEVICE(dev); + uint8_t *pci_conf; + ++ warn_report("'rtl8139' is deprecated, " ++ "please use a different Network Interface Card"); ++ + pci_conf = dev->config; + pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */ + /* TODO: start of capability list, but no capability diff --git a/hw/usb/meson.build b/hw/usb/meson.build index 17360a5b5a..3c4fdfc31d 100644 --- a/hw/usb/meson.build @@ -443,10 +481,10 @@ index 17360a5b5a..3c4fdfc31d 100644 endif diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build -index 164f6fd995..43f9c477da 100644 +index 3ea7b3cec8..3102d68bec 100644 --- a/hw/virtio/meson.build +++ 
b/hw/virtio/meson.build -@@ -21,7 +21,8 @@ if have_vhost +@@ -22,7 +22,8 @@ if have_vhost system_virtio_ss.add(files('vhost-user-base.c')) # MMIO Stubs @@ -456,7 +494,7 @@ index 164f6fd995..43f9c477da 100644 system_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) -@@ -29,7 +30,8 @@ if have_vhost +@@ -30,7 +31,8 @@ if have_vhost system_virtio_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input.c')) # PCI Stubs @@ -467,10 +505,10 @@ index 164f6fd995..43f9c477da 100644 if_true: files('vhost-user-gpio-pci.c')) system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c -index 883c0a0e8c..dc06eb6778 100644 +index d292c974c4..9bb68866e1 100644 --- a/target/arm/arm-qmp-cmds.c +++ b/target/arm/arm-qmp-cmds.c -@@ -223,6 +223,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, +@@ -225,6 +225,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, static void arm_cpu_add_definition(gpointer data, gpointer user_data) { ObjectClass *oc = data; @@ -478,7 +516,7 @@ index 883c0a0e8c..dc06eb6778 100644 CpuDefinitionInfoList **cpu_list = user_data; CpuDefinitionInfo *info; const char *typename; -@@ -231,6 +232,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) +@@ -233,6 +234,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) info = g_malloc0(sizeof(*info)); info->name = cpu_model_from_type(typename); info->q_typename = g_strdup(typename); @@ -487,12 +525,12 @@ index 883c0a0e8c..dc06eb6778 100644 QAPI_LIST_PREPEND(*cpu_list, info); } diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index a8177c6c2e..6d1055a90c 100644 +index dc9b6dce4c..dc0da8b0ae 100644 --- 
a/target/arm/cpu.h +++ b/target/arm/cpu.h -@@ -35,6 +35,8 @@ - #define KVM_HAVE_MCE_INJECTION 1 - #endif +@@ -34,6 +34,8 @@ + #include "target/arm/gtimer.h" + #include "target/arm/cpu-sysregs.h" +#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" + @@ -500,10 +538,10 @@ index a8177c6c2e..6d1055a90c 100644 #define EXCP_SWI 2 /* software interrupt */ #define EXCP_PREFETCH_ABORT 3 diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 8188ede5cc..550232f362 100644 +index 26cf7e6dfa..051d5d653b 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c -@@ -675,6 +675,7 @@ static void aarch64_a57_initfn(Object *obj) +@@ -698,6 +698,7 @@ static void aarch64_a57_initfn(Object *obj) define_cortex_a72_a57_a53_cp_reginfo(cpu); } @@ -511,7 +549,7 @@ index 8188ede5cc..550232f362 100644 static void aarch64_a53_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -735,6 +736,7 @@ static void aarch64_a53_initfn(Object *obj) +@@ -759,6 +760,7 @@ static void aarch64_a53_initfn(Object *obj) cpu->gic_pribits = 5; define_cortex_a72_a57_a53_cp_reginfo(cpu); } @@ -519,7 +557,7 @@ index 8188ede5cc..550232f362 100644 static void aarch64_host_initfn(Object *obj) { -@@ -773,8 +775,11 @@ static void aarch64_max_initfn(Object *obj) +@@ -797,8 +799,11 @@ static void aarch64_max_initfn(Object *obj) } static const ARMCPUInfo aarch64_cpus[] = { @@ -532,39 +570,25 @@ index 8188ede5cc..550232f362 100644 { .name = "max", .initfn = aarch64_max_initfn }, #if defined(CONFIG_KVM) || defined(CONFIG_HVF) { .name = "host", .initfn = aarch64_host_initfn }, -@@ -845,8 +850,13 @@ static void aarch64_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void aarch64_cpu_register(const ARMCPUInfo *info) diff --git a/target/arm/tcg/cpu32.c 
b/target/arm/tcg/cpu32.c -index 2c45b7eddd..09c5f3f74a 100644 +index a2a23eae0d..c362759d65 100644 --- a/target/arm/tcg/cpu32.c +++ b/target/arm/tcg/cpu32.c -@@ -120,6 +120,7 @@ void aa32_max_features(ARMCPU *cpu) - cpu->isar.id_dfr1 = t; +@@ -115,6 +115,7 @@ void aa32_max_features(ARMCPU *cpu) + FIELD_DP32_IDREG(isar, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */ } +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* CPU models. These are not needed for the AArch64 linux-user build. */ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) -@@ -1078,3 +1079,4 @@ static void arm_tcg_cpu_register_types(void) +@@ -1084,3 +1085,4 @@ static void arm_tcg_cpu_register_types(void) type_init(arm_tcg_cpu_register_types) #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ +#endif /* disabled for RHEL */ diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c -index 29ab0ac79d..be3baf5fba 100644 +index 35cddbafa4..c7c464a0af 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -29,6 +29,7 @@ @@ -575,7 +599,7 @@ index 29ab0ac79d..be3baf5fba 100644 static void aarch64_a35_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -112,6 +113,7 @@ static void aarch64_a35_initfn(Object *obj) +@@ -113,6 +114,7 @@ static void aarch64_a35_initfn(Object *obj) /* These values are the same with A53/A57/A72. 
*/ define_cortex_a72_a57_a53_cp_reginfo(cpu); } @@ -583,7 +607,7 @@ index 29ab0ac79d..be3baf5fba 100644 static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) -@@ -201,6 +203,7 @@ static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name, +@@ -199,6 +201,7 @@ static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name, static const Property arm_cpu_lpa2_property = DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true); @@ -591,7 +615,7 @@ index 29ab0ac79d..be3baf5fba 100644 static void aarch64_a55_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1074,6 +1077,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj) +@@ -1080,6 +1083,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj) aarch64_add_pauth_properties(obj); aarch64_add_sve_properties(obj); } @@ -599,7 +623,7 @@ index 29ab0ac79d..be3baf5fba 100644 /* * -cpu max: a CPU with as many features enabled as our emulation supports. -@@ -1299,6 +1303,7 @@ void aarch64_max_tcg_initfn(Object *obj) +@@ -1310,6 +1314,7 @@ void aarch64_max_tcg_initfn(Object *obj) qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); } @@ -607,7 +631,7 @@ index 29ab0ac79d..be3baf5fba 100644 static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, -@@ -1310,14 +1315,17 @@ static const ARMCPUInfo aarch64_cpus[] = { +@@ -1321,14 +1326,17 @@ static const ARMCPUInfo aarch64_cpus[] = { { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn }, { .name = "neoverse-n2", .initfn = aarch64_neoverse_n2_initfn }, }; @@ -619,28 +643,46 @@ index 29ab0ac79d..be3baf5fba 100644 size_t i; for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) { - aarch64_cpu_register(&aarch64_cpus[i]); + arm_cpu_register(&aarch64_cpus[i]); } +#endif } type_init(aarch64_cpu_register_types) diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build -index 
dd12ccedb1..545c2cc3a7 100644 +index 895facdc30..f1a9e01c51 100644 --- a/target/arm/tcg/meson.build +++ b/target/arm/tcg/meson.build -@@ -61,5 +61,5 @@ arm_system_ss.add(files( +@@ -53,7 +53,7 @@ arm_system_ss.add(files( 'psci.c', )) -arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) +#arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) + + arm_common_ss.add(zlib) diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 1b64ceaaba..0b09990a8f 100644 +index 6d85149e6e..9c756a05f2 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -2481,9 +2481,13 @@ static const CPUCaches epyc_genoa_cache_info = { +@@ -3163,6 +3163,7 @@ static const CPUCaches xeon_srf_cache_info = { + }, + }; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static const CPUCaches yongfeng_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + /* CPUID 0x4.0x0.EAX */ +@@ -3261,6 +3262,7 @@ static const CPUCaches yongfeng_cache_info = { + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, + }; ++#endif + + /* The following VMX features are not supported by KVM and are left out in the + * CPU definitions: +@@ -3290,9 +3292,13 @@ static const CPUCaches yongfeng_cache_info = { * PT in VMX operation */ @@ -654,7 +696,7 @@ index 1b64ceaaba..0b09990a8f 100644 .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -2502,6 +2506,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3311,6 +3317,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x8000000A, .model_id = "QEMU Virtual CPU version " QEMU_HW_VERSION, }, @@ -662,7 +704,7 @@ index 1b64ceaaba..0b09990a8f 100644 { .name = "phenom", .level = 5, -@@ -2866,8 +2871,10 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3679,8 +3686,10 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x80000008, .model_id = "Intel Core 2 Duo P9xxx (Penryn Class Core 2)", }, @@ -673,7 +715,7 @@ index 
1b64ceaaba..0b09990a8f 100644 .level = 11, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2945,6 +2952,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3758,6 +3767,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Westmere", @@ -681,7 +723,7 @@ index 1b64ceaaba..0b09990a8f 100644 .level = 11, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -3026,6 +3034,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3839,6 +3849,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "SandyBridge", @@ -689,7 +731,7 @@ index 1b64ceaaba..0b09990a8f 100644 .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -3112,6 +3121,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3925,6 +3936,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "IvyBridge", @@ -697,7 +739,7 @@ index 1b64ceaaba..0b09990a8f 100644 .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -4711,6 +4721,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -5551,6 +5563,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Denverton", @@ -705,7 +747,7 @@ index 1b64ceaaba..0b09990a8f 100644 .level = 21, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -4821,6 +4832,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -5661,6 +5674,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Snowridge", @@ -713,7 +755,7 @@ index 1b64ceaaba..0b09990a8f 100644 .level = 27, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -5002,6 +5014,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -5842,6 +5856,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x80000008, .model_id = "Intel Xeon Phi Processor (Knights Mill)", }, @@ -721,7 +763,7 @@ index 1b64ceaaba..0b09990a8f 100644 { .name = "Opteron_G1", .level = 5, -@@ -5069,8 +5082,10 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -5909,8 +5924,10 @@ static const X86CPUDefinition 
builtin_x86_defs[] = { .xlevel = 0x80000008, .model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)", }, @@ -732,7 +774,7 @@ index 1b64ceaaba..0b09990a8f 100644 .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 21, -@@ -5103,6 +5118,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -5943,6 +5960,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G5", @@ -740,76 +782,68 @@ index 1b64ceaaba..0b09990a8f 100644 .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 21, -@@ -5498,6 +5514,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - .model_id = "AMD EPYC-Genoa Processor", - .cache_info = &epyc_genoa_cache_info, +@@ -6420,6 +6438,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } }, +#if 0 // Disabled for Red Hat Enterprise Linux { .name = "YongFeng", .level = 0x1F, -@@ -5622,6 +5639,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x80000008, - .model_id = "Zhaoxin YongFeng Processor", +@@ -6565,6 +6584,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } }, +#endif - }; - - /* + { + .name = "EPYC-Turin", + .level = 0xd, diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c -index 09ded6829a..1893ad2f0d 100644 +index d055ddf462..bca50a39be 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c -@@ -481,6 +481,7 @@ static void riscv_max_cpu_init(Object *obj) - #endif - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if defined(TARGET_RISCV64) - static void rv64_base_cpu_init(Object *obj) - { -@@ -831,6 +832,7 @@ static void rv32e_bare_cpu_init(Object *obj) - riscv_cpu_set_misa_ext(env, RVE); - } - #endif -+#endif /* disabled for RHEL */ - - static ObjectClass *riscv_cpu_class_by_name(const char *cpu_model) - { -@@ -2970,6 +2972,7 @@ static const Property riscv_cpu_properties[] = { - DEFINE_PROP_BOOL("x-misa-w", RISCVCPU, cfg.misa_w, false), +@@ -2035,6 +2035,7 @@ static const PropertyInfo prop_marchid = { + .set = 
prop_marchid_set, }; +#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if defined(TARGET_RISCV64) - static void rva22u64_profile_cpu_init(Object *obj) - { -@@ -2999,6 +3002,7 @@ static void rva23s64_profile_cpu_init(Object *obj) - RVA23S64.enabled = true; - } - #endif -+#endif /* disabled for RHEL */ + /* + * RVA22U64 defines some 'named features' that are cache + * related: Za64rs, Zic64b, Ziccif, Ziccrse, Ziccamoa +@@ -2143,12 +2144,15 @@ static RISCVCPUProfile RVA23S64 = { + RISCV_PROFILE_EXT_LIST_END + } + }; ++#endif - static const gchar *riscv_gdb_arch_name(CPUState *cs) - { -@@ -3236,6 +3240,7 @@ static const TypeInfo riscv_cpu_type_infos[] = { - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX, MXL_RV64, riscv_max_cpu_init), - #endif + RISCVCPUProfile *riscv_profiles[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + &RVA22U64, + &RVA22S64, + &RVA23U64, + &RVA23S64, ++#endif + NULL, + }; + +@@ -2993,6 +2997,7 @@ static const TypeInfo riscv_cpu_type_infos[] = { + .cfg.pmp_regions = 8 + ), +#if 0 /* Disabled for Red Hat Enterprise Linux */ #if defined(TARGET_RISCV32) || \ (defined(TARGET_RISCV64) && !defined(CONFIG_USER_ONLY)) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE32, MXL_RV32, rv32_base_cpu_init), -@@ -3271,6 +3276,7 @@ static const TypeInfo riscv_cpu_type_infos[] = { - DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23U64, MXL_RV64, rva23u64_profile_cpu_init), - DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23S64, MXL_RV64, rva23s64_profile_cpu_init), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_BASE32, TYPE_RISCV_DYNAMIC_CPU, +@@ -3287,6 +3292,7 @@ static const TypeInfo riscv_cpu_type_infos[] = { + DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23U64, TYPE_RISCV_CPU_RV64I, RVA23U64), + DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA23S64, TYPE_RISCV_CPU_RV64I, RVA23S64), #endif /* TARGET_RISCV64 */ +#endif /* disabled for RHEL */ }; DEFINE_TYPES(riscv_cpu_type_infos) diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 93a05e43d7..111d46a59a 100644 +index 954a7a99a9..fe29f5c5b7 
100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -72,7 +72,6 @@ static S390CPUDef s390_cpu_defs[] = { diff --git a/0006-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch index 005d537..310fcfc 100644 --- a/0006-Machine-type-related-general-changes.patch +++ b/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From db31addfd949b587e937dc39137fc93aa9596990 Mon Sep 17 00:00:00 2001 +From dcaeeab5909a41372c9445e9f97282e8dd3d1d44 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -20,6 +20,12 @@ Rebase notes (10.0.0): - Add riscv changes - Added upstream compat changes +Rebase notes (10.1.0 rc2): +- Remove downstream change to i8254 (review comment) + +Rebase notes (10.1.0): +- Added upstream compat changes from 10.1 + Merged commits (9.1.0): - 043ad5ce97 Add upstream compatibility bits (partial) - bfbdab5824 rhel 9.4.0 machine type compat for virtio-gpu migration @@ -30,23 +36,22 @@ Merged commits (10.0.0 rc0): - d93fcb3940 virtio-net: disable USO for all RHEL9 (partial) --- hw/acpi/piix4.c | 2 +- - hw/arm/virt.c | 3 +- - hw/core/machine.c | 133 +++++++++++++++++++++++++++++++++++ + hw/arm/virt.c | 2 +- + hw/core/machine.c | 145 +++++++++++++++++++++++++++++++++++ hw/i386/fw_cfg.c | 3 +- hw/net/rtl8139.c | 4 +- hw/riscv/virt.c | 4 +- - hw/smbios/smbios.c | 46 +++++++++++- - hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-xhci-pci.c | 55 +++++++++++---- + hw/smbios/smbios.c | 46 ++++++++++- + hw/usb/hcd-xhci-pci.c | 54 +++++++++---- hw/usb/hcd-xhci-pci.h | 1 + hw/virtio/virtio-mem.c | 3 +- - include/hw/boards.h | 28 ++++++++ + include/hw/boards.h | 31 ++++++++ include/hw/firmware/smbios.h | 4 +- include/hw/i386/pc.h | 3 + - 14 files changed, 265 insertions(+), 26 deletions(-) + 13 files changed, 277 insertions(+), 25 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 6d023e595b..8b8aa6b3aa 
100644 +index 7a18f18dda..2b3678b8c6 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -245,7 +245,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) @@ -59,26 +64,25 @@ index 6d023e595b..8b8aa6b3aa 100644 .fields = (const VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 68bb983ecf..904596ae3b 100644 +index b525e00365..1800981317 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1715,7 +1715,8 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1755,7 +1755,7 @@ static void virt_build_smbios(VirtMachineState *vms) + product = "KVM Virtual Machine"; } - smbios_set_defaults("QEMU", product, -- vmc->smbios_old_sys_ver ? "1.0" : mc->name); -+ vmc->smbios_old_sys_ver ? "1.0" : mc->name, -+ NULL, NULL); +- smbios_set_defaults("QEMU", product, mc->name); ++ smbios_set_defaults("QEMU", product, mc->name, NULL, NULL); /* build the array of physical mem area from base_memmap */ mem_array.address = vms->memmap[VIRT_MEM].base; diff --git a/hw/core/machine.c b/hw/core/machine.c -index 63c6ef93d2..c219ae589c 100644 +index bd47527479..2a1a42cebc 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -298,6 +298,139 @@ GlobalProperty hw_compat_2_4[] = { +@@ -288,6 +288,151 @@ GlobalProperty hw_compat_2_6[] = { }; - const size_t hw_compat_2_4_len = G_N_ELEMENTS(hw_compat_2_4); + const size_t hw_compat_2_6_len = G_N_ELEMENTS(hw_compat_2_6); +/* + * RHEL only: machine types for previous major releases are deprecated @@ -86,6 +90,18 @@ index 63c6ef93d2..c219ae589c 100644 +const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_10_2[] = { ++ /* hw_compat_rhel_10_2 from hw_compat_10_0 */ ++ { "scsi-hd", "dpofua", "off" }, ++ /* hw_compat_rhel_10_2 from hw_compat_10_0 */ ++ { "vfio-pci", "x-migration-load-config-after-iter", "off" }, ++ /* hw_compat_rhel_10_2 from hw_compat_10_0 */ ++ { "ramfb", 
"use-legacy-x86-rom", "true"}, ++ /* hw_compat_rhel_10_2 from hw_compat_10_0 */ ++ { "vfio-pci-nohotplug", "use-legacy-x86-rom", "true" }, ++}; ++const size_t hw_compat_rhel_10_2_len = G_N_ELEMENTS(hw_compat_rhel_10_2); ++ +GlobalProperty hw_compat_rhel_10_1[] = { + /* hw_compat_rhel_10_1 from hw_compat_9_1 */ + { TYPE_PCI_DEVICE, "x-pcie-ext-tag", "false" }, @@ -231,10 +247,10 @@ index 5c0bcd5f8a..07df7281d2 100644 /* tell smbios about cpuid version and features */ diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 135ab57160..6c57a8985b 100644 +index f4dd693abb..ac3a7376ad 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3171,7 +3171,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3173,7 +3173,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -243,7 +259,7 @@ index 135ab57160..6c57a8985b 100644 .minimum_version_id = 3, .post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3252,7 +3252,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3254,7 +3254,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -254,10 +270,10 @@ index 135ab57160..6c57a8985b 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c -index e517002fdf..b8d20575af 100644 +index 47e573f85a..ab5a9ec613 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c -@@ -1400,7 +1400,7 @@ static void virt_build_smbios(RISCVVirtState *s) +@@ -1402,7 +1402,7 @@ static void virt_build_smbios(RISCVVirtState *s) product = "KVM Virtual Machine"; } @@ -266,7 +282,7 @@ index e517002fdf..b8d20575af 100644 if (riscv_is_32bit(&s->soc[0])) { smbios_set_default_processor_family(0x200); -@@ -1919,7 +1919,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -1920,7 +1920,7 @@ static
void virt_machine_class_init(ObjectClass *oc, const void *data) mc->desc = "RISC-V VirtIO board"; mc->init = virt_machine_init; mc->max_cpus = VIRT_CPUS_MAX; @@ -276,10 +292,10 @@ index e517002fdf..b8d20575af 100644 mc->no_cdrom = 1; mc->pci_allow_0_address = true; diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index ad4cd6721e..813b9b43ec 100644 +index 1ac063cfb4..03f7a00ed1 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -38,6 +38,10 @@ size_t usr_blobs_len; +@@ -39,6 +39,10 @@ size_t usr_blobs_len; static unsigned usr_table_max; static unsigned usr_table_cnt; @@ -290,7 +306,7 @@ index ad4cd6721e..813b9b43ec 100644 uint8_t *smbios_tables; size_t smbios_tables_len; unsigned smbios_table_max; -@@ -626,7 +630,7 @@ static void smbios_build_type_1_table(void) +@@ -627,7 +631,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { @@ -299,7 +315,7 @@ index ad4cd6721e..813b9b43ec 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -1014,15 +1018,51 @@ void smbios_set_default_processor_family(uint16_t processor_family) +@@ -1015,15 +1019,51 @@ void smbios_set_default_processor_family(uint16_t processor_family) } void smbios_set_defaults(const char *manufacturer, const char *product, @@ -353,21 +369,8 @@ index ad4cd6721e..813b9b43ec 100644 SMBIOS_SET_DEFAULT(type2.version, version); SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type3.version, version); -diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 29105afcc3..cef91c9a79 100644 ---- a/hw/timer/i8254_common.c -+++ b/hw/timer/i8254_common.c -@@ -229,7 +229,7 @@ static const VMStateDescription vmstate_pit_common = { - .pre_save = pit_dispatch_pre_save, - .post_load = pit_dispatch_post_load, - .fields = (const VMStateField[]) { -- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), -+ VMSTATE_UINT32(channels[0].irq_disabled, 
PITCommonState), /* qemu-kvm's v2 had 'flags' here */ - VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, - vmstate_pit_channel, PITChannelState), - VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c -index d908eb787d..401a2734ed 100644 +index b93c80b09d..f4a2f1a1de 100644 --- a/hw/usb/hcd-xhci-pci.c +++ b/hw/usb/hcd-xhci-pci.c @@ -120,6 +120,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) @@ -446,7 +449,7 @@ index d908eb787d..401a2734ed 100644 if (s->msix != ON_OFF_AUTO_OFF) { /* TODO check for errors, and should fail when msix=on */ msix_init(dev, s->xhci.numintrs, -@@ -221,8 +245,11 @@ static const Property xhci_pci_properties[] = { +@@ -221,6 +245,8 @@ static const Property xhci_pci_properties[] = { DEFINE_PROP_ON_OFF_AUTO("msix", XHCIPciState, msix, ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("conditional-intr-mapping", XHCIPciState, conditional_intr_mapping, false), @@ -454,10 +457,7 @@ index d908eb787d..401a2734ed 100644 + DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true), }; -+ - static void xhci_class_init(ObjectClass *klass, void *data) - { - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + static void xhci_class_init(ObjectClass *klass, const void *data) diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h index 5b61ae8455..3170db064b 100644 --- a/hw/usb/hcd-xhci-pci.h @@ -471,10 +471,10 @@ index 5b61ae8455..3170db064b 100644 #endif diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c -index 5f57eccbb6..391d85c652 100644 +index c46f6f9c3e..1805597879 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c -@@ -1717,8 +1717,9 @@ static const Property virtio_mem_properties[] = { +@@ -1699,8 +1699,9 @@ static const Property virtio_mem_properties[] = { #endif DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, early_migration, true), @@ -486,13 +486,16 @@ index 5f57eccbb6..391d85c652 100644 static uint64_t 
virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm, diff --git a/include/hw/boards.h b/include/hw/boards.h -index f22b2e7fc7..d7fa968dca 100644 +index f94713e6e2..a434b21909 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -848,4 +848,32 @@ extern const size_t hw_compat_2_5_len; - extern GlobalProperty hw_compat_2_4[]; - extern const size_t hw_compat_2_4_len; +@@ -863,4 +863,35 @@ extern const size_t hw_compat_2_7_len; + extern GlobalProperty hw_compat_2_6[]; + extern const size_t hw_compat_2_6_len; ++extern GlobalProperty hw_compat_rhel_10_2[]; ++extern const size_t hw_compat_rhel_10_2_len; ++ +extern GlobalProperty hw_compat_rhel_10_1[]; +extern const size_t hw_compat_rhel_10_1_len; + @@ -538,7 +541,7 @@ index f066ab7262..e805d25fbe 100644 uint8_t *smbios_get_table_legacy(size_t *length, Error **errp); void smbios_get_tables(MachineState *ms, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 103b54301f..e4d32f8aea 100644 +index 79b72c54dd..3b4ea24c20 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -103,6 +103,9 @@ struct PCMachineClass { diff --git a/0007-meson-temporarily-disable-Wunused-function.patch b/0007-meson-temporarily-disable-Wunused-function.patch index c87b164..c0313b5 100644 --- a/0007-meson-temporarily-disable-Wunused-function.patch +++ b/0007-meson-temporarily-disable-Wunused-function.patch @@ -1,4 +1,4 @@ -From 6871c5dd1655c578d9605015b2f88cba38715767 Mon Sep 17 00:00:00 2001 +From 7eff7b32584a50d73053b4e3e007621b14ebf766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 3 Jul 2024 13:32:32 +0100 Subject: meson: temporarily disable -Wunused-function @@ -20,10 +20,10 @@ Rebase notes (9.1.0) 1 file changed, 1 insertion(+) diff --git a/meson.build b/meson.build -index 41f68d3806..066ca7a2c9 100644 +index 50c774a195..0e0120a613 100644 --- a/meson.build +++ b/meson.build -@@ -747,6 +747,7 @@ warn_flags = [ +@@ -757,6 +757,7 @@ warn_flags = [ 
'-Wno-string-plus-int', '-Wno-tautological-type-limit-compare', '-Wno-typedef-redefinition', diff --git a/0008-Remove-upstream-machine-types-for-aarch64-s390x-and-.patch b/0008-Remove-upstream-machine-types-for-aarch64-s390x-and-.patch index 59cdaa0..86d7669 100644 --- a/0008-Remove-upstream-machine-types-for-aarch64-s390x-and-.patch +++ b/0008-Remove-upstream-machine-types-for-aarch64-s390x-and-.patch @@ -1,4 +1,4 @@ -From af51df70a29fec7ae9e8bb64006dab26487c1a35 Mon Sep 17 00:00:00 2001 +From fca16c8b4612edfa63d7882379897a9907dde738 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 10 Jul 2024 02:25:51 -0400 Subject: Remove upstream machine types for aarch64, s390x and x86_64 @@ -19,31 +19,31 @@ Rebase notes (9.1.0): hw/arm/virt.c | 2 ++ hw/i386/pc_piix.c | 2 ++ hw/i386/pc_q35.c | 2 ++ - hw/s390x/s390-virtio-ccw.c | 2 ++ - 4 files changed, 8 insertions(+) + hw/s390x/s390-virtio-ccw.c | 3 +++ + 4 files changed, 9 insertions(+) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 904596ae3b..6d5ea31e46 100644 +index 1800981317..e6e98fef1c 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -3413,6 +3413,7 @@ static void machvirt_machine_init(void) +@@ -3459,6 +3459,7 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void virt_machine_10_0_options(MachineClass *mc) + static void virt_machine_10_1_options(MachineClass *mc) { } -@@ -3677,3 +3678,4 @@ static void virt_machine_2_6_options(MachineClass *mc) - vmc->no_pmu = true; +@@ -3634,3 +3635,4 @@ static void virt_machine_4_1_options(MachineClass *mc) + mc->auto_enable_numa_with_memhp = false; } - DEFINE_VIRT_MACHINE(2, 6) + DEFINE_VIRT_MACHINE(4, 1) +#endif /* disabled for RHEL */ diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 6c91e2d292..1b58988c9a 100644 +index c03324281b..acf010e20f 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -452,6 +452,7 @@ static void pc_i440fx_init(MachineState *machine) 
+@@ -475,6 +475,7 @@ static void pc_i440fx_init(MachineState *machine) #define DEFINE_I440FX_MACHINE_AS_LATEST(major, minor) \ DEFINE_PC_VER_MACHINE(pc_i440fx, "pc-i440fx", pc_i440fx_init, true, "pc", major, minor); @@ -51,51 +51,52 @@ index 6c91e2d292..1b58988c9a 100644 static void pc_i440fx_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -794,6 +795,7 @@ static void pc_i440fx_machine_2_4_options(MachineClass *m) +@@ -802,6 +803,7 @@ static void pc_i440fx_machine_2_6_options(MachineClass *m) } - DEFINE_I440FX_MACHINE(2, 4); + DEFINE_I440FX_MACHINE(2, 6); +#endif /* Disabled for Red Hat Enterprise Linux */ #ifdef CONFIG_ISAPC static void isapc_machine_options(MachineClass *m) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index fd96d0345c..97a40a3a9c 100644 +index b309b2b378..2203ffd67e 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -361,6 +361,7 @@ static void pc_q35_machine_options(MachineClass *m) +@@ -374,6 +374,7 @@ static void pc_q35_machine_options(MachineClass *m) pc_q35_compat_defaults, pc_q35_compat_defaults_len); } +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void pc_q35_machine_10_0_options(MachineClass *m) + static void pc_q35_machine_10_1_options(MachineClass *m) { pc_q35_machine_options(m); -@@ -689,3 +690,4 @@ static void pc_q35_machine_2_4_options(MachineClass *m) +@@ -685,3 +686,4 @@ static void pc_q35_machine_2_6_options(MachineClass *m) } - DEFINE_Q35_MACHINE(2, 4); + DEFINE_Q35_MACHINE(2, 6); +#endif /* Disabled for Red Hat Enterprise Linux */ diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 75b32182eb..5aa5910399 100644 +index a79bd13275..2fca2bcf4d 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -921,6 +921,7 @@ static const TypeInfo ccw_machine_info = { +@@ -911,6 +911,7 @@ static const TypeInfo ccw_machine_info = { DEFINE_CCW_MACHINE_IMPL(false, major, minor) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void 
ccw_machine_10_0_instance_options(MachineState *machine) + static void ccw_machine_10_1_instance_options(MachineState *machine) { } -@@ -1295,6 +1296,7 @@ static void ccw_machine_2_9_class_options(MachineClass *mc) - DEFINE_CCW_MACHINE(2, 9); +@@ -1167,6 +1168,8 @@ static void ccw_machine_4_2_class_options(MachineClass *mc) + } + DEFINE_CCW_MACHINE(4, 2); - #endif +#endif /* disabled for RHEL */ - ++ static void ccw_machine_register_types(void) { + type_register_static(&ccw_machine_info); -- 2.39.3 diff --git a/0009-Adapt-versioned-machine-type-macros-for-RHEL.patch b/0009-Adapt-versioned-machine-type-macros-for-RHEL.patch index fea50ed..f0ccb30 100644 --- a/0009-Adapt-versioned-machine-type-macros-for-RHEL.patch +++ b/0009-Adapt-versioned-machine-type-macros-for-RHEL.patch @@ -1,4 +1,4 @@ -From d74a60788160bf1cefe391430bb7ef2f2bd0d29c Mon Sep 17 00:00:00 2001 +From 0bc1a45f789d1aa6b75496739a77dbc77fc2492c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 3 Jul 2024 15:27:03 +0100 Subject: Adapt versioned machine type macros for RHEL @@ -19,19 +19,38 @@ The versioned machine type macros are changed thus: Signed-off-by: Daniel P. 
Berrangé Signed-off-by: Miroslav Rezanina +--- +Rebase changes (10.1.0 rc0): +- Added deprecation limits change to docs/conf.py --- .distro/Makefile | 2 +- .distro/Makefile.common | 1 + .distro/qemu-kvm.spec.template | 1 + - .distro/scripts/process-patches.sh | 3 ++ - include/hw/boards.h | 60 ++++++++++-------------------- + .distro/scripts/process-patches.sh | 3 +++ + docs/conf.py | 4 +-- + include/hw/boards.h | 39 ++++++++++++------------------ meson.build | 1 + - meson_options.txt | 2 + - scripts/meson-buildoptions.sh | 2 + - 8 files changed, 30 insertions(+), 42 deletions(-) + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 2 ++ + 9 files changed, 29 insertions(+), 26 deletions(-) +diff --git a/docs/conf.py b/docs/conf.py +index f892a6e1da..0b8861e4bf 100644 +--- a/docs/conf.py ++++ b/docs/conf.py +@@ -140,8 +140,8 @@ + # MACHINE_VER_DELETION_MAJOR & MACHINE_VER_DEPRECATION_MAJOR + # defined in include/hw/boards.h and the introductory text in + # docs/about/deprecated.rst +-ver_machine_deprecation_version = "%d.%d.0" % (major - 3, minor) +-ver_machine_deletion_version = "%d.%d.0" % (major - 6, minor) ++ver_machine_deprecation_version = "%d.%d.0" % (major - 1, minor) ++ver_machine_deletion_version = "%d.%d.0" % (major - 2, minor) + + # The language for content autogenerated by Sphinx. Refer to documentation + # for a list of supported languages. diff --git a/include/hw/boards.h b/include/hw/boards.h -index d7fa968dca..1f6bba7d64 100644 +index a434b21909..da2fc92ce8 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -577,16 +577,16 @@ struct MachineState { @@ -76,15 +95,9 @@ index d7fa968dca..1f6bba7d64 100644 #define MACHINE_VER_SYM(sym, prefix, ...) \ _MACHINE_VER_PICK(__VA_ARGS__, \ -@@ -634,26 +634,22 @@ struct MachineState { - - - /* -- * How many years/major releases for each phase -- * of the life cycle. 
Assumes use of versioning -- * scheme where major is bumped each year -+ * How many RHEL major releases for each phase -+ * of the life cycle. +@@ -642,17 +642,16 @@ struct MachineState { + * and ver_machine_deletion_version logic in docs/conf.py and + * the text in docs/about/deprecated.rst */ -#define MACHINE_VER_DELETION_MAJOR 6 -#define MACHINE_VER_DEPRECATION_MAJOR 3 @@ -102,15 +115,23 @@ index d7fa968dca..1f6bba7d64 100644 + "machines from the previous RHEL major release are " \ + "subject to deletion in the next RHEL major release" + #define _MACHINE_VER_IS_CURRENT_EXPIRED(cutoff, major, minor) \ + (((QEMU_VERSION_MAJOR - major) > cutoff) || \ +@@ -683,12 +682,7 @@ struct MachineState { + * If this ever changes the logic below will need modifying.... + */ #define _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor) \ -- (((QEMU_VERSION_MAJOR - major) > cutoff) || \ -- (((QEMU_VERSION_MAJOR - major) == cutoff) && \ -- (QEMU_VERSION_MINOR - minor) >= 0)) +- ((QEMU_VERSION_MICRO < 50 && \ +- _MACHINE_VER_IS_CURRENT_EXPIRED(cutoff, major, minor)) || \ +- (QEMU_VERSION_MICRO >= 50 && QEMU_VERSION_MINOR < 2 && \ +- _MACHINE_VER_IS_NEXT_MINOR_EXPIRED(cutoff, major, minor)) || \ +- (QEMU_VERSION_MICRO >= 50 && QEMU_VERSION_MINOR == 2 && \ +- _MACHINE_VER_IS_NEXT_MAJOR_EXPIRED(cutoff, major, minor))) + ((RHEL_VERSION - major) >= cutoff) #define _MACHINE_VER_IS_EXPIRED2(cutoff, major, minor) \ _MACHINE_VER_IS_EXPIRED_IMPL(cutoff, major, minor) -@@ -715,32 +711,14 @@ struct MachineState { +@@ -750,10 +744,9 @@ struct MachineState { * This must be unconditionally used in the register * method for all machine types which support versioning. * @@ -118,40 +139,17 @@ index d7fa968dca..1f6bba7d64 100644 - * suitable period of time has passed, it will cause - * execution of the method to return, avoiding registration - * of the machine -- * -- * The new deprecation and deletion policy for versioned -- * machine types was introduced in QEMU 9.1.0. 
-- * -- * Under the new policy a number of old machine types (any -- * prior to 2.12) would be liable for immediate deletion -- * which would be a violation of our historical deprecation -- * and removal policy -- * -- * Thus deletions are temporarily gated on existance of -- * the env variable "QEMU_DELETE_MACHINES" / QEMU version -- * number >= 10.1.0. This gate can be deleted in the 10.1.0 -- * dev cycle + * It will automatically avoid registration of machines + * that should have been deleted at the start of this + * RHEL release */ #define MACHINE_VER_DELETION(...) \ do { \ - if (MACHINE_VER_SHOULD_DELETE(__VA_ARGS__)) { \ -- if (getenv("QEMU_DELETE_MACHINES") || \ -- QEMU_VERSION_MAJOR > 10 || (QEMU_VERSION_MAJOR == 10 && \ -- QEMU_VERSION_MINOR >= 1)) { \ -- return; \ -- } \ -+ return; \ - } \ - } while (0) - diff --git a/meson.build b/meson.build -index 066ca7a2c9..38ad60fc10 100644 +index 0e0120a613..23494666d9 100644 --- a/meson.build +++ b/meson.build -@@ -2594,6 +2594,7 @@ config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version())) +@@ -2636,6 +2636,7 @@ config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version())) config_host_data.set('QEMU_VERSION_MAJOR', meson.project_version().split('.')[0]) config_host_data.set('QEMU_VERSION_MINOR', meson.project_version().split('.')[1]) config_host_data.set('QEMU_VERSION_MICRO', meson.project_version().split('.')[2]) @@ -160,7 +158,7 @@ index 066ca7a2c9..38ad60fc10 100644 config_host_data.set_quoted('CONFIG_HOST_DSOSUF', host_dsosuf) config_host_data.set('HAVE_HOST_BLOCK_DEVICE', have_host_block_device) diff --git a/meson_options.txt b/meson_options.txt -index 59d973bca0..ad6996178c 100644 +index fff1521e58..f45d7ded45 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -2,6 +2,8 @@ @@ -173,7 +171,7 @@ index 59d973bca0..ad6996178c 100644 description: 'Suffix for QEMU data/modules/config directories (can be empty)') option('docdir', type : 'string', value : 'share/doc', diff 
--git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh -index 3e8e00852b..e9edc8a919 100644 +index 0ebe6bc52a..4146dbc88d 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -75,6 +75,7 @@ meson_options_help() { @@ -184,7 +182,7 @@ index 3e8e00852b..e9edc8a919 100644 printf "%s\n" ' --smbd=VALUE Path to smbd for slirp networking' printf "%s\n" ' --sysconfdir=VALUE Sysconf data directory [etc]' printf "%s\n" ' --tls-priority=VALUE Default TLS protocol/cipher priority string' -@@ -464,6 +465,7 @@ _meson_option_parse() { +@@ -465,6 +466,7 @@ _meson_option_parse() { --disable-relocatable) printf "%s" -Drelocatable=false ;; --enable-replication) printf "%s" -Dreplication=enabled ;; --disable-replication) printf "%s" -Dreplication=disabled ;; diff --git a/0010-Increase-deletion-schedule-to-4-releases.patch b/0010-Increase-deletion-schedule-to-4-releases.patch index 047cf63..51b1741 100644 --- a/0010-Increase-deletion-schedule-to-4-releases.patch +++ b/0010-Increase-deletion-schedule-to-4-releases.patch @@ -1,4 +1,4 @@ -From 4d25fc36b02293e78f93ce3512fe485d799fa8f5 Mon Sep 17 00:00:00 2001 +From 7c4955a929940701a7613fe3db516adbcc97576b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 3 Jul 2024 18:45:58 +0100 Subject: Increase deletion schedule to 4 releases @@ -20,12 +20,12 @@ Rebase notes (9.1.0) 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hw/boards.h b/include/hw/boards.h -index 1f6bba7d64..182c11dc2c 100644 +index da2fc92ce8..aca254ea18 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -637,7 +637,7 @@ struct MachineState { - * How many RHEL major releases for each phase - * of the life cycle. 
+@@ -642,7 +642,7 @@ struct MachineState { + * and ver_machine_deletion_version logic in docs/conf.py and + * the text in docs/about/deprecated.rst */ -#define MACHINE_VER_DELETION_MAJOR 2 +#define MACHINE_VER_DELETION_MAJOR 4 diff --git a/0011-Add-downstream-aarch64-versioned-virt-machine-types.patch b/0011-Add-downstream-aarch64-versioned-virt-machine-types.patch index d6469f6..ce712b7 100644 --- a/0011-Add-downstream-aarch64-versioned-virt-machine-types.patch +++ b/0011-Add-downstream-aarch64-versioned-virt-machine-types.patch @@ -1,4 +1,4 @@ -From 9da015b8759b082330459277058c014e71bff62b Mon Sep 17 00:00:00 2001 +From 59b7559dfa658e325e8c1e23f13f48cf6952ac2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 3 Jul 2024 13:25:47 +0100 Subject: Add downstream aarch64 versioned 'virt' machine types @@ -20,6 +20,9 @@ Rebase notes (10.0.0) - Set no_nested_smmu - Use upstream compat +Rebase notes (10.1.0): +- Use rebase compat + Merged patches (9.1.0): - 043ad5ce97 Add upstream compatibility bits (partial) @@ -32,15 +35,15 @@ Merged patches (10.0.0 rc0): - d93fcb3940 virtio-net: disable USO for all RHEL9 (partial) - 0440f3d003 arm: disable pauth for virt-rhel9* in RHEL10 --- - hw/arm/virt.c | 152 ++++++++++++++++++++++++++++++++++++------ + hw/arm/virt.c | 154 +++++++++++++++++++++++++++++++++++++----- include/hw/arm/virt.h | 1 + - 2 files changed, 134 insertions(+), 19 deletions(-) + 2 files changed, 137 insertions(+), 18 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6d5ea31e46..12bf754b6a 100644 +index e6e98fef1c..d37d1bb3cf 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -93,6 +93,32 @@ static GlobalProperty arm_virt_compat[] = { +@@ -97,6 +97,32 @@ static GlobalProperty arm_virt_compat[] = { }; static const size_t arm_virt_compat_len = G_N_ELEMENTS(arm_virt_compat); @@ -73,7 +76,7 @@ index 6d5ea31e46..12bf754b6a 100644 /* * This cannot be called from the virt_machine_class_init() because * TYPE_VIRT_MACHINE is 
abstract and mc->compat_props g_ptr_array_new() -@@ -102,6 +128,8 @@ static void arm_virt_compat_set(MachineClass *mc) +@@ -106,6 +132,8 @@ static void arm_virt_compat_set(MachineClass *mc) { compat_props_add(mc->compat_props, arm_virt_compat, arm_virt_compat_len); @@ -82,7 +85,7 @@ index 6d5ea31e46..12bf754b6a 100644 } #define DEFINE_VIRT_MACHINE_IMPL(latest, ...) \ -@@ -112,10 +140,11 @@ static void arm_virt_compat_set(MachineClass *mc) +@@ -116,10 +144,11 @@ static void arm_virt_compat_set(MachineClass *mc) MachineClass *mc = MACHINE_CLASS(oc); \ arm_virt_compat_set(mc); \ MACHINE_VER_SYM(options, virt, __VA_ARGS__)(mc); \ @@ -95,7 +98,7 @@ index 6d5ea31e46..12bf754b6a 100644 } \ } \ static const TypeInfo MACHINE_VER_SYM(info, virt, __VA_ARGS__) = \ -@@ -131,10 +160,10 @@ static void arm_virt_compat_set(MachineClass *mc) +@@ -135,10 +164,10 @@ static void arm_virt_compat_set(MachineClass *mc) } \ type_init(MACHINE_VER_SYM(register, virt, __VA_ARGS__)); @@ -110,20 +113,23 @@ index 6d5ea31e46..12bf754b6a 100644 /* Number of external interrupt lines to configure the GIC with */ -@@ -1708,14 +1737,21 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1746,16 +1775,26 @@ static void virt_build_smbios(VirtMachineState *vms) + { + MachineClass *mc = MACHINE_GET_CLASS(vms); + MachineState *ms = MACHINE(vms); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); uint8_t *smbios_tables, *smbios_anchor; size_t smbios_tables_len, smbios_anchor_len; struct smbios_phys_mem_area mem_array; + const char *manufacturer = "QEMU"; const char *product = "QEMU Virtual Machine"; -+ const char *version = vmc->smbios_old_sys_ver ? "1.0" : mc->name; ++ const char *version = mc->name; if (kvm_enabled()) { product = "KVM Virtual Machine"; } -- smbios_set_defaults("QEMU", product, -- vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, +- smbios_set_defaults("QEMU", product, mc->name, NULL, NULL); + if (!vmc->manufacturer_product_compat) { + manufacturer = "Red Hat"; + product = "KVM"; @@ -131,10 +137,11 @@ index 6d5ea31e46..12bf754b6a 100644 + } + + smbios_set_defaults(manufacturer, product, version, - NULL, NULL); ++ NULL, NULL); /* build the array of physical mem area from base_memmap */ -@@ -2464,6 +2500,7 @@ static void machvirt_init(MachineState *machine) + mem_array.address = vms->memmap[VIRT_MEM].base; +@@ -2517,6 +2556,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -142,7 +149,7 @@ index 6d5ea31e46..12bf754b6a 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2491,6 +2528,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2544,6 +2584,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -150,7 +157,7 @@ index 6d5ea31e46..12bf754b6a 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2506,6 +2544,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) +@@ -2559,6 +2600,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) vms->highmem = value; } @@ -158,7 +165,7 @@ index 6d5ea31e46..12bf754b6a 100644 static bool virt_get_compact_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2519,6 +2558,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) +@@ -2572,6 +2614,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) vms->highmem_compact = value; } @@ -166,7 +173,7 @@ index 6d5ea31e46..12bf754b6a 100644 static bool virt_get_highmem_redists(Object *obj, Error **errp) { -@@ -2611,6 +2651,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) +@@ -2664,6 +2707,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) 
vms->its = value; } @@ -174,7 +181,7 @@ index 6d5ea31e46..12bf754b6a 100644 static bool virt_get_dtb_randomness(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2624,6 +2665,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +@@ -2677,6 +2721,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) vms->dtb_randomness = value; } @@ -182,7 +189,7 @@ index 6d5ea31e46..12bf754b6a 100644 static char *virt_get_oem_id(Object *obj, Error **errp) { -@@ -2707,6 +2749,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2760,6 +2805,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -190,7 +197,7 @@ index 6d5ea31e46..12bf754b6a 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2720,6 +2763,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2773,6 +2819,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -198,7 +205,7 @@ index 6d5ea31e46..12bf754b6a 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -3160,16 +3204,16 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3213,16 +3260,16 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) NULL }; @@ -221,7 +228,7 @@ index 6d5ea31e46..12bf754b6a 100644 machine_class_allow_dynamic_sysbus_dev(mc, TYPE_UEFI_VARS_SYSBUS); #ifdef CONFIG_TPM machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); -@@ -3181,11 +3225,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3234,11 +3281,7 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) mc->minimum_page_bits = 12; mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; mc->cpu_index_to_instance_props = virt_cpu_index_to_props; @@ -234,7 +241,7 @@ index 6d5ea31e46..12bf754b6a 100644 mc->valid_cpu_types = 
valid_cpu_types; mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; mc->kvm_type = virt_kvm_type; -@@ -3210,6 +3250,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3263,6 +3306,7 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) NULL, NULL); object_class_property_set_description(oc, "acpi", "Enable ACPI"); @@ -242,7 +249,7 @@ index 6d5ea31e46..12bf754b6a 100644 object_class_property_add_bool(oc, "secure", virt_get_secure, virt_set_secure); object_class_property_set_description(oc, "secure", -@@ -3222,6 +3263,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3275,6 +3319,7 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) "Set on/off to enable/disable emulating a " "guest CPU which implements the ARM " "Virtualization Extensions"); @@ -250,7 +257,7 @@ index 6d5ea31e46..12bf754b6a 100644 object_class_property_add_bool(oc, "highmem", virt_get_highmem, virt_set_highmem); -@@ -3229,12 +3271,14 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3282,12 +3327,14 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) "Set on/off to enable/disable using " "physical address space above 32 bits"); @@ -265,7 +272,7 @@ index 6d5ea31e46..12bf754b6a 100644 object_class_property_add_bool(oc, "highmem-redists", virt_get_highmem_redists, -@@ -3270,7 +3314,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3323,7 +3370,7 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) virt_set_gic_version); object_class_property_set_description(oc, "gic-version", "Set GIC version. 
" @@ -274,7 +281,7 @@ index 6d5ea31e46..12bf754b6a 100644 object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); object_class_property_set_description(oc, "iommu", -@@ -3290,11 +3334,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3343,11 +3390,13 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) "Set on/off to enable/disable reporting host memory errors " "to a KVM guest using ACPI and guest external abort exceptions"); @@ -288,7 +295,7 @@ index 6d5ea31e46..12bf754b6a 100644 object_class_property_add_bool(oc, "its", virt_get_its, virt_set_its); -@@ -3302,6 +3348,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3355,6 +3404,7 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) "Set on/off to enable/disable " "ITS instantiation"); @@ -296,7 +303,7 @@ index 6d5ea31e46..12bf754b6a 100644 object_class_property_add_bool(oc, "dtb-randomness", virt_get_dtb_randomness, virt_set_dtb_randomness); -@@ -3314,6 +3361,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -3367,6 +3417,7 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) virt_set_dtb_randomness); object_class_property_set_description(oc, "dtb-kaslr-seed", "Deprecated synonym of dtb-randomness"); @@ -304,9 +311,9 @@ index 6d5ea31e46..12bf754b6a 100644 object_class_property_add_str(oc, "x-oem-id", virt_get_oem_id, -@@ -3679,3 +3727,69 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3636,3 +3687,70 @@ static void virt_machine_4_1_options(MachineClass *mc) } - DEFINE_VIRT_MACHINE(2, 6) + DEFINE_VIRT_MACHINE(4, 1) #endif /* disabled for RHEL */ + +static void virt_rhel_machine_10_0_0_options(MachineClass *mc) @@ -315,6 +322,7 @@ index 6d5ea31e46..12bf754b6a 100644 + + /* QEMU 9.1 and earlier have only a stage-1 SMMU, not a nested s1+2 one */ + vmc->no_nested_smmu = true; ++ compat_props_add(mc->compat_props, hw_compat_rhel_10_2, 
hw_compat_rhel_10_2_len); + compat_props_add(mc->compat_props, hw_compat_rhel_10_1, hw_compat_rhel_10_1_len); +} +DEFINE_VIRT_MACHINE_AS_LATEST(10, 0, 0) @@ -375,10 +383,10 @@ index 6d5ea31e46..12bf754b6a 100644 +} +DEFINE_VIRT_MACHINE(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index c8e94e6aed..26cfdf1d41 100644 +index 365a28b082..94c79d6c6d 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -135,6 +135,7 @@ struct VirtMachineClass { +@@ -132,6 +132,7 @@ struct VirtMachineClass { bool no_tcg_lpa2; bool no_ns_el2_virt_timer_irq; bool no_nested_smmu; diff --git a/0012-Add-downstream-s390x-versioned-s390-ccw-virtio-machi.patch b/0012-Add-downstream-s390x-versioned-s390-ccw-virtio-machi.patch index cf21b6b..1f3a1b0 100644 --- a/0012-Add-downstream-s390x-versioned-s390-ccw-virtio-machi.patch +++ b/0012-Add-downstream-s390x-versioned-s390-ccw-virtio-machi.patch @@ -1,4 +1,4 @@ -From 68460abb11ff9a65c7e9d988609954c2845d32e4 Mon Sep 17 00:00:00 2001 +From 0976e78ca34a38fbc21c71c2f05e884c0264b62e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 3 Jul 2024 13:44:36 +0100 Subject: Add downstream s390x versioned 's390-ccw-virtio' machine types @@ -14,6 +14,9 @@ Rebase notes (10.0.0): - Use upstream compat - Disabled relaxed-translation for older types +Rebase notes (10.1.0): +- Use rebase compat + Merged patches (9.1.0): - 043ad5ce97 Add upstream compatibility bits (partial) - 04596b496e s390x: remove deprecated rhel machine types @@ -24,17 +27,17 @@ Merged patches (10.0.0 rc0): - 926a9d0ca2 redhat: Add rhel9.6.0 and rhel10.0.0 machine types - d93fcb3940 virtio-net: disable USO for all RHEL9 (partial) --- - hw/s390x/s390-virtio-ccw.c | 103 +++++++++++++++++++++++++++++-- + hw/s390x/s390-virtio-ccw.c | 106 +++++++++++++++++++++++++++++-- target/s390x/cpu_models.c | 11 ++++ target/s390x/cpu_models.h | 2 + target/s390x/cpu_models_system.c | 2 + - 4 files changed, 113 insertions(+), 5 deletions(-) + 4 
files changed, 116 insertions(+), 5 deletions(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 5aa5910399..59b545740e 100644 +index 2fca2bcf4d..9be423858d 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -696,6 +696,7 @@ static void s390_nmi(NMIState *n, int cpu_index, Error **errp) +@@ -716,6 +716,7 @@ static void s390_nmi(NMIState *n, int cpu_index, Error **errp) s390_cpu_restart(S390_CPU(cs)); } @@ -42,7 +45,7 @@ index 5aa5910399..59b545740e 100644 static ram_addr_t s390_fixup_ram_size(ram_addr_t sz) { /* same logic as in sclp.c */ -@@ -715,6 +716,7 @@ static ram_addr_t s390_fixup_ram_size(ram_addr_t sz) +@@ -735,6 +736,7 @@ static ram_addr_t s390_fixup_ram_size(ram_addr_t sz) } return newsz; } @@ -50,7 +53,7 @@ index 5aa5910399..59b545740e 100644 static inline bool machine_get_aes_key_wrap(Object *obj, Error **errp) { -@@ -893,7 +895,7 @@ static const TypeInfo ccw_machine_info = { +@@ -883,7 +885,7 @@ static const TypeInfo ccw_machine_info = { { \ MachineClass *mc = MACHINE_CLASS(oc); \ MACHINE_VER_SYM(class_options, ccw, __VA_ARGS__)(mc); \ @@ -59,7 +62,7 @@ index 5aa5910399..59b545740e 100644 mc->init = MACHINE_VER_SYM(mach_init, ccw, __VA_ARGS__); \ MACHINE_VER_DEPRECATION(__VA_ARGS__); \ if (latest) { \ -@@ -914,11 +916,11 @@ static const TypeInfo ccw_machine_info = { +@@ -904,11 +906,11 @@ static const TypeInfo ccw_machine_info = { } \ type_init(MACHINE_VER_SYM(register, ccw, __VA_ARGS__)) @@ -75,8 +78,8 @@ index 5aa5910399..59b545740e 100644 #if 0 /* Disabled for Red Hat Enterprise Linux */ -@@ -1298,6 +1300,97 @@ DEFINE_CCW_MACHINE(2, 9); - #endif +@@ -1170,6 +1172,100 @@ DEFINE_CCW_MACHINE(4, 2); + #endif /* disabled for RHEL */ +static void ccw_rhel_machine_10_0_0_instance_options(MachineState *machine) @@ -85,12 +88,15 @@ index 5aa5910399..59b545740e 100644 + +static void ccw_rhel_machine_10_0_0_class_options(MachineClass *mc) +{ ++ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); + 
static GlobalProperty compat[] = { + { TYPE_S390_PCI_DEVICE, "relaxed-translation", "off", }, + }; + + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ compat_props_add(mc->compat_props, hw_compat_rhel_10_2, hw_compat_rhel_10_2_len); + compat_props_add(mc->compat_props, hw_compat_rhel_10_1, hw_compat_rhel_10_1_len); ++ s390mc->use_cpi = false; +} +DEFINE_CCW_MACHINE_AS_LATEST(10, 0, 0); + @@ -174,7 +180,7 @@ index 5aa5910399..59b545740e 100644 { type_register_static(&ccw_machine_info); diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 111d46a59a..156bcf0d22 100644 +index fe29f5c5b7..2a7fc949a4 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -47,6 +47,9 @@ @@ -187,8 +193,8 @@ index 111d46a59a..156bcf0d22 100644 static S390CPUDef s390_cpu_defs[] = { /* * Linux requires at least z10 nowadays, and IBM only supports recent CPUs -@@ -932,22 +935,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) - static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) +@@ -931,22 +934,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, const void *data) + static void s390_base_cpu_model_class_init(ObjectClass *oc, const void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); + CPUClass *cc = CPU_CLASS(oc); @@ -203,7 +209,7 @@ index 111d46a59a..156bcf0d22 100644 + } } - static void s390_cpu_model_class_init(ObjectClass *oc, void *data) + static void s390_cpu_model_class_init(ObjectClass *oc, const void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); + CPUClass *cc = CPU_CLASS(oc); @@ -217,9 +223,9 @@ index 111d46a59a..156bcf0d22 100644 + } } - static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) + static void s390_qemu_cpu_model_class_init(ObjectClass *oc, const void *data) diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h -index 71d4bc2dd4..d6c7c2cb50 100644 +index f701bc0b53..670a567c67 100644 --- a/target/s390x/cpu_models.h 
+++ b/target/s390x/cpu_models.h @@ -38,6 +38,8 @@ typedef struct S390CPUDef { @@ -232,7 +238,7 @@ index 71d4bc2dd4..d6c7c2cb50 100644 /* CPU model based on a CPU definition */ diff --git a/target/s390x/cpu_models_system.c b/target/s390x/cpu_models_system.c -index 4351182f72..4074124c44 100644 +index 5b84604867..d715bdc870 100644 --- a/target/s390x/cpu_models_system.c +++ b/target/s390x/cpu_models_system.c @@ -56,6 +56,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) diff --git a/0013-Add-downstream-x86_64-versioned-pc-q35-machine-types.patch b/0013-Add-downstream-x86_64-versioned-pc-q35-machine-types.patch index 9697d1c..3e1945f 100644 --- a/0013-Add-downstream-x86_64-versioned-pc-q35-machine-types.patch +++ b/0013-Add-downstream-x86_64-versioned-pc-q35-machine-types.patch @@ -1,4 +1,4 @@ -From 869dc39b548550c0b7b6a2bd8ab13746ec1b50c1 Mon Sep 17 00:00:00 2001 +From f7424ca0a529f1f3a76a4044b535d1ecd7b5b114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 3 Jul 2024 13:44:41 +0100 Subject: Add downstream x86_64 versioned 'pc' & 'q35' machine types @@ -19,6 +19,9 @@ Rebase notes (10.0.0 rc0): - Add downstream specific compat - Fixing rhel-9.4 compat issue +Rebase notes (10.1.0): +- Use rebase compat + Merged patches (9.1.0): - 043ad5ce97 Add upstream compatibility bits (partial) @@ -34,16 +37,16 @@ Merged patches (10.0.0 rc0): - d93fcb3940 virtio-net: disable USO for all RHEL9 (partial) --- hw/i386/fw_cfg.c | 2 +- - hw/i386/pc.c | 63 +++++++++++++++++++- - hw/i386/pc_piix.c | 49 +++++++++++++-- - hw/i386/pc_q35.c | 119 ++++++++++++++++++++++++++++++++++--- + hw/i386/pc.c | 70 ++++++++++++++++++++- + hw/i386/pc_piix.c | 55 +++++++++++++++-- + hw/i386/pc_q35.c | 123 ++++++++++++++++++++++++++++++++++--- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 18 ++++++ + include/hw/i386/pc.h | 21 +++++++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 ++ tests/qtest/meson.build | 2 +- 
tests/qtest/pvpanic-test.c | 5 +- - 10 files changed, 247 insertions(+), 18 deletions(-) + 10 files changed, 267 insertions(+), 18 deletions(-) diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c index 07df7281d2..8009f5f31f 100644 @@ -59,10 +62,10 @@ index 07df7281d2..8009f5f31f 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 01d0581f62..5ae388789b 100644 +index 2f58e73d33..439abe8f46 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -287,6 +287,65 @@ const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4); +@@ -273,6 +273,72 @@ const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6); */ #define PC_FW_DATA (0x20000 + 0x8000) @@ -88,6 +91,13 @@ index 01d0581f62..5ae388789b 100644 +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_10_2_compat[] = { ++ /* pc_rhel_10_2_compat from pc_compat_10_0 */ ++ { TYPE_X86_CPU, "x-consistent-cache", "false" }, ++ { TYPE_X86_CPU, "x-vendor-cpuid-only-v2", "false" }, ++}; ++const size_t pc_rhel_10_2_compat_len = G_N_ELEMENTS(pc_compat_10_0); ++ +GlobalProperty pc_rhel_10_1_compat[] = { + /* pc_rhel_10_1_compat from pc_compat_9_1 */ + { "ICH9-LPC", "x-smi-swsmi-timer", "off" }, @@ -128,7 +138,7 @@ index 01d0581f62..5ae388789b 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1780,6 +1839,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1754,6 +1820,7 @@ static void pc_machine_class_init(ObjectClass *oc, const void *data) pcmc->kvmclock_create_always = true; x86mc->apic_xrupt_override = true; assert(!mc->get_hotplug_handler); @@ -136,7 +146,7 @@ index 01d0581f62..5ae388789b 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->auto_enable_numa_with_memhp = true; -@@ -1787,7 +1847,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1761,7 +1828,8 @@ static void pc_machine_class_init(ObjectClass *oc, const void *data) mc->has_hotpluggable_cpus = 
true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -147,10 +157,10 @@ index 01d0581f62..5ae388789b 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 1b58988c9a..78e9534a2c 100644 +index acf010e20f..d546c4a8a9 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -52,6 +52,7 @@ +@@ -53,6 +53,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "system/xen.h" @@ -158,7 +168,7 @@ index 1b58988c9a..78e9534a2c 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -446,11 +447,11 @@ static void pc_i440fx_init(MachineState *machine) +@@ -469,11 +470,11 @@ static void pc_i440fx_init(MachineState *machine) pc_init1(machine, TYPE_I440FX_PCI_DEVICE); } @@ -174,7 +184,7 @@ index 1b58988c9a..78e9534a2c 100644 #if 0 /* Disabled for Red Hat Enterprise Linux */ static void pc_i440fx_machine_options(MachineClass *m) -@@ -845,3 +846,43 @@ static void xenfv_machine_3_1_options(MachineClass *m) +@@ -853,3 +854,49 @@ static void xenfv_machine_3_1_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_machine_3_1_options); #endif @@ -203,6 +213,8 @@ index 1b58988c9a..78e9534a2c 100644 + pc_set_south_bridge); + object_class_property_set_description(oc, "x-south-bridge", + "Use a different south bridge than PIIX3"); ++ compat_props_add(m->compat_props, ++ pc_piix_compat_defaults, pc_piix_compat_defaults_len); +} + +static void pc_i440fx_rhel_machine_10_0_0_options(MachineClass *m) @@ -212,18 +224,22 @@ index 1b58988c9a..78e9534a2c 100644 + m->desc = "RHEL 10.0.0 PC (i440FX + PIIX, 1996)"; + m->deprecation_reason = rhel_old_machine_deprecation; + ++ compat_props_add(m->compat_props, hw_compat_rhel_10_2, ++ hw_compat_rhel_10_2_len); + compat_props_add(m->compat_props, hw_compat_rhel_10_1, + hw_compat_rhel_10_1_len); ++ compat_props_add(m->compat_props, pc_rhel_10_2_compat, ++ pc_rhel_10_2_compat_len); + 
compat_props_add(m->compat_props, pc_rhel_10_1_compat, + pc_rhel_10_1_compat_len); +} +DEFINE_I440FX_MACHINE_AS_LATEST(10, 0, 0); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 97a40a3a9c..2f19204304 100644 +index 2203ffd67e..e5d10c0335 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -327,11 +327,11 @@ static void pc_q35_init(MachineState *machine) - } +@@ -340,11 +340,11 @@ static void pc_q35_init(MachineState *machine) + #endif } -#define DEFINE_Q35_MACHINE(major, minor) \ @@ -238,7 +254,7 @@ index 97a40a3a9c..2f19204304 100644 #define DEFINE_Q35_MACHINE_BUGFIX(major, minor, micro) \ DEFINE_PC_VER_MACHINE(pc_q35, "pc-q35", pc_q35_init, false, NULL, major, minor, micro); -@@ -342,21 +342,21 @@ static void pc_q35_machine_options(MachineClass *m) +@@ -355,21 +355,21 @@ static void pc_q35_machine_options(MachineClass *m) pcmc->pci_root_uid = 0; pcmc->default_cpu_version = 1; @@ -265,9 +281,9 @@ index 97a40a3a9c..2f19204304 100644 compat_props_add(m->compat_props, pc_q35_compat_defaults, pc_q35_compat_defaults_len); } -@@ -691,3 +691,104 @@ static void pc_q35_machine_2_4_options(MachineClass *m) +@@ -687,3 +687,108 @@ static void pc_q35_machine_2_6_options(MachineClass *m) - DEFINE_Q35_MACHINE(2, 4); + DEFINE_Q35_MACHINE(2, 6); #endif /* Disabled for Red Hat Enterprise Linux */ + +/* Red Hat Enterprise Linux machine types */ @@ -280,8 +296,12 @@ index 97a40a3a9c..2f19204304 100644 + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "10.0.0"; + ++ compat_props_add(m->compat_props, hw_compat_rhel_10_2, ++ hw_compat_rhel_10_2_len); + compat_props_add(m->compat_props, hw_compat_rhel_10_1, + hw_compat_rhel_10_1_len); ++ compat_props_add(m->compat_props, pc_rhel_10_2_compat, ++ pc_rhel_10_2_compat_len); + compat_props_add(m->compat_props, pc_rhel_10_1_compat, + pc_rhel_10_1_compat_len); +} @@ -371,10 +391,10 @@ index 97a40a3a9c..2f19204304 100644 + +DEFINE_Q35_MACHINE(9, 0, 0); diff --git a/include/hw/boards.h b/include/hw/boards.h 
-index 182c11dc2c..a43847767f 100644 +index aca254ea18..22c3abd51e 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -309,6 +309,8 @@ struct MachineClass { +@@ -308,6 +308,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -384,16 +404,19 @@ index 182c11dc2c..a43847767f 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index e4d32f8aea..5306b6d7cb 100644 +index 3b4ea24c20..633df2fcf8 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -305,6 +305,24 @@ extern const size_t pc_compat_2_5_len; - extern GlobalProperty pc_compat_2_4[]; - extern const size_t pc_compat_2_4_len; +@@ -301,6 +301,27 @@ extern const size_t pc_compat_2_7_len; + extern GlobalProperty pc_compat_2_6[]; + extern const size_t pc_compat_2_6_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_10_2_compat[]; ++extern const size_t pc_rhel_10_2_compat_len; ++ +extern GlobalProperty pc_rhel_10_1_compat[]; +extern const size_t pc_rhel_10_1_compat_len; + @@ -410,13 +433,13 @@ index e4d32f8aea..5306b6d7cb 100644 +extern const size_t pc_rhel_9_0_compat_len; + #define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \ - static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ - { \ + static void pc_machine_##suffix##_class_init(ObjectClass *oc, \ + const void *data) \ diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c -index 6269fa8045..8f455c24e9 100644 +index 89a7953659..74c0b036e3 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c -@@ -174,6 +174,7 @@ static PropValue kvm_default_props[] = { +@@ -175,6 +175,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -425,10 +448,10 @@ index 6269fa8045..8f455c24e9 100644 }; diff --git 
a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 6c749d4ee8..9cb2512c7c 100644 +index 369626f8c8..0eb39d22d6 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -4366,6 +4366,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4389,6 +4389,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -436,7 +459,7 @@ index 6c749d4ee8..9cb2512c7c 100644 kvm_msr_buf_reset(cpu); -@@ -4763,6 +4764,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4786,6 +4787,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -447,7 +470,7 @@ index 6c749d4ee8..9cb2512c7c 100644 case MSR_KVM_ASYNC_PF_INT: env->async_pf_int_msr = msrs[i].data; diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 3136d15e0f..7749ec4b2f 100644 +index 669d07c06b..b96aa06084 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -49,6 +49,7 @@ qtests_filter = \ diff --git a/kvm-Disable-virtio-net-pci-romfile-loading-on-riscv64.patch b/0014-Disable-virtio-net-pci-romfile-loading-on-riscv64.patch similarity index 81% rename from kvm-Disable-virtio-net-pci-romfile-loading-on-riscv64.patch rename to 0014-Disable-virtio-net-pci-romfile-loading-on-riscv64.patch index 8cd2641..21b840f 100644 --- a/kvm-Disable-virtio-net-pci-romfile-loading-on-riscv64.patch +++ b/0014-Disable-virtio-net-pci-romfile-loading-on-riscv64.patch @@ -1,7 +1,7 @@ -From 12720b4c0434d9549f0e41ed5951890fdd9aedd4 Mon Sep 17 00:00:00 2001 +From 7bc17dffbc537e8546249c7c2d19e426ad50e61f Mon Sep 17 00:00:00 2001 From: Andrea Bolognani Date: Tue, 10 Jun 2025 14:27:29 +0200 -Subject: [PATCH 5/5] Disable virtio-net-pci romfile loading on riscv64 +Subject: Disable virtio-net-pci romfile loading on riscv64 RH-Author: Andrea Bolognani RH-MergeRequest: 373: Various small fixes @@ -12,12 +12,16 @@ RH-Commit: [4/4] b490ef3c3ab6a47f90c67016f685e19a65d97100 
(abologna/centos-strea Same motivation for disabling it as on aarch64. Signed-off-by: Andrea Bolognani + +Patch-name: kvm-Disable-virtio-net-pci-romfile-loading-on-riscv64.patch +Patch-id: 51 +Patch-present-in-specfile: True --- hw/riscv/virt.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c -index b8d20575af..5b9291a906 100644 +index ab5a9ec613..3b187d6c98 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -59,6 +59,18 @@ @@ -39,7 +43,7 @@ index b8d20575af..5b9291a906 100644 /* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. */ static bool virt_use_kvm_aia_aplic_imsic(RISCVVirtAIAType aia_type) { -@@ -1977,6 +1989,9 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) +@@ -1978,6 +1990,9 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) NULL, NULL); object_class_property_set_description(oc, "iommu-sys", "Enable IOMMU platform device"); diff --git a/0014-Revert-meson-temporarily-disable-Wunused-function.patch b/0015-Revert-meson-temporarily-disable-Wunused-function.patch similarity index 84% rename from 0014-Revert-meson-temporarily-disable-Wunused-function.patch rename to 0015-Revert-meson-temporarily-disable-Wunused-function.patch index 87b2a80..e3bec6c 100644 --- a/0014-Revert-meson-temporarily-disable-Wunused-function.patch +++ b/0015-Revert-meson-temporarily-disable-Wunused-function.patch @@ -1,4 +1,4 @@ -From 54d447640d3c550143e07610a516e4cdf474260e Mon Sep 17 00:00:00 2001 +From d132184ec50656d9ed675801695a66f620fe0821 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 3 Jul 2024 13:47:04 +0100 Subject: Revert "meson: temporarily disable -Wunused-function" @@ -16,10 +16,10 @@ Signed-off-by: Daniel P. 
Berrangé 1 file changed, 1 deletion(-) diff --git a/meson.build b/meson.build -index 38ad60fc10..0607c1313b 100644 +index 23494666d9..ef2e5be6e2 100644 --- a/meson.build +++ b/meson.build -@@ -747,7 +747,6 @@ warn_flags = [ +@@ -757,7 +757,6 @@ warn_flags = [ '-Wno-string-plus-int', '-Wno-tautological-type-limit-compare', '-Wno-typedef-redefinition', diff --git a/0015-Enable-make-check.patch b/0016-Enable-make-check.patch similarity index 83% rename from 0015-Enable-make-check.patch rename to 0016-Enable-make-check.patch index de71366..809be58 100644 --- a/0015-Enable-make-check.patch +++ b/0016-Enable-make-check.patch @@ -1,4 +1,4 @@ -From bd972a5dfe6cec33302ae52d7503a23b7b8506af Mon Sep 17 00:00:00 2001 +From 0f4d74ce6ff137291962909aaebc1dbf3dd27508 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -19,15 +19,16 @@ Rebase notes (10.0.0 rc0) Rebase notes (10.0.0): - Add riscv changes + +Rebase notes (10.1.0 rc0): +- Comment out unused code --- .distro/qemu-kvm.spec.template | 4 +-- - tests/avocado/reverse_debugging.py | 2 +- tests/functional/meson.build | 2 +- - tests/functional/test_aarch64_replay.py | 2 +- tests/functional/test_aarch64_tcg_plugins.py | 4 +-- tests/qemu-iotests/meson.build | 34 ++++++++++---------- tests/qemu-iotests/testenv.py | 3 ++ - tests/qtest/bios-tables-test.c | 6 ++++ + tests/qtest/bios-tables-test.c | 10 ++++++ tests/qtest/fuzz-e1000e-test.c | 2 +- tests/qtest/fuzz-virtio-scsi-test.c | 2 +- tests/qtest/intel-hda-test.c | 2 +- @@ -38,26 +39,13 @@ Rebase notes (10.0.0): tests/qtest/pvpanic-test.c | 2 +- tests/qtest/riscv-csr-test.c | 4 +++ tests/qtest/virtio-net-failover.c | 1 + - 18 files changed, 45 insertions(+), 32 deletions(-) + 16 files changed, 47 insertions(+), 30 deletions(-) -diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index f24287cd0a..3880b81df6 100644 ---- a/tests/avocado/reverse_debugging.py -+++ 
b/tests/avocado/reverse_debugging.py -@@ -228,7 +228,7 @@ def test_aarch64_virt(self): - """ - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' - '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/functional/meson.build b/tests/functional/meson.build -index 0f8be30fe2..4463f6bb0d 100644 +index 311c6f1806..c7f9051c90 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build -@@ -291,7 +291,7 @@ tests_sparc64_system_thorough = [ +@@ -319,7 +319,7 @@ tests_sparc64_system_thorough = [ tests_x86_64_system_quick = [ 'cpu_queries', @@ -66,24 +54,11 @@ index 0f8be30fe2..4463f6bb0d 100644 'migration', 'pc_cpu_hotplug_props', 'virtio_version', -diff --git a/tests/functional/test_aarch64_replay.py b/tests/functional/test_aarch64_replay.py -index bd6609d914..ddf93814fd 100755 ---- a/tests/functional/test_aarch64_replay.py -+++ b/tests/functional/test_aarch64_replay.py -@@ -18,7 +18,7 @@ class Aarch64Replay(ReplayKernelBase): - - def test_aarch64_virt(self): - self.set_machine('virt') -- self.cpu = 'cortex-a53' -+ self.cpu = 'cortex-a57' - kernel_path = self.ASSET_KERNEL.fetch() - kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + - 'console=ttyAMA0') diff --git a/tests/functional/test_aarch64_tcg_plugins.py b/tests/functional/test_aarch64_tcg_plugins.py -index 4ea71f5f88..0b1043f3dc 100755 +index cb7e9298fb..9efa826b01 100755 --- a/tests/functional/test_aarch64_tcg_plugins.py +++ b/tests/functional/test_aarch64_tcg_plugins.py -@@ -65,7 +65,7 @@ class PluginKernelNormal(PluginKernelBase): +@@ -64,7 +64,7 @@ class PluginKernelNormal(PluginKernelBase): def test_aarch64_virt_insn(self): self.set_machine('virt') @@ -92,7 +67,7 @@ index 4ea71f5f88..0b1043f3dc 100755 kernel_path = self.ASSET_KERNEL.fetch() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + 'console=ttyAMA0') 
-@@ -91,7 +91,7 @@ def test_aarch64_virt_insn(self): +@@ -90,7 +90,7 @@ def test_aarch64_virt_insn(self): def test_aarch64_virt_insn_icount(self): self.set_machine('virt') @@ -159,10 +134,10 @@ index 6326e46b7b..bc849ae9cf 100644 self.qemu_default_machine = get_default_machine(self.qemu_prog) diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c -index 0a333ec435..e24098fc70 100644 +index e7e6926c81..386196edc8 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c -@@ -1707,6 +1707,7 @@ static void test_acpi_microvm_ioapic2_tcg(void) +@@ -1755,6 +1755,7 @@ static void test_acpi_microvm_ioapic2_tcg(void) free_test_data(&data); } @@ -170,7 +145,7 @@ index 0a333ec435..e24098fc70 100644 static void test_acpi_riscv64_virt_tcg_numamem(void) { test_data data = { -@@ -1732,6 +1733,7 @@ static void test_acpi_riscv64_virt_tcg_numamem(void) +@@ -1780,6 +1781,7 @@ static void test_acpi_riscv64_virt_tcg_numamem(void) &data); free_test_data(&data); } @@ -178,7 +153,23 @@ index 0a333ec435..e24098fc70 100644 static void test_acpi_aarch64_virt_tcg_numamem(void) { -@@ -2085,6 +2087,7 @@ static void test_acpi_microvm_acpi_erst(void) +@@ -1856,6 +1858,7 @@ static void test_acpi_aarch64_virt_tcg_acpi_spcr(void) + free_test_data(&data); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_acpi_riscv64_virt_tcg_acpi_spcr(void) + { + test_data data = { +@@ -1874,6 +1877,7 @@ static void test_acpi_riscv64_virt_tcg_acpi_spcr(void) + "-machine spcr=off", &data); + free_test_data(&data); + } ++#endif + + static void test_acpi_tcg_acpi_hmat(const char *machine, const char *arch) + { +@@ -2171,6 +2175,7 @@ static void test_acpi_microvm_acpi_erst(void) } #endif /* CONFIG_POSIX */ @@ -186,7 +177,7 @@ index 0a333ec435..e24098fc70 100644 static void test_acpi_riscv64_virt_tcg(void) { test_data data = { -@@ -2106,6 +2109,7 @@ static void test_acpi_riscv64_virt_tcg(void) +@@ -2192,6 +2197,7 @@ static void 
test_acpi_riscv64_virt_tcg(void) test_acpi_one("-cpu rva22s64 ", &data); free_test_data(&data); } @@ -194,7 +185,23 @@ index 0a333ec435..e24098fc70 100644 static void test_acpi_aarch64_virt_tcg(void) { -@@ -2587,12 +2591,14 @@ int main(int argc, char *argv[]) +@@ -2526,6 +2532,7 @@ static void test_acpi_aarch64_virt_oem_fields(void) + g_free(args); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #define LOONGARCH64_INIT_TEST_DATA(data) \ + test_data data = { \ + .machine = "virt", \ +@@ -2594,6 +2601,7 @@ static void test_acpi_loongarch64_virt_oem_fields(void) + free_test_data(&data); + g_free(args); + } ++#endif + + int main(int argc, char *argv[]) + { +@@ -2769,6 +2777,7 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/virt/viot", test_acpi_aarch64_virt_viot); } } @@ -202,8 +209,9 @@ index 0a333ec435..e24098fc70 100644 } else if (strcmp(arch, "riscv64") == 0) { if (has_tcg && qtest_has_device("virtio-blk-pci")) { qtest_add_func("acpi/virt", test_acpi_riscv64_virt_tcg); - qtest_add_func("acpi/virt/numamem", - test_acpi_riscv64_virt_tcg_numamem); +@@ -2788,6 +2797,7 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/virt/oem-fields", + test_acpi_loongarch64_virt_oem_fields); } +#endif /* disabled for RHEL */ } @@ -288,7 +296,7 @@ index b6a87d27ed..423ba12159 100644 { "rx", "rx62n" }, { "loongarch64", "la464"}, diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 7749ec4b2f..6e2d08acc5 100644 +index b96aa06084..ef44ffaf78 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -91,7 +91,6 @@ qtests_i386 = \ diff --git a/0016-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0017-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 57% rename from 0016-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0017-vfio-cap-number-of-devices-that-can-be-assigned.patch index adac2b6..03adb71 100644 --- a/0016-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ 
b/0017-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,8 +1,17 @@ -From 194c56d4231e0ea6e86c04d905a3941e376c9a55 Mon Sep 17 00:00:00 2001 +From 0f17fef61fc05f7617e47fcbc6a6c13efa5435d8 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned +RH-Author: Bandan Das +Message-id: <1386101113-31560-3-git-send-email-bsd@redhat.com> +Patchwork-id: 55984 +O-Subject: [PATCH RHEL7 qemu-kvm v2 2/2] vfio: cap number of devices that can be assigned +Bugzilla: 678368 +RH-Acked-by: Alex Williamson +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Michael S. Tsirkin + Go through all groups to get count of total number of devices active to enforce limit @@ -17,16 +26,50 @@ Count of slots increased to 509 later so we could increase limit to 64 as some usecases require more than 32 devices. Signed-off-by: Bandan Das ---- - hw/vfio/pci.c | 31 ++++++++++++++++++++++++++++++- - hw/vfio/pci.h | 1 + - 2 files changed, 31 insertions(+), 1 deletion(-) +Rebase changes (8.2.0): +- Update to upstream changes + +Rebased notes (10.1.0) +- Update to upstream changes +- Introduced vfio_device_count() +--- + hw/vfio/container.c | 14 ++++++++++++++ + hw/vfio/pci.c | 21 +++++++++++++++++++++ + hw/vfio/pci.h | 1 + + include/hw/vfio/vfio-device.h | 1 + + 4 files changed, 37 insertions(+) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 3e13feaa74..b912b9396b 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -44,6 +44,20 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + static VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); + ++int vfio_device_count(void) ++{ ++ int i = 0; ++ VFIOGroup *group; ++ VFIODevice *vbasedev_iter; ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ i++; ++ } ++ } ++ return i; ++} ++ + static int vfio_ram_block_discard_disable(VFIOContainer *container, bool 
state) + { + switch (container->iommu_type) { diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 7f1532fbed..a71fe1ca7a 100644 +index 07257d0fa0..48da233cb2 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -50,6 +50,9 @@ +@@ -52,6 +52,9 @@ /* Protected by BQL */ static KVMRouteChange vfio_route_change; @@ -36,14 +79,7 @@ index 7f1532fbed..a71fe1ca7a 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); -@@ -2966,10 +2969,33 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - ERRP_GUARD(); - VFIOPCIDevice *vdev = VFIO_PCI(pdev); - VFIODevice *vbasedev = &vdev->vbasedev; -- int i, ret; -+ int ret, i = 0; -+ VFIODevice *vbasedev_iter; -+ VFIOGroup *group; +@@ -3355,6 +3358,21 @@ static void vfio_pci_realize(PCIDevice *pdev, Error **errp) char uuid[UUID_STR_LEN]; g_autofree char *name = NULL; @@ -56,13 +92,7 @@ index 7f1532fbed..a71fe1ca7a 100644 + device_limit = vdev->assigned_device_limit; + } + -+ QLIST_FOREACH(group, &vfio_group_list, next) { -+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { -+ i++; -+ } -+ } -+ -+ if (i >= vdev->assigned_device_limit) { ++ if (vfio_device_count() >= vdev->assigned_device_limit) { + error_setg(errp, "Maximum supported vfio devices (%d) " + "already attached", vdev->assigned_device_limit); + return; @@ -71,7 +101,7 @@ index 7f1532fbed..a71fe1ca7a 100644 if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3380,6 +3406,9 @@ static const Property vfio_pci_dev_properties[] = { +@@ -3687,6 +3705,9 @@ static const Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -82,10 +112,10 @@ index 7f1532fbed..a71fe1ca7a 100644 false), 
DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index d94ecaba68..3854bbcb26 100644 +index 810a842f4a..81555d8774 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h -@@ -142,6 +142,7 @@ struct VFIOPCIDevice { +@@ -145,6 +145,7 @@ struct VFIOPCIDevice { EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); @@ -93,6 +123,18 @@ index d94ecaba68..3854bbcb26 100644 uint32_t vendor_id; uint32_t device_id; uint32_t sub_vendor_id; +diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h +index 6e4d5ccdac..9290774299 100644 +--- a/include/hw/vfio/vfio-device.h ++++ b/include/hw/vfio/vfio-device.h +@@ -140,6 +140,7 @@ struct VFIODeviceOps { + #define strwriteerror(ret) \ + (ret < 0 ? strerror(-ret) : "short write") + ++int vfio_device_count(void); + void vfio_device_irq_disable(VFIODevice *vbasedev, int index); + void vfio_device_irq_unmask(VFIODevice *vbasedev, int index); + void vfio_device_irq_mask(VFIODevice *vbasedev, int index); -- 2.39.3 diff --git a/0017-Add-support-statement-to-help-output.patch b/0018-Add-support-statement-to-help-output.patch similarity index 85% rename from 0017-Add-support-statement-to-help-output.patch rename to 0018-Add-support-statement-to-help-output.patch index 862cb9e..884c4ce 100644 --- a/0017-Add-support-statement-to-help-output.patch +++ b/0018-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From f3ef3004dc20bd1d6a1de3797fc46259f6503541 Mon Sep 17 00:00:00 2001 +From 74a65ebea34ca36e3f510bc80274ce039df4e383 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -12,10 +12,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/system/vl.c b/system/vl.c -index ec93988a03..c2bd30dc93 100644 +index 3b7057e6c6..d3e6158753 100644 --- a/system/vl.c +++ b/system/vl.c -@@ -870,9 +870,17 @@ static void 
version(void) +@@ -872,9 +872,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -33,7 +33,7 @@ index ec93988a03..c2bd30dc93 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", g_get_prgname()); -@@ -898,6 +906,7 @@ static void help(int exitcode) +@@ -900,6 +908,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0019-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch similarity index 93% rename from 0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename to 0019-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 7c483d1..a7371c5 100644 --- a/0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0019-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 5c4d190b3a79b22c86b59929ffe83433074c64a8 Mon Sep 17 00:00:00 2001 +From 50b5abd584d9157677d694260a797be793fcf985 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -27,10 +27,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. 
|I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index dc694a99a3..5eb668b1e2 100644 +index ab23f14d21..3837456a61 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3672,11 +3672,11 @@ SRST +@@ -3858,11 +3858,11 @@ SRST :: diff --git a/0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/0020-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch similarity index 94% rename from 0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch rename to 0020-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index 128998f..d264a5f 100644 --- a/0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/0020-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From 1453ce2b1fa98c0d9f952827bc40b3a90d0f70be Mon Sep 17 00:00:00 2001 +From f1ec21d5adafcd06563a6c5404c5d631f470ab0c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -25,7 +25,7 @@ Signed-off-by: Kevin Wolf 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 7774e7f090..b6ade4755d 100644 +index 4aa9f9e068..6df65aab93 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1358,6 +1358,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, @@ -42,7 +42,7 @@ index 7774e7f090..b6ade4755d 100644 s->qcow_version = header.version; diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index fc3c64bcb8..4b238954d5 100644 +index 511a55b1e8..35c0fc0d20 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -83,6 +83,7 @@ _filter_qemu() diff --git a/kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch b/0021-file-posix-Define-DM_MPATH_PROBE_PATHS.patch similarity index 78% rename from kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch rename to 0021-file-posix-Define-DM_MPATH_PROBE_PATHS.patch index 3cfb6a7..e329fe0 100644 --- 
a/kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch +++ b/0021-file-posix-Define-DM_MPATH_PROBE_PATHS.patch @@ -1,7 +1,7 @@ -From 42fc4705817fc4d16f2ba785fd29777ed2b7355a Mon Sep 17 00:00:00 2001 +From 90041be5316257fc98eb62af1e8a927e53d2d612 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 29 Apr 2025 17:05:41 +0200 -Subject: [PATCH 1/9] file-posix: Define DM_MPATH_PROBE_PATHS +Subject: file-posix: Define DM_MPATH_PROBE_PATHS RH-Author: Kevin Wolf RH-MergeRequest: 370: file-posix: Fix multipath failover with SCSI passthrough @@ -17,17 +17,21 @@ This is a downstream-only patch that can be removed after the next minor release. Signed-off-by: Kevin Wolf + +Patch-name: kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch +Patch-id: 41 +Patch-present-in-specfile: True --- block/file-posix.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c -index dea7b09b6c..52cc25db84 100644 +index 8c738674ce..cb2e94d7db 100644 --- a/block/file-posix.c +++ b/block/file-posix.c -@@ -134,6 +134,11 @@ - #define RAW_LOCK_PERM_BASE 100 - #define RAW_LOCK_SHARED_BASE 200 +@@ -156,6 +156,11 @@ + */ + #define SG_IO_MAX_RETRIES 8 +/* TODO Remove this when the kernel side is merged */ +#if !defined(DM_MPATH_PROBE_PATHS) && defined(DM_GET_TARGET_VERSION) diff --git a/kvm-Declare-rtl8139-as-deprecated.patch b/kvm-Declare-rtl8139-as-deprecated.patch deleted file mode 100644 index 71df389..0000000 --- a/kvm-Declare-rtl8139-as-deprecated.patch +++ /dev/null @@ -1,56 +0,0 @@ -From e4a324ce1f4ffca214b6d5b5681b9eca630707bf Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 29 Jul 2025 17:00:50 +0200 -Subject: [PATCH] Declare rtl8139 as deprecated -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 396: Declare rtl8139 as deprecated -RH-Jira: RHEL-45624 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Thomas Huth -RH-Acked-by: Cindy Lu -RH-Commit: [1/1] 
d44c1b68b59f1442b01c84b43e07aed5f4d254bc (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-45624 -Upstream: RHEL ONLY - -The e1000+e1000e pair should be more than sufficient for the -"ancient non-virtio NIC" use case. - -There does not appear to be any compelling reason to continue supporting -the rtl8139 NIC, so we should deprecate it in RHEL-10, with a view to -deleting it in RHEL-11. - -Signed-off-by: Laurent Vivier ---- - hw/net/rtl8139.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 31a6956252..7609006394 100644 ---- a/hw/net/rtl8139.c -+++ b/hw/net/rtl8139.c -@@ -57,6 +57,7 @@ - #include "system/dma.h" - #include "qemu/module.h" - #include "qemu/timer.h" -+#include "qemu/error-report.h" - #include "net/net.h" - #include "net/eth.h" - #include "system/system.h" -@@ -3364,6 +3365,9 @@ static void pci_rtl8139_realize(PCIDevice *dev, Error **errp) - DeviceState *d = DEVICE(dev); - uint8_t *pci_conf; - -+ warn_report("'rtl8139' is deprecated, " -+ "please use a different Network Interface Card"); -+ - pci_conf = dev->config; - pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */ - /* TODO: start of capability list, but no capability --- -2.39.3 - diff --git a/kvm-Enable-amd-iommu-device.patch b/kvm-Enable-amd-iommu-device.patch deleted file mode 100644 index 67f6bbc..0000000 --- a/kvm-Enable-amd-iommu-device.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 7b15a63367901d3d3fad7cd17c3960662f2f88f0 Mon Sep 17 00:00:00 2001 -From: John Allen -Date: Wed, 11 Jun 2025 15:41:14 -0500 -Subject: [PATCH 42/43] Enable amd-iommu device - -RH-Author: John Allen -RH-MergeRequest: 383: Add ability to manually specify the AMDVI-PCI device -RH-Jira: RHEL-85649 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 401f99c41c07746b736300bf40175df11a3330d0 (johnalle/qemu-kvm-fork) - -Now that the amdvi-pci device that amd-iommu creates can be specified -manually, amd-iommu device can be enabled. 
- -JIRA: https://issues.redhat.com/browse/RHEL-85649 - -Upstream: RHEL ONLY - -Signed-off-by: John Allen ---- - configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -index 097dad9003..26e51b1edf 100644 ---- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -97,6 +97,7 @@ CONFIG_VIRTIO_MEM=y - CONFIG_VIRTIO_PCI=y - CONFIG_VIRTIO_VGA=y - CONFIG_VIRTIO_IOMMU=y -+CONFIG_AMD_IOMMU=y - CONFIG_VMMOUSE=y - CONFIG_VMPORT=y - CONFIG_VTD=y --- -2.39.3 - diff --git a/kvm-Enable-uefi-variable-service-for-edk2.patch b/kvm-Enable-uefi-variable-service-for-edk2.patch deleted file mode 100644 index 74884cb..0000000 --- a/kvm-Enable-uefi-variable-service-for-edk2.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 058b99a669b3cdd55a8c0c685d2fbfae0935d79e Mon Sep 17 00:00:00 2001 -From: Shaoqin Huang -Date: Wed, 6 Aug 2025 22:31:50 -0400 -Subject: [PATCH] Enable uefi variable service for edk2 - -RH-Author: Shaoqin Huang -RH-MergeRequest: 398: Enable uefi variable service for edk2 -RH-Jira: RHEL-102325 -RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Auger -RH-Commit: [1/1] b498b1f4fbdb31b7bb1122caaacd8f391124b1a0 (shahuang/qemu-kvm) - -JIRA: https://issues.redhat.com/browse/RHEL-102325 - -To support secure boot, we need the uefi variable service for edk2 to be -available. 
- -Signed-off-by: Shaoqin Huang ---- - configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + - configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -index 197fabeb00..855278f70e 100644 ---- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -46,3 +46,4 @@ CONFIG_IOMMUFD=y - CONFIG_VHOST_USER_SND=y - CONFIG_VHOST_USER_SCMI=y - CONFIG_VHOST_USER_GPU=y -+CONFIG_UEFI_VARS=y -diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -index 26e51b1edf..828cb8aa6f 100644 ---- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -115,3 +115,4 @@ CONFIG_VHOST_USER_FS=y - CONFIG_IOMMUFD=y - CONFIG_VHOST_USER_SND=y - CONFIG_VHOST_USER_GPU=y -+CONFIG_UEFI_VARS=y --- -2.39.3 - diff --git a/kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch b/kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch deleted file mode 100644 index 659e692..0000000 --- a/kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 86f52a84c158f7b31455596c9700124977696314 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Tue, 29 Apr 2025 09:09:51 -0400 -Subject: [PATCH 4/4] Enable vhost-user-gpu-pci for RHIVOS - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 356: Enable vhost-user-gpu-pci for RHIVOS -RH-Jira: RHEL-86056 -RH-Commit: [1/1] d323301596f82a3b0a98b0ac99f839d39199ab32 (mrezanin/centos-src-qemu-kvm) - -RHIVOS needs vhost-user-gpu-pci device to be available. 
- -Signed-off-by: Miroslav Rezanina ---- - configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + - configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -index dce5fca821..197fabeb00 100644 ---- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -45,3 +45,4 @@ CONFIG_VHOST_USER_FS=y - CONFIG_IOMMUFD=y - CONFIG_VHOST_USER_SND=y - CONFIG_VHOST_USER_SCMI=y -+CONFIG_VHOST_USER_GPU=y -diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -index 8da1a8f82f..097dad9003 100644 ---- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -113,3 +113,4 @@ CONFIG_VHOST_USER_VSOCK=y - CONFIG_VHOST_USER_FS=y - CONFIG_IOMMUFD=y - CONFIG_VHOST_USER_SND=y -+CONFIG_VHOST_USER_GPU=y --- -2.39.3 - diff --git a/kvm-block-Add-new-bdrv_co_is_all_zeroes-function.patch b/kvm-block-Add-new-bdrv_co_is_all_zeroes-function.patch deleted file mode 100644 index 456b034..0000000 --- a/kvm-block-Add-new-bdrv_co_is_all_zeroes-function.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 659dd2d1f7b1facbf9c548468c1b50237f7aa8e4 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:21 -0500 -Subject: [PATCH 04/14] block: Add new bdrv_co_is_all_zeroes() function - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/14] 404590dbec0b1113872a7eb1bfe5af0450fe6a28 (ebblake/centos-qemu-kvm) - -There are some optimizations that require knowing if an image starts -out as reading all zeroes, such as making blockdev-mirror faster by -skipping the copying of source zeroes 
to the destination. The -existing bdrv_co_is_zero_fast() is a good building block for answering -this question, but it tends to give an answer of 0 for a file we just -created via QMP 'blockdev-create' or similar (such as 'qemu-img create --f raw'). Why? Because file-posix.c insists on allocating a tiny -header to any file rather than leaving it 100% sparse, due to some -filesystems that are unable to answer alignment probes on a hole. But -teaching file-posix.c to read the tiny header doesn't scale - the -problem of a small header is also visible when libvirt sets up an NBD -client to a just-created file on a migration destination host. - -So, we need a wrapper function that handles a bit more complexity in a -common manner for all block devices - when the BDS is mostly a hole, -but has a small non-hole header, it is still worth the time to read -that header and check if it reads as all zeroes before giving up and -returning a pessimistic answer. - -Signed-off-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-ID: <20250509204341.3553601-19-eblake@redhat.com> -(cherry picked from commit 52726096707c5c8b90597c445de897fa64d56e73) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/io.c | 62 ++++++++++++++++++++++++++++++++++++++++ - include/block/block-io.h | 2 ++ - 2 files changed, 64 insertions(+) - -diff --git a/block/io.c b/block/io.c -index 64f4b1d22a..b6fc07e1dc 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -38,10 +38,14 @@ - #include "qemu/error-report.h" - #include "qemu/main-loop.h" - #include "system/replay.h" -+#include "qemu/units.h" - - /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ - #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) - -+/* Maximum read size for checking if data reads as zero, in bytes */ -+#define MAX_ZERO_CHECK_BUFFER (128 * KiB) -+ - static void coroutine_fn GRAPH_RDLOCK - bdrv_parent_cb_resize(BlockDriverState 
*bs); - -@@ -2778,6 +2782,64 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, - return 1; - } - -+/* -+ * Check @bs (and its backing chain) to see if the entire image is known -+ * to read as zeroes. -+ * Return 1 if that is the case, 0 otherwise and -errno on error. -+ * This test is meant to be fast rather than accurate so returning 0 -+ * does not guarantee non-zero data; however, a return of 1 is reliable, -+ * and this function can report 1 in more cases than bdrv_co_is_zero_fast. -+ */ -+int coroutine_fn bdrv_co_is_all_zeroes(BlockDriverState *bs) -+{ -+ int ret; -+ int64_t pnum, bytes; -+ char *buf; -+ QEMUIOVector local_qiov; -+ IO_CODE(); -+ -+ bytes = bdrv_co_getlength(bs); -+ if (bytes < 0) { -+ return bytes; -+ } -+ -+ /* First probe - see if the entire image reads as zero */ -+ ret = bdrv_co_common_block_status_above(bs, NULL, false, BDRV_WANT_ZERO, -+ 0, bytes, &pnum, NULL, NULL, -+ NULL); -+ if (ret < 0) { -+ return ret; -+ } -+ if (ret & BDRV_BLOCK_ZERO) { -+ return bdrv_co_is_zero_fast(bs, pnum, bytes - pnum); -+ } -+ -+ /* -+ * Because of the way 'blockdev-create' works, raw files tend to -+ * be created with a non-sparse region at the front to make -+ * alignment probing easier. If the block starts with only a -+ * small allocated region, it is still worth the effort to see if -+ * the rest of the image is still sparse, coupled with manually -+ * reading the first region to see if it reads zero after all. -+ */ -+ if (pnum > MAX_ZERO_CHECK_BUFFER) { -+ return 0; -+ } -+ ret = bdrv_co_is_zero_fast(bs, pnum, bytes - pnum); -+ if (ret <= 0) { -+ return ret; -+ } -+ /* Only the head of the image is unknown, and it's small. Read it. 
*/ -+ buf = qemu_blockalign(bs, pnum); -+ qemu_iovec_init_buf(&local_qiov, buf, pnum); -+ ret = bdrv_driver_preadv(bs, 0, pnum, &local_qiov, 0, 0); -+ if (ret >= 0) { -+ ret = buffer_is_zero(buf, pnum); -+ } -+ qemu_vfree(buf); -+ return ret; -+} -+ - int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, - int64_t bytes, int64_t *pnum) - { -diff --git a/include/block/block-io.h b/include/block/block-io.h -index b49e0537dd..b99cc98d26 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -161,6 +161,8 @@ bdrv_is_allocated_above(BlockDriverState *bs, BlockDriverState *base, - - int coroutine_fn GRAPH_RDLOCK - bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes); -+int coroutine_fn GRAPH_RDLOCK -+bdrv_co_is_all_zeroes(BlockDriverState *bs); - - int GRAPH_RDLOCK - bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, --- -2.39.3 - diff --git a/kvm-block-Expand-block-status-mode-from-bool-to-flags.patch b/kvm-block-Expand-block-status-mode-from-bool-to-flags.patch deleted file mode 100644 index b159e0f..0000000 --- a/kvm-block-Expand-block-status-mode-from-bool-to-flags.patch +++ /dev/null @@ -1,689 +0,0 @@ -From cb945ccd11d37c959f590ae5661ffe5b73f372a7 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:18 -0500 -Subject: [PATCH 01/14] block: Expand block status mode from bool to flags - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/14] 12507ca1dbe44640e4a8baded4d5afd5fb4ed615 (ebblake/centos-qemu-kvm) - -This patch is purely mechanical, changing bool want_zero into an -unsigned int for bitwise-or of flags. 
As of this patch, all -implementations are unchanged (the old want_zero==true is now -mode==BDRV_WANT_PRECISE which is a superset of BDRV_WANT_ZERO); but -the callers in io.c that used to pass want_zero==false are now -prepared for future driver changes that can now distinguish bewteen -BDRV_WANT_ZERO vs. BDRV_WANT_ALLOCATED. The next patch will actually -change the file-posix driver along those lines, now that we have -more-specific hints. - -As for the background why this patch is useful: right now, the -file-posix driver recognizes that if allocation is being queried, the -entire image can be reported as allocated (there is no backing file to -refer to) - but this throws away information on whether the entire -image reads as zero (trivially true if lseek(SEEK_HOLE) at offset 0 -returns -ENXIO, a bit more complicated to prove if the raw file was -created with 'qemu-img create' since we intentionally allocate a small -chunk of all-zero data to help with alignment probing). Later patches -will add a generic algorithm for seeing if an entire file reads as -zeroes. 
- -Signed-off-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-ID: <20250509204341.3553601-16-eblake@redhat.com> -(cherry picked from commit c33159dec79069514f78faecfe268439226b0f5b) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/blkdebug.c | 6 ++-- - block/copy-before-write.c | 4 +-- - block/coroutines.h | 4 +-- - block/file-posix.c | 4 +-- - block/gluster.c | 4 +-- - block/io.c | 51 ++++++++++++++++---------------- - block/iscsi.c | 6 ++-- - block/nbd.c | 4 +-- - block/null.c | 6 ++-- - block/parallels.c | 6 ++-- - block/qcow.c | 2 +- - block/qcow2.c | 6 ++-- - block/qed.c | 6 ++-- - block/quorum.c | 4 +-- - block/raw-format.c | 4 +-- - block/rbd.c | 6 ++-- - block/snapshot-access.c | 4 +-- - block/vdi.c | 4 +-- - block/vmdk.c | 2 +- - block/vpc.c | 2 +- - block/vvfat.c | 6 ++-- - include/block/block-common.h | 11 +++++++ - include/block/block_int-common.h | 27 +++++++++-------- - include/block/block_int-io.h | 4 +-- - tests/unit/test-block-iothread.c | 2 +- - 25 files changed, 99 insertions(+), 86 deletions(-) - -diff --git a/block/blkdebug.c b/block/blkdebug.c -index 1c1967f8e0..c54aee0c84 100644 ---- a/block/blkdebug.c -+++ b/block/blkdebug.c -@@ -751,9 +751,9 @@ blkdebug_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) - } - - static int coroutine_fn GRAPH_RDLOCK --blkdebug_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, -- int64_t bytes, int64_t *pnum, int64_t *map, -- BlockDriverState **file) -+blkdebug_co_block_status(BlockDriverState *bs, unsigned int mode, -+ int64_t offset, int64_t bytes, int64_t *pnum, -+ int64_t *map, BlockDriverState **file) - { - int err; - -diff --git a/block/copy-before-write.c b/block/copy-before-write.c -index fd470f5f92..2badb3a885 100644 ---- a/block/copy-before-write.c -+++ b/block/copy-before-write.c -@@ -291,8 +291,8 @@ cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t 
bytes, - } - - static int coroutine_fn GRAPH_RDLOCK --cbw_co_snapshot_block_status(BlockDriverState *bs, -- bool want_zero, int64_t offset, int64_t bytes, -+cbw_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode, -+ int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, - BlockDriverState **file) - { -diff --git a/block/coroutines.h b/block/coroutines.h -index 79e5efbf75..892646bb7a 100644 ---- a/block/coroutines.h -+++ b/block/coroutines.h -@@ -47,7 +47,7 @@ int coroutine_fn GRAPH_RDLOCK - bdrv_co_common_block_status_above(BlockDriverState *bs, - BlockDriverState *base, - bool include_base, -- bool want_zero, -+ unsigned int mode, - int64_t offset, - int64_t bytes, - int64_t *pnum, -@@ -78,7 +78,7 @@ int co_wrapper_mixed_bdrv_rdlock - bdrv_common_block_status_above(BlockDriverState *bs, - BlockDriverState *base, - bool include_base, -- bool want_zero, -+ unsigned int mode, - int64_t offset, - int64_t bytes, - int64_t *pnum, -diff --git a/block/file-posix.c b/block/file-posix.c -index 0d85123d0f..0c6569742f 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3266,7 +3266,7 @@ static int find_allocation(BlockDriverState *bs, off_t start, - * well exceed it. - */ - static int coroutine_fn raw_co_block_status(BlockDriverState *bs, -- bool want_zero, -+ unsigned int mode, - int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, -@@ -3282,7 +3282,7 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, - return ret; - } - -- if (!want_zero) { -+ if (mode != BDRV_WANT_PRECISE) { - *pnum = bytes; - *map = offset; - *file = bs; -diff --git a/block/gluster.c b/block/gluster.c -index c6d25ae733..8197b0ecef 100644 ---- a/block/gluster.c -+++ b/block/gluster.c -@@ -1465,7 +1465,7 @@ exit: - * (Based on raw_co_block_status() from file-posix.c.) 
- */ - static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, -- bool want_zero, -+ unsigned int mode, - int64_t offset, - int64_t bytes, - int64_t *pnum, -@@ -1482,7 +1482,7 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, - return ret; - } - -- if (!want_zero) { -+ if (mode != BDRV_WANT_PRECISE) { - *pnum = bytes; - *map = offset; - *file = bs; -diff --git a/block/io.c b/block/io.c -index ccec11386b..e328402adc 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -2364,10 +2364,8 @@ int bdrv_flush_all(void) - * Drivers not implementing the functionality are assumed to not support - * backing files, hence all their sectors are reported as allocated. - * -- * If 'want_zero' is true, the caller is querying for mapping -- * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and -- * _ZERO where possible; otherwise, the result favors larger 'pnum', -- * with a focus on accurate BDRV_BLOCK_ALLOCATED. -+ * 'mode' serves as a hint as to which results are favored; see the -+ * BDRV_WANT_* macros for details. - * - * If 'offset' is beyond the end of the disk image the return value is - * BDRV_BLOCK_EOF and 'pnum' is set to 0. -@@ -2387,7 +2385,7 @@ int bdrv_flush_all(void) - * set to the host mapping and BDS corresponding to the guest offset. 
- */ - static int coroutine_fn GRAPH_RDLOCK --bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, -+bdrv_co_do_block_status(BlockDriverState *bs, unsigned int mode, - int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, BlockDriverState **file) - { -@@ -2476,7 +2474,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, - local_file = bs; - local_map = aligned_offset; - } else { -- ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, -+ ret = bs->drv->bdrv_co_block_status(bs, mode, aligned_offset, - aligned_bytes, pnum, &local_map, - &local_file); - -@@ -2488,10 +2486,10 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, - * the cache requires an RCU update, so double check here to avoid - * such an update if possible. - * -- * Check want_zero, because we only want to update the cache when we -+ * Check mode, because we only want to update the cache when we - * have accurate information about what is zero and what is data. - */ -- if (want_zero && -+ if (mode == BDRV_WANT_PRECISE && - ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && - QLIST_EMPTY(&bs->children)) - { -@@ -2548,7 +2546,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, - - if (ret & BDRV_BLOCK_RAW) { - assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file); -- ret = bdrv_co_do_block_status(local_file, want_zero, local_map, -+ ret = bdrv_co_do_block_status(local_file, mode, local_map, - *pnum, pnum, &local_map, &local_file); - goto out; - } -@@ -2560,7 +2558,7 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, - - if (!cow_bs) { - ret |= BDRV_BLOCK_ZERO; -- } else if (want_zero) { -+ } else if (mode == BDRV_WANT_PRECISE) { - int64_t size2 = bdrv_co_getlength(cow_bs); - - if (size2 >= 0 && offset >= size2) { -@@ -2569,14 +2567,14 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero, - } - } - -- if (want_zero && ret & BDRV_BLOCK_RECURSE && -+ if (mode == BDRV_WANT_PRECISE && ret & BDRV_BLOCK_RECURSE && 
- local_file && local_file != bs && - (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && - (ret & BDRV_BLOCK_OFFSET_VALID)) { - int64_t file_pnum; - int ret2; - -- ret2 = bdrv_co_do_block_status(local_file, want_zero, local_map, -+ ret2 = bdrv_co_do_block_status(local_file, mode, local_map, - *pnum, &file_pnum, NULL, NULL); - if (ret2 >= 0) { - /* Ignore errors. This is just providing extra information, it -@@ -2627,7 +2625,7 @@ int coroutine_fn - bdrv_co_common_block_status_above(BlockDriverState *bs, - BlockDriverState *base, - bool include_base, -- bool want_zero, -+ unsigned int mode, - int64_t offset, - int64_t bytes, - int64_t *pnum, -@@ -2654,7 +2652,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, - return 0; - } - -- ret = bdrv_co_do_block_status(bs, want_zero, offset, bytes, pnum, -+ ret = bdrv_co_do_block_status(bs, mode, offset, bytes, pnum, - map, file); - ++*depth; - if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) { -@@ -2671,7 +2669,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, - for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base; - p = bdrv_filter_or_cow_bs(p)) - { -- ret = bdrv_co_do_block_status(p, want_zero, offset, bytes, pnum, -+ ret = bdrv_co_do_block_status(p, mode, offset, bytes, pnum, - map, file); - ++*depth; - if (ret < 0) { -@@ -2734,7 +2732,8 @@ int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, - BlockDriverState **file) - { - IO_CODE(); -- return bdrv_co_common_block_status_above(bs, base, false, true, offset, -+ return bdrv_co_common_block_status_above(bs, base, false, -+ BDRV_WANT_PRECISE, offset, - bytes, pnum, map, file, NULL); - } - -@@ -2765,8 +2764,9 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, - return 1; - } - -- ret = bdrv_co_common_block_status_above(bs, NULL, false, false, offset, -- bytes, &pnum, NULL, NULL, NULL); -+ ret = bdrv_co_common_block_status_above(bs, NULL, false, BDRV_WANT_ZERO, -+ offset, bytes, 
&pnum, NULL, NULL, -+ NULL); - - if (ret < 0) { - return ret; -@@ -2782,9 +2782,9 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, - int64_t dummy; - IO_CODE(); - -- ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset, -- bytes, pnum ? pnum : &dummy, NULL, -- NULL, NULL); -+ ret = bdrv_co_common_block_status_above(bs, bs, true, BDRV_WANT_ALLOCATED, -+ offset, bytes, pnum ? pnum : &dummy, -+ NULL, NULL, NULL); - if (ret < 0) { - return ret; - } -@@ -2817,7 +2817,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *bs, - int ret; - IO_CODE(); - -- ret = bdrv_co_common_block_status_above(bs, base, include_base, false, -+ ret = bdrv_co_common_block_status_above(bs, base, include_base, -+ BDRV_WANT_ALLOCATED, - offset, bytes, pnum, NULL, NULL, - &depth); - if (ret < 0) { -@@ -3714,8 +3715,8 @@ bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes, - } - - int coroutine_fn --bdrv_co_snapshot_block_status(BlockDriverState *bs, -- bool want_zero, int64_t offset, int64_t bytes, -+bdrv_co_snapshot_block_status(BlockDriverState *bs, unsigned int mode, -+ int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, - BlockDriverState **file) - { -@@ -3733,7 +3734,7 @@ bdrv_co_snapshot_block_status(BlockDriverState *bs, - } - - bdrv_inc_in_flight(bs); -- ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes, -+ ret = drv->bdrv_co_snapshot_block_status(bs, mode, offset, bytes, - pnum, map, file); - bdrv_dec_in_flight(bs); - -diff --git a/block/iscsi.c b/block/iscsi.c -index 2f0f4dac09..15b96ee880 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -694,9 +694,9 @@ out_unlock: - - - static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, -- bool want_zero, int64_t offset, -- int64_t bytes, int64_t *pnum, -- int64_t *map, -+ unsigned int mode, -+ int64_t offset, int64_t bytes, -+ int64_t *pnum, int64_t *map, - BlockDriverState **file) - { - IscsiLun *iscsilun = bs->opaque; 
-diff --git a/block/nbd.c b/block/nbd.c -index 887841bc81..d5a2b21c6d 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -1397,8 +1397,8 @@ nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) - } - - static int coroutine_fn GRAPH_RDLOCK nbd_client_co_block_status( -- BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, -- int64_t *pnum, int64_t *map, BlockDriverState **file) -+ BlockDriverState *bs, unsigned int mode, int64_t offset, -+ int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) - { - int ret, request_ret; - NBDExtent64 extent = { 0 }; -diff --git a/block/null.c b/block/null.c -index dc0b1fdbd9..4e448d593d 100644 ---- a/block/null.c -+++ b/block/null.c -@@ -227,9 +227,9 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state, - } - - static int coroutine_fn null_co_block_status(BlockDriverState *bs, -- bool want_zero, int64_t offset, -- int64_t bytes, int64_t *pnum, -- int64_t *map, -+ unsigned int mode, -+ int64_t offset, int64_t bytes, -+ int64_t *pnum, int64_t *map, - BlockDriverState **file) - { - BDRVNullState *s = bs->opaque; -diff --git a/block/parallels.c b/block/parallels.c -index 347ca127f3..3a375e2a8a 100644 ---- a/block/parallels.c -+++ b/block/parallels.c -@@ -416,9 +416,9 @@ parallels_co_flush_to_os(BlockDriverState *bs) - } - - static int coroutine_fn GRAPH_RDLOCK --parallels_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, -- int64_t bytes, int64_t *pnum, int64_t *map, -- BlockDriverState **file) -+parallels_co_block_status(BlockDriverState *bs, unsigned int mode, -+ int64_t offset, int64_t bytes, int64_t *pnum, -+ int64_t *map, BlockDriverState **file) - { - BDRVParallelsState *s = bs->opaque; - int count; -diff --git a/block/qcow.c b/block/qcow.c -index da8ad4d243..8a3e7591a9 100644 ---- a/block/qcow.c -+++ b/block/qcow.c -@@ -530,7 +530,7 @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate, - } - - static int coroutine_fn 
GRAPH_RDLOCK --qcow_co_block_status(BlockDriverState *bs, bool want_zero, -+qcow_co_block_status(BlockDriverState *bs, unsigned int mode, - int64_t offset, int64_t bytes, int64_t *pnum, - int64_t *map, BlockDriverState **file) - { -diff --git a/block/qcow2.c b/block/qcow2.c -index b6ade4755d..9fc96ba99a 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -2147,9 +2147,9 @@ static void qcow2_join_options(QDict *options, QDict *old_options) - } - - static int coroutine_fn GRAPH_RDLOCK --qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, -- int64_t count, int64_t *pnum, int64_t *map, -- BlockDriverState **file) -+qcow2_co_block_status(BlockDriverState *bs, unsigned int mode, -+ int64_t offset, int64_t count, int64_t *pnum, -+ int64_t *map, BlockDriverState **file) - { - BDRVQcow2State *s = bs->opaque; - uint64_t host_offset; -diff --git a/block/qed.c b/block/qed.c -index ac24449ffb..4a36fb3929 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -833,9 +833,9 @@ fail: - } - - static int coroutine_fn GRAPH_RDLOCK --bdrv_qed_co_block_status(BlockDriverState *bs, bool want_zero, int64_t pos, -- int64_t bytes, int64_t *pnum, int64_t *map, -- BlockDriverState **file) -+bdrv_qed_co_block_status(BlockDriverState *bs, unsigned int mode, -+ int64_t pos, int64_t bytes, int64_t *pnum, -+ int64_t *map, BlockDriverState **file) - { - BDRVQEDState *s = bs->opaque; - size_t len = MIN(bytes, SIZE_MAX); -diff --git a/block/quorum.c b/block/quorum.c -index 30747a6df9..ed8ce801ee 100644 ---- a/block/quorum.c -+++ b/block/quorum.c -@@ -1226,7 +1226,7 @@ static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c, - * region contains zeroes, and BDRV_BLOCK_DATA otherwise. 
- */ - static int coroutine_fn GRAPH_RDLOCK --quorum_co_block_status(BlockDriverState *bs, bool want_zero, -+quorum_co_block_status(BlockDriverState *bs, unsigned int mode, - int64_t offset, int64_t count, - int64_t *pnum, int64_t *map, BlockDriverState **file) - { -@@ -1238,7 +1238,7 @@ quorum_co_block_status(BlockDriverState *bs, bool want_zero, - for (i = 0; i < s->num_children; i++) { - int64_t bytes; - ret = bdrv_co_common_block_status_above(s->children[i]->bs, NULL, false, -- want_zero, offset, count, -+ mode, offset, count, - &bytes, NULL, NULL, NULL); - if (ret < 0) { - quorum_report_bad(QUORUM_OP_TYPE_READ, offset, count, -diff --git a/block/raw-format.c b/block/raw-format.c -index e08526e2ec..df16ac1ea2 100644 ---- a/block/raw-format.c -+++ b/block/raw-format.c -@@ -283,8 +283,8 @@ fail: - } - - static int coroutine_fn GRAPH_RDLOCK --raw_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, -- int64_t bytes, int64_t *pnum, int64_t *map, -+raw_co_block_status(BlockDriverState *bs, unsigned int mode, -+ int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) - { - BDRVRawState *s = bs->opaque; -diff --git a/block/rbd.c b/block/rbd.c -index af984fb7db..4f3d42a8e7 100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -1504,9 +1504,9 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, - } - - static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, -- bool want_zero, int64_t offset, -- int64_t bytes, int64_t *pnum, -- int64_t *map, -+ unsigned int mode, -+ int64_t offset, int64_t bytes, -+ int64_t *pnum, int64_t *map, - BlockDriverState **file) - { - BDRVRBDState *s = bs->opaque; -diff --git a/block/snapshot-access.c b/block/snapshot-access.c -index 71ac83c01f..17ed2402db 100644 ---- a/block/snapshot-access.c -+++ b/block/snapshot-access.c -@@ -41,11 +41,11 @@ snapshot_access_co_preadv_part(BlockDriverState *bs, - - static int coroutine_fn GRAPH_RDLOCK - 
snapshot_access_co_block_status(BlockDriverState *bs, -- bool want_zero, int64_t offset, -+ unsigned int mode, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, BlockDriverState **file) - { -- return bdrv_co_snapshot_block_status(bs->file->bs, want_zero, offset, -+ return bdrv_co_snapshot_block_status(bs->file->bs, mode, offset, - bytes, pnum, map, file); - } - -diff --git a/block/vdi.c b/block/vdi.c -index a2da6ecab0..3ddc62a569 100644 ---- a/block/vdi.c -+++ b/block/vdi.c -@@ -523,8 +523,8 @@ static int vdi_reopen_prepare(BDRVReopenState *state, - } - - static int coroutine_fn GRAPH_RDLOCK --vdi_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, -- int64_t bytes, int64_t *pnum, int64_t *map, -+vdi_co_block_status(BlockDriverState *bs, unsigned int mode, -+ int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) - { - BDRVVdiState *s = (BDRVVdiState *)bs->opaque; -diff --git a/block/vmdk.c b/block/vmdk.c -index 2adec49912..9c7ab037e1 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -1777,7 +1777,7 @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent, - } - - static int coroutine_fn GRAPH_RDLOCK --vmdk_co_block_status(BlockDriverState *bs, bool want_zero, -+vmdk_co_block_status(BlockDriverState *bs, unsigned int mode, - int64_t offset, int64_t bytes, int64_t *pnum, - int64_t *map, BlockDriverState **file) - { -diff --git a/block/vpc.c b/block/vpc.c -index 0309e319f6..801ff5793f 100644 ---- a/block/vpc.c -+++ b/block/vpc.c -@@ -726,7 +726,7 @@ fail: - } - - static int coroutine_fn GRAPH_RDLOCK --vpc_co_block_status(BlockDriverState *bs, bool want_zero, -+vpc_co_block_status(BlockDriverState *bs, unsigned int mode, - int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, - BlockDriverState **file) -diff --git a/block/vvfat.c b/block/vvfat.c -index 91d69b3cc8..814796d918 100644 ---- a/block/vvfat.c -+++ b/block/vvfat.c -@@ -3134,9 +3134,9 @@ vvfat_co_pwritev(BlockDriverState 
*bs, int64_t offset, int64_t bytes, - } - - static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs, -- bool want_zero, int64_t offset, -- int64_t bytes, int64_t *n, -- int64_t *map, -+ unsigned int mode, -+ int64_t offset, int64_t bytes, -+ int64_t *n, int64_t *map, - BlockDriverState **file) - { - *n = bytes; -diff --git a/include/block/block-common.h b/include/block/block-common.h -index 0b831ef87b..c8c626daea 100644 ---- a/include/block/block-common.h -+++ b/include/block/block-common.h -@@ -333,6 +333,17 @@ typedef enum { - #define BDRV_BLOCK_RECURSE 0x40 - #define BDRV_BLOCK_COMPRESSED 0x80 - -+/* -+ * Block status hints: the bitwise-or of these flags emphasize what -+ * the caller hopes to learn, and some drivers may be able to give -+ * faster answers by doing less work when the hint permits. -+ */ -+#define BDRV_WANT_ZERO BDRV_BLOCK_ZERO -+#define BDRV_WANT_OFFSET_VALID BDRV_BLOCK_OFFSET_VALID -+#define BDRV_WANT_ALLOCATED BDRV_BLOCK_ALLOCATED -+#define BDRV_WANT_PRECISE (BDRV_WANT_ZERO | BDRV_WANT_OFFSET_VALID | \ -+ BDRV_WANT_OFFSET_VALID) -+ - typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; - - typedef struct BDRVReopenState { -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index ebb4e56a50..a9c0daa2a4 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -608,15 +608,16 @@ struct BlockDriver { - * according to the current layer, and should only need to set - * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, - * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing -- * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See -- * block.h for the overall meaning of the bits. As a hint, the -- * flag want_zero is true if the caller cares more about precise -- * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for -- * overall allocation (favor larger *pnum, perhaps by reporting -- * _DATA instead of _ZERO). 
The block layer guarantees input -- * clamped to bdrv_getlength() and aligned to request_alignment, -- * as well as non-NULL pnum, map, and file; in turn, the driver -- * must return an error or set pnum to an aligned non-zero value. -+ * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). The -+ * caller will synthesize BDRV_BLOCK_ALLOCATED based on the -+ * non-zero results. See block.h for the overall meaning of the -+ * bits. As a hint, the flags in @mode may include a bitwise-or -+ * of BDRV_WANT_ALLOCATED, BDRV_WANT_OFFSET_VALID, or -+ * BDRV_WANT_ZERO based on what the caller is looking for in the -+ * results. The block layer guarantees input clamped to -+ * bdrv_getlength() and aligned to request_alignment, as well as -+ * non-NULL pnum, map, and file; in turn, the driver must return -+ * an error or set pnum to an aligned non-zero value. - * - * Note that @bytes is just a hint on how big of a region the - * caller wants to inspect. It is not a limit on *pnum. -@@ -628,8 +629,8 @@ struct BlockDriver { - * to clamping *pnum for return to its caller. 
- */ - int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_block_status)( -- BlockDriverState *bs, -- bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, -+ BlockDriverState *bs, unsigned int mode, -+ int64_t offset, int64_t bytes, int64_t *pnum, - int64_t *map, BlockDriverState **file); - - /* -@@ -653,8 +654,8 @@ struct BlockDriver { - QEMUIOVector *qiov, size_t qiov_offset); - - int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_snapshot_block_status)( -- BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, -- int64_t *pnum, int64_t *map, BlockDriverState **file); -+ BlockDriverState *bs, unsigned int mode, int64_t offset, -+ int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); - - int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard_snapshot)( - BlockDriverState *bs, int64_t offset, int64_t bytes); -diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h -index 4a7cf2b4fd..4f94eb3c5a 100644 ---- a/include/block/block_int-io.h -+++ b/include/block/block_int-io.h -@@ -38,8 +38,8 @@ - int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv_snapshot(BdrvChild *child, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset); - int coroutine_fn GRAPH_RDLOCK bdrv_co_snapshot_block_status( -- BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, -- int64_t *pnum, int64_t *map, BlockDriverState **file); -+ BlockDriverState *bs, unsigned int mode, int64_t offset, -+ int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); - int coroutine_fn GRAPH_RDLOCK bdrv_co_pdiscard_snapshot(BlockDriverState *bs, - int64_t offset, int64_t bytes); - -diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c -index 2b358eaaa8..e26b3be593 100644 ---- a/tests/unit/test-block-iothread.c -+++ b/tests/unit/test-block-iothread.c -@@ -63,7 +63,7 @@ bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, - } - - static int coroutine_fn bdrv_test_co_block_status(BlockDriverState 
*bs, -- bool want_zero, -+ unsigned int mode, - int64_t offset, int64_t count, - int64_t *pnum, int64_t *map, - BlockDriverState **file) --- -2.39.3 - diff --git a/kvm-block-Let-bdrv_co_is_zero_fast-consolidate-adjacent-.patch b/kvm-block-Let-bdrv_co_is_zero_fast-consolidate-adjacent-.patch deleted file mode 100644 index f0a2749..0000000 --- a/kvm-block-Let-bdrv_co_is_zero_fast-consolidate-adjacent-.patch +++ /dev/null @@ -1,90 +0,0 @@ -From e101b9872f9b3f6c5e128f29d7c3bb91faca362b Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:20 -0500 -Subject: [PATCH 03/14] block: Let bdrv_co_is_zero_fast consolidate adjacent - extents - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/14] 4520f7ef5bcc5803413541e6f48bf750af3d31e0 (ebblake/centos-qemu-kvm) - -Some BDS drivers have a cap on how much block status they can supply -in one query (for example, NBD talking to an older server cannot -inspect more than 4G per query; and qcow2 tends to cap its answers -rather than cross a cluster boundary of an L1 table). Although the -existing callers of bdrv_co_is_zero_fast are not passing in that large -of a 'bytes' parameter, an upcoming caller wants to query the entire -image at once, and will thus benefit from being able to treat adjacent -zero regions in a coalesced manner, rather than claiming the region is -non-zero merely because pnum was truncated and didn't match the -incoming bytes. - -While refactoring this into a loop, note that there is no need to -assign pnum prior to calling bdrv_co_common_block_status_above() (it -is guaranteed to be assigned deeper in the callstack). 
- -Signed-off-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-ID: <20250509204341.3553601-18-eblake@redhat.com> -(cherry picked from commit 31bf15d97dd1d205a3b264675f9a1b3bd1939068) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/io.c | 27 +++++++++++++++------------ - 1 file changed, 15 insertions(+), 12 deletions(-) - -diff --git a/block/io.c b/block/io.c -index e328402adc..64f4b1d22a 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -2751,28 +2751,31 @@ int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, int64_t offset, - * by @offset and @bytes is known to read as zeroes. - * Return 1 if that is the case, 0 otherwise and -errno on error. - * This test is meant to be fast rather than accurate so returning 0 -- * does not guarantee non-zero data. -+ * does not guarantee non-zero data; but a return of 1 is reliable. - */ - int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, - int64_t bytes) - { - int ret; -- int64_t pnum = bytes; -+ int64_t pnum; - IO_CODE(); - -- if (!bytes) { -- return 1; -- } -- -- ret = bdrv_co_common_block_status_above(bs, NULL, false, BDRV_WANT_ZERO, -- offset, bytes, &pnum, NULL, NULL, -- NULL); -+ while (bytes) { -+ ret = bdrv_co_common_block_status_above(bs, NULL, false, -+ BDRV_WANT_ZERO, offset, bytes, -+ &pnum, NULL, NULL, NULL); - -- if (ret < 0) { -- return ret; -+ if (ret < 0) { -+ return ret; -+ } -+ if (!(ret & BDRV_BLOCK_ZERO)) { -+ return 0; -+ } -+ offset += pnum; -+ bytes -= pnum; - } - -- return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO); -+ return 1; - } - - int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, --- -2.39.3 - diff --git a/kvm-block-io-skip-head-tail-requests-on-EINVAL.patch b/kvm-block-io-skip-head-tail-requests-on-EINVAL.patch deleted file mode 100644 index 9650c04..0000000 --- a/kvm-block-io-skip-head-tail-requests-on-EINVAL.patch +++ /dev/null @@ -1,74 +0,0 
@@ -From 8d520ef6e8959a017535ecfc556b067e4b118cb7 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 17 Apr 2025 11:05:28 -0400 -Subject: [PATCH 2/4] block/io: skip head/tail requests on EINVAL - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 354: file-posix: probe discard alignment on Linux block devices -RH-Jira: RHEL-87642 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake -RH-Commit: [2/3] 5fddeb3ba2df7c61cdb8dd709e56914f3b5c0972 (stefanha/centos-stream-qemu-kvm) - -When guests send misaligned discard requests, the block layer breaks -them up into a misaligned head, an aligned main body, and a misaligned -tail. - -The file-posix block driver on Linux returns -EINVAL on misaligned -discard requests. This causes bdrv_co_pdiscard() to fail and guests -configured with werror=stop will pause. - -Add a special case for misaligned head/tail requests. Simply continue -when EINVAL is encountered so that the aligned main body of the request -can be completed and the guest is not paused. This is the best we can do -when guest discard limits do not match the host discard limits. 
- -Fixes: https://issues.redhat.com/browse/RHEL-86032 -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Hanna Czenczek -Message-ID: <20250417150528.76470-3-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 4733cb0833c4b223f92ec0136980eeb5239ecb87) -Signed-off-by: Stefan Hajnoczi ---- - block/io.c | 15 ++++++++++----- - 1 file changed, 10 insertions(+), 5 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 1ba8d1aeea..ccec11386b 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -3109,11 +3109,12 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, - /* Invalidate the cached block-status data range if this discard overlaps */ - bdrv_bsc_invalidate_range(bs, offset, bytes); - -- /* Discard is advisory, but some devices track and coalesce -+ /* -+ * Discard is advisory, but some devices track and coalesce - * unaligned requests, so we must pass everything down rather than -- * round here. Still, most devices will just silently ignore -- * unaligned requests (by returning -ENOTSUP), so we must fragment -- * the request accordingly. */ -+ * round here. Still, most devices reject unaligned requests with -+ * -EINVAL or -ENOTSUP, so we must fragment the request accordingly. 
-+ */ - align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); - assert(align % bs->bl.request_alignment == 0); - head = offset % align; -@@ -3180,7 +3181,11 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, - } - } - if (ret && ret != -ENOTSUP) { -- goto out; -+ if (ret == -EINVAL && (offset % align != 0 || num % align != 0)) { -+ /* Silently skip rejected unaligned head/tail requests */ -+ } else { -+ goto out; /* bail out */ -+ } - } - - offset += num; --- -2.39.3 - diff --git a/kvm-block-mark-bdrv_child_change_aio_context-GRAPH_RDLOC.patch b/kvm-block-mark-bdrv_child_change_aio_context-GRAPH_RDLOC.patch deleted file mode 100644 index a39df88..0000000 --- a/kvm-block-mark-bdrv_child_change_aio_context-GRAPH_RDLOC.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 9e52d5e0ac43c31ca39c7e1ac41bf5bcb179f848 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:44 +0200 -Subject: [PATCH 19/33] block: mark bdrv_child_change_aio_context() - GRAPH_RDLOCK - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [7/21] ca3294430750a4266f4f3cbe42192d3abb84c817 (kmwolf/centos-qemu-kvm) - -This is a small step in preparation to mark bdrv_drained_begin() as -GRAPH_UNLOCKED. More concretely, it is in preparation to move the -drain out of bdrv_change_aio_context() and marking that function as -GRAPH_RDLOCK. 
- -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-8-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 469422c45b3a816eaf36e7edc895c81e0f3d38bb) -Signed-off-by: Kevin Wolf ---- - include/block/block-global-state.h | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index 9be34b3c99..aad160956a 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -274,9 +274,10 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); - int bdrv_debug_resume(BlockDriverState *bs, const char *tag); - bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); - --bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, -- GHashTable *visited, Transaction *tran, -- Error **errp); -+bool GRAPH_RDLOCK -+bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, -+ GHashTable *visited, Transaction *tran, -+ Error **errp); - int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp); - --- -2.39.3 - diff --git a/kvm-block-mark-bdrv_drained_begin-and-friends-as-GRAPH_U.patch b/kvm-block-mark-bdrv_drained_begin-and-friends-as-GRAPH_U.patch deleted file mode 100644 index f1bad96..0000000 --- a/kvm-block-mark-bdrv_drained_begin-and-friends-as-GRAPH_U.patch +++ /dev/null @@ -1,62 +0,0 @@ -From e002888564647162c7796075ef7bdc14c0dc29fc Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:56 +0200 -Subject: [PATCH 31/33] block: mark bdrv_drained_begin() and friends as - GRAPH_UNLOCKED - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [19/21] 7dd6a447a7fabb2685bec471a28f9e3e78dbc2d1 (kmwolf/centos-qemu-kvm) - -All of bdrv_drain_all_begin(), 
bdrv_drain_all() and -bdrv_drained_begin() poll and are not allowed to be called with the -block graph lock held. Mark the function as such. - -Suggested-by: Kevin Wolf -Signed-off-by: Fiona Ebner -Message-ID: <20250530151125.955508-20-f.ebner@proxmox.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit fc1d2f3eac7946658b160db0b813b81288fb1778) -Signed-off-by: Kevin Wolf ---- - include/block/block-global-state.h | 4 ++-- - include/block/block-io.h | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index 91f249b5ad..84a2a4ecd5 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -192,10 +192,10 @@ int bdrv_inactivate_all(void); - - int bdrv_flush_all(void); - void bdrv_close_all(void); --void bdrv_drain_all_begin(void); -+void GRAPH_UNLOCKED bdrv_drain_all_begin(void); - void bdrv_drain_all_begin_nopoll(void); - void bdrv_drain_all_end(void); --void bdrv_drain_all(void); -+void GRAPH_UNLOCKED bdrv_drain_all(void); - - void bdrv_aio_cancel(BlockAIOCB *acb); - -diff --git a/include/block/block-io.h b/include/block/block-io.h -index b99cc98d26..4cf83fb367 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -431,7 +431,7 @@ bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, - * - * This function can be recursive. 
- */ --void bdrv_drained_begin(BlockDriverState *bs); -+void GRAPH_UNLOCKED bdrv_drained_begin(BlockDriverState *bs); - - /** - * bdrv_do_drained_begin_quiesce: --- -2.39.3 - diff --git a/kvm-block-mark-bdrv_parent_change_aio_context-GRAPH_RDLO.patch b/kvm-block-mark-bdrv_parent_change_aio_context-GRAPH_RDLO.patch deleted file mode 100644 index 1524fb9..0000000 --- a/kvm-block-mark-bdrv_parent_change_aio_context-GRAPH_RDLO.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 191a72ef2a40d7bd14c5ad3745f1dfccbbf95817 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:42 +0200 -Subject: [PATCH 17/33] block: mark bdrv_parent_change_aio_context() - GRAPH_RDLOCK - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [5/21] e80d701ce609e823f85465a3433324cbf077b3b9 (kmwolf/centos-qemu-kvm) - -This is a small step in preparation to mark bdrv_drained_begin() as -GRAPH_UNLOCKED. More concretely, it allows marking the -change_aio_ctx() callback GRAPH_RDLOCK_PTR, which is the next step. 
- -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-6-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 3758733959af93b5eb3283659d868ad5b24152b4) -Signed-off-by: Kevin Wolf ---- - block.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/block.c b/block.c -index e340bac177..18a6be3bd6 100644 ---- a/block.c -+++ b/block.c -@@ -7575,10 +7575,10 @@ typedef struct BdrvStateSetAioContext { - BlockDriverState *bs; - } BdrvStateSetAioContext; - --static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, -- GHashTable *visited, -- Transaction *tran, -- Error **errp) -+static bool GRAPH_RDLOCK -+bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, -+ GHashTable *visited, Transaction *tran, -+ Error **errp) - { - GLOBAL_STATE_CODE(); - if (g_hash_table_contains(visited, c)) { --- -2.39.3 - diff --git a/kvm-block-mark-change_aio_ctx-callback-and-instances-as-.patch b/kvm-block-mark-change_aio_ctx-callback-and-instances-as-.patch deleted file mode 100644 index ac0fbd7..0000000 --- a/kvm-block-mark-change_aio_ctx-callback-and-instances-as-.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 1c72dbd318b6c15d7a7cbc14a270056a5cb6b182 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:43 +0200 -Subject: [PATCH 18/33] block: mark change_aio_ctx() callback and instances as - GRAPH_RDLOCK(_PTR) - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [6/21] 9b1d5045f18b3393c5a35ce854da0dfd6fc2803b (kmwolf/centos-qemu-kvm) - -This is a small step in preparation to mark bdrv_drained_begin() as -GRAPH_UNLOCKED. More concretely, it is in preparation to move the -drain out of bdrv_change_aio_context() and marking that function as -GRAPH_RDLOCK. 
- -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-7-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 844d550d09ac29ff2b1b49069587ae6a989df31d) -Signed-off-by: Kevin Wolf ---- - block.c | 7 ++++--- - block/block-backend.c | 6 +++--- - blockjob.c | 6 +++--- - include/block/block_int-common.h | 6 +++--- - 4 files changed, 13 insertions(+), 12 deletions(-) - -diff --git a/block.c b/block.c -index 18a6be3bd6..f7b21d8f27 100644 ---- a/block.c -+++ b/block.c -@@ -1226,9 +1226,10 @@ static int bdrv_child_cb_inactivate(BdrvChild *child) - return 0; - } - --static bool bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx, -- GHashTable *visited, Transaction *tran, -- Error **errp) -+static bool GRAPH_RDLOCK -+bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx, -+ GHashTable *visited, Transaction *tran, -+ Error **errp) - { - BlockDriverState *bs = child->opaque; - return bdrv_change_aio_context(bs, ctx, visited, tran, errp); -diff --git a/block/block-backend.c b/block/block-backend.c -index a402db13f2..6a6949edeb 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -136,9 +136,9 @@ static void blk_root_drained_end(BdrvChild *child); - static void blk_root_change_media(BdrvChild *child, bool load); - static void blk_root_resize(BdrvChild *child); - --static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, -- GHashTable *visited, Transaction *tran, -- Error **errp); -+static bool GRAPH_RDLOCK -+blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, GHashTable *visited, -+ Transaction *tran, Error **errp); - - static char *blk_root_get_parent_desc(BdrvChild *child) - { -diff --git a/blockjob.c b/blockjob.c -index 32007f31a9..34185d7715 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -144,9 +144,9 @@ static TransactionActionDrv change_child_job_context = { - .clean = g_free, - }; - --static bool child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx, -- 
GHashTable *visited, Transaction *tran, -- Error **errp) -+static bool GRAPH_RDLOCK -+child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx, GHashTable *visited, -+ Transaction *tran, Error **errp) - { - BlockJob *job = c->opaque; - BdrvStateChildJobContext *s; -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index a9c0daa2a4..307dc56ed8 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -987,9 +987,9 @@ struct BdrvChildClass { - bool backing_mask_protocol, - Error **errp); - -- bool (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, -- GHashTable *visited, Transaction *tran, -- Error **errp); -+ bool GRAPH_RDLOCK_PTR (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, -+ GHashTable *visited, -+ Transaction *tran, Error **errp); - - /* - * I/O API functions. These functions are thread-safe. --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-bdrv_attach_child.patch b/kvm-block-move-drain-outside-of-bdrv_attach_child.patch deleted file mode 100644 index 43037d0..0000000 --- a/kvm-block-move-drain-outside-of-bdrv_attach_child.patch +++ /dev/null @@ -1,326 +0,0 @@ -From 62e8b3e9173ea4fb85cf52c66109832ff9d4d437 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:50 +0200 -Subject: [PATCH 25/33] block: move drain outside of bdrv_attach_child() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [13/21] c68a1e34fa991dff72ec0a6403fd9786341ab534 (kmwolf/centos-qemu-kvm) - -This is part of resolving the deadlock mentioned in commit "block: -move draining out of bdrv_change_aio_context() and mark GRAPH_RDLOCK". - -The function bdrv_attach_child() runs under the graph lock, so it is -not allowed to drain. It is called by: -1. replication_start() -2. quorum_add_child() -3. bdrv_open_child_common() -4. 
Throughout test-bdrv-graph-mod.c and test-bdrv-drain.c unit tests. - -In all callers, a drained section is introduced. - -The function quorum_add_child() runs under the graph lock, so it is -not actually allowed to drain. This will be addressed by the following -commit. - -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-14-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 77f3965ba7fed5b35212171a1e41c20c05a7ef11) -Signed-off-by: Kevin Wolf ---- - block.c | 6 ++++-- - block/quorum.c | 2 ++ - block/replication.c | 5 +++++ - tests/unit/test-bdrv-drain.c | 14 ++++++++++++++ - tests/unit/test-bdrv-graph-mod.c | 10 ++++++++++ - 5 files changed, 35 insertions(+), 2 deletions(-) - -diff --git a/block.c b/block.c -index 536a017201..3857f42877 100644 ---- a/block.c -+++ b/block.c -@@ -3269,6 +3269,8 @@ out: - * - * On failure NULL is returned, errp is set and the reference to - * child_bs is also dropped. -+ * -+ * All block nodes must be drained. 
- */ - BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, -@@ -3283,7 +3285,6 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - - GLOBAL_STATE_CODE(); - -- bdrv_drain_all_begin(); - child = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, - child_class, child_role, tran, errp); - if (!child) { -@@ -3298,7 +3299,6 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - - out: - tran_finalize(tran, ret); -- bdrv_drain_all_end(); - - bdrv_schedule_unref(child_bs); - -@@ -3789,10 +3789,12 @@ static BdrvChild *bdrv_open_child_common(const char *filename, - return NULL; - } - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, - errp); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - return child; - } -diff --git a/block/quorum.c b/block/quorum.c -index ed8ce801ee..ea17b0ec13 100644 ---- a/block/quorum.c -+++ b/block/quorum.c -@@ -1096,8 +1096,10 @@ quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, Error **errp) - /* We can safely add the child now */ - bdrv_ref(child_bs); - -+ bdrv_drain_all_begin(); - child = bdrv_attach_child(bs, child_bs, indexstr, &child_of_bds, - BDRV_CHILD_DATA, errp); -+ bdrv_drain_all_end(); - if (child == NULL) { - s->next_child_index--; - return; -diff --git a/block/replication.c b/block/replication.c -index 0020f33843..02814578c6 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -541,6 +541,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - return; - } - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - - bdrv_ref(hidden_disk->bs); -@@ -550,6 +551,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - if (local_err) { - error_propagate(errp, local_err); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - return; - } - -@@ -560,6 +562,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, 
- if (local_err) { - error_propagate(errp, local_err); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - return; - } - -@@ -572,12 +575,14 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, - !check_top_bs(top_bs, bs)) { - error_setg(errp, "No top_bs or it is invalid"); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - reopen_backing_file(bs, false, NULL); - return; - } - bdrv_op_block_all(top_bs, s->blocker); - - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - s->backup_job = backup_job_create( - NULL, s->secondary_disk->bs, s->hidden_disk->bs, -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 4f3057844b..ac76525e5a 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -1049,10 +1049,12 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, - - null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &error_abort); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - /* This child will be the one to pass to requests through to, and - * it will stall until a drain occurs */ -@@ -1060,21 +1062,25 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, - &error_abort); - child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; - /* Takes our reference to child_bs */ -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", - &child_of_bds, - BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, - &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - /* This child is just there to be deleted - * (for detach_instead_of_delete == true) */ - null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &error_abort); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_attach_child(bs, null_bs, "null-child", 
&child_of_bds, BDRV_CHILD_DATA, - &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - blk_insert_bs(blk, bs, &error_abort); -@@ -1157,6 +1163,7 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque) - - bdrv_dec_in_flight(data->child_b->bs); - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(data->parent_b, data->child_b); - -@@ -1165,6 +1172,7 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque) - &child_of_bds, BDRV_CHILD_DATA, - &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - } - - static void coroutine_mixed_fn detach_by_parent_aio_cb(void *opaque, int ret) -@@ -1262,6 +1270,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) - /* Set child relationships */ - bdrv_ref(b); - bdrv_ref(a); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); -@@ -1273,6 +1282,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) - by_parent_cb ? 
&child_of_bds : &detach_by_driver_cb_class, - BDRV_CHILD_DATA, &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - g_assert_cmpint(parent_a->refcnt, ==, 1); - g_assert_cmpint(parent_b->refcnt, ==, 1); -@@ -1685,6 +1695,7 @@ static void test_drop_intermediate_poll(void) - * Establish the chain last, so the chain links are the first - * elements in the BDS.parents lists - */ -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - for (i = 0; i < 3; i++) { - if (i) { -@@ -1694,6 +1705,7 @@ static void test_drop_intermediate_poll(void) - } - } - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - job = block_job_create("job", &test_simple_job_driver, NULL, job_node, - 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); -@@ -1940,10 +1952,12 @@ static void do_test_replace_child_mid_drain(int old_drain_count, - new_child_bs->total_sectors = 1; - - bdrv_ref(old_child_bs); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, - BDRV_CHILD_COW, &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - parent_s->setup_completed = true; - - for (i = 0; i < old_drain_count; i++) { -diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c -index d743abb4bb..7b03ebe4b0 100644 ---- a/tests/unit/test-bdrv-graph-mod.c -+++ b/tests/unit/test-bdrv-graph-mod.c -@@ -137,10 +137,12 @@ static void test_update_perm_tree(void) - - blk_insert_bs(root, bs, &error_abort); - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_attach_child(filter, bs, "child", &child_of_bds, - BDRV_CHILD_DATA, &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - ret = bdrv_append(filter, bs, NULL); - g_assert_cmpint(ret, <, 0); -@@ -204,11 +206,13 @@ static void test_should_update_child(void) - - bdrv_set_backing_hd(target, bs, &error_abort); - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - g_assert(target->backing->bs == bs); - bdrv_attach_child(filter, target, "target", 
&child_of_bds, - BDRV_CHILD_DATA, &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - bdrv_append(filter, bs, &error_abort); - - bdrv_graph_rdlock_main_loop(); -@@ -244,6 +248,7 @@ static void test_parallel_exclusive_write(void) - bdrv_ref(base); - bdrv_ref(fl1); - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_attach_child(top, fl1, "backing", &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, -@@ -257,6 +262,7 @@ static void test_parallel_exclusive_write(void) - - bdrv_replace_node(fl1, fl2, &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - bdrv_drained_end(fl2); - bdrv_drained_end(fl1); -@@ -363,6 +369,7 @@ static void test_parallel_perm_update(void) - */ - bdrv_ref(base); - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_attach_child(top, ws, "file", &child_of_bds, BDRV_CHILD_DATA, - &error_abort); -@@ -377,6 +384,7 @@ static void test_parallel_perm_update(void) - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - /* Select fl1 as first child to be active */ - s->selected = c_fl1; -@@ -430,11 +438,13 @@ static void test_append_greedy_filter(void) - BlockDriverState *base = no_perm_node("base"); - BlockDriverState *fl = exclusive_writer_node("fl1"); - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_attach_child(top, base, "backing", &child_of_bds, - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - bdrv_append(fl, base, &error_abort); - bdrv_unref(fl); --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-bdrv_attach_child_common.patch b/kvm-block-move-drain-outside-of-bdrv_attach_child_common.patch deleted file mode 100644 index 3cecf05..0000000 --- a/kvm-block-move-drain-outside-of-bdrv_attach_child_common.patch +++ /dev/null @@ -1,260 +0,0 @@ -From faa96d060a393458e2e1f9ba77c53adf7d52bc85 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 
17:10:47 +0200 -Subject: [PATCH 22/33] block: move drain outside of - bdrv_attach_child_common(_abort)() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [10/21] 0b8f078c657c094dbd6f967eaaccafd8112a9bda (kmwolf/centos-qemu-kvm) - -This is part of resolving the deadlock mentioned in commit "block: -move draining out of bdrv_change_aio_context() and mark GRAPH_RDLOCK". - -The function bdrv_attach_child_common_abort() is used only as the -abort callback in bdrv_attach_child_common_drv transactions, so the -tran_finalize() calls of such transactions need to be in drained -sections too. - -All code paths are covered: -The bdrv_attach_child_common_drv transactions are only used in -bdrv_attach_child_common(), so it is enough to check callers of -bdrv_attach_child_common() following the transactions. - -bdrv_attach_child_common() is called by: -1. bdrv_attach_child_noperm(), which does not finalize the - transaction yet. -2. bdrv_root_attach_child(), where a drained section is introduced. - -bdrv_attach_child_noperm() is called by: -1. bdrv_attach_child(), where a drained section is introduced. -2. bdrv_set_file_or_backing_noperm(), which does not finalize the - transaction yet. -3. bdrv_append(), where a drained section is introduced. - -bdrv_set_file_or_backing_noperm() is called by: -1. bdrv_set_backing_hd_drained(), where a drained section is - introduced. -2. bdrv_reopen_parse_file_or_backing(), which does not finalize the - transaction yet. Draining the old child bs currently happens under - the graph lock there. This is replaced with an assertion, because - the drain will be moved further up to the caller. - -bdrv_reopen_parse_file_or_backing() is called by: -1. bdrv_reopen_prepare(), which does not finalize the transaction yet. - -bdrv_reopen_prepare() is called by: -1. 
bdrv_reopen_multiple(), which does finalize the transaction. It is - called after bdrv_reopen_queue(), which starts a drained section. - The drained section ends, when bdrv_reopen_queue_free() is called - at the end of bdrv_reopen_multiple(). - -This resolves all code paths. - -The functions bdrv_set_backing_hd_drained(), bdrv_attach_child() and -bdrv_root_attach_child() run under the graph lock, so they are not -actually allowed to drain. This will be addressed in the following -commits. - -Signed-off-by: Fiona Ebner -Message-ID: <20250530151125.955508-11-f.ebner@proxmox.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 2b833595aa21679145cfe67ba720113b165c19ef) -Signed-off-by: Kevin Wolf ---- - block.c | 40 ++++++++++++++++++++++++---------------- - 1 file changed, 24 insertions(+), 16 deletions(-) - -diff --git a/block.c b/block.c -index 3c2e8c5592..2c18e0a4fa 100644 ---- a/block.c -+++ b/block.c -@@ -3028,10 +3028,8 @@ static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque) - bdrv_replace_child_noperm(s->child, NULL); - - if (bdrv_get_aio_context(bs) != s->old_child_ctx) { -- bdrv_drain_all_begin(); - bdrv_try_change_aio_context_locked(bs, s->old_child_ctx, NULL, - &error_abort); -- bdrv_drain_all_end(); - } - - if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) { -@@ -3043,10 +3041,8 @@ static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque) - - /* No need to visit `child`, because it has been detached already */ - visited = g_hash_table_new(NULL, NULL); -- bdrv_drain_all_begin(); - ret = s->child->klass->change_aio_ctx(s->child, s->old_parent_ctx, - visited, tran, &error_abort); -- bdrv_drain_all_end(); - g_hash_table_destroy(visited); - - /* transaction is supposed to always succeed */ -@@ -3075,6 +3071,9 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { - * - * Both @parent_bs and @child_bs can move to a different AioContext in this - * function. 
-+ * -+ * All block nodes must be drained before this function is called until after -+ * the transaction is finalized. - */ - static BdrvChild * GRAPH_WRLOCK - bdrv_attach_child_common(BlockDriverState *child_bs, -@@ -3118,10 +3117,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - parent_ctx = bdrv_child_get_parent_aio_context(new_child); - if (child_ctx != parent_ctx) { - Error *local_err = NULL; -- bdrv_drain_all_begin(); - int ret = bdrv_try_change_aio_context_locked(child_bs, parent_ctx, NULL, - &local_err); -- bdrv_drain_all_end(); - - if (ret < 0 && child_class->change_aio_ctx) { - Transaction *aio_ctx_tran = tran_new(); -@@ -3129,11 +3126,9 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - bool ret_child; - - g_hash_table_add(visited, new_child); -- bdrv_drain_all_begin(); - ret_child = child_class->change_aio_ctx(new_child, child_ctx, - visited, aio_ctx_tran, - NULL); -- bdrv_drain_all_end(); - if (ret_child == true) { - error_free(local_err); - ret = 0; -@@ -3189,6 +3184,9 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - * - * After calling this function, the transaction @tran may only be completed - * while holding a writer lock for the graph. -+ * -+ * All block nodes must be drained before this function is called until after -+ * the transaction is finalized. 
- */ - static BdrvChild * GRAPH_WRLOCK - bdrv_attach_child_noperm(BlockDriverState *parent_bs, -@@ -3244,6 +3242,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - - GLOBAL_STATE_CODE(); - -+ bdrv_drain_all_begin(); - child = bdrv_attach_child_common(child_bs, child_name, child_class, - child_role, perm, shared_perm, opaque, - tran, errp); -@@ -3256,6 +3255,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - - out: - tran_finalize(tran, ret); -+ bdrv_drain_all_end(); - - bdrv_schedule_unref(child_bs); - -@@ -3283,6 +3283,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - - GLOBAL_STATE_CODE(); - -+ bdrv_drain_all_begin(); - child = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, - child_class, child_role, tran, errp); - if (!child) { -@@ -3297,6 +3298,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - - out: - tran_finalize(tran, ret); -+ bdrv_drain_all_end(); - - bdrv_schedule_unref(child_bs); - -@@ -3465,6 +3467,9 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) - * - * After calling this function, the transaction @tran may only be completed - * while holding a writer lock for the graph. -+ * -+ * All block nodes must be drained before this function is called until after -+ * the transaction is finalized. 
- */ - static int GRAPH_WRLOCK - bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, -@@ -3573,6 +3578,7 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs, - assert(bs->backing->bs->quiesce_counter > 0); - } - -+ bdrv_drain_all_begin(); - ret = bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); - if (ret < 0) { - goto out; -@@ -3581,6 +3587,7 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs, - ret = bdrv_refresh_perms(bs, tran, errp); - out: - tran_finalize(tran, ret); -+ bdrv_drain_all_end(); - return ret; - } - -@@ -4721,6 +4728,9 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, - * Return 0 on success, otherwise return < 0 and set @errp. - * - * @reopen_state->bs can move to a different AioContext in this function. -+ * -+ * All block nodes must be drained before this function is called until after -+ * the transaction is finalized. - */ - static int GRAPH_UNLOCKED - bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, -@@ -4814,7 +4824,7 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, - - if (old_child_bs) { - bdrv_ref(old_child_bs); -- bdrv_drained_begin(old_child_bs); -+ assert(old_child_bs->quiesce_counter > 0); - } - - bdrv_graph_rdunlock_main_loop(); -@@ -4826,7 +4836,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, - bdrv_graph_wrunlock(); - - if (old_child_bs) { -- bdrv_drained_end(old_child_bs); - bdrv_unref(old_child_bs); - } - -@@ -4855,6 +4864,9 @@ out_rdlock: - * - * After calling this function, the transaction @change_child_tran may only be - * completed while holding a writer lock for the graph. -+ * -+ * All block nodes must be drained before this function is called until after -+ * the transaction is finalized. 
- */ - static int GRAPH_UNLOCKED - bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, -@@ -5501,9 +5513,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - assert(!bs_new->backing); - bdrv_graph_rdunlock_main_loop(); - -- bdrv_drained_begin(bs_top); -- bdrv_drained_begin(bs_new); -- -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - - child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", -@@ -5525,9 +5535,7 @@ out: - - bdrv_refresh_limits(bs_top, NULL, NULL); - bdrv_graph_wrunlock(); -- -- bdrv_drained_end(bs_top); -- bdrv_drained_end(bs_new); -+ bdrv_drain_all_end(); - - return ret; - } --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-bdrv_change_aio_context-.patch b/kvm-block-move-drain-outside-of-bdrv_change_aio_context-.patch deleted file mode 100644 index 14cd101..0000000 --- a/kvm-block-move-drain-outside-of-bdrv_change_aio_context-.patch +++ /dev/null @@ -1,269 +0,0 @@ -From c5b0d95edea3d1e2ef47804e0c6fe19ade4dcaed Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:45 +0200 -Subject: [PATCH 20/33] block: move drain outside of bdrv_change_aio_context() - and mark GRAPH_RDLOCK - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [8/21] 26dc90de9b684044752ba2ad3831bd786e40fe72 (kmwolf/centos-qemu-kvm) - -This is in preparation to mark bdrv_drained_begin() as GRAPH_UNLOCKED. - -Note that even if bdrv_drained_begin() were already marked as -GRAPH_UNLOCKED, TSA would not complain about the instance in -bdrv_change_aio_context() before this change, because it is preceded -by a bdrv_graph_rdunlock_main_loop() call. It is not correct to -release the lock here, and in case the caller holds a write lock, it -wouldn't actually release the lock. - -In combination with block-stream, there is a deadlock that can happen -because of this [0]. 
In particular, it can happen that -main thread IO thread -1. acquires write lock - in blk_co_do_preadv_part(): - 2. have non-zero blk->in_flight - 3. try to acquire read lock -4. begin drain - -Steps 3 and 4 might be switched. Draining will poll and get stuck, -because it will see the non-zero in_flight counter. But the IO thread -will not make any progress either, because it cannot acquire the read -lock. - -After this change, all paths to bdrv_change_aio_context() drain: -bdrv_change_aio_context() is called by: -1. bdrv_child_cb_change_aio_ctx() which is only called via the - change_aio_ctx() callback, see below. -2. bdrv_child_change_aio_context(), see below. -3. bdrv_try_change_aio_context(), where a drained section is - introduced. - -The change_aio_ctx() callback is called by: -1. bdrv_attach_child_common_abort(), where a drained section is - introduced. -2. bdrv_attach_child_common(), where a drained section is introduced. -3. bdrv_parent_change_aio_context(), see below. - -bdrv_child_change_aio_context() is called by: -1. bdrv_change_aio_context(), i.e. recursive, so being in a drained - section is invariant. -2. child_job_change_aio_ctx(), which is only called via the - change_aio_ctx() callback, see above. - -bdrv_parent_change_aio_context() is called by: -1. bdrv_change_aio_context(), i.e. recursive, so being in a drained - section is invariant. - -This resolves all code paths. Note that bdrv_attach_child_common() -and bdrv_attach_child_common_abort() hold the graph write lock and -callers of bdrv_try_change_aio_context() might too, so they are not -actually allowed to drain either. This will be addressed in the -following commits. - -More granular draining is not trivially possible, because -bdrv_change_aio_context() can recursively call itself e.g. via -bdrv_child_change_aio_context(). 
- -[0]: https://lore.kernel.org/qemu-devel/73839c04-7616-407e-b057-80ca69e63f51@virtuozzo.com/ - -Reported-by: Andrey Drobyshev -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-9-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 91ba0e1c382bd4a4b9c6a200f8a175d6ff30ab99) -Signed-off-by: Kevin Wolf ---- - block.c | 57 +++++++++++++++++++++++--------- - include/block/block_int-common.h | 12 +++++++ - 2 files changed, 53 insertions(+), 16 deletions(-) - -diff --git a/block.c b/block.c -index f7b21d8f27..af438ae7ff 100644 ---- a/block.c -+++ b/block.c -@@ -106,9 +106,9 @@ static void bdrv_reopen_abort(BDRVReopenState *reopen_state); - - static bool bdrv_backing_overridden(BlockDriverState *bs); - --static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, -- GHashTable *visited, Transaction *tran, -- Error **errp); -+static bool GRAPH_RDLOCK -+bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, -+ GHashTable *visited, Transaction *tran, Error **errp); - - /* If non-zero, use only whitelisted block drivers */ - static int use_bdrv_whitelist; -@@ -3040,8 +3040,10 @@ static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque) - - /* No need to visit `child`, because it has been detached already */ - visited = g_hash_table_new(NULL, NULL); -+ bdrv_drain_all_begin(); - ret = s->child->klass->change_aio_ctx(s->child, s->old_parent_ctx, - visited, tran, &error_abort); -+ bdrv_drain_all_end(); - g_hash_table_destroy(visited); - - /* transaction is supposed to always succeed */ -@@ -3122,9 +3124,11 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - bool ret_child; - - g_hash_table_add(visited, new_child); -+ bdrv_drain_all_begin(); - ret_child = child_class->change_aio_ctx(new_child, child_ctx, - visited, aio_ctx_tran, - NULL); -+ bdrv_drain_all_end(); - if (ret_child == true) { - error_free(local_err); - ret = 0; -@@ -7576,6 +7580,17 @@ typedef struct 
BdrvStateSetAioContext { - BlockDriverState *bs; - } BdrvStateSetAioContext; - -+/* -+ * Changes the AioContext of @child to @ctx and recursively for the associated -+ * block nodes and all their children and parents. Returns true if the change is -+ * possible and the transaction @tran can be continued. Returns false and sets -+ * @errp if not and the transaction must be aborted. -+ * -+ * @visited will accumulate all visited BdrvChild objects. The caller is -+ * responsible for freeing the list afterwards. -+ * -+ * Must be called with the affected block nodes drained. -+ */ - static bool GRAPH_RDLOCK - bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, - GHashTable *visited, Transaction *tran, -@@ -7604,6 +7619,17 @@ bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, - return true; - } - -+/* -+ * Changes the AioContext of @c->bs to @ctx and recursively for all its children -+ * and parents. Returns true if the change is possible and the transaction @tran -+ * can be continued. Returns false and sets @errp if not and the transaction -+ * must be aborted. -+ * -+ * @visited will accumulate all visited BdrvChild objects. The caller is -+ * responsible for freeing the list afterwards. -+ * -+ * Must be called with the affected block nodes drained. -+ */ - bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp) -@@ -7619,10 +7645,6 @@ bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, - static void bdrv_set_aio_context_clean(void *opaque) - { - BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; -- BlockDriverState *bs = (BlockDriverState *) state->bs; -- -- /* Paired with bdrv_drained_begin in bdrv_change_aio_context() */ -- bdrv_drained_end(bs); - - g_free(state); - } -@@ -7650,10 +7672,12 @@ static TransactionActionDrv set_aio_context = { - * - * @visited will accumulate all visited BdrvChild objects. 
The caller is - * responsible for freeing the list afterwards. -+ * -+ * @bs must be drained. - */ --static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, -- GHashTable *visited, Transaction *tran, -- Error **errp) -+static bool GRAPH_RDLOCK -+bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, -+ GHashTable *visited, Transaction *tran, Error **errp) - { - BdrvChild *c; - BdrvStateSetAioContext *state; -@@ -7664,21 +7688,17 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, - return true; - } - -- bdrv_graph_rdlock_main_loop(); - QLIST_FOREACH(c, &bs->parents, next_parent) { - if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) { -- bdrv_graph_rdunlock_main_loop(); - return false; - } - } - - QLIST_FOREACH(c, &bs->children, next) { - if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) { -- bdrv_graph_rdunlock_main_loop(); - return false; - } - } -- bdrv_graph_rdunlock_main_loop(); - - state = g_new(BdrvStateSetAioContext, 1); - *state = (BdrvStateSetAioContext) { -@@ -7686,8 +7706,7 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, - .bs = bs, - }; - -- /* Paired with bdrv_drained_end in bdrv_set_aio_context_clean() */ -- bdrv_drained_begin(bs); -+ assert(bs->quiesce_counter > 0); - - tran_add(tran, &set_aio_context, state); - -@@ -7720,6 +7739,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - if (ignore_child) { - g_hash_table_add(visited, ignore_child); - } -+ bdrv_drain_all_begin(); -+ bdrv_graph_rdlock_main_loop(); - ret = bdrv_change_aio_context(bs, ctx, visited, tran, errp); - g_hash_table_destroy(visited); - -@@ -7733,10 +7754,14 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - if (!ret) { - /* Just run clean() callbacks. No AioContext changed. 
*/ - tran_abort(tran); -+ bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_end(); - return -EPERM; - } - - tran_commit(tran); -+ bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_end(); - return 0; - } - -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 307dc56ed8..5d34e6a510 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -987,6 +987,18 @@ struct BdrvChildClass { - bool backing_mask_protocol, - Error **errp); - -+ /* -+ * Notifies the parent that the child is trying to change its AioContext. -+ * The parent may in turn change the AioContext of other nodes in the same -+ * transaction. Returns true if the change is possible and the transaction -+ * can be continued. Returns false and sets @errp if not and the transaction -+ * must be aborted. -+ * -+ * @visited will accumulate all visited BdrvChild objects. The caller is -+ * responsible for freeing the list afterwards. -+ * -+ * Must be called with the affected block nodes drained. 
-+ */ - bool GRAPH_RDLOCK_PTR (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, - GHashTable *visited, - Transaction *tran, Error **errp); --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-bdrv_root_attach_child.patch b/kvm-block-move-drain-outside-of-bdrv_root_attach_child.patch deleted file mode 100644 index 3cb6ca8..0000000 --- a/kvm-block-move-drain-outside-of-bdrv_root_attach_child.patch +++ /dev/null @@ -1,283 +0,0 @@ -From 4629f2201a220c7775df5a305f22d51ba7a34641 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:49 +0200 -Subject: [PATCH 24/33] block: move drain outside of bdrv_root_attach_child() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [12/21] fa142303dc37b7dc3555900f6e86d5d8241cb36b (kmwolf/centos-qemu-kvm) - -This is part of resolving the deadlock mentioned in commit "block: -move draining out of bdrv_change_aio_context() and mark GRAPH_RDLOCK". - -The function bdrv_root_attach_child() runs under the graph lock, so it -is not allowed to drain. It is called by: -1. blk_insert_bs(), where a drained section is introduced. -2. block_job_add_bdrv(), which holds the graph lock itself. - -block_job_add_bdrv() is called by: -1. mirror_start_job() -2. stream_start() -3. commit_start() -4. backup_job_create() -5. block_job_create() -6. In the test_blockjob_common_drain_node() unit test - -In all callers, a drained section is introduced. 
- -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-13-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit ffdcd081f52544f065020c780a6c522dace6b0af) -Signed-off-by: Kevin Wolf ---- - block.c | 4 ++-- - block/backup.c | 2 ++ - block/block-backend.c | 2 ++ - block/commit.c | 4 ++++ - block/mirror.c | 5 +++++ - block/stream.c | 4 ++++ - blockjob.c | 4 ++++ - include/block/blockjob.h | 2 ++ - tests/unit/test-bdrv-drain.c | 2 ++ - 9 files changed, 27 insertions(+), 2 deletions(-) - -diff --git a/block.c b/block.c -index d84b8ae49e..536a017201 100644 ---- a/block.c -+++ b/block.c -@@ -3228,6 +3228,8 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs, - * - * On failure NULL is returned, errp is set and the reference to - * child_bs is also dropped. -+ * -+ * All block nodes must be drained. - */ - BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - const char *child_name, -@@ -3242,7 +3244,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - - GLOBAL_STATE_CODE(); - -- bdrv_drain_all_begin(); - child = bdrv_attach_child_common(child_bs, child_name, child_class, - child_role, perm, shared_perm, opaque, - tran, errp); -@@ -3255,7 +3256,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - - out: - tran_finalize(tran, ret); -- bdrv_drain_all_end(); - - bdrv_schedule_unref(child_bs); - -diff --git a/block/backup.c b/block/backup.c -index 79652bf57b..9d55e55b79 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -497,10 +497,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - block_copy_set_speed(bcs, speed); - - /* Required permissions are taken by copy-before-write filter target */ -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, - &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - return &job->common; - -diff --git a/block/block-backend.c 
b/block/block-backend.c -index 6a6949edeb..24cae3cb55 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -904,6 +904,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) - - GLOBAL_STATE_CODE(); - bdrv_ref(bs); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - - if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) { -@@ -919,6 +920,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) - BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - perm, shared_perm, blk, errp); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - if (blk->root == NULL) { - return -EPERM; - } -diff --git a/block/commit.c b/block/commit.c -index 5df3d05346..6c06b894ff 100644 ---- a/block/commit.c -+++ b/block/commit.c -@@ -342,6 +342,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, - * this is the responsibility of the interface (i.e. whoever calls - * commit_start()). - */ -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - s->base_overlay = bdrv_find_overlay(top, base); - assert(s->base_overlay); -@@ -374,18 +375,21 @@ void commit_start(const char *job_id, BlockDriverState *bs, - iter_shared_perms, errp); - if (ret < 0) { - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - goto fail; - } - } - - if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) { - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - goto fail; - } - s->chain_frozen = true; - - ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - if (ret < 0) { - goto fail; -diff --git a/block/mirror.c b/block/mirror.c -index c2c5099c95..6e8caf4b49 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -2014,6 +2014,7 @@ static BlockJob *mirror_start_job( - */ - bdrv_disable_dirty_bitmap(s->dirty_bitmap); - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - ret = block_job_add_bdrv(&s->common, "source", bs, 0, - BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | -@@ -2021,6 
+2022,7 @@ static BlockJob *mirror_start_job( - errp); - if (ret < 0) { - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - goto fail; - } - -@@ -2066,16 +2068,19 @@ static BlockJob *mirror_start_job( - iter_shared_perms, errp); - if (ret < 0) { - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - goto fail; - } - } - - if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) { - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - goto fail; - } - } - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - QTAILQ_INIT(&s->ops_in_flight); - -diff --git a/block/stream.c b/block/stream.c -index 6ba49cffd3..f5441f27f4 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -371,10 +371,12 @@ void stream_start(const char *job_id, BlockDriverState *bs, - * already have our own plans. Also don't allow resize as the image size is - * queried only at the job start and then cached. - */ -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - if (block_job_add_bdrv(&s->common, "active node", bs, 0, - basic_flags | BLK_PERM_WRITE, errp)) { - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - goto fail; - } - -@@ -395,10 +397,12 @@ void stream_start(const char *job_id, BlockDriverState *bs, - basic_flags, errp); - if (ret < 0) { - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - goto fail; - } - } - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - s->base_overlay = base_overlay; - s->above_base = above_base; -diff --git a/blockjob.c b/blockjob.c -index 34185d7715..44991e3ff7 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -496,6 +496,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - int ret; - GLOBAL_STATE_CODE(); - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - - if (job_id == NULL && !(flags & JOB_INTERNAL)) { -@@ -506,6 +507,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - flags, cb, opaque, errp); - if (job == NULL) { - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - return NULL; - } - -@@ -544,10 +546,12 @@ 
void *block_job_create(const char *job_id, const BlockJobDriver *driver, - } - - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - return job; - - fail: - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - job_early_fail(&job->job); - return NULL; - } -diff --git a/include/block/blockjob.h b/include/block/blockjob.h -index 7061ab7201..990f3e179a 100644 ---- a/include/block/blockjob.h -+++ b/include/block/blockjob.h -@@ -137,6 +137,8 @@ BlockJob *block_job_get_locked(const char *id); - * Add @bs to the list of BlockDriverState that are involved in - * @job. This means that all operations will be blocked on @bs while - * @job exists. -+ * -+ * All block nodes must be drained. - */ - int GRAPH_WRLOCK - block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 3185f3f429..4f3057844b 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -772,9 +772,11 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - tjob->bs = src; - job = &tjob->common; - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - switch (result) { - case TEST_JOB_SUCCESS: --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-bdrv_root_unref_child.patch b/kvm-block-move-drain-outside-of-bdrv_root_unref_child.patch deleted file mode 100644 index bafe83f..0000000 --- a/kvm-block-move-drain-outside-of-bdrv_root_unref_child.patch +++ /dev/null @@ -1,405 +0,0 @@ -From 1ad00825750a515baf5bd9185bbc31549a61568b Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:52 +0200 -Subject: [PATCH 27/33] block: move drain outside of bdrv_root_unref_child() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna 
Czenczek -RH-Commit: [15/21] 05cc374ad5ce3f3ae4b3e5d4024c67141acf2cc2 (kmwolf/centos-qemu-kvm) - -This is part of resolving the deadlock mentioned in commit "block: -move draining out of bdrv_change_aio_context() and mark GRAPH_RDLOCK". - -bdrv_root_unref_child() is called by: -1. blk_remove_bs(), where a drained section is introduced. -2. bdrv_unref_child(), which runs under the graph lock, so the drain - will be moved further up to its callers. -3. block_job_remove_all_bdrv(), where a drained section is introduced. - -For all callers of bdrv_unref_child() and its generated -bdrv_co_unref_child() coroutine variant, a drained section is -introduced, they are not explicilty listed here. The caller -quorum_del_child() holds the graph lock, so it is not actually allowed -to drain. This will be addressed in the next commit. - -Signed-off-by: Fiona Ebner -Message-ID: <20250530151125.955508-16-f.ebner@proxmox.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit b13f54654546cbc0661d3fe9d25f7543535c2bee) -Signed-off-by: Kevin Wolf ---- - block.c | 18 ++++++++++++++---- - block/blklogwrites.c | 4 ++++ - block/blkverify.c | 2 ++ - block/block-backend.c | 2 ++ - block/qcow2.c | 4 ++++ - block/quorum.c | 6 ++++++ - block/replication.c | 2 ++ - block/snapshot.c | 2 ++ - block/vmdk.c | 10 ++++++++++ - blockjob.c | 2 ++ - tests/unit/test-bdrv-drain.c | 4 ++++ - 11 files changed, 52 insertions(+), 4 deletions(-) - -diff --git a/block.c b/block.c -index d7425ff971..51bc084b1e 100644 ---- a/block.c -+++ b/block.c -@@ -1721,12 +1721,14 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, - open_failed: - bs->drv = NULL; - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - if (bs->file != NULL) { - bdrv_unref_child(bs, bs->file); - assert(!bs->file); - } - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - g_free(bs->opaque); - bs->opaque = NULL; -@@ -3305,7 +3307,11 @@ out: - return ret < 0 ? 
NULL : child; - } - --/* Callers must ensure that child->frozen is false. */ -+/* -+ * Callers must ensure that child->frozen is false. -+ * -+ * All block nodes must be drained. -+ */ - void bdrv_root_unref_child(BdrvChild *child) - { - BlockDriverState *child_bs = child->bs; -@@ -3326,10 +3332,8 @@ void bdrv_root_unref_child(BdrvChild *child) - * When the parent requiring a non-default AioContext is removed, the - * node moves back to the main AioContext - */ -- bdrv_drain_all_begin(); - bdrv_try_change_aio_context_locked(child_bs, qemu_get_aio_context(), - NULL, NULL); -- bdrv_drain_all_end(); - } - - bdrv_schedule_unref(child_bs); -@@ -3402,7 +3406,11 @@ bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, - } - } - --/* Callers must ensure that child->frozen is false. */ -+/* -+ * Callers must ensure that child->frozen is false. -+ * -+ * All block nodes must be drained. -+ */ - void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) - { - GLOBAL_STATE_CODE(); -@@ -5172,6 +5180,7 @@ static void bdrv_close(BlockDriverState *bs) - bs->drv = NULL; - } - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - bdrv_unref_child(bs, child); -@@ -5180,6 +5189,7 @@ static void bdrv_close(BlockDriverState *bs) - assert(!bs->backing); - assert(!bs->file); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - g_free(bs->opaque); - bs->opaque = NULL; -diff --git a/block/blklogwrites.c b/block/blklogwrites.c -index b0f78c4bc7..70ac76f401 100644 ---- a/block/blklogwrites.c -+++ b/block/blklogwrites.c -@@ -281,9 +281,11 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, - ret = 0; - fail_log: - if (ret < 0) { -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->log_file); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - s->log_file = NULL; - qemu_mutex_destroy(&s->mutex); - } -@@ -296,10 +298,12 @@ static void 
blk_log_writes_close(BlockDriverState *bs) - { - BDRVBlkLogWritesState *s = bs->opaque; - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->log_file); - s->log_file = NULL; - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - qemu_mutex_destroy(&s->mutex); - } - -diff --git a/block/blkverify.c b/block/blkverify.c -index db79a36681..3a71f7498c 100644 ---- a/block/blkverify.c -+++ b/block/blkverify.c -@@ -151,10 +151,12 @@ static void blkverify_close(BlockDriverState *bs) - { - BDRVBlkverifyState *s = bs->opaque; - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->test_file); - s->test_file = NULL; - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - } - - static int64_t coroutine_fn GRAPH_RDLOCK -diff --git a/block/block-backend.c b/block/block-backend.c -index 24cae3cb55..68209bb2f7 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -889,9 +889,11 @@ void blk_remove_bs(BlockBackend *blk) - root = blk->root; - blk->root = NULL; - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_root_unref_child(root); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - } - - /* -diff --git a/block/qcow2.c b/block/qcow2.c -index 9fc96ba99a..9480598b6d 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -1901,7 +1901,9 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, - g_free(s->image_data_file); - if (open_data_file && has_data_file(bs)) { - bdrv_graph_co_rdunlock(); -+ bdrv_drain_all_begin(); - bdrv_co_unref_child(bs, s->data_file); -+ bdrv_drain_all_end(); - bdrv_graph_co_rdlock(); - s->data_file = NULL; - } -@@ -2827,9 +2829,11 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file) - if (close_data_file && has_data_file(bs)) { - GLOBAL_STATE_CODE(); - bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->data_file); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - s->data_file = NULL; - bdrv_graph_rdlock_main_loop(); - } 
-diff --git a/block/quorum.c b/block/quorum.c -index ed8ce801ee..81407a38ee 100644 ---- a/block/quorum.c -+++ b/block/quorum.c -@@ -1037,6 +1037,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, - - close_exit: - /* cleanup on error */ -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - for (i = 0; i < s->num_children; i++) { - if (!opened[i]) { -@@ -1045,6 +1046,7 @@ close_exit: - bdrv_unref_child(bs, s->children[i]); - } - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - g_free(s->children); - g_free(opened); - exit: -@@ -1057,11 +1059,13 @@ static void quorum_close(BlockDriverState *bs) - BDRVQuorumState *s = bs->opaque; - int i; - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - for (i = 0; i < s->num_children; i++) { - bdrv_unref_child(bs, s->children[i]); - } - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - g_free(s->children); - } -@@ -1143,7 +1147,9 @@ quorum_del_child(BlockDriverState *bs, BdrvChild *child, Error **errp) - (s->num_children - i - 1) * sizeof(BdrvChild *)); - s->children = g_renew(BdrvChild *, s->children, --s->num_children); - -+ bdrv_drain_all_begin(); - bdrv_unref_child(bs, child); -+ bdrv_drain_all_end(); - - quorum_refresh_flags(bs); - } -diff --git a/block/replication.c b/block/replication.c -index 02814578c6..92eb432b1b 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -655,12 +655,14 @@ static void replication_done(void *opaque, int ret) - if (ret == 0) { - s->stage = BLOCK_REPLICATION_DONE; - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(bs, s->secondary_disk); - s->secondary_disk = NULL; - bdrv_unref_child(bs, s->hidden_disk); - s->hidden_disk = NULL; - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - s->error = 0; - } else { -diff --git a/block/snapshot.c b/block/snapshot.c -index 9f300a78bd..28c9c43621 100644 ---- a/block/snapshot.c -+++ b/block/snapshot.c -@@ -291,9 +291,11 @@ int bdrv_snapshot_goto(BlockDriverState *bs, - } - - /* .bdrv_open() will 
re-attach it */ -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(bs, fallback); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); - memset(bs->opaque, 0, drv->instance_size); -diff --git a/block/vmdk.c b/block/vmdk.c -index 9c7ab037e1..89a7250120 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -271,6 +271,7 @@ static void vmdk_free_extents(BlockDriverState *bs) - BDRVVmdkState *s = bs->opaque; - VmdkExtent *e; - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - for (i = 0; i < s->num_extents; i++) { - e = &s->extents[i]; -@@ -283,6 +284,7 @@ static void vmdk_free_extents(BlockDriverState *bs) - } - } - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - - g_free(s->extents); - } -@@ -1247,9 +1249,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - 0, 0, 0, 0, 0, &extent, errp); - if (ret < 0) { - bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - bdrv_graph_rdlock_main_loop(); - goto out; - } -@@ -1266,9 +1270,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - g_free(buf); - if (ret) { - bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - bdrv_graph_rdlock_main_loop(); - goto out; - } -@@ -1277,9 +1283,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); - if (ret) { - bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - bdrv_graph_rdlock_main_loop(); - goto out; - } -@@ -1287,9 +1295,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, - } else 
{ - error_setg(errp, "Unsupported extent type '%s'", type); - bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - bdrv_unref_child(bs, extent_file); - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - bdrv_graph_rdlock_main_loop(); - ret = -ENOTSUP; - goto out; -diff --git a/blockjob.c b/blockjob.c -index 44991e3ff7..e68181a35b 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) - * one to make sure that such a concurrent access does not attempt - * to process an already freed BdrvChild. - */ -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - while (job->nodes) { - GSList *l = job->nodes; -@@ -211,6 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) - g_slist_free_1(l); - } - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - } - - bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index ac76525e5a..59c2793725 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -955,11 +955,13 @@ static void bdrv_test_top_close(BlockDriverState *bs) - { - BdrvChild *c, *next_c; - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { - bdrv_unref_child(bs, c); - } - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - } - - static int coroutine_fn GRAPH_RDLOCK -@@ -1016,7 +1018,9 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) - bdrv_graph_co_rdlock(); - QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { - bdrv_graph_co_rdunlock(); -+ bdrv_drain_all_begin(); - bdrv_co_unref_child(bs, c); -+ bdrv_drain_all_end(); - bdrv_graph_co_rdlock(); - } - bdrv_graph_co_rdunlock(); --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-bdrv_set_backing_hd_drai.patch b/kvm-block-move-drain-outside-of-bdrv_set_backing_hd_drai.patch deleted file mode 100644 index e64b8e9..0000000 --- 
a/kvm-block-move-drain-outside-of-bdrv_set_backing_hd_drai.patch +++ /dev/null @@ -1,123 +0,0 @@ -From e3a722681805290fced1cde3d4ac991f8278f158 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:48 +0200 -Subject: [PATCH 23/33] block: move drain outside of - bdrv_set_backing_hd_drained() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [11/21] ab1d93ce55b51312e3aac1867e8826286c5d49de (kmwolf/centos-qemu-kvm) - -This is part of resolving the deadlock mentioned in commit "block: -move draining out of bdrv_change_aio_context() and mark GRAPH_RDLOCK". - -The function bdrv_set_backing_hd_drained() holds the graph lock, so it -is not allowed to drain. It is called by: -1. bdrv_set_backing_hd(), where a drained section is introduced, - replacing the previously present bs-specific drains. -2. stream_prepare(), where a drained section is introduced replacing - the previously present bs-specific drains. - -The drain_bs variable in bdrv_set_backing_hd_drained() is now -superfluous and thus dropped. - -Signed-off-by: Fiona Ebner -Message-ID: <20250530151125.955508-12-f.ebner@proxmox.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit e66dbda11eab2b4a091d470f3508a4d6ca60eaf5) -Signed-off-by: Kevin Wolf ---- - block.c | 16 +++------------- - block/stream.c | 6 ++---- - 2 files changed, 5 insertions(+), 17 deletions(-) - -diff --git a/block.c b/block.c -index 2c18e0a4fa..d84b8ae49e 100644 ---- a/block.c -+++ b/block.c -@@ -3562,8 +3562,7 @@ out: - * Both @bs and @backing_hd can move to a different AioContext in this - * function. - * -- * If a backing child is already present (i.e. we're detaching a node), that -- * child node must be drained. -+ * All block nodes must be drained. 
- */ - int bdrv_set_backing_hd_drained(BlockDriverState *bs, - BlockDriverState *backing_hd, -@@ -3578,7 +3577,6 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs, - assert(bs->backing->bs->quiesce_counter > 0); - } - -- bdrv_drain_all_begin(); - ret = bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); - if (ret < 0) { - goto out; -@@ -3587,28 +3585,20 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs, - ret = bdrv_refresh_perms(bs, tran, errp); - out: - tran_finalize(tran, ret); -- bdrv_drain_all_end(); - return ret; - } - - int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp) - { -- BlockDriverState *drain_bs; - int ret; - GLOBAL_STATE_CODE(); - -- bdrv_graph_rdlock_main_loop(); -- drain_bs = bs->backing ? bs->backing->bs : bs; -- bdrv_graph_rdunlock_main_loop(); -- -- bdrv_ref(drain_bs); -- bdrv_drained_begin(drain_bs); -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); - bdrv_graph_wrunlock(); -- bdrv_drained_end(drain_bs); -- bdrv_unref(drain_bs); -+ bdrv_drain_all_end(); - - return ret; - } -diff --git a/block/stream.c b/block/stream.c -index 999d9e56d4..6ba49cffd3 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -80,11 +80,10 @@ static int stream_prepare(Job *job) - * may end up working with the wrong base node (or it might even have gone - * away by the time we want to use it). 
- */ -- bdrv_drained_begin(unfiltered_bs); - if (unfiltered_bs_cow) { - bdrv_ref(unfiltered_bs_cow); -- bdrv_drained_begin(unfiltered_bs_cow); - } -+ bdrv_drain_all_begin(); - - bdrv_graph_rdlock_main_loop(); - base = bdrv_filter_or_cow_bs(s->above_base); -@@ -123,11 +122,10 @@ static int stream_prepare(Job *job) - } - - out: -+ bdrv_drain_all_end(); - if (unfiltered_bs_cow) { -- bdrv_drained_end(unfiltered_bs_cow); - bdrv_unref(unfiltered_bs_cow); - } -- bdrv_drained_end(unfiltered_bs); - return ret; - } - --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-bdrv_try_change_aio_cont.patch b/kvm-block-move-drain-outside-of-bdrv_try_change_aio_cont.patch deleted file mode 100644 index 8861e99..0000000 --- a/kvm-block-move-drain-outside-of-bdrv_try_change_aio_cont.patch +++ /dev/null @@ -1,257 +0,0 @@ -From 0fc67b52f8d95a10d0bbc1d49eaf2f93c603d967 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:46 +0200 -Subject: [PATCH 21/33] block: move drain outside of - bdrv_try_change_aio_context() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [9/21] 9e421c675137364d37f9694b7df6ed5c434f20c6 (kmwolf/centos-qemu-kvm) - -This is part of resolving the deadlock mentioned in commit "block: -move draining out of bdrv_change_aio_context() and mark GRAPH_RDLOCK". - -Convert the function to a _locked() version that has to be called with -the graph lock held and add a convenience wrapper that has to be -called with the graph unlocked, which drains and takes the lock -itself. Since bdrv_try_change_aio_context() is global state code, the -wrapper is too. - -Callers are adapted to use the appropriate variant, depending on -whether the caller already holds the lock. In the -test_set_aio_context() unit test, prior drains can be removed, because -draining already happens inside the new wrapper. 
- -Note that bdrv_attach_child_common_abort(), bdrv_attach_child_common() -and bdrv_root_unref_child() hold the graph lock and are not actually -allowed to drain either. This will be addressed in the following -commits. - -Functions like qmp_blockdev_mirror() query the nodes to act on before -draining and locking. In theory, draining could invalidate those nodes. -This kind of issue is not addressed by these commits. - -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-10-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit a1ea8eb5912256c0b2be16fae5d3786aebc80cb1) -Signed-off-by: Kevin Wolf ---- - block.c | 58 ++++++++++++++++++++++-------- - blockdev.c | 15 +++++--- - include/block/block-global-state.h | 8 +++-- - tests/unit/test-bdrv-drain.c | 4 --- - 4 files changed, 59 insertions(+), 26 deletions(-) - -diff --git a/block.c b/block.c -index af438ae7ff..3c2e8c5592 100644 ---- a/block.c -+++ b/block.c -@@ -3028,7 +3028,10 @@ static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque) - bdrv_replace_child_noperm(s->child, NULL); - - if (bdrv_get_aio_context(bs) != s->old_child_ctx) { -- bdrv_try_change_aio_context(bs, s->old_child_ctx, NULL, &error_abort); -+ bdrv_drain_all_begin(); -+ bdrv_try_change_aio_context_locked(bs, s->old_child_ctx, NULL, -+ &error_abort); -+ bdrv_drain_all_end(); - } - - if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) { -@@ -3115,8 +3118,10 @@ bdrv_attach_child_common(BlockDriverState *child_bs, - parent_ctx = bdrv_child_get_parent_aio_context(new_child); - if (child_ctx != parent_ctx) { - Error *local_err = NULL; -- int ret = bdrv_try_change_aio_context(child_bs, parent_ctx, NULL, -- &local_err); -+ bdrv_drain_all_begin(); -+ int ret = bdrv_try_change_aio_context_locked(child_bs, parent_ctx, NULL, -+ &local_err); -+ bdrv_drain_all_end(); - - if (ret < 0 && child_class->change_aio_ctx) { - Transaction *aio_ctx_tran = tran_new(); -@@ -3319,8 
+3324,10 @@ void bdrv_root_unref_child(BdrvChild *child) - * When the parent requiring a non-default AioContext is removed, the - * node moves back to the main AioContext - */ -- bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL, -- NULL); -+ bdrv_drain_all_begin(); -+ bdrv_try_change_aio_context_locked(child_bs, qemu_get_aio_context(), -+ NULL, NULL); -+ bdrv_drain_all_end(); - } - - bdrv_schedule_unref(child_bs); -@@ -7719,9 +7726,13 @@ bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, - * - * If ignore_child is not NULL, that child (and its subgraph) will not - * be touched. -+ * -+ * Called with the graph lock held. -+ * -+ * Called while all bs are drained. - */ --int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, -- BdrvChild *ignore_child, Error **errp) -+int bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx, -+ BdrvChild *ignore_child, Error **errp) - { - Transaction *tran; - GHashTable *visited; -@@ -7730,17 +7741,15 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - - /* - * Recursion phase: go through all nodes of the graph. -- * Take care of checking that all nodes support changing AioContext -- * and drain them, building a linear list of callbacks to run if everything -- * is successful (the transaction itself). -+ * Take care of checking that all nodes support changing AioContext, -+ * building a linear list of callbacks to run if everything is successful -+ * (the transaction itself). - */ - tran = tran_new(); - visited = g_hash_table_new(NULL, NULL); - if (ignore_child) { - g_hash_table_add(visited, ignore_child); - } -- bdrv_drain_all_begin(); -- bdrv_graph_rdlock_main_loop(); - ret = bdrv_change_aio_context(bs, ctx, visited, tran, errp); - g_hash_table_destroy(visited); - -@@ -7754,15 +7763,34 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - if (!ret) { - /* Just run clean() callbacks. No AioContext changed. 
*/ - tran_abort(tran); -- bdrv_graph_rdunlock_main_loop(); -- bdrv_drain_all_end(); - return -EPERM; - } - - tran_commit(tran); -+ return 0; -+} -+ -+/* -+ * Change bs's and recursively all of its parents' and children's AioContext -+ * to the given new context, returning an error if that isn't possible. -+ * -+ * If ignore_child is not NULL, that child (and its subgraph) will not -+ * be touched. -+ */ -+int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, -+ BdrvChild *ignore_child, Error **errp) -+{ -+ int ret; -+ -+ GLOBAL_STATE_CODE(); -+ -+ bdrv_drain_all_begin(); -+ bdrv_graph_rdlock_main_loop(); -+ ret = bdrv_try_change_aio_context_locked(bs, ctx, ignore_child, errp); - bdrv_graph_rdunlock_main_loop(); - bdrv_drain_all_end(); -- return 0; -+ -+ return ret; - } - - void bdrv_add_aio_context_notifier(BlockDriverState *bs, -diff --git a/blockdev.c b/blockdev.c -index efa7d1d0b2..41b6481c9a 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3595,12 +3595,13 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, - AioContext *new_context; - BlockDriverState *bs; - -- GRAPH_RDLOCK_GUARD_MAINLOOP(); -+ bdrv_drain_all_begin(); -+ bdrv_graph_rdlock_main_loop(); - - bs = bdrv_find_node(node_name); - if (!bs) { - error_setg(errp, "Failed to find node with node-name='%s'", node_name); -- return; -+ goto out; - } - - /* Protects against accidents. 
*/ -@@ -3608,14 +3609,14 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, - error_setg(errp, "Node %s is associated with a BlockBackend and could " - "be in use (use force=true to override this check)", - node_name); -- return; -+ goto out; - } - - if (iothread->type == QTYPE_QSTRING) { - IOThread *obj = iothread_by_id(iothread->u.s); - if (!obj) { - error_setg(errp, "Cannot find iothread %s", iothread->u.s); -- return; -+ goto out; - } - - new_context = iothread_get_aio_context(obj); -@@ -3623,7 +3624,11 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, - new_context = qemu_get_aio_context(); - } - -- bdrv_try_change_aio_context(bs, new_context, NULL, errp); -+ bdrv_try_change_aio_context_locked(bs, new_context, NULL, errp); -+ -+out: -+ bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_end(); - } - - QemuOptsList qemu_common_drive_opts = { -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index aad160956a..91f249b5ad 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -278,8 +278,12 @@ bool GRAPH_RDLOCK - bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); --int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, -- BdrvChild *ignore_child, Error **errp); -+int GRAPH_UNLOCKED -+bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, -+ BdrvChild *ignore_child, Error **errp); -+int GRAPH_RDLOCK -+bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx, -+ BdrvChild *ignore_child, Error **errp); - - int GRAPH_RDLOCK bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); - int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 290cd2a70e..3185f3f429 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ 
b/tests/unit/test-bdrv-drain.c -@@ -1396,14 +1396,10 @@ static void test_set_aio_context(void) - bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, - &error_abort); - -- bdrv_drained_begin(bs); - bdrv_try_change_aio_context(bs, ctx_a, NULL, &error_abort); -- bdrv_drained_end(bs); - -- bdrv_drained_begin(bs); - bdrv_try_change_aio_context(bs, ctx_b, NULL, &error_abort); - bdrv_try_change_aio_context(bs, qemu_get_aio_context(), NULL, &error_abort); -- bdrv_drained_end(bs); - - bdrv_unref(bs); - iothread_join(a); --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-quorum_add_child.patch b/kvm-block-move-drain-outside-of-quorum_add_child.patch deleted file mode 100644 index a4edd01..0000000 --- a/kvm-block-move-drain-outside-of-quorum_add_child.patch +++ /dev/null @@ -1,120 +0,0 @@ -From af295ac08ffca4efd6f10a2d1a38eaa8d09d8e6f Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:51 +0200 -Subject: [PATCH 26/33] block: move drain outside of quorum_add_child() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [14/21] 1079c7996e39d09d021d210d974522fba2e5515d (kmwolf/centos-qemu-kvm) - -This is part of resolving the deadlock mentioned in commit "block: -move draining out of bdrv_change_aio_context() and mark GRAPH_RDLOCK". - -The quorum_add_child() callback runs under the graph lock, so it is -not allowed to drain. It is only called as the .bdrv_add_child() -callback, which is only called in the bdrv_add_child() function, which -also runs under the graph lock. - -The bdrv_add_child() function is called by qmp_x_blockdev_change(), -where a drained section is introduced. 
- -Signed-off-by: Fiona Ebner -Message-ID: <20250530151125.955508-15-f.ebner@proxmox.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 0414930d3adfa89299eaea5ce92accab15d9fba5) -Signed-off-by: Kevin Wolf ---- - block.c | 10 ++++++++-- - block/quorum.c | 2 -- - blockdev.c | 2 ++ - include/block/block_int-common.h | 7 +++++++ - 4 files changed, 17 insertions(+), 4 deletions(-) - -diff --git a/block.c b/block.c -index 3857f42877..d7425ff971 100644 ---- a/block.c -+++ b/block.c -@@ -8220,8 +8220,10 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp) - } - - /* -- * Hot add/remove a BDS's child. So the user can take a child offline when -- * it is broken and take a new child online -+ * Hot add a BDS's child. Used in combination with bdrv_del_child, so the user -+ * can take a child offline when it is broken and take a new child online. -+ * -+ * All block nodes must be drained. - */ - void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, - Error **errp) -@@ -8261,6 +8263,10 @@ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, - parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); - } - -+/* -+ * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the -+ * user can take a child offline when it is broken and take a new child online. 
-+ */ - void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) - { - BdrvChild *tmp; -diff --git a/block/quorum.c b/block/quorum.c -index ea17b0ec13..ed8ce801ee 100644 ---- a/block/quorum.c -+++ b/block/quorum.c -@@ -1096,10 +1096,8 @@ quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, Error **errp) - /* We can safely add the child now */ - bdrv_ref(child_bs); - -- bdrv_drain_all_begin(); - child = bdrv_attach_child(bs, child_bs, indexstr, &child_of_bds, - BDRV_CHILD_DATA, errp); -- bdrv_drain_all_end(); - if (child == NULL) { - s->next_child_index--; - return; -diff --git a/blockdev.c b/blockdev.c -index 41b6481c9a..8edd3e7bba 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3525,6 +3525,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, - BlockDriverState *parent_bs, *new_bs = NULL; - BdrvChild *p_child; - -+ bdrv_drain_all_begin(); - bdrv_graph_wrlock(); - - parent_bs = bdrv_lookup_bs(parent, parent, errp); -@@ -3562,6 +3563,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, - - out: - bdrv_graph_wrunlock(); -+ bdrv_drain_all_end(); - } - - BlockJobInfoList *qmp_query_block_jobs(Error **errp) -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 5d34e6a510..8d76b37c03 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -396,6 +396,13 @@ struct BlockDriver { - int GRAPH_RDLOCK_PTR (*bdrv_probe_geometry)( - BlockDriverState *bs, HDGeometry *geo); - -+ /** -+ * Hot add a BDS's child. Used in combination with bdrv_del_child, so the -+ * user can take a child offline when it is broken and take a new child -+ * online. -+ * -+ * All block nodes must be drained. 
-+ */ - void GRAPH_WRLOCK_PTR (*bdrv_add_child)( - BlockDriverState *parent, BlockDriverState *child, Error **errp); - --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-quorum_del_child.patch b/kvm-block-move-drain-outside-of-quorum_del_child.patch deleted file mode 100644 index c06c556..0000000 --- a/kvm-block-move-drain-outside-of-quorum_del_child.patch +++ /dev/null @@ -1,85 +0,0 @@ -From de70d5b485006ecd92e860242634a3166b709fa8 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:53 +0200 -Subject: [PATCH 28/33] block: move drain outside of quorum_del_child() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [16/21] abceebd102dde266fa880f2c40ef83de25a87ed6 (kmwolf/centos-qemu-kvm) - -The quorum_del_child() callback runs under the graph lock, so it is -not allowed to drain. It is only called as the .bdrv_del_child() -callback, which is only called in the bdrv_del_child() function, which -also runs under the graph lock. - -The bdrv_del_child() function is called by qmp_x_blockdev_change(). -A drained section was already introduced there by commit "block: move -drain out of quorum_add_child()". - -This finally finishes moving out the drain to places that are not -under the graph lock started in "block: move draining out of -bdrv_change_aio_context() and mark GRAPH_RDLOCK". 
- -Signed-off-by: Fiona Ebner -Message-ID: <20250530151125.955508-17-f.ebner@proxmox.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit d75f8ed1d7fc27cf1643e549cd006a68d3bf6ef1) -Signed-off-by: Kevin Wolf ---- - block.c | 2 ++ - block/quorum.c | 2 -- - include/block/block_int-common.h | 7 +++++++ - 3 files changed, 9 insertions(+), 2 deletions(-) - -diff --git a/block.c b/block.c -index 51bc084b1e..309ef1349a 100644 ---- a/block.c -+++ b/block.c -@@ -8276,6 +8276,8 @@ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, - /* - * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the - * user can take a child offline when it is broken and take a new child online. -+ * -+ * All block nodes must be drained. - */ - void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) - { -diff --git a/block/quorum.c b/block/quorum.c -index 81407a38ee..cc3bc5f4e7 100644 ---- a/block/quorum.c -+++ b/block/quorum.c -@@ -1147,9 +1147,7 @@ quorum_del_child(BlockDriverState *bs, BdrvChild *child, Error **errp) - (s->num_children - i - 1) * sizeof(BdrvChild *)); - s->children = g_renew(BdrvChild *, s->children, --s->num_children); - -- bdrv_drain_all_begin(); - bdrv_unref_child(bs, child); -- bdrv_drain_all_end(); - - quorum_refresh_flags(bs); - } -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 8d76b37c03..f33695ab74 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -406,6 +406,13 @@ struct BlockDriver { - void GRAPH_WRLOCK_PTR (*bdrv_add_child)( - BlockDriverState *parent, BlockDriverState *child, Error **errp); - -+ /** -+ * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the -+ * user can take a child offline when it is broken and take a new child -+ * online. -+ * -+ * All block nodes must be drained. 
-+ */ - void GRAPH_WRLOCK_PTR (*bdrv_del_child)( - BlockDriverState *parent, BdrvChild *child, Error **errp); - --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-read-locked-bdrv_inactiv.patch b/kvm-block-move-drain-outside-of-read-locked-bdrv_inactiv.patch deleted file mode 100644 index 5504b27..0000000 --- a/kvm-block-move-drain-outside-of-read-locked-bdrv_inactiv.patch +++ /dev/null @@ -1,106 +0,0 @@ -From a4f11515016abb663e37c9796ef4655d5e7e831b Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:41 +0200 -Subject: [PATCH 16/33] block: move drain outside of read-locked - bdrv_inactivate_recurse() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [4/21] 56ced7ba208c22d5998ab27c70ec6a027dc2469e (kmwolf/centos-qemu-kvm) - -This is in preparation to mark bdrv_drained_begin() as GRAPH_UNLOCKED. - -More granular draining is not trivially possible, because -bdrv_inactivate_recurse() can recursively call itself. - -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-5-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 841998e08650f5b4476fa2d1eb84a592ab405f51) -Signed-off-by: Kevin Wolf ---- - block.c | 25 ++++++++++++++++++------- - 1 file changed, 18 insertions(+), 7 deletions(-) - -diff --git a/block.c b/block.c -index 85efdf9c1b..e340bac177 100644 ---- a/block.c -+++ b/block.c -@@ -6989,6 +6989,8 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level) - - GLOBAL_STATE_CODE(); - -+ assert(bs->quiesce_counter > 0); -+ - if (!bs->drv) { - return -ENOMEDIUM; - } -@@ -7032,9 +7034,7 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level) - return -EPERM; - } - -- bdrv_drained_begin(bs); - bs->open_flags |= BDRV_O_INACTIVE; -- bdrv_drained_end(bs); - - /* - * Update permissions, they may differ for inactive nodes. 
-@@ -7059,20 +7059,26 @@ int bdrv_inactivate(BlockDriverState *bs, Error **errp) - int ret; - - GLOBAL_STATE_CODE(); -- GRAPH_RDLOCK_GUARD_MAINLOOP(); -+ -+ bdrv_drain_all_begin(); -+ bdrv_graph_rdlock_main_loop(); - - if (bdrv_has_bds_parent(bs, true)) { - error_setg(errp, "Node has active parent node"); -- return -EPERM; -+ ret = -EPERM; -+ goto out; - } - - ret = bdrv_inactivate_recurse(bs, true); - if (ret < 0) { - error_setg_errno(errp, -ret, "Failed to inactivate node"); -- return ret; -+ goto out; - } - -- return 0; -+out: -+ bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_end(); -+ return ret; - } - - int bdrv_inactivate_all(void) -@@ -7082,7 +7088,9 @@ int bdrv_inactivate_all(void) - int ret = 0; - - GLOBAL_STATE_CODE(); -- GRAPH_RDLOCK_GUARD_MAINLOOP(); -+ -+ bdrv_drain_all_begin(); -+ bdrv_graph_rdlock_main_loop(); - - for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { - /* Nodes with BDS parents are covered by recursion from the last -@@ -7098,6 +7106,9 @@ int bdrv_inactivate_all(void) - } - } - -+ bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_end(); -+ - return ret; - } - --- -2.39.3 - diff --git a/kvm-block-move-drain-outside-of-read-locked-bdrv_reopen_.patch b/kvm-block-move-drain-outside-of-read-locked-bdrv_reopen_.patch deleted file mode 100644 index 03d8bbc..0000000 --- a/kvm-block-move-drain-outside-of-read-locked-bdrv_reopen_.patch +++ /dev/null @@ -1,87 +0,0 @@ -From c5eb44ccd03e327f15977acccfeaf23a047e0dc6 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:39 +0200 -Subject: [PATCH 14/33] block: move drain outside of read-locked - bdrv_reopen_queue_child() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [2/21] 726503557f318087c2142ee95a8193f22d7df5b2 (kmwolf/centos-qemu-kvm) - -This is in preparation to mark bdrv_drained_begin() as GRAPH_UNLOCKED. 
- -More granular draining is not trivially possible, because -bdrv_reopen_queue_child() can recursively call itself. - -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-3-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit e1d681b3e1d8256047dbfc6d2c796028b9694eaf) -Signed-off-by: Kevin Wolf ---- - block.c | 19 +++++++++++-------- - 1 file changed, 11 insertions(+), 8 deletions(-) - -diff --git a/block.c b/block.c -index 9346486ac6..85efdf9c1b 100644 ---- a/block.c -+++ b/block.c -@@ -4358,7 +4358,7 @@ bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child) - * returns a pointer to bs_queue, which is either the newly allocated - * bs_queue, or the existing bs_queue being used. - * -- * bs is drained here and undrained by bdrv_reopen_queue_free(). -+ * bs must be drained. - */ - static BlockReopenQueue * GRAPH_RDLOCK - bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, -@@ -4377,12 +4377,7 @@ bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, - - GLOBAL_STATE_CODE(); - -- /* -- * Strictly speaking, draining is illegal under GRAPH_RDLOCK. We know that -- * we've been called with bdrv_graph_rdlock_main_loop(), though, so it's ok -- * in practice. -- */ -- bdrv_drained_begin(bs); -+ assert(bs->quiesce_counter > 0); - - if (bs_queue == NULL) { - bs_queue = g_new0(BlockReopenQueue, 1); -@@ -4522,6 +4517,12 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, - QDict *options, bool keep_old_opts) - { - GLOBAL_STATE_CODE(); -+ -+ if (bs_queue == NULL) { -+ /* Paired with bdrv_drain_all_end() in bdrv_reopen_queue_free(). 
*/ -+ bdrv_drain_all_begin(); -+ } -+ - GRAPH_RDLOCK_GUARD_MAINLOOP(); - - return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, -@@ -4534,12 +4535,14 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) - if (bs_queue) { - BlockReopenQueueEntry *bs_entry, *next; - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { -- bdrv_drained_end(bs_entry->state.bs); - qobject_unref(bs_entry->state.explicit_options); - qobject_unref(bs_entry->state.options); - g_free(bs_entry); - } - g_free(bs_queue); -+ -+ /* Paired with bdrv_drain_all_begin() in bdrv_reopen_queue(). */ -+ bdrv_drain_all_end(); - } - } - --- -2.39.3 - diff --git a/kvm-block-remove-outdated-comments-about-AioContext-lock.patch b/kvm-block-remove-outdated-comments-about-AioContext-lock.patch deleted file mode 100644 index 9d1219b..0000000 --- a/kvm-block-remove-outdated-comments-about-AioContext-lock.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 055168058f614c9f3b8be6c0692794a6299420ab Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:38 +0200 -Subject: [PATCH 13/33] block: remove outdated comments about AioContext - locking - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [1/21] 607caf7954ab7d1cd4616335fba031cdbfa30324 (kmwolf/centos-qemu-kvm) - -AioContext locking was removed in commit b49f4755c7 ("block: remove -AioContext locking"). 
- -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-2-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit f1bf3be14bd5d6e6a2cfbbe64cdd4d58a8595d68) -Signed-off-by: Kevin Wolf ---- - block.c | 7 ------- - 1 file changed, 7 deletions(-) - -diff --git a/block.c b/block.c -index 0ece805e41..9346486ac6 100644 ---- a/block.c -+++ b/block.c -@@ -4359,8 +4359,6 @@ bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child) - * bs_queue, or the existing bs_queue being used. - * - * bs is drained here and undrained by bdrv_reopen_queue_free(). -- * -- * To be called with bs->aio_context locked. - */ - static BlockReopenQueue * GRAPH_RDLOCK - bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, -@@ -4519,7 +4517,6 @@ bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, - return bs_queue; - } - --/* To be called with bs->aio_context locked */ - BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, - BlockDriverState *bs, - QDict *options, bool keep_old_opts) -@@ -7278,10 +7275,6 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs) - return true; - } - --/* -- * Must not be called while holding the lock of an AioContext other than the -- * current one. 
-- */ - void bdrv_img_create(const char *filename, const char *fmt, - const char *base_filename, const char *base_fmt, - char *options, uint64_t img_size, int flags, bool quiet, --- -2.39.3 - diff --git a/kvm-block-skip-automatic-zero-init-of-large-array-in-ioq.patch b/kvm-block-skip-automatic-zero-init-of-large-array-in-ioq.patch deleted file mode 100644 index 65204cf..0000000 --- a/kvm-block-skip-automatic-zero-init-of-large-array-in-ioq.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 116f42add040dfa1eaf25087db1038f8c4198bf7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:41 +0100 -Subject: [PATCH 11/43] block: skip automatic zero-init of large array in - ioq_submit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/31] 3d4c81fcc56f1c7a4d1d3819214bd6296edc2a1e (stefanha/centos-stream-qemu-kvm) - -The 'ioq_submit' method has a struct array that is 8k in size. -Skip the automatic zero-init of this array to eliminate the -performance overhead in the I/O hot path. - -The 'iocbs' array will selectively initialized when processing -the I/O data. - -Signed-off-by: Daniel P. 
Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-4-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 83750c1da807c973b0b11d977d61df7e41122d03) -Signed-off-by: Stefan Hajnoczi ---- - block/linux-aio.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/linux-aio.c b/block/linux-aio.c -index 407369f5c9..c200e7ad20 100644 ---- a/block/linux-aio.c -+++ b/block/linux-aio.c -@@ -291,7 +291,7 @@ static void ioq_submit(LinuxAioState *s) - { - int ret, len; - struct qemu_laiocb *aiocb; -- struct iocb *iocbs[MAX_EVENTS]; -+ QEMU_UNINITIALIZED struct iocb *iocbs[MAX_EVENTS]; - QSIMPLEQ_HEAD(, qemu_laiocb) completed; - - do { --- -2.39.3 - diff --git a/kvm-block-snapshot-move-drain-outside-of-read-locked-bdr.patch b/kvm-block-snapshot-move-drain-outside-of-read-locked-bdr.patch deleted file mode 100644 index 3f4f810..0000000 --- a/kvm-block-snapshot-move-drain-outside-of-read-locked-bdr.patch +++ /dev/null @@ -1,231 +0,0 @@ -From 5c8408a7d6b0f7a66de2bfa31ef228b1817200ec Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:40 +0200 -Subject: [PATCH 15/33] block/snapshot: move drain outside of read-locked - bdrv_snapshot_delete() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [3/21] 4263420635ed51a5df458d079f60618c53120d87 (kmwolf/centos-qemu-kvm) - -This is in preparation to mark bdrv_drained_begin() as GRAPH_UNLOCKED. - -More granular draining is not trivially possible, because -bdrv_snapshot_delete() can recursively call itself. - -The return value of bdrv_all_delete_snapshot() changes from -1 to --errno propagated from failed sub-calls. This is fine for the existing -callers of bdrv_all_delete_snapshot(). 
- -Signed-off-by: Fiona Ebner -Reviewed-by: Kevin Wolf -Message-ID: <20250530151125.955508-4-f.ebner@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit d4c5f8c980f1073356d2f18d51dc68d42bebb59d) -Signed-off-by: Kevin Wolf ---- - block/snapshot.c | 26 +++++++++++++++----------- - blockdev.c | 25 +++++++++++++++++-------- - qemu-img.c | 2 ++ - 3 files changed, 34 insertions(+), 19 deletions(-) - -diff --git a/block/snapshot.c b/block/snapshot.c -index 22567f1fb9..9f300a78bd 100644 ---- a/block/snapshot.c -+++ b/block/snapshot.c -@@ -327,7 +327,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, - - /** - * Delete an internal snapshot by @snapshot_id and @name. -- * @bs: block device used in the operation -+ * @bs: block device used in the operation, must be drained - * @snapshot_id: unique snapshot ID, or NULL - * @name: snapshot name, or NULL - * @errp: location to store error -@@ -358,6 +358,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs, - - GLOBAL_STATE_CODE(); - -+ assert(bs->quiesce_counter > 0); -+ - if (!drv) { - error_setg(errp, "Device '%s' has no medium", - bdrv_get_device_name(bs)); -@@ -368,9 +370,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs, - return -EINVAL; - } - -- /* drain all pending i/o before deleting snapshot */ -- bdrv_drained_begin(bs); -- - if (drv->bdrv_snapshot_delete) { - ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); - } else if (fallback_bs) { -@@ -382,7 +381,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs, - ret = -ENOTSUP; - } - -- bdrv_drained_end(bs); - return ret; - } - -@@ -571,19 +569,22 @@ int bdrv_all_delete_snapshot(const char *name, - ERRP_GUARD(); - g_autoptr(GList) bdrvs = NULL; - GList *iterbdrvs; -+ int ret = 0; - - GLOBAL_STATE_CODE(); -- GRAPH_RDLOCK_GUARD_MAINLOOP(); - -- if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { -- return -1; -+ bdrv_drain_all_begin(); -+ bdrv_graph_rdlock_main_loop(); -+ -+ ret = bdrv_all_get_snapshot_devices(has_devices, 
devices, &bdrvs, errp); -+ if (ret < 0) { -+ goto out; - } - - iterbdrvs = bdrvs; - while (iterbdrvs) { - BlockDriverState *bs = iterbdrvs->data; - QEMUSnapshotInfo sn1, *snapshot = &sn1; -- int ret = 0; - - if ((devices || bdrv_all_snapshots_includes_bs(bs)) && - bdrv_snapshot_find(bs, snapshot, name) >= 0) -@@ -594,13 +595,16 @@ int bdrv_all_delete_snapshot(const char *name, - if (ret < 0) { - error_prepend(errp, "Could not delete snapshot '%s' on '%s': ", - name, bdrv_get_device_or_node_name(bs)); -- return -1; -+ goto out; - } - - iterbdrvs = iterbdrvs->next; - } - -- return 0; -+out: -+ bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_end(); -+ return ret; - } - - -diff --git a/blockdev.c b/blockdev.c -index 0fa8813efe..efa7d1d0b2 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1132,39 +1132,41 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, - int ret; - - GLOBAL_STATE_CODE(); -- GRAPH_RDLOCK_GUARD_MAINLOOP(); -+ -+ bdrv_drain_all_begin(); -+ bdrv_graph_rdlock_main_loop(); - - bs = qmp_get_root_bs(device, errp); - if (!bs) { -- return NULL; -+ goto error; - } - - if (!id && !name) { - error_setg(errp, "Name or id must be provided"); -- return NULL; -+ goto error; - } - - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) { -- return NULL; -+ goto error; - } - - ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- return NULL; -+ goto error; - } - if (!ret) { - error_setg(errp, - "Snapshot with id '%s' and name '%s' does not exist on " - "device '%s'", - STR_OR_NULL(id), STR_OR_NULL(name), device); -- return NULL; -+ goto error; - } - - bdrv_snapshot_delete(bs, id, name, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- return NULL; -+ goto error; - } - - info = g_new0(SnapshotInfo, 1); -@@ -1180,6 +1182,9 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, - info->has_icount = 
true; - } - -+error: -+ bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_end(); - return info; - } - -@@ -1295,12 +1300,14 @@ static void internal_snapshot_abort(void *opaque) - Error *local_error = NULL; - - GLOBAL_STATE_CODE(); -- GRAPH_RDLOCK_GUARD_MAINLOOP(); - - if (!state->created) { - return; - } - -+ bdrv_drain_all_begin(); -+ bdrv_graph_rdlock_main_loop(); -+ - if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) { - error_reportf_err(local_error, - "Failed to delete snapshot with id '%s' and " -@@ -1308,6 +1315,8 @@ static void internal_snapshot_abort(void *opaque) - sn->id_str, sn->name, - bdrv_get_device_name(bs)); - } -+ bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_end(); - } - - static void internal_snapshot_clean(void *opaque) -diff --git a/qemu-img.c b/qemu-img.c -index 2044c22a4c..ba8412f66e 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -3505,6 +3505,7 @@ static int img_snapshot(int argc, char **argv) - break; - - case SNAPSHOT_DELETE: -+ bdrv_drain_all_begin(); - bdrv_graph_rdlock_main_loop(); - ret = bdrv_snapshot_find(bs, &sn, snapshot_name); - if (ret < 0) { -@@ -3520,6 +3521,7 @@ static int img_snapshot(int argc, char **argv) - } - } - bdrv_graph_rdunlock_main_loop(); -+ bdrv_drain_all_end(); - break; - } - --- -2.39.3 - diff --git a/kvm-blockdev-drain-while-unlocked-in-external_snapshot_a.patch b/kvm-blockdev-drain-while-unlocked-in-external_snapshot_a.patch deleted file mode 100644 index bf2907b..0000000 --- a/kvm-blockdev-drain-while-unlocked-in-external_snapshot_a.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 96407f6d729312c373e9b2ccbf97918d453c7a52 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:55 +0200 -Subject: [PATCH 30/33] blockdev: drain while unlocked in - external_snapshot_action() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [18/21] 
b651de913db70b4897f0085af696cdaf925f5f81 (kmwolf/centos-qemu-kvm) - -This is in preparation to mark bdrv_drained_begin() as GRAPH_UNLOCKED. - -Signed-off-by: Fiona Ebner -Message-ID: <20250530151125.955508-19-f.ebner@proxmox.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 195a8a946a8681dfe7e8aa8d49db415693db5311) -Signed-off-by: Kevin Wolf ---- - blockdev.c | 17 ++++++++++++++++- - 1 file changed, 16 insertions(+), 1 deletion(-) - -diff --git a/blockdev.c b/blockdev.c -index 2560a11a53..998dbe38a5 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1377,9 +1377,10 @@ static void external_snapshot_action(TransactionAction *action, - const char *new_image_file; - ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1); - uint64_t perm, shared; -+ BlockDriverState *check_bs; - - /* TODO We'll eventually have to take a writer lock in this function */ -- GRAPH_RDLOCK_GUARD_MAINLOOP(); -+ bdrv_graph_rdlock_main_loop(); - - tran_add(tran, &external_snapshot_drv, state); - -@@ -1412,11 +1413,25 @@ static void external_snapshot_action(TransactionAction *action, - - state->old_bs = bdrv_lookup_bs(device, node_name, errp); - if (!state->old_bs) { -+ bdrv_graph_rdunlock_main_loop(); - return; - } - -+ /* Need to drain while unlocked. */ -+ bdrv_graph_rdunlock_main_loop(); - /* Paired with .clean() */ - bdrv_drained_begin(state->old_bs); -+ GRAPH_RDLOCK_GUARD_MAINLOOP(); -+ -+ /* Make sure the associated bs did not change with the drain. 
*/ -+ check_bs = bdrv_lookup_bs(device, node_name, errp); -+ if (state->old_bs != check_bs) { -+ if (check_bs) { -+ error_setg(errp, "Block node of device '%s' unexpectedly changed", -+ device); -+ } /* else errp is already set */ -+ return; -+ } - - if (!bdrv_is_inserted(state->old_bs)) { - error_setg(errp, "Device '%s' has no medium", --- -2.39.3 - diff --git a/kvm-blockdev-drain-while-unlocked-in-internal_snapshot_a.patch b/kvm-blockdev-drain-while-unlocked-in-internal_snapshot_a.patch deleted file mode 100644 index db9f08a..0000000 --- a/kvm-blockdev-drain-while-unlocked-in-internal_snapshot_a.patch +++ /dev/null @@ -1,80 +0,0 @@ -From a2b28210cbcbb8e66815d9b38484fde3122a3ab9 Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:54 +0200 -Subject: [PATCH 29/33] blockdev: drain while unlocked in - internal_snapshot_action() - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [17/21] 8379ac5e6ec0ecf71b540d2631cf42394d985f16 (kmwolf/centos-qemu-kvm) - -This is in preparation to mark bdrv_drained_begin() as GRAPH_UNLOCKED. 
- -Signed-off-by: Fiona Ebner -Message-ID: <20250530151125.955508-18-f.ebner@proxmox.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 6f101614f95c889399352b8301917c0ac7919ae7) -Signed-off-by: Kevin Wolf ---- - blockdev.c | 19 +++++++++++++++++-- - 1 file changed, 17 insertions(+), 2 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 8edd3e7bba..2560a11a53 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1208,7 +1208,7 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, - Error *local_err = NULL; - const char *device; - const char *name; -- BlockDriverState *bs; -+ BlockDriverState *bs, *check_bs; - QEMUSnapshotInfo old_sn, *sn; - bool ret; - int64_t rt; -@@ -1216,7 +1216,7 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, - int ret1; - - GLOBAL_STATE_CODE(); -- GRAPH_RDLOCK_GUARD_MAINLOOP(); -+ bdrv_graph_rdlock_main_loop(); - - tran_add(tran, &internal_snapshot_drv, state); - -@@ -1225,14 +1225,29 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, - - bs = qmp_get_root_bs(device, errp); - if (!bs) { -+ bdrv_graph_rdunlock_main_loop(); - return; - } - - state->bs = bs; - -+ /* Need to drain while unlocked. */ -+ bdrv_graph_rdunlock_main_loop(); - /* Paired with .clean() */ - bdrv_drained_begin(bs); - -+ GRAPH_RDLOCK_GUARD_MAINLOOP(); -+ -+ /* Make sure the root bs did not change with the drain. 
*/ -+ check_bs = qmp_get_root_bs(device, errp); -+ if (bs != check_bs) { -+ if (check_bs) { -+ error_setg(errp, "Block node of device '%s' unexpectedly changed", -+ device); -+ } /* else errp is already set */ -+ return; -+ } -+ - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) { - return; - } --- -2.39.3 - diff --git a/kvm-chardev-char-fd-skip-automatic-zero-init-of-large-ar.patch b/kvm-chardev-char-fd-skip-automatic-zero-init-of-large-ar.patch deleted file mode 100644 index e073d38..0000000 --- a/kvm-chardev-char-fd-skip-automatic-zero-init-of-large-ar.patch +++ /dev/null @@ -1,49 +0,0 @@ -From c7114f381bfa2663bdaa368b7b0dca764e28a07e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:42 +0100 -Subject: [PATCH 12/43] chardev/char-fd: skip automatic zero-init of large - array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/31] b26dc4c1b9677adb1332c778e6c9ec452952f4c1 (stefanha/centos-stream-qemu-kvm) - -The 'fd_chr_read' method has a 4k byte array used for copying -data between the socket and device. Skip the automatic zero-init -of this array to eliminate the performance overhead in the I/O -hot path. - -The 'buf' array will be fully initialized when reading data off -the network socket. - -Signed-off-by: Daniel P. 
Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-5-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit a503bdc22b91869e3bf45522e36b122889465306) -Signed-off-by: Stefan Hajnoczi ---- - chardev/char-fd.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/chardev/char-fd.c b/chardev/char-fd.c -index d2c4923359..8dd662c066 100644 ---- a/chardev/char-fd.c -+++ b/chardev/char-fd.c -@@ -50,7 +50,7 @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) - Chardev *chr = CHARDEV(opaque); - FDChardev *s = FD_CHARDEV(opaque); - int len; -- uint8_t buf[CHR_READ_BUF_LEN]; -+ QEMU_UNINITIALIZED uint8_t buf[CHR_READ_BUF_LEN]; - ssize_t ret; - - len = sizeof(buf); --- -2.39.3 - diff --git a/kvm-chardev-char-pty-skip-automatic-zero-init-of-large-a.patch b/kvm-chardev-char-pty-skip-automatic-zero-init-of-large-a.patch deleted file mode 100644 index c1b20a2..0000000 --- a/kvm-chardev-char-pty-skip-automatic-zero-init-of-large-a.patch +++ /dev/null @@ -1,49 +0,0 @@ -From dfe44e34228c242f539f61383c431b6b9acd86f0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:43 +0100 -Subject: [PATCH 13/43] chardev/char-pty: skip automatic zero-init of large - array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/31] e41c3503091674e90cc7c17f202e5d4ae1f417cd (stefanha/centos-stream-qemu-kvm) - -The 'pty_chr_read' method has a 4k byte array used for copying -data between the PTY and device. Skip the automatic zero-init -of this array to eliminate the performance overhead in the I/O -hot path. - -The 'buf' array will be fully initialized when reading data off -the PTY. - -Signed-off-by: Daniel P. 
Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-6-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 45bb7fb21c8d18294a9f92da99d01ab3c67c7df2) -Signed-off-by: Stefan Hajnoczi ---- - chardev/char-pty.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/chardev/char-pty.c b/chardev/char-pty.c -index 6a2c1dc13a..f484aac78d 100644 ---- a/chardev/char-pty.c -+++ b/chardev/char-pty.c -@@ -154,7 +154,7 @@ static gboolean pty_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) - Chardev *chr = CHARDEV(opaque); - PtyChardev *s = PTY_CHARDEV(opaque); - gsize len; -- uint8_t buf[CHR_READ_BUF_LEN]; -+ QEMU_UNINITIALIZED uint8_t buf[CHR_READ_BUF_LEN]; - ssize_t ret; - - len = sizeof(buf); --- -2.39.3 - diff --git a/kvm-chardev-char-socket-skip-automatic-zero-init-of-larg.patch b/kvm-chardev-char-socket-skip-automatic-zero-init-of-larg.patch deleted file mode 100644 index c9a6650..0000000 --- a/kvm-chardev-char-socket-skip-automatic-zero-init-of-larg.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 1dbdcc30075e480b1d6da9ef19a8bd38e1762ac9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:44 +0100 -Subject: [PATCH 14/43] chardev/char-socket: skip automatic zero-init of large - array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/31] 97ec8420f5a8cc37d5ece5721d409e9d94f8b11f (stefanha/centos-stream-qemu-kvm) - -The 'tcp_chr_read' method has a 4k byte array used for copying -data between the socket and device. Skip the automatic zero-init -of this array to eliminate the performance overhead in the I/O -hot path. - -The 'buf' array will be fully initialized when reading data off -the network socket. 
- -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-7-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 9a23075cef1ac6e73a95a489ac72f41c573ceb9b) -Signed-off-by: Stefan Hajnoczi ---- - chardev/char-socket.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/chardev/char-socket.c b/chardev/char-socket.c -index 2f842f9f88..22c87d0885 100644 ---- a/chardev/char-socket.c -+++ b/chardev/char-socket.c -@@ -497,7 +497,7 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) - { - Chardev *chr = CHARDEV(opaque); - SocketChardev *s = SOCKET_CHARDEV(opaque); -- uint8_t buf[CHR_READ_BUF_LEN]; -+ QEMU_UNINITIALIZED uint8_t buf[CHR_READ_BUF_LEN]; - int len, size; - - if ((s->state != TCP_CHARDEV_STATE_CONNECTED) || --- -2.39.3 - diff --git a/kvm-docs-Don-t-define-duplicate-label-in-qemu-block-driv.patch b/kvm-docs-Don-t-define-duplicate-label-in-qemu-block-driv.patch deleted file mode 100644 index b6628e6..0000000 --- a/kvm-docs-Don-t-define-duplicate-label-in-qemu-block-driv.patch +++ /dev/null @@ -1,71 +0,0 @@ -From c73cd0b322ad22a4ba47035ba87a0e83ff851b6a Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Thu, 1 May 2025 10:31:26 +0100 -Subject: [PATCH] docs: Don't define duplicate label in - qemu-block-drivers.rst.inc - -Sphinx requires that labels within documents are unique across the -whole manual. This is because the "create a hyperlink" directive -specifies only the name of the label, not a filename+label. Some -Sphinx versions will warn about duplicate labels, but even if there -is no warning there is still an ambiguity and no guarantee that the -hyperlink will be created to the right target. - -For QEMU this is awkward, because we have various .rst.inc fragments -which we include into multiple .rst files. If you define a label in -the .rst.inc file then it will be a duplicate label. 
We have mostly -worked around this by not putting labels into those .rst.inc files, -or by adding "insert a label" functionality into the hxtool extension -(see commit 1eeb432a953b0 "doc/sphinx/hxtool.py: add optional label -argument to SRST directive"). - -Unfortunately in commit 7f6314427e78 ("docs/devel: add a codebase -section") we accidentally added a duplicate label, because not all -Sphinx versions warn about the mistake. - -In this case the link was only from the developer docs codebase -summary, so as the simplest fix for the stable branch, we drop -the link entirely. - -Cc: qemu-stable@nongnu.org -Fixes: 1eeb432a953b0 "doc/sphinx/hxtool.py: add optional label argument to SRST directive" -Reported-by: Dario Faggioli -Signed-off-by: Peter Maydell -Acked-by: Eric Blake -Reviewed-by: Pierrick Bouvier -Message-id: 20250501093126.716667-1-peter.maydell@linaro.org -Signed-off-by: Miroslav Rezanina ---- - docs/devel/codebase.rst | 2 +- - docs/system/qemu-block-drivers.rst.inc | 2 -- - 2 files changed, 1 insertion(+), 3 deletions(-) - -diff --git a/docs/devel/codebase.rst b/docs/devel/codebase.rst -index ef98578296..085da10a22 100644 ---- a/docs/devel/codebase.rst -+++ b/docs/devel/codebase.rst -@@ -116,7 +116,7 @@ yet, so sometimes the source code is all you have. - * `monitor `_: - `Monitor ` implementation (HMP & QMP). - * `nbd `_: -- QEMU `NBD (Network Block Device) ` server. -+ QEMU NBD (Network Block Device) server. - * `net `_: - Network (host) support. - * `pc-bios `_: -diff --git a/docs/system/qemu-block-drivers.rst.inc b/docs/system/qemu-block-drivers.rst.inc -index cfe1acb78a..384e95ba76 100644 ---- a/docs/system/qemu-block-drivers.rst.inc -+++ b/docs/system/qemu-block-drivers.rst.inc -@@ -500,8 +500,6 @@ What you should *never* do: - - expect it to work when loadvm'ing - - write to the FAT directory on the host system while accessing it with the guest system - --.. 
_nbd: -- - NBD access - ~~~~~~~~~~ - --- -2.39.3 - diff --git a/kvm-file-posix-Fix-aio-threads-performance-regression-af.patch b/kvm-file-posix-Fix-aio-threads-performance-regression-af.patch deleted file mode 100644 index a077f4f..0000000 --- a/kvm-file-posix-Fix-aio-threads-performance-regression-af.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 53711e6aad8a6e40426ccef25e911d3cad93220a Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 25 Jun 2025 10:50:19 +0200 -Subject: [PATCH 12/33] file-posix: Fix aio=threads performance regression - after enablign FUA - -RH-Author: Kevin Wolf -RH-MergeRequest: 392: file-posix: Fix aio=threads performance regression after enabling FUA -RH-Jira: RHEL-96854 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [1/1] e523c0305e3072d59ad6454afc690cabfcfb86b2 (kmwolf/centos-qemu-kvm) - -For aio=threads, we're currently not implementing REQ_FUA in any useful -way, but just do a separate raw_co_flush_to_disk() call. This changes -behaviour compared to the old state, which used bdrv_co_flush() with its -optimisations. As a quick fix, call bdrv_co_flush() again like before. -Eventually, we can use pwritev2() to make use of RWF_DSYNC if available, -but we'll still have to keep this code path as a fallback, so this fix -is required either way. - -While the fix itself is a one-liner, some new graph locking annotations -are needed to convince TSA that the locking is correct. 
- -Cc: qemu-stable@nongnu.org -Fixes: 984a32f17e8d ("file-posix: Support FUA writes") -Buglink: https://issues.redhat.com/browse/RHEL-96854 -Reported-by: Tingting Mao -Signed-off-by: Kevin Wolf -Message-ID: <20250625085019.27735-1-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit d402da1360c2240e81f0e5fc80ddbfc6238e0da8) -Signed-off-by: Kevin Wolf ---- - block/file-posix.c | 29 +++++++++++++++-------------- - 1 file changed, 15 insertions(+), 14 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 77a35d9ae9..d3c7dcc7e4 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -2573,9 +2573,9 @@ static inline bool raw_check_linux_aio(BDRVRawState *s) - } - #endif - --static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, -- uint64_t bytes, QEMUIOVector *qiov, int type, -- int flags) -+static int coroutine_fn GRAPH_RDLOCK -+raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, uint64_t bytes, -+ QEMUIOVector *qiov, int type, int flags) - { - BDRVRawState *s = bs->opaque; - RawPosixAIOData acb; -@@ -2634,7 +2634,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, - ret = raw_thread_pool_submit(handle_aiocb_rw, &acb); - if (ret == 0 && (flags & BDRV_REQ_FUA)) { - /* TODO Use pwritev2() instead if it's available */ -- ret = raw_co_flush_to_disk(bs); -+ ret = bdrv_co_flush(bs); - } - goto out; /* Avoid the compiler err of unused label */ - -@@ -2669,16 +2669,16 @@ out: - return ret; - } - --static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, -- int64_t bytes, QEMUIOVector *qiov, -- BdrvRequestFlags flags) -+static int coroutine_fn GRAPH_RDLOCK -+raw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, -+ QEMUIOVector *qiov, BdrvRequestFlags flags) - { - return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags); - } - --static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, -- 
int64_t bytes, QEMUIOVector *qiov, -- BdrvRequestFlags flags) -+static int coroutine_fn GRAPH_RDLOCK -+raw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, -+ QEMUIOVector *qiov, BdrvRequestFlags flags) - { - return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags); - } -@@ -3615,10 +3615,11 @@ static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op, - #endif - - #if defined(CONFIG_BLKZONED) --static int coroutine_fn raw_co_zone_append(BlockDriverState *bs, -- int64_t *offset, -- QEMUIOVector *qiov, -- BdrvRequestFlags flags) { -+static int coroutine_fn GRAPH_RDLOCK -+raw_co_zone_append(BlockDriverState *bs, -+ int64_t *offset, -+ QEMUIOVector *qiov, -+ BdrvRequestFlags flags) { - assert(flags == 0); - int64_t zone_size_mask = bs->bl.zone_size - 1; - int64_t iov_len = 0; --- -2.39.3 - diff --git a/kvm-file-posix-Fix-crash-on-discard_granularity-0.patch b/kvm-file-posix-Fix-crash-on-discard_granularity-0.patch deleted file mode 100644 index d7145e3..0000000 --- a/kvm-file-posix-Fix-crash-on-discard_granularity-0.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 4d575970f12462a054a207b593438aff0d40881a Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 29 Apr 2025 17:56:54 +0200 -Subject: [PATCH 3/4] file-posix: Fix crash on discard_granularity == 0 - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 354: file-posix: probe discard alignment on Linux block devices -RH-Jira: RHEL-87642 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake -RH-Commit: [3/3] dbe73aef453e77263d30ebebc690ab21145f6bab (stefanha/centos-stream-qemu-kvm) - -Block devices that don't support discard have a discard_granularity of -0. Currently, this results in a division by zero when we try to make -sure that it's a multiple of request_alignment. Only try to update -bs->bl.pdiscard_alignment when we got a non-zero discard_granularity -from sysfs. 
- -Fixes: f605796aae4 ('file-posix: probe discard alignment on Linux block devices') -Signed-off-by: Kevin Wolf -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Eric Blake -Message-ID: <20250429155654.102735-1-kwolf@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 71a30d54e6ab1d5c102a8bee2c263414697402ea) -Signed-off-by: Stefan Hajnoczi ---- - block/file-posix.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 0d6e12f880..0d85123d0f 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1573,7 +1573,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) - int ret; - - ret = hdev_get_pdiscard_alignment(&st, &dalign); -- if (ret == 0) { -+ if (ret == 0 && dalign != 0) { - uint32_t ralign = bs->bl.request_alignment; - - /* Probably never happens, but handle it just in case */ --- -2.39.3 - diff --git a/kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch b/kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch deleted file mode 100644 index 23a25f7..0000000 --- a/kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch +++ /dev/null @@ -1,215 +0,0 @@ -From 35a2470f67cb38c52246974a853d843dbb80b84d Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 22 May 2025 15:08:03 +0200 -Subject: [PATCH 2/9] file-posix: Probe paths and retry SG_IO on potential path - errors - -RH-Author: Kevin Wolf -RH-MergeRequest: 370: file-posix: Fix multipath failover with SCSI passthrough -RH-Jira: RHEL-65852 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/2] bbaa1d4de0675a2c67dafed74eacc0d1103aab18 (kmwolf/centos-qemu-kvm) - -When scsi-block is used on a host multipath device, it runs into the -problem that the kernel dm-mpath doesn't know anything about SCSI or -SG_IO and therefore can't decide if a SG_IO request returned an error -and needs to be retried on a different path. 
Instead of getting working -failover, an error is returned to scsi-block and handled according to -the configured error policy. Obviously, this is not what users want, -they want working failover. - -QEMU can parse the SG_IO result and determine whether this could have -been a path error, but just retrying the same request could just send it -to the same failing path again and result in the same error. - -With a kernel that supports the DM_MPATH_PROBE_PATHS ioctl on dm-mpath -block devices (queued in the device mapper tree for Linux 6.16), we can -tell the kernel to probe all paths and tell us if any usable paths -remained. If so, we can now retry the SG_IO ioctl and expect it to be -sent to a working path. - -Signed-off-by: Kevin Wolf -Message-ID: <20250522130803.34738-1-kwolf@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Hanna Czenczek -Signed-off-by: Kevin Wolf -(cherry picked from commit bf627788ef17721955bfcfba84209a07ae5f54ea) -Signed-off-by: Kevin Wolf ---- - block/file-posix.c | 115 ++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 114 insertions(+), 1 deletion(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 52cc25db84..77a35d9ae9 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -41,6 +41,7 @@ - - #include "scsi/pr-manager.h" - #include "scsi/constants.h" -+#include "scsi/utils.h" - - #if defined(__APPLE__) && (__MACH__) - #include -@@ -72,6 +73,7 @@ - #include - #endif - #include -+#include - #include - #include - #include -@@ -139,6 +141,22 @@ - #define DM_MPATH_PROBE_PATHS _IO(DM_IOCTL, DM_GET_TARGET_VERSION_CMD + 1) - #endif - -+/* -+ * Multiple retries are mostly meant for two separate scenarios: -+ * -+ * - DM_MPATH_PROBE_PATHS returns success, but before SG_IO completes, another -+ * path goes down. -+ * -+ * - DM_MPATH_PROBE_PATHS failed all paths in the current path group, so we have -+ * to send another SG_IO to switch to another path group to probe the paths in -+ * it. 
-+ * -+ * Even if each path is in a separate path group (path_grouping_policy set to -+ * failover), it's rare to have more than eight path groups - and even then -+ * pretty unlikely that only bad path groups would be chosen in eight retries. -+ */ -+#define SG_IO_MAX_RETRIES 8 -+ - typedef struct BDRVRawState { - int fd; - bool use_lock; -@@ -166,6 +184,7 @@ typedef struct BDRVRawState { - bool use_linux_aio:1; - bool has_laio_fdsync:1; - bool use_linux_io_uring:1; -+ bool use_mpath:1; - int page_cache_inconsistent; /* errno from fdatasync failure */ - bool has_fallocate; - bool needs_alignment; -@@ -4262,15 +4281,105 @@ hdev_open_Mac_error: - /* Since this does ioctl the device must be already opened */ - bs->sg = hdev_is_sg(bs); - -+ /* sg devices aren't even block devices and can't use dm-mpath */ -+ s->use_mpath = !bs->sg; -+ - return ret; - } - - #if defined(__linux__) -+#if defined(DM_MPATH_PROBE_PATHS) -+static bool coroutine_fn sgio_path_error(int ret, sg_io_hdr_t *io_hdr) -+{ -+ if (ret < 0) { -+ switch (ret) { -+ case -ENODEV: -+ return true; -+ case -EAGAIN: -+ /* -+ * The device is probably suspended. This happens while the dm table -+ * is reloaded, e.g. because a path is added or removed. This is an -+ * operation that should complete within 1ms, so just wait a bit and -+ * retry. -+ * -+ * If the device was suspended for another reason, we'll wait and -+ * retry SG_IO_MAX_RETRIES times. This is a tolerable delay before -+ * we return an error and potentially stop the VM. 
-+ */ -+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); -+ return true; -+ default: -+ return false; -+ } -+ } -+ -+ if (io_hdr->host_status != SCSI_HOST_OK) { -+ return true; -+ } -+ -+ switch (io_hdr->status) { -+ case GOOD: -+ case CONDITION_GOOD: -+ case INTERMEDIATE_GOOD: -+ case INTERMEDIATE_C_GOOD: -+ case RESERVATION_CONFLICT: -+ case COMMAND_TERMINATED: -+ return false; -+ case CHECK_CONDITION: -+ return !scsi_sense_buf_is_guest_recoverable(io_hdr->sbp, -+ io_hdr->mx_sb_len); -+ default: -+ return true; -+ } -+} -+ -+static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret) -+{ -+ BDRVRawState *s = acb->bs->opaque; -+ RawPosixAIOData probe_acb; -+ -+ if (!s->use_mpath) { -+ return false; -+ } -+ -+ if (!sgio_path_error(ret, acb->ioctl.buf)) { -+ return false; -+ } -+ -+ probe_acb = (RawPosixAIOData) { -+ .bs = acb->bs, -+ .aio_type = QEMU_AIO_IOCTL, -+ .aio_fildes = s->fd, -+ .aio_offset = 0, -+ .ioctl = { -+ .buf = NULL, -+ .cmd = DM_MPATH_PROBE_PATHS, -+ }, -+ }; -+ -+ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &probe_acb); -+ if (ret == -ENOTTY) { -+ s->use_mpath = false; -+ } else if (ret == -EAGAIN) { -+ /* The device might be suspended for a table reload, worth retrying */ -+ return true; -+ } -+ -+ return ret == 0; -+} -+#else -+static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret) -+{ -+ return false; -+} -+#endif /* DM_MPATH_PROBE_PATHS */ -+ - static int coroutine_fn - hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) - { - BDRVRawState *s = bs->opaque; - RawPosixAIOData acb; -+ int retries = SG_IO_MAX_RETRIES; - int ret; - - ret = fd_open(bs); -@@ -4298,7 +4407,11 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) - }, - }; - -- return raw_thread_pool_submit(handle_aiocb_ioctl, &acb); -+ do { -+ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb); -+ } while (req == SG_IO && retries-- && hdev_co_ioctl_sgio_retry(&acb, ret)); -+ -+ return ret; - 
} - #endif /* linux */ - --- -2.39.3 - diff --git a/kvm-file-posix-gluster-Handle-zero-block-status-hint-bet.patch b/kvm-file-posix-gluster-Handle-zero-block-status-hint-bet.patch deleted file mode 100644 index 1405719..0000000 --- a/kvm-file-posix-gluster-Handle-zero-block-status-hint-bet.patch +++ /dev/null @@ -1,64 +0,0 @@ -From f8d89f67817fa362a3b8ed0721775e353dac8f18 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:19 -0500 -Subject: [PATCH 02/14] file-posix, gluster: Handle zero block status hint - better - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/14] c40cd3f8cda2ea1646d90fd174b5f0dbd3e1a50b (ebblake/centos-qemu-kvm) - -Although the previous patch to change 'bool want_zero' into a bitmask -made no semantic change, it is now time to differentiate. When the -caller specifically wants to know what parts of the file read as zero, -we need to use lseek and actually reporting holes, rather than -short-circuiting and advertising full allocation. - -This change will be utilized in later patches to let mirroring -optimize for the case when the destination already reads as zeroes. 
- -Signed-off-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-ID: <20250509204341.3553601-17-eblake@redhat.com> -(cherry picked from commit a6a0a7fb0e327d17594c971b4a39de14e025b415) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/file-posix.c | 3 ++- - block/gluster.c | 2 +- - 2 files changed, 3 insertions(+), 2 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 0c6569742f..dea7b09b6c 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3282,7 +3282,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, - return ret; - } - -- if (mode != BDRV_WANT_PRECISE) { -+ if (!(mode & BDRV_WANT_ZERO)) { -+ /* There is no backing file - all bytes are allocated in this file. */ - *pnum = bytes; - *map = offset; - *file = bs; -diff --git a/block/gluster.c b/block/gluster.c -index 8197b0ecef..e702666cbc 100644 ---- a/block/gluster.c -+++ b/block/gluster.c -@@ -1482,7 +1482,7 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, - return ret; - } - -- if (mode != BDRV_WANT_PRECISE) { -+ if (!(mode & BDRV_WANT_ZERO)) { - *pnum = bytes; - *map = offset; - *file = bs; --- -2.39.3 - diff --git a/kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch b/kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch deleted file mode 100644 index cd24d41..0000000 --- a/kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 2baedec75a8a0daf9e93228795d1e6f2974f4825 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 17 Apr 2025 11:05:27 -0400 -Subject: [PATCH 1/4] file-posix: probe discard alignment on Linux block - devices - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 354: file-posix: probe discard alignment on Linux block devices -RH-Jira: RHEL-87642 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake -RH-Commit: [1/3] 
84de24191bfa47e94cd475e78dcafd38a50a5888 (stefanha/centos-stream-qemu-kvm) - -Populate the pdiscard_alignment block limit so the block layer is able -align discard requests correctly. - -Signed-off-by: Stefan Hajnoczi -Message-ID: <20250417150528.76470-2-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit f605796aae42885034400c83ed6a9b07cd6d6481) -Signed-off-by: Stefan Hajnoczi ---- - block/file-posix.c | 67 +++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 66 insertions(+), 1 deletion(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 56d1972d15..0d6e12f880 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1276,10 +1276,10 @@ static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned) - } - #endif /* defined(CONFIG_BLKZONED) */ - -+#ifdef CONFIG_LINUX - /* - * Get a sysfs attribute value as a long integer. - */ --#ifdef CONFIG_LINUX - static long get_sysfs_long_val(struct stat *st, const char *attribute) - { - g_autofree char *str = NULL; -@@ -1299,6 +1299,30 @@ static long get_sysfs_long_val(struct stat *st, const char *attribute) - } - return ret; - } -+ -+/* -+ * Get a sysfs attribute value as a uint32_t. -+ */ -+static int get_sysfs_u32_val(struct stat *st, const char *attribute, -+ uint32_t *u32) -+{ -+ g_autofree char *str = NULL; -+ const char *end; -+ unsigned int val; -+ int ret; -+ -+ ret = get_sysfs_str_val(st, attribute, &str); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ /* The file is ended with '\n', pass 'end' to accept that. */ -+ ret = qemu_strtoui(str, &end, 10, &val); -+ if (ret == 0 && end && *end == '\0') { -+ *u32 = val; -+ } -+ return ret; -+} - #endif - - static int hdev_get_max_segments(int fd, struct stat *st) -@@ -1318,6 +1342,23 @@ static int hdev_get_max_segments(int fd, struct stat *st) - #endif - } - -+/* -+ * Fills in *dalign with the discard alignment and returns 0 on success, -+ * -errno otherwise. 
-+ */ -+static int hdev_get_pdiscard_alignment(struct stat *st, uint32_t *dalign) -+{ -+#ifdef CONFIG_LINUX -+ /* -+ * Note that Linux "discard_granularity" is QEMU "discard_alignment". Linux -+ * "discard_alignment" is something else. -+ */ -+ return get_sysfs_u32_val(st, "discard_granularity", dalign); -+#else -+ return -ENOTSUP; -+#endif -+} -+ - #if defined(CONFIG_BLKZONED) - /* - * If the reset_all flag is true, then the wps of zone whose state is -@@ -1527,6 +1568,30 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) - } - } - -+ if (S_ISBLK(st.st_mode)) { -+ uint32_t dalign = 0; -+ int ret; -+ -+ ret = hdev_get_pdiscard_alignment(&st, &dalign); -+ if (ret == 0) { -+ uint32_t ralign = bs->bl.request_alignment; -+ -+ /* Probably never happens, but handle it just in case */ -+ if (dalign < ralign && (ralign % dalign == 0)) { -+ dalign = ralign; -+ } -+ -+ /* The block layer requires a multiple of request_alignment */ -+ if (dalign % ralign != 0) { -+ error_setg(errp, "Invalid pdiscard_alignment limit %u is not a " -+ "multiple of request_alignment %u", dalign, ralign); -+ return; -+ } -+ -+ bs->bl.pdiscard_alignment = dalign; -+ } -+ } -+ - raw_refresh_zoned_limits(bs, &st, errp); - } - --- -2.39.3 - diff --git a/kvm-hw-audio-ac97-skip-automatic-zero-init-of-large-arra.patch b/kvm-hw-audio-ac97-skip-automatic-zero-init-of-large-arra.patch deleted file mode 100644 index 848b9c2..0000000 --- a/kvm-hw-audio-ac97-skip-automatic-zero-init-of-large-arra.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 662b91cbf6bebfa4f38fe2d1b1868d316bfe8838 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:45 +0100 -Subject: [PATCH 15/43] hw/audio/ac97: skip automatic zero-init of large arrays -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED 
-RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/31] 5c4f1eea2eab9ef363ac7f0076725fc5767ee51f (stefanha/centos-stream-qemu-kvm) - -The 'read_audio' & 'write_audio' methods have a 4k byte array used -for copying data between the audio backend and device. Skip the -automatic zero-init of these arrays to eliminate the performance -overhead in the I/O hot path. - -The 'tmpbuf' array will be fully initialized when reading data from -the audio backend and/or device memory. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-8-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 2553d2d26a9d0f46386bf8c37d184567e5cede6c) -Signed-off-by: Stefan Hajnoczi ---- - hw/audio/ac97.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c -index 05c573776e..2b290cb2b1 100644 ---- a/hw/audio/ac97.c -+++ b/hw/audio/ac97.c -@@ -886,7 +886,7 @@ static void nabm_writel(void *opaque, uint32_t addr, uint32_t val) - static int write_audio(AC97LinkState *s, AC97BusMasterRegs *r, - int max, int *stop) - { -- uint8_t tmpbuf[4096]; -+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; - uint32_t addr = r->bd.addr; - uint32_t temp = r->picb << 1; - uint32_t written = 0; -@@ -959,7 +959,7 @@ static void write_bup(AC97LinkState *s, int elapsed) - static int read_audio(AC97LinkState *s, AC97BusMasterRegs *r, - int max, int *stop) - { -- uint8_t tmpbuf[4096]; -+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; - uint32_t addr = r->bd.addr; - uint32_t temp = r->picb << 1; - uint32_t nread = 0; --- -2.39.3 - diff --git a/kvm-hw-audio-cs4231a-skip-automatic-zero-init-of-large-a.patch b/kvm-hw-audio-cs4231a-skip-automatic-zero-init-of-large-a.patch deleted file mode 100644 index 9baed12..0000000 --- a/kvm-hw-audio-cs4231a-skip-automatic-zero-init-of-large-a.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 301bc6085046756e7ae0b5c2d4a95fa8cc88be0d Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:46 +0100 -Subject: [PATCH 16/43] hw/audio/cs4231a: skip automatic zero-init of large - arrays -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/31] 8633c398b929f3c4e0c1d7d5874b2fead4496a7d (stefanha/centos-stream-qemu-kvm) - -The 'cs_write_audio' method has a pair of byte arrays, one 4k in size -and one 8k, which are used in converting audio samples. Skip the -automatic zero-init of these arrays to eliminate the performance -overhead in the I/O hot path. - -The 'tmpbuf' array will be fully initialized when reading a block of -data from the guest. The 'linbuf' array will be fully initialized -when converting the audio samples. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-9-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit ca2cc0385d97cea66cd54ee42553f385c403d4a6) -Signed-off-by: Stefan Hajnoczi ---- - hw/audio/cs4231a.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/audio/cs4231a.c b/hw/audio/cs4231a.c -index 5a9be80ba3..eb916035ec 100644 ---- a/hw/audio/cs4231a.c -+++ b/hw/audio/cs4231a.c -@@ -528,7 +528,7 @@ static int cs_write_audio (CSState *s, int nchan, int dma_pos, - int dma_len, int len) - { - int temp, net; -- uint8_t tmpbuf[4096]; -+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; - IsaDmaClass *k = ISADMA_GET_CLASS(s->isa_dma); - - temp = len; -@@ -547,7 +547,7 @@ static int cs_write_audio (CSState *s, int nchan, int dma_pos, - copied = k->read_memory(s->isa_dma, nchan, tmpbuf, dma_pos, to_copy); - if (s->tab) { - int i; -- int16_t linbuf[4096]; -+ QEMU_UNINITIALIZED int16_t linbuf[4096]; - - for (i = 0; i < copied; ++i) - linbuf[i] = 
s->tab[tmpbuf[i]]; --- -2.39.3 - diff --git a/kvm-hw-audio-es1370-skip-automatic-zero-init-of-large-ar.patch b/kvm-hw-audio-es1370-skip-automatic-zero-init-of-large-ar.patch deleted file mode 100644 index f3cab17..0000000 --- a/kvm-hw-audio-es1370-skip-automatic-zero-init-of-large-ar.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 084ce8fa1c5db2eb9c5567fbcd0568e3e2cd37a4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:47 +0100 -Subject: [PATCH 17/43] hw/audio/es1370: skip automatic zero-init of large - array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/31] 3d8c0165ee60c4027302706abc9d6c55461c884b (stefanha/centos-stream-qemu-kvm) - -The 'es1370_transfer_audio' method has a 4k byte array used for -copying data between the audio backend and device. Skip the automatic -zero-init of this array to eliminate the performance overhead in -the I/O hot path. - -The 'tmpbuf' array will be fully initialized when reading data from -the audio backend and/or device memory. - -Signed-off-by: Daniel P. 
Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-10-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 8236e206084b832d1d7ec947a4798b818f4cdf1f) -Signed-off-by: Stefan Hajnoczi ---- - hw/audio/es1370.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c -index 75f71e5d78..d0ed0052db 100644 ---- a/hw/audio/es1370.c -+++ b/hw/audio/es1370.c -@@ -604,7 +604,7 @@ static uint64_t es1370_read(void *opaque, hwaddr addr, unsigned size) - static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel, - int max, bool *irq) - { -- uint8_t tmpbuf[4096]; -+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; - size_t to_transfer; - uint32_t addr = d->frame_addr; - int sc = d->scount & 0xffff; --- -2.39.3 - diff --git a/kvm-hw-audio-gus-skip-automatic-zero-init-of-large-array.patch b/kvm-hw-audio-gus-skip-automatic-zero-init-of-large-array.patch deleted file mode 100644 index 7530880..0000000 --- a/kvm-hw-audio-gus-skip-automatic-zero-init-of-large-array.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 7c889953afbf830cb4522ec259221a9319c0f42a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:48 +0100 -Subject: [PATCH 18/43] hw/audio/gus: skip automatic zero-init of large array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/31] a580b7fae56231c7bf1e2b797a4037eeb45f817a (stefanha/centos-stream-qemu-kvm) - -The 'GUS_read_DMA' method has a 4k byte array used for copying -data between the audio backend and device. Skip the automatic -zero-init of this array to eliminate the performance overhead in -the I/O hot path. 
- -The 'tmpbuf' array will be fully initialized when reading data -from device memory. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-11-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 2e438da4929018c62609381e1156aac0b2fe3de3) -Signed-off-by: Stefan Hajnoczi ---- - hw/audio/gus.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/audio/gus.c b/hw/audio/gus.c -index e718c1183e..bd242e19a3 100644 ---- a/hw/audio/gus.c -+++ b/hw/audio/gus.c -@@ -183,7 +183,7 @@ static int GUS_read_DMA (void *opaque, int nchan, int dma_pos, int dma_len) - { - GUSState *s = opaque; - IsaDmaClass *k = ISADMA_GET_CLASS(s->isa_dma); -- char tmpbuf[4096]; -+ QEMU_UNINITIALIZED char tmpbuf[4096]; - int pos = dma_pos, mode, left = dma_len - dma_pos; - - ldebug ("read DMA %#x %d\n", dma_pos, dma_len); --- -2.39.3 - diff --git a/kvm-hw-audio-marvell_88w8618-skip-automatic-zero-init-of.patch b/kvm-hw-audio-marvell_88w8618-skip-automatic-zero-init-of.patch deleted file mode 100644 index 7d596bc..0000000 --- a/kvm-hw-audio-marvell_88w8618-skip-automatic-zero-init-of.patch +++ /dev/null @@ -1,50 +0,0 @@ -From ef07fc569ae2a50f54f8bf29f09555cb0e8b08db Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:49 +0100 -Subject: [PATCH 19/43] hw/audio/marvell_88w8618: skip automatic zero-init of - large array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/31] c45c78043e51ac3195e76aaf0b745dd41c8d8345 (stefanha/centos-stream-qemu-kvm) - -The 'mv88w8618_audio_callback' method has a 4k byte array used for -copying data between the audio backend and device. 
Skip the automatic -zero-init of this array to eliminate the performance overhead in -the I/O hot path. - -The 'buf' array will be fully initialized when reading data from -device memory. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-12-berrange@redhat.com -[Fixed hw/audio/gus in commit message --Stefan] -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 5b6cd5c5df4229972d8a0fd9dd9a089a1644d6ba) -Signed-off-by: Stefan Hajnoczi ---- - hw/audio/marvell_88w8618.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/audio/marvell_88w8618.c b/hw/audio/marvell_88w8618.c -index 28f9af320d..31a73f53b9 100644 ---- a/hw/audio/marvell_88w8618.c -+++ b/hw/audio/marvell_88w8618.c -@@ -66,7 +66,7 @@ static void mv88w8618_audio_callback(void *opaque, int free_out, int free_in) - { - mv88w8618_audio_state *s = opaque; - int16_t *codec_buffer; -- int8_t buf[4096]; -+ QEMU_UNINITIALIZED int8_t buf[4096]; - int8_t *mem_buffer; - int pos, block_size; - --- -2.39.3 - diff --git a/kvm-hw-audio-sb16-skip-automatic-zero-init-of-large-arra.patch b/kvm-hw-audio-sb16-skip-automatic-zero-init-of-large-arra.patch deleted file mode 100644 index f550b32..0000000 --- a/kvm-hw-audio-sb16-skip-automatic-zero-init-of-large-arra.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 5b5db5d5901a9d8106cf594f7f7ebf9c9152a53f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:50 +0100 -Subject: [PATCH 20/43] hw/audio/sb16: skip automatic zero-init of large array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/31] 2f715c640897101a96a6f397d574f248fc1a7fce (stefanha/centos-stream-qemu-kvm) - -The 'write_audio' method has a 4k byte array used 
for copying data -between the audio backend and device. Skip the automatic zero-init -of this array to eliminate the performance overhead in the I/O hot -path. - -The 'tmpbuf' array will be fully initialized when reading data from -device memory. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-13-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 30c82f6657c1ee9fbb5473924b4d3273f214bd6f) -Signed-off-by: Stefan Hajnoczi ---- - hw/audio/sb16.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/audio/sb16.c b/hw/audio/sb16.c -index 0c661b4947..afee59d798 100644 ---- a/hw/audio/sb16.c -+++ b/hw/audio/sb16.c -@@ -1181,7 +1181,7 @@ static int write_audio (SB16State *s, int nchan, int dma_pos, - IsaDma *isa_dma = nchan == s->dma ? s->isa_dma : s->isa_hdma; - IsaDmaClass *k = ISADMA_GET_CLASS(isa_dma); - int temp, net; -- uint8_t tmpbuf[4096]; -+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; - - temp = len; - net = 0; --- -2.39.3 - diff --git a/kvm-hw-audio-via-ac97-skip-automatic-zero-init-of-large-.patch b/kvm-hw-audio-via-ac97-skip-automatic-zero-init-of-large-.patch deleted file mode 100644 index dea7ff7..0000000 --- a/kvm-hw-audio-via-ac97-skip-automatic-zero-init-of-large-.patch +++ /dev/null @@ -1,49 +0,0 @@ -From dc08736e78d641eaab2f6df35218fb2b0f88ee50 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:51 +0100 -Subject: [PATCH 21/43] hw/audio/via-ac97: skip automatic zero-init of large - array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/31] 8afade9e5ac67ec271f3318473c04b6fb9947f8d (stefanha/centos-stream-qemu-kvm) - -The 'out_cb' method has a 4k byte array used for 
copying data -between the audio backend and device. Skip the automatic zero-init -of this array to eliminate the performance overhead in the I/O hot -path. - -The 'tmpbuf' array will be fully initialized when reading data from -device memory. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-14-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit bb71d9fe1419f44529c91d1b09464718d157e647) -Signed-off-by: Stefan Hajnoczi ---- - hw/audio/via-ac97.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/audio/via-ac97.c b/hw/audio/via-ac97.c -index 4e115e011e..08e6762597 100644 ---- a/hw/audio/via-ac97.c -+++ b/hw/audio/via-ac97.c -@@ -175,7 +175,7 @@ static void out_cb(void *opaque, int avail) - ViaAC97SGDChannel *c = &s->aur; - int temp, to_copy, copied; - bool stop = false; -- uint8_t tmpbuf[4096]; -+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; - - if (c->stat & STAT_PAUSED) { - return; --- -2.39.3 - diff --git a/kvm-hw-char-sclpconsole-lm-skip-automatic-zero-init-of-l.patch b/kvm-hw-char-sclpconsole-lm-skip-automatic-zero-init-of-l.patch deleted file mode 100644 index 77a2ee1..0000000 --- a/kvm-hw-char-sclpconsole-lm-skip-automatic-zero-init-of-l.patch +++ /dev/null @@ -1,49 +0,0 @@ -From d7f96f00428f759f4323364ca1688988b34c17b0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:52 +0100 -Subject: [PATCH 22/43] hw/char/sclpconsole-lm: skip automatic zero-init of - large array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [14/31] 17140f6dae4e1f23ae5d2ba5e320dd8335233a5c (stefanha/centos-stream-qemu-kvm) - -The 'process_mdb' method has a 4k byte array used for copying data -between 
the guest and the chardev backend. Skip the automatic zero-init -of this array to eliminate the performance overhead in the I/O hot -path. - -The 'buffer' array will be selectively initialized when data is converted -between EBCDIC and ASCII. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-15-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 8b1dac1ad57082611419b0e2f347acd96115d25f) -Signed-off-by: Stefan Hajnoczi ---- - hw/char/sclpconsole-lm.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/char/sclpconsole-lm.c b/hw/char/sclpconsole-lm.c -index ddb9a726d5..5084531b7b 100644 ---- a/hw/char/sclpconsole-lm.c -+++ b/hw/char/sclpconsole-lm.c -@@ -214,7 +214,7 @@ static int process_mdb(SCLPEvent *event, MDBO *mdbo) - { - int rc; - int len; -- uint8_t buffer[SIZE_BUFFER]; -+ QEMU_UNINITIALIZED uint8_t buffer[SIZE_BUFFER]; - - len = be16_to_cpu(mdbo->length); - len -= sizeof(mdbo->length) + sizeof(mdbo->type) --- -2.39.3 - diff --git a/kvm-hw-display-vmware_vga-skip-automatic-zero-init-of-la.patch b/kvm-hw-display-vmware_vga-skip-automatic-zero-init-of-la.patch deleted file mode 100644 index 9f3d30c..0000000 --- a/kvm-hw-display-vmware_vga-skip-automatic-zero-init-of-la.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 886a2ce3ff5c05087dc667578d59a37bfedaee19 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:54 +0100 -Subject: [PATCH 24/43] hw/display/vmware_vga: skip automatic zero-init of - large struct -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [16/31] 201c9ceb2782d17dc9ca800b2d924a8ee3a8eb98 (stefanha/centos-stream-qemu-kvm) - -The 'vmsvga_fifo_run' method has a struct 
which is a little over 20k -in size, used for holding image data for cursor changes. Skip the -automatic zero-init of this struct to eliminate the performance -overhead in the I/O hot path. - -The cursor variable will be fully initialized only when processing -a cursor definition message from the guest. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-17-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 7048e70f391df76d009eecca25f8027858f9f304) -Signed-off-by: Stefan Hajnoczi ---- - hw/display/vmware_vga.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c -index 2dd661e3c1..fae10068cb 100644 ---- a/hw/display/vmware_vga.c -+++ b/hw/display/vmware_vga.c -@@ -618,7 +618,7 @@ static void vmsvga_fifo_run(struct vmsvga_state_s *s) - uint32_t cmd, colour; - int args, len, maxloop = 1024; - int x, y, dx, dy, width, height; -- struct vmsvga_cursor_definition_s cursor; -+ QEMU_UNINITIALIZED struct vmsvga_cursor_definition_s cursor; - uint32_t cmd_start; - - len = vmsvga_fifo_length(s); --- -2.39.3 - diff --git a/kvm-hw-dma-xlnx_csu_dma-skip-automatic-zero-init-of-larg.patch b/kvm-hw-dma-xlnx_csu_dma-skip-automatic-zero-init-of-larg.patch deleted file mode 100644 index 8bd1537..0000000 --- a/kvm-hw-dma-xlnx_csu_dma-skip-automatic-zero-init-of-larg.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 277dbec12b1a109aca6cfe65046f2b4d6b41ee43 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:53 +0100 -Subject: [PATCH 23/43] hw/dma/xlnx_csu_dma: skip automatic zero-init of large - array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [15/31] 
8832791cb87b342e3b9882893891a824d31b687a (stefanha/centos-stream-qemu-kvm) - -The 'xlnx_csu_dma_src_notify' method has a 4k byte array used for -copying DMA data. Skip the automatic zero-init of this array to -eliminate the performance overhead in the I/O hot path. - -The 'buf' array will be fully initialized when data is copied. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-16-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit ce14f24611aa0469b464a9512e192b4fd51dca2b) -Signed-off-by: Stefan Hajnoczi ---- - hw/dma/xlnx_csu_dma.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c -index 1afaa0bf51..8091a785cc 100644 ---- a/hw/dma/xlnx_csu_dma.c -+++ b/hw/dma/xlnx_csu_dma.c -@@ -287,7 +287,7 @@ static uint32_t xlnx_csu_dma_advance(XlnxCSUDMA *s, uint32_t len) - static void xlnx_csu_dma_src_notify(void *opaque) - { - XlnxCSUDMA *s = XLNX_CSU_DMA(opaque); -- unsigned char buf[4 * 1024]; -+ QEMU_UNINITIALIZED unsigned char buf[4 * 1024]; - size_t rlen = 0; - - ptimer_transaction_begin(s->src_timer); --- -2.39.3 - diff --git a/kvm-hw-hyperv-syndbg-skip-automatic-zero-init-of-large-a.patch b/kvm-hw-hyperv-syndbg-skip-automatic-zero-init-of-large-a.patch deleted file mode 100644 index 352c72d..0000000 --- a/kvm-hw-hyperv-syndbg-skip-automatic-zero-init-of-large-a.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 6ae70910d16ef313e4addefea33c00083cc5665c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:55 +0100 -Subject: [PATCH 25/43] hw/hyperv/syndbg: skip automatic zero-init of large - array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: 
[17/31] 2c7e0765ddb0fe5880403b234c299931da6daabf (stefanha/centos-stream-qemu-kvm) - -The 'handle_recv_msg' method has a 4k byte array used for copying -data between the network socket and guest memory. Skip the automatic -zero-init of this array to eliminate the performance overhead in the -I/O hot path. - -The 'data_buf' array will be fully initialized when data is read -off the network socket. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-18-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 5a1f614d0cd0bcc8e84e0b7ab6af63d56bd348a2) -Signed-off-by: Stefan Hajnoczi - -Conflicts: - hw/hyperv/syndbg.c - - Context conflict due to missing commit 3efb9d226221 - ("hw/hyperv/syndbg: common compilation unit") downstream. There is no - need to backport the commit because it's not a bug fix. ---- - hw/hyperv/syndbg.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/hyperv/syndbg.c b/hw/hyperv/syndbg.c -index d3e3917077..16ed1ab66b 100644 ---- a/hw/hyperv/syndbg.c -+++ b/hw/hyperv/syndbg.c -@@ -188,7 +188,7 @@ static uint16_t handle_recv_msg(HvSynDbg *syndbg, uint64_t outgpa, - uint64_t timeout, uint32_t *retrieved_count) - { - uint16_t ret; -- uint8_t data_buf[TARGET_PAGE_SIZE - UDP_PKT_HEADER_SIZE]; -+ QEMU_UNINITIALIZED uint8_t data_buf[TARGET_PAGE_SIZE - UDP_PKT_HEADER_SIZE]; - hwaddr out_len; - void *out_data; - ssize_t recv_byte_count; --- -2.39.3 - diff --git a/kvm-hw-i386-amd_iommu-Allow-migration-when-explicitly-cr.patch b/kvm-hw-i386-amd_iommu-Allow-migration-when-explicitly-cr.patch deleted file mode 100644 index 34a65b6..0000000 --- a/kvm-hw-i386-amd_iommu-Allow-migration-when-explicitly-cr.patch +++ /dev/null @@ -1,117 +0,0 @@ -From c295cfa98a464eba271e75846275913a0ed6435a Mon Sep 17 00:00:00 2001 -From: Suravee Suthikulpanit -Date: Sun, 4 May 2025 17:04:05 +0000 -Subject: [PATCH 41/43] hw/i386/amd_iommu: Allow migration when explicitly - create the 
AMDVI-PCI device - -RH-Author: John Allen -RH-MergeRequest: 383: Add ability to manually specify the AMDVI-PCI device -RH-Jira: RHEL-85649 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] ee1cdd746a3b7051dc1e7c5748876384320a3f28 (johnalle/qemu-kvm-fork) - -Add migration support for AMD IOMMU model by saving necessary AMDVIState -parameters for MMIO registers, device table, command buffer, and event -buffers. - -Also change devtab_len type from size_t to uint64_t to avoid 32-bit build -issue. - -Signed-off-by: Suravee Suthikulpanit -Message-Id: <20250504170405.12623-3-suravee.suthikulpanit@amd.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 28931c2e1591deb4bfaaf744fdc8813e96c230f1) - -JIRA: https://issues.redhat.com/browse/RHEL-85649 - -Signed-off-by: John Allen ---- - hw/i386/amd_iommu.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ - hw/i386/amd_iommu.h | 2 +- - 2 files changed, 49 insertions(+), 1 deletion(-) - -diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c -index da5313f3d2..fbe0be440e 100644 ---- a/hw/i386/amd_iommu.c -+++ b/hw/i386/amd_iommu.c -@@ -1611,8 +1611,55 @@ static void amdvi_sysbus_reset(DeviceState *dev) - amdvi_init(s); - } - -+static const VMStateDescription vmstate_amdvi_sysbus_migratable = { -+ .name = "amd-iommu", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .priority = MIG_PRI_IOMMU, -+ .fields = (VMStateField[]) { -+ /* Updated in amdvi_handle_control_write() */ -+ VMSTATE_BOOL(enabled, AMDVIState), -+ VMSTATE_BOOL(ga_enabled, AMDVIState), -+ VMSTATE_BOOL(ats_enabled, AMDVIState), -+ VMSTATE_BOOL(cmdbuf_enabled, AMDVIState), -+ VMSTATE_BOOL(completion_wait_intr, AMDVIState), -+ VMSTATE_BOOL(evtlog_enabled, AMDVIState), -+ VMSTATE_BOOL(evtlog_intr, AMDVIState), -+ /* Updated in amdvi_handle_devtab_write() */ -+ VMSTATE_UINT64(devtab, AMDVIState), -+ VMSTATE_UINT64(devtab_len, AMDVIState), -+ /* Updated in amdvi_handle_cmdbase_write() */ -+ VMSTATE_UINT64(cmdbuf, 
AMDVIState), -+ VMSTATE_UINT64(cmdbuf_len, AMDVIState), -+ /* Updated in amdvi_handle_cmdhead_write() */ -+ VMSTATE_UINT32(cmdbuf_head, AMDVIState), -+ /* Updated in amdvi_handle_cmdtail_write() */ -+ VMSTATE_UINT32(cmdbuf_tail, AMDVIState), -+ /* Updated in amdvi_handle_evtbase_write() */ -+ VMSTATE_UINT64(evtlog, AMDVIState), -+ VMSTATE_UINT32(evtlog_len, AMDVIState), -+ /* Updated in amdvi_handle_evthead_write() */ -+ VMSTATE_UINT32(evtlog_head, AMDVIState), -+ /* Updated in amdvi_handle_evttail_write() */ -+ VMSTATE_UINT32(evtlog_tail, AMDVIState), -+ /* Updated in amdvi_handle_pprbase_write() */ -+ VMSTATE_UINT64(ppr_log, AMDVIState), -+ VMSTATE_UINT32(pprlog_len, AMDVIState), -+ /* Updated in amdvi_handle_pprhead_write() */ -+ VMSTATE_UINT32(pprlog_head, AMDVIState), -+ /* Updated in amdvi_handle_tailhead_write() */ -+ VMSTATE_UINT32(pprlog_tail, AMDVIState), -+ /* MMIO registers */ -+ VMSTATE_UINT8_ARRAY(mmior, AMDVIState, AMDVI_MMIO_SIZE), -+ VMSTATE_UINT8_ARRAY(romask, AMDVIState, AMDVI_MMIO_SIZE), -+ VMSTATE_UINT8_ARRAY(w1cmask, AMDVIState, AMDVI_MMIO_SIZE), -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ - static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) - { -+ DeviceClass *dc = (DeviceClass *) object_get_class(OBJECT(dev)); - AMDVIState *s = AMD_IOMMU_DEVICE(dev); - MachineState *ms = MACHINE(qdev_get_machine()); - PCMachineState *pcms = PC_MACHINE(ms); -@@ -1634,6 +1681,7 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) - } - - s->pci = AMD_IOMMU_PCI(pdev); -+ dc->vmsd = &vmstate_amdvi_sysbus_migratable; - } else { - s->pci = AMD_IOMMU_PCI(object_new(TYPE_AMD_IOMMU_PCI)); - /* This device should take care of IOMMU PCI properties */ -diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h -index 7a28181d9c..5672bdef89 100644 ---- a/hw/i386/amd_iommu.h -+++ b/hw/i386/amd_iommu.h -@@ -329,7 +329,7 @@ struct AMDVIState { - bool excl_enabled; - - hwaddr devtab; /* base address device table */ -- size_t devtab_len; /* device table 
length */ -+ uint64_t devtab_len; /* device table length */ - - hwaddr cmdbuf; /* command buffer base address */ - uint64_t cmdbuf_len; /* command buffer length */ --- -2.39.3 - diff --git a/kvm-hw-i386-amd_iommu-Isolate-AMDVI-PCI-from-amd-iommu-d.patch b/kvm-hw-i386-amd_iommu-Isolate-AMDVI-PCI-from-amd-iommu-d.patch deleted file mode 100644 index d23e2f9..0000000 --- a/kvm-hw-i386-amd_iommu-Isolate-AMDVI-PCI-from-amd-iommu-d.patch +++ /dev/null @@ -1,267 +0,0 @@ -From 1922ff43d7eafaad767496de00d4a1af766728e6 Mon Sep 17 00:00:00 2001 -From: Suravee Suthikulpanit -Date: Sun, 4 May 2025 17:04:04 +0000 -Subject: [PATCH 40/43] hw/i386/amd_iommu: Isolate AMDVI-PCI from amd-iommu - device to allow full control over the PCI device creation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: John Allen -RH-MergeRequest: 383: Add ability to manually specify the AMDVI-PCI device -RH-Jira: RHEL-85649 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 3468e169fa46bca1d0a5941dfe652254b830e9c6 (johnalle/qemu-kvm-fork) - -Current amd-iommu model internally creates an AMDVI-PCI device. Here is -a snippet from info qtree: - - bus: main-system-bus - type System - dev: amd-iommu, id "" - xtsup = false - pci-id = "" - intremap = "on" - device-iotlb = false - pt = true - ... 
- dev: q35-pcihost, id "" - MCFG = -1 (0xffffffffffffffff) - pci-hole64-size = 34359738368 (32 GiB) - below-4g-mem-size = 134217728 (128 MiB) - above-4g-mem-size = 0 (0 B) - smm-ranges = true - x-pci-hole64-fix = true - x-config-reg-migration-enabled = true - bypass-iommu = false - bus: pcie.0 - type PCIE - dev: AMDVI-PCI, id "" - addr = 01.0 - romfile = "" - romsize = 4294967295 (0xffffffff) - rombar = -1 (0xffffffffffffffff) - multifunction = false - x-pcie-lnksta-dllla = true - x-pcie-extcap-init = true - failover_pair_id = "" - acpi-index = 0 (0x0) - x-pcie-err-unc-mask = true - x-pcie-ari-nextfn-1 = false - x-max-bounce-buffer-size = 4096 (4 KiB) - x-pcie-ext-tag = true - busnr = 0 (0x0) - class Class 0806, addr 00:01.0, pci id 1022:0000 (sub 1af4:1100) - ... - -This prohibits users from specifying the PCI topology for the amd-iommu device, -which becomes a problem when trying to support VM migration since it does not -guarantee the same enumeration of AMD IOMMU device. - -Therefore, allow the 'AMDVI-PCI' device to optionally be pre-created and -associated with a 'amd-iommu' device via a new 'pci-id' parameter on the -latter. - -For example: - -device AMDVI-PCI,id=iommupci0,bus=pcie.0,addr=0x05 \ - -device amd-iommu,intremap=on,pt=on,xtsup=on,pci-id=iommupci0 \ - -For backward-compatibility, internally create the AMDVI-PCI device if not -specified on the CLI. - -Co-developed-by: Daniel P. Berrangé -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Suravee Suthikulpanit -Message-Id: <20250504170405.12623-2-suravee.suthikulpanit@amd.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit f864a3235ea1d1d714b3cde2d9a810ea6344a7b5) - -JIRA: https://issues.redhat.com/browse/RHEL-85649 - -Signed-off-by: John Allen ---- - hw/i386/acpi-build.c | 8 +++---- - hw/i386/amd_iommu.c | 53 ++++++++++++++++++++++++++------------------ - hw/i386/amd_iommu.h | 3 ++- - 3 files changed, 38 insertions(+), 26 deletions(-) - -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 3fffa4a332..f4b65701a4 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -2333,10 +2333,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, - build_append_int_noprefix(table_data, ivhd_blob->len + 24, 2); - /* DeviceID */ - build_append_int_noprefix(table_data, -- object_property_get_int(OBJECT(&s->pci), "addr", -+ object_property_get_int(OBJECT(s->pci), "addr", - &error_abort), 2); - /* Capability offset */ -- build_append_int_noprefix(table_data, s->pci.capab_offset, 2); -+ build_append_int_noprefix(table_data, s->pci->capab_offset, 2); - /* IOMMU base address */ - build_append_int_noprefix(table_data, s->mr_mmio.addr, 8); - /* PCI Segment Group */ -@@ -2368,10 +2368,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, - build_append_int_noprefix(table_data, ivhd_blob->len + 40, 2); - /* DeviceID */ - build_append_int_noprefix(table_data, -- object_property_get_int(OBJECT(&s->pci), "addr", -+ object_property_get_int(OBJECT(s->pci), "addr", - &error_abort), 2); - /* Capability offset */ -- build_append_int_noprefix(table_data, s->pci.capab_offset, 2); -+ build_append_int_noprefix(table_data, s->pci->capab_offset, 2); - /* IOMMU base address */ - build_append_int_noprefix(table_data, s->mr_mmio.addr, 8); - /* PCI Segment Group */ -diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c -index 5f9b952799..da5313f3d2 100644 ---- a/hw/i386/amd_iommu.c -+++ b/hw/i386/amd_iommu.c -@@ -167,11 +167,11 @@ static void amdvi_generate_msi_interrupt(AMDVIState *s) - { - MSIMessage msg = 
{}; - MemTxAttrs attrs = { -- .requester_id = pci_requester_id(&s->pci.dev) -+ .requester_id = pci_requester_id(&s->pci->dev) - }; - -- if (msi_enabled(&s->pci.dev)) { -- msg = msi_get_message(&s->pci.dev, 0); -+ if (msi_enabled(&s->pci->dev)) { -+ msg = msi_get_message(&s->pci->dev, 0); - address_space_stl_le(&address_space_memory, msg.address, msg.data, - attrs, NULL); - } -@@ -239,7 +239,7 @@ static void amdvi_page_fault(AMDVIState *s, uint16_t devid, - info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF; - amdvi_encode_event(evt, devid, addr, info); - amdvi_log_event(s, evt); -- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, -+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, - PCI_STATUS_SIG_TARGET_ABORT); - } - /* -@@ -256,7 +256,7 @@ static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, - - amdvi_encode_event(evt, devid, devtab, info); - amdvi_log_event(s, evt); -- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, -+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, - PCI_STATUS_SIG_TARGET_ABORT); - } - /* log an event trying to access command buffer -@@ -269,7 +269,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr) - - amdvi_encode_event(evt, 0, addr, info); - amdvi_log_event(s, evt); -- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, -+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, - PCI_STATUS_SIG_TARGET_ABORT); - } - /* log an illegal command event -@@ -310,7 +310,7 @@ static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid, - info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR; - amdvi_encode_event(evt, devid, addr, info); - amdvi_log_event(s, evt); -- pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, -+ pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, - PCI_STATUS_SIG_TARGET_ABORT); - } - -@@ -1607,7 +1607,7 @@ static void amdvi_sysbus_reset(DeviceState *dev) - { - AMDVIState *s = AMD_IOMMU_DEVICE(dev); - -- msi_reset(&s->pci.dev); -+ 
msi_reset(&s->pci->dev); - amdvi_init(s); - } - -@@ -1619,14 +1619,32 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) - X86MachineState *x86ms = X86_MACHINE(ms); - PCIBus *bus = pcms->pcibus; - -- s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, -- amdvi_uint64_equal, g_free, g_free); -+ if (s->pci_id) { -+ PCIDevice *pdev = NULL; -+ int ret = pci_qdev_find_device(s->pci_id, &pdev); - -- /* This device should take care of IOMMU PCI properties */ -- if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { -- return; -+ if (ret) { -+ error_report("Cannot find PCI device '%s'", s->pci_id); -+ return; -+ } -+ -+ if (!object_dynamic_cast(OBJECT(pdev), TYPE_AMD_IOMMU_PCI)) { -+ error_report("Device '%s' must be an AMDVI-PCI device type", s->pci_id); -+ return; -+ } -+ -+ s->pci = AMD_IOMMU_PCI(pdev); -+ } else { -+ s->pci = AMD_IOMMU_PCI(object_new(TYPE_AMD_IOMMU_PCI)); -+ /* This device should take care of IOMMU PCI properties */ -+ if (!qdev_realize(DEVICE(s->pci), &bus->qbus, errp)) { -+ return; -+ } - } - -+ s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, -+ amdvi_uint64_equal, g_free, g_free); -+ - /* Pseudo address space under root PCI bus. 
*/ - x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); - -@@ -1663,6 +1681,7 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) - - static const Property amdvi_properties[] = { - DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false), -+ DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id), - }; - - static const VMStateDescription vmstate_amdvi_sysbus = { -@@ -1670,13 +1689,6 @@ static const VMStateDescription vmstate_amdvi_sysbus = { - .unmigratable = 1 - }; - --static void amdvi_sysbus_instance_init(Object *klass) --{ -- AMDVIState *s = AMD_IOMMU_DEVICE(klass); -- -- object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI); --} -- - static void amdvi_sysbus_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); -@@ -1696,7 +1708,6 @@ static const TypeInfo amdvi_sysbus = { - .name = TYPE_AMD_IOMMU_DEVICE, - .parent = TYPE_X86_IOMMU_DEVICE, - .instance_size = sizeof(AMDVIState), -- .instance_init = amdvi_sysbus_instance_init, - .class_init = amdvi_sysbus_class_init - }; - -diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h -index 28125130c6..7a28181d9c 100644 ---- a/hw/i386/amd_iommu.h -+++ b/hw/i386/amd_iommu.h -@@ -315,7 +315,8 @@ struct AMDVIPCIState { - - struct AMDVIState { - X86IOMMUState iommu; /* IOMMU bus device */ -- AMDVIPCIState pci; /* IOMMU PCI device */ -+ AMDVIPCIState *pci; /* IOMMU PCI device */ -+ char *pci_id; /* ID of AMDVI-PCI device, if user created */ - - uint32_t version; - --- -2.39.3 - diff --git a/kvm-hw-misc-aspeed_hace-skip-automatic-zero-init-of-larg.patch b/kvm-hw-misc-aspeed_hace-skip-automatic-zero-init-of-larg.patch deleted file mode 100644 index 87b163c..0000000 --- a/kvm-hw-misc-aspeed_hace-skip-automatic-zero-init-of-larg.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 2bfd29936ae867af81ac7aad36a615e5f478d0ae Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:56 +0100 -Subject: [PATCH 26/43] 
hw/misc/aspeed_hace: skip automatic zero-init of large - array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [18/31] 300760bfe80f17dd429ddbf8bb969a741e596421 (stefanha/centos-stream-qemu-kvm) - -The 'do_hash_operation' method has a 256 element iovec array used for -holding pointers to data that is to be hashed. Skip the automatic -zero-init of this array to eliminate the performance overhead in the -I/O hot path. - -The 'iovec' array will be selectively initialized based on data that -needs to be hashed. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-19-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 6992c886838282f36b20deee44b666bbfc573a8f) -Signed-off-by: Stefan Hajnoczi - -Conflicts: - hw/misc/aspeed_hace.c - - Context conflict due to missing commit b9ccbe212e24 - ("hw/misc/aspeed_hace: Extract accumulation-mode hash execution into - helper function") downstream. The commit is not a bug fix, so there is - no need to backport it. 
---- - hw/misc/aspeed_hace.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c -index d75da33353..9273aac9c1 100644 ---- a/hw/misc/aspeed_hace.c -+++ b/hw/misc/aspeed_hace.c -@@ -164,7 +164,7 @@ static int reconstruct_iov(AspeedHACEState *s, struct iovec *iov, int id, - static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode, - bool acc_mode) - { -- struct iovec iov[ASPEED_HACE_MAX_SG]; -+ QEMU_UNINITIALIZED struct iovec iov[ASPEED_HACE_MAX_SG]; - uint32_t total_msg_len; - uint32_t pad_offset; - g_autofree uint8_t *digest_buf = NULL; --- -2.39.3 - diff --git a/kvm-hw-net-rtl8139-skip-automatic-zero-init-of-large-arr.patch b/kvm-hw-net-rtl8139-skip-automatic-zero-init-of-large-arr.patch deleted file mode 100644 index 68af4bf..0000000 --- a/kvm-hw-net-rtl8139-skip-automatic-zero-init-of-large-arr.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 2aa9f26afe4f3c5d69771b71b0e69e123ac5d893 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:57 +0100 -Subject: [PATCH 27/43] hw/net/rtl8139: skip automatic zero-init of large array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [19/31] 489f599053f7f36dbf382a99697e89309e1273de (stefanha/centos-stream-qemu-kvm) - -The 'rtl8139_transmit_one' method has a 8k byte array used for -copying data between guest and host. Skip the automatic zero-init -of this array to eliminate the performance overhead in the I/O -hot path. - -The 'txbuffer' will be fully initialized when reading PCI DMA -buffers. - -Signed-off-by: Daniel P. 
Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-20-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 3ccc6489dd4925ddd1f3066bd3751389169cd7aa) -Signed-off-by: Stefan Hajnoczi ---- - hw/net/rtl8139.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 6c57a8985b..31a6956252 100644 ---- a/hw/net/rtl8139.c -+++ b/hw/net/rtl8139.c -@@ -1816,7 +1816,7 @@ static int rtl8139_transmit_one(RTL8139State *s, int descriptor) - - PCIDevice *d = PCI_DEVICE(s); - int txsize = s->TxStatus[descriptor] & 0x1fff; -- uint8_t txbuffer[0x2000]; -+ QEMU_UNINITIALIZED uint8_t txbuffer[0x2000]; - - DPRINTF("+++ transmit reading %d bytes from host memory at 0x%08x\n", - txsize, s->TxAddr[descriptor]); --- -2.39.3 - diff --git a/kvm-hw-net-tulip-skip-automatic-zero-init-of-large-array.patch b/kvm-hw-net-tulip-skip-automatic-zero-init-of-large-array.patch deleted file mode 100644 index 0b3cbca..0000000 --- a/kvm-hw-net-tulip-skip-automatic-zero-init-of-large-array.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 3164836816fb41146e617c769c3cc82f30fa2f38 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:58 +0100 -Subject: [PATCH 28/43] hw/net/tulip: skip automatic zero-init of large array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [20/31] 289701647a64bf8bbadfd32a4592ffc70d11dba9 (stefanha/centos-stream-qemu-kvm) - -The 'tulip_setup_frame' method has a 4k byte array used for copynig -DMA data from the device. Skip the automatic zero-init of this array -to eliminate the performance overhead in the I/O hot path. - -The 'buf' array will be fully initialized when reading data from the -device. 
- -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-21-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit e1afd5ee6eb2954f4baf3c97820e4aaf7de97d2a) -Signed-off-by: Stefan Hajnoczi ---- - hw/net/tulip.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/tulip.c b/hw/net/tulip.c -index a0646bb84c..97bffe4643 100644 ---- a/hw/net/tulip.c -+++ b/hw/net/tulip.c -@@ -629,7 +629,7 @@ static void tulip_setup_filter_addr(TULIPState *s, uint8_t *buf, int n) - static void tulip_setup_frame(TULIPState *s, - struct tulip_descriptor *desc) - { -- uint8_t buf[4096]; -+ QEMU_UNINITIALIZED uint8_t buf[4096]; - int len = (desc->control >> TDES1_BUF1_SIZE_SHIFT) & TDES1_BUF1_SIZE_MASK; - int i; - --- -2.39.3 - diff --git a/kvm-hw-net-virtio-net-skip-automatic-zero-init-of-large-.patch b/kvm-hw-net-virtio-net-skip-automatic-zero-init-of-large-.patch deleted file mode 100644 index 3d6a35f..0000000 --- a/kvm-hw-net-virtio-net-skip-automatic-zero-init-of-large-.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 9f9c5b4e9f0c230e81167f937d9d875a67e4558b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:36:59 +0100 -Subject: [PATCH 29/43] hw/net/virtio-net: skip automatic zero-init of large - arrays -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [21/31] 87ec97d9e701b7ee27f4b728f0622f5b730e1c03 (stefanha/centos-stream-qemu-kvm) - -The 'virtio_net_receive_rcu' method has three arrays with -VIRTQUEUE_MAX_SIZE elements, which are apprixmately 32k in -size used for copying data between guest and host. Skip the -automatic zero-init of these arrays to eliminate the -performance overhead in the I/O hot path. 
- -The three arrays will be selectively initialized as required -when processing network buffers. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-22-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 21cf31c51a7aeff4270c9b30b37e019c536d54b2) -Signed-off-by: Stefan Hajnoczi ---- - hw/net/virtio-net.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index bd37651dab..aaa024d70c 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -1910,9 +1910,9 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, - VirtIONet *n = qemu_get_nic_opaque(nc); - VirtIONetQueue *q; - VirtIODevice *vdev = VIRTIO_DEVICE(n); -- VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; -- size_t lens[VIRTQUEUE_MAX_SIZE]; -- struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; -+ QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; -+ QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE]; -+ QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; - struct virtio_net_hdr_v1_hash extra_hdr; - unsigned mhdr_cnt = 0; - size_t offset, i, guest_offset, j; --- -2.39.3 - diff --git a/kvm-hw-net-xgamc-skip-automatic-zero-init-of-large-array.patch b/kvm-hw-net-xgamc-skip-automatic-zero-init-of-large-array.patch deleted file mode 100644 index 3a554e5..0000000 --- a/kvm-hw-net-xgamc-skip-automatic-zero-init-of-large-array.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 9df074f93f69dcb7f3a61bcdb05c8e2ece7b6698 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:37:00 +0100 -Subject: [PATCH 30/43] hw/net/xgamc: skip automatic zero-init of large array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: 
RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [22/31] 252d607753e42558049b0516924dad3ca08092c0 (stefanha/centos-stream-qemu-kvm) - -The 'xgmac_enet_send' method has a 8k byte array used for copying -data between guest and host. Skip the automatic zero-init of this -array to eliminate the performance overhead in the I/O hot path. - -The 'frame' buffer will be fully initialized when reading guest -memory to fetch the data to send. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-23-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 8b723287b84a62bb5d1a7799ef0959ca8e6c293a) -Signed-off-by: Stefan Hajnoczi ---- - hw/net/xgmac.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c -index e3cc4c60eb..14225eb27a 100644 ---- a/hw/net/xgmac.c -+++ b/hw/net/xgmac.c -@@ -207,7 +207,7 @@ static void xgmac_enet_send(XgmacState *s) - struct desc bd; - int frame_size; - int len; -- uint8_t frame[8192]; -+ QEMU_UNINITIALIZED uint8_t frame[8192]; - uint8_t *ptr; - - ptr = frame; --- -2.39.3 - diff --git a/kvm-hw-nvme-ctrl-skip-automatic-zero-init-of-large-array.patch b/kvm-hw-nvme-ctrl-skip-automatic-zero-init-of-large-array.patch deleted file mode 100644 index cb185e8..0000000 --- a/kvm-hw-nvme-ctrl-skip-automatic-zero-init-of-large-array.patch +++ /dev/null @@ -1,72 +0,0 @@ -From a39a353ec7656ef7a805391270cec24dfa815b7d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:37:01 +0100 -Subject: [PATCH 31/43] hw/nvme/ctrl: skip automatic zero-init of large arrays -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [23/31] 926fb489c480ad10c8560efa934b8a7fb3e7a8a3 
(stefanha/centos-stream-qemu-kvm) - -The 'nvme_map_sgl' method has a 256 element array used for copying -data from the device. Skip the automatic zero-init of this array -to eliminate the performance overhead in the I/O hot path. - -The 'segment' array will be fully initialized when reading data from -the device. - -The 'nme_changed_nslist' method has a 4k byte array that is manually -initialized with memset(). The compiler ought to be intelligent -enough to turn the memset() into a static initialization operation, -and thus not duplicate the automatic zero-init. Replacing memset() -with '{}' makes it unambiguous that the array is statically initialized. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Klaus Jensen -Message-id: 20250610123709.835102-24-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 7eeb1d3acc175813ad3d5e824f26123e0992093a) -Signed-off-by: Stefan Hajnoczi ---- - hw/nvme/ctrl.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c -index d6b77d4fbc..ad6b264933 100644 ---- a/hw/nvme/ctrl.c -+++ b/hw/nvme/ctrl.c -@@ -1057,7 +1057,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, - */ - #define SEG_CHUNK_SIZE 256 - -- NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld; -+ QEMU_UNINITIALIZED NvmeSglDescriptor segment[SEG_CHUNK_SIZE]; -+ NvmeSglDescriptor *sgld, *last_sgld; - uint64_t nsgld; - uint32_t seg_len; - uint16_t status; -@@ -5128,7 +5129,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, - static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, - uint64_t off, NvmeRequest *req) - { -- uint32_t nslist[1024]; -+ uint32_t nslist[1024] = {}; - uint32_t trans_len; - int i = 0; - uint32_t nsid; -@@ -5138,7 +5139,6 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, - return NVME_INVALID_FIELD | NVME_DNR; - } 
- -- memset(nslist, 0x0, sizeof(nslist)); - trans_len = MIN(sizeof(nslist) - off, buf_len); - - while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) != --- -2.39.3 - diff --git a/kvm-hw-ppc-pnv_occ-skip-automatic-zero-init-of-large-str.patch b/kvm-hw-ppc-pnv_occ-skip-automatic-zero-init-of-large-str.patch deleted file mode 100644 index b03d925..0000000 --- a/kvm-hw-ppc-pnv_occ-skip-automatic-zero-init-of-large-str.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 71d1656aa3a7ea9b0bb7749212246f3dc8382534 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:37:02 +0100 -Subject: [PATCH 32/43] hw/ppc/pnv_occ: skip automatic zero-init of large - struct -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [24/31] c6db01eff71723d490feafb993764d76aa13e3da (stefanha/centos-stream-qemu-kvm) - -The 'occ_model_tick' method has a 12k struct used for copying -data between guest and host. Skip the automatic zero-init of this -struct to eliminate the performance overhead in the I/O hot path. - -The 'dynamic_data' buffer will be fully initialized when reading -data from the guest. - -Signed-off-by: Daniel P. 
Berrangé -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Klaus Jensen -Reviewed-by: Harsh Prateek Bora -Message-id: 20250610123709.835102-25-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 3438eabaf4f8ae58b6c47f1727938d1d7dac4823) -Signed-off-by: Stefan Hajnoczi ---- - hw/ppc/pnv_occ.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c -index 177c5e514b..577af71d43 100644 ---- a/hw/ppc/pnv_occ.c -+++ b/hw/ppc/pnv_occ.c -@@ -790,7 +790,7 @@ static bool occ_opal_process_command(PnvOCC *occ, - - static bool occ_model_tick(PnvOCC *occ) - { -- struct occ_dynamic_data dynamic_data; -+ QEMU_UNINITIALIZED struct occ_dynamic_data dynamic_data; - - if (!occ_read_dynamic_data(occ, &dynamic_data, NULL)) { - /* Can't move OCC state field to safe because we can't map it! */ --- -2.39.3 - diff --git a/kvm-hw-ppc-spapr_tpm_proxy-skip-automatic-zero-init-of-l.patch b/kvm-hw-ppc-spapr_tpm_proxy-skip-automatic-zero-init-of-l.patch deleted file mode 100644 index 468e38c..0000000 --- a/kvm-hw-ppc-spapr_tpm_proxy-skip-automatic-zero-init-of-l.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 3fafd694cf5df4fa7a9f4f48aad86748d18bd15a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:37:03 +0100 -Subject: [PATCH 33/43] hw/ppc/spapr_tpm_proxy: skip automatic zero-init of - large arrays -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [25/31] fe7e91a1f74a696e03336cf3429042681c77c4c7 (stefanha/centos-stream-qemu-kvm) - -The 'tpm_execute' method has a pair of 4k arrays used for copying -data between guest and host. Skip the automatic zero-init of these -arrays to eliminate the performance overhead in the I/O hot path. 
- -The two arrays will be fully initialized when reading data from -guest memory or reading data from the proxy FD. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Klaus Jensen -Reviewed-by: Harsh Prateek Bora -Message-id: 20250610123709.835102-26-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 5dd9087fff74b5672526cad254e76f790fb35c7a) -Signed-off-by: Stefan Hajnoczi ---- - hw/ppc/spapr_tpm_proxy.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr_tpm_proxy.c b/hw/ppc/spapr_tpm_proxy.c -index ceaa0acaa1..7472ff8709 100644 ---- a/hw/ppc/spapr_tpm_proxy.c -+++ b/hw/ppc/spapr_tpm_proxy.c -@@ -41,8 +41,8 @@ static ssize_t tpm_execute(SpaprTpmProxy *tpm_proxy, target_ulong *args) - target_ulong data_in_size = args[2]; - uint64_t data_out = ppc64_phys_to_real(args[3]); - target_ulong data_out_size = args[4]; -- uint8_t buf_in[TPM_SPAPR_BUFSIZE]; -- uint8_t buf_out[TPM_SPAPR_BUFSIZE]; -+ QEMU_UNINITIALIZED uint8_t buf_in[TPM_SPAPR_BUFSIZE]; -+ QEMU_UNINITIALIZED uint8_t buf_out[TPM_SPAPR_BUFSIZE]; - ssize_t ret; - - trace_spapr_tpm_execute(data_in, data_in_size, data_out, data_out_size); --- -2.39.3 - diff --git a/kvm-hw-s390x-ccw-device-Fix-memory-leak-in-loadparm-sett.patch b/kvm-hw-s390x-ccw-device-Fix-memory-leak-in-loadparm-sett.patch deleted file mode 100644 index 10fc8c4..0000000 --- a/kvm-hw-s390x-ccw-device-Fix-memory-leak-in-loadparm-sett.patch +++ /dev/null @@ -1,47 +0,0 @@ -From fee9f1ed047043035ce91284fe0f0feffb27d3af Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 25 Jun 2025 10:27:51 +0200 -Subject: [PATCH 02/43] hw/s390x/ccw-device: Fix memory leak in loadparm setter -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 386: s390x: Fix memory leaks related to loadparm [rhel-10] -RH-Jira: RHEL-98555 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Kevin Wolf -RH-Commit: 
[2/2] f238e2b4819d7e5daf53df3f2eed8744cf534c02 (thuth/qemu-kvm-cs) - -Commit bdf12f2a fixed the setter for the "loadparm" machine property, -which gets a string from a visitor, passes it to s390_ipl_fmt_loadparm() -and then forgot to free it. It left another instance of the same problem -unfixed in the "loadparm" device property. Fix it. - -Signed-off-by: Kevin Wolf -Message-ID: <20250625082751.24896-1-kwolf@redhat.com> -Reviewed-by: Eric Farman -Reviewed-by: Halil Pasic -Tested-by: Thomas Huth -Signed-off-by: Thomas Huth -(cherry picked from commit 78e3781541209b3dcd6f4bb66adf3a3e504b88a4) ---- - hw/s390x/ccw-device.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c -index 1ea9934f6c..a5ee9dc84d 100644 ---- a/hw/s390x/ccw-device.c -+++ b/hw/s390x/ccw-device.c -@@ -57,7 +57,7 @@ static void ccw_device_set_loadparm(Object *obj, Visitor *v, - Error **errp) - { - CcwDevice *dev = CCW_DEVICE(obj); -- char *val; -+ g_autofree char *val = NULL; - int index; - - index = object_property_get_int(obj, "bootindex", NULL); --- -2.39.3 - diff --git a/kvm-hw-scsi-lsi53c895a-skip-automatic-zero-init-of-large.patch b/kvm-hw-scsi-lsi53c895a-skip-automatic-zero-init-of-large.patch deleted file mode 100644 index 38b1bc2..0000000 --- a/kvm-hw-scsi-lsi53c895a-skip-automatic-zero-init-of-large.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 6a11d5845b62f4f1a1c481b9be33aae9acded335 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:37:05 +0100 -Subject: [PATCH 35/43] hw/scsi/lsi53c895a: skip automatic zero-init of large - array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [27/31] d54767678b4bd133b69cc7461220121eee04c9bb 
(stefanha/centos-stream-qemu-kvm) - -The 'lsi_memcpy' method has a 4k byte array used for copying data -to/from the device. Skip the automatic zero-init of this array to -eliminate the performance overhead in the I/O hot path. - -The 'buf' array will be fully initialized when data is copied. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Klaus Jensen -Reviewed-by: Harsh Prateek Bora -Message-id: 20250610123709.835102-28-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 55243edf42ee87bce9f36ca251f3ab9cda1563e4) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/lsi53c895a.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index 6689ebba25..bacc6593f6 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -1112,7 +1112,7 @@ bad: - static void lsi_memcpy(LSIState *s, uint32_t dest, uint32_t src, int count) - { - int n; -- uint8_t buf[LSI_BUF_SIZE]; -+ QEMU_UNINITIALIZED uint8_t buf[LSI_BUF_SIZE]; - - trace_lsi_memcpy(dest, src, count); - while (count) { --- -2.39.3 - diff --git a/kvm-hw-scsi-megasas-skip-automatic-zero-init-of-large-ar.patch b/kvm-hw-scsi-megasas-skip-automatic-zero-init-of-large-ar.patch deleted file mode 100644 index 98ca107..0000000 --- a/kvm-hw-scsi-megasas-skip-automatic-zero-init-of-large-ar.patch +++ /dev/null @@ -1,73 +0,0 @@ -From e1ca896e3388b7a92e3f65c37c7d0f7a9b4679a0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:37:06 +0100 -Subject: [PATCH 36/43] hw/scsi/megasas: skip automatic zero-init of large - arrays -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [28/31] 2123323efa4331a9899c62af9edeeac388f09fc4 
(stefanha/centos-stream-qemu-kvm) - -The 'megasas_dcmd_pd_get_list' and 'megasas_dcmd_get_properties' -methods have 4k structs used for copying data from the device. -Skip the automatic zero-init of this array to eliminate the -performance overhead in the I/O hot path. - -The 'info' structs are manually initialized with memset(). The -compiler ought to be intelligent enough to turn the memset() -into a static initialization operation, and thus not duplicate -the automatic zero-init. Replacing memset() with '{}' makes it -unambiguous that the arrays are statically initialized. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Klaus Jensen -Reviewed-by: Harsh Prateek Bora -Message-id: 20250610123709.835102-29-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit ca0559e2350c618048f7caf80cb79c1259e7cfd2) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/megasas.c | 7 ++----- - 1 file changed, 2 insertions(+), 5 deletions(-) - -diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c -index 9f3b30e6ce..79b0fdcfe3 100644 ---- a/hw/scsi/megasas.c -+++ b/hw/scsi/megasas.c -@@ -981,13 +981,11 @@ static int megasas_event_wait(MegasasState *s, MegasasCmd *cmd) - - static int megasas_dcmd_pd_get_list(MegasasState *s, MegasasCmd *cmd) - { -- struct mfi_pd_list info; -- size_t dcmd_size = sizeof(info); -+ struct mfi_pd_list info = {}; - BusChild *kid; - uint32_t offset, dcmd_limit, num_pd_disks = 0, max_pd_disks; - dma_addr_t residual; - -- memset(&info, 0, dcmd_size); - offset = 8; - dcmd_limit = offset + sizeof(struct mfi_pd_address); - if (cmd->iov_size < dcmd_limit) { -@@ -1429,11 +1427,10 @@ static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd) - - static int megasas_dcmd_get_properties(MegasasState *s, MegasasCmd *cmd) - { -- struct mfi_ctrl_props info; -+ struct mfi_ctrl_props info = {}; - size_t dcmd_size = sizeof(info); - dma_addr_t residual; - -- memset(&info, 0x0, dcmd_size); - if (cmd->iov_size < 
dcmd_size) { - trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, - dcmd_size); --- -2.39.3 - diff --git a/kvm-hw-ufs-lu-skip-automatic-zero-init-of-large-array.patch b/kvm-hw-ufs-lu-skip-automatic-zero-init-of-large-array.patch deleted file mode 100644 index 430b484..0000000 --- a/kvm-hw-ufs-lu-skip-automatic-zero-init-of-large-array.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b6904ecdbd09b38339963465dd9bd81c25acf5cc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:37:07 +0100 -Subject: [PATCH 37/43] hw/ufs/lu: skip automatic zero-init of large array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [29/31] 9b4da8039e1856614cd65981c2ec0a4b59c4e379 (stefanha/centos-stream-qemu-kvm) - -The 'ufs_emulate_scsi_cmd' method has a 4k byte array used for -copying data from the device. Skip the automatic zero-init of -this array to eliminate the performance overhead in the I/O hot -path. - -The 'outbuf' array will be fully initialized when data is copied -from the guest. - -Signed-off-by: Daniel P. 
Berrangé -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Klaus Jensen -Reviewed-by: Harsh Prateek Bora -Message-id: 20250610123709.835102-30-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 7708e298180550eac262c1fd742e6e80c711a5d8) -Signed-off-by: Stefan Hajnoczi ---- - hw/ufs/lu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/ufs/lu.c b/hw/ufs/lu.c -index 1c3794b2d4..63e482bf83 100644 ---- a/hw/ufs/lu.c -+++ b/hw/ufs/lu.c -@@ -194,7 +194,7 @@ static int ufs_emulate_wlun_inquiry(UfsRequest *req, uint8_t *outbuf, - static UfsReqResult ufs_emulate_scsi_cmd(UfsLu *lu, UfsRequest *req) - { - uint8_t lun = lu->lun; -- uint8_t outbuf[4096]; -+ QEMU_UNINITIALIZED uint8_t outbuf[4096]; - uint8_t sense_buf[UFS_SENSE_SIZE]; - uint8_t scsi_status; - int len = 0; --- -2.39.3 - diff --git a/kvm-hw-usb-hcd-ohci-skip-automatic-zero-init-of-large-ar.patch b/kvm-hw-usb-hcd-ohci-skip-automatic-zero-init-of-large-ar.patch deleted file mode 100644 index 4d09fdf..0000000 --- a/kvm-hw-usb-hcd-ohci-skip-automatic-zero-init-of-large-ar.patch +++ /dev/null @@ -1,50 +0,0 @@ -From d906c6d5ce5b69083890f4ceea4136e736704aab Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:37:04 +0100 -Subject: [PATCH 34/43] hw/usb/hcd-ohci: skip automatic zero-init of large - array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [26/31] 4a8000f1c5065bfc95318615952c62f7070500d9 (stefanha/centos-stream-qemu-kvm) - -The 'ohci_service_iso_td' method has a 8k byte array used for copying -data between guest and host. Skip the automatic zero-init of this -array to eliminate the performance overhead in the I/O hot path. 
- -The 'buf' array will be fully initialized when reading data from guest -memory. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Klaus Jensen -Reviewed-by: Harsh Prateek Bora -Message-id: 20250610123709.835102-27-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 14997d521d1cd0bb36c902ef1032f0d3f2a3c912) -Signed-off-by: Stefan Hajnoczi ---- - hw/usb/hcd-ohci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c -index 71b54914d3..72a9f9f474 100644 ---- a/hw/usb/hcd-ohci.c -+++ b/hw/usb/hcd-ohci.c -@@ -577,7 +577,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed) - USBDevice *dev; - USBEndpoint *ep; - USBPacket *pkt; -- uint8_t buf[8192]; -+ QEMU_UNINITIALIZED uint8_t buf[8192]; - bool int_req; - struct ohci_iso_td iso_td; - uint32_t addr; --- -2.39.3 - diff --git a/kvm-hw-virtio-virtio-avoid-cost-of-ftrivial-auto-var-ini.patch b/kvm-hw-virtio-virtio-avoid-cost-of-ftrivial-auto-var-ini.patch deleted file mode 100644 index 85fb74b..0000000 --- a/kvm-hw-virtio-virtio-avoid-cost-of-ftrivial-auto-var-ini.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 6063d3fedb2767f1a4d668828f49ef8505fa54f3 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 10 Jun 2025 13:36:40 +0100 -Subject: [PATCH 10/43] hw/virtio/virtio: avoid cost of -ftrivial-auto-var-init - in hot path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/31] 12f251ba363203812e46f268a6f46ccfdbf21b41 (stefanha/centos-stream-qemu-kvm) - -Since commit 7ff9ff039380 ("meson: mitigate against use of uninitialize -stack for exploits") the -ftrivial-auto-var-init=zero compiler option is -used to zero local variables. 
While this reduces security risks -associated with uninitialized stack data, it introduced a measurable -bottleneck in the virtqueue_split_pop() and virtqueue_packed_pop() -functions. - -These virtqueue functions are in the hot path. They are called for each -element (request) that is popped from a VIRTIO device's virtqueue. Using -__attribute__((uninitialized)) on large stack variables in these -functions improves fio randread bs=4k iodepth=64 performance from 304k -to 332k IOPS (+9%). - -This issue was found using perf-top(1). virtqueue_split_pop() was one of -the top CPU consumers and the "annotate" feature showed that the memory -zeroing instructions at the beginning of the functions were hot. - -Fixes: 7ff9ff039380 ("meson: mitigate against use of uninitialize stack for exploits") -Cc: Daniel P. Berrangé -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Stefan Hajnoczi -Message-id: 20250610123709.835102-3-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit ba2868ce091cd4abe4be6de4b7e44b3be303b352) -Signed-off-by: Stefan Hajnoczi ---- - hw/virtio/virtio.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 85110bce37..f41a418da3 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -1680,8 +1680,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) - VirtIODevice *vdev = vq->vdev; - VirtQueueElement *elem = NULL; - unsigned out_num, in_num, elem_entries; -- hwaddr addr[VIRTQUEUE_MAX_SIZE]; -- struct iovec iov[VIRTQUEUE_MAX_SIZE]; -+ hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE]; -+ struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE]; - VRingDesc desc; - int rc; - -@@ -1826,8 +1826,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) - VirtIODevice *vdev = vq->vdev; - VirtQueueElement *elem = NULL; - unsigned out_num, in_num, elem_entries; -- hwaddr addr[VIRTQUEUE_MAX_SIZE]; -- struct iovec iov[VIRTQUEUE_MAX_SIZE]; -+ hwaddr 
QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE]; -+ struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE]; - VRingPackedDesc desc; - uint16_t id; - int rc; --- -2.39.3 - diff --git a/kvm-include-qemu-compiler-add-QEMU_UNINITIALIZED-attribu.patch b/kvm-include-qemu-compiler-add-QEMU_UNINITIALIZED-attribu.patch deleted file mode 100644 index c5d1e4f..0000000 --- a/kvm-include-qemu-compiler-add-QEMU_UNINITIALIZED-attribu.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 0aea786f8ab4f7a4122bb2019ebe5136763c9609 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 10 Jun 2025 13:36:39 +0100 -Subject: [PATCH 09/43] include/qemu/compiler: add QEMU_UNINITIALIZED attribute - macro -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/31] ddf39e782add4426708480dfb78ebbd71fb777f0 (stefanha/centos-stream-qemu-kvm) - -The QEMU_UNINITIALIZED macro is to be used to skip the default compiler -variable initialization done by -ftrivial-auto-var-init=zero. - -Use this in cases where there a method in the device I/O path (or other -important hot paths), that has large variables on the stack. A rule of -thumb is that "large" means a method with 4kb data in the local stack -frame. Any variables which are KB in size, should be annotated with this -attribute, to pre-emptively eliminate any potential overhead from the -compiler zero'ing memory. - -Given that this turns off a security hardening feature, when using this -to flag variables, it is important that the code is double-checked to -ensure there is no possible use of uninitialized data in the method. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Daniel P. 
Berrangé -Message-id: 20250610123709.835102-2-berrange@redhat.com -[DB: split off patch & rewrite guidance on when to use the annotation] -Signed-off-by: Daniel P. Berrangé -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit c653b67d1863b7ebfa67f7c9f4aec209d7b5ced5) -Signed-off-by: Stefan Hajnoczi ---- - include/qemu/compiler.h | 20 ++++++++++++++++++++ - 1 file changed, 20 insertions(+) - -diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h -index 496dac5ac1..65b89958d3 100644 ---- a/include/qemu/compiler.h -+++ b/include/qemu/compiler.h -@@ -207,6 +207,26 @@ - # define QEMU_USED - #endif - -+/* -+ * Disable -ftrivial-auto-var-init on a local variable. -+ * -+ * Use this in cases where there a method in the device I/O path (or other -+ * important hot paths), that has large variables on the stack. A rule of -+ * thumb is that "large" means a method with 4kb data in the local stack -+ * frame. Any variables which are KB in size, should be annotated with this -+ * attribute, to pre-emptively eliminate any potential overhead from the -+ * compiler's implicit zero'ing of memory. -+ * -+ * Given that this turns off a security hardening feature, when using this -+ * to flag variables, it is important that the code is double-checked to -+ * ensure there is no possible use of uninitialized data in the method. 
-+ */ -+#if __has_attribute(uninitialized) -+# define QEMU_UNINITIALIZED __attribute__((uninitialized)) -+#else -+# define QEMU_UNINITIALIZED -+#endif -+ - /* - * http://clang.llvm.org/docs/ThreadSafetyAnalysis.html - * --- -2.39.3 - diff --git a/kvm-io-Fix-partial-struct-copy-in-qio_dns_resolver_looku.patch b/kvm-io-Fix-partial-struct-copy-in-qio_dns_resolver_looku.patch deleted file mode 100644 index d22cb95..0000000 --- a/kvm-io-Fix-partial-struct-copy-in-qio_dns_resolver_looku.patch +++ /dev/null @@ -1,73 +0,0 @@ -From dcf18bc367aac87def0b03e2f4450d14b6dd53a5 Mon Sep 17 00:00:00 2001 -From: Juraj Marcin -Date: Wed, 21 May 2025 15:52:30 +0200 -Subject: [PATCH 3/9] io: Fix partial struct copy in - qio_dns_resolver_lookup_sync_inet() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juraj Marcin -RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive -RH-Jira: RHEL-67706 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/7] 157425de9a5bcab4a63f84cceb35eb4954e7ed8b (JurajMarcin/centos-src-qemu-kvm) - -Commit aec21d3175 (qapi: Add InetSocketAddress member keep-alive) -introduces the keep-alive flag, but this flag is not copied together -with other options in qio_dns_resolver_lookup_sync_inet(). - -This patch fixes this issue and also prevents future ones by copying the -entire structure first and only then overriding a few attributes that -need to be different. - -Fixes: aec21d31756c (qapi: Add InetSocketAddress member keep-alive) -Signed-off-by: Juraj Marcin -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Daniel P. 
Berrangé - -(cherry picked from commit 0dc051aa85e1bd68d5c5110fa8af69204e6dbd3d) - -JIRA: https://issues.redhat.com/browse/RHEL-67706 - -Signed-off-by: Juraj Marcin ---- - io/dns-resolver.c | 21 +++++---------------- - 1 file changed, 5 insertions(+), 16 deletions(-) - -diff --git a/io/dns-resolver.c b/io/dns-resolver.c -index 53b0e8407a..3712438f82 100644 ---- a/io/dns-resolver.c -+++ b/io/dns-resolver.c -@@ -111,22 +111,11 @@ static int qio_dns_resolver_lookup_sync_inet(QIODNSResolver *resolver, - uaddr, INET6_ADDRSTRLEN, uport, 32, - NI_NUMERICHOST | NI_NUMERICSERV); - -- newaddr->u.inet = (InetSocketAddress){ -- .host = g_strdup(uaddr), -- .port = g_strdup(uport), -- .has_numeric = true, -- .numeric = true, -- .has_to = iaddr->has_to, -- .to = iaddr->to, -- .has_ipv4 = iaddr->has_ipv4, -- .ipv4 = iaddr->ipv4, -- .has_ipv6 = iaddr->has_ipv6, -- .ipv6 = iaddr->ipv6, --#ifdef HAVE_IPPROTO_MPTCP -- .has_mptcp = iaddr->has_mptcp, -- .mptcp = iaddr->mptcp, --#endif -- }; -+ newaddr->u.inet = *iaddr; -+ newaddr->u.inet.host = g_strdup(uaddr), -+ newaddr->u.inet.port = g_strdup(uport), -+ newaddr->u.inet.has_numeric = true, -+ newaddr->u.inet.numeric = true, - - (*addrs)[i] = newaddr; - } --- -2.39.3 - diff --git a/kvm-iotests-Improve-iotest-194-to-mirror-data.patch b/kvm-iotests-Improve-iotest-194-to-mirror-data.patch deleted file mode 100644 index caeec40..0000000 --- a/kvm-iotests-Improve-iotest-194-to-mirror-data.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 11b46a271d73631177f59ff581a408f967c30fb9 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:22 -0500 -Subject: [PATCH 05/14] iotests: Improve iotest 194 to mirror data - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/14] 9f1fd3c7d4332ac310af4eb37e8f2122f6324294 (ebblake/centos-qemu-kvm) - -Mirroring a completely sparse image to a 
sparse destination should be -practically instantaneous. It isn't yet, but the test will be more -realistic if it has some non-zero to mirror as well as the holes. - -Signed-off-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-ID: <20250509204341.3553601-20-eblake@redhat.com> -(cherry picked from commit eb89627899bb84148d272394e885725eff456ae9) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - tests/qemu-iotests/194 | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tests/qemu-iotests/194 b/tests/qemu-iotests/194 -index c0ce82dd25..d0b9c084f5 100755 ---- a/tests/qemu-iotests/194 -+++ b/tests/qemu-iotests/194 -@@ -34,6 +34,7 @@ with iotests.FilePath('source.img') as source_img_path, \ - - img_size = '1G' - iotests.qemu_img_create('-f', iotests.imgfmt, source_img_path, img_size) -+ iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write 512M 1M', source_img_path) - iotests.qemu_img_create('-f', iotests.imgfmt, dest_img_path, img_size) - - iotests.log('Launching VMs...') --- -2.39.3 - diff --git a/kvm-iotests-common.rc-add-disk_usage-function.patch b/kvm-iotests-common.rc-add-disk_usage-function.patch deleted file mode 100644 index 02fc97a..0000000 --- a/kvm-iotests-common.rc-add-disk_usage-function.patch +++ /dev/null @@ -1,68 +0,0 @@ -From d8ed5039981b1eb81d229d8ee672d5ee28862e92 Mon Sep 17 00:00:00 2001 -From: Andrey Drobyshev -Date: Fri, 9 May 2025 15:40:29 -0500 -Subject: [PATCH 12/14] iotests/common.rc: add disk_usage function - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/14] 0a007f9d09f01b50cf4edeb8ac8217356b2cb5d2 (ebblake/centos-qemu-kvm) - -Move the definition from iotests/250 to common.rc. This is used to -detect real disk usage of sparse files. 
In particular, we want to use -it for checking subclusters-based discards. - -Signed-off-by: Andrey Drobyshev -Reviewed-by: Alexander Ivanov -Reviewed-by: Alberto Garcia -Message-ID: <20240913163942.423050-6-andrey.drobyshev@virtuozzo.com> -Signed-off-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-ID: <20250509204341.3553601-27-eblake@redhat.com> -(cherry picked from commit be9bac072ede6e6aa27079f59efcf17b56bd7b26) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - tests/qemu-iotests/250 | 5 ----- - tests/qemu-iotests/common.rc | 6 ++++++ - 2 files changed, 6 insertions(+), 5 deletions(-) - -diff --git a/tests/qemu-iotests/250 b/tests/qemu-iotests/250 -index af48f83aba..c0a0dbc0ff 100755 ---- a/tests/qemu-iotests/250 -+++ b/tests/qemu-iotests/250 -@@ -52,11 +52,6 @@ _unsupported_imgopts data_file - # bdrv_co_truncate(bs->file) call in qcow2_co_truncate(), which might succeed - # anyway. - --disk_usage() --{ -- du --block-size=1 $1 | awk '{print $1}' --} -- - size=2100M - - _make_test_img -o "cluster_size=1M,preallocation=metadata" $size -diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc -index 95c12577dd..237f746af8 100644 ---- a/tests/qemu-iotests/common.rc -+++ b/tests/qemu-iotests/common.rc -@@ -140,6 +140,12 @@ _optstr_add() - fi - } - -+# report real disk usage for sparse files -+disk_usage() -+{ -+ du --block-size=1 "$1" | awk '{print $1}' -+} -+ - # Set the variables to the empty string to turn Valgrind off - # for specific processes, e.g. 
- # $ VALGRIND_QEMU_IO= ./check -qcow2 -valgrind 015 --- -2.39.3 - diff --git a/kvm-iotests-graph-changes-while-io-add-test-case-with-re.patch b/kvm-iotests-graph-changes-while-io-add-test-case-with-re.patch deleted file mode 100644 index 4ec2cc5..0000000 --- a/kvm-iotests-graph-changes-while-io-add-test-case-with-re.patch +++ /dev/null @@ -1,176 +0,0 @@ -From 19aa4d70aa02db7183997cfb2e6086a125ee2cdd Mon Sep 17 00:00:00 2001 -From: Andrey Drobyshev -Date: Fri, 30 May 2025 17:10:58 +0200 -Subject: [PATCH 33/33] iotests/graph-changes-while-io: add test case with - removal of lower snapshot - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [21/21] 2382d5d35e99a8d2dc481e1ddf89b475d13a620f (kmwolf/centos-qemu-kvm) - -This case is catching potential deadlock which takes place when job-dismiss -is issued when I/O requests are processed in a separate iothread. 
- -See https://mail.gnu.org/archive/html/qemu-devel/2025-04/msg04421.html - -Signed-off-by: Andrey Drobyshev -[FE: re-use top image and rename snap1->mid as suggested by Kevin Wolf - remove image file after test as suggested by Kevin Wolf - add type annotation for function argument to make mypy happy] -Signed-off-by: Fiona Ebner -Message-ID: <20250530151125.955508-22-f.ebner@proxmox.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 09d98a018e1fd2db0bb73bbe9b4a7110c8ae354f) -Signed-off-by: Kevin Wolf ---- - .../qemu-iotests/tests/graph-changes-while-io | 101 ++++++++++++++++-- - .../tests/graph-changes-while-io.out | 4 +- - 2 files changed, 96 insertions(+), 9 deletions(-) - -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io -index 35489e3b5e..dca1167b6d 100755 ---- a/tests/qemu-iotests/tests/graph-changes-while-io -+++ b/tests/qemu-iotests/tests/graph-changes-while-io -@@ -27,6 +27,7 @@ from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ - - - top = os.path.join(iotests.test_dir, 'top.img') -+mid = os.path.join(iotests.test_dir, 'mid.img') - nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') - - -@@ -59,6 +60,15 @@ class TestGraphChangesWhileIO(QMPTestCase): - self.qsd.stop() - os.remove(top) - -+ def _wait_for_blockjob(self, status: str) -> None: -+ done = False -+ while not done: -+ for event in self.qsd.get_qmp().get_events(wait=10.0): -+ if event['event'] != 'JOB_STATUS_CHANGE': -+ continue -+ if event['data']['status'] == status: -+ done = True -+ - def test_blockdev_add_while_io(self) -> None: - # Run qemu-img bench in the background - bench_thr = Thread(target=do_qemu_img_bench) -@@ -117,15 +127,92 @@ class TestGraphChangesWhileIO(QMPTestCase): - 'device': 'job0', - }) - -- cancelled = False -- while not cancelled: -- for event in self.qsd.get_qmp().get_events(wait=10.0): -- if event['event'] != 'JOB_STATUS_CHANGE': -- continue -- if 
event['data']['status'] == 'null': -- cancelled = True -+ self._wait_for_blockjob('null') -+ -+ bench_thr.join() -+ -+ def test_remove_lower_snapshot_while_io(self) -> None: -+ # Run qemu-img bench in the background -+ bench_thr = Thread(target=do_qemu_img_bench, args=(100000, )) -+ bench_thr.start() -+ -+ # While I/O is performed on 'node0' node, consequently add 2 snapshots -+ # on top of it, then remove (commit) them starting from lower one. -+ while bench_thr.is_alive(): -+ # Recreate snapshot images on every iteration -+ qemu_img_create('-f', imgfmt, mid, '1G') -+ qemu_img_create('-f', imgfmt, top, '1G') -+ -+ self.qsd.cmd('blockdev-add', { -+ 'driver': imgfmt, -+ 'node-name': 'mid', -+ 'file': { -+ 'driver': 'file', -+ 'filename': mid -+ } -+ }) -+ -+ self.qsd.cmd('blockdev-snapshot', { -+ 'node': 'node0', -+ 'overlay': 'mid', -+ }) -+ -+ self.qsd.cmd('blockdev-add', { -+ 'driver': imgfmt, -+ 'node-name': 'top', -+ 'file': { -+ 'driver': 'file', -+ 'filename': top -+ } -+ }) -+ -+ self.qsd.cmd('blockdev-snapshot', { -+ 'node': 'mid', -+ 'overlay': 'top', -+ }) -+ -+ self.qsd.cmd('block-commit', { -+ 'job-id': 'commit-mid', -+ 'device': 'top', -+ 'top-node': 'mid', -+ 'base-node': 'node0', -+ 'auto-finalize': True, -+ 'auto-dismiss': False, -+ }) -+ -+ self._wait_for_blockjob('concluded') -+ self.qsd.cmd('job-dismiss', { -+ 'id': 'commit-mid', -+ }) -+ -+ self.qsd.cmd('block-commit', { -+ 'job-id': 'commit-top', -+ 'device': 'top', -+ 'top-node': 'top', -+ 'base-node': 'node0', -+ 'auto-finalize': True, -+ 'auto-dismiss': False, -+ }) -+ -+ self._wait_for_blockjob('ready') -+ self.qsd.cmd('job-complete', { -+ 'id': 'commit-top', -+ }) -+ -+ self._wait_for_blockjob('concluded') -+ self.qsd.cmd('job-dismiss', { -+ 'id': 'commit-top', -+ }) -+ -+ self.qsd.cmd('blockdev-del', { -+ 'node-name': 'mid' -+ }) -+ self.qsd.cmd('blockdev-del', { -+ 'node-name': 'top' -+ }) - - bench_thr.join() -+ os.remove(mid) - - if __name__ == '__main__': - # Format must support raw 
backing files -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out -index fbc63e62f8..8d7e996700 100644 ---- a/tests/qemu-iotests/tests/graph-changes-while-io.out -+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out -@@ -1,5 +1,5 @@ --.. -+... - ---------------------------------------------------------------------- --Ran 2 tests -+Ran 3 tests - - OK --- -2.39.3 - diff --git a/kvm-iotests-graph-changes-while-io-remove-image-file-aft.patch b/kvm-iotests-graph-changes-while-io-remove-image-file-aft.patch deleted file mode 100644 index af0c52f..0000000 --- a/kvm-iotests-graph-changes-while-io-remove-image-file-aft.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 727fb4bf3409e170fbab697981f5f57b6ee2f93b Mon Sep 17 00:00:00 2001 -From: Fiona Ebner -Date: Fri, 30 May 2025 17:10:57 +0200 -Subject: [PATCH 32/33] iotests/graph-changes-while-io: remove image file after - test - -RH-Author: Kevin Wolf -RH-MergeRequest: 393: block: do not drain while holding the graph lock -RH-Jira: RHEL-88561 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Czenczek -RH-Commit: [20/21] b7c8fe57ca9058829b6c959ca2305420261d99f5 (kmwolf/centos-qemu-kvm) - -Suggested-by: Kevin Wolf -Signed-off-by: Fiona Ebner -Message-ID: <20250530151125.955508-21-f.ebner@proxmox.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit ed8c62927e8facebb1e41b417daee3109e398712) -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/tests/graph-changes-while-io | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io -index 194fda500e..35489e3b5e 100755 ---- a/tests/qemu-iotests/tests/graph-changes-while-io -+++ b/tests/qemu-iotests/tests/graph-changes-while-io -@@ -57,6 +57,7 @@ class TestGraphChangesWhileIO(QMPTestCase): - - def tearDown(self) -> None: - self.qsd.stop() -+ os.remove(top) - - def test_blockdev_add_while_io(self) 
-> None: - # Run qemu-img bench in the background --- -2.39.3 - diff --git a/kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch b/kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch deleted file mode 100644 index c2fc552..0000000 --- a/kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 8227a9534bb09d202441b3e554da53815be66a28 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Fri, 25 Apr 2025 13:17:12 +0100 -Subject: [PATCH 2/4] meson/configure: add 'valgrind' option & --{en, - dis}able-valgrind flag -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 360: distro: add an explicit valgrind-devel build dep -RH-Jira: RHEL-88457 -RH-Acked-by: Thomas Huth -RH-Acked-by: Eric Blake -RH-Commit: [1/2] 55ab738650e95ff0e951897001d9246a725ee699 (berrange/centos-src-qemu) - -Currently valgrind debugging support for coroutine stacks is enabled -unconditionally when valgrind/valgrind.h is found. There is no way -to disable valgrind support if valgrind.h is present in the build env. - -This is bad for distros, as an dependency far down the chain may cause -valgrind.h to become installed, inadvertently enabling QEMU's valgrind -debugging support. It also means if a distro wants valgrind support -there is no way to mandate this. - -The solution is to add a 'valgrind' build feature to meson and thus -configure script. - -Signed-off-by: Daniel P. 
Berrangé -Reviewed-by: Thomas Huth -Message-ID: <20250425121713.1913424-1-berrange@redhat.com> -Signed-off-by: Thomas Huth -(cherry picked from commit 6b1c744ec0d66d6d568f9a156282153fc11a21cf) ---- - meson.build | 13 ++++++++++++- - meson_options.txt | 2 ++ - scripts/meson-buildoptions.sh | 3 +++ - 3 files changed, 17 insertions(+), 1 deletion(-) - -diff --git a/meson.build b/meson.build -index 0607c1313b..dadd47d362 100644 ---- a/meson.build -+++ b/meson.build -@@ -2617,7 +2617,17 @@ config_host_data.set('CONFIG_FSTRIM', qga_fstrim) - # has_header - config_host_data.set('CONFIG_EPOLL', cc.has_header('sys/epoll.h')) - config_host_data.set('CONFIG_LINUX_MAGIC_H', cc.has_header('linux/magic.h')) --config_host_data.set('CONFIG_VALGRIND_H', cc.has_header('valgrind/valgrind.h')) -+valgrind = false -+if get_option('valgrind').allowed() -+ if cc.has_header('valgrind/valgrind.h') -+ valgrind = true -+ else -+ if get_option('valgrind').enabled() -+ error('valgrind requested but valgrind.h not found') -+ endif -+ endif -+endif -+config_host_data.set('CONFIG_VALGRIND_H', valgrind) - config_host_data.set('HAVE_BTRFS_H', cc.has_header('linux/btrfs.h')) - config_host_data.set('HAVE_DRM_H', cc.has_header('libdrm/drm.h')) - config_host_data.set('HAVE_OPENAT2_H', cc.has_header('linux/openat2.h')) -@@ -4856,6 +4866,7 @@ endif - if host_os == 'darwin' - summary_info += {'ParavirtualizedGraphics support': pvg} - endif -+summary_info += {'valgrind': valgrind} - summary(summary_info, bool_yn: true, section: 'Dependencies') - - if host_arch == 'unknown' -diff --git a/meson_options.txt b/meson_options.txt -index ad6996178c..6592d9fb07 100644 ---- a/meson_options.txt -+++ b/meson_options.txt -@@ -117,6 +117,8 @@ option('dbus_display', type: 'feature', value: 'auto', - description: '-display dbus support') - option('tpm', type : 'feature', value : 'auto', - description: 'TPM support') -+option('valgrind', type : 'feature', value: 'auto', -+ description: 'valgrind debug support for coroutine 
stacks') - - # Do not enable it by default even for Mingw32, because it doesn't - # work on Wine. -diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh -index e9edc8a919..541e1e7a2f 100644 ---- a/scripts/meson-buildoptions.sh -+++ b/scripts/meson-buildoptions.sh -@@ -199,6 +199,7 @@ meson_options_help() { - printf "%s\n" ' u2f U2F emulation support' - printf "%s\n" ' uadk UADK Library support' - printf "%s\n" ' usb-redir libusbredir support' -+ printf "%s\n" ' valgrind valgrind debug support for coroutine stacks' - printf "%s\n" ' vde vde network backend support' - printf "%s\n" ' vdi vdi image format support' - printf "%s\n" ' vduse-blk-export' -@@ -528,6 +529,8 @@ _meson_option_parse() { - --disable-ubsan) printf "%s" -Dubsan=false ;; - --enable-usb-redir) printf "%s" -Dusb_redir=enabled ;; - --disable-usb-redir) printf "%s" -Dusb_redir=disabled ;; -+ --enable-valgrind) printf "%s" -Dvalgrind=enabled ;; -+ --disable-valgrind) printf "%s" -Dvalgrind=disabled ;; - --enable-vde) printf "%s" -Dvde=enabled ;; - --disable-vde) printf "%s" -Dvde=disabled ;; - --enable-vdi) printf "%s" -Dvdi=enabled ;; --- -2.39.3 - diff --git a/kvm-migration-Add-qtest-for-migration-over-RDMA.patch b/kvm-migration-Add-qtest-for-migration-over-RDMA.patch deleted file mode 100644 index c6b3a73..0000000 --- a/kvm-migration-Add-qtest-for-migration-over-RDMA.patch +++ /dev/null @@ -1,222 +0,0 @@ -From a408d755e0c764f80c8dc50942c9d74e4458cf98 Mon Sep 17 00:00:00 2001 -From: Li Zhijian -Date: Tue, 11 Mar 2025 10:42:21 +0800 -Subject: [PATCH 08/33] migration: Add qtest for migration over RDMA -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/11] bf9644e800b982d81d3b68f6d3951c207c66fc76 
(pjp/cs-qemu-kvm) - -This qtest requires there is a RDMA(RoCE) link in the host. -In order to make the test work smoothly, introduce a -scripts/rdma-migration-helper.sh to detect existing RoCE link before -running the test. - -Test will be skipped if there is no available RoCE link. - # Start of rdma tests - # Running /x86_64/migration/precopy/rdma/plain - ok 1 /x86_64/migration/precopy/rdma/plain # SKIP No rdma link available - # To enable the test: - # Run 'scripts/rdma-migration-helper.sh setup' with root to setup a new rdma/rxe link and rerun the test - # Optional: run 'scripts/rdma-migration-helper.sh clean' to revert the 'setup' - - # End of rdma tests - -Jira: https://issues.redhat.com/browse/RHEL-59697 -Cc: Philippe Mathieu-Daudé -Cc: Stefan Hajnoczi -Reviewed-by: Peter Xu -Signed-off-by: Li Zhijian -Message-ID: <20250311024221.363421-1-lizhijian@fujitsu.com> -[add 'head -1' to script, reformat test message] -Signed-off-by: Fabiano Rosas -(cherry picked from commit 7d9849c3c41463ab9ba40348a8606927dc0fb85d) -Signed-off-by: Prasad Pandit ---- - MAINTAINERS | 1 + - scripts/rdma-migration-helper.sh | 70 +++++++++++++++++++++++++++ - tests/qtest/migration/precopy-tests.c | 66 +++++++++++++++++++++++++ - 3 files changed, 137 insertions(+) - create mode 100755 scripts/rdma-migration-helper.sh - -diff --git a/MAINTAINERS b/MAINTAINERS -index d54b5578f8..465aedbcfb 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -3516,6 +3516,7 @@ R: Li Zhijian - R: Peter Xu - S: Odd Fixes - F: migration/rdma* -+F: scripts/rdma-migration-helper.sh - - Migration dirty limit and dirty page rate - M: Hyman Huang -diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh -new file mode 100755 -index 0000000000..a39f2fb0e5 ---- /dev/null -+++ b/scripts/rdma-migration-helper.sh -@@ -0,0 +1,70 @@ -+#!/bin/bash -+ -+# Copied from blktests -+get_ipv4_addr() -+{ -+ ip -4 -o addr show dev "$1" | -+ sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' | -+ 
head -1 | tr -d '\n' -+} -+ -+# existing rdma interfaces -+rdma_interfaces() -+{ -+ rdma link show | sed -nE 's/^link .* netdev ([^ ]+).*$/\1 /p' -+} -+ -+# existing valid ipv4 interfaces -+ipv4_interfaces() -+{ -+ ip -o addr show | awk '/inet / {print $2}' | grep -v -w lo -+} -+ -+rdma_rxe_detect() -+{ -+ for r in $(rdma_interfaces) -+ do -+ ipv4_interfaces | grep -qw $r && get_ipv4_addr $r && return -+ done -+ -+ return 1 -+} -+ -+rdma_rxe_setup() -+{ -+ for i in $(ipv4_interfaces) -+ do -+ rdma_interfaces | grep -qw $i && continue -+ rdma link add "${i}_rxe" type rxe netdev "$i" && { -+ echo "Setup new rdma/rxe ${i}_rxe for $i with $(get_ipv4_addr $i)" -+ return -+ } -+ done -+ -+ echo "Failed to setup any new rdma/rxe link" >&2 -+ return 1 -+} -+ -+rdma_rxe_clean() -+{ -+ modprobe -r rdma_rxe -+} -+ -+operation=${1:-detect} -+ -+command -v rdma >/dev/null || { -+ echo "Command 'rdma' is not available, please install it first." >&2 -+ exit 1 -+} -+ -+if [ "$operation" == "setup" ] || [ "$operation" == "clean" ]; then -+ [ "$UID" == 0 ] || { -+ echo "Root privilege is required to setup/clean a rdma/rxe link" >&2 -+ exit 1 -+ } -+ rdma_rxe_"$operation" -+elif [ "$operation" == "detect" ]; then -+ rdma_rxe_detect -+else -+ echo "Usage: $0 [setup | detect | clean]" -+fi -diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c -index f8404793b8..87b0a7e8ef 100644 ---- a/tests/qtest/migration/precopy-tests.c -+++ b/tests/qtest/migration/precopy-tests.c -@@ -99,6 +99,68 @@ static void test_precopy_unix_dirty_ring(void) - test_precopy_common(&args); - } - -+#ifdef CONFIG_RDMA -+ -+#define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh" -+static int new_rdma_link(char *buffer) -+{ -+ char cmd[256]; -+ bool verbose = g_getenv("QTEST_LOG"); -+ -+ snprintf(cmd, sizeof(cmd), "%s detect %s", RDMA_MIGRATION_HELPER, -+ verbose ? 
"" : "2>/dev/null"); -+ -+ FILE *pipe = popen(cmd, "r"); -+ if (pipe == NULL) { -+ perror("Failed to run script"); -+ return -1; -+ } -+ -+ int idx = 0; -+ while (fgets(buffer + idx, 128 - idx, pipe) != NULL) { -+ idx += strlen(buffer); -+ } -+ -+ int status = pclose(pipe); -+ if (status == -1) { -+ perror("Error reported by pclose()"); -+ return -1; -+ } else if (WIFEXITED(status)) { -+ return WEXITSTATUS(status); -+ } -+ -+ return -1; -+} -+ -+static void test_precopy_rdma_plain(void) -+{ -+ char buffer[128] = {}; -+ -+ if (new_rdma_link(buffer)) { -+ g_test_skip("No rdma link available\n" -+ "# To enable the test:\n" -+ "# Run \'" RDMA_MIGRATION_HELPER " setup\' with root to " -+ "setup a new rdma/rxe link and rerun the test\n" -+ "# Optional: run 'scripts/rdma-migration-helper.sh clean' " -+ "to revert the 'setup'"); -+ return; -+ } -+ -+ /* -+ * TODO: query a free port instead of hard code. -+ * 29200=('R'+'D'+'M'+'A')*100 -+ **/ -+ g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer); -+ -+ MigrateCommon args = { -+ .listen_uri = uri, -+ .connect_uri = uri, -+ }; -+ -+ test_precopy_common(&args); -+} -+#endif -+ - static void test_precopy_tcp_plain(void) - { - MigrateCommon args = { -@@ -1127,6 +1189,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env) - test_multifd_tcp_uri_none); - migration_test_add("/migration/multifd/tcp/plain/cancel", - test_multifd_tcp_cancel); -+#ifdef CONFIG_RDMA -+ migration_test_add("/migration/precopy/rdma/plain", -+ test_precopy_rdma_plain); -+#endif - } - - void migration_test_add_precopy(MigrationTestEnv *env) --- -2.39.3 - diff --git a/kvm-migration-Add-save_postcopy_prepare-savevm-handler.patch b/kvm-migration-Add-save_postcopy_prepare-savevm-handler.patch deleted file mode 100644 index 861a15a..0000000 --- a/kvm-migration-Add-save_postcopy_prepare-savevm-handler.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 883ddd4af17376fc62bdee9f4b30dfaa45d0c968 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: 
Fri, 11 Apr 2025 17:15:30 +0530 -Subject: [PATCH 03/33] migration: Add save_postcopy_prepare() savevm handler - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/11] 2f2f108041d921985e2ddce5fd8e805fc539e74f (pjp/cs-qemu-kvm) - -Add a savevm handler for a module to opt-in sending extra sections right -before postcopy starts, and before VM is stopped. - -RAM will start to use this new savevm handler in the next patch to do flush -and sync for multifd pages. - -Note that we choose to do it before VM stopped because the current only -potential user is not sensitive to VM status, so doing it before VM is -stopped is preferred to enlarge any postcopy downtime. - -It is still a bit unfortunate that we need to introduce such a new savevm -handler just for the only use case, however it's so far the cleanest. - -Jira: https://issues.redhat.com/browse/RHEL-59697 -Signed-off-by: Peter Xu -Signed-off-by: Prasad Pandit -Reviewed-by: Fabiano Rosas -Message-ID: <20250411114534.3370816-4-ppandit@redhat.com> -Signed-off-by: Fabiano Rosas -(cherry picked from commit 1d481116015428c02f7e3635f9bc0b88b0978fdc) -Signed-off-by: Prasad Pandit ---- - include/migration/register.h | 15 +++++++++++++++ - migration/migration.c | 4 ++++ - migration/savevm.c | 33 +++++++++++++++++++++++++++++++++ - migration/savevm.h | 1 + - 4 files changed, 53 insertions(+) - -diff --git a/include/migration/register.h b/include/migration/register.h -index c041ce32f2..b79dc81b8d 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -189,6 +189,21 @@ typedef struct SaveVMHandlers { - - /* This runs outside the BQL! */ - -+ /** -+ * @save_postcopy_prepare -+ * -+ * This hook will be invoked on the source side right before switching -+ * to postcopy (before VM stopped). 
-+ * -+ * @f: QEMUFile where to send the data -+ * @opaque: Data pointer passed to register_savevm_live() -+ * @errp: Error** used to report error message -+ * -+ * Returns: true if succeeded, false if error occured. When false is -+ * returned, @errp must be set. -+ */ -+ bool (*save_postcopy_prepare)(QEMUFile *f, void *opaque, Error **errp); -+ - /** - * @state_pending_estimate - * -diff --git a/migration/migration.c b/migration/migration.c -index 64f4f40ae3..4bb29b7193 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2717,6 +2717,10 @@ static int postcopy_start(MigrationState *ms, Error **errp) - } - } - -+ if (!qemu_savevm_state_postcopy_prepare(ms->to_dst_file, errp)) { -+ return -1; -+ } -+ - trace_postcopy_start(); - bql_lock(); - trace_postcopy_start_set_run(); -diff --git a/migration/savevm.c b/migration/savevm.c -index ce158c3512..23ef4c7dc9 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1523,6 +1523,39 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f) - qemu_fflush(f); - } - -+bool qemu_savevm_state_postcopy_prepare(QEMUFile *f, Error **errp) -+{ -+ SaveStateEntry *se; -+ bool ret; -+ -+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { -+ if (!se->ops || !se->ops->save_postcopy_prepare) { -+ continue; -+ } -+ -+ if (se->ops->is_active) { -+ if (!se->ops->is_active(se->opaque)) { -+ continue; -+ } -+ } -+ -+ trace_savevm_section_start(se->idstr, se->section_id); -+ -+ save_section_header(f, se, QEMU_VM_SECTION_PART); -+ ret = se->ops->save_postcopy_prepare(f, se->opaque, errp); -+ save_section_footer(f, se); -+ -+ trace_savevm_section_end(se->idstr, se->section_id, ret); -+ -+ if (!ret) { -+ assert(*errp); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) - { - int64_t start_ts_each, end_ts_each; -diff --git a/migration/savevm.h b/migration/savevm.h -index 138c39a7f9..2d5e9c7166 100644 ---- a/migration/savevm.h -+++ 
b/migration/savevm.h -@@ -45,6 +45,7 @@ void qemu_savevm_state_pending_exact(uint64_t *must_precopy, - void qemu_savevm_state_pending_estimate(uint64_t *must_precopy, - uint64_t *can_postcopy); - int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy); -+bool qemu_savevm_state_postcopy_prepare(QEMUFile *f, Error **errp); - void qemu_savevm_send_ping(QEMUFile *f, uint32_t value); - void qemu_savevm_send_open_return_path(QEMUFile *f); - int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len); --- -2.39.3 - diff --git a/kvm-migration-enable-multifd-and-postcopy-together.patch b/kvm-migration-enable-multifd-and-postcopy-together.patch deleted file mode 100644 index 5b8a373..0000000 --- a/kvm-migration-enable-multifd-and-postcopy-together.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 12f7ba5e8b344e578dc99f5ce6e371d4c51108bb Mon Sep 17 00:00:00 2001 -From: Prasad Pandit -Date: Mon, 12 May 2025 18:21:23 +0530 -Subject: [PATCH 07/33] migration: enable multifd and postcopy together - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/11] 3e5d91a4aed5f18f1c29ae1ab9296ae41b6cf3ca (pjp/cs-qemu-kvm) - -Enable Multifd and Postcopy migration together. -The migration_ioc_process_incoming() routine checks -magic value sent on each channel and helps to properly -setup multifd and postcopy channels. - -The Precopy and Multifd threads work during the initial -guest RAM transfer. When migration moves to the Postcopy -phase, the multifd threads cease to send data on multifd -channels and Postcopy threads on the destination -request/pull data from the source side. 
- -Jira: https://issues.redhat.com/browse/RHEL-59697 -Reviewed-by: Fabiano Rosas -Signed-off-by: Prasad Pandit -Link: https://lore.kernel.org/r/20250512125124.147064-3-ppandit@redhat.com -Signed-off-by: Peter Xu -(cherry picked from commit e27418861288285d20352448fef4491a68223d39) -Signed-off-by: Prasad Pandit ---- - migration/multifd-nocomp.c | 3 ++- - migration/multifd.c | 7 +++++++ - migration/options.c | 5 ----- - migration/ram.c | 5 ++--- - 4 files changed, 11 insertions(+), 9 deletions(-) - -diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c -index ffe75256c9..02f8bf8ce8 100644 ---- a/migration/multifd-nocomp.c -+++ b/migration/multifd-nocomp.c -@@ -17,6 +17,7 @@ - #include "migration-stats.h" - #include "multifd.h" - #include "options.h" -+#include "migration.h" - #include "qapi/error.h" - #include "qemu/cutils.h" - #include "qemu/error-report.h" -@@ -399,7 +400,7 @@ int multifd_ram_flush_and_sync(QEMUFile *f) - MultiFDSyncReq req; - int ret; - -- if (!migrate_multifd()) { -+ if (!migrate_multifd() || migration_in_postcopy()) { - return 0; - } - -diff --git a/migration/multifd.c b/migration/multifd.c -index 6139cabe44..074d16d07d 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -1379,6 +1379,13 @@ static void *multifd_recv_thread(void *opaque) - } - - if (has_data) { -+ /* -+ * multifd thread should not be active and receive data -+ * when migration is in the Postcopy phase. Two threads -+ * writing the same memory area could easily corrupt -+ * the guest state. 
-+ */ -+ assert(!migration_in_postcopy()); - if (is_device_state) { - assert(use_packets); - ret = multifd_device_state_recv(p, &local_err); -diff --git a/migration/options.c b/migration/options.c -index b0ac2ea408..48aa6076de 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -491,11 +491,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - error_setg(errp, "Postcopy is not compatible with ignore-shared"); - return false; - } -- -- if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { -- error_setg(errp, "Postcopy is not yet compatible with multifd"); -- return false; -- } - } - - if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { -diff --git a/migration/ram.c b/migration/ram.c -index 856769a77c..6f390b28d9 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2013,9 +2013,8 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) - } - } - -- if (migrate_multifd()) { -- RAMBlock *block = pss->block; -- return ram_save_multifd_page(block, offset); -+ if (migrate_multifd() && !migration_in_postcopy()) { -+ return ram_save_multifd_page(pss->block, offset); - } - - return ram_save_page(rs, pss); --- -2.39.3 - diff --git a/kvm-migration-multifd-move-macros-to-multifd-header.patch b/kvm-migration-multifd-move-macros-to-multifd-header.patch deleted file mode 100644 index 053eeba..0000000 --- a/kvm-migration-multifd-move-macros-to-multifd-header.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 8eb8ea8cf070ac88d8caa50c4a0cfc9e00398616 Mon Sep 17 00:00:00 2001 -From: Prasad Pandit -Date: Fri, 11 Apr 2025 17:15:28 +0530 -Subject: [PATCH 01/33] migration/multifd: move macros to multifd header - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/11] 438135a73f4247ac3d35f2798b7ca75b0b55cbd8 (pjp/cs-qemu-kvm) - -Move MULTIFD_ macros to the 
header file so that -they are accessible from other source files. - -Jira: https://issues.redhat.com/browse/RHEL-59697 -Reviewed-by: Fabiano Rosas -Signed-off-by: Prasad Pandit -Reviewed-by: Peter Xu -Message-ID: <20250411114534.3370816-2-ppandit@redhat.com> -Signed-off-by: Fabiano Rosas -(cherry picked from commit 56e3c89f44ecebc946fbe4ffed325d1a79b26e38) -Signed-off-by: Prasad Pandit ---- - migration/multifd.c | 5 ----- - migration/multifd.h | 5 +++++ - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index dfb5189f0e..6139cabe44 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -36,11 +36,6 @@ - #include "io/channel-socket.h" - #include "yank_functions.h" - --/* Multiple fd's */ -- --#define MULTIFD_MAGIC 0x11223344U --#define MULTIFD_VERSION 1 -- - typedef struct { - uint32_t magic; - uint32_t version; -diff --git a/migration/multifd.h b/migration/multifd.h -index 2d337e7b3b..9b6d81e7ed 100644 ---- a/migration/multifd.h -+++ b/migration/multifd.h -@@ -49,6 +49,11 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); - bool multifd_recv(void); - MultiFDRecvData *multifd_get_recv_data(void); - -+/* Multiple fd's */ -+ -+#define MULTIFD_MAGIC 0x11223344U -+#define MULTIFD_VERSION 1 -+ - /* Multifd Compression flags */ - #define MULTIFD_FLAG_SYNC (1 << 0) - --- -2.39.3 - diff --git a/kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch b/kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch deleted file mode 100644 index c6d9ec0..0000000 --- a/kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch +++ /dev/null @@ -1,234 +0,0 @@ -From 2e49bdf20390600286cd2596859feb400a932c44 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Thu, 24 Apr 2025 18:07:05 -0400 -Subject: [PATCH 1/4] migration/postcopy: Spatial locality page hint for - preempt mode - -RH-Author: Peter Xu -RH-MergeRequest: 357: migration/postcopy: Spatial locality page hint for preempt mode 
-RH-Jira: RHEL-85635 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 05f3befe60e1d3f2f8beaaf0e0e577e49a126dfe (peterx/qemu-kvm) - -The preempt mode postcopy has been introduced for a while. From latency -POV, it should always win the vanilla postcopy. - -However there's one thing missing when preempt mode is enabled right now, -which is the spatial locality hint when there're page requests from the -destination side. - -In vanilla postcopy, as long as a page request was unqueued, it will update -the PSS of the precopy background stream, so that after a page request the -background thread will move the pages after whatever was requested. It's -pretty much a natural behavior when there's only one channel anyway, and -one scanner to send the pages. - -Preempt mode didn't follow that, because preempt mode has its own channel -and its own PSS (which doesn't linearly scan the guest memory, but -dedicated to resolve page requested from destination). So the page request -process and the background migration process are completely separate. - -This patch adds the hint explicitly for preempt mode. With that, whenever -the preempt mode receives a page request on the source, it will service the -remote page fault in the return path, then it'll provide a hint to the -background thread so that we'll start sending the pages right after the -requested ones in the background, assuming the follow up pages have a -higher chance to be accessed later. - -NOTE: since the background migration thread and return path thread run -completely concurrently, it doesn't always mean the hint will be applied -every single time. For example, it's possible that the return path thread -receives multiple page requests in a row without the background thread -getting the chance to consume one. In such case, the preempt thread only -provide the hint if the previous hint has been consumed. After all, -there's no point queuing hints when we only have one linear scanner. 
- -This could measureably improve the simple sequential memory access pattern -during postcopy (when preempt is on). For random accesses, I can measure a -slight increase of remote page fault latency from ~500us -> ~600us, that -could be a trade-off to have such hint mechanism, and after all that's -still greatly improved comparing to vanilla postcopy on random (~10ms). - -The patch is verified by our QE team in a video streaming test case, to -reduce the pause of the video from ~1min to a few seconds when switching -over to postcopy with preempt mode. - -Reported-by: Xiaohui Li -Tested-by: Xiaohui Li -Reviewed-by: Juraj Marcin -Link: https://lore.kernel.org/r/20250424220705.195544-1-peterx@redhat.com -Signed-off-by: Peter Xu -(cherry picked from commit 20d82622812d888478d04a2d0d8575d70eb5d749) -Signed-off-by: Peter Xu ---- - migration/ram.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 96 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 424df6d9f1..21d2f87ff1 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -91,6 +91,36 @@ - - XBZRLECacheStats xbzrle_counters; - -+/* -+ * This structure locates a specific location of a guest page. In QEMU, -+ * it's described in a tuple of (ramblock, offset). -+ */ -+struct PageLocation { -+ RAMBlock *block; -+ unsigned long offset; -+}; -+typedef struct PageLocation PageLocation; -+ -+/** -+ * PageLocationHint: describes a hint to a page location -+ * -+ * @valid set if the hint is vaild and to be consumed -+ * @location: the hint content -+ * -+ * In postcopy preempt mode, the urgent channel may provide hints to the -+ * background channel, so that QEMU source can try to migrate whatever is -+ * right after the requested urgent pages. -+ * -+ * This is based on the assumption that the VM (already running on the -+ * destination side) tends to access the memory with spatial locality. -+ * This is also the default behavior of vanilla postcopy (preempt off). 
-+ */ -+struct PageLocationHint { -+ bool valid; -+ PageLocation location; -+}; -+typedef struct PageLocationHint PageLocationHint; -+ - /* used by the search for pages to send */ - struct PageSearchStatus { - /* The migration channel used for a specific host page */ -@@ -395,6 +425,13 @@ struct RAMState { - * RAM migration. - */ - unsigned int postcopy_bmap_sync_requested; -+ /* -+ * Page hint during postcopy when preempt mode is on. Return path -+ * thread sets it, while background migration thread consumes it. -+ * -+ * Protected by @bitmap_mutex. -+ */ -+ PageLocationHint page_hint; - }; - typedef struct RAMState RAMState; - -@@ -2039,6 +2076,21 @@ static void pss_host_page_finish(PageSearchStatus *pss) - pss->host_page_start = pss->host_page_end = 0; - } - -+static void ram_page_hint_update(RAMState *rs, PageSearchStatus *pss) -+{ -+ PageLocationHint *hint = &rs->page_hint; -+ -+ /* If there's a pending hint not consumed, don't bother */ -+ if (hint->valid) { -+ return; -+ } -+ -+ /* Provide a hint to the background stream otherwise */ -+ hint->location.block = pss->block; -+ hint->location.offset = pss->page; -+ hint->valid = true; -+} -+ - /* - * Send an urgent host page specified by `pss'. Need to be called with - * bitmap_mutex held. -@@ -2084,6 +2136,7 @@ out: - /* For urgent requests, flush immediately if sent */ - if (sent) { - qemu_fflush(pss->pss_channel); -+ ram_page_hint_update(rs, pss); - } - return ret; - } -@@ -2171,6 +2224,30 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) - return (res < 0 ? 
res : pages); - } - -+static bool ram_page_hint_valid(RAMState *rs) -+{ -+ /* There's only page hint during postcopy preempt mode */ -+ if (!postcopy_preempt_active()) { -+ return false; -+ } -+ -+ return rs->page_hint.valid; -+} -+ -+static void ram_page_hint_collect(RAMState *rs, RAMBlock **block, -+ unsigned long *page) -+{ -+ PageLocationHint *hint = &rs->page_hint; -+ -+ assert(hint->valid); -+ -+ *block = hint->location.block; -+ *page = hint->location.offset; -+ -+ /* Mark the hint consumed */ -+ hint->valid = false; -+} -+ - /** - * ram_find_and_save_block: finds a dirty page and sends it to f - * -@@ -2187,6 +2264,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) - static int ram_find_and_save_block(RAMState *rs) - { - PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY]; -+ unsigned long next_page; -+ RAMBlock *next_block; - int pages = 0; - - /* No dirty page as there is zero RAM */ -@@ -2206,7 +2285,14 @@ static int ram_find_and_save_block(RAMState *rs) - rs->last_page = 0; - } - -- pss_init(pss, rs->last_seen_block, rs->last_page); -+ if (ram_page_hint_valid(rs)) { -+ ram_page_hint_collect(rs, &next_block, &next_page); -+ } else { -+ next_block = rs->last_seen_block; -+ next_page = rs->last_page; -+ } -+ -+ pss_init(pss, next_block, next_page); - - while (true){ - if (!get_queued_page(rs, pss)) { -@@ -2339,6 +2425,13 @@ static void ram_save_cleanup(void *opaque) - ram_state_cleanup(rsp); - } - -+static void ram_page_hint_reset(PageLocationHint *hint) -+{ -+ hint->location.block = NULL; -+ hint->location.offset = 0; -+ hint->valid = false; -+} -+ - static void ram_state_reset(RAMState *rs) - { - int i; -@@ -2351,6 +2444,8 @@ static void ram_state_reset(RAMState *rs) - rs->last_page = 0; - rs->last_version = ram_list.version; - rs->xbzrle_started = false; -+ -+ ram_page_hint_reset(&rs->page_hint); - } - - #define MAX_WAIT 50 /* ms, half buffered_file limit */ --- -2.39.3 - diff --git 
a/kvm-migration-ram-Implement-save_postcopy_prepare.patch b/kvm-migration-ram-Implement-save_postcopy_prepare.patch deleted file mode 100644 index 6991716..0000000 --- a/kvm-migration-ram-Implement-save_postcopy_prepare.patch +++ /dev/null @@ -1,85 +0,0 @@ -From d5b76b77dc891f0bec211a6d00b099a2979223ee Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 11 Apr 2025 17:15:31 +0530 -Subject: [PATCH 04/33] migration/ram: Implement save_postcopy_prepare() - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/11] 4df55b4e65458ffccf48df5de3afad0b38cded51 (pjp/cs-qemu-kvm) - -Implement save_postcopy_prepare(), preparing for the enablement -of both multifd and postcopy. - -Jira: https://issues.redhat.com/browse/RHEL-59697 -Signed-off-by: Peter Xu -Signed-off-by: Prasad Pandit -Reviewed-by: Fabiano Rosas -Message-ID: <20250411114534.3370816-5-ppandit@redhat.com> -Signed-off-by: Fabiano Rosas -(cherry picked from commit ad8d82ffbb8b8034f58a570911e6e9c6328c9384) -Signed-off-by: Prasad Pandit ---- - migration/ram.c | 37 +++++++++++++++++++++++++++++++++++++ - 1 file changed, 37 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 21d2f87ff1..856769a77c 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -4515,6 +4515,42 @@ static int ram_resume_prepare(MigrationState *s, void *opaque) - return 0; - } - -+static bool ram_save_postcopy_prepare(QEMUFile *f, void *opaque, Error **errp) -+{ -+ int ret; -+ -+ if (migrate_multifd()) { -+ /* -+ * When multifd is enabled, source QEMU needs to make sure all the -+ * pages queued before postcopy starts have been flushed. -+ * -+ * The load of these pages must happen before switching to postcopy. 
-+ * It's because loading of guest pages (so far) in multifd recv -+ * threads is still non-atomic, so the load cannot happen with vCPUs -+ * running on the destination side. -+ * -+ * This flush and sync will guarantee that those pages are loaded -+ * _before_ postcopy starts on the destination. The rationale is, -+ * this happens before VM stops (and before source QEMU sends all -+ * the rest of the postcopy messages). So when the destination QEMU -+ * receives the postcopy messages, it must have received the sync -+ * message on the main channel (either RAM_SAVE_FLAG_MULTIFD_FLUSH, -+ * or RAM_SAVE_FLAG_EOS), and such message would guarantee that -+ * all previous guest pages queued in the multifd channels are -+ * completely loaded. -+ */ -+ ret = multifd_ram_flush_and_sync(f); -+ if (ret < 0) { -+ error_setg(errp, "%s: multifd flush and sync failed", __func__); -+ return false; -+ } -+ } -+ -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ -+ return true; -+} -+ - void postcopy_preempt_shutdown_file(MigrationState *s) - { - qemu_put_be64(s->postcopy_qemufile_src, RAM_SAVE_FLAG_EOS); -@@ -4534,6 +4570,7 @@ static SaveVMHandlers savevm_ram_handlers = { - .load_setup = ram_load_setup, - .load_cleanup = ram_load_cleanup, - .resume_prepare = ram_resume_prepare, -+ .save_postcopy_prepare = ram_save_postcopy_prepare, - }; - - static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, --- -2.39.3 - diff --git a/kvm-migration-refactor-channel-discovery-mechanism.patch b/kvm-migration-refactor-channel-discovery-mechanism.patch deleted file mode 100644 index d77363b..0000000 --- a/kvm-migration-refactor-channel-discovery-mechanism.patch +++ /dev/null @@ -1,239 +0,0 @@ -From 21ec86cdc48de9ddf3f5bba994edd9f9427ffd4c Mon Sep 17 00:00:00 2001 -From: Prasad Pandit -Date: Fri, 11 Apr 2025 17:15:29 +0530 -Subject: [PATCH 02/33] migration: refactor channel discovery mechanism - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy 
features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/11] 7f40da01d8c9a827627c73641f3b90a27bbfb8a0 (pjp/cs-qemu-kvm) - -The various logical migration channels don't have a -standardized way of advertising themselves and their -connections may be seen out of order by the migration -destination. When a new connection arrives, the incoming -migration currently make use of heuristics to determine -which channel it belongs to. - -The next few patches will need to change how the multifd -and postcopy capabilities interact and that affects the -channel discovery heuristic. - -Refactor the channel discovery heuristic to make it less -opaque and simplify the subsequent patches. - -Jira: https://issues.redhat.com/browse/RHEL-59697 -Signed-off-by: Prasad Pandit -Reviewed-by: Fabiano Rosas -Message-ID: <20250411114534.3370816-3-ppandit@redhat.com> -Signed-off-by: Fabiano Rosas -(cherry picked from commit 00f3fcef1981eb23f98b956d9cda2df528bfef40) -Signed-off-by: Prasad Pandit ---- - migration/migration.c | 130 +++++++++++++++++++++++------------------- - 1 file changed, 70 insertions(+), 60 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index d46e776e24..64f4f40ae3 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -95,6 +95,9 @@ enum mig_rp_message_type { - MIG_RP_MSG_MAX - }; - -+/* Migration channel types */ -+enum { CH_MAIN, CH_MULTIFD, CH_POSTCOPY }; -+ - /* When we add fault tolerance, we could have several - migrations at once. 
For now we don't need to add - dynamic creation of migration */ -@@ -931,9 +934,8 @@ static void migration_incoming_setup(QEMUFile *f) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - -- if (!mis->from_src_file) { -- mis->from_src_file = f; -- } -+ assert(!mis->from_src_file); -+ mis->from_src_file = f; - qemu_file_set_blocking(f, false); - } - -@@ -985,28 +987,19 @@ void migration_fd_process_incoming(QEMUFile *f) - migration_incoming_process(); - } - --/* -- * Returns true when we want to start a new incoming migration process, -- * false otherwise. -- */ --static bool migration_should_start_incoming(bool main_channel) -+static bool migration_has_main_and_multifd_channels(void) - { -- /* Multifd doesn't start unless all channels are established */ -- if (migrate_multifd()) { -- return migration_has_all_channels(); -+ MigrationIncomingState *mis = migration_incoming_get_current(); -+ if (!mis->from_src_file) { -+ /* main channel not established */ -+ return false; - } - -- /* Preempt channel only starts when the main channel is created */ -- if (migrate_postcopy_preempt()) { -- return main_channel; -+ if (migrate_multifd() && !multifd_recv_all_channels_created()) { -+ return false; - } - -- /* -- * For all the rest types of migration, we should only reach here when -- * it's the main channel that's being created, and we should always -- * proceed with this channel. 
-- */ -- assert(main_channel); -+ /* main and all multifd channels are established */ - return true; - } - -@@ -1015,59 +1008,81 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - MigrationIncomingState *mis = migration_incoming_get_current(); - Error *local_err = NULL; - QEMUFile *f; -- bool default_channel = true; -+ uint8_t channel; - uint32_t channel_magic = 0; - int ret = 0; - -- if (migrate_multifd() && !migrate_mapped_ram() && -- !migrate_postcopy_ram() && -- qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { -- /* -- * With multiple channels, it is possible that we receive channels -- * out of order on destination side, causing incorrect mapping of -- * source channels on destination side. Check channel MAGIC to -- * decide type of channel. Please note this is best effort, postcopy -- * preempt channel does not send any magic number so avoid it for -- * postcopy live migration. Also tls live migration already does -- * tls handshake while initializing main channel so with tls this -- * issue is not possible. -- */ -- ret = migration_channel_read_peek(ioc, (void *)&channel_magic, -- sizeof(channel_magic), errp); -+ if (!migration_has_main_and_multifd_channels()) { -+ if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { -+ /* -+ * With multiple channels, it is possible that we receive channels -+ * out of order on destination side, causing incorrect mapping of -+ * source channels on destination side. Check channel MAGIC to -+ * decide type of channel. Please note this is best effort, -+ * postcopy preempt channel does not send any magic number so -+ * avoid it for postcopy live migration. Also tls live migration -+ * already does tls handshake while initializing main channel so -+ * with tls this issue is not possible. 
-+ */ -+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic, -+ sizeof(channel_magic), errp); -+ if (ret != 0) { -+ return; -+ } - -- if (ret != 0) { -+ channel_magic = be32_to_cpu(channel_magic); -+ if (channel_magic == QEMU_VM_FILE_MAGIC) { -+ channel = CH_MAIN; -+ } else if (channel_magic == MULTIFD_MAGIC) { -+ assert(migrate_multifd()); -+ channel = CH_MULTIFD; -+ } else if (!mis->from_src_file && -+ mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { -+ /* reconnect main channel for postcopy recovery */ -+ channel = CH_MAIN; -+ } else { -+ error_setg(errp, "unknown channel magic: %u", channel_magic); -+ return; -+ } -+ } else if (mis->from_src_file && migrate_multifd()) { -+ /* -+ * Non-peekable channels like tls/file are processed as -+ * multifd channels when multifd is enabled. -+ */ -+ channel = CH_MULTIFD; -+ } else if (!mis->from_src_file) { -+ channel = CH_MAIN; -+ } else { -+ error_setg(errp, "non-peekable channel used without multifd"); - return; - } -- -- default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); - } else { -- default_channel = !mis->from_src_file; -+ assert(migrate_postcopy_preempt()); -+ channel = CH_POSTCOPY; - } - - if (multifd_recv_setup(errp) != 0) { - return; - } - -- if (default_channel) { -+ if (channel == CH_MAIN) { - f = qemu_file_new_input(ioc); - migration_incoming_setup(f); -- } else { -+ } else if (channel == CH_MULTIFD) { - /* Multiple connections */ -- assert(migration_needs_multiple_sockets()); -- if (migrate_multifd()) { -- multifd_recv_new_channel(ioc, &local_err); -- } else { -- assert(migrate_postcopy_preempt()); -- f = qemu_file_new_input(ioc); -- postcopy_preempt_new_channel(mis, f); -- } -+ multifd_recv_new_channel(ioc, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } -+ } else if (channel == CH_POSTCOPY) { -+ assert(!mis->postcopy_qemufile_dst); -+ f = qemu_file_new_input(ioc); -+ postcopy_preempt_new_channel(mis, f); -+ return; - } - -- if 
(migration_should_start_incoming(default_channel)) { -+ if (migration_has_main_and_multifd_channels()) { - /* If it's a recovery, we're done */ - if (postcopy_try_recover()) { - return; -@@ -1084,18 +1099,13 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - */ - bool migration_has_all_channels(void) - { -- MigrationIncomingState *mis = migration_incoming_get_current(); -- -- if (!mis->from_src_file) { -+ if (!migration_has_main_and_multifd_channels()) { - return false; - } - -- if (migrate_multifd()) { -- return multifd_recv_all_channels_created(); -- } -- -- if (migrate_postcopy_preempt()) { -- return mis->postcopy_qemufile_dst != NULL; -+ MigrationIncomingState *mis = migration_incoming_get_current(); -+ if (migrate_postcopy_preempt() && !mis->postcopy_qemufile_dst) { -+ return false; - } - - return true; --- -2.39.3 - diff --git a/kvm-migration-write-zero-pages-when-postcopy-enabled.patch b/kvm-migration-write-zero-pages-when-postcopy-enabled.patch deleted file mode 100644 index 953132e..0000000 --- a/kvm-migration-write-zero-pages-when-postcopy-enabled.patch +++ /dev/null @@ -1,69 +0,0 @@ -From d25e369e01fcb30d4d12802907372d3320c095ff Mon Sep 17 00:00:00 2001 -From: Prasad Pandit -Date: Mon, 12 May 2025 18:21:22 +0530 -Subject: [PATCH 06/33] migration: write zero pages when postcopy enabled - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/11] 28ab95cd8a382d400d28511b0bb2e1ea1fd21c0a (pjp/cs-qemu-kvm) - -During multifd migration, zero pages are written if -they are migrated more than once. - -This may result in a migration thread hang issue when -multifd and postcopy are enabled together. - -When postcopy is enabled, always write zero pages as and -when they are migrated. 
- -Jira: https://issues.redhat.com/browse/RHEL-59697 -Signed-off-by: Prasad Pandit -Reviewed-by: Fabiano Rosas -Link: https://lore.kernel.org/r/20250512125124.147064-2-ppandit@redhat.com -Signed-off-by: Peter Xu -(cherry picked from commit 249543d0c02d7645b8bcda552dad138769e96831) -Signed-off-by: Prasad Pandit ---- - migration/multifd-zero-page.c | 22 ++++++++++++++++++++-- - 1 file changed, 20 insertions(+), 2 deletions(-) - -diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c -index f1e988a959..3e0a04f2b5 100644 ---- a/migration/multifd-zero-page.c -+++ b/migration/multifd-zero-page.c -@@ -85,9 +85,27 @@ void multifd_recv_zero_page_process(MultiFDRecvParams *p) - { - for (int i = 0; i < p->zero_num; i++) { - void *page = p->host + p->zero[i]; -- if (ramblock_recv_bitmap_test_byte_offset(p->block, p->zero[i])) { -+ bool received = -+ ramblock_recv_bitmap_test_byte_offset(p->block, p->zero[i]); -+ -+ /* -+ * During multifd migration zero page is written to the memory -+ * only if it is migrated more than once. -+ * -+ * It becomes a problem when both multifd & postcopy options are -+ * enabled. If the zero page which was skipped during multifd phase, -+ * is accessed during the postcopy phase of the migration, a page -+ * fault occurs. But this page fault is not served because the -+ * 'receivedmap' says the zero page is already received. Thus the -+ * thread accessing that page may hang. -+ * -+ * When postcopy is enabled, always write the zero page as and when -+ * it is migrated. 
-+ */ -+ if (migrate_postcopy_ram() || received) { - memset(page, 0, multifd_ram_page_size()); -- } else { -+ } -+ if (!received) { - ramblock_recv_bitmap_set_offset(p->block, p->zero[i]); - } - } --- -2.39.3 - diff --git a/kvm-mirror-Allow-QMP-override-to-declare-target-already-.patch b/kvm-mirror-Allow-QMP-override-to-declare-target-already-.patch deleted file mode 100644 index 19b79e7..0000000 --- a/kvm-mirror-Allow-QMP-override-to-declare-target-already-.patch +++ /dev/null @@ -1,295 +0,0 @@ -From bc4571743fc3bbb829101fbf294615e3b8fb3577 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:25 -0500 -Subject: [PATCH 08/14] mirror: Allow QMP override to declare target already - zero - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/14] 4ad06d67db8c43df4e6e0b8f929b1d8c19e4b338 (ebblake/centos-qemu-kvm) - -QEMU has an optimization for a just-created drive-mirror destination -that is not possible for blockdev-mirror (which can't create the -destination) - any time we know the destination starts life as all -zeroes, we can skip a pre-zeroing pass on the destination. Recent -patches have added an improved heuristic for detecting if a file -contains all zeroes, and we plan to use that heuristic in upcoming -patches. But since a heuristic cannot quickly detect all scenarios, -and there may be cases where the caller is aware of information that -QEMU cannot learn quickly, it makes sense to have a way to tell QEMU -to assume facts about the destination that can make the mirror -operation faster. Given our existing example of "qemu-img convert ---target-is-zero", it is time to expose this override in QMP for -blockdev-mirror as well. - -This patch results in some slight redundancy between the older -s->zero_target (set any time mode==FULL and the destination image was -not just created - ie. 
clear if drive-mirror is asking to skip the -pre-zero pass) and the newly-introduced s->target_is_zero (in addition -to the QMP override, it is set when drive-mirror creates the -destination image); this will be cleaned up in the next patch. - -There is also a subtlety that we must consider. When drive-mirror is -passing target_is_zero on behalf of a just-created image, we know the -image is sparse (skipping the pre-zeroing keeps it that way), so it -doesn't matter whether the destination also has "discard":"unmap" and -"detect-zeroes":"unmap". But now that we are letting the user set the -knob for target-is-zero, if the user passes a pre-existing file that -is fully allocated, it is fine to leave the file fully allocated under -"detect-zeroes":"on", but if the file is open with -"detect-zeroes":"unmap", we should really be trying harder to punch -holes in the destination for every region of zeroes copied from the -source. The easiest way to do this is to still run the pre-zeroing -pass (turning the entire destination file sparse before populating -just the allocated portions of the source), even though that currently -results in double I/O to the portions of the file that are allocated. -A later patch will add further optimizations to reduce redundant -zeroing I/O during the mirror operation. - -Since "target-is-zero":true is designed for optimizations, it is okay -to silently ignore the parameter rather than erroring if the user ever -sets the parameter in a scenario where the mirror job can't exploit it -(for example, when doing "sync":"top" instead of "sync":"full", we -can't pre-zero, so setting the parameter won't make a speed -difference). 
- -Signed-off-by: Eric Blake -Acked-by: Markus Armbruster -Message-ID: <20250509204341.3553601-23-eblake@redhat.com> -Reviewed-by: Sunny Zhu -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit d17a34bfb94bda3a89d7320ae67255ded1d8c939) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/mirror.c | 27 ++++++++++++++++++++++---- - blockdev.c | 18 ++++++++++------- - include/block/block_int-global-state.h | 3 ++- - qapi/block-core.json | 8 +++++++- - tests/unit/test-block-iothread.c | 2 +- - 5 files changed, 44 insertions(+), 14 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 2599b75d09..4dcb50c81a 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -55,6 +55,8 @@ typedef struct MirrorBlockJob { - BlockMirrorBackingMode backing_mode; - /* Whether the target image requires explicit zero-initialization */ - bool zero_target; -+ /* Whether the target should be assumed to be already zero initialized */ -+ bool target_is_zero; - /* - * To be accesssed with atomics. Written only under the BQL (required by the - * current implementation of mirror_change()). -@@ -844,12 +846,26 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) - BlockDriverState *target_bs = blk_bs(s->target); - int ret = -EIO; - int64_t count; -+ bool punch_holes = -+ target_bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && -+ bdrv_can_write_zeroes_with_unmap(target_bs); - - bdrv_graph_co_rdlock(); - bs = s->mirror_top_bs->backing->bs; - bdrv_graph_co_rdunlock(); - -- if (s->zero_target) { -+ if (s->zero_target && (!s->target_is_zero || punch_holes)) { -+ /* -+ * Here, we are in FULL mode; our goal is to avoid writing -+ * zeroes if the destination already reads as zero, except -+ * when we are trying to punch holes. 
This is possible if -+ * zeroing happened externally (s->target_is_zero) or if we -+ * have a fast way to pre-zero the image (the dirty bitmap -+ * will be populated later by the non-zero portions, the same -+ * as for TOP mode). If pre-zeroing is not fast, or we need -+ * to punch holes, then our only recourse is to write the -+ * entire image. -+ */ - if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); - return 0; -@@ -1714,7 +1730,7 @@ static BlockJob *mirror_start_job( - uint32_t granularity, int64_t buf_size, - MirrorSyncMode sync_mode, - BlockMirrorBackingMode backing_mode, -- bool zero_target, -+ bool zero_target, bool target_is_zero, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - bool unmap, -@@ -1883,6 +1899,7 @@ static BlockJob *mirror_start_job( - s->sync_mode = sync_mode; - s->backing_mode = backing_mode; - s->zero_target = zero_target; -+ s->target_is_zero = target_is_zero; - qatomic_set(&s->copy_mode, copy_mode); - s->base = base; - s->base_overlay = bdrv_find_overlay(bs, base); -@@ -2011,7 +2028,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - int creation_flags, int64_t speed, - uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, -- bool zero_target, -+ bool zero_target, bool target_is_zero, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, -@@ -2034,7 +2051,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - - mirror_start_job(job_id, bs, creation_flags, target, replaces, - speed, granularity, buf_size, mode, backing_mode, -- zero_target, on_source_error, on_target_error, unmap, -+ zero_target, -+ target_is_zero, on_source_error, on_target_error, unmap, - NULL, NULL, &mirror_job_driver, base, false, - filter_node_name, true, copy_mode, false, errp); - } -@@ -2062,6 +2080,7 @@ BlockJob *commit_active_start(const char 
*job_id, BlockDriverState *bs, - job = mirror_start_job( - job_id, bs, creation_flags, base, NULL, speed, 0, 0, - MIRROR_SYNC_MODE_TOP, MIRROR_LEAVE_BACKING_CHAIN, false, -+ false, - on_error, on_error, true, cb, opaque, - &commit_active_job_driver, base, auto_complete, - filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, -diff --git a/blockdev.c b/blockdev.c -index 1d1f27cfff..2e2fed539e 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2798,7 +2798,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - const char *replaces, - enum MirrorSyncMode sync, - BlockMirrorBackingMode backing_mode, -- bool zero_target, -+ bool zero_target, bool target_is_zero, - bool has_speed, int64_t speed, - bool has_granularity, uint32_t granularity, - bool has_buf_size, int64_t buf_size, -@@ -2909,11 +2909,10 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - /* pass the node name to replace to mirror start since it's loose coupling - * and will allow to check whether the node still exist at mirror completion - */ -- mirror_start(job_id, bs, target, -- replaces, job_flags, -+ mirror_start(job_id, bs, target, replaces, job_flags, - speed, granularity, buf_size, sync, backing_mode, zero_target, -- on_source_error, on_target_error, unmap, filter_node_name, -- copy_mode, errp); -+ target_is_zero, on_source_error, on_target_error, unmap, -+ filter_node_name, copy_mode, errp); - } - - void qmp_drive_mirror(DriveMirror *arg, Error **errp) -@@ -2928,6 +2927,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - int64_t size; - const char *format = arg->format; - bool zero_target; -+ bool target_is_zero; - int ret; - - bs = qmp_get_root_bs(arg->device, errp); -@@ -3044,6 +3044,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL && - (arg->mode == NEW_IMAGE_MODE_EXISTING || - !bdrv_has_zero_init(target_bs))); -+ target_is_zero = (arg->mode != NEW_IMAGE_MODE_EXISTING && 
-+ bdrv_has_zero_init(target_bs)); - bdrv_graph_rdunlock_main_loop(); - - -@@ -3055,7 +3057,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - - blockdev_mirror_common(arg->job_id, bs, target_bs, - arg->replaces, arg->sync, -- backing_mode, zero_target, -+ backing_mode, zero_target, target_is_zero, - arg->has_speed, arg->speed, - arg->has_granularity, arg->granularity, - arg->has_buf_size, arg->buf_size, -@@ -3085,6 +3087,7 @@ void qmp_blockdev_mirror(const char *job_id, - bool has_copy_mode, MirrorCopyMode copy_mode, - bool has_auto_finalize, bool auto_finalize, - bool has_auto_dismiss, bool auto_dismiss, -+ bool has_target_is_zero, bool target_is_zero, - Error **errp) - { - BlockDriverState *bs; -@@ -3115,7 +3118,8 @@ void qmp_blockdev_mirror(const char *job_id, - - blockdev_mirror_common(job_id, bs, target_bs, - replaces, sync, backing_mode, -- zero_target, has_speed, speed, -+ zero_target, has_target_is_zero && target_is_zero, -+ has_speed, speed, - has_granularity, granularity, - has_buf_size, buf_size, - has_on_source_error, on_source_error, -diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h -index eb2d92a226..8cf0003ce7 100644 ---- a/include/block/block_int-global-state.h -+++ b/include/block/block_int-global-state.h -@@ -140,6 +140,7 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, - * @mode: Whether to collapse all images in the chain to the target. - * @backing_mode: How to establish the target's backing chain after completion. - * @zero_target: Whether the target should be explicitly zero-initialized -+ * @target_is_zero: Whether the target already is zero-initialized. - * @on_source_error: The action to take upon error reading from the source. - * @on_target_error: The action to take upon error writing to the target. - * @unmap: Whether to unmap target where source sectors only contain zeroes. 
-@@ -159,7 +160,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - int creation_flags, int64_t speed, - uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, -- bool zero_target, -+ bool zero_target, bool target_is_zero, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index b1937780e1..7f70ec6d3c 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -2538,6 +2538,11 @@ - # disappear from the query list without user intervention. - # Defaults to true. (Since 3.1) - # -+# @target-is-zero: Assume the destination reads as all zeroes before -+# the mirror started. Setting this to true can speed up the -+# mirror. Setting this to true when the destination is not -+# actually all zero can corrupt the destination. (Since 10.1) -+# - # Since: 2.6 - # - # .. qmp-example:: -@@ -2557,7 +2562,8 @@ - '*on-target-error': 'BlockdevOnError', - '*filter-node-name': 'str', - '*copy-mode': 'MirrorCopyMode', -- '*auto-finalize': 'bool', '*auto-dismiss': 'bool' }, -+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool', -+ '*target-is-zero': 'bool'}, - 'allow-preconfig': true } - - ## -diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c -index e26b3be593..54aed8252c 100644 ---- a/tests/unit/test-block-iothread.c -+++ b/tests/unit/test-block-iothread.c -@@ -755,7 +755,7 @@ static void test_propagate_mirror(void) - - /* Start a mirror job */ - mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, -- MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false, -+ MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false, false, - BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, - false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, - &error_abort); --- -2.39.3 - diff --git a/kvm-mirror-Drop-redundant-zero_target-parameter.patch 
b/kvm-mirror-Drop-redundant-zero_target-parameter.patch deleted file mode 100644 index bd35c8c..0000000 --- a/kvm-mirror-Drop-redundant-zero_target-parameter.patch +++ /dev/null @@ -1,241 +0,0 @@ -From db1a158312c2b94af1c1a50e0f13ace6ae58f0b6 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:26 -0500 -Subject: [PATCH 09/14] mirror: Drop redundant zero_target parameter - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/14] b4cbd267c81b4758f59e0d51b947fd450caf6ef5 (ebblake/centos-qemu-kvm) - -The two callers to a mirror job (drive-mirror and blockdev-mirror) set -zero_target precisely when sync mode == FULL, with the one exception -that drive-mirror skips zeroing the target if it was newly created and -reads as zero. But given the previous patch, that exception is -equally captured by target_is_zero. - -Meanwhile, there is another slight wrinkle, fortunately caught by -iotest 185: if the caller uses "sync":"top" but the source has no -backing file, the code in blockdev.c was changing sync to be FULL, but -only after it had set zero_target=false. In mirror.c, prior to recent -patches, this didn't matter: the only places that inspected sync were -setting is_none_mode (both TOP and FULL had set that to false), and -mirror_start() setting base = mode == MIRROR_SYNC_MODE_TOP ? -bdrv_backing_chain_next(bs) : NULL. But now that we are passing sync -around, the slammed sync mode would result in a new pre-zeroing pass -even when the user had passed "sync":"top" in an effort to skip -pre-zeroing. Fortunately, the assignment of base when bs has no -backing chain still works out to NULL if we don't slam things. 
So -with the forced change of sync ripped out of blockdev.c, the sync mode -is passed through the full callstack unmolested, and we can now -reliably reconstruct the same settings as what used to be passed in by -zero_target=false, without the redundant parameter. - -Signed-off-by: Eric Blake -Message-ID: <20250509204341.3553601-24-eblake@redhat.com> -Reviewed-by: Sunny Zhu -Reviewed-by: Stefan Hajnoczi -[eblake: Fix regression in iotest 185] -Signed-off-by: Eric Blake -(cherry picked from commit 253b43a29077de9266351e120c600a73b82e9c49) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/mirror.c | 13 +++++-------- - blockdev.c | 19 ++++--------------- - include/block/block_int-global-state.h | 3 +-- - tests/unit/test-block-iothread.c | 2 +- - 4 files changed, 11 insertions(+), 26 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 4dcb50c81a..d04db85883 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -53,8 +53,6 @@ typedef struct MirrorBlockJob { - Error *replace_blocker; - MirrorSyncMode sync_mode; - BlockMirrorBackingMode backing_mode; -- /* Whether the target image requires explicit zero-initialization */ -- bool zero_target; - /* Whether the target should be assumed to be already zero initialized */ - bool target_is_zero; - /* -@@ -854,7 +852,9 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) - bs = s->mirror_top_bs->backing->bs; - bdrv_graph_co_rdunlock(); - -- if (s->zero_target && (!s->target_is_zero || punch_holes)) { -+ if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { -+ /* In TOP mode, there is no benefit to a pre-zeroing pass. 
*/ -+ } else if (!s->target_is_zero || punch_holes) { - /* - * Here, we are in FULL mode; our goal is to avoid writing - * zeroes if the destination already reads as zero, except -@@ -1730,7 +1730,7 @@ static BlockJob *mirror_start_job( - uint32_t granularity, int64_t buf_size, - MirrorSyncMode sync_mode, - BlockMirrorBackingMode backing_mode, -- bool zero_target, bool target_is_zero, -+ bool target_is_zero, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - bool unmap, -@@ -1898,7 +1898,6 @@ static BlockJob *mirror_start_job( - s->on_target_error = on_target_error; - s->sync_mode = sync_mode; - s->backing_mode = backing_mode; -- s->zero_target = zero_target; - s->target_is_zero = target_is_zero; - qatomic_set(&s->copy_mode, copy_mode); - s->base = base; -@@ -2028,7 +2027,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - int creation_flags, int64_t speed, - uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, -- bool zero_target, bool target_is_zero, -+ bool target_is_zero, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, -@@ -2051,7 +2050,6 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - - mirror_start_job(job_id, bs, creation_flags, target, replaces, - speed, granularity, buf_size, mode, backing_mode, -- zero_target, - target_is_zero, on_source_error, on_target_error, unmap, - NULL, NULL, &mirror_job_driver, base, false, - filter_node_name, true, copy_mode, false, errp); -@@ -2080,7 +2078,6 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, - job = mirror_start_job( - job_id, bs, creation_flags, base, NULL, speed, 0, 0, - MIRROR_SYNC_MODE_TOP, MIRROR_LEAVE_BACKING_CHAIN, false, -- false, - on_error, on_error, true, cb, opaque, - &commit_active_job_driver, base, auto_complete, - filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, -diff --git a/blockdev.c b/blockdev.c -index 
2e2fed539e..0fa8813efe 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2798,7 +2798,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - const char *replaces, - enum MirrorSyncMode sync, - BlockMirrorBackingMode backing_mode, -- bool zero_target, bool target_is_zero, -+ bool target_is_zero, - bool has_speed, int64_t speed, - bool has_granularity, uint32_t granularity, - bool has_buf_size, int64_t buf_size, -@@ -2865,10 +2865,6 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - return; - } - -- if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) { -- sync = MIRROR_SYNC_MODE_FULL; -- } -- - if (!replaces) { - /* We want to mirror from @bs, but keep implicit filters on top */ - unfiltered_bs = bdrv_skip_implicit_filters(bs); -@@ -2910,7 +2906,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - * and will allow to check whether the node still exist at mirror completion - */ - mirror_start(job_id, bs, target, replaces, job_flags, -- speed, granularity, buf_size, sync, backing_mode, zero_target, -+ speed, granularity, buf_size, sync, backing_mode, - target_is_zero, on_source_error, on_target_error, unmap, - filter_node_name, copy_mode, errp); - } -@@ -2926,7 +2922,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - int flags; - int64_t size; - const char *format = arg->format; -- bool zero_target; - bool target_is_zero; - int ret; - -@@ -3041,9 +3036,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - } - - bdrv_graph_rdlock_main_loop(); -- zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL && -- (arg->mode == NEW_IMAGE_MODE_EXISTING || -- !bdrv_has_zero_init(target_bs))); - target_is_zero = (arg->mode != NEW_IMAGE_MODE_EXISTING && - bdrv_has_zero_init(target_bs)); - bdrv_graph_rdunlock_main_loop(); -@@ -3057,7 +3049,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - - blockdev_mirror_common(arg->job_id, bs, target_bs, - arg->replaces, 
arg->sync, -- backing_mode, zero_target, target_is_zero, -+ backing_mode, target_is_zero, - arg->has_speed, arg->speed, - arg->has_granularity, arg->granularity, - arg->has_buf_size, arg->buf_size, -@@ -3094,7 +3086,6 @@ void qmp_blockdev_mirror(const char *job_id, - BlockDriverState *target_bs; - AioContext *aio_context; - BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; -- bool zero_target; - int ret; - - bs = qmp_get_root_bs(device, errp); -@@ -3107,8 +3098,6 @@ void qmp_blockdev_mirror(const char *job_id, - return; - } - -- zero_target = (sync == MIRROR_SYNC_MODE_FULL); -- - aio_context = bdrv_get_aio_context(bs); - - ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); -@@ -3118,7 +3107,7 @@ void qmp_blockdev_mirror(const char *job_id, - - blockdev_mirror_common(job_id, bs, target_bs, - replaces, sync, backing_mode, -- zero_target, has_target_is_zero && target_is_zero, -+ has_target_is_zero && target_is_zero, - has_speed, speed, - has_granularity, granularity, - has_buf_size, buf_size, -diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h -index 8cf0003ce7..d21bd7fd2f 100644 ---- a/include/block/block_int-global-state.h -+++ b/include/block/block_int-global-state.h -@@ -139,7 +139,6 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, - * @buf_size: The amount of data that can be in flight at one time. - * @mode: Whether to collapse all images in the chain to the target. - * @backing_mode: How to establish the target's backing chain after completion. -- * @zero_target: Whether the target should be explicitly zero-initialized - * @target_is_zero: Whether the target already is zero-initialized. - * @on_source_error: The action to take upon error reading from the source. - * @on_target_error: The action to take upon error writing to the target. 
-@@ -160,7 +159,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - int creation_flags, int64_t speed, - uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, -- bool zero_target, bool target_is_zero, -+ bool target_is_zero, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, -diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c -index 54aed8252c..e26b3be593 100644 ---- a/tests/unit/test-block-iothread.c -+++ b/tests/unit/test-block-iothread.c -@@ -755,7 +755,7 @@ static void test_propagate_mirror(void) - - /* Start a mirror job */ - mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, -- MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false, false, -+ MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false, - BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, - false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, - &error_abort); --- -2.39.3 - diff --git a/kvm-mirror-Minor-refactoring.patch b/kvm-mirror-Minor-refactoring.patch deleted file mode 100644 index d658673..0000000 --- a/kvm-mirror-Minor-refactoring.patch +++ /dev/null @@ -1,90 +0,0 @@ -From e95294aecc606deacf716861d716f9178b132ed8 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:23 -0500 -Subject: [PATCH 06/14] mirror: Minor refactoring - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/14] 22a87aca8033f3f5d10fd224dc0786633a4d040f (ebblake/centos-qemu-kvm) - -Commit 5791ba52 (v9.2) pre-initialized ret in mirror_dirty_init to -silence a false positive compiler warning, even though in all code -paths where ret is used, it was guaranteed to be reassigned -beforehand. 
But since the function returns -errno, and -1 is not -always the right errno, it's better to initialize to -EIO. - -An upcoming patch wants to track two bitmaps in -do_sync_target_write(); this will be easier if the current variables -related to the dirty bitmap are renamed. - -Signed-off-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-ID: <20250509204341.3553601-21-eblake@redhat.com> -(cherry picked from commit 870f8963cf1a84f8ec929b05a6d68906974a76c5) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/mirror.c | 22 +++++++++++----------- - 1 file changed, 11 insertions(+), 11 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index a53582f17b..34c6c5252e 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -841,7 +841,7 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) - int64_t offset; - BlockDriverState *bs; - BlockDriverState *target_bs = blk_bs(s->target); -- int ret = -1; -+ int ret = -EIO; - int64_t count; - - bdrv_graph_co_rdlock(); -@@ -1341,7 +1341,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, - { - int ret; - size_t qiov_offset = 0; -- int64_t bitmap_offset, bitmap_end; -+ int64_t dirty_bitmap_offset, dirty_bitmap_end; - - if (!QEMU_IS_ALIGNED(offset, job->granularity) && - bdrv_dirty_bitmap_get(job->dirty_bitmap, offset)) -@@ -1388,11 +1388,11 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, - * Tails are either clean or shrunk, so for bitmap resetting - * we safely align the range down. 
- */ -- bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); -- bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); -- if (bitmap_offset < bitmap_end) { -- bdrv_reset_dirty_bitmap(job->dirty_bitmap, bitmap_offset, -- bitmap_end - bitmap_offset); -+ dirty_bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); -+ dirty_bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); -+ if (dirty_bitmap_offset < dirty_bitmap_end) { -+ bdrv_reset_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset, -+ dirty_bitmap_end - dirty_bitmap_offset); - } - - job_progress_increase_remaining(&job->common.job, bytes); -@@ -1430,10 +1430,10 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, - * at function start, and they must be still dirty, as we've locked - * the region for in-flight op. - */ -- bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity); -- bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity); -- bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset, -- bitmap_end - bitmap_offset); -+ dirty_bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity); -+ dirty_bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity); -+ bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset, -+ dirty_bitmap_end - dirty_bitmap_offset); - qatomic_set(&job->actively_synced, false); - - action = mirror_error_action(job, false, -ret); --- -2.39.3 - diff --git a/kvm-mirror-Pass-full-sync-mode-rather-than-bool-to-inter.patch b/kvm-mirror-Pass-full-sync-mode-rather-than-bool-to-inter.patch deleted file mode 100644 index 3029150..0000000 --- a/kvm-mirror-Pass-full-sync-mode-rather-than-bool-to-inter.patch +++ /dev/null @@ -1,139 +0,0 @@ -From db0b92495a4e774caafaaa148e778b575112bad2 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:24 -0500 -Subject: [PATCH 07/14] mirror: Pass full sync mode rather than bool to - internals - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse 
mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/14] e8872e3edad069ee6c76f6104c1bc277c025b5ac (ebblake/centos-qemu-kvm) - -Out of the five possible values for MirrorSyncMode, INCREMENTAL and -BITMAP are already rejected up front in mirror_start, leaving NONE, -TOP, and FULL as the remaining values that the code was collapsing -into a single bool is_none_mode. Furthermore, mirror_dirty_init() is -only reachable for modes TOP and FULL, as further guided by -s->zero_target. However, upcoming patches want to further optimize -the pre-zeroing pass of a sync=full mirror in mirror_dirty_init(), -while avoiding that pass on a sync=top action. Instead of throwing -away context by collapsing these two values into -s->is_none_mode=false, it is better to pass s->sync_mode throughout -the entire operation. For active commit, the desired semantics match -sync mode TOP. - -Signed-off-by: Eric Blake -Message-ID: <20250509204341.3553601-22-eblake@redhat.com> -Reviewed-by: Sunny Zhu -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 9474d97bd7421b4fe7c806ab0949697514d11e88) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/mirror.c | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 34c6c5252e..2599b75d09 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -51,7 +51,7 @@ typedef struct MirrorBlockJob { - BlockDriverState *to_replace; - /* Used to block operations on the drive-mirror-replace target */ - Error *replace_blocker; -- bool is_none_mode; -+ MirrorSyncMode sync_mode; - BlockMirrorBackingMode backing_mode; - /* Whether the target image requires explicit zero-initialization */ - bool zero_target; -@@ -723,9 +723,10 @@ static int mirror_exit_common(Job *job) - &error_abort); - - if (!abort && s->backing_mode == 
MIRROR_SOURCE_BACKING_CHAIN) { -- BlockDriverState *backing = s->is_none_mode ? src : s->base; -+ BlockDriverState *backing; - BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs); - -+ backing = s->sync_mode == MIRROR_SYNC_MODE_NONE ? src : s->base; - if (bdrv_cow_bs(unfiltered_target) != backing) { - bdrv_set_backing_hd(unfiltered_target, backing, &local_err); - if (local_err) { -@@ -1020,7 +1021,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - mirror_free_init(s); - - s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); -- if (!s->is_none_mode) { -+ if (s->sync_mode != MIRROR_SYNC_MODE_NONE) { - ret = mirror_dirty_init(s); - if (ret < 0 || job_is_cancelled(&s->common.job)) { - goto immediate_exit; -@@ -1711,6 +1712,7 @@ static BlockJob *mirror_start_job( - int creation_flags, BlockDriverState *target, - const char *replaces, int64_t speed, - uint32_t granularity, int64_t buf_size, -+ MirrorSyncMode sync_mode, - BlockMirrorBackingMode backing_mode, - bool zero_target, - BlockdevOnError on_source_error, -@@ -1719,7 +1721,7 @@ static BlockJob *mirror_start_job( - BlockCompletionFunc *cb, - void *opaque, - const BlockJobDriver *driver, -- bool is_none_mode, BlockDriverState *base, -+ BlockDriverState *base, - bool auto_complete, const char *filter_node_name, - bool is_mirror, MirrorCopyMode copy_mode, - bool base_ro, -@@ -1878,7 +1880,7 @@ static BlockJob *mirror_start_job( - s->replaces = g_strdup(replaces); - s->on_source_error = on_source_error; - s->on_target_error = on_target_error; -- s->is_none_mode = is_none_mode; -+ s->sync_mode = sync_mode; - s->backing_mode = backing_mode; - s->zero_target = zero_target; - qatomic_set(&s->copy_mode, copy_mode); -@@ -2015,7 +2017,6 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - bool unmap, const char *filter_node_name, - MirrorCopyMode copy_mode, Error **errp) - { -- bool is_none_mode; - BlockDriverState *base; - - GLOBAL_STATE_CODE(); -@@ -2028,14 +2029,13 @@ void 
mirror_start(const char *job_id, BlockDriverState *bs, - } - - bdrv_graph_rdlock_main_loop(); -- is_none_mode = mode == MIRROR_SYNC_MODE_NONE; - base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL; - bdrv_graph_rdunlock_main_loop(); - - mirror_start_job(job_id, bs, creation_flags, target, replaces, -- speed, granularity, buf_size, backing_mode, zero_target, -- on_source_error, on_target_error, unmap, NULL, NULL, -- &mirror_job_driver, is_none_mode, base, false, -+ speed, granularity, buf_size, mode, backing_mode, -+ zero_target, on_source_error, on_target_error, unmap, -+ NULL, NULL, &mirror_job_driver, base, false, - filter_node_name, true, copy_mode, false, errp); - } - -@@ -2061,9 +2061,9 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, - - job = mirror_start_job( - job_id, bs, creation_flags, base, NULL, speed, 0, 0, -- MIRROR_LEAVE_BACKING_CHAIN, false, -+ MIRROR_SYNC_MODE_TOP, MIRROR_LEAVE_BACKING_CHAIN, false, - on_error, on_error, true, cb, opaque, -- &commit_active_job_driver, false, base, auto_complete, -+ &commit_active_job_driver, base, auto_complete, - filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, - base_read_only, errp); - if (!job) { --- -2.39.3 - diff --git a/kvm-mirror-Reduce-I-O-when-destination-is-detect-zeroes-.patch b/kvm-mirror-Reduce-I-O-when-destination-is-detect-zeroes-.patch deleted file mode 100644 index 27a0ccc..0000000 --- a/kvm-mirror-Reduce-I-O-when-destination-is-detect-zeroes-.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 9fedd14da6f1dc7aa3f0711d86f722397d080993 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 13 May 2025 17:00:45 -0500 -Subject: [PATCH 14/14] mirror: Reduce I/O when destination is - detect-zeroes:unmap - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [14/14] 
d4ba9d88a8da00f82c2ba7ebf050152fbe1e2465 (ebblake/centos-qemu-kvm) - -If we are going to punch holes in the mirror destination even for the -portions where the source image is unallocated, it is nicer to treat -the entire image as dirty and punch as we go, rather than pre-zeroing -the entire image just to re-do I/O to the allocated portions of the -image. - -Signed-off-by: Eric Blake -Message-ID: <20250513220142.535200-2-eblake@redhat.com> -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 9abfc81246c9cc1845080eec5920779961187c07) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/mirror.c | 13 +++++++++---- - 1 file changed, 9 insertions(+), 4 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 724318f037..c2c5099c95 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -920,11 +920,16 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) - * zeroing happened externally (ret > 0) or if we have a fast - * way to pre-zero the image (the dirty bitmap will be - * populated later by the non-zero portions, the same as for -- * TOP mode). If pre-zeroing is not fast, then our only -- * recourse is to mark the entire image dirty. The act of -- * pre-zeroing will populate the zero bitmap. -+ * TOP mode). If pre-zeroing is not fast, or we need to visit -+ * the entire image in order to punch holes even in the -+ * non-allocated regions of the source, then just mark the -+ * entire image dirty and leave the zero bitmap clear at this -+ * point in time. Otherwise, it can be faster to pre-zero the -+ * image now, even if we re-write the allocated portions of -+ * the disk later, and the pre-zero pass will populate the -+ * zero bitmap. 
- */ -- if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { -+ if (!bdrv_can_write_zeroes_with_unmap(target_bs) || punch_holes) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); - return 0; - } --- -2.39.3 - diff --git a/kvm-mirror-Skip-pre-zeroing-destination-if-it-is-already.patch b/kvm-mirror-Skip-pre-zeroing-destination-if-it-is-already.patch deleted file mode 100644 index cb2b25f..0000000 --- a/kvm-mirror-Skip-pre-zeroing-destination-if-it-is-already.patch +++ /dev/null @@ -1,180 +0,0 @@ -From 92a033b6c8394c8efb5b881cbbe463eeff5711cd Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:27 -0500 -Subject: [PATCH 10/14] mirror: Skip pre-zeroing destination if it is already - zero - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/14] 5d86d9c763a1bb49fab591a45e211d3be819ccfe (ebblake/centos-qemu-kvm) - -When doing a sync=full mirroring, we can skip pre-zeroing the -destination if it already reads as zeroes and we are not also trying -to punch holes due to detect-zeroes. With this patch, there are fewer -scenarios that have to pass in an explicit target-is-zero, while still -resulting in a sparse destination remaining sparse. - -A later patch will then further improve things to skip writing to the -destination for parts of the image where the source is zero; but even -with just this patch, it is possible to see a difference for any -source that does not report itself as fully allocated, coupled with a -destination BDS that can quickly report that it already reads as zero. -(For a source that reports as fully allocated, such as a file, the -rest of mirror_dirty_init() still sets the entire dirty bitmap to -true, so even though we avoided the pre-zeroing, we are not yet -avoiding all redundant I/O). 
- -Iotest 194 detects the difference made by this patch: for a file -source (where block status reports the entire image as allocated, and -therefore we end up writing zeroes everywhere in the destination -anyways), the job length remains the same. But for a qcow2 source and -a destination that reads as all zeroes, the dirty bitmap changes to -just tracking the allocated portions of the source, which results in -faster completion and smaller job statistics. For the test to pass -with both ./check -file and -qcow2, a new python filter is needed to -mask out the now-varying job amounts (this matches the shell filters -_filter_block_job_{offset,len} in common.filter). A later test will -also be added which further validates expected sparseness, so it does -not matter that 194 is no longer explicitly looking at how many bytes -were copied. - -Signed-off-by: Eric Blake -Message-ID: <20250509204341.3553601-25-eblake@redhat.com> -Reviewed-by: Sunny Zhu -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 181a63667adf16c35b57e446def3e41c70f1fea6) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/mirror.c | 24 ++++++++++++++++-------- - tests/qemu-iotests/194 | 6 ++++-- - tests/qemu-iotests/194.out | 4 ++-- - tests/qemu-iotests/iotests.py | 12 +++++++++++- - 4 files changed, 33 insertions(+), 13 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index d04db85883..bca99ec206 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -848,23 +848,31 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) - target_bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && - bdrv_can_write_zeroes_with_unmap(target_bs); - -+ /* Determine if the image is already zero, regardless of sync mode. 
*/ - bdrv_graph_co_rdlock(); - bs = s->mirror_top_bs->backing->bs; -+ if (s->target_is_zero) { -+ ret = 1; -+ } else { -+ ret = bdrv_co_is_all_zeroes(target_bs); -+ } - bdrv_graph_co_rdunlock(); - -- if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { -+ /* Determine if a pre-zeroing pass is necessary. */ -+ if (ret < 0) { -+ return ret; -+ } else if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { - /* In TOP mode, there is no benefit to a pre-zeroing pass. */ -- } else if (!s->target_is_zero || punch_holes) { -+ } else if (ret == 0 || punch_holes) { - /* - * Here, we are in FULL mode; our goal is to avoid writing - * zeroes if the destination already reads as zero, except - * when we are trying to punch holes. This is possible if -- * zeroing happened externally (s->target_is_zero) or if we -- * have a fast way to pre-zero the image (the dirty bitmap -- * will be populated later by the non-zero portions, the same -- * as for TOP mode). If pre-zeroing is not fast, or we need -- * to punch holes, then our only recourse is to write the -- * entire image. -+ * zeroing happened externally (ret > 0) or if we have a fast -+ * way to pre-zero the image (the dirty bitmap will be -+ * populated later by the non-zero portions, the same as for -+ * TOP mode). If pre-zeroing is not fast, or we need to punch -+ * holes, then our only recourse is to write the entire image. 
- */ - if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); -diff --git a/tests/qemu-iotests/194 b/tests/qemu-iotests/194 -index d0b9c084f5..e114c0b269 100755 ---- a/tests/qemu-iotests/194 -+++ b/tests/qemu-iotests/194 -@@ -62,7 +62,8 @@ with iotests.FilePath('source.img') as source_img_path, \ - - iotests.log('Waiting for `drive-mirror` to complete...') - iotests.log(source_vm.event_wait('BLOCK_JOB_READY'), -- filters=[iotests.filter_qmp_event]) -+ filters=[iotests.filter_qmp_event, -+ iotests.filter_block_job]) - - iotests.log('Starting migration...') - capabilities = [{'capability': 'events', 'state': True}, -@@ -88,7 +89,8 @@ with iotests.FilePath('source.img') as source_img_path, \ - - while True: - event2 = source_vm.event_wait('BLOCK_JOB_COMPLETED') -- iotests.log(event2, filters=[iotests.filter_qmp_event]) -+ iotests.log(event2, filters=[iotests.filter_qmp_event, -+ iotests.filter_block_job]) - if event2['event'] == 'BLOCK_JOB_COMPLETED': - iotests.log('Stopping the NBD server on destination...') - iotests.log(dest_vm.qmp('nbd-server-stop')) -diff --git a/tests/qemu-iotests/194.out b/tests/qemu-iotests/194.out -index 6940e809cd..d02655a514 100644 ---- a/tests/qemu-iotests/194.out -+++ b/tests/qemu-iotests/194.out -@@ -7,7 +7,7 @@ Launching NBD server on destination... - Starting `drive-mirror` on source... - {"return": {}} - Waiting for `drive-mirror` to complete... --{"data": {"device": "mirror-job0", "len": 1073741824, "offset": 1073741824, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"device": "mirror-job0", "len": "LEN", "offset": "OFFSET", "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - Starting migration... - {"return": {}} - {"execute": "migrate-start-postcopy", "arguments": {}} -@@ -18,7 +18,7 @@ Starting migration... 
- {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - Gracefully ending the `drive-mirror` job on source... - {"return": {}} --{"data": {"device": "mirror-job0", "len": 1073741824, "offset": 1073741824, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"device": "mirror-job0", "len": "LEN", "offset": "OFFSET", "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - Stopping the NBD server on destination... - {"return": {}} - Wait for migration completion on target... -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 7292c8b342..05274772ce 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -601,13 +601,23 @@ def filter_chown(msg): - return chown_re.sub("chown UID:GID", msg) - - def filter_qmp_event(event): -- '''Filter a QMP event dict''' -+ '''Filter the timestamp of a QMP event dict''' - event = dict(event) - if 'timestamp' in event: - event['timestamp']['seconds'] = 'SECS' - event['timestamp']['microseconds'] = 'USECS' - return event - -+def filter_block_job(event): -+ '''Filter the offset and length of a QMP block job event dict''' -+ event = dict(event) -+ if 'data' in event: -+ if 'offset' in event['data']: -+ event['data']['offset'] = 'OFFSET' -+ if 'len' in event['data']: -+ event['data']['len'] = 'LEN' -+ return event -+ - def filter_qmp(qmsg, filter_fn): - '''Given a string filter, filter a QMP object's values. 
- filter_fn takes a (key, value) pair.''' --- -2.39.3 - diff --git a/kvm-mirror-Skip-writing-zeroes-when-target-is-already-ze.patch b/kvm-mirror-Skip-writing-zeroes-when-target-is-already-ze.patch deleted file mode 100644 index af29809..0000000 --- a/kvm-mirror-Skip-writing-zeroes-when-target-is-already-ze.patch +++ /dev/null @@ -1,355 +0,0 @@ -From cc72e6ec30fb113b82fcdb61f79a0fae18e31e79 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:28 -0500 -Subject: [PATCH 11/14] mirror: Skip writing zeroes when target is already zero - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/14] 82df7fcf94606e6b6570469d3c05666da5039408 (ebblake/centos-qemu-kvm) - -When mirroring, the goal is to ensure that the destination reads the -same as the source; this goal is met whether the destination is sparse -or fully-allocated (except when explicitly punching holes, then merely -reading zero is not enough to know if it is sparse, so we still want -to punch the hole). Avoiding a redundant write to zero (whether in -the background because the zero cluster was marked in the dirty -bitmap, or in the foreground because the guest is writing zeroes) when -the destination already reads as zero makes mirroring faster, and -avoids allocating the destination merely because the source reports as -allocated. - -The effect is especially pronounced when the source is a raw file. -That's because when the source is a qcow2 file, the dirty bitmap only -visits the portions of the source that are allocated, which tend to be -non-zero. 
But when the source is a raw file, -bdrv_co_is_allocated_above() reports the entire file as allocated so -mirror_dirty_init sets the entire dirty bitmap, and it is only later -during mirror_iteration that we change to consulting the more precise -bdrv_co_block_status_above() to learn where the source reads as zero. - -Remember that since a mirror operation can write a cluster more than -once (every time the guest changes the source, the destination is also -changed to keep up), and the guest can change whether a given cluster -reads as zero, is discarded, or has non-zero data over the course of -the mirror operation, we can't take the shortcut of relying on -s->target_is_zero (which is static for the life of the job) in -mirror_co_zero() to see if the destination is already zero, because -that information may be stale. Any solution we use must be dynamic in -the face of the guest writing or discarding a cluster while the mirror -has been ongoing. - -We could just teach mirror_co_zero() to do a block_status() probe of -the destination, and skip the zeroes if the destination already reads -as zero, but we know from past experience that extra block_status() -calls are not always cheap (tmpfs, anyone?), especially when they are -random access rather than linear. Use of block_status() of the source -by the background task in a linear fashion is not our bottleneck (it's -a background task, after all); but since mirroring can be done while -the source is actively being changed, we don't want a slow -block_status() of the destination to occur on the hot path of the -guest trying to do random-access writes to the source. - -So this patch takes a slightly different approach: any time we have to -track dirty clusters, we can also track which clusters are known to -read as zero. 
For sync=TOP or when we are punching holes from -"detect-zeroes":"unmap", the zero bitmap starts out empty, but -prevents a second write zero to a cluster that was already zero by an -earlier pass; for sync=FULL when we are not punching holes, the zero -bitmap starts out full if the destination reads as zero during -initialization. Either way, I/O to the destination can now avoid -redundant write zero to a cluster that already reads as zero, all -without having to do a block_status() per write on the destination. - -With this patch, if I create a raw sparse destination file, connect it -with QMP 'blockdev-add' while leaving it at the default "discard": -"ignore", then run QMP 'blockdev-mirror' with "sync": "full", the -destination remains sparse rather than fully allocated. Meanwhile, a -destination image that is already fully allocated remains so unless it -was opened with "detect-zeroes": "unmap". And any time writing zeroes -is skipped, the job counters are not incremented. - -Signed-off-by: Eric Blake -Message-ID: <20250509204341.3553601-26-eblake@redhat.com> -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 7e277545b90874171128804e256a538fb0e8dd7e) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - block/mirror.c | 107 ++++++++++++++++++++++++++++++++++++++++++------- - 1 file changed, 93 insertions(+), 14 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index bca99ec206..724318f037 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -73,6 +73,7 @@ typedef struct MirrorBlockJob { - size_t buf_size; - int64_t bdev_length; - unsigned long *cow_bitmap; -+ unsigned long *zero_bitmap; - BdrvDirtyBitmap *dirty_bitmap; - BdrvDirtyBitmapIter *dbi; - uint8_t *buf; -@@ -108,9 +109,12 @@ struct MirrorOp { - int64_t offset; - uint64_t bytes; - -- /* The pointee is set by mirror_co_read(), mirror_co_zero(), and -- * mirror_co_discard() before yielding for the first 
time */ -+ /* -+ * These pointers are set by mirror_co_read(), mirror_co_zero(), and -+ * mirror_co_discard() before yielding for the first time -+ */ - int64_t *bytes_handled; -+ bool *io_skipped; - - bool is_pseudo_op; - bool is_active_write; -@@ -408,15 +412,34 @@ static void coroutine_fn mirror_co_read(void *opaque) - static void coroutine_fn mirror_co_zero(void *opaque) - { - MirrorOp *op = opaque; -- int ret; -+ bool write_needed = true; -+ int ret = 0; - - op->s->in_flight++; - op->s->bytes_in_flight += op->bytes; - *op->bytes_handled = op->bytes; - op->is_in_flight = true; - -- ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, -- op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); -+ if (op->s->zero_bitmap) { -+ unsigned long end = DIV_ROUND_UP(op->offset + op->bytes, -+ op->s->granularity); -+ assert(QEMU_IS_ALIGNED(op->offset, op->s->granularity)); -+ assert(QEMU_IS_ALIGNED(op->bytes, op->s->granularity) || -+ op->offset + op->bytes == op->s->bdev_length); -+ if (find_next_zero_bit(op->s->zero_bitmap, end, -+ op->offset / op->s->granularity) == end) { -+ write_needed = false; -+ *op->io_skipped = true; -+ } -+ } -+ if (write_needed) { -+ ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, -+ op->s->unmap ? 
BDRV_REQ_MAY_UNMAP : 0); -+ } -+ if (ret >= 0 && op->s->zero_bitmap) { -+ bitmap_set(op->s->zero_bitmap, op->offset / op->s->granularity, -+ DIV_ROUND_UP(op->bytes, op->s->granularity)); -+ } - mirror_write_complete(op, ret); - } - -@@ -435,29 +458,43 @@ static void coroutine_fn mirror_co_discard(void *opaque) - } - - static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, -- unsigned bytes, MirrorMethod mirror_method) -+ unsigned bytes, MirrorMethod mirror_method, -+ bool *io_skipped) - { - MirrorOp *op; - Coroutine *co; - int64_t bytes_handled = -1; - -+ assert(QEMU_IS_ALIGNED(offset, s->granularity)); -+ assert(QEMU_IS_ALIGNED(bytes, s->granularity) || -+ offset + bytes == s->bdev_length); - op = g_new(MirrorOp, 1); - *op = (MirrorOp){ - .s = s, - .offset = offset, - .bytes = bytes, - .bytes_handled = &bytes_handled, -+ .io_skipped = io_skipped, - }; - qemu_co_queue_init(&op->waiting_requests); - - switch (mirror_method) { - case MIRROR_METHOD_COPY: -+ if (s->zero_bitmap) { -+ bitmap_clear(s->zero_bitmap, offset / s->granularity, -+ DIV_ROUND_UP(bytes, s->granularity)); -+ } - co = qemu_coroutine_create(mirror_co_read, op); - break; - case MIRROR_METHOD_ZERO: -+ /* s->zero_bitmap handled in mirror_co_zero */ - co = qemu_coroutine_create(mirror_co_zero, op); - break; - case MIRROR_METHOD_DISCARD: -+ if (s->zero_bitmap) { -+ bitmap_clear(s->zero_bitmap, offset / s->granularity, -+ DIV_ROUND_UP(bytes, s->granularity)); -+ } - co = qemu_coroutine_create(mirror_co_discard, op); - break; - default: -@@ -568,6 +605,7 @@ static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) - int ret = -1; - int64_t io_bytes; - int64_t io_bytes_acct; -+ bool io_skipped = false; - MirrorMethod mirror_method = MIRROR_METHOD_COPY; - - assert(!(offset % s->granularity)); -@@ -611,8 +649,10 @@ static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) - } - - io_bytes = mirror_clip_bytes(s, offset, io_bytes); -- io_bytes = mirror_perform(s, 
offset, io_bytes, mirror_method); -- if (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok) { -+ io_bytes = mirror_perform(s, offset, io_bytes, mirror_method, -+ &io_skipped); -+ if (io_skipped || -+ (mirror_method != MIRROR_METHOD_COPY && write_zeroes_ok)) { - io_bytes_acct = 0; - } else { - io_bytes_acct = io_bytes; -@@ -847,8 +887,10 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) - bool punch_holes = - target_bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && - bdrv_can_write_zeroes_with_unmap(target_bs); -+ int64_t bitmap_length = DIV_ROUND_UP(s->bdev_length, s->granularity); - - /* Determine if the image is already zero, regardless of sync mode. */ -+ s->zero_bitmap = bitmap_new(bitmap_length); - bdrv_graph_co_rdlock(); - bs = s->mirror_top_bs->backing->bs; - if (s->target_is_zero) { -@@ -862,7 +904,14 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) - if (ret < 0) { - return ret; - } else if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { -- /* In TOP mode, there is no benefit to a pre-zeroing pass. */ -+ /* -+ * In TOP mode, there is no benefit to a pre-zeroing pass, but -+ * the zero bitmap can be set if the destination already reads -+ * as zero and we are not punching holes. -+ */ -+ if (ret > 0 && !punch_holes) { -+ bitmap_set(s->zero_bitmap, 0, bitmap_length); -+ } - } else if (ret == 0 || punch_holes) { - /* - * Here, we are in FULL mode; our goal is to avoid writing -@@ -871,8 +920,9 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) - * zeroing happened externally (ret > 0) or if we have a fast - * way to pre-zero the image (the dirty bitmap will be - * populated later by the non-zero portions, the same as for -- * TOP mode). If pre-zeroing is not fast, or we need to punch -- * holes, then our only recourse is to write the entire image. -+ * TOP mode). If pre-zeroing is not fast, then our only -+ * recourse is to mark the entire image dirty. 
The act of -+ * pre-zeroing will populate the zero bitmap. - */ - if (!bdrv_can_write_zeroes_with_unmap(target_bs)) { - bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); -@@ -883,6 +933,7 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) - for (offset = 0; offset < s->bdev_length; ) { - int bytes = MIN(s->bdev_length - offset, - QEMU_ALIGN_DOWN(INT_MAX, s->granularity)); -+ bool ignored; - - mirror_throttle(s); - -@@ -898,12 +949,15 @@ static int coroutine_fn GRAPH_UNLOCKED mirror_dirty_init(MirrorBlockJob *s) - continue; - } - -- mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO); -+ mirror_perform(s, offset, bytes, MIRROR_METHOD_ZERO, &ignored); - offset += bytes; - } - - mirror_wait_for_all_io(s); - s->initial_zeroing_ongoing = false; -+ } else { -+ /* In FULL mode, and image already reads as zero. */ -+ bitmap_set(s->zero_bitmap, 0, bitmap_length); - } - - /* First part, loop on the sectors and initialize the dirty bitmap. */ -@@ -1188,6 +1242,7 @@ immediate_exit: - assert(s->in_flight == 0); - qemu_vfree(s->buf); - g_free(s->cow_bitmap); -+ g_free(s->zero_bitmap); - g_free(s->in_flight_bitmap); - bdrv_dirty_iter_free(s->dbi); - -@@ -1367,6 +1422,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, - int ret; - size_t qiov_offset = 0; - int64_t dirty_bitmap_offset, dirty_bitmap_end; -+ int64_t zero_bitmap_offset, zero_bitmap_end; - - if (!QEMU_IS_ALIGNED(offset, job->granularity) && - bdrv_dirty_bitmap_get(job->dirty_bitmap, offset)) -@@ -1410,8 +1466,9 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, - } - - /* -- * Tails are either clean or shrunk, so for bitmap resetting -- * we safely align the range down. -+ * Tails are either clean or shrunk, so for dirty bitmap resetting -+ * we safely align the range narrower. But for zero bitmap, round -+ * range wider for checking or clearing, and narrower for setting. 
- */ - dirty_bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); - dirty_bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); -@@ -1419,22 +1476,44 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, - bdrv_reset_dirty_bitmap(job->dirty_bitmap, dirty_bitmap_offset, - dirty_bitmap_end - dirty_bitmap_offset); - } -+ zero_bitmap_offset = offset / job->granularity; -+ zero_bitmap_end = DIV_ROUND_UP(offset + bytes, job->granularity); - - job_progress_increase_remaining(&job->common.job, bytes); - job->active_write_bytes_in_flight += bytes; - - switch (method) { - case MIRROR_METHOD_COPY: -+ if (job->zero_bitmap) { -+ bitmap_clear(job->zero_bitmap, zero_bitmap_offset, -+ zero_bitmap_end - zero_bitmap_offset); -+ } - ret = blk_co_pwritev_part(job->target, offset, bytes, - qiov, qiov_offset, flags); - break; - - case MIRROR_METHOD_ZERO: -+ if (job->zero_bitmap) { -+ if (find_next_zero_bit(job->zero_bitmap, zero_bitmap_end, -+ zero_bitmap_offset) == zero_bitmap_end) { -+ ret = 0; -+ break; -+ } -+ } - assert(!qiov); - ret = blk_co_pwrite_zeroes(job->target, offset, bytes, flags); -+ if (job->zero_bitmap && ret >= 0) { -+ bitmap_set(job->zero_bitmap, dirty_bitmap_offset / job->granularity, -+ (dirty_bitmap_end - dirty_bitmap_offset) / -+ job->granularity); -+ } - break; - - case MIRROR_METHOD_DISCARD: -+ if (job->zero_bitmap) { -+ bitmap_clear(job->zero_bitmap, zero_bitmap_offset, -+ zero_bitmap_end - zero_bitmap_offset); -+ } - assert(!qiov); - ret = blk_co_pdiscard(job->target, offset, bytes); - break; --- -2.39.3 - diff --git a/kvm-net-socket-skip-automatic-zero-init-of-large-array.patch b/kvm-net-socket-skip-automatic-zero-init-of-large-array.patch deleted file mode 100644 index 6bc5323..0000000 --- a/kvm-net-socket-skip-automatic-zero-init-of-large-array.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 7f3f625c3046f8a44ac158bef1e627f18856a2ef Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 
13:37:08 +0100 -Subject: [PATCH 38/43] net/socket: skip automatic zero-init of large array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [30/31] b45f973634d85a5cb99d51ef79b0c04c695a43d6 (stefanha/centos-stream-qemu-kvm) - -The 'net_socket_send' method has a 68k byte array used for copying -data between guest and host. Skip the automatic zero-init of this -array to eliminate the performance overhead in the I/O hot path. - -The 'buf1' array will be fully initialized when reading data off -the network socket. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Klaus Jensen -Reviewed-by: Harsh Prateek Bora -Message-id: 20250610123709.835102-31-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 751b0e79f1e0e7f88fad2fe2f22595ad03d78859) -Signed-off-by: Stefan Hajnoczi ---- - net/socket.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/socket.c b/net/socket.c -index 8e3702e1f3..784dda686f 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -157,7 +157,7 @@ static void net_socket_send(void *opaque) - NetSocketState *s = opaque; - int size; - int ret; -- uint8_t buf1[NET_BUFSIZE]; -+ QEMU_UNINITIALIZED uint8_t buf1[NET_BUFSIZE]; - const uint8_t *buf; - - size = recv(s->fd, buf1, sizeof(buf1), 0); --- -2.39.3 - diff --git a/kvm-net-stream-skip-automatic-zero-init-of-large-array.patch b/kvm-net-stream-skip-automatic-zero-init-of-large-array.patch deleted file mode 100644 index 62e5d38..0000000 --- a/kvm-net-stream-skip-automatic-zero-init-of-large-array.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 074465b3a94a8d18c4202d13d9506f9dee8030e5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Tue, 10 Jun 2025 13:37:09 +0100 -Subject: 
[PATCH 39/43] net/stream: skip automatic zero-init of large array -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 381: Solve -ftrivial-auto-var-init performance regression with QEMU_UNINITIALIZED -RH-Jira: RHEL-95479 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [31/31] 5200215cfd9e263d24342ee4a2b5aafe3d1e1eb7 (stefanha/centos-stream-qemu-kvm) - -The 'net_stream_send' method has a 68k byte array used for copying -data between guest and host. Skip the automatic zero-init of this -array to eliminate the performance overhead in the I/O hot path. - -The 'buf1' array will be fully initialized when reading data off -the network socket. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Klaus Jensen -Reviewed-by: Harsh Prateek Bora -Message-id: 20250610123709.835102-32-berrange@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 837b87c4c5ba9ac7a255133c6642b8d578272a70) -Signed-off-by: Stefan Hajnoczi ---- - net/stream.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/stream.c b/net/stream.c -index 4de5613844..6152d2a05e 100644 ---- a/net/stream.c -+++ b/net/stream.c -@@ -148,7 +148,7 @@ static gboolean net_stream_send(QIOChannel *ioc, - NetStreamState *s = data; - int size; - int ret; -- char buf1[NET_BUFSIZE]; -+ QEMU_UNINITIALIZED char buf1[NET_BUFSIZE]; - const char *buf; - - size = qio_channel_read(s->ioc, buf1, sizeof(buf1), NULL); --- -2.39.3 - diff --git a/kvm-qtest-migration-rdma-Add-test-for-rdma-migration-wit.patch b/kvm-qtest-migration-rdma-Add-test-for-rdma-migration-wit.patch deleted file mode 100644 index 33482c9..0000000 --- a/kvm-qtest-migration-rdma-Add-test-for-rdma-migration-wit.patch +++ /dev/null @@ -1,212 +0,0 @@ -From 879b050c6cef5cf2ae1944ffb8b203faeca62f1a Mon Sep 17 00:00:00 2001 -From: Li Zhijian -Date: Tue, 13 May 2025 09:22:07 +0800 -Subject: [PATCH 10/33] qtest/migration/rdma: 
Add test for rdma migration with - ipv6 - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/11] d2dd06604b461abf0c6d47dc860b1bb925889133 (pjp/cs-qemu-kvm) - -Recently, we removed ipv6 restriction[0] from RDMA migration, add a -test for it. - -[0] https://lore.kernel.org/qemu-devel/20250326095224.9918-1-jinpu.wang@ionos.com/ - -Jira: https://issues.redhat.com/browse/RHEL-59697 -Cc: Jack Wang -Cc: Michael R. Galaxy -Cc: Peter Xu -Cc: Yu Zhang -Reviewed-by: Jack Wang -Signed-off-by: Li Zhijian -Link: https://lore.kernel.org/r/20250513012207.2867069-1-lizhijian@fujitsu.com -[peterx: Fix over long lines] -Signed-off-by: Peter Xu -(cherry picked from commit 6b84c46e8e0ef6f83f33657a29a8abb2b8362d02) -Signed-off-by: Prasad Pandit ---- - scripts/rdma-migration-helper.sh | 57 ++++++++++++++++++++++----- - tests/qtest/migration/precopy-tests.c | 21 ++++++++-- - 2 files changed, 65 insertions(+), 13 deletions(-) - -diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh -index a39f2fb0e5..d784d1566a 100755 ---- a/scripts/rdma-migration-helper.sh -+++ b/scripts/rdma-migration-helper.sh -@@ -8,23 +8,44 @@ get_ipv4_addr() - head -1 | tr -d '\n' - } - -+get_ipv6_addr() { -+ ipv6=$(ip -6 -o addr show dev "$1" | -+ sed -n 's/.*[[:blank:]]inet6[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' | -+ head -1 | tr -d '\n') -+ -+ [ $? 
-eq 0 ] || return -+ -+ if [[ "$ipv6" =~ ^fe80: ]]; then -+ echo -n "[$ipv6%$1]" -+ else -+ echo -n "[$ipv6]" -+ fi -+} -+ - # existing rdma interfaces - rdma_interfaces() - { -- rdma link show | sed -nE 's/^link .* netdev ([^ ]+).*$/\1 /p' -+ rdma link show | sed -nE 's/^link .* netdev ([^ ]+).*$/\1 /p' | -+ grep -Ev '^(lo|tun|tap)' - } - - # existing valid ipv4 interfaces - ipv4_interfaces() - { -- ip -o addr show | awk '/inet / {print $2}' | grep -v -w lo -+ ip -o addr show | awk '/inet / {print $2}' | grep -Ev '^(lo|tun|tap)' -+} -+ -+ipv6_interfaces() -+{ -+ ip -o addr show | awk '/inet6 / {print $2}' | grep -Ev '^(lo|tun|tap)' - } - - rdma_rxe_detect() - { -+ family=$1 - for r in $(rdma_interfaces) - do -- ipv4_interfaces | grep -qw $r && get_ipv4_addr $r && return -+ "$family"_interfaces | grep -qw $r && get_"$family"_addr $r && return - done - - return 1 -@@ -32,16 +53,23 @@ rdma_rxe_detect() - - rdma_rxe_setup() - { -- for i in $(ipv4_interfaces) -+ family=$1 -+ for i in $("$family"_interfaces) - do -- rdma_interfaces | grep -qw $i && continue -+ if rdma_interfaces | grep -qw $i; then -+ echo "$family: Reuse the existing rdma/rxe ${i}_rxe" \ -+ "for $i with $(get_"$family"_addr $i)" -+ return -+ fi -+ - rdma link add "${i}_rxe" type rxe netdev "$i" && { -- echo "Setup new rdma/rxe ${i}_rxe for $i with $(get_ipv4_addr $i)" -+ echo "$family: Setup new rdma/rxe ${i}_rxe" \ -+ "for $i with $(get_"$family"_addr $i)" - return - } - done - -- echo "Failed to setup any new rdma/rxe link" >&2 -+ echo "$family: Failed to setup any new rdma/rxe link" >&2 - return 1 - } - -@@ -50,6 +78,12 @@ rdma_rxe_clean() - modprobe -r rdma_rxe - } - -+IP_FAMILY=${IP_FAMILY:-ipv4} -+if [ "$IP_FAMILY" != "ipv6" ] && [ "$IP_FAMILY" != "ipv4" ]; then -+ echo "Unknown ip family '$IP_FAMILY', only ipv4 or ipv6 is supported." 
>&2 -+ exit 1 -+fi -+ - operation=${1:-detect} - - command -v rdma >/dev/null || { -@@ -62,9 +96,14 @@ if [ "$operation" == "setup" ] || [ "$operation" == "clean" ]; then - echo "Root privilege is required to setup/clean a rdma/rxe link" >&2 - exit 1 - } -- rdma_rxe_"$operation" -+ if [ "$operation" == "setup" ]; then -+ rdma_rxe_setup ipv4 -+ rdma_rxe_setup ipv6 -+ else -+ rdma_rxe_clean -+ fi - elif [ "$operation" == "detect" ]; then -- rdma_rxe_detect -+ rdma_rxe_detect "$IP_FAMILY" - else - echo "Usage: $0 [setup | detect | clean]" - fi -diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c -index 5be1cd5742..a62d3c5378 100644 ---- a/tests/qtest/migration/precopy-tests.c -+++ b/tests/qtest/migration/precopy-tests.c -@@ -131,12 +131,13 @@ static bool mlock_check(void) - } - - #define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh" --static int new_rdma_link(char *buffer) -+static int new_rdma_link(char *buffer, bool ipv6) - { - char cmd[256]; - bool verbose = g_getenv("QTEST_LOG"); - -- snprintf(cmd, sizeof(cmd), "%s detect %s", RDMA_MIGRATION_HELPER, -+ snprintf(cmd, sizeof(cmd), "IP_FAMILY=%s %s detect %s", -+ ipv6 ? "ipv6" : "ipv4", RDMA_MIGRATION_HELPER, - verbose ? 
"" : "2>/dev/null"); - - FILE *pipe = popen(cmd, "r"); -@@ -161,7 +162,7 @@ static int new_rdma_link(char *buffer) - return -1; - } - --static void test_precopy_rdma_plain(void) -+static void __test_precopy_rdma_plain(bool ipv6) - { - char buffer[128] = {}; - -@@ -170,7 +171,7 @@ static void test_precopy_rdma_plain(void) - return; - } - -- if (new_rdma_link(buffer)) { -+ if (new_rdma_link(buffer, ipv6)) { - g_test_skip("No rdma link available\n" - "# To enable the test:\n" - "# Run \'" RDMA_MIGRATION_HELPER " setup\' with root to " -@@ -193,6 +194,16 @@ static void test_precopy_rdma_plain(void) - - test_precopy_common(&args); - } -+ -+static void test_precopy_rdma_plain(void) -+{ -+ __test_precopy_rdma_plain(false); -+} -+ -+static void test_precopy_rdma_plain_ipv6(void) -+{ -+ __test_precopy_rdma_plain(true); -+} - #endif - - static void test_precopy_tcp_plain(void) -@@ -1226,6 +1237,8 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env) - #ifdef CONFIG_RDMA - migration_test_add("/migration/precopy/rdma/plain", - test_precopy_rdma_plain); -+ migration_test_add("/migration/precopy/rdma/plain/ipv6", -+ test_precopy_rdma_plain_ipv6); - #endif - } - --- -2.39.3 - diff --git a/kvm-qtest-migration-rdma-Enforce-RLIMIT_MEMLOCK-128MB-re.patch b/kvm-qtest-migration-rdma-Enforce-RLIMIT_MEMLOCK-128MB-re.patch deleted file mode 100644 index 88cceb0..0000000 --- a/kvm-qtest-migration-rdma-Enforce-RLIMIT_MEMLOCK-128MB-re.patch +++ /dev/null @@ -1,94 +0,0 @@ -From f65658024595c8fa58c5f9a6a8892d230e68e4c7 Mon Sep 17 00:00:00 2001 -From: Li Zhijian -Date: Fri, 9 May 2025 09:42:10 +0800 -Subject: [PATCH 09/33] qtest/migration/rdma: Enforce RLIMIT_MEMLOCK >= 128MB - requirement - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/11] bc8b4e4e55e119d1ba1e5567dfe69fe675a5f52e 
(pjp/cs-qemu-kvm) - -Ensure successful migration over RDMA by verifying that RLIMIT_MEMLOCK is -set to at least 128MB. This allocation is necessary due to the requirement -to pin significant portions of guest memory, typically exceeding 100MB -in this test, while the remainder is transmitted as compressed zero pages. - -Otherwise, it will fail with: -stderr: -qemu-system-x86_64: cannot get rkey -qemu-system-x86_64: error while loading state section id 2(ram) -qemu-system-x86_64: load of migration failed: Operation not permitted -qemu-system-x86_64: rdma migration: recv polling control error! -qemu-system-x86_64: RDMA is in an error state waiting migration to abort! -qemu-system-x86_64: failed to save SaveStateEntry with id(name): 2(ram): -1 -qemu-system-x86_64: Channel error: Operation not permitted - -Jira: https://issues.redhat.com/browse/RHEL-59697 -Reported-by: Peter Xu -Signed-off-by: Li Zhijian -Link: https://lore.kernel.org/r/20250509014211.1272640-1-lizhijian@fujitsu.com -Signed-off-by: Peter Xu -(cherry picked from commit 7b2e4f788d60a8ec25efbf1e6bb6552ee0cef17c) -Signed-off-by: Prasad Pandit ---- - tests/qtest/migration/precopy-tests.c | 34 +++++++++++++++++++++++++++ - 1 file changed, 34 insertions(+) - -diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c -index 87b0a7e8ef..5be1cd5742 100644 ---- a/tests/qtest/migration/precopy-tests.c -+++ b/tests/qtest/migration/precopy-tests.c -@@ -101,6 +101,35 @@ static void test_precopy_unix_dirty_ring(void) - - #ifdef CONFIG_RDMA - -+#include -+ -+/* -+ * During migration over RDMA, it will try to pin portions of guest memory, -+ * typically exceeding 100MB in this test, while the remainder will be -+ * transmitted as compressed zero pages. -+ * -+ * REQUIRED_MEMLOCK_SZ indicates the minimal mlock size in the current context. 
-+ */ -+#define REQUIRED_MEMLOCK_SZ (128 << 20) /* 128MB */ -+ -+/* check 'ulimit -l' */ -+static bool mlock_check(void) -+{ -+ uid_t uid; -+ struct rlimit rlim; -+ -+ uid = getuid(); -+ if (uid == 0) { -+ return true; -+ } -+ -+ if (getrlimit(RLIMIT_MEMLOCK, &rlim) != 0) { -+ return false; -+ } -+ -+ return rlim.rlim_cur >= REQUIRED_MEMLOCK_SZ; -+} -+ - #define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh" - static int new_rdma_link(char *buffer) - { -@@ -136,6 +165,11 @@ static void test_precopy_rdma_plain(void) - { - char buffer[128] = {}; - -+ if (!mlock_check()) { -+ g_test_skip("'ulimit -l' is too small, require >=128M"); -+ return; -+ } -+ - if (new_rdma_link(buffer)) { - g_test_skip("No rdma link available\n" - "# To enable the test:\n" --- -2.39.3 - diff --git a/kvm-rbd-Fix-.bdrv_get_specific_info-implementation.patch b/kvm-rbd-Fix-.bdrv_get_specific_info-implementation.patch deleted file mode 100644 index 67f27e7..0000000 --- a/kvm-rbd-Fix-.bdrv_get_specific_info-implementation.patch +++ /dev/null @@ -1,273 +0,0 @@ -From 1fe4e3379f5f3ae4e5554b18b4b8c50fedd9203f Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 11 Aug 2025 15:40:10 +0200 -Subject: [PATCH] rbd: Fix .bdrv_get_specific_info implementation - -RH-Author: Kevin Wolf -RH-MergeRequest: 399: rbd: Fix .bdrv_get_specific_info implementation -RH-Jira: RHEL-105440 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/1] 840c954c84c97f768ae6cfb4aa0e2766b22a6f06 (kmwolf/centos-qemu-kvm) - -qemu_rbd_get_specific_info() has at least two problems: - -The first is that it issues a blocking rbd_read() call in order to probe -the encryption format for the image while querying the node. This means -that if the connection to the server goes down, not only I/O is stuck -(which is unavoidable), but query-names-block-nodes will actually make -the whole QEMU instance unresponsive. 
.bdrv_get_specific_info -implementations shouldn't perform blocking operations, but only return -what is already known. - -The second is that the information returned isn't even correct. If the -image is already opened with encryption enabled at the RBD level, we'll -probe for "double encryption", i.e. if the encrypted data contains -another encryption header. If it doesn't (which is the normal case), we -won't return the encryption format. If it does, we return misleading -information because it looks like we're talking about the outer level -(the encryption format of the image itself) while the information is -about an encryption header in the guest data. - -Fix this by storing the encryption format in BDRVRBDState when the image -is opened (and we do blocking operations anyway) and returning only the -stored information in qemu_rbd_get_specific_info(). - -The information we'll store is either the actual encryption format that -we enabled on the RBD level, or if the image is unencrypted, the result -of the same probing as we previously did when querying the node. Probing -image formats based on content that can be modified by the guest has -long been known as problematic, but as long as we only output it to the -user instead of making decisions based on it, it should be okay. It is -undoubtedly useful in the context of 'qemu-img info' when you're trying -to figure out which encryption options you have to use to open the -image successfully. 
- -Fixes: 42e4ac9ef5a6 ("block/rbd: Add support for rbd image encryption") -Buglink: https://issues.redhat.com/browse/RHEL-105440 -Signed-off-by: Kevin Wolf -Message-ID: <20250811134010.81787-1-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Signed-off-by: Kevin Wolf -(cherry picked from commit 4af976ef398e4e823addc00bf1c58787ba4952fe) -Signed-off-by: Kevin Wolf ---- - block/rbd.c | 104 ++++++++++++++++++++++++++++--------------- - qapi/block-core.json | 9 +++- - 2 files changed, 76 insertions(+), 37 deletions(-) - -diff --git a/block/rbd.c b/block/rbd.c -index 4f3d42a8e7..9b7b834f04 100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -99,6 +99,14 @@ typedef struct BDRVRBDState { - char *namespace; - uint64_t image_size; - uint64_t object_size; -+ -+ /* -+ * If @bs->encrypted is true, this is the encryption format actually loaded -+ * at the librbd level. If it is false, it is the result of probing. -+ * RBD_IMAGE_ENCRYPTION_FORMAT__MAX means that encryption is not enabled and -+ * probing didn't find any known encryption header either. 
-+ */ -+ RbdImageEncryptionFormat encryption_format; - } BDRVRBDState; - - typedef struct RBDTask { -@@ -471,10 +479,12 @@ static int qemu_rbd_encryption_format(rbd_image_t image, - return 0; - } - --static int qemu_rbd_encryption_load(rbd_image_t image, -+static int qemu_rbd_encryption_load(BlockDriverState *bs, -+ rbd_image_t image, - RbdEncryptionOptions *encrypt, - Error **errp) - { -+ BDRVRBDState *s = bs->opaque; - int r = 0; - g_autofree char *passphrase = NULL; - rbd_encryption_luks1_format_options_t luks_opts; -@@ -545,15 +555,19 @@ static int qemu_rbd_encryption_load(rbd_image_t image, - error_setg_errno(errp, -r, "encryption load fail"); - return r; - } -+ bs->encrypted = true; -+ s->encryption_format = encrypt->format; - - return 0; - } - - #ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2 --static int qemu_rbd_encryption_load2(rbd_image_t image, -+static int qemu_rbd_encryption_load2(BlockDriverState *bs, -+ rbd_image_t image, - RbdEncryptionOptions *encrypt, - Error **errp) - { -+ BDRVRBDState *s = bs->opaque; - int r = 0; - int encrypt_count = 1; - int i; -@@ -639,6 +653,8 @@ static int qemu_rbd_encryption_load2(rbd_image_t image, - error_setg_errno(errp, -r, "layered encryption load fail"); - goto exit; - } -+ bs->encrypted = true; -+ s->encryption_format = encrypt->format; - - exit: - for (i = 0; i < encrypt_count; ++i) { -@@ -672,6 +688,45 @@ exit: - #endif - #endif - -+/* -+ * For an image without encryption enabled on the rbd layer, probe the start of -+ * the image if it could be opened as an encrypted image so that we can display -+ * it when the user queries the node (most importantly in qemu-img). -+ * -+ * If the guest writes an encryption header to its disk after this probing, this -+ * won't be reflected when queried, but that's okay. There is no reason why the -+ * user should want to apply encryption at the rbd level while the image is -+ * still in use. This is just guest data. 
-+ */ -+static void qemu_rbd_encryption_probe(BlockDriverState *bs) -+{ -+ BDRVRBDState *s = bs->opaque; -+ char buf[RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {0}; -+ int r; -+ -+ assert(s->encryption_format == RBD_IMAGE_ENCRYPTION_FORMAT__MAX); -+ -+ r = rbd_read(s->image, 0, -+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN, buf); -+ if (r < RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) { -+ return; -+ } -+ -+ if (memcmp(buf, rbd_luks_header_verification, -+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { -+ s->encryption_format = RBD_IMAGE_ENCRYPTION_FORMAT_LUKS; -+ } else if (memcmp(buf, rbd_luks2_header_verification, -+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { -+ s->encryption_format = RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2; -+ } else if (memcmp(buf, rbd_layered_luks_header_verification, -+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { -+ s->encryption_format = RBD_IMAGE_ENCRYPTION_FORMAT_LUKS; -+ } else if (memcmp(buf, rbd_layered_luks2_header_verification, -+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { -+ s->encryption_format = RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2; -+ } -+} -+ - /* FIXME Deprecate and remove keypairs or make it available in QMP. 
*/ - static int qemu_rbd_do_create(BlockdevCreateOptions *options, - const char *keypairs, const char *password_secret, -@@ -1134,17 +1189,18 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, - goto failed_open; - } - -+ s->encryption_format = RBD_IMAGE_ENCRYPTION_FORMAT__MAX; - if (opts->encrypt) { - #ifdef LIBRBD_SUPPORTS_ENCRYPTION - if (opts->encrypt->parent) { - #ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2 -- r = qemu_rbd_encryption_load2(s->image, opts->encrypt, errp); -+ r = qemu_rbd_encryption_load2(bs, s->image, opts->encrypt, errp); - #else - r = -ENOTSUP; - error_setg(errp, "RBD library does not support layered encryption"); - #endif - } else { -- r = qemu_rbd_encryption_load(s->image, opts->encrypt, errp); -+ r = qemu_rbd_encryption_load(bs, s->image, opts->encrypt, errp); - } - if (r < 0) { - goto failed_post_open; -@@ -1154,6 +1210,8 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, - error_setg(errp, "RBD library does not support image encryption"); - goto failed_post_open; - #endif -+ } else { -+ qemu_rbd_encryption_probe(bs); - } - - r = rbd_stat(s->image, &info, sizeof(info)); -@@ -1413,17 +1471,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, - { - BDRVRBDState *s = bs->opaque; - ImageInfoSpecific *spec_info; -- char buf[RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {0}; -- int r; -- -- if (s->image_size >= RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) { -- r = rbd_read(s->image, 0, -- RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN, buf); -- if (r < 0) { -- error_setg_errno(errp, -r, "cannot read image start for probe"); -- return NULL; -- } -- } - - spec_info = g_new(ImageInfoSpecific, 1); - *spec_info = (ImageInfoSpecific){ -@@ -1431,28 +1478,13 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, - .u.rbd.data = g_new0(ImageInfoSpecificRbd, 1), - }; - -- if (memcmp(buf, rbd_luks_header_verification, -- 
RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { -- spec_info->u.rbd.data->encryption_format = -- RBD_IMAGE_ENCRYPTION_FORMAT_LUKS; -- spec_info->u.rbd.data->has_encryption_format = true; -- } else if (memcmp(buf, rbd_luks2_header_verification, -- RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { -- spec_info->u.rbd.data->encryption_format = -- RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2; -- spec_info->u.rbd.data->has_encryption_format = true; -- } else if (memcmp(buf, rbd_layered_luks_header_verification, -- RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { -- spec_info->u.rbd.data->encryption_format = -- RBD_IMAGE_ENCRYPTION_FORMAT_LUKS; -- spec_info->u.rbd.data->has_encryption_format = true; -- } else if (memcmp(buf, rbd_layered_luks2_header_verification, -- RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { -- spec_info->u.rbd.data->encryption_format = -- RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2; -- spec_info->u.rbd.data->has_encryption_format = true; -+ if (s->encryption_format == RBD_IMAGE_ENCRYPTION_FORMAT__MAX) { -+ assert(!bs->encrypted); - } else { -- spec_info->u.rbd.data->has_encryption_format = false; -+ ImageInfoSpecificRbd *rbd_info = spec_info->u.rbd.data; -+ -+ rbd_info->has_encryption_format = true; -+ rbd_info->encryption_format = s->encryption_format; - } - - return spec_info; -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 7f70ec6d3c..d00161af87 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -158,7 +158,14 @@ - ## - # @ImageInfoSpecificRbd: - # --# @encryption-format: Image encryption format -+# @encryption-format: Image encryption format. If encryption is enabled for the -+# image (see encrypted in BlockNodeInfo), this is the actual format in which the -+# image is accessed. If encryption is not enabled, this is the result of -+# probing when the image was opened, to give a suggestion which encryption -+# format could be enabled. 
Note that probing results can be changed by the -+# guest by writing a (possibly partial) encryption format header to the -+# image, so don't treat this information as trusted if the guest is not -+# trusted. - # - # Since: 6.1 - ## --- -2.39.3 - diff --git a/kvm-s390x-Fix-leak-in-machine_set_loadparm.patch b/kvm-s390x-Fix-leak-in-machine_set_loadparm.patch deleted file mode 100644 index 18ac593..0000000 --- a/kvm-s390x-Fix-leak-in-machine_set_loadparm.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 24605a535cf850b11fd5910045d5a9a494a0857d Mon Sep 17 00:00:00 2001 -From: Fabiano Rosas -Date: Fri, 9 May 2025 14:49:38 -0300 -Subject: [PATCH 01/43] s390x: Fix leak in machine_set_loadparm -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 386: s390x: Fix memory leaks related to loadparm [rhel-10] -RH-Jira: RHEL-98555 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Kevin Wolf -RH-Commit: [1/2] 04997385648ba61cc55c1c6a11ba553b65bf9fe9 (thuth/qemu-kvm-cs) - -ASAN spotted a leaking string in machine_set_loadparm(): - -Direct leak of 9 byte(s) in 1 object(s) allocated from: - #0 0x560ffb5bb379 in malloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:69:3 - #1 0x7f1aca926518 in g_malloc ../glib/gmem.c:106 - #2 0x7f1aca94113e in g_strdup ../glib/gstrfuncs.c:364 - #3 0x560ffc8afbf9 in qobject_input_type_str ../qapi/qobject-input-visitor.c:542:12 - #4 0x560ffc8a80ff in visit_type_str ../qapi/qapi-visit-core.c:349:10 - #5 0x560ffbe6053a in machine_set_loadparm ../hw/s390x/s390-virtio-ccw.c:802:10 - #6 0x560ffc0c5e52 in object_property_set ../qom/object.c:1450:5 - #7 0x560ffc0d4175 in object_property_set_qobject ../qom/qom-qobject.c:28:10 - #8 0x560ffc0c6004 in object_property_set_str ../qom/object.c:1458:15 - #9 0x560ffbe2ae60 in update_machine_ipl_properties ../hw/s390x/ipl.c:569:9 - #10 0x560ffbe2aa65 in s390_ipl_update_diag308 ../hw/s390x/ipl.c:594:5 - #11 0x560ffbdee132 in handle_diag_308 
../target/s390x/diag.c:147:9 - #12 0x560ffbebb956 in helper_diag ../target/s390x/tcg/misc_helper.c:137:9 - #13 0x7f1a3c51c730 (/memfd:tcg-jit (deleted)+0x39730) - -Cc: qemu-stable@nongnu.org -Signed-off-by: Fabiano Rosas -Message-ID: <20250509174938.25935-1-farosas@suse.de> -Fixes: 1fd396e3228 ("s390x: Register TYPE_S390_CCW_MACHINE properties as class properties") -Reviewed-by: Thomas Huth -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth -(cherry picked from commit bdf12f2a56bf3f13c52eb51f0a994bbfe40706b2) ---- - hw/s390x/s390-virtio-ccw.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 59b545740e..dd3fc13683 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -804,6 +804,7 @@ static void machine_set_loadparm(Object *obj, Visitor *v, - } - - s390_ipl_fmt_loadparm(ms->loadparm, val, errp); -+ g_free(val); - } - - static void ccw_machine_class_init(ObjectClass *oc, void *data) --- -2.39.3 - diff --git a/kvm-scsi-disk-Add-native-FUA-write-support.patch b/kvm-scsi-disk-Add-native-FUA-write-support.patch deleted file mode 100644 index 895c735..0000000 --- a/kvm-scsi-disk-Add-native-FUA-write-support.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 776febe2ec5735280ea562309cddf54b1b93a673 Mon Sep 17 00:00:00 2001 -From: Alberto Faria -Date: Fri, 2 May 2025 13:11:14 +0100 -Subject: [PATCH 1/5] scsi-disk: Add native FUA write support - -RH-Author: Alberto Faria -RH-MergeRequest: 374: scsi-disk: Add FUA write support -RH-Jira: RHEL-71962 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/1] c51c618a2d8f027422b603b9ced89d000cbf5fa1 - -Simply propagate the FUA flag on write requests to the driver. The block -layer will emulate it if necessary. 
- -Signed-off-by: Alberto Faria -Message-ID: <20250502121115.3613717-2-afaria@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit e460991883d7209d52d0fdb534d9cd8cce0f9cce) -Signed-off-by: Alberto Faria ---- - hw/scsi/scsi-disk.c | 53 +++++++++++++-------------------------------- - 1 file changed, 15 insertions(+), 38 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index e59632e9b1..40cf379017 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -74,7 +74,7 @@ struct SCSIDiskClass { - */ - DMAIOFunc *dma_readv; - DMAIOFunc *dma_writev; -- bool (*need_fua_emulation)(SCSICommand *cmd); -+ bool (*need_fua)(SCSICommand *cmd); - void (*update_sense)(SCSIRequest *r); - }; - -@@ -85,7 +85,7 @@ typedef struct SCSIDiskReq { - uint32_t sector_count; - uint32_t buflen; - bool started; -- bool need_fua_emulation; -+ bool need_fua; - struct iovec iov; - QEMUIOVector qiov; - BlockAcctCookie acct; -@@ -389,24 +389,6 @@ static bool scsi_is_cmd_fua(SCSICommand *cmd) - } - } - --static void scsi_write_do_fua(SCSIDiskReq *r) --{ -- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); -- -- assert(r->req.aiocb == NULL); -- assert(!r->req.io_canceled); -- -- if (r->need_fua_emulation) { -- block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, -- BLOCK_ACCT_FLUSH); -- r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r); -- return; -- } -- -- scsi_req_complete(&r->req, GOOD); -- scsi_req_unref(&r->req); --} -- - static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) - { - assert(r->req.aiocb == NULL); -@@ -416,12 +398,7 @@ static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) - - r->sector += r->sector_count; - r->sector_count = 0; -- if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { -- scsi_write_do_fua(r); -- return; -- } else { -- scsi_req_complete(&r->req, GOOD); -- } -+ scsi_req_complete(&r->req, GOOD); - - done: - scsi_req_unref(&r->req); -@@ -564,7 +541,7 @@ static 
void scsi_read_data(SCSIRequest *req) - - first = !r->started; - r->started = true; -- if (first && r->need_fua_emulation) { -+ if (first && r->need_fua) { - block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, - BLOCK_ACCT_FLUSH); - r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r); -@@ -589,8 +566,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) - r->sector += n; - r->sector_count -= n; - if (r->sector_count == 0) { -- scsi_write_do_fua(r); -- return; -+ scsi_req_complete(&r->req, GOOD); - } else { - scsi_init_iovec(r, SCSI_DMA_BUF_SIZE); - trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size); -@@ -623,6 +599,7 @@ static void scsi_write_data(SCSIRequest *req) - SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); -+ BlockCompletionFunc *cb; - - /* No data transfer may already be in progress */ - assert(r->req.aiocb == NULL); -@@ -648,11 +625,10 @@ static void scsi_write_data(SCSIRequest *req) - - if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 || - r->req.cmd.buf[0] == VERIFY_16) { -- if (r->req.sg) { -- scsi_dma_complete_noio(r, 0); -- } else { -- scsi_write_complete_noio(r, 0); -- } -+ block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, -+ BLOCK_ACCT_FLUSH); -+ cb = r->req.sg ? 
scsi_dma_complete : scsi_write_complete; -+ r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, cb, r); - return; - } - -@@ -2391,7 +2367,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf) - scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE)); - return 0; - } -- r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd); -+ r->need_fua = sdc->need_fua(&r->req.cmd); - if (r->sector_count == 0) { - scsi_req_complete(&r->req, GOOD); - } -@@ -3137,7 +3113,8 @@ BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov, - { - SCSIDiskReq *r = opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); -- return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque); -+ int flags = r->need_fua ? BDRV_REQ_FUA : 0; -+ return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, flags, cb, cb_opaque); - } - - static char *scsi_property_get_loadparm(Object *obj, Error **errp) -@@ -3186,7 +3163,7 @@ static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data) - device_class_set_legacy_reset(dc, scsi_disk_reset); - sdc->dma_readv = scsi_dma_readv; - sdc->dma_writev = scsi_dma_writev; -- sdc->need_fua_emulation = scsi_is_cmd_fua; -+ sdc->need_fua = scsi_is_cmd_fua; - } - - static const TypeInfo scsi_disk_base_info = { -@@ -3338,7 +3315,7 @@ static void scsi_block_class_initfn(ObjectClass *klass, void *data) - sdc->dma_readv = scsi_block_dma_readv; - sdc->dma_writev = scsi_block_dma_writev; - sdc->update_sense = scsi_block_update_sense; -- sdc->need_fua_emulation = scsi_block_no_fua; -+ sdc->need_fua = scsi_block_no_fua; - dc->desc = "SCSI block device passthrough"; - device_class_set_props(dc, scsi_block_properties); - dc->vmsd = &vmstate_scsi_disk_state; --- -2.39.3 - diff --git a/kvm-target-i386-Add-couple-of-feature-bits-in-CPUID_Fn80.patch b/kvm-target-i386-Add-couple-of-feature-bits-in-CPUID_Fn80.patch deleted file mode 100644 index 9199986..0000000 --- a/kvm-target-i386-Add-couple-of-feature-bits-in-CPUID_Fn80.patch +++ 
/dev/null @@ -1,81 +0,0 @@ -From d2fcec417c3ce57ef25f8eea957a5fef9c2866bf Mon Sep 17 00:00:00 2001 -From: Babu Moger -Date: Thu, 8 May 2025 14:58:02 -0500 -Subject: [PATCH 06/43] target/i386: Add couple of feature bits in - CPUID_Fn80000021_EAX - -RH-Author: John Allen -RH-MergeRequest: 377: Update EPYC Models and Feature Bits -RH-Jira: RHEL-52650 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/6] 568d79477d85be653fa580d329495193c24e1500 (johnalle/qemu-kvm-fork) - -Add CPUID bit indicates that a WRMSR to MSR_FS_BASE, MSR_GS_BASE, or -MSR_KERNEL_GS_BASE is non-serializing amd PREFETCHI that the indicates -support for IC prefetch. - -CPUID_Fn80000021_EAX -Bit Feature description -20 Indicates support for IC prefetch. -1 FsGsKernelGsBaseNonSerializing. - WRMSR to FS_BASE, GS_BASE and KernelGSbase are non-serializing. - -Link: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/programmer-references/57238.zip -Signed-off-by: Babu Moger -Reviewed-by: Maksim Davydov -Reviewed-by: Zhao Liu -Link: https://lore.kernel.org/r/a5f6283a59579b09ac345b3f21ecb3b3b2d92451.1746734284.git.babu.moger@amd.com -Signed-off-by: Paolo Bonzini -(cherry picked from commit dfd5b456108a75588ab094358ba5754787146d3d) - -JIRA: https://issues.redhat.com/browse/RHEL-52650 - -Signed-off-by: John Allen ---- - target/i386/cpu.c | 4 ++-- - target/i386/cpu.h | 4 ++++ - 2 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index b3052c6c2c..dd16f885cf 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1237,12 +1237,12 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - [FEAT_8000_0021_EAX] = { - .type = CPUID_FEATURE_WORD, - .feat_names = { -- "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, -+ "no-nested-data-bp", "fs-gs-base-ns", "lfence-always-serializing", NULL, - NULL, NULL, "null-sel-clr-base", NULL, - "auto-ibrs", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -- NULL, NULL, NULL, 
NULL, -+ "prefetchi", NULL, NULL, NULL, - "eraps", NULL, NULL, "sbpb", - "ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL, - }, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 76f24446a5..d7dcd59b2d 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -1074,12 +1074,16 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); - - /* Processor ignores nested data breakpoints */ - #define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) -+/* WRMSR to FS_BASE, GS_BASE, or KERNEL_GS_BASE is non-serializing */ -+#define CPUID_8000_0021_EAX_FS_GS_BASE_NS (1U << 1) - /* LFENCE is always serializing */ - #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) - /* Null Selector Clears Base */ - #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) - /* Automatic IBRS */ - #define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) -+/* Indicates support for IC prefetch */ -+#define CPUID_8000_0021_EAX_PREFETCHI (1U << 20) - /* Enhanced Return Address Predictor Scurity */ - #define CPUID_8000_0021_EAX_ERAPS (1U << 24) - /* Selective Branch Predictor Barrier */ --- -2.39.3 - diff --git a/kvm-target-i386-Add-support-for-EPYC-Turin-model.patch b/kvm-target-i386-Add-support-for-EPYC-Turin-model.patch deleted file mode 100644 index 4afe216..0000000 --- a/kvm-target-i386-Add-support-for-EPYC-Turin-model.patch +++ /dev/null @@ -1,200 +0,0 @@ -From 6a86e17bd55a867b903e14a6cff3d985a046a794 Mon Sep 17 00:00:00 2001 -From: Babu Moger -Date: Thu, 8 May 2025 14:58:04 -0500 -Subject: [PATCH 08/43] target/i386: Add support for EPYC-Turin model - -RH-Author: John Allen -RH-MergeRequest: 377: Update EPYC Models and Feature Bits -RH-Jira: RHEL-52650 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/6] 3d8805bafc53da2bd5a2b2b96cc892ea7ca9f2b3 (johnalle/qemu-kvm-fork) - -Add the support for AMD EPYC zen 5 processors (EPYC-Turin). - -Add the following new feature bits on top of the feature bits from -the previous generation EPYC models. 
- -movdiri : Move Doubleword as Direct Store Instruction -movdir64b : Move 64 Bytes as Direct Store Instruction -avx512-vp2intersect : AVX512 Vector Pair Intersection to a Pair - of Mask Register -avx-vnni : AVX VNNI Instruction -prefetchi : Indicates support for IC prefetch -sbpb : Selective Branch Predictor Barrier -ibpb-brtype : IBPB includes branch type prediction flushing -srso-user-kernel-no : Not vulnerable to SRSO at the user-kernel boundary - -Link: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/programmer-references/57238.zip -Link: https://www.amd.com/content/dam/amd/en/documents/corporate/cr/speculative-return-stack-overflow-whitepaper.pdf -Signed-off-by: Babu Moger -Reviewed-by: Zhao Liu -Link: https://lore.kernel.org/r/b4fa7708a0e1453d2e9b8ec3dc881feb92eeca0b.1746734284.git.babu.moger@amd.com -Signed-off-by: Paolo Bonzini -(cherry picked from commit 3771a4daa273ba17cb27309984413790d1df5651) - -JIRA: https://issues.redhat.com/browse/RHEL-52650 - -Signed-off-by: John Allen ---- - target/i386/cpu.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 138 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 523ac08a07..3e31999d19 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2668,6 +2668,61 @@ static const CPUCaches epyc_genoa_v2_cache_info = { - .share_level = CPU_TOPOLOGY_LEVEL_DIE, - }, - }; -+ -+static const CPUCaches epyc_turin_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 48 * KiB, -+ .line_size = 64, -+ .associativity = 12, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ 
.l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 1 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .no_invd_sharing = true, -+ .complex_indexing = false, -+ .share_level = CPU_TOPOLOGY_LEVEL_DIE, -+ }, -+}; -+ - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -5937,6 +5992,89 @@ static const X86CPUDefinition builtin_x86_defs[] = { - .model_id = "Zhaoxin YongFeng Processor", - }, - #endif -+ { -+ .name = "EPYC-Turin", -+ .level = 0xd, -+ .vendor = CPUID_VENDOR_AMD, -+ .family = 26, -+ .model = 0, -+ .stepping = 0, -+ .features[FEAT_1_ECX] = -+ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | -+ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | -+ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | -+ CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | -+ CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | -+ CPUID_EXT_SSE3, -+ .features[FEAT_1_EDX] = -+ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | -+ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | -+ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | -+ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | -+ CPUID_VME | CPUID_FP87, -+ .features[FEAT_6_EAX] = -+ CPUID_6_EAX_ARAT, -+ .features[FEAT_7_0_EBX] = -+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | -+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | -+ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | -+ CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -+ CPUID_7_0_EBX_SMAP | 
CPUID_7_0_EBX_AVX512IFMA | -+ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | -+ CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | -+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, -+ .features[FEAT_7_0_ECX] = -+ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | -+ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | -+ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | -+ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | -+ CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | -+ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_MOVDIRI | -+ CPUID_7_0_ECX_MOVDIR64B, -+ .features[FEAT_7_0_EDX] = -+ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_AVX512_VP2INTERSECT, -+ .features[FEAT_7_1_EAX] = -+ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16, -+ .features[FEAT_8000_0001_ECX] = -+ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | -+ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | -+ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, -+ .features[FEAT_8000_0001_EDX] = -+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | -+ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | -+ CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0007_EBX] = -+ CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR, -+ .features[FEAT_8000_0008_EBX] = -+ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | -+ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | -+ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | -+ CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | -+ CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, -+ .features[FEAT_8000_0021_EAX] = -+ CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | -+ CPUID_8000_0021_EAX_FS_GS_BASE_NS | -+ CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | -+ CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | -+ CPUID_8000_0021_EAX_AUTO_IBRS | CPUID_8000_0021_EAX_PREFETCHI | -+ CPUID_8000_0021_EAX_SBPB | CPUID_8000_0021_EAX_IBPB_BRTYPE | -+ CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO, -+ 
.features[FEAT_8000_0022_EAX] = -+ CPUID_8000_0022_EAX_PERFMON_V2, -+ .features[FEAT_XSAVE] = -+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | -+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, -+ .features[FEAT_SVM] = -+ CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE | -+ CPUID_SVM_TSCSCALE | CPUID_SVM_VMCBCLEAN | CPUID_SVM_FLUSHASID | -+ CPUID_SVM_PAUSEFILTER | CPUID_SVM_PFTHRESHOLD | -+ CPUID_SVM_V_VMSAVE_VMLOAD | CPUID_SVM_VGIF | -+ CPUID_SVM_VNMI | CPUID_SVM_SVME_ADDR_CHK, -+ .xlevel = 0x80000022, -+ .model_id = "AMD EPYC-Turin Processor", -+ .cache_info = &epyc_turin_cache_info, -+ }, - }; - - /* --- -2.39.3 - diff --git a/kvm-target-i386-Update-EPYC-CPU-model-for-Cache-property.patch b/kvm-target-i386-Update-EPYC-CPU-model-for-Cache-property.patch deleted file mode 100644 index 623736f..0000000 --- a/kvm-target-i386-Update-EPYC-CPU-model-for-Cache-property.patch +++ /dev/null @@ -1,147 +0,0 @@ -From bec357aa1a80983969a1bb8524d4d649dd5ca355 Mon Sep 17 00:00:00 2001 -From: Babu Moger -Date: Thu, 8 May 2025 14:57:59 -0500 -Subject: [PATCH 03/43] target/i386: Update EPYC CPU model for Cache property, - RAS, SVM feature bits - -RH-Author: John Allen -RH-MergeRequest: 377: Update EPYC Models and Feature Bits -RH-Jira: RHEL-52650 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/6] 4d46c1c5cc85b8c5085fe1089ee34a03fcc24b71 (johnalle/qemu-kvm-fork) - -Found that some of the cache properties are not set correctly for EPYC models. - -l1d_cache.no_invd_sharing should not be true. -l1i_cache.no_invd_sharing should not be true. - -L2.self_init should be true. -L2.inclusive should be true. - -L3.inclusive should not be true. -L3.no_invd_sharing should be true. - -Fix the cache properties. - -Also add the missing RAS and SVM features bits on AMD -EPYC CPU models. The SVM feature bits are used in nested guests. - -succor : Software uncorrectable error containment and recovery capability. -overflow-recov : MCA overflow recovery support. 
-lbrv : LBR virtualization -tsc-scale : MSR based TSC rate control -vmcb-clean : VMCB clean bits -flushbyasid : Flush by ASID -pause-filter : Pause intercept filter -pfthreshold : PAUSE filter threshold -v-vmsave-vmload : Virtualized VMLOAD and VMSAVE -vgif : Virtualized GIF - -Signed-off-by: Babu Moger -Reviewed-by: Maksim Davydov -Reviewed-by: Zhao Liu -Link: https://lore.kernel.org/r/515941861700d7066186c9600bc5d96a1741ef0c.1746734284.git.babu.moger@amd.com -Signed-off-by: Paolo Bonzini -(cherry picked from commit 397db937e85d7b9f5a6f0b30764786cef09d1ff3) - -JIRA: https://issues.redhat.com/browse/RHEL-52650 - -Signed-off-by: John Allen ---- - target/i386/cpu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 73 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 0b09990a8f..e588f81b2e 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2183,6 +2183,60 @@ static CPUCaches epyc_v4_cache_info = { - }, - }; - -+static CPUCaches epyc_v5_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 64 * KiB, -+ .line_size = 64, -+ .associativity = 4, -+ .partitions = 1, -+ .sets = 256, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 8 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ 
.partitions = 1, -+ .sets = 8192, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .no_invd_sharing = true, -+ .complex_indexing = false, -+ .share_level = CPU_TOPOLOGY_LEVEL_DIE, -+ }, -+}; -+ - static const CPUCaches epyc_rome_cache_info = { - .l1d_cache = &(CPUCacheInfo) { - .type = DATA_CACHE, -@@ -5226,6 +5280,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - .cache_info = &epyc_v4_cache_info - }, -+ { -+ .version = 5, -+ .props = (PropValue[]) { -+ { "overflow-recov", "on" }, -+ { "succor", "on" }, -+ { "lbrv", "on" }, -+ { "tsc-scale", "on" }, -+ { "vmcb-clean", "on" }, -+ { "flushbyasid", "on" }, -+ { "pause-filter", "on" }, -+ { "pfthreshold", "on" }, -+ { "v-vmsave-vmload", "on" }, -+ { "vgif", "on" }, -+ { "model-id", -+ "AMD EPYC-v5 Processor" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_v5_cache_info -+ }, - { /* end of list */ } - } - }, --- -2.39.3 - diff --git a/kvm-target-i386-Update-EPYC-Genoa-for-Cache-property-per.patch b/kvm-target-i386-Update-EPYC-Genoa-for-Cache-property-per.patch deleted file mode 100644 index 7f9ed58..0000000 --- a/kvm-target-i386-Update-EPYC-Genoa-for-Cache-property-per.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 9e8cee2f5b79e7769533b0db15cddb98acb3fabe Mon Sep 17 00:00:00 2001 -From: Babu Moger -Date: Thu, 8 May 2025 14:58:03 -0500 -Subject: [PATCH 07/43] target/i386: Update EPYC-Genoa for Cache property, - perfmon-v2, RAS and SVM feature bits - -RH-Author: John Allen -RH-MergeRequest: 377: Update EPYC Models and Feature Bits -RH-Jira: RHEL-52650 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/6] 1341ffa8eeda12c964d793c7c99e56c114796f04 (johnalle/qemu-kvm-fork) - -Found that some of the cache properties are not set correctly for EPYC models. -l1d_cache.no_invd_sharing should not be true. -l1i_cache.no_invd_sharing should not be true. - -L2.self_init should be true. -L2.inclusive should be true. - -L3.inclusive should not be true. -L3.no_invd_sharing should be true. - -Fix these cache properties. 
- -Also add the missing RAS and SVM features bits on AMD EPYC-Genoa model. -The SVM feature bits are used in nested guests. - -perfmon-v2 : Allow guests to make use of the PerfMonV2 features. -succor : Software uncorrectable error containment and recovery capability. -overflow-recov : MCA overflow recovery support. -lbrv : LBR virtualization -tsc-scale : MSR based TSC rate control -vmcb-clean : VMCB clean bits -flushbyasid : Flush by ASID -pause-filter : Pause intercept filter -pfthreshold : PAUSE filter threshold -v-vmsave-vmload: Virtualized VMLOAD and VMSAVE -vgif : Virtualized GIF -fs-gs-base-ns : WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing - -The feature details are available in APM listed below [1]. -[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming -Publication # 24593 Revision 3.41. - -Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 -Signed-off-by: Babu Moger -Reviewed-by: Maksim Davydov -Reviewed-by: Zhao Liu -Link: https://lore.kernel.org/r/afe3f05d4116124fd5795f28fc23d7b396140313.1746734284.git.babu.moger@amd.com -Signed-off-by: Paolo Bonzini -(cherry picked from commit abc92cc8488b5dbcc403b5be24d8092180605101) - -JIRA: https://issues.redhat.com/browse/RHEL-52650 - -Signed-off-by: John Allen ---- - target/i386/cpu.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 78 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index dd16f885cf..523ac08a07 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2615,6 +2615,59 @@ static const CPUCaches epyc_genoa_cache_info = { - }, - }; - -+static const CPUCaches epyc_genoa_v2_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ 
.size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 1 * MiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 2048, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .no_invd_sharing = true, -+ .complex_indexing = false, -+ .share_level = CPU_TOPOLOGY_LEVEL_DIE, -+ }, -+}; - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -5732,6 +5785,31 @@ static const X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x80000022, - .model_id = "AMD EPYC-Genoa Processor", - .cache_info = &epyc_genoa_cache_info, -+ .versions = (X86CPUVersionDefinition[]) { -+ { .version = 1 }, -+ { -+ .version = 2, -+ .props = (PropValue[]) { -+ { "overflow-recov", "on" }, -+ { "succor", "on" }, -+ { "lbrv", "on" }, -+ { "tsc-scale", "on" }, -+ { "vmcb-clean", "on" }, -+ { "flushbyasid", "on" }, -+ { "pause-filter", "on" }, -+ { "pfthreshold", "on" }, -+ { "v-vmsave-vmload", "on" }, -+ { "vgif", "on" }, -+ { "fs-gs-base-ns", "on" }, -+ { "perfmon-v2", "on" }, -+ { "model-id", -+ "AMD EPYC-Genoa-v2 Processor" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_genoa_v2_cache_info -+ }, -+ { /* end of list */ } -+ } - }, - #if 0 // Disabled for Red Hat Enterprise Linux - { --- -2.39.3 - diff --git a/kvm-target-i386-Update-EPYC-Milan-CPU-model-for-Cache-pr.patch b/kvm-target-i386-Update-EPYC-Milan-CPU-model-for-Cache-pr.patch deleted file mode 100644 index 8457fad..0000000 --- 
a/kvm-target-i386-Update-EPYC-Milan-CPU-model-for-Cache-pr.patch +++ /dev/null @@ -1,146 +0,0 @@ -From 18fc3436b689504c353e92a0e010fc3bb6a5a4af Mon Sep 17 00:00:00 2001 -From: Babu Moger -Date: Thu, 8 May 2025 14:58:01 -0500 -Subject: [PATCH 05/43] target/i386: Update EPYC-Milan CPU model for Cache - property, RAS, SVM feature bits - -RH-Author: John Allen -RH-MergeRequest: 377: Update EPYC Models and Feature Bits -RH-Jira: RHEL-52650 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/6] f6f2a437a158296e884da0f4deac28d58d15ffae (johnalle/qemu-kvm-fork) - -Found that some of the cache properties are not set correctly for EPYC models. -l1d_cache.no_invd_sharing should not be true. -l1i_cache.no_invd_sharing should not be true. - -L2.self_init should be true. -L2.inclusive should be true. - -L3.inclusive should not be true. -L3.no_invd_sharing should be true. - -Fix these cache properties. - -Also add the missing RAS and SVM features bits on AMD EPYC-Milan model. -The SVM feature bits are used in nested guests. - -succor : Software uncorrectable error containment and recovery capability. -overflow-recov : MCA overflow recovery support. 
-lbrv : LBR virtualization -tsc-scale : MSR based TSC rate control -vmcb-clean : VMCB clean bits -flushbyasid : Flush by ASID -pause-filter : Pause intercept filter -pfthreshold : PAUSE filter threshold -v-vmsave-vmload : Virtualized VMLOAD and VMSAVE -vgif : Virtualized GIF - -Signed-off-by: Babu Moger -Reviewed-by: Maksim Davydov -Reviewed-by: Zhao Liu -Link: https://lore.kernel.org/r/c619c0e09a9d5d496819ed48d69181d65f416891.1746734284.git.babu.moger@amd.com -Signed-off-by: Paolo Bonzini -(cherry picked from commit fc014d9ba5b26b27401e0e88a4e1ef827c68fe64) - -JIRA: https://issues.redhat.com/browse/RHEL-52650 - -Signed-off-by: John Allen ---- - target/i386/cpu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 73 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 403c72ca60..b3052c6c2c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2507,6 +2507,60 @@ static const CPUCaches epyc_milan_v2_cache_info = { - }, - }; - -+static const CPUCaches epyc_milan_v3_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ 
.associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .no_invd_sharing = true, -+ .complex_indexing = false, -+ .share_level = CPU_TOPOLOGY_LEVEL_DIE, -+ }, -+}; -+ - static const CPUCaches epyc_genoa_cache_info = { - .l1d_cache = &(CPUCacheInfo) { - .type = DATA_CACHE, -@@ -5585,6 +5639,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - .cache_info = &epyc_milan_v2_cache_info - }, -+ { -+ .version = 3, -+ .props = (PropValue[]) { -+ { "overflow-recov", "on" }, -+ { "succor", "on" }, -+ { "lbrv", "on" }, -+ { "tsc-scale", "on" }, -+ { "vmcb-clean", "on" }, -+ { "flushbyasid", "on" }, -+ { "pause-filter", "on" }, -+ { "pfthreshold", "on" }, -+ { "v-vmsave-vmload", "on" }, -+ { "vgif", "on" }, -+ { "model-id", -+ "AMD EPYC-Milan-v3 Processor" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_milan_v3_cache_info -+ }, - { /* end of list */ } - } - }, --- -2.39.3 - diff --git a/kvm-target-i386-Update-EPYC-Rome-CPU-model-for-Cache-pro.patch b/kvm-target-i386-Update-EPYC-Rome-CPU-model-for-Cache-pro.patch deleted file mode 100644 index 7d454bf..0000000 --- a/kvm-target-i386-Update-EPYC-Rome-CPU-model-for-Cache-pro.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 3f18ecff6d6cc0601662fa281551881fdf49d33a Mon Sep 17 00:00:00 2001 -From: Babu Moger -Date: Thu, 8 May 2025 14:58:00 -0500 -Subject: [PATCH 04/43] target/i386: Update EPYC-Rome CPU model for Cache - property, RAS, SVM feature bits - -RH-Author: John Allen -RH-MergeRequest: 377: Update EPYC Models and Feature Bits -RH-Jira: RHEL-52650 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/6] 2e08f5372c332152044332474aec4b783c121488 (johnalle/qemu-kvm-fork) - -Found that some of the cache properties are not set correctly for EPYC models. - -l1d_cache.no_invd_sharing should not be true. -l1i_cache.no_invd_sharing should not be true. - -L2.self_init should be true. -L2.inclusive should be true. - -L3.inclusive should not be true. 
-L3.no_invd_sharing should be true. - -Fix these cache properties. - -Also add the missing RAS and SVM features bits on AMD EPYC-Rome. The SVM -feature bits are used in nested guests. - -succor : Software uncorrectable error containment and recovery capability. -overflow-recov : MCA overflow recovery support. -lbrv : LBR virtualization -tsc-scale : MSR based TSC rate control -vmcb-clean : VMCB clean bits -flushbyasid : Flush by ASID -pause-filter : Pause intercept filter -pfthreshold : PAUSE filter threshold -v-vmsave-vmload : Virtualized VMLOAD and VMSAVE -vgif : Virtualized GIF - -Signed-off-by: Babu Moger -Reviewed-by: Maksim Davydov -Reviewed-by: Zhao Liu -Link: https://lore.kernel.org/r/8265af72057b84c99ac3a02a5487e32759cc69b1.1746734284.git.babu.moger@amd.com -Signed-off-by: Paolo Bonzini -(cherry picked from commit 83d940e9700527ff080416ce2fa52ee1f4771d72) - -JIRA: https://issues.redhat.com/browse/RHEL-52650 - -Signed-off-by: John Allen ---- - target/i386/cpu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 73 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index e588f81b2e..403c72ca60 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2345,6 +2345,60 @@ static const CPUCaches epyc_rome_v3_cache_info = { - }, - }; - -+static const CPUCaches epyc_rome_v5_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size 
= 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .share_level = CPU_TOPOLOGY_LEVEL_CORE, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 16 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 16384, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .no_invd_sharing = true, -+ .complex_indexing = false, -+ .share_level = CPU_TOPOLOGY_LEVEL_DIE, -+ }, -+}; -+ - static const CPUCaches epyc_milan_cache_info = { - .l1d_cache = &(CPUCacheInfo) { - .type = DATA_CACHE, -@@ -5437,6 +5491,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - }, - }, -+ { -+ .version = 5, -+ .props = (PropValue[]) { -+ { "overflow-recov", "on" }, -+ { "succor", "on" }, -+ { "lbrv", "on" }, -+ { "tsc-scale", "on" }, -+ { "vmcb-clean", "on" }, -+ { "flushbyasid", "on" }, -+ { "pause-filter", "on" }, -+ { "pfthreshold", "on" }, -+ { "v-vmsave-vmload", "on" }, -+ { "vgif", "on" }, -+ { "model-id", -+ "AMD EPYC-Rome-v5 Processor" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_rome_v5_cache_info -+ }, - { /* end of list */ } - } - }, --- -2.39.3 - diff --git a/kvm-tests-Add-iotest-mirror-sparse-for-recent-patches.patch b/kvm-tests-Add-iotest-mirror-sparse-for-recent-patches.patch deleted file mode 100644 index 70ab894..0000000 --- a/kvm-tests-Add-iotest-mirror-sparse-for-recent-patches.patch +++ /dev/null @@ -1,545 +0,0 @@ -From 2bb881df5b93f5534e5f0b91cf1ed3e0b524c2d3 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 9 May 2025 15:40:30 -0500 -Subject: [PATCH 13/14] tests: Add iotest mirror-sparse for recent patches - -RH-Author: Eric Blake -RH-MergeRequest: 363: blockdev-mirror: More efficient handling of sparse mirrors -RH-Jira: RHEL-88435 RHEL-88437 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/14] 474f12dfe9161c7e9f59cafde203e5183e2fc3f5 
(ebblake/centos-qemu-kvm) - -Prove that blockdev-mirror can now result in sparse raw destination -files, regardless of whether the source is raw or qcow2. By making -this a separate test, it was possible to test effects of individual -patches for the various pieces that all have to work together for a -sparse mirror to be successful. - -Note that ./check -file produces different job lengths than ./check --qcow2 (the test uses a filter to normalize); that's because when -deciding how much of the image to be mirrored, the code looks at how -much of the source image was allocated (for qcow2, this is only the -written clusters; for raw, it is the entire file). But the important -part is that the destination file ends up smaller than 3M, rather than -the 20M it used to be before this patch series. - -Signed-off-by: Eric Blake -Message-ID: <20250509204341.3553601-28-eblake@redhat.com> -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit c0ddcb2cbc146e64f666eaae4edc7b5db7e5814d) -Jira: https://issues.redhat.com/browse/RHEL-88435 -Jira: https://issues.redhat.com/browse/RHEL-88437 -Signed-off-by: Eric Blake ---- - tests/qemu-iotests/tests/mirror-sparse | 125 +++++++ - tests/qemu-iotests/tests/mirror-sparse.out | 365 +++++++++++++++++++++ - 2 files changed, 490 insertions(+) - create mode 100755 tests/qemu-iotests/tests/mirror-sparse - create mode 100644 tests/qemu-iotests/tests/mirror-sparse.out - -diff --git a/tests/qemu-iotests/tests/mirror-sparse b/tests/qemu-iotests/tests/mirror-sparse -new file mode 100755 -index 0000000000..8c52a4e244 ---- /dev/null -+++ b/tests/qemu-iotests/tests/mirror-sparse -@@ -0,0 +1,125 @@ -+#!/usr/bin/env bash -+# group: rw auto quick -+# -+# Test blockdev-mirror with raw sparse destination -+# -+# Copyright (C) 2025 Red Hat, Inc. 
-+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+seq="$(basename $0)" -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+ _cleanup_qemu -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+cd .. -+. ./common.rc -+. ./common.filter -+. ./common.qemu -+ -+_supported_fmt qcow2 raw # Format of the source. 
dst is always raw file -+_supported_proto file -+_supported_os Linux -+ -+echo -+echo "=== Initial image setup ===" -+echo -+ -+TEST_IMG="$TEST_IMG.base" _make_test_img 20M -+$QEMU_IO -c 'w 8M 2M' -f $IMGFMT "$TEST_IMG.base" | _filter_qemu_io -+ -+_launch_qemu \ -+ -blockdev '{"driver":"file", "cache":{"direct":true, "no-flush":false}, -+ "filename":"'"$TEST_IMG.base"'", "node-name":"src-file"}' \ -+ -blockdev '{"driver":"'$IMGFMT'", "node-name":"src", "file":"src-file"}' -+h1=$QEMU_HANDLE -+_send_qemu_cmd $h1 '{"execute": "qmp_capabilities"}' 'return' -+ -+# Check several combinations; most should result in a sparse destination; -+# the destination should only be fully allocated if pre-allocated -+# and not punching holes due to detect-zeroes -+# do_test creation discard zeroes result -+do_test() { -+ creation=$1 -+ discard=$2 -+ zeroes=$3 -+ expected=$4 -+ -+echo -+echo "=== Testing creation=$creation discard=$discard zeroes=$zeroes ===" -+echo -+ -+rm -f $TEST_IMG -+if test $creation = external; then -+ truncate --size=20M $TEST_IMG -+else -+ _send_qemu_cmd $h1 '{"execute": "blockdev-create", "arguments": -+ {"options": {"driver":"file", "filename":"'$TEST_IMG'", -+ "size":'$((20*1024*1024))', "preallocation":"'$creation'"}, -+ "job-id":"job1"}}' 'concluded' -+ _send_qemu_cmd $h1 '{"execute": "job-dismiss", "arguments": -+ {"id": "job1"}}' 'return' -+fi -+_send_qemu_cmd $h1 '{"execute": "blockdev-add", "arguments": -+ {"node-name": "dst", "driver":"file", -+ "filename":"'$TEST_IMG'", "aio":"threads", -+ "auto-read-only":true, "discard":"'$discard'", -+ "detect-zeroes":"'$zeroes'"}}' 'return' -+_send_qemu_cmd $h1 '{"execute":"blockdev-mirror", "arguments": -+ {"sync":"full", "device":"src", "target":"dst", -+ "job-id":"job2"}}' 'return' -+_timed_wait_for $h1 '"ready"' -+_send_qemu_cmd $h1 '{"execute": "job-complete", "arguments": -+ {"id":"job2"}}' 'return' \ -+ | _filter_block_job_offset | _filter_block_job_len -+_send_qemu_cmd $h1 '{"execute": "blockdev-del", 
"arguments": -+ {"node-name": "dst"}}' 'return' \ -+ | _filter_block_job_offset | _filter_block_job_len -+$QEMU_IMG compare -U -f $IMGFMT -F raw $TEST_IMG.base $TEST_IMG -+result=$(disk_usage $TEST_IMG) -+if test $result -lt $((3*1024*1024)); then -+ actual=sparse -+elif test $result = $((20*1024*1024)); then -+ actual=full -+else -+ actual=unknown -+fi -+echo "Destination is $actual; expected $expected" -+} -+ -+do_test external ignore off sparse -+do_test external unmap off sparse -+do_test external unmap unmap sparse -+do_test off ignore off sparse -+do_test off unmap off sparse -+do_test off unmap unmap sparse -+do_test full ignore off full -+do_test full unmap off sparse -+do_test full unmap unmap sparse -+ -+_send_qemu_cmd $h1 '{"execute":"quit"}' '' -+ -+# success, all done -+echo '*** done' -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/tests/mirror-sparse.out b/tests/qemu-iotests/tests/mirror-sparse.out -new file mode 100644 -index 0000000000..2103b891c3 ---- /dev/null -+++ b/tests/qemu-iotests/tests/mirror-sparse.out -@@ -0,0 +1,365 @@ -+QA output created by mirror-sparse -+ -+=== Initial image setup === -+ -+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=20971520 -+wrote 2097152/2097152 bytes at offset 8388608 -+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+{"execute": "qmp_capabilities"} -+{"return": {}} -+ -+=== Testing creation=external discard=ignore zeroes=off === -+ -+{"execute": "blockdev-add", "arguments": -+ {"node-name": "dst", "driver":"file", -+ "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", -+ "auto-read-only":true, "discard":"ignore", -+ "detect-zeroes":"off"}} -+{"return": {}} -+{"execute":"blockdev-mirror", "arguments": -+ {"sync":"full", "device":"src", "target":"dst", -+ "job-id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} -+{"execute": "job-complete", "arguments": -+ {"id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": -+ {"node-name": "dst"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} -+{"return": {}} -+Images are identical. 
-+Destination is sparse; expected sparse -+ -+=== Testing creation=external discard=unmap zeroes=off === -+ -+{"execute": "blockdev-add", "arguments": -+ {"node-name": "dst", "driver":"file", -+ "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", -+ "auto-read-only":true, "discard":"unmap", -+ "detect-zeroes":"off"}} -+{"return": {}} -+{"execute":"blockdev-mirror", "arguments": -+ {"sync":"full", "device":"src", "target":"dst", -+ "job-id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} -+{"execute": "job-complete", "arguments": -+ {"id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": -+ {"node-name": "dst"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} -+{"return": {}} -+Images 
are identical. -+Destination is sparse; expected sparse -+ -+=== Testing creation=external discard=unmap zeroes=unmap === -+ -+{"execute": "blockdev-add", "arguments": -+ {"node-name": "dst", "driver":"file", -+ "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", -+ "auto-read-only":true, "discard":"unmap", -+ "detect-zeroes":"unmap"}} -+{"return": {}} -+{"execute":"blockdev-mirror", "arguments": -+ {"sync":"full", "device":"src", "target":"dst", -+ "job-id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} -+{"execute": "job-complete", "arguments": -+ {"id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": -+ {"node-name": "dst"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} 
-+{"return": {}} -+Images are identical. -+Destination is sparse; expected sparse -+ -+=== Testing creation=off discard=ignore zeroes=off === -+ -+{"execute": "blockdev-create", "arguments": -+ {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", -+ "size":20971520, "preallocation":"off"}, -+ "job-id":"job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} -+{"execute": "job-dismiss", "arguments": -+ {"id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} -+{"return": {}} -+{"execute": "blockdev-add", "arguments": -+ {"node-name": "dst", "driver":"file", -+ "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", -+ "auto-read-only":true, "discard":"ignore", -+ "detect-zeroes":"off"}} -+{"return": {}} -+{"execute":"blockdev-mirror", "arguments": -+ {"sync":"full", "device":"src", "target":"dst", -+ "job-id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job2"}} -+{"execute": "job-complete", "arguments": -+ {"id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": -+ {"node-name": "dst"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} -+{"return": {}} -+Images are identical. 
-+Destination is sparse; expected sparse -+ -+=== Testing creation=off discard=unmap zeroes=off === -+ -+{"execute": "blockdev-create", "arguments": -+ {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", -+ "size":20971520, "preallocation":"off"}, -+ "job-id":"job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} -+{"execute": "job-dismiss", "arguments": -+ {"id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} -+{"return": {}} -+{"execute": "blockdev-add", "arguments": -+ {"node-name": "dst", "driver":"file", -+ "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", -+ "auto-read-only":true, "discard":"unmap", -+ "detect-zeroes":"off"}} -+{"return": {}} -+{"execute":"blockdev-mirror", "arguments": -+ {"sync":"full", "device":"src", "target":"dst", -+ "job-id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": 
"job2"}} -+{"execute": "job-complete", "arguments": -+ {"id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": -+ {"node-name": "dst"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} -+{"return": {}} -+Images are identical. 
-+Destination is sparse; expected sparse -+ -+=== Testing creation=off discard=unmap zeroes=unmap === -+ -+{"execute": "blockdev-create", "arguments": -+ {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", -+ "size":20971520, "preallocation":"off"}, -+ "job-id":"job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} -+{"execute": "job-dismiss", "arguments": -+ {"id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} -+{"return": {}} -+{"execute": "blockdev-add", "arguments": -+ {"node-name": "dst", "driver":"file", -+ "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", -+ "auto-read-only":true, "discard":"unmap", -+ "detect-zeroes":"unmap"}} -+{"return": {}} -+{"execute":"blockdev-mirror", "arguments": -+ {"sync":"full", "device":"src", "target":"dst", -+ "job-id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", 
"id": "job2"}} -+{"execute": "job-complete", "arguments": -+ {"id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": -+ {"node-name": "dst"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} -+{"return": {}} -+Images are identical. 
-+Destination is sparse; expected sparse -+ -+=== Testing creation=full discard=ignore zeroes=off === -+ -+{"execute": "blockdev-create", "arguments": -+ {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", -+ "size":20971520, "preallocation":"full"}, -+ "job-id":"job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} -+{"execute": "job-dismiss", "arguments": -+ {"id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} -+{"return": {}} -+{"execute": "blockdev-add", "arguments": -+ {"node-name": "dst", "driver":"file", -+ "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", -+ "auto-read-only":true, "discard":"ignore", -+ "detect-zeroes":"off"}} -+{"return": {}} -+{"execute":"blockdev-mirror", "arguments": -+ {"sync":"full", "device":"src", "target":"dst", -+ "job-id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", 
"id": "job2"}} -+{"execute": "job-complete", "arguments": -+ {"id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": -+ {"node-name": "dst"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} -+{"return": {}} -+Images are identical. 
-+Destination is full; expected full -+ -+=== Testing creation=full discard=unmap zeroes=off === -+ -+{"execute": "blockdev-create", "arguments": -+ {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", -+ "size":20971520, "preallocation":"full"}, -+ "job-id":"job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} -+{"execute": "job-dismiss", "arguments": -+ {"id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} -+{"return": {}} -+{"execute": "blockdev-add", "arguments": -+ {"node-name": "dst", "driver":"file", -+ "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", -+ "auto-read-only":true, "discard":"unmap", -+ "detect-zeroes":"off"}} -+{"return": {}} -+{"execute":"blockdev-mirror", "arguments": -+ {"sync":"full", "device":"src", "target":"dst", -+ "job-id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": 
"job2"}} -+{"execute": "job-complete", "arguments": -+ {"id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": -+ {"node-name": "dst"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} -+{"return": {}} -+Images are identical. 
-+Destination is sparse; expected sparse -+ -+=== Testing creation=full discard=unmap zeroes=unmap === -+ -+{"execute": "blockdev-create", "arguments": -+ {"options": {"driver":"file", "filename":"TEST_DIR/t.IMGFMT", -+ "size":20971520, "preallocation":"full"}, -+ "job-id":"job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job1"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job1"}} -+{"execute": "job-dismiss", "arguments": -+ {"id": "job1"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job1"}} -+{"return": {}} -+{"execute": "blockdev-add", "arguments": -+ {"node-name": "dst", "driver":"file", -+ "filename":"TEST_DIR/t.IMGFMT", "aio":"threads", -+ "auto-read-only":true, "discard":"unmap", -+ "detect-zeroes":"unmap"}} -+{"return": {}} -+{"execute":"blockdev-mirror", "arguments": -+ {"sync":"full", "device":"src", "target":"dst", -+ "job-id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job2"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", 
"id": "job2"}} -+{"execute": "job-complete", "arguments": -+ {"id":"job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"return": {}} -+{"execute": "blockdev-del", "arguments": -+ {"node-name": "dst"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job2", "len": LEN, "offset": OFFSET, "speed": 0, "type": "mirror"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job2"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job2"}} -+{"return": {}} -+Images are identical. 
-+Destination is sparse; expected sparse -+{"execute":"quit"} -+*** done --- -2.39.3 - diff --git a/kvm-tests-qtest-migration-add-postcopy-tests-with-multif.patch b/kvm-tests-qtest-migration-add-postcopy-tests-with-multif.patch deleted file mode 100644 index ae3fb34..0000000 --- a/kvm-tests-qtest-migration-add-postcopy-tests-with-multif.patch +++ /dev/null @@ -1,270 +0,0 @@ -From 73a8cce94ae861259b3769fe05d5cb79f5a20abb Mon Sep 17 00:00:00 2001 -From: Prasad Pandit -Date: Mon, 12 May 2025 18:21:24 +0530 -Subject: [PATCH 11/33] tests/qtest/migration: add postcopy tests with multifd - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/11] 35d1ea7a8473725d876d02ebbc29ab6063823742 (pjp/cs-qemu-kvm) - -Add new qtests to run postcopy migration with multifd -channels enabled. - -Jira: https://issues.redhat.com/browse/RHEL-59697 -Signed-off-by: Prasad Pandit -Link: https://lore.kernel.org/r/20250512125124.147064-4-ppandit@redhat.com -[peterx: rename all new tests to be under /migration/multifd+postcopy/] -Signed-off-by: Peter Xu -(cherry picked from commit 766bbabac8f00bc5cf23ba90a8326678636280ed) -Signed-off-by: Prasad Pandit ---- - tests/qtest/migration/compression-tests.c | 18 ++++++++ - tests/qtest/migration/postcopy-tests.c | 27 ++++++++++++ - tests/qtest/migration/precopy-tests.c | 28 ++++++++++++- - tests/qtest/migration/tls-tests.c | 51 +++++++++++++++++++++++ - 4 files changed, 122 insertions(+), 2 deletions(-) - -diff --git a/tests/qtest/migration/compression-tests.c b/tests/qtest/migration/compression-tests.c -index 41e79f031b..b827665b8e 100644 ---- a/tests/qtest/migration/compression-tests.c -+++ b/tests/qtest/migration/compression-tests.c -@@ -42,6 +42,20 @@ static void test_multifd_tcp_zstd(void) - }; - test_precopy_common(&args); - } -+ -+static void 
test_multifd_postcopy_tcp_zstd(void) -+{ -+ MigrateCommon args = { -+ .listen_uri = "defer", -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ .caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] = true, -+ }, -+ .start_hook = migrate_hook_start_precopy_tcp_multifd_zstd, -+ }; -+ -+ test_precopy_common(&args); -+} - #endif /* CONFIG_ZSTD */ - - #ifdef CONFIG_QATZIP -@@ -184,6 +198,10 @@ void migration_test_add_compression(MigrationTestEnv *env) - #ifdef CONFIG_ZSTD - migration_test_add("/migration/multifd/tcp/plain/zstd", - test_multifd_tcp_zstd); -+ if (env->has_uffd) { -+ migration_test_add("/migration/multifd+postcopy/tcp/plain/zstd", -+ test_multifd_postcopy_tcp_zstd); -+ } - #endif - - #ifdef CONFIG_QATZIP -diff --git a/tests/qtest/migration/postcopy-tests.c b/tests/qtest/migration/postcopy-tests.c -index 483e3ff99f..3773525843 100644 ---- a/tests/qtest/migration/postcopy-tests.c -+++ b/tests/qtest/migration/postcopy-tests.c -@@ -94,6 +94,29 @@ static void migration_test_add_postcopy_smoke(MigrationTestEnv *env) - } - } - -+static void test_multifd_postcopy(void) -+{ -+ MigrateCommon args = { -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, -+ }; -+ -+ test_postcopy_common(&args); -+} -+ -+static void test_multifd_postcopy_preempt(void) -+{ -+ MigrateCommon args = { -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ .caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true, -+ }, -+ }; -+ -+ test_postcopy_common(&args); -+} -+ - void migration_test_add_postcopy(MigrationTestEnv *env) - { - migration_test_add_postcopy_smoke(env); -@@ -114,6 +137,10 @@ void migration_test_add_postcopy(MigrationTestEnv *env) - "/migration/postcopy/recovery/double-failures/reconnect", - test_postcopy_recovery_fail_reconnect); - -+ migration_test_add("/migration/multifd+postcopy/plain", -+ test_multifd_postcopy); -+ migration_test_add("/migration/multifd+postcopy/preempt/plain", -+ test_multifd_postcopy_preempt); - if (env->is_x86) { - 
migration_test_add("/migration/postcopy/suspend", - test_postcopy_suspend); -diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c -index a62d3c5378..bb38292550 100644 ---- a/tests/qtest/migration/precopy-tests.c -+++ b/tests/qtest/migration/precopy-tests.c -@@ -569,7 +569,7 @@ static void test_multifd_tcp_channels_none(void) - * - * And see that it works - */ --static void test_multifd_tcp_cancel(void) -+static void test_multifd_tcp_cancel(bool postcopy_ram) - { - MigrateStart args = { - .hide_stderr = true, -@@ -583,6 +583,11 @@ static void test_multifd_tcp_cancel(void) - migrate_ensure_non_converge(from); - migrate_prepare_for_dirty_mem(from); - -+ if (postcopy_ram) { -+ migrate_set_capability(from, "postcopy-ram", true); -+ migrate_set_capability(to, "postcopy-ram", true); -+ } -+ - migrate_set_parameter_int(from, "multifd-channels", 16); - migrate_set_parameter_int(to, "multifd-channels", 16); - -@@ -624,6 +629,10 @@ static void test_multifd_tcp_cancel(void) - return; - } - -+ if (postcopy_ram) { -+ migrate_set_capability(to2, "postcopy-ram", true); -+ } -+ - migrate_set_parameter_int(to2, "multifd-channels", 16); - - migrate_set_capability(to2, "multifd", true); -@@ -647,6 +656,16 @@ static void test_multifd_tcp_cancel(void) - migrate_end(from, to2, true); - } - -+static void test_multifd_precopy_tcp_cancel(void) -+{ -+ test_multifd_tcp_cancel(false); -+} -+ -+static void test_multifd_postcopy_tcp_cancel(void) -+{ -+ test_multifd_tcp_cancel(true); -+} -+ - static void test_cancel_src_after_failed(QTestState *from, QTestState *to, - const char *uri, const char *phase) - { -@@ -1233,7 +1252,12 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env) - migration_test_add("/migration/multifd/tcp/uri/plain/none", - test_multifd_tcp_uri_none); - migration_test_add("/migration/multifd/tcp/plain/cancel", -- test_multifd_tcp_cancel); -+ test_multifd_precopy_tcp_cancel); -+ if (env->has_uffd) { -+ 
migration_test_add("/migration/multifd+postcopy/tcp/plain/cancel", -+ test_multifd_postcopy_tcp_cancel); -+ } -+ - #ifdef CONFIG_RDMA - migration_test_add("/migration/precopy/rdma/plain", - test_precopy_rdma_plain); -diff --git a/tests/qtest/migration/tls-tests.c b/tests/qtest/migration/tls-tests.c -index 72f44defbb..21e9fec87d 100644 ---- a/tests/qtest/migration/tls-tests.c -+++ b/tests/qtest/migration/tls-tests.c -@@ -395,6 +395,19 @@ static void test_postcopy_recovery_tls_psk(void) - test_postcopy_recovery_common(&args); - } - -+static void test_multifd_postcopy_recovery_tls_psk(void) -+{ -+ MigrateCommon args = { -+ .start_hook = migrate_hook_start_tls_psk_match, -+ .end_hook = migrate_hook_end_tls_psk, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, -+ }; -+ -+ test_postcopy_recovery_common(&args); -+} -+ - /* This contains preempt+recovery+tls test altogether */ - static void test_postcopy_preempt_all(void) - { -@@ -409,6 +422,20 @@ static void test_postcopy_preempt_all(void) - test_postcopy_recovery_common(&args); - } - -+static void test_multifd_postcopy_preempt_recovery_tls_psk(void) -+{ -+ MigrateCommon args = { -+ .start_hook = migrate_hook_start_tls_psk_match, -+ .end_hook = migrate_hook_end_tls_psk, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ .caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true, -+ }, -+ }; -+ -+ test_postcopy_recovery_common(&args); -+} -+ - static void test_precopy_unix_tls_psk(void) - { - g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); -@@ -657,6 +684,21 @@ static void test_multifd_tcp_tls_psk_mismatch(void) - test_precopy_common(&args); - } - -+static void test_multifd_postcopy_tcp_tls_psk_match(void) -+{ -+ MigrateCommon args = { -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ .caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] = true, -+ }, -+ .listen_uri = "defer", -+ .start_hook = migrate_hook_start_multifd_tcp_tls_psk_match, -+ .end_hook = migrate_hook_end_tls_psk, 
-+ }; -+ -+ test_precopy_common(&args); -+} -+ - #ifdef CONFIG_TASN1 - static void test_multifd_tcp_tls_x509_default_host(void) - { -@@ -774,6 +816,11 @@ void migration_test_add_tls(MigrationTestEnv *env) - test_postcopy_preempt_tls_psk); - migration_test_add("/migration/postcopy/preempt/recovery/tls/psk", - test_postcopy_preempt_all); -+ migration_test_add("/migration/multifd+postcopy/recovery/tls/psk", -+ test_multifd_postcopy_recovery_tls_psk); -+ migration_test_add( -+ "/migration/multifd+postcopy/preempt/recovery/tls/psk", -+ test_multifd_postcopy_preempt_recovery_tls_psk); - } - #ifdef CONFIG_TASN1 - migration_test_add("/migration/precopy/unix/tls/x509/default-host", -@@ -805,6 +852,10 @@ void migration_test_add_tls(MigrationTestEnv *env) - test_multifd_tcp_tls_psk_match); - migration_test_add("/migration/multifd/tcp/tls/psk/mismatch", - test_multifd_tcp_tls_psk_mismatch); -+ if (env->has_uffd) { -+ migration_test_add("/migration/multifd+postcopy/tcp/tls/psk/match", -+ test_multifd_postcopy_tcp_tls_psk_match); -+ } - #ifdef CONFIG_TASN1 - migration_test_add("/migration/multifd/tcp/tls/x509/default-host", - test_multifd_tcp_tls_x509_default_host); --- -2.39.3 - diff --git a/kvm-tests-qtest-migration-consolidate-set-capabilities.patch b/kvm-tests-qtest-migration-consolidate-set-capabilities.patch deleted file mode 100644 index d216826..0000000 --- a/kvm-tests-qtest-migration-consolidate-set-capabilities.patch +++ /dev/null @@ -1,659 +0,0 @@ -From 450029655e9fe0b958d05ae3ba1469a2e322b59a Mon Sep 17 00:00:00 2001 -From: Prasad Pandit -Date: Fri, 11 Apr 2025 17:15:33 +0530 -Subject: [PATCH 05/33] tests/qtest/migration: consolidate set capabilities - -RH-Author: Prasad Pandit -RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only -RH-Jira: RHEL-59697 -RH-Acked-by: Juraj Marcin -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/11] 9cfa760da90b57cb554a9766ad79635436fca4b9 (pjp/cs-qemu-kvm) - -Migration 
capabilities are set in multiple '.start_hook' -functions for various tests. Instead, consolidate setting -capabilities in 'migrate_start_set_capabilities()' function -which is called from the 'migrate_start()' function. -While simplifying the capabilities setting, it helps -to declutter the qtest sources. - -Jira: https://issues.redhat.com/browse/RHEL-59697 -Suggested-by: Fabiano Rosas -Signed-off-by: Prasad Pandit -Reviewed-by: Fabiano Rosas -Message-ID: <20250411114534.3370816-7-ppandit@redhat.com> -[fix open brace] -Signed-off-by: Fabiano Rosas -(cherry picked from commit 115cec9d663c1a2f5a73df4a5ca02b3a676e8a2a) -Signed-off-by: Prasad Pandit ---- - tests/qtest/migration/compression-tests.c | 22 +++++-- - tests/qtest/migration/cpr-tests.c | 6 +- - tests/qtest/migration/file-tests.c | 58 ++++++++---------- - tests/qtest/migration/framework.c | 75 +++++++++++++++-------- - tests/qtest/migration/framework.h | 9 ++- - tests/qtest/migration/misc-tests.c | 4 +- - tests/qtest/migration/postcopy-tests.c | 8 ++- - tests/qtest/migration/precopy-tests.c | 29 +++++---- - tests/qtest/migration/tls-tests.c | 23 ++++++- - 9 files changed, 150 insertions(+), 84 deletions(-) - -diff --git a/tests/qtest/migration/compression-tests.c b/tests/qtest/migration/compression-tests.c -index 8b58401b84..41e79f031b 100644 ---- a/tests/qtest/migration/compression-tests.c -+++ b/tests/qtest/migration/compression-tests.c -@@ -35,6 +35,9 @@ static void test_multifd_tcp_zstd(void) - { - MigrateCommon args = { - .listen_uri = "defer", -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - .start_hook = migrate_hook_start_precopy_tcp_multifd_zstd, - }; - test_precopy_common(&args); -@@ -56,6 +59,9 @@ static void test_multifd_tcp_qatzip(void) - { - MigrateCommon args = { - .listen_uri = "defer", -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - .start_hook = migrate_hook_start_precopy_tcp_multifd_qatzip, - }; - test_precopy_common(&args); -@@ -74,6 +80,9 @@ static 
void test_multifd_tcp_qpl(void) - { - MigrateCommon args = { - .listen_uri = "defer", -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - .start_hook = migrate_hook_start_precopy_tcp_multifd_qpl, - }; - test_precopy_common(&args); -@@ -92,6 +101,9 @@ static void test_multifd_tcp_uadk(void) - { - MigrateCommon args = { - .listen_uri = "defer", -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - .start_hook = migrate_hook_start_precopy_tcp_multifd_uadk, - }; - test_precopy_common(&args); -@@ -103,10 +115,6 @@ migrate_hook_start_xbzrle(QTestState *from, - QTestState *to) - { - migrate_set_parameter_int(from, "xbzrle-cache-size", 33554432); -- -- migrate_set_capability(from, "xbzrle", true); -- migrate_set_capability(to, "xbzrle", true); -- - return NULL; - } - -@@ -118,6 +126,9 @@ static void test_precopy_unix_xbzrle(void) - .listen_uri = uri, - .start_hook = migrate_hook_start_xbzrle, - .iterations = 2, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_XBZRLE] = true, -+ }, - /* - * XBZRLE needs pages to be modified when doing the 2nd+ round - * iteration to have real data pushed to the stream. 
-@@ -146,6 +157,9 @@ static void test_multifd_tcp_zlib(void) - { - MigrateCommon args = { - .listen_uri = "defer", -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - .start_hook = migrate_hook_start_precopy_tcp_multifd_zlib, - }; - test_precopy_common(&args); -diff --git a/tests/qtest/migration/cpr-tests.c b/tests/qtest/migration/cpr-tests.c -index 4758841824..5536e14610 100644 ---- a/tests/qtest/migration/cpr-tests.c -+++ b/tests/qtest/migration/cpr-tests.c -@@ -24,9 +24,6 @@ static void *migrate_hook_start_mode_reboot(QTestState *from, QTestState *to) - migrate_set_parameter_str(from, "mode", "cpr-reboot"); - migrate_set_parameter_str(to, "mode", "cpr-reboot"); - -- migrate_set_capability(from, "x-ignore-shared", true); -- migrate_set_capability(to, "x-ignore-shared", true); -- - return NULL; - } - -@@ -39,6 +36,9 @@ static void test_mode_reboot(void) - .connect_uri = uri, - .listen_uri = "defer", - .start_hook = migrate_hook_start_mode_reboot, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED] = true, -+ }, - }; - - test_file_common(&args, true); -diff --git a/tests/qtest/migration/file-tests.c b/tests/qtest/migration/file-tests.c -index f260e2871d..4d78ce0855 100644 ---- a/tests/qtest/migration/file-tests.c -+++ b/tests/qtest/migration/file-tests.c -@@ -107,15 +107,6 @@ static void test_precopy_file_offset_bad(void) - test_file_common(&args, false); - } - --static void *migrate_hook_start_mapped_ram(QTestState *from, -- QTestState *to) --{ -- migrate_set_capability(from, "mapped-ram", true); -- migrate_set_capability(to, "mapped-ram", true); -- -- return NULL; --} -- - static void test_precopy_file_mapped_ram_live(void) - { - g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs, -@@ -123,7 +114,9 @@ static void test_precopy_file_mapped_ram_live(void) - MigrateCommon args = { - .connect_uri = uri, - .listen_uri = "defer", -- .start_hook = migrate_hook_start_mapped_ram, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = 
true, -+ }, - }; - - test_file_common(&args, false); -@@ -136,26 +129,14 @@ static void test_precopy_file_mapped_ram(void) - MigrateCommon args = { - .connect_uri = uri, - .listen_uri = "defer", -- .start_hook = migrate_hook_start_mapped_ram, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true, -+ }, - }; - - test_file_common(&args, true); - } - --static void *migrate_hook_start_multifd_mapped_ram(QTestState *from, -- QTestState *to) --{ -- migrate_hook_start_mapped_ram(from, to); -- -- migrate_set_parameter_int(from, "multifd-channels", 4); -- migrate_set_parameter_int(to, "multifd-channels", 4); -- -- migrate_set_capability(from, "multifd", true); -- migrate_set_capability(to, "multifd", true); -- -- return NULL; --} -- - static void test_multifd_file_mapped_ram_live(void) - { - g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs, -@@ -163,7 +144,10 @@ static void test_multifd_file_mapped_ram_live(void) - MigrateCommon args = { - .connect_uri = uri, - .listen_uri = "defer", -- .start_hook = migrate_hook_start_multifd_mapped_ram, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true, -+ }, - }; - - test_file_common(&args, false); -@@ -176,7 +160,10 @@ static void test_multifd_file_mapped_ram(void) - MigrateCommon args = { - .connect_uri = uri, - .listen_uri = "defer", -- .start_hook = migrate_hook_start_multifd_mapped_ram, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true, -+ }, - }; - - test_file_common(&args, true); -@@ -185,8 +172,6 @@ static void test_multifd_file_mapped_ram(void) - static void *migrate_hook_start_multifd_mapped_ram_dio(QTestState *from, - QTestState *to) - { -- migrate_hook_start_multifd_mapped_ram(from, to); -- - migrate_set_parameter_bool(from, "direct-io", true); - migrate_set_parameter_bool(to, "direct-io", true); - -@@ -201,6 +186,10 @@ static void test_multifd_file_mapped_ram_dio(void) - .connect_uri = 
uri, - .listen_uri = "defer", - .start_hook = migrate_hook_start_multifd_mapped_ram_dio, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true, -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - }; - - if (!probe_o_direct_support(tmpfs)) { -@@ -246,7 +235,6 @@ static void *migrate_hook_start_multifd_mapped_ram_fdset_dio(QTestState *from, - fdset_add_fds(from, file, O_WRONLY, 2, true); - fdset_add_fds(to, file, O_RDONLY, 2, true); - -- migrate_hook_start_multifd_mapped_ram(from, to); - migrate_set_parameter_bool(from, "direct-io", true); - migrate_set_parameter_bool(to, "direct-io", true); - -@@ -261,8 +249,6 @@ static void *migrate_hook_start_multifd_mapped_ram_fdset(QTestState *from, - fdset_add_fds(from, file, O_WRONLY, 2, false); - fdset_add_fds(to, file, O_RDONLY, 2, false); - -- migrate_hook_start_multifd_mapped_ram(from, to); -- - return NULL; - } - -@@ -275,6 +261,10 @@ static void test_multifd_file_mapped_ram_fdset(void) - .listen_uri = "defer", - .start_hook = migrate_hook_start_multifd_mapped_ram_fdset, - .end_hook = migrate_hook_end_multifd_mapped_ram_fdset, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true, -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - }; - - test_file_common(&args, true); -@@ -289,6 +279,10 @@ static void test_multifd_file_mapped_ram_fdset_dio(void) - .listen_uri = "defer", - .start_hook = migrate_hook_start_multifd_mapped_ram_fdset_dio, - .end_hook = migrate_hook_end_multifd_mapped_ram_fdset, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true, -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - }; - - if (!probe_o_direct_support(tmpfs)) { -diff --git a/tests/qtest/migration/framework.c b/tests/qtest/migration/framework.c -index 10e1d04b58..e48b80a127 100644 ---- a/tests/qtest/migration/framework.c -+++ b/tests/qtest/migration/framework.c -@@ -30,6 +30,7 @@ - #define QEMU_VM_FILE_MAGIC 0x5145564d - #define QEMU_ENV_SRC "QTEST_QEMU_BINARY_SRC" - #define QEMU_ENV_DST 
"QTEST_QEMU_BINARY_DST" -+#define MULTIFD_TEST_CHANNELS 4 - - unsigned start_address; - unsigned end_address; -@@ -207,6 +208,51 @@ static QList *migrate_start_get_qmp_capabilities(const MigrateStart *args) - return capabilities; - } - -+static void migrate_start_set_capabilities(QTestState *from, QTestState *to, -+ MigrateStart *args) -+{ -+ /* -+ * MigrationCapability_lookup and MIGRATION_CAPABILITY_ constants -+ * are from qapi-types-migration.h. -+ */ -+ for (uint8_t i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -+ if (!args->caps[i]) { -+ continue; -+ } -+ if (from) { -+ migrate_set_capability(from, -+ MigrationCapability_lookup.array[i], true); -+ } -+ if (to) { -+ migrate_set_capability(to, -+ MigrationCapability_lookup.array[i], true); -+ } -+ } -+ -+ /* -+ * Always enable migration events. Libvirt always uses it, let's try -+ * to mimic as closer as that. -+ */ -+ migrate_set_capability(from, "events", true); -+ if (!args->defer_target_connect) { -+ migrate_set_capability(to, "events", true); -+ } -+ -+ /* -+ * Default number of channels should be fine for most -+ * tests. Individual tests can override by calling -+ * migrate_set_parameter() directly. -+ */ -+ if (args->caps[MIGRATION_CAPABILITY_MULTIFD]) { -+ migrate_set_parameter_int(from, "multifd-channels", -+ MULTIFD_TEST_CHANNELS); -+ migrate_set_parameter_int(to, "multifd-channels", -+ MULTIFD_TEST_CHANNELS); -+ } -+ -+ return; -+} -+ - int migrate_start(QTestState **from, QTestState **to, const char *uri, - MigrateStart *args) - { -@@ -379,14 +425,7 @@ int migrate_start(QTestState **from, QTestState **to, const char *uri, - unlink(shmem_path); - } - -- /* -- * Always enable migration events. Libvirt always uses it, let's try -- * to mimic as closer as that. 
-- */ -- migrate_set_capability(*from, "events", true); -- if (!args->defer_target_connect) { -- migrate_set_capability(*to, "events", true); -- } -+ migrate_start_set_capabilities(*from, *to, args); - - return 0; - } -@@ -432,6 +471,10 @@ static int migrate_postcopy_prepare(QTestState **from_ptr, - { - QTestState *from, *to; - -+ /* set postcopy capabilities */ -+ args->start.caps[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME] = true; -+ args->start.caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] = true; -+ - if (migrate_start(&from, &to, "defer", &args->start)) { - return -1; - } -@@ -440,17 +483,7 @@ static int migrate_postcopy_prepare(QTestState **from_ptr, - args->postcopy_data = args->start_hook(from, to); - } - -- migrate_set_capability(from, "postcopy-ram", true); -- migrate_set_capability(to, "postcopy-ram", true); -- migrate_set_capability(to, "postcopy-blocktime", true); -- -- if (args->postcopy_preempt) { -- migrate_set_capability(from, "postcopy-preempt", true); -- migrate_set_capability(to, "postcopy-preempt", true); -- } -- - migrate_ensure_non_converge(from); -- - migrate_prepare_for_dirty_mem(from); - qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming'," - " 'arguments': { " -@@ -948,15 +981,9 @@ void *migrate_hook_start_precopy_tcp_multifd_common(QTestState *from, - QTestState *to, - const char *method) - { -- migrate_set_parameter_int(from, "multifd-channels", 16); -- migrate_set_parameter_int(to, "multifd-channels", 16); -- - migrate_set_parameter_str(from, "multifd-compression", method); - migrate_set_parameter_str(to, "multifd-compression", method); - -- migrate_set_capability(from, "multifd", true); -- migrate_set_capability(to, "multifd", true); -- - /* Start incoming migration from the 1st socket */ - migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}"); - -diff --git a/tests/qtest/migration/framework.h b/tests/qtest/migration/framework.h -index e4a11870f6..01e425e64e 100644 ---- a/tests/qtest/migration/framework.h -+++ 
b/tests/qtest/migration/framework.h -@@ -12,6 +12,7 @@ - #define TEST_FRAMEWORK_H - - #include "libqtest.h" -+#include - - #define FILE_TEST_FILENAME "migfile" - #define FILE_TEST_OFFSET 0x1000 -@@ -120,6 +121,13 @@ typedef struct { - - /* Do not connect to target monitor and qtest sockets in qtest_init */ - bool defer_target_connect; -+ -+ /* -+ * Migration capabilities to be set in both source and -+ * destination. For unilateral capabilities, use -+ * migration_set_capabilities(). -+ */ -+ bool caps[MIGRATION_CAPABILITY__MAX]; - } MigrateStart; - - typedef enum PostcopyRecoveryFailStage { -@@ -207,7 +215,6 @@ typedef struct { - - /* Postcopy specific fields */ - void *postcopy_data; -- bool postcopy_preempt; - PostcopyRecoveryFailStage postcopy_recovery_fail_stage; - } MigrateCommon; - -diff --git a/tests/qtest/migration/misc-tests.c b/tests/qtest/migration/misc-tests.c -index 2e612d9e38..54995256d8 100644 ---- a/tests/qtest/migration/misc-tests.c -+++ b/tests/qtest/migration/misc-tests.c -@@ -98,6 +98,7 @@ static void test_ignore_shared(void) - QTestState *from, *to; - MigrateStart args = { - .use_shmem = true, -+ .caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED] = true, - }; - - if (migrate_start(&from, &to, uri, &args)) { -@@ -107,9 +108,6 @@ static void test_ignore_shared(void) - migrate_ensure_non_converge(from); - migrate_prepare_for_dirty_mem(from); - -- migrate_set_capability(from, "x-ignore-shared", true); -- migrate_set_capability(to, "x-ignore-shared", true); -- - /* Wait for the first serial output from the source */ - wait_for_serial("src_serial"); - -diff --git a/tests/qtest/migration/postcopy-tests.c b/tests/qtest/migration/postcopy-tests.c -index 982457bed1..483e3ff99f 100644 ---- a/tests/qtest/migration/postcopy-tests.c -+++ b/tests/qtest/migration/postcopy-tests.c -@@ -39,7 +39,9 @@ static void test_postcopy_suspend(void) - static void test_postcopy_preempt(void) - { - MigrateCommon args = { -- .postcopy_preempt = true, -+ .start = { -+ 
.caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true, -+ }, - }; - - test_postcopy_common(&args); -@@ -73,7 +75,9 @@ static void test_postcopy_recovery_fail_reconnect(void) - static void test_postcopy_preempt_recovery(void) - { - MigrateCommon args = { -- .postcopy_preempt = true, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true, -+ }, - }; - - test_postcopy_recovery_common(&args); -diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c -index ba273d10b9..f8404793b8 100644 ---- a/tests/qtest/migration/precopy-tests.c -+++ b/tests/qtest/migration/precopy-tests.c -@@ -108,23 +108,14 @@ static void test_precopy_tcp_plain(void) - test_precopy_common(&args); - } - --static void *migrate_hook_start_switchover_ack(QTestState *from, QTestState *to) --{ -- -- migrate_set_capability(from, "return-path", true); -- migrate_set_capability(to, "return-path", true); -- -- migrate_set_capability(from, "switchover-ack", true); -- migrate_set_capability(to, "switchover-ack", true); -- -- return NULL; --} -- - static void test_precopy_tcp_switchover_ack(void) - { - MigrateCommon args = { - .listen_uri = "tcp:127.0.0.1:0", -- .start_hook = migrate_hook_start_switchover_ack, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_RETURN_PATH] = true, -+ .caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK] = true, -+ }, - /* - * Source VM must be running in order to consider the switchover ACK - * when deciding to do switchover or not. 
-@@ -393,6 +384,9 @@ static void test_multifd_tcp_uri_none(void) - MigrateCommon args = { - .listen_uri = "defer", - .start_hook = migrate_hook_start_precopy_tcp_multifd, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - /* - * Multifd is more complicated than most of the features, it - * directly takes guest page buffers when sending, make sure -@@ -408,6 +402,9 @@ static void test_multifd_tcp_zero_page_legacy(void) - MigrateCommon args = { - .listen_uri = "defer", - .start_hook = migrate_hook_start_precopy_tcp_multifd_zero_page_legacy, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - /* - * Multifd is more complicated than most of the features, it - * directly takes guest page buffers when sending, make sure -@@ -423,6 +420,9 @@ static void test_multifd_tcp_no_zero_page(void) - MigrateCommon args = { - .listen_uri = "defer", - .start_hook = migrate_hook_start_precopy_tcp_multifd_no_zero_page, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - /* - * Multifd is more complicated than most of the features, it - * directly takes guest page buffers when sending, make sure -@@ -439,6 +439,9 @@ static void test_multifd_tcp_channels_none(void) - .listen_uri = "defer", - .start_hook = migrate_hook_start_precopy_tcp_multifd, - .live = true, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - .connect_channels = ("[ { 'channel-type': 'main'," - " 'addr': { 'transport': 'socket'," - " 'type': 'inet'," -diff --git a/tests/qtest/migration/tls-tests.c b/tests/qtest/migration/tls-tests.c -index 2cb4a44bcd..72f44defbb 100644 ---- a/tests/qtest/migration/tls-tests.c -+++ b/tests/qtest/migration/tls-tests.c -@@ -375,9 +375,11 @@ static void test_postcopy_tls_psk(void) - static void test_postcopy_preempt_tls_psk(void) - { - MigrateCommon args = { -- .postcopy_preempt = true, - .start_hook = migrate_hook_start_tls_psk_match, - .end_hook = migrate_hook_end_tls_psk, -+ .start = { -+ 
.caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true, -+ }, - }; - - test_postcopy_common(&args); -@@ -397,9 +399,11 @@ static void test_postcopy_recovery_tls_psk(void) - static void test_postcopy_preempt_all(void) - { - MigrateCommon args = { -- .postcopy_preempt = true, - .start_hook = migrate_hook_start_tls_psk_match, - .end_hook = migrate_hook_end_tls_psk, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true, -+ }, - }; - - test_postcopy_recovery_common(&args); -@@ -631,6 +635,9 @@ static void test_multifd_tcp_tls_psk_match(void) - .listen_uri = "defer", - .start_hook = migrate_hook_start_multifd_tcp_tls_psk_match, - .end_hook = migrate_hook_end_tls_psk, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - }; - test_precopy_common(&args); - } -@@ -640,6 +647,7 @@ static void test_multifd_tcp_tls_psk_mismatch(void) - MigrateCommon args = { - .start = { - .hide_stderr = true, -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, - }, - .listen_uri = "defer", - .start_hook = migrate_hook_start_multifd_tcp_tls_psk_mismatch, -@@ -656,6 +664,9 @@ static void test_multifd_tcp_tls_x509_default_host(void) - .listen_uri = "defer", - .start_hook = migrate_hook_start_multifd_tls_x509_default_host, - .end_hook = migrate_hook_end_tls_x509, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - }; - test_precopy_common(&args); - } -@@ -666,6 +677,9 @@ static void test_multifd_tcp_tls_x509_override_host(void) - .listen_uri = "defer", - .start_hook = migrate_hook_start_multifd_tls_x509_override_host, - .end_hook = migrate_hook_end_tls_x509, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - }; - test_precopy_common(&args); - } -@@ -688,6 +702,7 @@ static void test_multifd_tcp_tls_x509_mismatch_host(void) - MigrateCommon args = { - .start = { - .hide_stderr = true, -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, - }, - .listen_uri = "defer", - .start_hook = migrate_hook_start_multifd_tls_x509_mismatch_host, -@@ -703,6 +718,9 
@@ static void test_multifd_tcp_tls_x509_allow_anon_client(void) - .listen_uri = "defer", - .start_hook = migrate_hook_start_multifd_tls_x509_allow_anon_client, - .end_hook = migrate_hook_end_tls_x509, -+ .start = { -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, -+ }, - }; - test_precopy_common(&args); - } -@@ -712,6 +730,7 @@ static void test_multifd_tcp_tls_x509_reject_anon_client(void) - MigrateCommon args = { - .start = { - .hide_stderr = true, -+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true, - }, - .listen_uri = "defer", - .start_hook = migrate_hook_start_multifd_tls_x509_reject_anon_client, --- -2.39.3 - diff --git a/kvm-tests-unit-test-util-sockets-fix-mem-leak-on-error-o.patch b/kvm-tests-unit-test-util-sockets-fix-mem-leak-on-error-o.patch deleted file mode 100644 index 74a0b67..0000000 --- a/kvm-tests-unit-test-util-sockets-fix-mem-leak-on-error-o.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 450ca242645a6a2c68ea8aacbedfafec11c75fec Mon Sep 17 00:00:00 2001 -From: Matheus Tavares Bernardino -Date: Mon, 26 May 2025 10:20:55 -0700 -Subject: [PATCH 9/9] tests/unit/test-util-sockets: fix mem-leak on error - object - -RH-Author: Juraj Marcin -RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive -RH-Jira: RHEL-67706 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/7] ed714ea8693fe69166e7f991904bebd20636804a (JurajMarcin/centos-src-qemu-kvm) - -The test fails with --enable-asan as the error struct is never freed. -In the case where the test expects a success but it fails, let's also -report the error for debugging (it will be freed internally). 
- -Fixes 316e8ee8d6 ("util/qemu-sockets: Refactor inet_parse() to use QemuOpts") - -Signed-off-by: Matheus Tavares Bernardino -Reviewed-by: Juraj Marcin -Message-ID: <518d94c7db20060b2a086cf55ee9bffab992a907.1748280011.git.matheus.bernardino@oss.qualcomm.com> -Signed-off-by: Thomas Huth - -(cherry picked from commit 5c54a367265ec19ed94a535cd15d178c16b8cae0) - -JIRA: https://issues.redhat.com/browse/RHEL-67706 - -Signed-off-by: Juraj Marcin ---- - tests/unit/test-util-sockets.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c -index 8492f4d68f..ee66d727c3 100644 ---- a/tests/unit/test-util-sockets.c -+++ b/tests/unit/test-util-sockets.c -@@ -341,8 +341,12 @@ static void inet_parse_test_helper(const char *str, - int rc = inet_parse(&addr, str, &error); - - if (success) { -+ if (error) { -+ error_report_err(error); -+ } - g_assert_cmpint(rc, ==, 0); - } else { -+ error_free(error); - g_assert_cmpint(rc, <, 0); - } - if (exp_addr != NULL) { --- -2.39.3 - diff --git a/kvm-ui-vnc-Update-display-update-interval-when-VM-state-.patch b/kvm-ui-vnc-Update-display-update-interval-when-VM-state-.patch deleted file mode 100644 index 3f323f3..0000000 --- a/kvm-ui-vnc-Update-display-update-interval-when-VM-state-.patch +++ /dev/null @@ -1,97 +0,0 @@ -From d2ae6b7855efeb865f6a93db057378142ca172ea Mon Sep 17 00:00:00 2001 -From: Juraj Marcin -Date: Wed, 21 May 2025 17:16:13 +0200 -Subject: [PATCH 43/43] ui/vnc: Update display update interval when VM state - changes to RUNNING -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juraj Marcin -RH-MergeRequest: 384: ui/vnc: Update display update interval when VM state changes to RUNNING -RH-Jira: RHEL-83883 -RH-Acked-by: Peter Xu -RH-Acked-by: Marc-André Lureau -RH-Commit: [1/1] 1231bccaeb9ea0ca78dd6fd53ba4b523f3bc569f (JurajMarcin/centos-src-qemu-kvm) - -If a virtual machine is paused for an extended period 
time, for example, -due to an incoming migration, there are also no changes on the screen. -VNC in such case increases the display update interval by -VNC_REFRESH_INTERVAL_INC (50 ms). The update interval can then grow up -to VNC_REFRESH_INTERVAL_MAX (3000 ms). - -When the machine resumes, it can then take up to 3 seconds for the first -display update. Furthermore, the update interval is then halved with -each display update with changes on the screen. If there are moving -elements on the screen, such as a video, this can be perceived as -freezing and stuttering for few seconds before the movement is smooth -again. - -This patch resolves this issue, by adding a listener to VM state changes -and changing the update interval when the VM state changes to RUNNING. -The update_displaychangelistener() function updates the internal timer, -and the display is refreshed immediately if the timer is expired. - -Signed-off-by: Juraj Marcin -Reviewed-by: Marc-André Lureau -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. 
Berrangé -Link: https://lore.kernel.org/r/20250521151616.3951178-1-jmarcin@redhat.com -Signed-off-by: Peter Xu - -(cherry picked from commit 0310d594d98b39f9dde79b87fd8b0ad16e7c5459) - -JIRA: https://issues.redhat.com/browse/RHEL-83883 - -Signed-off-by: Juraj Marcin ---- - ui/vnc.c | 12 ++++++++++++ - ui/vnc.h | 2 ++ - 2 files changed, 14 insertions(+) - -diff --git a/ui/vnc.c b/ui/vnc.c -index 9241caaad9..75fb1e74f0 100644 ---- a/ui/vnc.c -+++ b/ui/vnc.c -@@ -3386,6 +3386,16 @@ static const DisplayChangeListenerOps dcl_ops = { - .dpy_cursor_define = vnc_dpy_cursor_define, - }; - -+static void vmstate_change_handler(void *opaque, bool running, RunState state) -+{ -+ VncDisplay *vd = opaque; -+ -+ if (state != RUN_STATE_RUNNING) { -+ return; -+ } -+ update_displaychangelistener(&vd->dcl, VNC_REFRESH_INTERVAL_BASE); -+} -+ - void vnc_display_init(const char *id, Error **errp) - { - VncDisplay *vd; -@@ -3422,6 +3432,8 @@ void vnc_display_init(const char *id, Error **errp) - vd->dcl.ops = &dcl_ops; - register_displaychangelistener(&vd->dcl); - vd->kbd = qkbd_state_init(vd->dcl.con); -+ vd->vmstate_handler_entry = qemu_add_vm_change_state_handler( -+ &vmstate_change_handler, vd); - } - - -diff --git a/ui/vnc.h b/ui/vnc.h -index acc53a2cc1..3bb23acd34 100644 ---- a/ui/vnc.h -+++ b/ui/vnc.h -@@ -185,6 +185,8 @@ struct VncDisplay - #endif - - AudioState *audio_state; -+ -+ VMChangeStateEntry *vmstate_handler_entry; - }; - - typedef struct VncTight { --- -2.39.3 - diff --git a/kvm-util-qemu-sockets-Add-support-for-keep-alive-flag-to.patch b/kvm-util-qemu-sockets-Add-support-for-keep-alive-flag-to.patch deleted file mode 100644 index 32d7358..0000000 --- a/kvm-util-qemu-sockets-Add-support-for-keep-alive-flag-to.patch +++ /dev/null @@ -1,86 +0,0 @@ -From df0a8441d5352faaa1e2a3a27a48cafd6fa737e4 Mon Sep 17 00:00:00 2001 -From: Juraj Marcin -Date: Wed, 21 May 2025 15:52:33 +0200 -Subject: [PATCH 6/9] util/qemu-sockets: Add support for keep-alive flag to - passive sockets 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juraj Marcin -RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive -RH-Jira: RHEL-67706 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/7] af13774c325fc152814885c858813bdc2f1d62e7 (JurajMarcin/centos-src-qemu-kvm) - -Commit aec21d3175 (qapi: Add InetSocketAddress member keep-alive) -introduces the keep-alive flag, which enables the SO_KEEPALIVE socket -option, but only on client-side sockets. However, this option is also -useful for server-side sockets, so they can check if a client is still -reachable or drop the connection otherwise. - -This patch enables the SO_KEEPALIVE socket option on passive server-side -sockets if the keep-alive flag is enabled. This socket option is then -inherited by active server-side sockets communicating with connected -clients. - -Signed-off-by: Juraj Marcin -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Daniel P. Berrangé - -(cherry picked from commit 00064705ed1f3943d3634be25da434466c87e7d5) - -JIRA: https://issues.redhat.com/browse/RHEL-67706 - -Signed-off-by: Juraj Marcin ---- - qapi/sockets.json | 4 ++-- - util/qemu-sockets.c | 9 +++------ - 2 files changed, 5 insertions(+), 8 deletions(-) - -diff --git a/qapi/sockets.json b/qapi/sockets.json -index 6a95023315..62797cd027 100644 ---- a/qapi/sockets.json -+++ b/qapi/sockets.json -@@ -56,8 +56,8 @@ - # @ipv6: whether to accept IPv6 addresses, default try both IPv4 and - # IPv6 - # --# @keep-alive: enable keep-alive when connecting to this socket. Not --# supported for passive sockets. (Since 4.2) -+# @keep-alive: enable keep-alive when connecting to/listening on this socket. -+# (Since 4.2, not supported for listening sockets until 10.1) - # - # @mptcp: enable multi-path TCP. 
(Since 6.1) - # -diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c -index 329fdbfd97..4fbf1ed5bf 100644 ---- a/util/qemu-sockets.c -+++ b/util/qemu-sockets.c -@@ -236,12 +236,6 @@ static int inet_listen_saddr(InetSocketAddress *saddr, - int saved_errno = 0; - bool socket_created = false; - -- if (saddr->keep_alive) { -- error_setg(errp, "keep-alive option is not supported for passive " -- "sockets"); -- return -1; -- } -- - memset(&ai,0, sizeof(ai)); - ai.ai_flags = AI_PASSIVE; - if (saddr->has_numeric && saddr->numeric) { -@@ -349,6 +343,9 @@ static int inet_listen_saddr(InetSocketAddress *saddr, - goto fail; - } - /* We have a listening socket */ -+ if (inet_set_sockopts(slisten, saddr, errp) < 0) { -+ goto fail; -+ } - freeaddrinfo(res); - return slisten; - } --- -2.39.3 - diff --git a/kvm-util-qemu-sockets-Introduce-inet-socket-options-cont.patch b/kvm-util-qemu-sockets-Introduce-inet-socket-options-cont.patch deleted file mode 100644 index a4b1629..0000000 --- a/kvm-util-qemu-sockets-Introduce-inet-socket-options-cont.patch +++ /dev/null @@ -1,314 +0,0 @@ -From 51becf11d7727e6f6e224ca2a1d56e26fd3d0fb2 Mon Sep 17 00:00:00 2001 -From: Juraj Marcin -Date: Wed, 21 May 2025 15:52:35 +0200 -Subject: [PATCH 8/9] util/qemu-sockets: Introduce inet socket options - controlling TCP keep-alive -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juraj Marcin -RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive -RH-Jira: RHEL-67706 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/7] 4c8bac67f739c923fa1590e1cc34cd84c08653c1 (JurajMarcin/centos-src-qemu-kvm) - -With the default TCP stack configuration, it could be even 2 hours -before the connection times out due to the other side not being -reachable. However, in some cases, the application needs to be aware of -a connection issue much sooner. 
- -This is the case, for example, for postcopy live migration. If there is -no traffic from the migration destination guest (server-side) to the -migration source guest (client-side), the destination keeps waiting for -pages indefinitely and does not switch to the postcopy-paused state. -This can happen, for example, if the destination QEMU instance is -started with the '-S' command line option and the machine is not started -yet, or if the machine is idle and produces no new page faults for -not-yet-migrated pages. - -This patch introduces new inet socket parameters that control count, -idle period, and interval of TCP keep-alive packets before the -connection is considered broken. These parameters are available on -systems where the respective TCP socket options are defined, that -includes Linux, Windows, macOS, but not OpenBSD. Additionally, macOS -defines TCP_KEEPIDLE as TCP_KEEPALIVE instead, so the patch supplies its -own definition. - -The default value for all is 0, which means the system configuration is -used. - -Signed-off-by: Juraj Marcin -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Daniel P. 
Berrangé - -(cherry picked from commit 1bd4237cb1095d71c16afad3ce93b4a1e453173e) - -JIRA: https://issues.redhat.com/browse/RHEL-67706 - -Signed-off-by: Juraj Marcin ---- - meson.build | 30 +++++++++++++ - qapi/sockets.json | 19 ++++++++ - tests/unit/test-util-sockets.c | 39 +++++++++++++++++ - util/qemu-sockets.c | 80 ++++++++++++++++++++++++++++++++++ - 4 files changed, 168 insertions(+) - -diff --git a/meson.build b/meson.build -index dadd47d362..23b56175d5 100644 ---- a/meson.build -+++ b/meson.build -@@ -2745,6 +2745,36 @@ if linux_io_uring.found() - config_host_data.set('HAVE_IO_URING_PREP_WRITEV2', - cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2')) - endif -+config_host_data.set('HAVE_TCP_KEEPCNT', -+ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPCNT') or -+ cc.compiles(''' -+ #include -+ #ifndef TCP_KEEPCNT -+ #error -+ #endif -+ int main(void) { return 0; }''', -+ name: 'Win32 TCP_KEEPCNT')) -+# On Darwin TCP_KEEPIDLE is available under different name, TCP_KEEPALIVE. -+# https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172 -+config_host_data.set('HAVE_TCP_KEEPIDLE', -+ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPIDLE') or -+ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPALIVE') or -+ cc.compiles(''' -+ #include -+ #ifndef TCP_KEEPIDLE -+ #error -+ #endif -+ int main(void) { return 0; }''', -+ name: 'Win32 TCP_KEEPIDLE')) -+config_host_data.set('HAVE_TCP_KEEPINTVL', -+ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPINTVL') or -+ cc.compiles(''' -+ #include -+ #ifndef TCP_KEEPINTVL -+ #error -+ #endif -+ int main(void) { return 0; }''', -+ name: 'Win32 TCP_KEEPINTVL')) - - # has_member - config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID', -diff --git a/qapi/sockets.json b/qapi/sockets.json -index 62797cd027..f9f559daba 100644 ---- a/qapi/sockets.json -+++ b/qapi/sockets.json -@@ -59,6 +59,22 @@ - # @keep-alive: enable keep-alive when connecting to/listening on this socket. 
- # (Since 4.2, not supported for listening sockets until 10.1) - # -+# @keep-alive-count: number of keep-alive packets sent before the connection is -+# closed. Only supported for TCP sockets on systems where TCP_KEEPCNT -+# socket option is defined (this includes Linux, Windows, macOS, FreeBSD, -+# but not OpenBSD). When set to 0, system setting is used. (Since 10.1) -+# -+# @keep-alive-idle: time in seconds the connection needs to be idle before -+# sending a keepalive packet. Only supported for TCP sockets on systems -+# where TCP_KEEPIDLE socket option is defined (this includes Linux, -+# Windows, macOS, FreeBSD, but not OpenBSD). When set to 0, system setting -+# is used. (Since 10.1) -+# -+# @keep-alive-interval: time in seconds between keep-alive packets. Only -+# supported for TCP sockets on systems where TCP_KEEPINTVL is defined (this -+# includes Linux, Windows, macOS, FreeBSD, but not OpenBSD). When set to -+# 0, system setting is used. (Since 10.1) -+# - # @mptcp: enable multi-path TCP. 
(Since 6.1) - # - # Since: 1.3 -@@ -71,6 +87,9 @@ - '*ipv4': 'bool', - '*ipv6': 'bool', - '*keep-alive': 'bool', -+ '*keep-alive-count': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPCNT' }, -+ '*keep-alive-idle': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPIDLE' }, -+ '*keep-alive-interval': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPINTVL' }, - '*mptcp': { 'type': 'bool', 'if': 'HAVE_IPPROTO_MPTCP' } } } - - ## -diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c -index 9e39b92e7c..8492f4d68f 100644 ---- a/tests/unit/test-util-sockets.c -+++ b/tests/unit/test-util-sockets.c -@@ -359,6 +359,24 @@ static void inet_parse_test_helper(const char *str, - g_assert_cmpint(addr.ipv6, ==, exp_addr->ipv6); - g_assert_cmpint(addr.has_keep_alive, ==, exp_addr->has_keep_alive); - g_assert_cmpint(addr.keep_alive, ==, exp_addr->keep_alive); -+#ifdef HAVE_TCP_KEEPCNT -+ g_assert_cmpint(addr.has_keep_alive_count, ==, -+ exp_addr->has_keep_alive_count); -+ g_assert_cmpint(addr.keep_alive_count, ==, -+ exp_addr->keep_alive_count); -+#endif -+#ifdef HAVE_TCP_KEEPIDLE -+ g_assert_cmpint(addr.has_keep_alive_idle, ==, -+ exp_addr->has_keep_alive_idle); -+ g_assert_cmpint(addr.keep_alive_idle, ==, -+ exp_addr->keep_alive_idle); -+#endif -+#ifdef HAVE_TCP_KEEPINTVL -+ g_assert_cmpint(addr.has_keep_alive_interval, ==, -+ exp_addr->has_keep_alive_interval); -+ g_assert_cmpint(addr.keep_alive_interval, ==, -+ exp_addr->keep_alive_interval); -+#endif - #ifdef HAVE_IPPROTO_MPTCP - g_assert_cmpint(addr.has_mptcp, ==, exp_addr->has_mptcp); - g_assert_cmpint(addr.mptcp, ==, exp_addr->mptcp); -@@ -460,6 +478,18 @@ static void test_inet_parse_all_options_good(void) - .ipv6 = true, - .has_keep_alive = true, - .keep_alive = true, -+#ifdef HAVE_TCP_KEEPCNT -+ .has_keep_alive_count = true, -+ .keep_alive_count = 10, -+#endif -+#ifdef HAVE_TCP_KEEPIDLE -+ .has_keep_alive_idle = true, -+ .keep_alive_idle = 60, -+#endif -+#ifdef HAVE_TCP_KEEPINTVL -+ .has_keep_alive_interval = true, -+ 
.keep_alive_interval = 30, -+#endif - #ifdef HAVE_IPPROTO_MPTCP - .has_mptcp = true, - .mptcp = false, -@@ -467,6 +497,15 @@ static void test_inet_parse_all_options_good(void) - }; - inet_parse_test_helper( - "[::1]:5000,numeric=on,to=5006,ipv4=off,ipv6=on,keep-alive=on" -+#ifdef HAVE_TCP_KEEPCNT -+ ",keep-alive-count=10" -+#endif -+#ifdef HAVE_TCP_KEEPIDLE -+ ",keep-alive-idle=60" -+#endif -+#ifdef HAVE_TCP_KEEPINTVL -+ ",keep-alive-interval=30" -+#endif - #ifdef HAVE_IPPROTO_MPTCP - ",mptcp=off" - #endif -diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c -index 403dc26b36..4773755fd5 100644 ---- a/util/qemu-sockets.c -+++ b/util/qemu-sockets.c -@@ -45,6 +45,14 @@ - # define AI_NUMERICSERV 0 - #endif - -+/* -+ * On macOS TCP_KEEPIDLE is available under a different name, TCP_KEEPALIVE. -+ * https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172 -+ */ -+#if defined(TCP_KEEPALIVE) && !defined(TCP_KEEPIDLE) -+# define TCP_KEEPIDLE TCP_KEEPALIVE -+#endif -+ - - static int inet_getport(struct addrinfo *e) - { -@@ -218,6 +226,42 @@ static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp) - "Unable to set keep-alive option on socket"); - return -1; - } -+#ifdef HAVE_TCP_KEEPCNT -+ if (saddr->has_keep_alive_count && saddr->keep_alive_count) { -+ int keep_count = saddr->keep_alive_count; -+ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &keep_count, -+ sizeof(keep_count)); -+ if (ret < 0) { -+ error_setg_errno(errp, errno, -+ "Unable to set TCP keep-alive count option on socket"); -+ return -1; -+ } -+ } -+#endif -+#ifdef HAVE_TCP_KEEPIDLE -+ if (saddr->has_keep_alive_idle && saddr->keep_alive_idle) { -+ int keep_idle = saddr->keep_alive_idle; -+ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &keep_idle, -+ sizeof(keep_idle)); -+ if (ret < 0) { -+ error_setg_errno(errp, errno, -+ "Unable to set TCP keep-alive idle option on socket"); -+ return -1; -+ } -+ } -+#endif -+#ifdef HAVE_TCP_KEEPINTVL -+ if 
(saddr->has_keep_alive_interval && saddr->keep_alive_interval) { -+ int keep_interval = saddr->keep_alive_interval; -+ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &keep_interval, -+ sizeof(keep_interval)); -+ if (ret < 0) { -+ error_setg_errno(errp, errno, -+ "Unable to set TCP keep-alive interval option on socket"); -+ return -1; -+ } -+ } -+#endif - } - return 0; - } -@@ -630,6 +674,24 @@ static QemuOptsList inet_opts = { - .name = "keep-alive", - .type = QEMU_OPT_BOOL, - }, -+#ifdef HAVE_TCP_KEEPCNT -+ { -+ .name = "keep-alive-count", -+ .type = QEMU_OPT_NUMBER, -+ }, -+#endif -+#ifdef HAVE_TCP_KEEPIDLE -+ { -+ .name = "keep-alive-idle", -+ .type = QEMU_OPT_NUMBER, -+ }, -+#endif -+#ifdef HAVE_TCP_KEEPINTVL -+ { -+ .name = "keep-alive-interval", -+ .type = QEMU_OPT_NUMBER, -+ }, -+#endif - #ifdef HAVE_IPPROTO_MPTCP - { - .name = "mptcp", -@@ -695,6 +757,24 @@ int inet_parse(InetSocketAddress *addr, const char *str, Error **errp) - addr->has_keep_alive = true; - addr->keep_alive = qemu_opt_get_bool(opts, "keep-alive", false); - } -+#ifdef HAVE_TCP_KEEPCNT -+ if (qemu_opt_find(opts, "keep-alive-count")) { -+ addr->has_keep_alive_count = true; -+ addr->keep_alive_count = qemu_opt_get_number(opts, "keep-alive-count", 0); -+ } -+#endif -+#ifdef HAVE_TCP_KEEPIDLE -+ if (qemu_opt_find(opts, "keep-alive-idle")) { -+ addr->has_keep_alive_idle = true; -+ addr->keep_alive_idle = qemu_opt_get_number(opts, "keep-alive-idle", 0); -+ } -+#endif -+#ifdef HAVE_TCP_KEEPINTVL -+ if (qemu_opt_find(opts, "keep-alive-interval")) { -+ addr->has_keep_alive_interval = true; -+ addr->keep_alive_interval = qemu_opt_get_number(opts, "keep-alive-interval", 0); -+ } -+#endif - #ifdef HAVE_IPPROTO_MPTCP - if (qemu_opt_find(opts, "mptcp")) { - addr->has_mptcp = true; --- -2.39.3 - diff --git a/kvm-util-qemu-sockets-Refactor-inet_parse-to-use-QemuOpt.patch b/kvm-util-qemu-sockets-Refactor-inet_parse-to-use-QemuOpt.patch deleted file mode 100644 index 036c406..0000000 --- 
a/kvm-util-qemu-sockets-Refactor-inet_parse-to-use-QemuOpt.patch +++ /dev/null @@ -1,460 +0,0 @@ -From 56ed06502da893f9fd756cbe683917c64f4af0a6 Mon Sep 17 00:00:00 2001 -From: Juraj Marcin -Date: Wed, 21 May 2025 15:52:34 +0200 -Subject: [PATCH 7/9] util/qemu-sockets: Refactor inet_parse() to use QemuOpts -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juraj Marcin -RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive -RH-Jira: RHEL-67706 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/7] 49ea5df8850a7518eb546c27878551cdb1aaa9ff (JurajMarcin/centos-src-qemu-kvm) - -Currently, the inet address parser cannot handle multiple options where -one is prefixed with the name of the other. For example, with the -'keep-alive-idle' option added, the current parser cannot parse -'127.0.0.1:5000,keep-alive-idle=60,keep-alive' correctly. Instead, it -fails with "error parsing 'keep-alive' flag '-idle=60,keep-alive'". - -To resolve these issues, this patch rewrites the inet address parsing -using the QemuOpts parser, which the inet_parse_flag() function tries to -mimic. This new parser supports all previously supported options and on -top of that the 'numeric' flag is now also supported. The only -difference is, the new parser produces an error if an unknown option is -passed, instead of silently ignoring it. - -Signed-off-by: Juraj Marcin -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Daniel P. 
Berrangé - -(cherry picked from commit 316e8ee8d614f049bfae697570a5e62af450491c) - -JIRA: https://issues.redhat.com/browse/RHEL-67706 - -Signed-off-by: Juraj Marcin ---- - tests/unit/test-util-sockets.c | 196 +++++++++++++++++++++++++++++++++ - util/qemu-sockets.c | 158 +++++++++++++------------- - 2 files changed, 270 insertions(+), 84 deletions(-) - -diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c -index 4c9dd0b271..9e39b92e7c 100644 ---- a/tests/unit/test-util-sockets.c -+++ b/tests/unit/test-util-sockets.c -@@ -332,6 +332,177 @@ static void test_socket_unix_abstract(void) - - #endif /* CONFIG_LINUX */ - -+static void inet_parse_test_helper(const char *str, -+ InetSocketAddress *exp_addr, bool success) -+{ -+ InetSocketAddress addr; -+ Error *error = NULL; -+ -+ int rc = inet_parse(&addr, str, &error); -+ -+ if (success) { -+ g_assert_cmpint(rc, ==, 0); -+ } else { -+ g_assert_cmpint(rc, <, 0); -+ } -+ if (exp_addr != NULL) { -+ g_assert_cmpstr(addr.host, ==, exp_addr->host); -+ g_assert_cmpstr(addr.port, ==, exp_addr->port); -+ /* Own members: */ -+ g_assert_cmpint(addr.has_numeric, ==, exp_addr->has_numeric); -+ g_assert_cmpint(addr.numeric, ==, exp_addr->numeric); -+ g_assert_cmpint(addr.has_to, ==, exp_addr->has_to); -+ g_assert_cmpint(addr.to, ==, exp_addr->to); -+ g_assert_cmpint(addr.has_ipv4, ==, exp_addr->has_ipv4); -+ g_assert_cmpint(addr.ipv4, ==, exp_addr->ipv4); -+ g_assert_cmpint(addr.has_ipv6, ==, exp_addr->has_ipv6); -+ g_assert_cmpint(addr.ipv6, ==, exp_addr->ipv6); -+ g_assert_cmpint(addr.has_keep_alive, ==, exp_addr->has_keep_alive); -+ g_assert_cmpint(addr.keep_alive, ==, exp_addr->keep_alive); -+#ifdef HAVE_IPPROTO_MPTCP -+ g_assert_cmpint(addr.has_mptcp, ==, exp_addr->has_mptcp); -+ g_assert_cmpint(addr.mptcp, ==, exp_addr->mptcp); -+#endif -+ } -+ -+ g_free(addr.host); -+ g_free(addr.port); -+} -+ -+static void test_inet_parse_nohost_good(void) -+{ -+ char host[] = ""; -+ char port[] = "5000"; -+ 
InetSocketAddress exp_addr = { -+ .host = host, -+ .port = port, -+ }; -+ inet_parse_test_helper(":5000", &exp_addr, true); -+} -+ -+static void test_inet_parse_empty_bad(void) -+{ -+ inet_parse_test_helper("", NULL, false); -+} -+ -+static void test_inet_parse_only_colon_bad(void) -+{ -+ inet_parse_test_helper(":", NULL, false); -+} -+ -+static void test_inet_parse_ipv4_good(void) -+{ -+ char host[] = "127.0.0.1"; -+ char port[] = "5000"; -+ InetSocketAddress exp_addr = { -+ .host = host, -+ .port = port, -+ }; -+ inet_parse_test_helper("127.0.0.1:5000", &exp_addr, true); -+} -+ -+static void test_inet_parse_ipv4_noport_bad(void) -+{ -+ inet_parse_test_helper("127.0.0.1", NULL, false); -+} -+ -+static void test_inet_parse_ipv6_good(void) -+{ -+ char host[] = "::1"; -+ char port[] = "5000"; -+ InetSocketAddress exp_addr = { -+ .host = host, -+ .port = port, -+ }; -+ inet_parse_test_helper("[::1]:5000", &exp_addr, true); -+} -+ -+static void test_inet_parse_ipv6_noend_bad(void) -+{ -+ inet_parse_test_helper("[::1", NULL, false); -+} -+ -+static void test_inet_parse_ipv6_noport_bad(void) -+{ -+ inet_parse_test_helper("[::1]:", NULL, false); -+} -+ -+static void test_inet_parse_ipv6_empty_bad(void) -+{ -+ inet_parse_test_helper("[]:5000", NULL, false); -+} -+ -+static void test_inet_parse_hostname_good(void) -+{ -+ char host[] = "localhost"; -+ char port[] = "5000"; -+ InetSocketAddress exp_addr = { -+ .host = host, -+ .port = port, -+ }; -+ inet_parse_test_helper("localhost:5000", &exp_addr, true); -+} -+ -+static void test_inet_parse_all_options_good(void) -+{ -+ char host[] = "::1"; -+ char port[] = "5000"; -+ InetSocketAddress exp_addr = { -+ .host = host, -+ .port = port, -+ .has_numeric = true, -+ .numeric = true, -+ .has_to = true, -+ .to = 5006, -+ .has_ipv4 = true, -+ .ipv4 = false, -+ .has_ipv6 = true, -+ .ipv6 = true, -+ .has_keep_alive = true, -+ .keep_alive = true, -+#ifdef HAVE_IPPROTO_MPTCP -+ .has_mptcp = true, -+ .mptcp = false, -+#endif -+ }; -+ 
inet_parse_test_helper( -+ "[::1]:5000,numeric=on,to=5006,ipv4=off,ipv6=on,keep-alive=on" -+#ifdef HAVE_IPPROTO_MPTCP -+ ",mptcp=off" -+#endif -+ , &exp_addr, true); -+} -+ -+static void test_inet_parse_all_implicit_bool_good(void) -+{ -+ char host[] = "::1"; -+ char port[] = "5000"; -+ InetSocketAddress exp_addr = { -+ .host = host, -+ .port = port, -+ .has_numeric = true, -+ .numeric = true, -+ .has_to = true, -+ .to = 5006, -+ .has_ipv4 = true, -+ .ipv4 = true, -+ .has_ipv6 = true, -+ .ipv6 = true, -+ .has_keep_alive = true, -+ .keep_alive = true, -+#ifdef HAVE_IPPROTO_MPTCP -+ .has_mptcp = true, -+ .mptcp = true, -+#endif -+ }; -+ inet_parse_test_helper( -+ "[::1]:5000,numeric,to=5006,ipv4,ipv6,keep-alive" -+#ifdef HAVE_IPPROTO_MPTCP -+ ",mptcp" -+#endif -+ , &exp_addr, true); -+} -+ - int main(int argc, char **argv) - { - bool has_ipv4, has_ipv6; -@@ -377,6 +548,31 @@ int main(int argc, char **argv) - test_socket_unix_abstract); - #endif - -+ g_test_add_func("/util/socket/inet-parse/nohost-good", -+ test_inet_parse_nohost_good); -+ g_test_add_func("/util/socket/inet-parse/empty-bad", -+ test_inet_parse_empty_bad); -+ g_test_add_func("/util/socket/inet-parse/only-colon-bad", -+ test_inet_parse_only_colon_bad); -+ g_test_add_func("/util/socket/inet-parse/ipv4-good", -+ test_inet_parse_ipv4_good); -+ g_test_add_func("/util/socket/inet-parse/ipv4-noport-bad", -+ test_inet_parse_ipv4_noport_bad); -+ g_test_add_func("/util/socket/inet-parse/ipv6-good", -+ test_inet_parse_ipv6_good); -+ g_test_add_func("/util/socket/inet-parse/ipv6-noend-bad", -+ test_inet_parse_ipv6_noend_bad); -+ g_test_add_func("/util/socket/inet-parse/ipv6-noport-bad", -+ test_inet_parse_ipv6_noport_bad); -+ g_test_add_func("/util/socket/inet-parse/ipv6-empty-bad", -+ test_inet_parse_ipv6_empty_bad); -+ g_test_add_func("/util/socket/inet-parse/hostname-good", -+ test_inet_parse_hostname_good); -+ g_test_add_func("/util/socket/inet-parse/all-options-good", -+ test_inet_parse_all_options_good); -+ 
g_test_add_func("/util/socket/inet-parse/all-bare-bool-good", -+ test_inet_parse_all_implicit_bool_good); -+ - end: - return g_test_run(); - } -diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c -index 4fbf1ed5bf..403dc26b36 100644 ---- a/util/qemu-sockets.c -+++ b/util/qemu-sockets.c -@@ -30,6 +30,7 @@ - #include "qapi/qobject-input-visitor.h" - #include "qapi/qobject-output-visitor.h" - #include "qemu/cutils.h" -+#include "qemu/option.h" - #include "trace.h" - - #ifndef AI_ADDRCONFIG -@@ -600,115 +601,104 @@ err: - return -1; - } - --/* compatibility wrapper */ --static int inet_parse_flag(const char *flagname, const char *optstr, bool *val, -- Error **errp) --{ -- char *end; -- size_t len; -- -- end = strstr(optstr, ","); -- if (end) { -- if (end[1] == ',') { /* Reject 'ipv6=on,,foo' */ -- error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr); -- return -1; -- } -- len = end - optstr; -- } else { -- len = strlen(optstr); -- } -- if (len == 0 || (len == 3 && strncmp(optstr, "=on", len) == 0)) { -- *val = true; -- } else if (len == 4 && strncmp(optstr, "=off", len) == 0) { -- *val = false; -- } else { -- error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr); -- return -1; -- } -- return 0; --} -+static QemuOptsList inet_opts = { -+ .name = "InetSocketAddress", -+ .head = QTAILQ_HEAD_INITIALIZER(inet_opts.head), -+ .implied_opt_name = "addr", -+ .desc = { -+ { -+ .name = "addr", -+ .type = QEMU_OPT_STRING, -+ }, -+ { -+ .name = "numeric", -+ .type = QEMU_OPT_BOOL, -+ }, -+ { -+ .name = "to", -+ .type = QEMU_OPT_NUMBER, -+ }, -+ { -+ .name = "ipv4", -+ .type = QEMU_OPT_BOOL, -+ }, -+ { -+ .name = "ipv6", -+ .type = QEMU_OPT_BOOL, -+ }, -+ { -+ .name = "keep-alive", -+ .type = QEMU_OPT_BOOL, -+ }, -+#ifdef HAVE_IPPROTO_MPTCP -+ { -+ .name = "mptcp", -+ .type = QEMU_OPT_BOOL, -+ }, -+#endif -+ { /* end of list */ } -+ }, -+}; - - int inet_parse(InetSocketAddress *addr, const char *str, Error **errp) - { -- const char *optstr, *h; -- char 
host[65]; -- char port[33]; -- int to; -- int pos; -- char *begin; -- -+ QemuOpts *opts = qemu_opts_parse(&inet_opts, str, true, errp); -+ if (!opts) { -+ return -1; -+ } - memset(addr, 0, sizeof(*addr)); - - /* parse address */ -- if (str[0] == ':') { -- /* no host given */ -- host[0] = '\0'; -- if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) { -- error_setg(errp, "error parsing port in address '%s'", str); -- return -1; -- } -- } else if (str[0] == '[') { -+ const char *addr_str = qemu_opt_get(opts, "addr"); -+ if (!addr_str) { -+ error_setg(errp, "error parsing address ''"); -+ return -1; -+ } -+ if (str[0] == '[') { - /* IPv6 addr */ -- if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) { -- error_setg(errp, "error parsing IPv6 address '%s'", str); -+ const char *ip_end = strstr(addr_str, "]:"); -+ if (!ip_end || ip_end - addr_str < 2 || strlen(ip_end) < 3) { -+ error_setg(errp, "error parsing IPv6 address '%s'", addr_str); - return -1; - } -+ addr->host = g_strndup(addr_str + 1, ip_end - addr_str - 1); -+ addr->port = g_strdup(ip_end + 2); - } else { -- /* hostname or IPv4 addr */ -- if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) { -- error_setg(errp, "error parsing address '%s'", str); -+ /* no host, hostname or IPv4 addr */ -+ const char *port = strchr(addr_str, ':'); -+ if (!port || strlen(port) < 2) { -+ error_setg(errp, "error parsing address '%s'", addr_str); - return -1; - } -+ addr->host = g_strndup(addr_str, port - addr_str); -+ addr->port = g_strdup(port + 1); - } - -- addr->host = g_strdup(host); -- addr->port = g_strdup(port); -- - /* parse options */ -- optstr = str + pos; -- h = strstr(optstr, ",to="); -- if (h) { -- h += 4; -- if (sscanf(h, "%d%n", &to, &pos) != 1 || -- (h[pos] != '\0' && h[pos] != ',')) { -- error_setg(errp, "error parsing to= argument"); -- return -1; -- } -+ if (qemu_opt_find(opts, "numeric")) { -+ addr->has_numeric = true, -+ addr->numeric = qemu_opt_get_bool(opts, "numeric", false); -+ } -+ if 
(qemu_opt_find(opts, "to")) { - addr->has_to = true; -- addr->to = to; -+ addr->to = qemu_opt_get_number(opts, "to", 0); - } -- begin = strstr(optstr, ",ipv4"); -- if (begin) { -- if (inet_parse_flag("ipv4", begin + 5, &addr->ipv4, errp) < 0) { -- return -1; -- } -+ if (qemu_opt_find(opts, "ipv4")) { - addr->has_ipv4 = true; -+ addr->ipv4 = qemu_opt_get_bool(opts, "ipv4", false); - } -- begin = strstr(optstr, ",ipv6"); -- if (begin) { -- if (inet_parse_flag("ipv6", begin + 5, &addr->ipv6, errp) < 0) { -- return -1; -- } -+ if (qemu_opt_find(opts, "ipv6")) { - addr->has_ipv6 = true; -+ addr->ipv6 = qemu_opt_get_bool(opts, "ipv6", false); - } -- begin = strstr(optstr, ",keep-alive"); -- if (begin) { -- if (inet_parse_flag("keep-alive", begin + strlen(",keep-alive"), -- &addr->keep_alive, errp) < 0) -- { -- return -1; -- } -+ if (qemu_opt_find(opts, "keep-alive")) { - addr->has_keep_alive = true; -+ addr->keep_alive = qemu_opt_get_bool(opts, "keep-alive", false); - } - #ifdef HAVE_IPPROTO_MPTCP -- begin = strstr(optstr, ",mptcp"); -- if (begin) { -- if (inet_parse_flag("mptcp", begin + strlen(",mptcp"), -- &addr->mptcp, errp) < 0) -- { -- return -1; -- } -+ if (qemu_opt_find(opts, "mptcp")) { - addr->has_mptcp = true; -+ addr->mptcp = qemu_opt_get_bool(opts, "mptcp", 0); - } - #endif - return 0; --- -2.39.3 - diff --git a/kvm-util-qemu-sockets-Refactor-setting-client-sockopts-i.patch b/kvm-util-qemu-sockets-Refactor-setting-client-sockopts-i.patch deleted file mode 100644 index b9329d6..0000000 --- a/kvm-util-qemu-sockets-Refactor-setting-client-sockopts-i.patch +++ /dev/null @@ -1,83 +0,0 @@ -From e6348c4dd343f1a367a30f08c01a0f25c764a93c Mon Sep 17 00:00:00 2001 -From: Juraj Marcin -Date: Wed, 21 May 2025 15:52:31 +0200 -Subject: [PATCH 4/9] util/qemu-sockets: Refactor setting client sockopts into - a separate function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juraj Marcin -RH-MergeRequest: 368: 
util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive -RH-Jira: RHEL-67706 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/7] c06976f96dbdf70b15079aa81dd1ac87abb0f1ab (JurajMarcin/centos-src-qemu-kvm) - -This is done in preparation for enabling the SO_KEEPALIVE support for -server sockets and adding settings for more TCP keep-alive socket -options. - -Signed-off-by: Juraj Marcin -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Daniel P. Berrangé - -(cherry picked from commit b8b5278aca78be4a1c2e7cbb11c6be176f63706d) - -JIRA: https://issues.redhat.com/browse/RHEL-67706 - -Signed-off-by: Juraj Marcin ---- - util/qemu-sockets.c | 29 +++++++++++++++++++---------- - 1 file changed, 19 insertions(+), 10 deletions(-) - -diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c -index 77477c1cd5..4a878e0527 100644 ---- a/util/qemu-sockets.c -+++ b/util/qemu-sockets.c -@@ -205,6 +205,22 @@ static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e) - #endif - } - -+static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp) -+{ -+ if (saddr->keep_alive) { -+ int keep_alive = 1; -+ int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, -+ &keep_alive, sizeof(keep_alive)); -+ -+ if (ret < 0) { -+ error_setg_errno(errp, errno, -+ "Unable to set keep-alive option on socket"); -+ return -1; -+ } -+ } -+ return 0; -+} -+ - static int inet_listen_saddr(InetSocketAddress *saddr, - int port_offset, - int num, -@@ -475,16 +491,9 @@ int inet_connect_saddr(InetSocketAddress *saddr, Error **errp) - return sock; - } - -- if (saddr->keep_alive) { -- int val = 1; -- int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, -- &val, sizeof(val)); -- -- if (ret < 0) { -- error_setg_errno(errp, errno, "Unable to set KEEPALIVE"); -- close(sock); -- return -1; -- } -+ if (inet_set_sockopts(sock, saddr, errp) < 0) { -+ close(sock); -+ return -1; - } - - return sock; --- -2.39.3 - diff --git 
a/kvm-util-qemu-sockets-Refactor-success-and-failure-paths.patch b/kvm-util-qemu-sockets-Refactor-success-and-failure-paths.patch deleted file mode 100644 index ff7ecb4..0000000 --- a/kvm-util-qemu-sockets-Refactor-success-and-failure-paths.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 5001e77f9a8b91bee86e5c8ecbc417d0d2551855 Mon Sep 17 00:00:00 2001 -From: Juraj Marcin -Date: Wed, 21 May 2025 15:52:32 +0200 -Subject: [PATCH 5/9] util/qemu-sockets: Refactor success and failure paths in - inet_listen_saddr() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juraj Marcin -RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive -RH-Jira: RHEL-67706 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/7] 2e18c6f6eeadf99d6a526a089009bb89cd2fd6a8 (JurajMarcin/centos-src-qemu-kvm) - -To get a listening socket, we need to first create a socket, try binding -it to a certain port, and lastly starting listening to it. Each of these -operations can fail due to various reasons, one of them being that the -requested address/port is already in use. In such case, the function -tries the same process with a new port number. - -This patch refactors the port number loop, so the success path is no -longer buried inside the 'if' statements in the middle of the loop. Now, -the success path is not nested and ends at the end of the iteration -after successful socket creation, binding, and listening. In case any of -the operations fails, it either continues to the next iteration (and the -next port) or jumps out of the loop to handle the error and exits the -function. - -Signed-off-by: Juraj Marcin -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Daniel P. 
Berrangé - -(cherry picked from commit 911e0f2c6e2d00c985affa75ec188c8edcf480f2) - -JIRA: https://issues.redhat.com/browse/RHEL-67706 - -Signed-off-by: Juraj Marcin ---- - util/qemu-sockets.c | 51 ++++++++++++++++++++++++--------------------- - 1 file changed, 27 insertions(+), 24 deletions(-) - -diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c -index 4a878e0527..329fdbfd97 100644 ---- a/util/qemu-sockets.c -+++ b/util/qemu-sockets.c -@@ -303,11 +303,20 @@ static int inet_listen_saddr(InetSocketAddress *saddr, - port_min = inet_getport(e); - port_max = saddr->has_to ? saddr->to + port_offset : port_min; - for (p = port_min; p <= port_max; p++) { -+ if (slisten >= 0) { -+ /* -+ * We have a socket we tried with the previous port. It cannot -+ * be rebound, we need to close it and create a new one. -+ */ -+ close(slisten); -+ slisten = -1; -+ } - inet_setport(e, p); - - slisten = create_fast_reuse_socket(e); - if (slisten < 0) { -- /* First time we expect we might fail to create the socket -+ /* -+ * First time we expect we might fail to create the socket - * eg if 'e' has AF_INET6 but ipv6 kmod is not loaded. - * Later iterations should always succeed if first iteration - * worked though, so treat that as fatal. 
-@@ -317,40 +326,38 @@ static int inet_listen_saddr(InetSocketAddress *saddr, - } else { - error_setg_errno(errp, errno, - "Failed to recreate failed listening socket"); -- goto listen_failed; -+ goto fail; - } - } - socket_created = true; - - rc = try_bind(slisten, saddr, e); - if (rc < 0) { -- if (errno != EADDRINUSE) { -- error_setg_errno(errp, errno, "Failed to bind socket"); -- goto listen_failed; -- } -- } else { -- if (!listen(slisten, num)) { -- goto listen_ok; -+ if (errno == EADDRINUSE) { -+ /* This port is already used, try the next one */ -+ continue; - } -- if (errno != EADDRINUSE) { -- error_setg_errno(errp, errno, "Failed to listen on socket"); -- goto listen_failed; -+ error_setg_errno(errp, errno, "Failed to bind socket"); -+ goto fail; -+ } -+ if (listen(slisten, num)) { -+ if (errno == EADDRINUSE) { -+ /* This port is already used, try the next one */ -+ continue; - } -+ error_setg_errno(errp, errno, "Failed to listen on socket"); -+ goto fail; - } -- /* Someone else managed to bind to the same port and beat us -- * to listen on it! Socket semantics does not allow us to -- * recover from this situation, so we need to recreate the -- * socket to allow bind attempts for subsequent ports: -- */ -- close(slisten); -- slisten = -1; -+ /* We have a listening socket */ -+ freeaddrinfo(res); -+ return slisten; - } - } - error_setg_errno(errp, errno, - socket_created ? 
- "Failed to find an available port" : - "Failed to create a socket"); --listen_failed: -+fail: - saved_errno = errno; - if (slisten >= 0) { - close(slisten); -@@ -358,10 +365,6 @@ listen_failed: - freeaddrinfo(res); - errno = saved_errno; - return -1; -- --listen_ok: -- freeaddrinfo(res); -- return slisten; - } - - #ifdef _WIN32 --- -2.39.3 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e6d2c9d..cfb4c7d 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -142,8 +142,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 10.0.0 -Release: 12%{?rcrel}%{?dist}%{?cc_suffix} +Version: 10.1.0 +Release: 1%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -178,248 +178,14 @@ Patch0010: 0010-Increase-deletion-schedule-to-4-releases.patch Patch0011: 0011-Add-downstream-aarch64-versioned-virt-machine-types.patch Patch0012: 0012-Add-downstream-s390x-versioned-s390-ccw-virtio-machi.patch Patch0013: 0013-Add-downstream-x86_64-versioned-pc-q35-machine-types.patch -Patch0014: 0014-Revert-meson-temporarily-disable-Wunused-function.patch -Patch0015: 0015-Enable-make-check.patch -Patch0016: 0016-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0017: 0017-Add-support-statement-to-help-output.patch -Patch0018: 0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0019: 0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -# For RHEL-87642 - QEMU sends unaligned discards on 4K devices[RHEL-10] -Patch20: kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch -# For RHEL-87642 - QEMU sends unaligned discards on 4K devices[RHEL-10] -Patch21: kvm-block-io-skip-head-tail-requests-on-EINVAL.patch -# For RHEL-87642 - QEMU sends unaligned discards on 4K devices[RHEL-10] -Patch22: 
kvm-file-posix-Fix-crash-on-discard_granularity-0.patch -# For RHEL-86056 - Enable 'vhost-user-gpu-pci' in qemu-kvm for RHIVOS -Patch23: kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch -# For RHEL-85635 - Video stuck about 1 min after switchover phase when play one video during postcopy-preempt migration -Patch24: kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch -# For RHEL-88457 - qemu inadvertantly built with valgrind coroutine stack debugging on x86_64 -Patch25: kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch -# Fixing s390x build issues -Patch26: kvm-docs-Don-t-define-duplicate-label-in-qemu-block-driv.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch27: kvm-block-Expand-block-status-mode-from-bool-to-flags.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch28: kvm-file-posix-gluster-Handle-zero-block-status-hint-bet.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch29: kvm-block-Let-bdrv_co_is_zero_fast-consolidate-adjacent-.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch30: kvm-block-Add-new-bdrv_co_is_all_zeroes-function.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full 
allocated when doing mirror with default discard value [rhel-10.1] -Patch31: kvm-iotests-Improve-iotest-194-to-mirror-data.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch32: kvm-mirror-Minor-refactoring.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch33: kvm-mirror-Pass-full-sync-mode-rather-than-bool-to-inter.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch34: kvm-mirror-Allow-QMP-override-to-declare-target-already-.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch35: kvm-mirror-Drop-redundant-zero_target-parameter.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch36: kvm-mirror-Skip-pre-zeroing-destination-if-it-is-already.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch37: kvm-mirror-Skip-writing-zeroes-when-target-is-already-ze.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For 
RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch38: kvm-iotests-common.rc-add-disk_usage-function.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch39: kvm-tests-Add-iotest-mirror-sparse-for-recent-patches.patch -# For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] -# For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] -Patch40: kvm-mirror-Reduce-I-O-when-destination-is-detect-zeroes-.patch -# For RHEL-65852 - Support multipath failover with scsi-block -Patch41: kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch -# For RHEL-65852 - Support multipath failover with scsi-block -Patch42: kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch -# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' -Patch43: kvm-io-Fix-partial-struct-copy-in-qio_dns_resolver_looku.patch -# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' -Patch44: kvm-util-qemu-sockets-Refactor-setting-client-sockopts-i.patch -# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' -Patch45: kvm-util-qemu-sockets-Refactor-success-and-failure-paths.patch -# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' -Patch46: kvm-util-qemu-sockets-Add-support-for-keep-alive-flag-to.patch -# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' -Patch47: 
kvm-util-qemu-sockets-Refactor-inet_parse-to-use-QemuOpt.patch -# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' -Patch48: kvm-util-qemu-sockets-Introduce-inet-socket-options-cont.patch -# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' -Patch49: kvm-tests-unit-test-util-sockets-fix-mem-leak-on-error-o.patch -# For RHEL-71962 - [RFE] Implement FUA support in scsi-disk -Patch50: kvm-scsi-disk-Add-native-FUA-write-support.patch -# For RHEL-96057 - qemu-kvm: Various small issues in the spec file -Patch51: kvm-Disable-virtio-net-pci-romfile-loading-on-riscv64.patch -# For RHEL-98555 - [s390x][RHEL10.1][ccw-device] there would be memory leak with virtio_blk disks -Patch52: kvm-s390x-Fix-leak-in-machine_set_loadparm.patch -# For RHEL-98555 - [s390x][RHEL10.1][ccw-device] there would be memory leak with virtio_blk disks -Patch53: kvm-hw-s390x-ccw-device-Fix-memory-leak-in-loadparm-sett.patch -# For RHEL-52650 - [AMDSERVER 10.1 Feature] Turin: Qemu EPYC-Turin Model -Patch54: kvm-target-i386-Update-EPYC-CPU-model-for-Cache-property.patch -# For RHEL-52650 - [AMDSERVER 10.1 Feature] Turin: Qemu EPYC-Turin Model -Patch55: kvm-target-i386-Update-EPYC-Rome-CPU-model-for-Cache-pro.patch -# For RHEL-52650 - [AMDSERVER 10.1 Feature] Turin: Qemu EPYC-Turin Model -Patch56: kvm-target-i386-Update-EPYC-Milan-CPU-model-for-Cache-pr.patch -# For RHEL-52650 - [AMDSERVER 10.1 Feature] Turin: Qemu EPYC-Turin Model -Patch57: kvm-target-i386-Add-couple-of-feature-bits-in-CPUID_Fn80.patch -# For RHEL-52650 - [AMDSERVER 10.1 Feature] Turin: Qemu EPYC-Turin Model -Patch58: kvm-target-i386-Update-EPYC-Genoa-for-Cache-property-per.patch -# For RHEL-52650 - [AMDSERVER 10.1 Feature] Turin: Qemu EPYC-Turin Model -Patch59: kvm-target-i386-Add-support-for-EPYC-Turin-model.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance 
-Patch60: kvm-include-qemu-compiler-add-QEMU_UNINITIALIZED-attribu.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch61: kvm-hw-virtio-virtio-avoid-cost-of-ftrivial-auto-var-ini.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch62: kvm-block-skip-automatic-zero-init-of-large-array-in-ioq.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch63: kvm-chardev-char-fd-skip-automatic-zero-init-of-large-ar.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch64: kvm-chardev-char-pty-skip-automatic-zero-init-of-large-a.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch65: kvm-chardev-char-socket-skip-automatic-zero-init-of-larg.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch66: kvm-hw-audio-ac97-skip-automatic-zero-init-of-large-arra.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch67: kvm-hw-audio-cs4231a-skip-automatic-zero-init-of-large-a.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch68: kvm-hw-audio-es1370-skip-automatic-zero-init-of-large-ar.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch69: kvm-hw-audio-gus-skip-automatic-zero-init-of-large-array.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch70: kvm-hw-audio-marvell_88w8618-skip-automatic-zero-init-of.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch71: kvm-hw-audio-sb16-skip-automatic-zero-init-of-large-arra.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch72: kvm-hw-audio-via-ac97-skip-automatic-zero-init-of-large-.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch73: kvm-hw-char-sclpconsole-lm-skip-automatic-zero-init-of-l.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch74: 
kvm-hw-dma-xlnx_csu_dma-skip-automatic-zero-init-of-larg.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch75: kvm-hw-display-vmware_vga-skip-automatic-zero-init-of-la.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch76: kvm-hw-hyperv-syndbg-skip-automatic-zero-init-of-large-a.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch77: kvm-hw-misc-aspeed_hace-skip-automatic-zero-init-of-larg.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch78: kvm-hw-net-rtl8139-skip-automatic-zero-init-of-large-arr.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch79: kvm-hw-net-tulip-skip-automatic-zero-init-of-large-array.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch80: kvm-hw-net-virtio-net-skip-automatic-zero-init-of-large-.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch81: kvm-hw-net-xgamc-skip-automatic-zero-init-of-large-array.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch82: kvm-hw-nvme-ctrl-skip-automatic-zero-init-of-large-array.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch83: kvm-hw-ppc-pnv_occ-skip-automatic-zero-init-of-large-str.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch84: kvm-hw-ppc-spapr_tpm_proxy-skip-automatic-zero-init-of-l.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch85: kvm-hw-usb-hcd-ohci-skip-automatic-zero-init-of-large-ar.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch86: kvm-hw-scsi-lsi53c895a-skip-automatic-zero-init-of-large.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch87: kvm-hw-scsi-megasas-skip-automatic-zero-init-of-large-ar.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch88: 
kvm-hw-ufs-lu-skip-automatic-zero-init-of-large-array.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch89: kvm-net-socket-skip-automatic-zero-init-of-large-array.patch -# For RHEL-95479 - -ftrivial-auto-var-init=zero reduced performance -Patch90: kvm-net-stream-skip-automatic-zero-init-of-large-array.patch -# For RHEL-85649 - [RHEL 10]Qemu/amd-iommu: Add ability to manually specify the AMDVI-PCI device -Patch91: kvm-hw-i386-amd_iommu-Isolate-AMDVI-PCI-from-amd-iommu-d.patch -# For RHEL-85649 - [RHEL 10]Qemu/amd-iommu: Add ability to manually specify the AMDVI-PCI device -Patch92: kvm-hw-i386-amd_iommu-Allow-migration-when-explicitly-cr.patch -# For RHEL-85649 - [RHEL 10]Qemu/amd-iommu: Add ability to manually specify the AMDVI-PCI device -Patch93: kvm-Enable-amd-iommu-device.patch -# For RHEL-83883 - Video stuck after switchover phase when play one video during migration -Patch94: kvm-ui-vnc-Update-display-update-interval-when-VM-state-.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during precopy -Patch95: kvm-migration-multifd-move-macros-to-multifd-header.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during precopy -Patch96: kvm-migration-refactor-channel-discovery-mechanism.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during precopy -Patch97: kvm-migration-Add-save_postcopy_prepare-savevm-handler.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during precopy -Patch98: kvm-migration-ram-Implement-save_postcopy_prepare.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during precopy -Patch99: kvm-tests-qtest-migration-consolidate-set-capabilities.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during 
precopy -Patch100: kvm-migration-write-zero-pages-when-postcopy-enabled.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during precopy -Patch101: kvm-migration-enable-multifd-and-postcopy-together.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during precopy -Patch102: kvm-migration-Add-qtest-for-migration-over-RDMA.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during precopy -Patch103: kvm-qtest-migration-rdma-Enforce-RLIMIT_MEMLOCK-128MB-re.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during precopy -Patch104: kvm-qtest-migration-rdma-Add-test-for-rdma-migration-wit.patch -# For RHEL-59697 - Allow multifd+postcopy features being enabled together, but only use multifd during precopy -Patch105: kvm-tests-qtest-migration-add-postcopy-tests-with-multif.patch -# For RHEL-96854 - Performance Degradation(aio=threads) between Upstream Commit b75c5f9 and 984a32f -Patch106: kvm-file-posix-Fix-aio-threads-performance-regression-af.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch107: kvm-block-remove-outdated-comments-about-AioContext-lock.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch108: kvm-block-move-drain-outside-of-read-locked-bdrv_reopen_.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch109: kvm-block-snapshot-move-drain-outside-of-read-locked-bdr.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch110: kvm-block-move-drain-outside-of-read-locked-bdrv_inactiv.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch111: kvm-block-mark-bdrv_parent_change_aio_context-GRAPH_RDLO.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch112: kvm-block-mark-change_aio_ctx-callback-and-instances-as-.patch -# For RHEL-88561 - qemu graph 
deadlock during job-dismiss -Patch113: kvm-block-mark-bdrv_child_change_aio_context-GRAPH_RDLOC.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch114: kvm-block-move-drain-outside-of-bdrv_change_aio_context-.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch115: kvm-block-move-drain-outside-of-bdrv_try_change_aio_cont.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch116: kvm-block-move-drain-outside-of-bdrv_attach_child_common.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch117: kvm-block-move-drain-outside-of-bdrv_set_backing_hd_drai.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch118: kvm-block-move-drain-outside-of-bdrv_root_attach_child.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch119: kvm-block-move-drain-outside-of-bdrv_attach_child.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch120: kvm-block-move-drain-outside-of-quorum_add_child.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch121: kvm-block-move-drain-outside-of-bdrv_root_unref_child.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch122: kvm-block-move-drain-outside-of-quorum_del_child.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch123: kvm-blockdev-drain-while-unlocked-in-internal_snapshot_a.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch124: kvm-blockdev-drain-while-unlocked-in-external_snapshot_a.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch125: kvm-block-mark-bdrv_drained_begin-and-friends-as-GRAPH_U.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch126: kvm-iotests-graph-changes-while-io-remove-image-file-aft.patch -# For RHEL-88561 - qemu graph deadlock during job-dismiss -Patch127: kvm-iotests-graph-changes-while-io-add-test-case-with-re.patch -# For RHEL-45624 - Deprecate rtl8139 NIC in QEMU -Patch128: 
kvm-Declare-rtl8139-as-deprecated.patch -# For RHEL-102325 - [qemu] enable variable service for edk2 -Patch129: kvm-Enable-uefi-variable-service-for-edk2.patch -# For RHEL-105440 - Openstack guest becomes inaccessible via network when storage network on the hypervisor is disabled/lost [rhel-10.1] -Patch130: kvm-rbd-Fix-.bdrv_get_specific_info-implementation.patch +Patch0014: 0014-Disable-virtio-net-pci-romfile-loading-on-riscv64.patch +Patch0015: 0015-Revert-meson-temporarily-disable-Wunused-function.patch +Patch0016: 0016-Enable-make-check.patch +Patch0017: 0017-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0018: 0018-Add-support-statement-to-help-output.patch +Patch0019: 0019-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0020: 0020-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +Patch0021: 0021-file-posix-Define-DM_MPATH_PROBE_PATHS.patch %if %{have_clang} BuildRequires: clang @@ -631,8 +397,8 @@ Requires: %{name} = %{epoch}:%{version}-%{release} The %{name}-tests rpm contains tests that can be used to verify the functionality of the installed %{name} package -Install this package if you want access to the avocado_qemu -tests, or qemu-iotests. +Install this package if you want access to the qemu tests, +or qemu-iotests. 
%package block-blkio @@ -774,8 +540,6 @@ ulimit -n 10240 --disable-asan \\\ --disable-attr \\\ --disable-auth-pam \\\ - --disable-avx2 \\\ - --disable-avx512bw \\\ --disable-blkio \\\ --disable-block-drv-whitelist-in-tools \\\ --disable-bochs \\\ @@ -1118,7 +882,6 @@ install -D -p -m 0644 %{modprobe_kvm_conf} $RPM_BUILD_ROOT%{_sysconfdir}/modprob # Create new directories and put them all under tests-src mkdir -p %{buildroot}%{testsdir}/python mkdir -p %{buildroot}%{testsdir}/tests -mkdir -p %{buildroot}%{testsdir}/tests/avocado mkdir -p %{buildroot}%{testsdir}/tests/qemu-iotests mkdir -p %{buildroot}%{testsdir}/scripts/qmp @@ -1126,10 +889,8 @@ mkdir -p %{buildroot}%{testsdir}/scripts/qmp install -m 0644 scripts/dump-guest-memory.py \ %{buildroot}%{_datadir}/%{name} -# Install avocado_qemu tests -cp -R %{qemu_kvm_build}/tests/avocado/* %{buildroot}%{testsdir}/tests/avocado/ -# Install qemu.py and qmp/ scripts required to run avocado_qemu tests +# Install qemu.py and qmp/ scripts required to run tests cp -R %{qemu_kvm_build}/python/qemu %{buildroot}%{testsdir}/python cp -R %{qemu_kvm_build}/scripts/qmp/* %{buildroot}%{testsdir}/scripts/qmp install -p -m 0644 tests/Makefile.include %{buildroot}%{testsdir}/tests/ @@ -1194,8 +955,8 @@ rm -rf %{buildroot}%{_datadir}/%{name}/slof.bin # Remove unpackaged files. 
rm -rf %{buildroot}%{_datadir}/%{name}/palcode-clipper -rm -rf %{buildroot}%{_datadir}/%{name}/petalogix*.dtb -rm -f %{buildroot}%{_datadir}/%{name}/bamboo.dtb +rm -rf %{buildroot}%{_datadir}/%{name}/dtb/petalogix*.dtb +rm -f %{buildroot}%{_datadir}/%{name}/dtb/bamboo.dtb rm -f %{buildroot}%{_datadir}/%{name}/ppc_rom.bin rm -rf %{buildroot}%{_datadir}/%{name}/s390-zipl.rom rm -rf %{buildroot}%{_datadir}/%{name}/u-boot.e500 @@ -1207,7 +968,7 @@ rm -rf %{buildroot}%{_datadir}/%{name}/pnv-pnor.bin rm -rf %{buildroot}%{_datadir}/%{name}/s390-ccw.img rm -rf %{buildroot}%{_datadir}/%{name}/hppa-firmware.img rm -rf %{buildroot}%{_datadir}/%{name}/hppa-firmware64.img -rm -rf %{buildroot}%{_datadir}/%{name}/canyonlands.dtb +rm -rf %{buildroot}%{_datadir}/%{name}/dtb/canyonlands.dtb rm -rf %{buildroot}%{_datadir}/%{name}/u-boot-sam460-20100605.bin rm -rf %{buildroot}%{_datadir}/%{name}/firmware @@ -1223,6 +984,7 @@ rm -rf %{buildroot}%{_datadir}/%{name}/opensbi-riscv64-generic-fw_dynamic.* rm -rf %{buildroot}%{_datadir}/%{name}/qemu-nsis.bmp rm -rf %{buildroot}%{_datadir}/%{name}/npcm7xx_bootrom.bin rm -rf %{buildroot}%{_datadir}/%{name}/npcm8xx_bootrom.bin +rm -rf %{buildroot}%{_datadir}/%{name}/ast27x0_bootrom.bin # Remove virtfs-proxy-helper files rm -rf %{buildroot}%{_libexecdir}/virtfs-proxy-helper @@ -1503,6 +1265,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Aug 29 2025 Miroslav Rezanina - 10.1.0-1 +- Rebase to QEMU 10.1.0 [RHEL-105035] +- Resolves: RHEL-105035 + (Rebase qemu-kvm to QEMU 10.1.0) + * Thu Aug 21 2025 Miroslav Rezanina - 10.0.0-12 - kvm-RHEL-Pack-uefi-vars-module.patch [RHEL-102325] - Resolves: RHEL-102325 @@ -1919,10 +1686,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: RHEL-45110 ([Intel 10.0 FEAT] [CWF][DMR] Virt-QEMU: Advertise new instructions SHA2-512NI, SM3, and SM4) -* Tue Oct 29 2024 Troy Dawson - 18:9.1.0-3.1 -- Bump release for October 2024 mass rebuild: - Resolves: RHEL-64018 
- * Mon Oct 07 2024 Miroslav Rezanina - 9.1.0-3 - kvm-hostmem-Apply-merge-property-after-the-memory-region.patch [RHEL-58936] - Resolves: RHEL-58936 diff --git a/sources b/sources index b2a8ce2..cf81344 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-10.0.0.tar.xz) = 2215458ed8be3ab0b0032fe2a96e79183f5fc2da323d927018412ea3d980b022a07ba87d4f446229eaaa7d1b19a577d438dbcaa3af3bd537c7720b56734a2d8b +SHA512 (qemu-10.1.0.tar.xz) = 20552a524b6b298181df1af7084b470ded3fe8d1505f05011dda3c33cbc3d91f518ce026b44ba1a8b7f34c64ae81afddceda383066f4772a3a2a6333a2638caf